From e749188c6fe063eedd8e91af12d036195b2fd6e8 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown" <titus@idyll.org>
Date: Fri, 5 Apr 2013 11:07:44 -0700
Subject: [PATCH 1/1] fiz

W. Trevor King: I dropped everything from the original c5cb901 except
for the notebooks/testing-with-nose* additions.

Conflicts:
	day1-afternoon.rst
	notebooks/using-screed.ipynb
---
 notebooks/testing-with-nose-bak.ipynb | 271 ++++++++++++++++++++++++++
 notebooks/testing-with-nose.ipynb     | 143 ++++++++++++++
 2 files changed, 414 insertions(+)
 create mode 100644 notebooks/testing-with-nose-bak.ipynb
 create mode 100644 notebooks/testing-with-nose.ipynb

diff --git a/notebooks/testing-with-nose-bak.ipynb b/notebooks/testing-with-nose-bak.ipynb
new file mode 100644
index 0000000..7069ab7
--- /dev/null
+++ b/notebooks/testing-with-nose-bak.ipynb
@@ -0,0 +1,271 @@
+{
+ "metadata": {
+  "name": "testing-with-nose-bak"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## unit tests\n",
+      "\n",
+      "This is an example of unit testing with nose.  We are trying to make sure that the function calc_gc properly calculates the GC fraction of a DNA sequence.\n",
+      "\n",
+      "Problems worked through in class included --\n",
+      "\n",
+      "1. the sequence contained 'N's\n",
+      "2. the sequence contained lowercase characters\n",
+      "3. division by zero for sequences with no A, T, C, or G"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%file calc_gc.py\n",
+      "def calc_gc(sequence):\n",
+      "    sequence = sequence.upper()                    # make all chars uppercase\n",
+      "    n = sequence.count('T') + sequence.count('A')  # count only A, T,\n",
+      "    m = sequence.count('G') + sequence.count('C')  # C, and G -- nothing else (no Ns, Rs, Ws, etc.)\n",
+      "    if n + m == 0:\n",
+      "        return 0.                                  # avoid divide-by-zero\n",
+      "    return float(m) / float(n + m)\n",
+      "\n",
+      "def test_1():\n",
+      "    result = round(calc_gc('ATGGCAT'), 2)\n",
+      "    print 'hello, this is a test; the value of result is', result\n",
+      "    assert result == 0.43\n",
+      "    \n",
+      "def test_2(): # test handling N\n",
+      "    result = round(calc_gc('NATGC'), 2)\n",
+      "    assert result == 0.5, result\n",
+      "    \n",
+      "def test_3(): # test handling lowercase\n",
+      "    result = round(calc_gc('natgc'), 2)\n",
+      "    assert result == 0.5, result\n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting calc_gc.py\n"
+       ]
+      }
+     ],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Running nosetests\n",
+      "\n",
+      "Here, the 'nosetests' command looks through calc_gc.py, finds all functions whose names start with 'test_', and runs them."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests calc_gc.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "...\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 3 tests in 0.001s\r\n",
+        "\r\n",
+        "OK\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "You can also run nosetests with the '-v' (verbose) option, which lists each test by name along with its result:"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests -v calc_gc.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "calc_gc.test_1 ... ok\r\n",
+        "calc_gc.test_2 ... ok\r\n",
+        "calc_gc.test_3 ... ok\r\n",
+        "\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 3 tests in 0.001s\r\n",
+        "\r\n",
+        "OK\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Regression testing\n",
+      "\n",
+      "Here I'm going to set up some regression tests, where we're simply comparing the output of a previously run script with the output of that script now.  If we're running on the same data, we should get the same answer... right?\n",
+      "\n",
+      "The script just calculates the GC content of each sequence in 25k.fq.gz and then prints the average of those per-sequence values."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%file gc-of-seqs.py\n",
+      "import sys\n",
+      "import screed\n",
+      "import calc_gc\n",
+      "\n",
+      "filename = sys.argv[1]    # take the sequence filename in from the command line\n",
+      "total_gc = []\n",
+      "for record in screed.open(filename):\n",
+      "    gc = calc_gc.calc_gc(record.sequence)\n",
+      "    total_gc.append(gc)\n",
+      "    \n",
+      "print sum(total_gc) / float(len(total_gc))"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting gc-of-seqs.py"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "\n"
+       ]
+      }
+     ],
+     "prompt_number": 4
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "# run the script and look at the output -- then copy that output into correct_output in test_gc_script.py (written by the next cell).\n",
+      "!python gc-of-seqs.py 25k.fq.gz"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "0.607911191366\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 5
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%file test_gc_script.py\n",
+      "import subprocess\n",
+      "\n",
+      "correct_output = \"0.607911191366\\n\"   # this is taken from the previous exec'd cell\n",
+      "\n",
+      "# the following function checks to see if running this script at the command line\n",
+      "# returns the right result.  make sure you're running this from *within* the python/ subdirectory\n",
+      "# of the 2012-11-scripps/ repository.\n",
+      "def test_run():\n",
+      "    p = subprocess.Popen('python gc-of-seqs.py 25k.fq.gz', shell=True, stdout=subprocess.PIPE)\n",
+      "    (stdout, stderr) = p.communicate()\n",
+      "    assert stdout == correct_output\n",
+      "    \n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting test_gc_script.py"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "\n"
+       ]
+      }
+     ],
+     "prompt_number": 6
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests test_gc_script.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        ".\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 1 test in 0.937s\r\n",
+        "\r\n",
+        "OK\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 7
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 7
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/notebooks/testing-with-nose.ipynb b/notebooks/testing-with-nose.ipynb
new file mode 100644
index 0000000..741773a
--- /dev/null
+++ b/notebooks/testing-with-nose.ipynb
@@ -0,0 +1,143 @@
+{
+ "metadata": {
+  "name": "testing-with-nose"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%file calc_gc.py\n",
+      "\n",
+      "def calc_gc(dna):\n",
+      "    dna = dna.upper()\n",
+      "    gc = dna.count('G') + dna.count('C')\n",
+      "    at = dna.count('A') + dna.count('T')\n",
+      "    \n",
+      "    total = gc + at\n",
+      "    if total == 0:\n",
+      "        return 0.0\n",
+      "    \n",
+      "    frac = gc / float(total)\n",
+      "    return frac\n",
+      "\n",
+      "def test_correct_counting_gc():\n",
+      "    x = calc_gc('ATGC')\n",
+      "    assert x == 0.5, x\n",
+      "    \n",
+      "def test_correct_counting_at():\n",
+      "    x = calc_gc('AAAA')\n",
+      "    assert x == 0.0, x\n",
+      "\n",
+      "def test_3():\n",
+      "    x = calc_gc('atgc')\n",
+      "    assert x == 0.5, x\n",
+      "    \n",
+      "def test_4():\n",
+      "    x = calc_gc('AtgCNN')\n",
+      "    assert x == 0.5, x\n",
+      "    \n",
+      "def test_5():\n",
+      "    x = calc_gc('NNNNNN')\n",
+      "    assert x == 0.0, x"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting calc_gc.py"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "\n"
+       ]
+      }
+     ],
+     "prompt_number": 23
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests -v calc_gc.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "calc_gc.test_correct_counting_gc ... ok\r\n",
+        "calc_gc.test_correct_counting_at ... ok\r\n",
+        "calc_gc.test_3 ... ok\r\n",
+        "calc_gc.test_4 ... ok\r\n",
+        "calc_gc.test_5 ... ok\r\n",
+        "\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 5 tests in 0.001s\r\n",
+        "\r\n",
+        "OK\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 24
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!pip install git+https://github.com/ged-lab/screed.git\n"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Downloading/unpacking git+https://github.com/ged-lab/screed.git\r\n",
+        "  Cloning https://github.com/ged-lab/screed.git to /var/folders/y6/s1r3q87933s6gjdrtmts1npc0000gq/T/pip-Ha7xSv-build\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "  Running setup.py egg_info for package from git+https://github.com/ged-lab/screed.git\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "    \r\n",
+        "Cleaning up...\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 25
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
-- 
2.26.2