updated notebook links test-ctb
authorC. Titus Brown <titus@idyll.org>
Sat, 17 Nov 2012 15:40:20 +0000 (07:40 -0800)
committerW. Trevor King <wking@tremily.us>
Fri, 1 Nov 2013 04:07:59 +0000 (21:07 -0700)
W. Trevor King: I dropped everything from the original 8113d9e except
for the python/testing* modification.

Conflicts:
day2.rst
python-packages.rst

python/testing-with-nose.ipynb

index 5dbe383c6993ccf66cf0eb5c17096a394a8f0c58..45ec9ac2992e44dcf430a6237430f12dc751b611 100644 (file)
@@ -7,17 +7,32 @@
  "worksheets": [
   {
    "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## unit tests\n",
+      "\n",
+      "This is an example of unit testing with nose.  We are trying to make sure that the function calc_gc properly calculated the gc fraction of the DNA sequence.\n",
+      "\n",
+      "Problems worked through in class included --\n",
+      "\n",
+      "1. the sequence contained 'N's\n",
+      "2. the sequence contained lowercase char\n",
+      "3. divide by zero for sequences with no A, T, C, G"
+     ]
+    },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
       "%%file calc_gc.py\n",
       "def calc_gc(sequence):\n",
-      "    sequence = sequence.upper()\n",
-      "    n = sequence.count('T') + sequence.count('A')\n",
-      "    m = sequence.count('G') + sequence.count('C')\n",
+      "    sequence = sequence.upper()                    # make all chars uppercase\n",
+      "    n = sequence.count('T') + sequence.count('A')  # count only A, T,\n",
+      "    m = sequence.count('G') + sequence.count('C')  # C, and G -- nothing else (no Ns, Rs, Ws, etc.)\n",
       "    if n + m == 0:\n",
-      "        return 0.\n",
+      "        return 0.                                  # avoid divide-by-zero\n",
       "    return float(m) / float(n + m)\n",
       "\n",
       "def test_1():\n",
       "    print 'hello, this is a test; the value of result is', result\n",
       "    assert result == 0.43\n",
       "    \n",
-      "def test_2():\n",
+      "def test_2(): # test handling N\n",
       "    result = round(calc_gc('NATGC'), 2)\n",
       "    assert result == 0.5, result\n",
       "    \n",
-      "def test_3():\n",
+      "def test_3(): # test handling lowercase\n",
       "    result = round(calc_gc('natgc'), 2)\n",
       "    assert result == 0.5, result\n"
      ],
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "Overwriting calc_gc.py"
+        "Overwriting calc_gc.py\n"
        ]
-      },
+      }
+     ],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Running nosetests\n",
+      "\n",
+      "Here, the 'nosetests' command looks through calc_gc.py, finds all functions named test_, and runs them."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests calc_gc.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
       {
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "\n"
+        "...\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 3 tests in 0.001s\r\n",
+        "\r\n",
+        "OK\r\n"
        ]
       }
      ],
-     "prompt_number": 42
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "You can also run nosetests with a '-v' option:"
+     ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "!nosetests calc_gc.py"
+      "!nosetests -v calc_gc.py"
      ],
      "language": "python",
      "metadata": {},
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "...\r\n",
+        "calc_gc.test_1 ... ok\r\n",
+        "calc_gc.test_2 ... ok\r\n",
+        "calc_gc.test_3 ... ok\r\n",
+        "\r\n",
         "----------------------------------------------------------------------\r\n",
         "Ran 3 tests in 0.001s\r\n",
         "\r\n",
        ]
       }
      ],
-     "prompt_number": 43
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Regression testing\n",
+      "\n",
+      "Here I'm going to set up some regression tests, where we're simply comparing the output of a previously run script with the output of that script now.  If we're running on the same data, we should get the same answer... right?\n",
+      "\n",
+      "The script just calculates the average of the average GC content of each sequence in 25k.fq.gz."
+     ]
     },
     {
      "cell_type": "code",
       "import screed\n",
       "import calc_gc\n",
       "\n",
-      "filename = sys.argv[1]\n",
+      "filename = sys.argv[1]    # take the sequence filename in from the command line\n",
       "total_gc = []\n",
       "for record in screed.open(filename):\n",
       "    gc = calc_gc.calc_gc(record.sequence)\n",
        "output_type": "stream",
        "stream": "stdout",
        "text": [
-        "Writing gc-of-seqs.py"
+        "Overwriting gc-of-seqs.py"
        ]
       },
       {
        ]
       }
      ],
-     "prompt_number": 44
+     "prompt_number": 4
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
+      "# run the script and look at the output -- then write that output into the following file.\n",
       "!python gc-of-seqs.py 25k.fq.gz"
      ],
      "language": "python",
        ]
       }
      ],
-     "prompt_number": 47
+     "prompt_number": 5
     },
     {
      "cell_type": "code",
       "%%file test_gc_script.py\n",
       "import subprocess\n",
       "\n",
-      "correct_output = \"0.607911191366\\n\"\n",
+      "correct_output = \"0.607911191366\\n\"   # this is taken from the previous exec'd cell\n",
       "\n",
+      "# the following function checks to see if running this script at the command line\n",
+      "# returns the right result.  make sure you're running this from *within* the python/ subdirectory\n",
+      "# of the 2012-11-scripps/ repository.\n",
       "def test_run():\n",
       "    p = subprocess.Popen('python gc-of-seqs.py 25k.fq.gz', shell=True, stdout=subprocess.PIPE)\n",
       "    (stdout, stderr) = p.communicate()\n",
        ]
       }
      ],
-     "prompt_number": 52
+     "prompt_number": 6
     },
     {
      "cell_type": "code",
        "text": [
         ".\r\n",
         "----------------------------------------------------------------------\r\n",
-        "Ran 1 test in 0.969s\r\n",
+        "Ran 1 test in 0.937s\r\n",
         "\r\n",
         "OK\r\n"
        ]
       }
      ],
-     "prompt_number": 53
+     "prompt_number": 7
     },
     {
      "cell_type": "code",
      "input": [],
      "language": "python",
      "metadata": {},
-     "outputs": []
+     "outputs": [],
+     "prompt_number": 7
     }
    ],
    "metadata": {}