updated notebook links

author C. Titus Brown <titus@idyll.org>

Sat, 17 Nov 2012 15:40:20 +0000 (07:40 -0800)

committer W. Trevor King <wking@tremily.us>

Fri, 1 Nov 2013 04:07:59 +0000 (21:07 -0700)
author C. Titus Brown <titus@idyll.org>
Sat, 17 Nov 2012 15:40:20 +0000 (07:40 -0800)
committer W. Trevor King <wking@tremily.us>
Fri, 1 Nov 2013 04:07:59 +0000 (21:07 -0700)
diff --git a/python/testing-with-nose.ipynb b/python/testing-with-nose.ipynb

index 5dbe383c6993ccf66cf0eb5c17096a394a8f0c58..45ec9ac2992e44dcf430a6237430f12dc751b611 100644 (file)
--- a/python/testing-with-nose.ipynb
+++ b/python/testing-with-nose.ipynb
@@ -7,17 +7,32 @@
   "worksheets": [
    {
     "cells": [
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## unit tests\n",
+      "\n",
+      "This is an example of unit testing with nose.  We are trying to make sure that the function calc_gc properly calculates the GC fraction of a DNA sequence.\n",
+      "\n",
+      "Problems worked through in class included --\n",
+      "\n",
+      "1. the sequence contained 'N's\n",
+      "2. the sequence contained lowercase characters\n",
+      "3. division by zero for sequences with no A, T, C, or G"
+     ]
+    },
      {
       "cell_type": "code",
       "collapsed": false,
       "input": [
        "%%file calc_gc.py\n",
        "def calc_gc(sequence):\n",
-      "    sequence = sequence.upper()\n",
-      "    n = sequence.count('T') + sequence.count('A')\n",
-      "    m = sequence.count('G') + sequence.count('C')\n",
+      "    sequence = sequence.upper()                    # make all chars uppercase\n",
+      "    n = sequence.count('T') + sequence.count('A')  # count only A, T,\n",
+      "    m = sequence.count('G') + sequence.count('C')  # C, and G -- nothing else (no Ns, Rs, Ws, etc.)\n",
        "    if n + m == 0:\n",
-      "        return 0.\n",
+      "        return 0.                                  # avoid divide-by-zero\n",
        "    return float(m) / float(n + m)\n",
        "\n",
        "def test_1():\n",
@@ -25,11 +40,11 @@
        "    print 'hello, this is a test; the value of result is', result\n",
        "    assert result == 0.43\n",
        "    \n",
-      "def test_2():\n",
+      "def test_2(): # test handling N\n",
        "    result = round(calc_gc('NATGC'), 2)\n",
        "    assert result == 0.5, result\n",
        "    \n",
-      "def test_3():\n",
+      "def test_3(): # test handling lowercase\n",
        "    result = round(calc_gc('natgc'), 2)\n",
        "    assert result == 0.5, result\n"
       ],
@@ -40,24 +55,56 @@
         "output_type": "stream",
         "stream": "stdout",
         "text": [
-        "Overwriting calc_gc.py"
+        "Overwriting calc_gc.py\n"
         ]
-      },
+      }
+     ],
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Running nosetests\n",
+      "\n",
+      "Here, the 'nosetests' command looks through calc_gc.py, finds all functions whose names start with 'test_', and runs them."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!nosetests calc_gc.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
        {
         "output_type": "stream",
         "stream": "stdout",
         "text": [
-        "\n"
+        "...\r\n",
+        "----------------------------------------------------------------------\r\n",
+        "Ran 3 tests in 0.001s\r\n",
+        "\r\n",
+        "OK\r\n"
         ]
        }
       ],
-     "prompt_number": 42
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "You can also run nosetests with a '-v' option:"
+     ]
      },
      {
       "cell_type": "code",
       "collapsed": false,
       "input": [
-      "!nosetests calc_gc.py"
+      "!nosetests -v calc_gc.py"
       ],
       "language": "python",
       "metadata": {},
@@ -66,7 +113,10 @@
         "output_type": "stream",
         "stream": "stdout",
         "text": [
-        "...\r\n",
+        "calc_gc.test_1 ... ok\r\n",
+        "calc_gc.test_2 ... ok\r\n",
+        "calc_gc.test_3 ... ok\r\n",
+        "\r\n",
          "----------------------------------------------------------------------\r\n",
          "Ran 3 tests in 0.001s\r\n",
          "\r\n",
@@ -74,7 +124,18 @@
         ]
        }
       ],
-     "prompt_number": 43
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Regression testing\n",
+      "\n",
+      "Here I'm going to set up some regression tests, where we're simply comparing the output of a previously run script with the output of that script now.  If we're running on the same data, we should get the same answer... right?\n",
+      "\n",
+      "The script just calculates the GC content of each sequence in 25k.fq.gz and then reports the average of those per-sequence values."
+     ]
      },
      {
       "cell_type": "code",
@@ -85,7 +146,7 @@
        "import screed\n",
        "import calc_gc\n",
        "\n",
-      "filename = sys.argv[1]\n",
+      "filename = sys.argv[1]    # take the sequence filename in from the command line\n",
        "total_gc = []\n",
        "for record in screed.open(filename):\n",
        "    gc = calc_gc.calc_gc(record.sequence)\n",
@@ -100,7 +161,7 @@
         "output_type": "stream",
         "stream": "stdout",
         "text": [
-        "Writing gc-of-seqs.py"
+        "Overwriting gc-of-seqs.py"
         ]
        },
        {
@@ -111,12 +172,13 @@
         ]
        }
       ],
-     "prompt_number": 44
+     "prompt_number": 4
      },
      {
       "cell_type": "code",
       "collapsed": false,
       "input": [
+      "# run the script and look at the output -- then write that output into the following file.\n",
        "!python gc-of-seqs.py 25k.fq.gz"
       ],
       "language": "python",
@@ -130,7 +192,7 @@
         ]
        }
       ],
-     "prompt_number": 47
+     "prompt_number": 5
      },
      {
       "cell_type": "code",
@@ -139,8 +201,11 @@
        "%%file test_gc_script.py\n",
        "import subprocess\n",
        "\n",
-      "correct_output = \"0.607911191366\\n\"\n",
+      "correct_output = \"0.607911191366\\n\"   # this is taken from the previous exec'd cell\n",
        "\n",
+      "# the following function checks to see if running this script at the command line\n",
+      "# returns the right result.  make sure you're running this from *within* the python/ subdirectory\n",
+      "# of the 2012-11-scripps/ repository.\n",
        "def test_run():\n",
        "    p = subprocess.Popen('python gc-of-seqs.py 25k.fq.gz', shell=True, stdout=subprocess.PIPE)\n",
        "    (stdout, stderr) = p.communicate()\n",
@@ -165,7 +230,7 @@
         ]
        }
       ],
-     "prompt_number": 52
+     "prompt_number": 6
      },
      {
       "cell_type": "code",
@@ -182,13 +247,13 @@
         "text": [
          ".\r\n",
          "----------------------------------------------------------------------\r\n",
-        "Ran 1 test in 0.969s\r\n",
+        "Ran 1 test in 0.937s\r\n",
          "\r\n",
          "OK\r\n"
         ]
        }
       ],
-     "prompt_number": 53
+     "prompt_number": 7
      },
      {
       "cell_type": "code",
@@ -196,7 +261,8 @@
       "input": [],
       "language": "python",
       "metadata": {},
-     "outputs": []
+     "outputs": [],
+     "prompt_number": 7
      }
     ],
     "metadata": {}
author	C. Titus Brown <titus@idyll.org>
	Sat, 17 Nov 2012 15:40:20 +0000 (07:40 -0800)
committer	W. Trevor King <wking@tremily.us>
	Fri, 1 Nov 2013 04:07:59 +0000 (21:07 -0700)