From e749188c6fe063eedd8e91af12d036195b2fd6e8 Mon Sep 17 00:00:00 2001 From: "C. Titus Brown" Date: Fri, 5 Apr 2013 11:07:44 -0700 Subject: [PATCH 1/1] fiz W. Trevor King: I dropped everything from the original c5cb901 except for the notebooks/testing-with-nose* additions. Conflicts: day1-afternoon.rst notebooks/using-screed.ipynb --- notebooks/testing-with-nose-bak.ipynb | 271 ++++++++++++++++++++++++++ notebooks/testing-with-nose.ipynb | 143 ++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 notebooks/testing-with-nose-bak.ipynb create mode 100644 notebooks/testing-with-nose.ipynb diff --git a/notebooks/testing-with-nose-bak.ipynb b/notebooks/testing-with-nose-bak.ipynb new file mode 100644 index 0000000..7069ab7 --- /dev/null +++ b/notebooks/testing-with-nose-bak.ipynb @@ -0,0 +1,271 @@ +{ + "metadata": { + "name": "testing-with-nose-bak" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## unit tests\n", + "\n", + "This is an example of unit testing with nose. We are trying to make sure that the function calc_gc properly calculates the GC fraction of a DNA sequence.\n", + "\n", + "Problems worked through in class included --\n", + "\n", + "1. the sequence contained 'N's\n", + "2. the sequence contained lowercase characters\n", + "3. divide by zero for sequences with no A, T, C, G" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file calc_gc.py\n", + "def calc_gc(sequence):\n", + " sequence = sequence.upper() # make all chars uppercase\n", + " n = sequence.count('T') + sequence.count('A') # count only A, T,\n", + " m = sequence.count('G') + sequence.count('C') # C, and G -- nothing else (no Ns, Rs, Ws, etc.)\n", + " if n + m == 0:\n", + " return 0. 
# avoid divide-by-zero\n", + " return float(m) / float(n + m)\n", + "\n", + "def test_1():\n", + " result = round(calc_gc('ATGGCAT'), 2)\n", + " print 'hello, this is a test; the value of result is', result\n", + " assert result == 0.43\n", + " \n", + "def test_2(): # test handling N\n", + " result = round(calc_gc('NATGC'), 2)\n", + " assert result == 0.5, result\n", + " \n", + "def test_3(): # test handling lowercase\n", + " result = round(calc_gc('natgc'), 2)\n", + " assert result == 0.5, result\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting calc_gc.py\n" + ] + } + ], + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Running nosetests\n", + "\n", + "Here, the 'nosetests' command looks through calc_gc.py, finds all functions whose names start with test_, and runs them." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!nosetests calc_gc.py" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "...\r\n", + "----------------------------------------------------------------------\r\n", + "Ran 3 tests in 0.001s\r\n", + "\r\n", + "OK\r\n" + ] + } + ], + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also run nosetests with a '-v' option:" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!nosetests -v calc_gc.py" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "calc_gc.test_1 ... ok\r\n", + "calc_gc.test_2 ... ok\r\n", + "calc_gc.test_3 ... 
ok\r\n", + "\r\n", + "----------------------------------------------------------------------\r\n", + "Ran 3 tests in 0.001s\r\n", + "\r\n", + "OK\r\n" + ] + } + ], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Regression testing\n", + "\n", + "Here I'm going to set up some regression tests, where we're simply comparing the output of a previously run script with the output of that script now. If we're running on the same data, we should get the same answer... right?\n", + "\n", + "The script just calculates the mean of the per-sequence GC fractions in 25k.fq.gz." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file gc-of-seqs.py\n", + "import sys\n", + "import screed\n", + "import calc_gc\n", + "\n", + "filename = sys.argv[1] # take the sequence filename in from the command line\n", + "total_gc = []\n", + "for record in screed.open(filename):\n", + " gc = calc_gc.calc_gc(record.sequence)\n", + " total_gc.append(gc)\n", + " \n", + "print sum(total_gc) / float(len(total_gc))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting gc-of-seqs.py" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# run the script and look at the output -- then write that output into the following file.\n", + "!python gc-of-seqs.py 25k.fq.gz" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "0.607911191366\r\n" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file test_gc_script.py\n", + "import subprocess\n", + "\n", + "correct_output = \"0.607911191366\\n\" # this is taken from the previous exec'd cell\n", + "\n", + "# the following function 
checks to see if running this script at the command line\n", + "# returns the right result. make sure you're running this from *within* the python/ subdirectory\n", + "# of the 2012-11-scripps/ repository.\n", + "def test_run():\n", + " p = subprocess.Popen('python gc-of-seqs.py 25k.fq.gz', shell=True, stdout=subprocess.PIPE)\n", + " (stdout, stderr) = p.communicate()\n", + " assert stdout == correct_output\n", + " \n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting test_gc_script.py" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!nosetests test_gc_script.py" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + ".\r\n", + "----------------------------------------------------------------------\r\n", + "Ran 1 test in 0.937s\r\n", + "\r\n", + "OK\r\n" + ] + } + ], + "prompt_number": 7 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 7 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/notebooks/testing-with-nose.ipynb b/notebooks/testing-with-nose.ipynb new file mode 100644 index 0000000..741773a --- /dev/null +++ b/notebooks/testing-with-nose.ipynb @@ -0,0 +1,143 @@ +{ + "metadata": { + "name": "testing-with-nose" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file calc_gc.py\n", + "\n", + "def calc_gc(dna):\n", + " dna = dna.upper()\n", + " gc = dna.count('G') + dna.count('C')\n", + " at = dna.count('A') + dna.count('T')\n", + " \n", + " total = gc + at\n", + " if total == 0:\n", + " return 0.0\n", + " \n", + " frac = gc / float(total)\n", + 
" return frac\n", + "\n", + "def test_correct_counting_gc():\n", + " x = calc_gc('ATGC')\n", + " assert x == 0.5, x\n", + " \n", + "def test_correct_counting_at():\n", + " x = calc_gc('AAAA')\n", + " assert x == 0.0, x\n", + "\n", + "def test_3():\n", + " x = calc_gc('atgc')\n", + " assert x == 0.5, x\n", + " \n", + "def test_4():\n", + " x = calc_gc('AtgCNN')\n", + " assert x == 0.5, x\n", + " \n", + "def test_5():\n", + " x = calc_gc('NNNNNN')\n", + " assert x == 0.0, x" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting calc_gc.py" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 23 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!nosetests -v calc_gc.py" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "calc_gc.test_correct_counting_gc ... ok\r\n", + "calc_gc.test_correct_counting_at ... ok\r\n", + "calc_gc.test_3 ... ok\r\n", + "calc_gc.test_4 ... ok\r\n", + "calc_gc.test_5 ... 
ok\r\n", + "\r\n", + "----------------------------------------------------------------------\r\n", + "Ran 5 tests in 0.001s\r\n", + "\r\n", + "OK\r\n" + ] + } + ], + "prompt_number": 24 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!pip install git+https://github.com/ged-lab/screed.git\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Downloading/unpacking git+https://github.com/ged-lab/screed.git\r\n", + " Cloning https://github.com/ged-lab/screed.git to /var/folders/y6/s1r3q87933s6gjdrtmts1npc0000gq/T/pip-Ha7xSv-build\r\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " Running setup.py egg_info for package from git+https://github.com/ged-lab/screed.git\r\n" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " \r\n", + "Cleaning up...\r\n" + ] + } + ], + "prompt_number": 25 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file -- 2.26.2