From fc47dfeeaf677443a275f8249706f0051d8acb5f Mon Sep 17 00:00:00 2001 From: Jon Speicher Date: Fri, 26 Jul 2013 12:08:17 -0400 Subject: [PATCH] Add Refactoring description and exercise --- .../sw_engineering/SoftwareEngineering.ipynb | 280 ++++++++++++++++-- 1 file changed, 248 insertions(+), 32 deletions(-) diff --git a/python/sw_engineering/SoftwareEngineering.ipynb b/python/sw_engineering/SoftwareEngineering.ipynb index 4b9c52a..90ec739 100644 --- a/python/sw_engineering/SoftwareEngineering.ipynb +++ b/python/sw_engineering/SoftwareEngineering.ipynb @@ -126,31 +126,69 @@ ] } ], - "prompt_number": 7 + "prompt_number": 1 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see how it works." + ] }, { "cell_type": "code", "collapsed": false, "input": [ - "ls" + "import sightings\n", + "sightings.count_wolverines('big_animals.txt')" ], "language": "python", "metadata": {}, "outputs": [ { - "output_type": "stream", - "stream": "stdout", + "output_type": "pyout", + "prompt_number": 2, "text": [ - "README.md instructor_notebook.ipynb\r\n", - "animals.txt ipython_nose.py\r\n", - "big_animals.txt macguffin_animals.txt\r\n", - "dev_notes.md merida_animals.txt\r\n", - "dingwall_animals.txt student_notebook.ipynb\r\n", - "fergus_animals.txt\r\n" + "117" ] } ], - "prompt_number": 1 + "prompt_number": 2 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "# Refactoring\n", + "***\n", + "\n", + "Look at `count_wolverines`. You'll notice that there's a lot going on within this function. It:\n", + "\n", + "* Opens a file\n", + "* Reads lines from the file\n", + "* Closes the file\n", + "* Parses the string data from the file into usable data types\n", + "* Selects the records specific to a single animal\n", + "* Sums up a specific field contained within that set of selected records\n", + "\n", + "You can imagine that we might want to do many things with data from the sightings file. 
For one, we might want to average the number of sightings for a particular animal. Perhaps we want to count up the number of unique animals seen in a data set. Perhaps we want to figure out which days of the year have the most elk sightings. It's easy to see how we could modify the `count_wolverines` function above to achieve each of these goals, but if we simply replicated that entire function a half-dozen times, we would be repeating the code that opens the file and reads and splits the lines a half-dozen times, too.\n", + "\n", + "One approach to reducing this duplication is to *decompose* the function above into several separate functions, each with a single, small, well-defined responsibility (remember our list of good function criteria from the Intro session). This is often known as [refactoring](http://en.wikipedia.org/wiki/Code_refactoring). The goal is to rearrange code to make it easier to read, maintain, and reuse while preserving the existing functionality.\n", + "\n", + "We are going to *extract* a new function from `count_wolverines`.\n", + "\n", + "Create a new function called `read_sightings_from_file`. The function should:\n", + "\n", + "* Accept a sightings filename as a parameter\n", + "* Open the file\n", + "* Read the file's lines\n", + "* Split each line into its four whitespace-separated columns\n", + "* Store each column of each line in a list dedicated to holding that type of data (i.e. dates, times, animals, counts), converting each count from a string to an integer\n", + "* Return the four lists\n", + "\n", + "For example, given the following file:" + ] }, { "cell_type": "code", @@ -173,48 +211,127 @@ ] } ], - "prompt_number": 2 + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "your function would return:\n", + "\n", + " (['2011-04-22', '2011-04-23', ...], ['21:06', '14:12', ...], ['Grizzly', 'Elk', ...], [36, 25, ...])\n", + "\n", + "Keep in mind that most of this functionality is already implemented in `count_wolverines`, so you can copy liberally from that function." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results\n", + "\n", + "When you are done, the file `sightings.py` should look something like this (run the cell below to create it if you need help)." + ] }, { "cell_type": "code", "collapsed": false, "input": [ - "def read_file(ifile):\n", - " open_file = open(ifile, 'r')\n", - " \n", - " time = []\n", - " date = []\n", - " animal = []\n", - " count = []\n", + "%%file sightings.py\n", + "def read_sightings_from_file(filename):\n", + " ''' Given a plain text file containing animal sighting data in the form\n", + " date time animal count\n", + " returns four lists, each containing the data from one column.'''\n", " \n", - " for iline in open_file:\n", - " s = iline.split()\n", - " date.append(s[0])\n", - " time.append(s[1])\n", - " animal.append(s[2])\n", - " count.append(int(s[3]))\n", + " animal_file = open(filename, 'r')\n", + " animal_file_lines = animal_file.readlines()\n", + " animal_file.close()\n", " \n", - " open_file.close()\n", + " dates = []\n", + " times = []\n", + " animals = []\n", + " counts = []\n", " \n", - " return date, time, animal, count" + " for line in animal_file_lines:\n", + " date, time, animal, count_string = line.split()\n", + " dates.append(date)\n", + " times.append(time)\n", + " animals.append(animal)\n", + " counts.append(int(count_string))\n", + "\n", + " return dates, times, animals, counts\n", + "\n", + "def count_wolverines(filename):\n", + " '''Given a plain text file containing animal sighting data in the form \n", + " date time animal count\n", + " returns the total count of wolverines sighted.'''\n", + " animal_file = open(filename, 'r')\n", + " animal_file_lines = animal_file.readlines()\n", + " animal_file.close()\n", + "\n", + " total_count = 0\n", + " for line in animal_file_lines:\n", + " date, time, animal, count_string = line.split()\n", + " if animal == 'Wolverine':\n", + " total_count = total_count + int(count_string)\n", + " return 
total_count" ], "language": "python", "metadata": {}, - "outputs": [], - "prompt_number": 15 + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting sightings.py\n" + ] + } + ], + "prompt_number": 12 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see how it works." + ] }, { "cell_type": "code", "collapsed": false, "input": [ - "read_file('animals.txt')" + "cat animals.txt" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "2011-04-22 21:06 Grizzly 36\r\n", + "2011-04-23 14:12 Elk 25\r\n", + "2011-04-23 10:24 Elk 26\r\n", + "2011-04-23 20:08 Wolverine 31\r\n", + "2011-04-23 18:46 Muskox 20\r\n" + ] + } + ], + "prompt_number": 13 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "reload(sightings)\n", + "sightings.read_sightings_from_file('animals.txt')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", - "prompt_number": 7, + "prompt_number": 14, "text": [ "(['2011-04-22', '2011-04-23', '2011-04-23', '2011-04-23', '2011-04-23'],\n", " ['21:06', '14:12', '10:24', '20:08', '18:46'],\n", @@ -223,7 +340,106 @@ ] } ], - "prompt_number": 7 + "prompt_number": 14 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***\n", + "**Aside: Updating `count_wolverines`**\n", + "\n", + "Now that we have extracted the `read_sightings_from_file` function, we could remove the duplicated code from `count_wolverines`, which would simplify that function a bit." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%file sightings.py\n", + "def read_sightings_from_file(filename):\n", + " ''' Given a plain text file containing animal sighting data in the form\n", + " date time animal count\n", + " returns four lists, each containing the data from one column.'''\n", + " \n", + " animal_file = open(filename, 'r')\n", + " animal_file_lines = animal_file.readlines()\n", + " animal_file.close()\n", + " \n", + " dates = []\n", + " times = []\n", + " animals = []\n", + " counts = []\n", + " \n", + " for line in animal_file_lines:\n", + " date, time, animal, count_string = line.split()\n", + " dates.append(date)\n", + " times.append(time)\n", + " animals.append(animal)\n", + " counts.append(int(count_string))\n", + " \n", + " return dates, times, animals, counts\n", + "\n", + "def count_wolverines(filename):\n", + " '''Given a plain text file containing animal sighting data in the form \n", + " date time animal count\n", + " returns the total count of wolverines sighted.'''\n", + " \n", + " dates, times, animals, counts = read_sightings_from_file(filename)\n", + " \n", + " total_count = 0\n", + " \n", + " for animal, count in zip(animals, counts):\n", + " if animal == 'Wolverine':\n", + " total_count = total_count + count\n", + " return total_count" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "Overwriting sightings.py\n" + ] + } + ], + "prompt_number": 15 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Does it work?" 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "reload(sightings)\n", + "sightings.count_wolverines('big_animals.txt')" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "pyout", + "prompt_number": 16, + "text": [ + "117" + ] + } + ], + "prompt_number": 16 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "***" + ] }, { "cell_type": "code", -- 2.26.2