Add Refactoring description and exercise
authorJon Speicher <jon.speicher@gmail.com>
Fri, 26 Jul 2013 16:08:17 +0000 (12:08 -0400)
committerW. Trevor King <wking@tremily.us>
Sat, 9 Nov 2013 18:27:50 +0000 (10:27 -0800)
python/sw_engineering/SoftwareEngineering.ipynb

index 4b9c52ae64f0d3e2e43a425a2e5a3bd2e8b30fbe..90ec73943691911812454ed467795ee10e1ba54a 100644 (file)
        ]
       }
      ],
-     "prompt_number": 7
+     "prompt_number": 1
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Let's see how it works."
+     ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "ls"
+      "import sightings\n",
+      "sightings.count_wolverines('big_animals.txt')"
      ],
      "language": "python",
      "metadata": {},
      "outputs": [
       {
-       "output_type": "stream",
-       "stream": "stdout",
+       "output_type": "pyout",
+       "prompt_number": 2,
        "text": [
-        "README.md                  instructor_notebook.ipynb\r\n",
-        "animals.txt                ipython_nose.py\r\n",
-        "big_animals.txt            macguffin_animals.txt\r\n",
-        "dev_notes.md               merida_animals.txt\r\n",
-        "dingwall_animals.txt       student_notebook.ipynb\r\n",
-        "fergus_animals.txt\r\n"
+        "117"
        ]
       }
      ],
-     "prompt_number": 1
+     "prompt_number": 2
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "***\n",
+      "# Refactoring\n",
+      "***\n",
+      "\n",
+      "Look at `count_wolverines`. You'll notice that there's a lot going on within this function. It:\n",
+      "\n",
+      "* Opens a file\n",
+      "* Reads lines from the file\n",
+      "* Closes the file\n",
+      "* Parses the string data from the file into usable data types\n",
+      "* Filters out records specific to a single animal\n",
+      "* Sums up a specific field contained within that set of filtered records\n",
+      "\n",
+      "You can imagine that we might want to do many things with data from the sightings file. For one, we might want to average the number of sightings for a particular animal. Perhaps we want to count up the number of unique animals seen in a data set. Perhaps we want to figure out which days of the year have the most elk sightings. It's easy to see how we could modify the `count_wolverines` function above to achieve each of these goals, but if we simply replicated that entire function a half-dozen times, we would also be replicating the code that opens the file, reads the lines, and splits them a half-dozen times.\n",
+      "\n",
+      "One approach to reducing this duplication is to *decompose* the function above into several separate functions, each with a single, small, well-defined responsibility (remember our list of criteria for good functions from the Intro session). This kind of restructuring is known as [refactoring](http://en.wikipedia.org/wiki/Code_refactoring). The goal is to rearrange code to make it easier to read, maintain, and reuse while preserving the existing functionality.\n",
+      "\n",
+      "We are going to *extract* a new function from `count_wolverines`.\n",
+      "\n",
+      "Create a new function called `read_sightings_from_file`. The function should:\n",
+      "\n",
+      "* Accept a sightings filename as a parameter\n",
+      "* Open the file\n",
+      "* Read the file's lines\n",
+      "* Split each line into its columns\n",
+      "* Store each column of each line in a list dedicated to holding that type of data (i.e., dates, times, animals, and counts)\n",
+      "* Return the lists\n",
+      "\n",
+      "For example, given the following file:"
+     ]
     },
     {
      "cell_type": "code",
        ]
       }
      ],
-     "prompt_number": 2
+     "prompt_number": 3
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "your function would return:\n",
+      "\n",
+      "    (['2011-04-22', '2011-04-23', ...], ['21:06', '14:12', ...], ['Grizzly', 'Elk', ...], [36, 25, ...])\n",
+      "\n",
+      "Keep in mind that most of this functionality is already implemented in `count_wolverines`, so you can copy liberally from that function."
+     ]
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "## Results\n",
+      "\n",
+      "When you are done, the file `sightings.py` should look something like this (run the cell below to create it if you need help)."
+     ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "def read_file(ifile):\n",
-      "    open_file = open(ifile, 'r')\n",
-      "    \n",
-      "    time = []\n",
-      "    date = []\n",
-      "    animal = []\n",
-      "    count = []\n",
+      "%%file sightings.py\n",
+      "def read_sightings_from_file(filename):\n",
+      "    ''' Given a plain text file containing animal sighting data in the form\n",
+      "            date time animal count\n",
+      "        returns four lists, each containing the data from one column.'''\n",
       "    \n",
-      "    for iline in open_file:\n",
-      "        s = iline.split()\n",
-      "        date.append(s[0])\n",
-      "        time.append(s[1])\n",
-      "        animal.append(s[2])\n",
-      "        count.append(int(s[3]))\n",
+      "    animal_file = open(filename, 'r')\n",
+      "    animal_file_lines = animal_file.readlines()\n",
+      "    animal_file.close()\n",
       "    \n",
-      "    open_file.close()\n",
+      "    dates = []\n",
+      "    times = []\n",
+      "    animals = []\n",
+      "    counts = []\n",
       "    \n",
-      "    return date, time, animal, count"
+      "    for line in animal_file_lines:\n",
+      "        date, time, animal, count_string = line.split()\n",
+      "        dates.append(date)\n",
+      "        times.append(time)\n",
+      "        animals.append(animal)\n",
+      "        counts.append(int(count_string))\n",
+      "\n",
+      "    return dates, times, animals, counts\n",
+      "\n",
+      "def count_wolverines(filename):\n",
+      "    '''Given a plain text file containing animal sighting data in the form \n",
+      "           date time animal count\n",
+      "       returns the total count of wolverines sighted.'''\n",
+      "    animal_file = open(filename, 'r')\n",
+      "    animal_file_lines = animal_file.readlines()\n",
+      "    animal_file.close()\n",
+      "\n",
+      "    total_count = 0\n",
+      "    for line in animal_file_lines:\n",
+      "        date, time, animal, count_string = line.split()\n",
+      "        if animal == 'Wolverine':\n",
+      "            total_count = total_count + int(count_string)\n",
+      "    return total_count"
      ],
      "language": "python",
      "metadata": {},
-     "outputs": [],
-     "prompt_number": 15
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting sightings.py\n"
+       ]
+      }
+     ],
+     "prompt_number": 12
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Let's see how it works."
+     ]
     },
     {
      "cell_type": "code",
      "collapsed": false,
      "input": [
-      "read_file('animals.txt')"
+      "cat animals.txt"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "2011-04-22 21:06 Grizzly 36\r\n",
+        "2011-04-23 14:12 Elk 25\r\n",
+        "2011-04-23 10:24 Elk 26\r\n",
+        "2011-04-23 20:08 Wolverine 31\r\n",
+        "2011-04-23 18:46 Muskox 20\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 13
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "reload(sightings)\n",
+      "sightings.read_sightings_from_file('animals.txt')"
      ],
      "language": "python",
      "metadata": {},
      "outputs": [
       {
        "output_type": "pyout",
-       "prompt_number": 7,
+       "prompt_number": 14,
        "text": [
         "(['2011-04-22', '2011-04-23', '2011-04-23', '2011-04-23', '2011-04-23'],\n",
         " ['21:06', '14:12', '10:24', '20:08', '18:46'],\n",
        ]
       }
      ],
-     "prompt_number": 7
+     "prompt_number": 14
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "***\n",
+      "**Aside: Updating `count_wolverines`**\n",
+      "\n",
+      "Now that we have extracted the `read_sightings_from_file` function, we can remove the duplicated file-reading code from `count_wolverines` by having it call the new function, which simplifies `count_wolverines` a bit."
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "%%file sightings.py\n",
+      "def read_sightings_from_file(filename):\n",
+      "    ''' Given a plain text file containing animal sighting data in the form\n",
+      "            date time animal count\n",
+      "        returns four lists, each containing the data from one column.'''\n",
+      "    \n",
+      "    animal_file = open(filename, 'r')\n",
+      "    animal_file_lines = animal_file.readlines()\n",
+      "    animal_file.close()\n",
+      "    \n",
+      "    dates = []\n",
+      "    times = []\n",
+      "    animals = []\n",
+      "    counts = []\n",
+      "    \n",
+      "    for line in animal_file_lines:\n",
+      "        date, time, animal, count_string = line.split()\n",
+      "        dates.append(date)\n",
+      "        times.append(time)\n",
+      "        animals.append(animal)\n",
+      "        counts.append(int(count_string))\n",
+      "        \n",
+      "    return dates, times, animals, counts\n",
+      "\n",
+      "def count_wolverines(filename):\n",
+      "    '''Given a plain text file containing animal sighting data in the form \n",
+      "           date time animal count\n",
+      "       returns the total count of wolverines sighted.'''\n",
+      "    \n",
+      "    dates, times, animals, counts = read_sightings_from_file(filename)\n",
+      "    \n",
+      "    total_count = 0\n",
+      "    \n",
+      "    for animal, count in zip(animals, counts):\n",
+      "        if animal == 'Wolverine':\n",
+      "            total_count = total_count + count\n",
+      "    return total_count"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Overwriting sightings.py\n"
+       ]
+      }
+     ],
+     "prompt_number": 15
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "Does it work?"
+     ]
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "reload(sightings)\n",
+      "sightings.count_wolverines('big_animals.txt')"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "pyout",
+       "prompt_number": 16,
+       "text": [
+        "117"
+       ]
+      }
+     ],
+     "prompt_number": 16
+    },
+    {
+     "cell_type": "markdown",
+     "metadata": {},
+     "source": [
+      "***"
+     ]
     },
     {
      "cell_type": "code",