Reorganizing material to put novice + intermediate at top level
[swc-boot-camp.git] / sql / novice / 02-sort-dup.ipynb
diff --git a/sql/novice/02-sort-dup.ipynb b/sql/novice/02-sort-dup.ipynb
deleted file mode 100644 (file)
index 8ffe0db..0000000
+++ /dev/null
@@ -1,427 +0,0 @@
-{
- "metadata": {
-  "name": ""
- },
- "nbformat": 3,
- "nbformat_minor": 0,
- "worksheets": [
-  {
-   "cells": [
-    {
-     "cell_type": "heading",
-     "level": 2,
-     "metadata": {},
-     "source": [
-      "Sorting and Removing Duplicates"
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {
-      "cell_tags": [
-       "objectives"
-      ]
-     },
-     "source": [
-      "#### Objectives\n",
-      "\n",
-      "*   Write queries that display results in a particular order.\n",
-      "*   Write queries that eliminate duplicate values from data."
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "Data is often redundant,\n",
-      "so queries often return redundant information.\n",
-      "For example,\n",
-      "if we select the quantitites that have been measured\n",
-      "from the `survey` table,\n",
-      "we get this:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%load_ext sqlitemagic"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [],
-     "prompt_number": 1
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select quant from Survey;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>temp</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>temp</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>temp</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>temp</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>rad</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102358c90>"
-       ]
-      }
-     ],
-     "prompt_number": 2
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "We can eliminate the redundant output\n",
-      "to make the result more readable\n",
-      "by adding the `distinct` keyword\n",
-      "to our query:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select distinct quant from Survey;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>rad</td></tr>\n",
-        "<tr><td>sal</td></tr>\n",
-        "<tr><td>temp</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102358d90>"
-       ]
-      }
-     ],
-     "prompt_number": 3
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "If we select more than one column&mdash;for example,\n",
-      "both the survey site ID and the quantity measured&mdash;then\n",
-      "the distinct pairs of values are returned:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select distinct taken, quant from Survey;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>619</td><td>rad</td></tr>\n",
-        "<tr><td>619</td><td>sal</td></tr>\n",
-        "<tr><td>622</td><td>rad</td></tr>\n",
-        "<tr><td>622</td><td>sal</td></tr>\n",
-        "<tr><td>734</td><td>rad</td></tr>\n",
-        "<tr><td>734</td><td>sal</td></tr>\n",
-        "<tr><td>734</td><td>temp</td></tr>\n",
-        "<tr><td>735</td><td>rad</td></tr>\n",
-        "<tr><td>735</td><td>sal</td></tr>\n",
-        "<tr><td>735</td><td>temp</td></tr>\n",
-        "<tr><td>751</td><td>rad</td></tr>\n",
-        "<tr><td>751</td><td>temp</td></tr>\n",
-        "<tr><td>751</td><td>sal</td></tr>\n",
-        "<tr><td>752</td><td>rad</td></tr>\n",
-        "<tr><td>752</td><td>sal</td></tr>\n",
-        "<tr><td>752</td><td>temp</td></tr>\n",
-        "<tr><td>837</td><td>rad</td></tr>\n",
-        "<tr><td>837</td><td>sal</td></tr>\n",
-        "<tr><td>844</td><td>rad</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102353c90>"
-       ]
-      }
-     ],
-     "prompt_number": 4
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "Notice in both cases that duplicates are removed\n",
-      "even if they didn't appear to be adjacent in the database.\n",
-      "Again,\n",
-      "it's important to remember that rows aren't actually ordered:\n",
-      "they're just displayed that way."
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "#### Challenges\n",
-      "\n",
-      "1.  Write a query that selects distinct dates from the `Site` table."
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "As we mentioned earlier,\n",
-      "database records are not stored in any particular order.\n",
-      "This means that query results aren't necessarily sorted,\n",
-      "and even if they are,\n",
-      "we often want to sort them in a different way,\n",
-      "e.g., by the name of the project instead of by the name of the scientist.\n",
-      "We can do this in SQL by adding an `order by` clause to our query:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select * from Person order by ident;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>danforth</td><td>Frank</td><td>Danforth</td></tr>\n",
-        "<tr><td>dyer</td><td>William</td><td>Dyer</td></tr>\n",
-        "<tr><td>lake</td><td>Anderson</td><td>Lake</td></tr>\n",
-        "<tr><td>pb</td><td>Frank</td><td>Pabodie</td></tr>\n",
-        "<tr><td>roe</td><td>Valentina</td><td>Roerich</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102353b10>"
-       ]
-      }
-     ],
-     "prompt_number": 5
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "By default,\n",
-      "results are sorted in ascending order\n",
-      "(i.e.,\n",
-      "from least to greatest).\n",
-      "We can sort in the opposite order using `desc` (for \"descending\"):"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select * from person order by ident desc;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>roe</td><td>Valentina</td><td>Roerich</td></tr>\n",
-        "<tr><td>pb</td><td>Frank</td><td>Pabodie</td></tr>\n",
-        "<tr><td>lake</td><td>Anderson</td><td>Lake</td></tr>\n",
-        "<tr><td>dyer</td><td>William</td><td>Dyer</td></tr>\n",
-        "<tr><td>danforth</td><td>Frank</td><td>Danforth</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102353c50>"
-       ]
-      }
-     ],
-     "prompt_number": 6
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "(And if we want to make it clear that we're sorting in ascending order,\n",
-      "we can use `asc` instead of `desc`.)\n",
-      "  \n",
-      "We can also sort on several fields at once.\n",
-      "For example,\n",
-      "this query sorts results first in ascending order by `taken`,\n",
-      "and then in descending order by `person`\n",
-      "within each group of equal `taken` values:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select taken, person from Survey order by taken asc, person desc;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>619</td><td>dyer</td></tr>\n",
-        "<tr><td>619</td><td>dyer</td></tr>\n",
-        "<tr><td>622</td><td>dyer</td></tr>\n",
-        "<tr><td>622</td><td>dyer</td></tr>\n",
-        "<tr><td>734</td><td>pb</td></tr>\n",
-        "<tr><td>734</td><td>pb</td></tr>\n",
-        "<tr><td>734</td><td>lake</td></tr>\n",
-        "<tr><td>735</td><td>pb</td></tr>\n",
-        "<tr><td>735</td><td>None</td></tr>\n",
-        "<tr><td>735</td><td>None</td></tr>\n",
-        "<tr><td>751</td><td>pb</td></tr>\n",
-        "<tr><td>751</td><td>pb</td></tr>\n",
-        "<tr><td>751</td><td>lake</td></tr>\n",
-        "<tr><td>752</td><td>roe</td></tr>\n",
-        "<tr><td>752</td><td>lake</td></tr>\n",
-        "<tr><td>752</td><td>lake</td></tr>\n",
-        "<tr><td>752</td><td>lake</td></tr>\n",
-        "<tr><td>837</td><td>roe</td></tr>\n",
-        "<tr><td>837</td><td>lake</td></tr>\n",
-        "<tr><td>837</td><td>lake</td></tr>\n",
-        "<tr><td>844</td><td>roe</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x1023557d0>"
-       ]
-      }
-     ],
-     "prompt_number": 7
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "This is easier to understand if we also remove duplicates:"
-     ]
-    },
-    {
-     "cell_type": "code",
-     "collapsed": false,
-     "input": [
-      "%%sqlite survey.db\n",
-      "select distinct taken, person from Survey order by taken asc, person desc;"
-     ],
-     "language": "python",
-     "metadata": {},
-     "outputs": [
-      {
-       "html": [
-        "<table>\n",
-        "<tr><td>619</td><td>dyer</td></tr>\n",
-        "<tr><td>622</td><td>dyer</td></tr>\n",
-        "<tr><td>734</td><td>pb</td></tr>\n",
-        "<tr><td>734</td><td>lake</td></tr>\n",
-        "<tr><td>735</td><td>pb</td></tr>\n",
-        "<tr><td>735</td><td>None</td></tr>\n",
-        "<tr><td>751</td><td>pb</td></tr>\n",
-        "<tr><td>751</td><td>lake</td></tr>\n",
-        "<tr><td>752</td><td>roe</td></tr>\n",
-        "<tr><td>752</td><td>lake</td></tr>\n",
-        "<tr><td>837</td><td>roe</td></tr>\n",
-        "<tr><td>837</td><td>lake</td></tr>\n",
-        "<tr><td>844</td><td>roe</td></tr>\n",
-        "</table>"
-       ],
-       "metadata": {},
-       "output_type": "display_data",
-       "text": [
-        "<IPython.core.display.HTML at 0x102353b10>"
-       ]
-      }
-     ],
-     "prompt_number": 8
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {},
-     "source": [
-      "#### Challenges\n",
-      "\n",
-      "1.  Write a query that returns the distinct dates in the `Visited` table.\n",
-      "\n",
-      "2.  Write a query that displays the full names of the scientists in the `Person` table, ordered by family name."
-     ]
-    },
-    {
-     "cell_type": "markdown",
-     "metadata": {
-      "cell_tags": [
-       "keypoints"
-      ]
-     },
-     "source": [
-      "#### Key Points\n",
-      "\n",
-      "*   The records in a database table are not intrinsically ordered:\n",
-      "    if we want to display them in some order,\n",
-      "    we must specify that explicitly.\n",
-      "*   The values in a database are not guaranteed to be unique:\n",
-      "    if we want to eliminate duplicates,\n",
-      "    we must specify that explicitly as well."
-     ]
-    }
-   ],
-   "metadata": {}
-  }
- ]
-}
\ No newline at end of file