--- /dev/null
+#!/usr/bin/env python
+# Copyright (C) 2010 W. Trevor King <wking@drexel.edu>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Grab stock prices by ticker symbol.
+"""
+
+import logging
+from StringIO import StringIO
+import urllib2
+
+from lxml import etree
+
+
+class Grabber (object):
+    """Base class for website-specific quote scrapers.
+
+    Subclasses pass a quote-page URL template to `__init__` (a format
+    string with one '%s' placeholder for the ticker symbol) and
+    override `_parse_html` to extract the price from the fetched page.
+    """
+    def __init__(self, url):
+        # URL template, e.g. 'http://www.google.com/finance?q=%s'.
+        self._url = url
+
+    def quote(self, ticker):
+        "Floating point quote for the given `ticker` symbol string."
+        url = self._get_url(ticker)
+        logging.info('get quote for %s from %s using %s'
+                     % (ticker, url, self))
+        info,html = self._get_html(url)
+        quote = self._parse_html(html)
+        return quote
+
+    def _get_url(self, ticker):
+        "URL listing the quote for the given `ticker` symbol string."
+        return self._url % ticker
+
+    def _get_html(self, url):
+        """Page info and html associated with the given `url`.
+
+        Returns a `(info, html)` tuple: the urllib2 response headers
+        object and the raw page body string.
+        """
+        f = urllib2.urlopen(url)
+        info = f.info()
+        html = f.read()
+        f.close()
+        return (info, html)
+
+    def _parse_html(self, html):
+        """Extract the floating point quote from the page's `html`.
+
+        This method must be overridden by website-specific subclasses.
+        """
+        raise NotImplementedError()
+
+
+class GoogleGrabber (Grabber):
+ "Grab quotes from Google Finance."
+ def __init__(self):
+ super(GoogleGrabber, self).__init__(
+ url='http://www.google.com/finance?q=%s')
+
+ def _parse_html(self, html):
+ """Extract quote from a snippet that looks like::
+
+ <span class="pr">
+ <span id="ref_29312_l">
+ 64.77
+ </span>
+ </span>
+ """
+ parser = etree.HTMLParser()
+ tree = etree.parse(StringIO(html), parser)
+ root = tree.getroot()
+ span = root.xpath(".//span[@class='pr']")[0]
+ text = ''.join(span.itertext()).strip()
+ return float(text)
+
+
+class YahooGrabber (Grabber):
+ "Grab quotes from Yahoo! Finance."
+ def __init__(self):
+ super(YahooGrabber, self).__init__(
+ url='http://finance.yahoo.com/q?s=%s')
+
+ def _parse_html(self, html):
+ """Extract quote from a snippet that looks like::
+
+ <tr>
+ <th ...>Last Trade:</th>
+ <td ...>
+ <big>
+ <b>
+ <span ...>
+ 64.74
+ </span>
+ </b>
+ </big>
+ </td>
+ </tr>
+
+ For the implementation, see the `LXML tutorial`_.
+
+ .. _LXML tutorial:
+ http://codespeak.net/lxml/tutorial.html#using-xpath-to-find-text
+ """
+ parser = etree.HTMLParser()
+ tree = etree.parse(StringIO(html), parser)
+ root = tree.getroot()
+ rows = root.xpath('.//tr') #[[td/text() = 'Last Trade:']")
+ for row in rows:
+ has_label = row.xpath(".//th/text() = 'Last Trade:'")
+ if has_label:
+ break
+ assert has_label, '\n---\n\n'.join([
+ etree.tostring(row, pretty_print=True) for row in rows])
+ data = row.xpath('.//td')[0]
+ text = ''.join(data.itertext()).strip()
+ return float(text)
+
+
+GRABBERS = {}
+# Create a dictionary of (name, grabber) pairs. For example
+# GRABBERS['google'] = GoogleGrabber
+for name,obj in locals().items():
+ match = False
+ try:
+ if issubclass(obj, Grabber) and obj != Grabber:
+ match = True
+ except TypeError:
+ pass
+ if match:
+ n = name[:-len('Grabber')].lower()
+ GRABBERS[n] = obj
+del name, obj, match
+
+
+if __name__ == '__main__':
+ from optparse import OptionParser
+
+ p = OptionParser(usage='%prog [options] TICKER ...')
+ p.disable_interspersed_args()
+ p.add_option('-v', '--verbose', dest='verbose', default=0, action='count',
+ help='increment verbosity')
+ grabbers = sorted(GRABBERS.keys())
+ p.add_option('-g', '--grabber', dest='grabber', default='google',
+ type='choice', choices=grabbers,
+ help='select grabber from %s (%%default)' % grabbers)
+
+ options,args = p.parse_args()
+
+ log_levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
+ log_level = log_levels[min(options.verbose, len(log_levels)-1)]
+ logging.basicConfig(level=log_level)
+
+ g = GRABBERS[options.grabber]()
+ print '\t'.join([str(g.quote(ticker)) for ticker in args])