import logging
from StringIO import StringIO
+import time
import urllib2
from lxml import etree
class Grabber (object):
"Base class for website-specific quote scrapers."
- def __init__(self, url):
+ def __init__(self, url, sleep=1):
self._url = url
+ self._sleep_seconds = sleep
def quote(self, ticker):
"Floating point quote for the given `ticker` symbol string."
info = f.info()
html = f.read()
f.close()
+ time.sleep(self._sleep_seconds)
return (info, html)
def _parse_html(self, html):
class GoogleGrabber (Grabber):
- "Grab quotes from Google Finance."
+ """Grab quotes from Google Finance.
+
+ From Google's `Terms of Service`_:
+
+ 5.3 You agree not to access (or attempt to access) any of the
+ Services by any means other than through the interface that is
+ provided by Google, unless you have been specifically allowed to
+ do so in a separate agreement with Google. You specifically
+ agree not to access (or attempt to access) any of the Services
+ through any automated means (including use of scripts or web
+ crawlers) and shall ensure that you comply with the instructions
+ set out in any robots.txt file present on the Services.
+
+ However, I think the distinction between "browser", which Google
+ clearly does allow, and "script run interactively from the command
+ line" is pretty blurry.
+
+ .. _Terms of Service: http://www.google.com/accounts/TOS?loc=us
+ """
def __init__(self):
super(GoogleGrabber, self).__init__(
url='http://www.google.com/finance?q=%s')
class YahooGrabber (Grabber):
- "Grab quotes from Yahoo! Finance."
+ """Grab quotes from Yahoo! Finance.
+
+ Yahoo's `Terms of Service`_ don't seem to have any explicitly
+ relevant terms.
+
+ .. _Terms of Service:
+ http://info.yahoo.com/legal/us/yahoo/utos/utos-173.html
+ """
def __init__(self):
super(YahooGrabber, self).__init__(
url='http://finance.yahoo.com/q?s=%s')
p.add_option('-v', '--verbose', dest='verbose', default=0, action='count',
help='increment verbosity')
grabbers = sorted(GRABBERS.keys())
- p.add_option('-g', '--grabber', dest='grabber', default='google',
+ p.add_option('-g', '--grabber', dest='grabber', default='yahoo',
type='choice', choices=grabbers,
help='select grabber from %s (%%default)' % grabbers)