From e6674e6d296923b6f7d6a7a3a6bed681a8ec403b Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Tue, 28 Dec 2010 17:16:35 -0500 Subject: [PATCH] Comment on Terms of Service in ticker scraping script. --- posts/ticker/ticker.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/posts/ticker/ticker.py b/posts/ticker/ticker.py index a175fc5..f96aadd 100755 --- a/posts/ticker/ticker.py +++ b/posts/ticker/ticker.py @@ -19,6 +19,7 @@ import logging from StringIO import StringIO +import time import urllib2 from lxml import etree @@ -26,8 +27,9 @@ from lxml import etree class Grabber (object): "Base ckass for website-specific quote scrapers." - def __init__(self, url): + def __init__(self, url, sleep=1): self._url = url + self._sleep_seconds = sleep def quote(self, ticker): "Floating point quote for the given `ticker` symbol string." @@ -48,6 +50,7 @@ class Grabber (object): info = f.info() html = f.read() f.close() + time.sleep(self._sleep_seconds) return (info, html) def _parse_html(self, html): @@ -59,7 +62,25 @@ class Grabber (object): class GoogleGrabber (Grabber): - "Grab quotes from Google Finance." + """Grab quotes from Google Finance. + + From Google's `Terms of Service`_: + + 5.3 You agree not to access (or attempt to access) any of the + Services by any means other than through the interface that is + provided by Google, unless you have been specifically allowed to + do so in a separate agreement with Google. You specifically + agree not to access (or attempt to access) any of the Services + through any automated means (including use of scripts or web + crawlers) and shall ensure that you comply with the instructions + set out in any robots.txt file present on the Services. + + However, I think the distinction between "browser", which Google + clearly does allow, and "script run interactively from the command + line" is pretty blurry. + + .. _Terms of Service: http://www.google.com/accounts/TOS?loc=us + """ def __init__(self): super(GoogleGrabber, self).__init__( url='http://www.google.com/finance?q=%s') @@ -82,7 +103,14 @@ class GoogleGrabber (Grabber): class YahooGrabber (Grabber): - "Grab quotes from Yahoo! Finance." + """Grab quotes from Yahoo! Finance. + + Yahoo's `Terms of Service`_ don't seem to have any explicitly + relevant terms. + + .. _Terms of Service: + http://info.yahoo.com/legal/us/yahoo/utos/utos-173.html + """ def __init__(self): super(YahooGrabber, self).__init__( url='http://finance.yahoo.com/q?s=%s') @@ -147,7 +175,7 @@ if __name__ == '__main__': p.add_option('-v', '--verbose', dest='verbose', default=0, action='count', help='increment verbosity') grabbers = sorted(GRABBERS.keys()) - p.add_option('-g', '--grabber', dest='grabber', default='google', + p.add_option('-g', '--grabber', dest='grabber', default='yahoo', type='choice', choices=grabbers, help='select grabber from %s (%%default)' % grabbers) -- 2.26.2