scrape.nasdaq: Add logging for easier debugging
author W. Trevor King <wking@tremily.us>
Sat, 16 Feb 2013 02:28:14 +0000 (21:28 -0500)
committer W. Trevor King <wking@tremily.us>
Sat, 16 Feb 2013 02:28:52 +0000 (21:28 -0500)
If things fail silently, it's hard to know what went wrong.

insider/scrape/nasdaq.py

index 0f49e87ac8ac55fa3690519450fc0371ba501ea0..4ee45afcc20a36e88a296885c2b3ce9528c12291 100644 (file)
@@ -3,6 +3,7 @@
 
 import datetime
 import decimal
+import logging
 import urllib2
 
 from bs4 import BeautifulSoup
@@ -10,6 +11,10 @@ from bs4 import BeautifulSoup
 from insider.models import add_transaction
 
 
+LOG = logging.getLogger('insider.scrape.nasdaq')
+LOG.addHandler(logging.StreamHandler())
+LOG.setLevel(logging.INFO)
+
 CODE = {
     'person': {
         # Convert NASDAQ's people (generaly LAST FIRST[ MIDDLE]) to
@@ -48,6 +53,7 @@ def get_trades(ticker):
         data = {'exchange': 'UNKNOWN', 'exchange_symbol': exchange_symbol,
                 'company': 'UNKNOWN', 'company_symbol': company_symbol,
                 'source': url}
+        LOG.debug('scraping a new row: {}'.format(row))
         for i,(field,class_) in enumerate([
                 (None, None),  # first column is blank
                 ('person', 'Holddata'),
@@ -62,14 +68,20 @@ def get_trades(ticker):
                 ('', None)
                 ]):
             if i >= len(tds):
+                LOG.debug('not enough columns in the row ({} < {})'.format(
+                        len(tds), i))
                 break # not enough columns in this row
             if field == None:
+                LOG.debug('nothing interesting in column {}'.format(field))
                 continue  # nothing interesting in this field
             elif field == '':
                 line_match = True
+                LOG.debug('row matched')
                 break  # we made it to the end of the list
             classes = dict(tds[i].attrs).get('class', [])
             if class_ and class_ not in classes:
+                LOG.debug('wrong class ({}, expected {})'.format(
+                        classes, class_))
                 break  # wrong class
             value = tds[i].text
             if class_ == 'Holdnum':  # decode numerical values
@@ -86,7 +98,7 @@ def get_trades(ticker):
                         last,first_plus = value.title().split(' ', 1)
                         value = ' '.join((first_plus, last))
                     else:
-                        print('unknown code {} for {} field\n{}'.format(
+                        LOG.error('unknown code {} for {} field\n{}'.format(
                                 value, field, url))
                         raise
             elif field == 'date':
@@ -112,14 +124,11 @@ def get_trades(ticker):
 if __name__ == '__main__':
     import sys
 
-    verbose = False
-
     for ticker in sys.argv[1:]:
         for trade in get_trades(ticker=ticker):
-            if verbose:
+            for field,value in sorted(trade.items()):
                 # display the trade we're looking at
-                for field,value in sorted(trade.items()):
-                    print('{}\t{}'.format(field, value))
+                LOG.info('{}\t{}'.format(field, value))
 
             add_transaction(
                 person=trade['person'],
@@ -132,6 +141,3 @@ if __name__ == '__main__':
                 value=trade['value'],
                 source=trade['source'],
                 )
-
-            if verbose:
-                print('')