From: W. Trevor King Date: Sat, 16 Feb 2013 02:28:14 +0000 (-0500) Subject: scrape.nasdaq: Add logging for easier debugging X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=90ff51c9222f41561b65e952f21770bf5b6524e5;p=insider.git scrape.nasdaq: Add logging for easier debugging If things fail silently, it's hard to know what went wrong. --- diff --git a/insider/scrape/nasdaq.py b/insider/scrape/nasdaq.py index 0f49e87..4ee45af 100644 --- a/insider/scrape/nasdaq.py +++ b/insider/scrape/nasdaq.py @@ -3,6 +3,7 @@ import datetime import decimal +import logging import urllib2 from bs4 import BeautifulSoup @@ -10,6 +11,10 @@ from bs4 import BeautifulSoup from insider.models import add_transaction +LOG = logging.getLogger('insider.scrape.nasdaq') +LOG.addHandler(logging.StreamHandler()) +LOG.setLevel(logging.INFO) + CODE = { 'person': { # Convert NASDAQ's people (generaly LAST FIRST[ MIDDLE]) to @@ -48,6 +53,7 @@ def get_trades(ticker): data = {'exchange': 'UNKNOWN', 'exchange_symbol': exchange_symbol, 'company': 'UNKNOWN', 'company_symbol': company_symbol, 'source': url} + LOG.debug('scraping a new row: {}'.format(row)) for i,(field,class_) in enumerate([ (None, None), # first column is blank ('person', 'Holddata'), @@ -62,14 +68,20 @@ def get_trades(ticker): ('', None) ]): if i >= len(tds): + LOG.debug('not enough columns in the row ({} < {})'.format( + len(tds), i)) break # not enough columns in this row if field == None: + LOG.debug('nothing interesting in column {}'.format(field)) continue # nothing interesting in this field elif field == '': line_match = True + LOG.debug('row matched') break # we made it to the end of the list classes = dict(tds[i].attrs).get('class', []) if class_ and class_ not in classes: + LOG.debug('wrong class ({}, expected {})'.format( + classes, class_)) break # wrong class value = tds[i].text if class_ == 'Holdnum': # decode numerical values @@ -86,7 +98,7 @@ def get_trades(ticker): last,first_plus = value.title().split(' ', 1) value = ' '.join((first_plus, last)) else: - print('unknown code {} for {} field\n{}'.format( + LOG.error('unknown code {} for {} field\n{}'.format( value, field, url)) raise elif field == 'date': @@ -112,14 +124,11 @@ def get_trades(ticker): if __name__ == '__main__': import sys - verbose = False - for ticker in sys.argv[1:]: for trade in get_trades(ticker=ticker): - if verbose: + for field,value in sorted(trade.items()): # display the trade we're looking at - for field,value in sorted(trade.items()): - print('{}\t{}'.format(field, value)) + LOG.info('{}\t{}'.format(field, value)) add_transaction( person=trade['person'], @@ -132,6 +141,3 @@ if __name__ == '__main__': value=trade['value'], source=trade['source'], ) - - if verbose: - print('')