$ PYTHONPATH=".:$PYTHONPATH" python example/manage.py runserver
+Scraping
+========
+
+Entering transaction data by hand can be tedious and error-prone. To
+automate the task, you should write scrapers that look up and enter
+transaction data for you. To get you started, I've written
+``insider/scrape/nasdaq.py``, which scrapes `NASDAQ's interface`__ to
+`EDGAR`_\'s data. Use the scraper with something like::
+
+ $ export PYTHONPATH='.'
+ $ export DJANGO_SETTINGS_MODULE='example.settings'
+ $ python insider/scrape/nasdaq.py NYSE:RHT NASDAQ:GOOG
+
+__ NASDAQ_
+
Hacking
=======
.. _dt2-docs: http://django-tables2.readthedocs.org/en/latest/
.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
.. _Django documentation: https://docs.djangoproject.com/
+.. _NASDAQ: http://www.nasdaq.com/reference/ownership.stm
+.. _EDGAR: http://www.edgar-online.com/
.. _Django tutorial: https://docs.djangoproject.com/en/1.3/intro/tutorial01/
--- /dev/null
+"""Scrape insider trade information from `holdings.nasdaq.com`
+"""
+
+import datetime
+import decimal
+import urllib2
+
+from BeautifulSoup import BeautifulSoup
+
+from insider.models import add_transaction
+
+
+# Lookup tables for decoding the abbreviations found in the scraped
+# table.  The outer key is a field name; the inner dict maps the raw
+# cell text to the value stored for that field.  get_trades() consults
+# this table for every field whose name appears here.
+CODE = {
+ 'person': {
+ # Convert NASDAQ's people (generally LAST FIRST[ MIDDLE]) to
+ # Person.name. This is for hardcoded overrides when the
+ # default algorithm fails.
+ },
+ # NOTE(review): 'officer' is lower case while 'Director' is
+ # capitalised -- confirm whether that difference is intentional.
+ 'relation': {'OFF': 'officer', 'DIR': 'Director'},
+ 'transaction': {
+ # an empty cell decodes to None (no transaction type given)
+ '': None,
+ 'AS': 'Automatic Sell',
+ 'AB': 'Automatic Buy',
+ 'JS': 'Disposition (Non Open Market)',
+ 'JB': 'Acquisition (Non Open Market)',
+ 'OE': 'Option Exercise',
+ 'S': 'Sell',
+ 'B': 'Buy',
+ },
+ 'ownership': {
+ 'D': 'direct',
+ 'IN': 'indirect',
+ },
+ }
+
+
+def get_trades(ticker):
+ exchange_symbol,company_symbol = ticker.split(':')
+ url = 'http://holdings.nasdaq.com/asp/Form4.asp?selected={}'.format(
+ company_symbol)
+ html = urllib2.urlopen(url).read()
+ soup = BeautifulSoup(html)
+ table = soup('table', {'class': 'holdings', 'width': '100%'})[1]
+ for row in table('tr'):
+ # extract data values
+ tds = row('td')
+ line_match = False
+ data = {'exchange': 'UNKNOWN', 'exchange_symbol': exchange_symbol,
+ 'company': 'UNKNOWN', 'company_symbol': company_symbol,
+ 'source': url}
+ for i,(field,class_) in enumerate([
+ (None, None), # first column is blank
+ ('person', 'Holddata'),
+ ('relation', 'Holddata'),
+ ('date', 'date'),
+ ('form', 'Holddata'),
+ ('transaction', 'Holddata'),
+ ('ownership', 'Holddata'),
+ ('shares traded', 'Holdnum'),
+ ('last price', 'Holdnum'),
+ ('shares held', 'Holdnum'),
+ ('', None)
+ ]):
+ if i >= len(tds):
+ break # not enough columns in this row
+ if field == None:
+ continue # nothing interesting in this field
+ elif field == '':
+ line_match = True
+ break # we made it to the end of the list
+ if dict(tds[i].attrs).get('class', None) != class_:
+ break # wrong class
+ value = tds[i].text
+ if class_ == 'Holdnum': # decode numerical values
+ if value == '-':
+ value = None
+ else:
+ value = float(value.strip('$()').replace(',', ''))
+ elif field in CODE: # decode abbreviated values
+ code = CODE[field]
+ try:
+ value = code[value]
+ except KeyError:
+ if field == 'person': # fall back to default name
+ last,first_plus = value.title().split(' ', 1)
+ value = ' '.join((first_plus, last))
+ else:
+ print('unknown code {} for {} field\n{}'.format(
+ value, field, url))
+ raise
+ elif field == 'date':
+ value = datetime.datetime.strptime(value, '%m/%d/%Y')
+ data[field] = value
+ if not line_match:
+ continue
+ if data['transaction'] and 'Non Open Market' in data['transaction']:
+ data['exchange'] = 'non-open market'
+ data['exchange_symbol'] = '-'
+ if data['shares traded'] and data['transaction']:
+ for key in ['sell', 'option', 'disposition']:
+ if key in data['transaction'].lower():
+ data['shares traded'] *= -1 # - for selling
+ # estimate total price
+ price = data['last price'] or 0
+ shares = data['shares traded'] or 0
+ value = decimal.Decimal(price * shares)
+ data['value'] = value.quantize(decimal.Decimal('.01'))
+ yield data
+
+
+if __name__ == '__main__':
+ import sys
+
+ verbose = False
+
+ for ticker in sys.argv[1:]:
+ for trade in get_trades(ticker=ticker):
+ if verbose:
+ # display the trade we're looking at
+ for field,value in sorted(trade.items()):
+ print('{}\t{}'.format(field, value))
+
+ add_transaction(
+ person=trade['person'],
+ date=trade['date'],
+ exchange=trade['exchange'],
+ exchange_symbol=trade['exchange_symbol'],
+ company=trade['company'],
+ company_symbol=trade['company_symbol'],
+ shares=trade['shares traded'],
+ value=trade['value'],
+ source=trade['source'],
+ )
+
+ if verbose:
+ print('')