Add nasdaq.py scraper and update README.
[insider.git] / insider / scrape / nasdaq.py
1 """Scrape insider trade information from `holdings.nasdaq.com`
2 """
3
4 import datetime
5 import decimal
6 import urllib2
7
8 from BeautifulSoup import BeautifulSoup
9
10 from insider.models import add_transaction
11
12
# Lookup tables that expand NASDAQ's abbreviated cell values, keyed by
# the name of the column they apply to.
#
# NOTE(review): the 'relation' values are capitalised inconsistently
# ('officer' vs 'Director'); left as-is because the strings are data.
CODE = dict(
    # Hardcoded overrides converting NASDAQ's people (generally
    # LAST FIRST[ MIDDLE]) to Person.name, for the cases where the
    # default name-conversion algorithm fails.  Empty for now.
    person={},
    relation={
        'OFF': 'officer',
        'DIR': 'Director',
    },
    transaction={
        '': None,  # blank cell: no transaction type
        'B': 'Buy',
        'S': 'Sell',
        'AB': 'Automatic Buy',
        'AS': 'Automatic Sell',
        'JB': 'Acquisition (Non Open Market)',
        'JS': 'Disposition (Non Open Market)',
        'OE': 'Option Exercise',
    },
    ownership={
        'D': 'direct',
        'IN': 'indirect',
    },
)
35
36
# Expected layout of a data row: (field name, CSS class of the <td>).
# A field of None marks a column to skip; the trailing '' entry is a
# sentinel that is only reached when every earlier column matched.
_COLUMNS = (
    (None, None),  # first column is blank
    ('person', 'Holddata'),
    ('relation', 'Holddata'),
    ('date', 'date'),
    ('form', 'Holddata'),
    ('transaction', 'Holddata'),
    ('ownership', 'Holddata'),
    ('shares traded', 'Holdnum'),
    ('last price', 'Holdnum'),
    ('shares held', 'Holdnum'),
    ('', None),
)

# Substrings of a decoded transaction description that mark shares
# leaving the holder's position (recorded as a negative share count).
_OUTGOING_KEYWORDS = ('sell', 'option', 'disposition')


def _default_person_name(raw):
    """Turn ``LAST FIRST[ MIDDLE]`` into ``First[ Middle] Last``.

    A name with a single word (nothing to reorder) is returned
    title-cased instead of raising on the two-way unpacking.
    """
    parts = raw.title().split(None, 1)
    if not parts:
        return raw
    if len(parts) == 1:
        return parts[0]
    last, first_plus = parts
    return ' '.join((first_plus, last))


def _decode_cell(field, class_, text, url):
    """Convert the text of one table cell into a Python value.

    'Holdnum' cells become floats (``None`` for '-'), fields listed in
    CODE are expanded through their lookup table, and 'date' cells are
    parsed as ``%m/%d/%Y``.  Anything else is returned unchanged.

    Raises KeyError for an unknown abbreviation in any CODE field other
    than 'person', after printing the offending code and source URL.
    """
    if class_ == 'Holdnum':  # decode numerical values
        if text == '-':
            return None
        return float(text.strip('$()').replace(',', ''))
    if field in CODE:  # decode abbreviated values
        try:
            return CODE[field][text]
        except KeyError:
            if field == 'person':  # fall back to default name
                return _default_person_name(text)
            print('unknown code {} for {} field\n{}'.format(
                    text, field, url))
            raise
    if field == 'date':
        return datetime.datetime.strptime(text, '%m/%d/%Y')
    return text


def get_trades(ticker):
    """Yield insider trades scraped from NASDAQ's Form 4 page.

    ticker -- string of the form ``EXCHANGE:SYMBOL``.

    Each yielded item is a dict with the keys 'exchange',
    'exchange_symbol', 'company', 'company_symbol', 'source', 'person',
    'relation', 'date', 'form', 'transaction', 'ownership',
    'shares traded', 'last price', 'shares held' and 'value'.
    'shares traded' is negated for sell, option and disposition
    transactions; 'value' is a Decimal (last price times shares traded)
    quantized to two decimal places.

    Raises ValueError if the ticker is not ``EXCHANGE:SYMBOL``, and
    KeyError if a cell holds an abbreviation missing from CODE.
    """
    try:
        exchange_symbol, company_symbol = ticker.split(':')
    except ValueError:
        raise ValueError(
            'ticker must look like EXCHANGE:SYMBOL, got {!r}'.format(ticker))
    url = 'http://holdings.nasdaq.com/asp/Form4.asp?selected={}'.format(
        company_symbol)
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()  # do not leak the connection
    soup = BeautifulSoup(html)
    table = soup('table', {'class': 'holdings', 'width': '100%'})[1]
    for row in table('tr'):
        # extract data values
        tds = row('td')
        line_match = False
        data = {'exchange': 'UNKNOWN', 'exchange_symbol': exchange_symbol,
                'company': 'UNKNOWN', 'company_symbol': company_symbol,
                'source': url}
        for i, (field, class_) in enumerate(_COLUMNS):
            if i >= len(tds):
                break  # not enough columns in this row
            if field is None:
                continue  # nothing interesting in this field
            if field == '':
                line_match = True
                break  # we made it to the end of the list
            if dict(tds[i].attrs).get('class', None) != class_:
                break  # wrong class
            data[field] = _decode_cell(field, class_, tds[i].text, url)
        if not line_match:
            continue
        transaction = data['transaction']
        if transaction and 'Non Open Market' in transaction:
            data['exchange'] = 'non-open market'
            data['exchange_symbol'] = '-'
        if data['shares traded'] and transaction:
            lowered = transaction.lower()
            # Flip the sign at most once, even if a description ever
            # matches more than one keyword.
            if any(key in lowered for key in _OUTGOING_KEYWORDS):
                data['shares traded'] *= -1  # - for selling
        # estimate total price
        price = data['last price'] or 0
        shares = data['shares traded'] or 0
        # Multiply in decimal (via repr) so binary float error does not
        # leak into the monetary amount before rounding to cents.
        value = decimal.Decimal(repr(price)) * decimal.Decimal(repr(shares))
        data['value'] = value.quantize(decimal.Decimal('.01'))
        yield data
109
110
if __name__ == '__main__':
    # Command-line entry point: every argument is a ticker whose trades
    # are scraped with get_trades() and stored with add_transaction().
    import sys

    verbose = False

    # (add_transaction keyword, key in the scraped trade dict)
    keyword_sources = (
        ('person', 'person'),
        ('date', 'date'),
        ('exchange', 'exchange'),
        ('exchange_symbol', 'exchange_symbol'),
        ('company', 'company'),
        ('company_symbol', 'company_symbol'),
        ('shares', 'shares traded'),
        ('value', 'value'),
        ('source', 'source'),
    )

    for symbol in sys.argv[1:]:
        for record in get_trades(ticker=symbol):
            if verbose:
                # display the trade we're looking at
                for name, content in sorted(record.items()):
                    print('{}\t{}'.format(name, content))

            arguments = {}
            for keyword, key in keyword_sources:
                arguments[keyword] = record[key]
            add_transaction(**arguments)

            if verbose:
                print('')