scrape.nasdaq: Convert to BeautifulSoup v4
[insider.git] / insider / scrape / nasdaq.py
1 """Scrape insider trade information from `holdings.nasdaq.com`
2 """
3
4 import datetime
5 import decimal
6 import urllib2
7
8 from bs4 import BeautifulSoup
9
10 from insider.models import add_transaction
11
12
# Lookup tables that expand the abbreviations used in NASDAQ's holdings
# pages.  The outer key is the name of the scraped field, the inner key is
# the raw cell text, and the inner value is what gets stored for that field.
CODE = {
    # Convert NASDAQ's people (generally LAST FIRST[ MIDDLE]) to
    # Person.name.  This is for hardcoded overrides when the default
    # algorithm fails.
    'person': {},

    # Relationship of the insider to the company.
    'relation': {
        'OFF': 'officer',
        'DIR': 'Director',
    },

    # Kind of transaction; an empty cell maps to None.
    'transaction': {
        '': None,
        'AS': 'Automatic Sell',
        'AB': 'Automatic Buy',
        'JS': 'Disposition (Non Open Market)',
        'JB': 'Acquisition (Non Open Market)',
        'OE': 'Option Exercise',
        'S': 'Sell',
        'B': 'Buy',
    },

    # Form of ownership.
    'ownership': {
        'D': 'direct',
        'IN': 'indirect',
    },
}
35
36
# Expected layout of a trade row, one (field, CSS class) pair per <td>.
# A field of None marks a column to skip, a class of None disables the
# class check, and the trailing '' entry is a sentinel that is only
# reached when every preceding column matched.
_TRADE_COLUMNS = (
    (None, None),  # first column is blank
    ('person', 'Holddata'),
    ('relation', 'Holddata'),
    ('date', 'date'),
    ('form', 'Holddata'),
    ('transaction', 'Holddata'),
    ('ownership', 'Holddata'),
    ('shares traded', 'Holdnum'),
    ('last price', 'Holdnum'),
    ('shares held', 'Holdnum'),
    ('', None),
)

# A transaction whose description contains any of these words has its
# share count negated.
_NEGATIVE_KEYWORDS = ('sell', 'option', 'disposition')


def _default_person_name(raw):
    """Turn 'LAST FIRST[ MIDDLE]' into 'First[ Middle] Last'."""
    last, _, first_plus = raw.title().partition(' ')
    if not first_plus:
        # Single-word name: nothing to reorder (the original two-way
        # unpacking raised ValueError here).
        return last
    return ' '.join((first_plus, last))


def _decode_cell(field, class_, text, url):
    """Convert the stripped text of one table cell to its stored value.

    Raises KeyError when `text` is not a known abbreviation for a coded
    field other than 'person'; `url` is only used in the diagnostic
    message printed before re-raising.
    """
    if class_ == 'Holdnum':  # decode numerical values
        if text == '-':
            return None
        return float(text.strip('$()').replace(',', ''))
    if field in CODE:  # decode abbreviated values
        try:
            return CODE[field][text]
        except KeyError:
            if field == 'person':  # fall back to default name
                return _default_person_name(text)
            print('unknown code {} for {} field\n{}'.format(
                    text, field, url))
            raise
    if field == 'date':
        return datetime.datetime.strptime(text, '%m/%d/%Y')
    return text


def _parse_row(tds, url):
    """Decode the cells of one <tr>; return None if it is not a trade row.

    A row counts as a trade row only when it has a cell for every entry of
    _TRADE_COLUMNS and each cell carries the expected CSS class.
    """
    fields = {}
    for i, (field, class_) in enumerate(_TRADE_COLUMNS):
        if i >= len(tds):
            return None  # not enough columns in this row
        if field is None:
            continue  # nothing interesting in this field
        if field == '':
            return fields  # we made it to the end of the list
        td = tds[i]
        if class_ and class_ not in td.get('class', []):
            return None  # wrong class
        # Strip surrounding whitespace so that code lookups, number
        # parsing and date parsing are not tripped up by cell padding.
        fields[field] = _decode_cell(field, class_, td.text.strip(), url)
    return None


def get_trades(ticker):
    """Yield insider trades scraped from NASDAQ's Form 4 page for `ticker`.

    `ticker` is an 'EXCHANGE_SYMBOL:COMPANY_SYMBOL' string.  The page is
    downloaded lazily, on the first iteration of the returned generator.

    Each yielded item is a dict with the keys 'exchange',
    'exchange_symbol', 'company', 'company_symbol', 'source', 'person',
    'relation', 'date', 'form', 'transaction', 'ownership',
    'shares traded', 'last price', 'shares held' and 'value'.  Numeric
    cells are floats (None for '-'), 'date' is a datetime.datetime and
    'value' is a decimal.Decimal rounded to cents.

    Raises ValueError if `ticker` does not contain exactly one ':',
    IndexError if the page has fewer than two matching tables, and
    KeyError if a cell holds an abbreviation missing from CODE.
    """
    exchange_symbol, company_symbol = ticker.split(':')
    url = 'http://holdings.nasdaq.com/asp/Form4.asp?selected={}'.format(
        company_symbol)
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()  # don't leak the connection
    # NOTE(review): no parser is named here, so bs4 uses whichever one is
    # installed; results may differ between environments.
    soup = BeautifulSoup(html)
    # The second full-width 'holdings' table is the one that is scanned.
    table = soup('table', {'class': 'holdings', 'width': '100%'})[1]
    for row in table('tr'):
        fields = _parse_row(row('td'), url)
        if fields is None:
            continue  # header, spacer or otherwise malformed row
        data = {'exchange': 'UNKNOWN', 'exchange_symbol': exchange_symbol,
                'company': 'UNKNOWN', 'company_symbol': company_symbol,
                'source': url}
        data.update(fields)
        transaction = data['transaction']
        if transaction and 'Non Open Market' in transaction:
            data['exchange'] = 'non-open market'
            data['exchange_symbol'] = '-'
        if data['shares traded'] and transaction:
            description = transaction.lower()
            # Flip the sign exactly once, even if several keywords match
            # (the original flipped once per match, so two matches would
            # have cancelled out).
            if any(key in description for key in _NEGATIVE_KEYWORDS):
                data['shares traded'] *= -1  # - for selling
        # estimate total price
        price = data['last price'] or 0
        shares = data['shares traded'] or 0
        value = decimal.Decimal(price * shares)
        data['value'] = value.quantize(decimal.Decimal('.01'))
        yield data
110
111
if __name__ == '__main__':
    # Script entry point: each command-line argument is a ticker whose
    # scraped trades are handed to add_transaction() one at a time.
    import sys

    verbose = False

    # (add_transaction keyword, key in the scraped trade dict)
    argument_keys = (
        ('person', 'person'),
        ('date', 'date'),
        ('exchange', 'exchange'),
        ('exchange_symbol', 'exchange_symbol'),
        ('company', 'company'),
        ('company_symbol', 'company_symbol'),
        ('shares', 'shares traded'),
        ('value', 'value'),
        ('source', 'source'),
    )

    for ticker in sys.argv[1:]:
        for trade in get_trades(ticker=ticker):
            if verbose:
                # display the trade we're looking at, one field per line
                for field in sorted(trade):
                    print('{}\t{}'.format(field, trade[field]))

            kwargs = dict((name, trade[key]) for name, key in argument_keys)
            add_transaction(**kwargs)

            if verbose:
                # blank line between trades
                print('')