124e423785873f2cc3b3fb4beab87db9b8802f72
[rss2email.git] / rss2email.py
1 #!/usr/bin/python
2 """rss2email: get RSS feeds emailed to you
3 http://www.aaronsw.com/2002/rss2email
4
5 Usage: python rss2email.py feedfile action [options]
6   feedfile: name of the file to store feed info in
7   action [options]:
8         new [youremail] (create new feedfile)
9         email [yournewemail] (update default email)
10         run [--no-send]
11         add feedurl [youremail]
12         list
13         delete n
14 """
# Module metadata.
__version__ = "2.3"
__author__ = "Aaron Swartz (me@aaronsw.com)"
__copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2."
__contributors__ = ["Dean Jackson (dino@grorg.org)",
                    "Brian Lalor (blalor@ithacabands.org)",
                    "Joey Hess", 'Matej Cepl']
# The list was originally published under a misspelled name (three leading
# underscores); keep that spelling as an alias so nothing reading it breaks.
___contributors__ = __contributors__
21
### Vaguely Customizable Options ###
# Any of these may be overridden by a `config` module found on sys.path
# (the script does `from config import *` after this section).

# The address messages are sent from by default:
DEFAULT_FROM = "bozo@dev.null"

# 1: Only use the DEFAULT_FROM address.
# 0: Use the email address specified by the feed, when possible.
FORCE_FROM = 0

# 1: Receive one email per post (a post's id, when present, identifies it).
# 0: Receive an email every time a post changes (posts are identified by a
#    hash of their content).
TRUST_GUID = 1

# 1: Generate Date header based on item's date, when possible
# 0: Generate Date header based on time sent
DATE_HEADER = 0

# 1: Treat the contents of <description> as HTML (convert it with html2text)
# 0: Send the contents of <description> as is, without conversion
TREAT_DESCRIPTION_AS_HTML = 1

# 1: Apply quoted-printable (Q-P) conversion (required for some MUAs)
# 0: Send message in 8-bits
# http://cr.yp.to/smtp/8bitmime.html
QP_REQUIRED = 0

# 1: Name feeds as they're being processed.
# 0: Keep quiet.
VERBOSE = 0
51
def send(fr, to, message):
    """Hand `message` to the local sendmail binary for delivery to `to`.

    `fr` is accepted so that alternative implementations (for example an
    SMTP based one) can share the same signature; it is not used here.
    """
    child_stdin, child_stdout = os.popen2(["/usr/sbin/sendmail", to])
    child_stdin.write(message)
    # Closing stdin signals end-of-message to sendmail.
    child_stdin.close()
    child_stdout.close()
57         
58 # def send(fr, to, message):
59 #       import smtplib
60 #       s = smtplib.SMTP("vorpal.notabug.com:26")
61 #       s.sendmail(fr, [to], message)
62
63 ### End of Options ###
64
# Read options from config file if present.
import sys
sys.path.append(".")
try:
    from config import *
except ImportError:
    # No usable config module: keep the defaults defined above.  Any other
    # failure (e.g. a syntax error inside config.py) is deliberately allowed
    # to propagate so a broken configuration is not silently ignored.
    pass
72
73 from html2text import html2text, expandEntities
74 import feedparser
75 import cPickle as pickle, fcntl, md5, time, os, traceback
76 if QP_REQUIRED: import mimify; from StringIO import StringIO as SIO
def isstr(f):
    """Return true if `f` is a plain string or a unicode string."""
    string_types = (type(''), type(u''))
    return isinstance(f, string_types)
78
def e(obj, val, ee=1):
    """Fetch obj[val] as a stripped string.

    When `ee` is true the value is first passed through expandEntities();
    a unicode value is encoded as UTF-8 before stripping.
    """
    text = obj[val]
    if ee:
        text = expandEntities(text)
    if type(text) is unicode:
        text = text.encode('utf-8')
    text = text.strip()
    return text
84
def quoteEmailName(s):
    """Return `s` wrapped in double quotes, with backslashes and double
    quotes backslash-escaped, for use as a display name in an address."""
    # Backslashes must be doubled first so the escapes added for the
    # quotes are not themselves escaped again.
    escaped = s.replace("\\", "\\\\")
    escaped = escaped.replace('"', '\\"')
    return '"%s"' % escaped
87
def getContent(item, url):
    """Return the text body to mail for a feed item.

    Preference order: a text/plain entry in item['content'], then the first
    entry there whose type mentions html (converted with html2text against
    that entry's own base), then the first content entry whatever its type;
    failing that the item's description, then its summary, and finally the
    empty string.
    """
    parts = None
    if item.has_key('content'):
        parts = item['content']

    if parts:
        for part in parts:
            if part['type'] == 'text/plain':
                return e(part, 'value')

        for part in parts:
            if part['type'].find('html') >= 0:
                return html2text(e(part, 'value', ee=0), part['base'])

        return e(parts[0], 'value')

    if item.has_key('description'):
        if not TREAT_DESCRIPTION_AS_HTML:
            return e(item, 'description')
        # Entities are left alone here (ee=0); the text goes to html2text.
        return html2text(e(item, 'description', ee=0), url)

    if item.has_key('summary'):
        return e(item, 'summary')

    return ""
107
def getID(item, content):
    """Return the value used to recognise an item that was already sent.

    With TRUST_GUID set and a non-empty item id present, that id is used.
    Otherwise: the MD5 hex digest of `content` if there is any, else the
    item's link, else the MD5 hex digest of its title.  Returns None when
    none of these is available.
    """
    if TRUST_GUID and item.has_key('id') and item['id']:
        return e(item, 'id')

    if content:
        return md5.new(content).hexdigest()

    if item.has_key('link'):
        return e(item, 'link')

    if item.has_key('title'):
        return md5.new(e(item, 'title')).hexdigest()

    return None
115
class Feed:
    """One subscribed feed, as stored (pickled) in the feed file."""

    def __init__(self, url, to):
        # Where the feed lives and who receives its items; a false `to`
        # makes run() fall back to the default address.
        self.url = url
        self.to = to
        # Values handed back to feedparser.parse() on the next fetch.
        self.etag = None
        self.modified = None
        # Items already mailed, filled in by run().
        self.seen = {}
120
def load(lock=1):
    """Read the pickled feed list from `feedfile`.

    lock: when true, an exclusive flock is taken on the feed file *before*
          its contents are read, and is still held on return.

    Returns (feeds, ff2), where ff2 is the open file object (holding the
    lock, if one was requested) to be passed to unlock().
    """
    if not lock:
        ff2 = open(feedfile, 'r')
        return pickle.load(ff2), ff2

    while 1:
        ff2 = open(feedfile, 'r')
        fcntl.flock(ff2, fcntl.LOCK_EX)
        # unlock() publishes a new feed file by renaming over the old one, so
        # while we waited for the lock the path may have been re-pointed at a
        # different file.  Only trust the handle if it is still the file the
        # path names; otherwise drop it and try again with the new file.
        held = os.fstat(ff2.fileno())
        current = os.stat(feedfile)
        if held.st_ino == current.st_ino and held.st_dev == current.st_dev:
            break
        ff2.close()

    # The lock is held, so the data cannot change underneath us now.
    feeds = pickle.load(ff2)
    return feeds, ff2
132
def unlock(feeds, ff2):
    """Write `feeds` back to `feedfile` and release the lock held on ff2.

    The data is pickled to feedfile + '.tmp' and then renamed over the real
    file, so readers never see a half-written feed file.
    """
    tmpname = feedfile + '.tmp'
    tmp = open(tmpname, 'w')
    try:
        pickle.dump(feeds, tmp)
    finally:
        # Close explicitly so everything is flushed to disk before the
        # rename, instead of relying on garbage collection to do it.
        tmp.close()
    os.rename(tmpname, feedfile)
    fcntl.flock(ff2, fcntl.LOCK_UN)
137         
class NoEmail(Exception):
    """Raised by add() when no recipient is given and no default is set."""


def add(url, to=None):
    """Subscribe to `url`, mailing its items to `to`.

    `to` may be omitted only if the feed file has a default email address
    (stored as a string in the first slot of the feed list); otherwise
    NoEmail is raised.
    """
    feeds, ff2 = load()
    # A freshly created feed file may be an empty list, so check before
    # looking at the first slot.
    has_default = feeds and isstr(feeds[0])
    if not has_default and to is None:
        # A real exception class: string exceptions are rejected by
        # Python 2.6 and later.
        raise NoEmail("Run `email newaddr` or `add url addr`.")
    feeds.append(Feed(url, to))
    unlock(feeds, ff2)
144
def _oneLine(s):
    """Replace line breaks in `s` with spaces so that text taken from a feed
    cannot split a mail header or smuggle in extra headers."""
    return s.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")


def run():
    """Fetch every feed and mail each item that has not been sent before.

    The feed file stays locked for the whole run and is written back at the
    end (even if the run is interrupted), so the record of items already
    sent is kept.  A failure while handling one feed is reported and does
    not stop the remaining feeds.
    """
    feeds, ff2 = load()
    try:
        # A string in the first slot is the default recipient; the feed list
        # may also be empty or have no default at all.
        if feeds and isstr(feeds[0]):
            default_to, ifeeds = feeds[0], feeds[1:]
        else:
            default_to, ifeeds = None, feeds

        for f in ifeeds:
            try:
                if VERBOSE: print("Processing " + f.url)

                to = f.to or default_to
                if not to:
                    print("E: no email address for " + f.url
                          + "; run `email newaddr`.")
                    continue

                result = feedparser.parse(f.url, f.etag, f.modified)
                status = result.get('status', None)

                # Remember the new location of a permanently moved feed.
                if status == 301: f.url = result['url']

                enc = result.get('encoding', 'utf-8')

                c, ert = result['channel'], 'errorreportsto'

                # FORCE_FROM = 1 means "always DEFAULT_FROM"; the feed's own
                # address is only used when FORCE_FROM is off.
                if (not FORCE_FROM and c.has_key(ert)
                        and c[ert].startswith('mailto:')):
                    fr = _oneLine(c[ert][7:])
                else:
                    fr = DEFAULT_FROM

                headers = "From: "
                if c.has_key('title'):
                    headers += quoteEmailName(_oneLine(e(c, 'title'))) + ' '
                headers += '<' + fr + '>'

                headers += "\nTo: " + to
                if not QP_REQUIRED:
                    headers += ('\nContent-Type: text/plain; charset="'
                                + enc + '"')

                # No items is expected for 304 (not modified); anything else
                # with no items is suspicious.
                if not result['items'] and status != 304:
                    print("W: no items; invalid feed? (" + f.url + ")")
                    continue

                for i in result['items']:
                    content = getContent(i, f.url)
                    itemid = getID(i, content)

                    if i.has_key('link') and i['link']: link = e(i, 'link')
                    else: link = None

                    # f.seen maps an item's frame id to the id it had when it
                    # was last mailed.
                    if i.has_key('id') and i['id']: frameid = e(i, 'id')
                    else: frameid = itemid

                    if f.seen.has_key(frameid) and f.seen[frameid] == itemid:
                        continue # have seen

                    if i.has_key('title'): title = e(i, 'title')
                    else: title = content[:70]
                    title = _oneLine(title)

                    if DATE_HEADER and i.has_key('date_parsed'):
                        when = i['date_parsed']
                    else:
                        when = time.gmtime()

                    message = (headers
                               + "\nSubject: " + title
                               + "\nDate: " + time.strftime(
                                   "%a, %d %b %Y %H:%M:%S -0000", when)
                               + "\nUser-Agent: rss2email"
                               + "\n")

                    message += "\n" + content.strip() + "\n"

                    if link: message += "\nURL: " + link + "\n"

                    if QP_REQUIRED:
                        mimify.CHARSET = enc
                        ins, outs = SIO(message), SIO()
                        mimify.mimify(ins, outs); outs.seek(0)
                        message = outs.read()

                    send(fr, to, message)

                    # Only mark the item once it has been handed to send().
                    f.seen[frameid] = itemid

                f.etag = result.get('etag', None)
                f.modified = result.get('modified', None)
            except (KeyboardInterrupt, SystemExit):
                # Let the user stop the run; the finally below still saves
                # what has been sent so far.
                raise
            except:
                print("E: could not parse " + f.url)
                traceback.print_exc()
                continue

    finally:
        unlock(feeds, ff2)
229
def list():
    """Print the default email address (if any) and every feed with the
    number that `delete` expects for it.

    Feed numbers are positions in the stored list, so they start at 1 when
    the first slot holds the default email address and at 0 otherwise.
    """
    feeds, ff2 = load(lock=0)

    # The feed file may be empty or have no default address.
    if feeds and isstr(feeds[0]):
        default_to, ifeeds, i = feeds[0], feeds[1:], 1
        print("default email: " + default_to)
    else:
        default_to, ifeeds, i = None, feeds, 0

    for f in ifeeds:
        if f.to:
            dest = f.to
        elif default_to is not None:
            dest = 'default: ' + default_to
        else:
            dest = 'no email address set'
        print("%d: %s (%s)" % (i, f.url, dest))
        i += 1
240
def delete(n):
    """Remove feed number `n` (as shown by `list`) from the feed file.

    Raises IndexError if `n` does not refer to a feed: out of range,
    negative, or the slot holding the default email address.
    """
    feeds, ff2 = load()
    try:
        # Slicing with an unchecked n would duplicate entries for negative
        # values and could drop the default address for n == 0.
        if n < 0 or n >= len(feeds) or isstr(feeds[n]):
            raise IndexError("no feed numbered %d; run `list` to see "
                             "the valid numbers" % n)
        del feeds[n]
    finally:
        # Always write back and release the lock taken by load().
        unlock(feeds, ff2)
245         
def email(addr):
    """Set `addr` as the default email address of the feed file.

    The default address is stored as a string in the first slot of the feed
    list; it is replaced if present and inserted otherwise (which shifts
    the feed numbers by one).
    """
    feeds, ff2 = load()
    # The list is empty right after `new` without an address.
    if feeds and isstr(feeds[0]):
        feeds[0] = addr
    else:
        feeds = [addr] + feeds
    unlock(feeds, ff2)
251
class InputError(Exception):
    """Bad command-line usage; reported together with the usage text."""


if __name__ == '__main__':
    args = sys.argv
    try:
        if len(args) < 3: raise InputError("insufficient args")
        feedfile, action, args = args[1], args[2], args[3:]

        if action == "run":
            if args and args[0] == "--no-send":
                # Replace the module-level send() so run() marks items as
                # seen without mailing anything.
                def send(fr, to, message):
                    if VERBOSE:
                        subject = [line for line in message.splitlines()
                                   if line.startswith("Subject:")][0]
                        print("Not sending " + subject)
            run()

        elif action == "email":
            if not args: raise InputError("email needs an address")
            email(args[0])
            print("W: Feed IDs may have changed. Run `list` before `delete`.")

        elif action == "add":
            if len(args) < 1 or len(args) > 2:
                raise InputError("add needs a feed url and, optionally, "
                                 "an address")
            add(*args)

        elif action == "new":
            if len(args) == 1: d = [args[0]]
            else: d = []
            out = open(feedfile, 'w')
            try:
                pickle.dump(d, out)
            finally:
                out.close()

        elif action == "list": list()

        elif action == "delete":
            if not args: raise InputError("delete needs a feed number")
            try:
                n = int(args[0])
            except ValueError:
                raise InputError("feed number must be an integer")
            delete(n)

        else:
            raise InputError("invalid action")

    except InputError:
        # Fetched via sys.exc_info() rather than bound in the except clause,
        # so the module-level helper e() is not overwritten.
        err = sys.exc_info()[1]
        print("E: " + str(err))
        print("")
        print(__doc__)