Bump to version 2.66.
[rss2email.git] / rss2email.py
1 #!/usr/bin/python
2 """rss2email: get RSS feeds emailed to you
3 http://rss2email.infogami.com
4
5 Usage:
6   new [emailaddress] (create new feedfile)
7   email newemailaddress (update default email)
8   run [--no-send] [num]
9   add feedurl [emailaddress]
10   list
11   reset
12   delete n
13 """
__version__ = "2.66"
__author__ = "Lindsey Smith (lindsey@allthingsrss.com)"
__copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2 or 3."
__contributors__ = [
    "Dean Jackson",
    "Brian Lalor",
    "Joey Hess",
    "Matej Cepl",
    "Martin 'Joey' Schulze",
    "Marcel Ackermann (http://www.DreamFlasher.de)",
    "Lindsey Smith",
    "Aaron Swartz (original author)",
]
# The list used to be published under a misspelled name (three leading
# underscores).  Keep that spelling as an alias so nothing that reads it breaks.
___contributors__ = __contributors__
21
22 import urllib2
23 urllib2.install_opener(urllib2.build_opener())
24
### Vaguely Customizable Options ###

# Default "From" address of outgoing messages.
DEFAULT_FROM = "bozo@dev.null.invalid"

# HTML_MAIL
#   1: Send text/html messages when possible.
#   0: Convert HTML to plain text.
HTML_MAIL = 0

# FORCE_FROM
#   1: Only use the DEFAULT_FROM address.
#   0: Use the email address specified by the feed, when possible.
FORCE_FROM = 0

# TRUST_GUID
#   1: Receive one email per post.
#   0: Receive an email every time a post changes.
TRUST_GUID = 1

# DATE_HEADER
#   1: Generate Date header based on item's date, when possible.
#   0: Generate Date header based on time sent.
DATE_HEADER = 0

# DATE_HEADER_ORDER
#   A tuple consisting of some combination of
#   ('issued', 'created', 'modified', 'expired'),
#   expressing the ordered list of preference in dates
#   to use for the Date header of the email.
DATE_HEADER_ORDER = ('modified', 'issued', 'created')

# QP_REQUIRED (DEPRECATED)
#   1: Apply Q-P conversion (required for some MUAs).
#   0: Send message in 8-bits.
#   http://cr.yp.to/smtp/8bitmime.html
QP_REQUIRED = 0

# VERBOSE
#   1: Name feeds as they're being processed.
#   0: Keep quiet.
VERBOSE = 0

# USE_PUBLISHER_EMAIL
#   1: Use the publisher's email if you can't find the author's.
#   0: Just use the DEFAULT_FROM email instead.
USE_PUBLISHER_EMAIL = 0

# SMTP_SEND
#   1: Use SMTP_SERVER to send mail.
#   0: Call /usr/sbin/sendmail to send mail.
SMTP_SEND = 0

SMTP_SERVER = "smtp.yourisp.net:25"
AUTHREQUIRED = 0        # if you need to use SMTP AUTH set to 1
SMTP_USER = 'username'  # for SMTP AUTH, set SMTP username here
SMTP_PASS = 'password'  # for SMTP AUTH, set SMTP password here

# Set this to add a bonus header to all emails (start with '\n').
# Example: BONUS_HEADER = '\nApproved: joe@bob.org'
BONUS_HEADER = ''

# Set this to override From addresses. Keys are feed URLs, values are new titles.
OVERRIDE_FROM = {}

# Set this to override the timeout (in seconds) for feed server response.
FEED_TIMEOUT = 60

# Optional CSS styling.
USE_CSS_STYLING = 0
STYLE_SHEET = (
    'h1 {font: 18pt Georgia, "Times New Roman";} '
    'body {font: 12pt Arial;} '
    'a:link {font: 12pt Arial; font-weight: bold; color: #0000cc} '
    'blockquote {font-family: monospace; }  '
    '.header { background: #e0ecff; border-bottom: solid 4px #c3d9ff; '
    'padding: 5px; margin-top: 0px; color: red;} '
    '.header a { font-size: 20px; text-decoration: none; } '
    '.footer { background: #c3d9ff; border-top: solid 4px #c3d9ff; '
    'padding: 5px; margin-bottom: 0px; } '
    '#entry {border: solid 4px #c3d9ff; } '
    '#body { margin-left: 5px; margin-right: 5px; }'
)

# If you have an HTTP Proxy set this in the format 'http://your.proxy.here:8080/'
PROXY = ""

# To most correctly encode emails with international characters, we iterate
# through the list below and use the first character set that works.
# Eventually (and theoretically) ISO-8859-1 and UTF-8 are our catch-all failsafes.
CHARSET_LIST = ('US-ASCII', 'BIG5', 'ISO-2022-JP', 'ISO-8859-1', 'UTF-8')
96
97 from email.MIMEText import MIMEText
98 from email.Header import Header
99 from email.Utils import parseaddr, formataddr
100                          
101 # Note: You can also override the send function.
102
def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None):
    """Send an email.

    All arguments should be Unicode strings (plain ASCII works as well).

    Only the real name part of sender and recipient addresses may contain
    non-ASCII characters.

    The email is MIME encoded.  When SMTP_SEND is true it is delivered through
    SMTP_SERVER (after STARTTLS and login when AUTHREQUIRED is set); otherwise
    it is piped to /usr/sbin/sendmail.

    The charset of the body will be the first one out of CHARSET_LIST
    that can represent all the characters occurring in the body.

    Arguments:
      sender, recipient -- addresses, optionally with a real name part
      subject           -- subject line
      body              -- message text
      contenttype       -- MIME text subtype handed to MIMEText, e.g. 'plain'
      extraheaders      -- optional dict of additional header name -> value
      smtpserver        -- optional already-open SMTP connection to reuse

    Returns the SMTP connection that was used when SMTP_SEND is true (so that
    the caller can pass it back in on the next call), otherwise None.

    Exits the process with status 1 when the mail server cannot be reached or
    authenticated with, or when sendmail cannot be run or reports failure.
    """
    # The default is None; treat it as "no extra headers" rather than
    # failing on None.keys() below.
    if extraheaders is None:
        extraheaders = {}

    # Header class is smart enough to try US-ASCII, then the charset we
    # provide, then fall back to UTF-8.
    header_charset = 'ISO-8859-1'

    # We must choose the body charset manually.
    for body_charset in CHARSET_LIST:
        try:
            body.encode(body_charset)
        except (UnicodeError, LookupError):
            pass
        else:
            break

    # Split real name (which is optional) and email address parts
    sender_name, sender_addr = parseaddr(sender)
    recipient_name, recipient_addr = parseaddr(recipient)

    # We must always pass Unicode strings to Header, otherwise it will
    # use RFC 2047 encoding even on plain ASCII strings.
    sender_name = str(Header(unicode(sender_name), header_charset))
    recipient_name = str(Header(unicode(recipient_name), header_charset))

    # Make sure email addresses do not contain non-ASCII characters
    sender_addr = sender_addr.encode('ascii')
    recipient_addr = recipient_addr.encode('ascii')

    # Create the message ('plain' stands for Content-Type: text/plain)
    msg = MIMEText(body.encode(body_charset), contenttype, body_charset)
    msg['To'] = formataddr((recipient_name, recipient_addr))
    msg['Subject'] = Header(unicode(subject), header_charset)
    for hdr in extraheaders.keys():
        try:
            # Byte-string values are decoded with header_charset first ...
            msg[hdr] = Header(unicode(extraheaders[hdr], header_charset))
        except Exception:
            # ... values that cannot be decoded that way (e.g. already
            # Unicode) are handed to Header unchanged.
            msg[hdr] = Header(extraheaders[hdr])

    msg['From'] = formataddr((sender_name, sender_addr))

    msg_as_string = msg.as_string()

    if SMTP_SEND:
        if not smtpserver:
            import smtplib

            try:
                smtpserver = smtplib.SMTP(SMTP_SERVER)
            except KeyboardInterrupt:
                raise
            except Exception:
                e = sys.exc_info()[1]
                print >>warn, ""
                print >>warn, ('Fatal error: could not connect to mail server "%s"' % SMTP_SERVER)
                print >>warn, ('Check your config.py file to confirm that SMTP_SERVER and other mail server settings are configured properly')
                if hasattr(e, 'reason'):
                    print >>warn, "Reason:", e.reason
                sys.exit(1)

            if AUTHREQUIRED:
                try:
                    smtpserver.ehlo()
                    smtpserver.starttls()
                    smtpserver.ehlo()
                    smtpserver.login(SMTP_USER, SMTP_PASS)
                except KeyboardInterrupt:
                    raise
                except Exception:
                    e = sys.exc_info()[1]
                    print >>warn, ""
                    print >>warn, ('Fatal error: could not authenticate with mail server "%s" as user "%s"' % (SMTP_SERVER, SMTP_USER))
                    print >>warn, ('Check your config.py file to confirm that SMTP_SERVER and other mail server settings are configured properly')
                    if hasattr(e, 'reason'):
                        print >>warn, "Reason:", e.reason
                    sys.exit(1)

        smtpserver.sendmail(sender, recipient, msg_as_string)
        return smtpserver

    else:
        sendmail_help = '''Error attempting to send email via sendmail. Possibly you need to configure your config.py to use a SMTP server? Please refer to the rss2email documentation or website (http://rss2email.infogami.com) for complete documentation of config.py. The options below may suffice for configuring email:
# 1: Use SMTP_SERVER to send mail.
# 0: Call /usr/sbin/sendmail to send mail.
SMTP_SEND = 0

SMTP_SERVER = "smtp.yourisp.net:25"
AUTHREQUIRED = 0 # if you need to use SMTP AUTH set to 1
SMTP_USER = 'username'  # for SMTP AUTH, set SMTP username here
SMTP_PASS = 'password'  # for SMTP AUTH, set SMTP password here
'''
        try:
            p = subprocess.Popen(["/usr/sbin/sendmail", recipient], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            p.communicate(msg_as_string)
            status = p.returncode
        except KeyboardInterrupt:
            # Same policy as the SMTP branch: never swallow Ctrl-C.
            raise
        except Exception:
            # sendmail could not be started or written to.
            print >>warn, sendmail_help
            sys.exit(1)

        if status != 0:
            print >>warn, ""
            print >>warn, ('Fatal error: sendmail exited with code %s' % status)
            print >>warn, sendmail_help
            sys.exit(1)
        return None
221
## html2text options ##

# UNICODE_SNOB: use Unicode characters instead of their ASCII
# pseudo-replacements.
UNICODE_SNOB = 0

# LINKS_EACH_PARAGRAPH: put the links after each paragraph instead of at the
# end.
LINKS_EACH_PARAGRAPH = 0

# BODY_WIDTH: wrap long lines at this position. 0 for no wrapping.
# (Requires Python 2.3.)
BODY_WIDTH = 0
232
### Load the Options ###

# Read options from config file if present.  The current directory is put
# first on the module search path so that a local config.py is picked up.
import sys
sys.path.insert(0, ".")
try:
    from config import *
except ImportError:
    # No usable config.py: silently keep the defaults.
    pass
except KeyboardInterrupt:
    raise
except Exception:
    # config.py exists but could not be loaded (syntax error, bad value, ...).
    # Keep running on the defaults, but say so instead of hiding the problem.
    print >>sys.stderr, "W: could not load config.py, using default options:", sys.exc_info()[1]

# Stream used for warnings and diagnostics.  Bound after the config import,
# so a `warn` defined by config.py does not survive.
warn = sys.stderr

if QP_REQUIRED:
    print >>warn, "QP_REQUIRED has been deprecated in rss2email."
247
248 ### Import Modules ###
249
250 import cPickle as pickle, time, os, traceback, sys, types, subprocess
# MD5 constructor: hashlib where available, the older md5 module otherwise.
# NOTE: this rebinds the builtin name `hash` for the whole module; getID()
# calls hash(...).hexdigest().
try:
    import hashlib
    hash = hashlib.md5
except ImportError:
    import md5
    hash = md5.new

# unix is 1 when fcntl is importable and the platform is not SunOS; the feed
# file is only flock()ed when it is set.
unix = 0
try:
    import fcntl
except ImportError:
    pass
else:
    # A pox on SunOS file locking methods
    if sys.platform.find('sunos') == -1:
        unix = 1

# Exception classes of the socket module that this interpreter provides.
import socket
socket_errors = []
for e in ['error', 'gaierror']:
    if hasattr(socket, e):
        socket_errors.append(getattr(socket, e))
271
272 #DEPRECATED import mimify 
273 #DEPRECATED from StringIO import StringIO as SIO 
274 #DEPRECATED mimify.CHARSET = 'utf-8'
275
import feedparser
import html2text as h2t

# User agent string feedparser sends; it carries this module's version.
feedparser.USER_AGENT = "rss2email/" + __version__ + " +http://www.allthingsrss.com/rss2email/"

# Copy the module-level html2text options into the html2text module, then
# expose its converter under a short name.
h2t.BODY_WIDTH = BODY_WIDTH
h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
h2t.UNICODE_SNOB = UNICODE_SNOB
html2text = h2t.html2text
285
286 ### Utility Functions ###
287
288 import threading
class TimeoutError(Exception):
    """Raised when a time-limited call is still running at its deadline."""
290
class InputError(Exception):
    """Signals invalid input.

    NOTE(review): no raise site is visible next to this definition; confirm
    how callers use it.
    """
292
def timelimit(timeout, function):
    """Wrap `function` so that every call gives up after `timeout` seconds.

    Returns a callable accepting the same arguments as `function`.  Each call
    runs `function` in a daemon thread and waits for it for at most `timeout`
    seconds:

      * if it returned, its return value is returned;
      * if it raised, the same exception is raised in the caller;
      * if it is still running, TimeoutError is raised.  The worker thread is
        not stopped, only abandoned; being a daemon thread it does not hold
        up interpreter exit.

    Based on http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/473878
    """
    def internal2(*args, **kw):
        class Calculator(threading.Thread):
            def __init__(self):
                threading.Thread.__init__(self)
                self.result = None
                self.error = None

            def run(self):
                try:
                    self.result = function(*args, **kw)
                except:
                    # Catch everything on purpose: whatever the call raised
                    # has to be handed over to the waiting thread.
                    self.error = sys.exc_info()[1]

        c = Calculator()
        c.setDaemon(True) # don't hold up exiting
        c.start()
        c.join(timeout)

        # isAlive() is the historical spelling; newer interpreters provide
        # only is_alive().  Use whichever this one has.
        still_running = getattr(c, 'is_alive', None) or c.isAlive
        if still_running():
            raise TimeoutError
        if c.error is not None:
            raise c.error
        return c.result
    return internal2
322     
323
def isstr(f):
    """Return true if f is an instance of either string type ('' or u'')."""
    string_types = (type(''), type(u''))
    return isinstance(f, string_types)
def ishtml(t):
    """Return true if t is exactly a tuple, the form getContent() uses for
    raw HTML: ('HTML', value)."""
    return type(t) is tuple
def contains(a, b):
    """Return true if a.find(b) locates b somewhere in a."""
    position = a.find(b)
    return position != -1
def unu(s): # I / freakin' hate / that unicode
    """Return s encoded as UTF-8 if it is exactly a Unicode string,
    otherwise return s unchanged."""
    if type(s) is not type(u''):
        return s
    return s.encode('utf-8')
330
331 ### Parsing Utilities ###
332
def getContent(entry, HTMLOK=0):
    """Select the best content from an entry, deHTMLizing if necessary.
    If raw HTML is best, an ('HTML', best) tuple is returned.

    The candidates are the entry's 'content' items followed by its
    'summary_detail', when present.  They are searched in this order:

      1. the first candidate whose type mentions 'html' -- returned as an
         ('HTML', value) tuple if HTMLOK is true, otherwise converted to
         text with html2text;
      2. the first candidate of type 'text/plain';
      3. the first candidate, whatever its type.

    Returns "" if there are no candidates.  The entry itself is left
    unchanged.
    """
    # Possible future improvement:
    #  * Instead of just taking the first one
    #    pick the one in the "best" language.
    #  * HACK: hardcoded HTMLOK, should take a tuple of media types

    # Work on a copy: extending the list returned by entry.get() in place
    # would add the summary to the entry's own 'content' on every call.
    conts = list(entry.get('content', []))

    summary = entry.get('summary_detail', {})
    if summary:
        conts.append(summary)

    if not conts:
        return ""

    for c in conts:
        if c.type.find('html') != -1:
            if HTMLOK:
                return ('HTML', c.value)
            # Only need to convert to text if HTML isn't OK
            return html2text(c.value)

    for c in conts:
        if c.type == 'text/plain':
            return c.value

    return conts[0].value
370
def getID(entry):
    """Get best ID from an entry.

    Preference order: the entry's own id (only when TRUST_GUID is set and
    the id is non-empty), the hex digest of its content, its link, the hex
    digest of its title.  Returns None when none of these is available.
    """
    if TRUST_GUID and 'id' in entry and entry.id:
        return entry.id

    body = getContent(entry)
    if body and body != "\n":
        return hash(unu(body)).hexdigest()

    if 'link' in entry:
        return entry.link

    if 'title' in entry:
        return hash(unu(entry.title)).hexdigest()

    return None
380
def _append_author(prefix, author):
    """Return prefix + author; if mixing the two string types fails, retry
    with author decoded as UTF-8."""
    try:
        return prefix + author
    except UnicodeDecodeError:
        return prefix + unicode(author, 'utf-8')

def getName(r, entry):
    """Get the best name.

    r is a parsed feed result (its .feed is consulted, and its .url if it
    has one); entry is one entry of that feed.

    If r.url has an entry in OVERRIDE_FROM, that value is returned as is.
    Otherwise the result starts with the feed title and is followed by
    ": " plus the entry author's name, or else by ", " plus the feed
    author's name.  The feed author is only considered when the entry
    carries no author name field at all.
    """
    feed = r.feed
    if hasattr(r, "url") and r.url in OVERRIDE_FROM:
        return OVERRIDE_FROM[r.url]

    name = feed.get('title', '')

    if 'name' in entry.get('author_detail', []): # normally {} but py2.1
        author = entry.author_detail.name
        if author:
            if name: name += ": "
            name = _append_author(name, author)

    elif 'name' in feed.get('author_detail', []):
        author = feed.author_detail.name
        if author:
            if name: name += ", "
            name = _append_author(name, author)

    return name
405
def getEmail(feed, entry):
    """Get the best email_address.

    With FORCE_FROM set this is always DEFAULT_FROM.  Otherwise the first
    of these that exists is used: the entry author's email, the feed
    author's email and, only when USE_PUBLISHER_EMAIL is set, the feed
    publisher's email or the feed's 'errorreportsto' value.  DEFAULT_FROM
    is the fallback.
    """
    if FORCE_FROM:
        return DEFAULT_FROM

    # Entry author first, then feed author.
    for source in (entry, feed):
        if 'email' in source.get('author_detail', []):
            return source.author_detail.email

    #TODO: contributors

    if USE_PUBLISHER_EMAIL:
        if 'email' in feed.get('publisher_detail', []):
            return feed.publisher_detail.email

        if feed.get("errorreportsto", ''):
            return feed.errorreportsto

    return DEFAULT_FROM
427
428 ### Simple Database of Feeds ###
429
class Feed:
    """One subscribed feed and what is remembered about it between runs.

    NOTE(review): deliberately left a classic class.  Feed instances are
    stored in the pickled feed file, so changing the kind of class may
    affect loading existing files -- confirm before changing.
    """
    def __init__(self, url, to):
        # Where to fetch the feed from, and who gets its mail (a false value
        # means the default recipient is used).
        self.url = url
        self.to = to
        # Values handed to the feed parser on the next fetch.
        self.etag = None
        self.modified = None
        # Entries already mailed: entry id -> id computed for that entry.
        self.seen = {}
434
def load(lock=1):
    """Read the feed list from the feed file.

    Returns a (feeds, feedfileObject) pair: the unpickled content of the
    feed file and the open file object it was read from.  When `lock` is
    true and the `unix` flag is set, an exclusive flock() is held on that
    file object.

    Prints a message and exits with status 1 if the feed file does not
    exist or cannot be opened.
    """
    if not os.path.exists(feedfile):
        sys.stdout.write('Feedfile "%s" does not exist.  If you\'re using r2e for the first time, you' % feedfile + "\n")
        sys.stdout.write("have to run 'r2e new' first." + "\n")
        sys.exit(1)

    try:
        handle = open(feedfile, 'r')
    except IOError:
        err = sys.exc_info()[1]
        sys.stdout.write("Feedfile could not be opened: %s" % (err,) + "\n")
        sys.exit(1)
    feeds = pickle.load(handle)

    if not lock:
        return feeds, handle

    locktype = 0
    if unix:
        locktype = fcntl.LOCK_EX
        fcntl.flock(handle.fileno(), locktype)

    #HACK: to deal with lock caching
    # The file is opened and read a second time.  Rebinding the one name
    # drops the only reference to the first file object, so keep using a
    # single variable here.
    handle = open(feedfile, 'r')
    feeds = pickle.load(handle)
    if unix:
        fcntl.flock(handle.fileno(), locktype)

    return feeds, handle
459
def _dump_feeds(feeds, path):
    """Pickle feeds into the file at path and close it before returning."""
    out = open(path, 'w')
    try:
        pickle.dump(feeds, out)
    finally:
        out.close()

def unlock(feeds, feedfileObject):
    """Write feeds back to the feed file and release its lock.

    Without the `unix` flag the feed file is simply overwritten.  With it,
    the data is written to feedfile + '.tmp', renamed over the feed file,
    and the flock() held on feedfileObject is released.

    The file written to is closed explicitly, so the data is on its way to
    disk before the rename rather than whenever the file object happens to
    be collected.
    """
    if not unix:
        _dump_feeds(feeds, feedfile)
    else:
        tmpfile = feedfile + '.tmp'
        _dump_feeds(feeds, tmpfile)
        os.rename(tmpfile, feedfile)
        fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_UN)
467
468 #@timelimit(FEED_TIMEOUT)               
def parse(url, etag, modified):
    """Fetch and parse the feed at url with feedparser.

    etag and modified are passed through to feedparser.parse.  When PROXY
    is set, the request is made through that HTTP proxy.
    """
    if PROXY != '':
        handlers = [urllib2.ProxyHandler( {"http":PROXY} )]
        return feedparser.parse(url, etag, modified, handlers = handlers)
    return feedparser.parse(url, etag, modified)
475         
476                 
477 ### Program Functions ###
478
def add(*args):
    """Add feeds to the feed file.

    Called either with a feed URL followed by an email address (the feed is
    sent to that address), or with any number of feed URLs (sent to the
    default address).  In the second form a default address must already be
    stored as the first item of the feed list; otherwise a message is
    printed and the process exits with status 1.
    """
    url_then_address = (len(args) == 2
                        and contains(args[1], '@')
                        and not contains(args[1], '://'))
    if url_then_address:
        urls, to = [args[0]], args[1]
    else:
        urls, to = args, None

    feeds, feedfileObject = load()

    # The default address, if any, is a plain string at the head of the list.
    has_default_to = feeds and isstr(feeds[0])
    if to is None and not has_default_to:
        sys.stdout.write("No email address has been defined. Please run 'r2e email emailaddress' or" + "\n")
        sys.stdout.write("'r2e add url emailaddress'." + "\n")
        sys.exit(1)

    feeds.extend([Feed(url, to) for url in urls])
    unlock(feeds, feedfileObject)
492
493 def run(num=None):
494         feeds, feedfileObject = load()
495         smtpserver = None
496         try:
497                 # We store the default to address as the first item in the feeds list.
498                 # Here we take it out and save it for later.
499                 default_to = ""
500                 if feeds and isstr(feeds[0]): default_to = feeds[0]; ifeeds = feeds[1:] 
501                 else: ifeeds = feeds
502                 
503                 if num: ifeeds = [feeds[num]]
504                 feednum = 0
505                 
506                 for f in ifeeds:
507                         try: 
508                                 feednum += 1
509                                 if VERBOSE: print >>warn, 'I: Processing [%d] "%s"' % (feednum, f.url)
510                                 r = {}
511                                 try:
512                                         r = timelimit(FEED_TIMEOUT, parse)(f.url, f.etag, f.modified)
513                                 except TimeoutError:
514                                         print >>warn, 'W: feed [%d] "%s" timed out' % (feednum, f.url)
515                                         continue
516                                 
517                                 # Handle various status conditions, as required
518                                 if 'status' in r:
519                                         if r.status == 301: f.url = r['url']
520                                         elif r.status == 410:
521                                                 print >>warn, "W: feed gone; deleting", f.url
522                                                 feeds.remove(f)
523                                                 continue
524                                 
525                                 http_status = r.get('status', 200)
526                                 if VERBOSE > 1: print >>warn, "I: http status", http_status
527                                 http_headers = r.get('headers', {
528                                   'content-type': 'application/rss+xml', 
529                                   'content-length':'1'})
530                                 exc_type = r.get("bozo_exception", Exception()).__class__
531                                 if http_status != 304 and not r.entries and not r.get('version', ''):
532                                         if http_status not in [200, 302]: 
533                                                 print >>warn, "W: error %d [%d] %s" % (http_status, feednum, f.url)
534
535                                         elif contains(http_headers.get('content-type', 'rss'), 'html'):
536                                                 print >>warn, "W: looks like HTML [%d] %s"  % (feednum, f.url)
537
538                                         elif http_headers.get('content-length', '1') == '0':
539                                                 print >>warn, "W: empty page [%d] %s" % (feednum, f.url)
540
541                                         elif hasattr(socket, 'timeout') and exc_type == socket.timeout:
542                                                 print >>warn, "W: timed out on [%d] %s" % (feednum, f.url)
543                                         
544                                         elif exc_type == IOError:
545                                                 print >>warn, 'W: "%s" [%d] %s' % (r.bozo_exception, feednum, f.url)
546                                         
547                                         elif hasattr(feedparser, 'zlib') and exc_type == feedparser.zlib.error:
548                                                 print >>warn, "W: broken compression [%d] %s" % (feednum, f.url)
549                                         
550                                         elif exc_type in socket_errors:
551                                                 exc_reason = r.bozo_exception.args[1]
552                                                 print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url)
553
554                                         elif exc_type == urllib2.URLError:
555                                                 if r.bozo_exception.reason.__class__ in socket_errors:
556                                                         exc_reason = r.bozo_exception.reason.args[1]
557                                                 else:
558                                                         exc_reason = r.bozo_exception.reason
559                                                 print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url)
560                                         
561                                         elif exc_type == AttributeError:
562                                                 print >>warn, "W: %s [%d] %s" % (r.bozo_exception, feednum, f.url)
563                                         
564                                         elif exc_type == KeyboardInterrupt:
565                                                 raise r.bozo_exception
566                                                 
567                                         elif r.bozo:
568                                                 print >>warn, 'E: error in [%d] "%s" feed (%s)' % (feednum, f.url, r.get("bozo_exception", "can't process"))
569
570                                         else:
571                                                 print >>warn, "=== rss2email encountered a problem with this feed ==="
572                                                 print >>warn, "=== See the rss2email FAQ at http://www.allthingsrss.com/rss2email/ for assistance ==="
573                                                 print >>warn, "=== If this occurs repeatedly, send this to lindsey@allthingsrss.com ==="
574                                                 print >>warn, "E:", r.get("bozo_exception", "can't process"), f.url
575                                                 print >>warn, r
576                                                 print >>warn, "rss2email", __version__
577                                                 print >>warn, "feedparser", feedparser.__version__
578                                                 print >>warn, "html2text", h2t.__version__
579                                                 print >>warn, "Python", sys.version
580                                                 print >>warn, "=== END HERE ==="
581                                         continue
582                                 
583                                 r.entries.reverse()
584                                 
585                                 for entry in r.entries:
586                                         id = getID(entry)
587                                         
588                                         # If TRUST_GUID isn't set, we get back hashes of the content.
589                                         # Instead of letting these run wild, we put them in context
590                                         # by associating them with the actual ID (if it exists).
591                                         
592                                         frameid = entry.get('id', id)
593                                         
594                                         # If this item's ID is in our database
595                                         # then it's already been sent
596                                         # and we don't need to do anything more.
597                                         
598                                         if f.seen.has_key(frameid) and f.seen[frameid] == id: continue
599
600                                         if not (f.to or default_to):
601                                                 print "No default email address defined. Please run 'r2e email emailaddress'"
602                                                 print "Ignoring feed %s" % f.url
603                                                 break
604                                         
605                                         if 'title_detail' in entry and entry.title_detail:
606                                                 title = entry.title_detail.value
607                                                 if contains(entry.title_detail.type, 'html'):
608                                                         title = html2text(title)
609                                         else:
610                                                 title = getContent(entry)[:70]
611
612                                         title = title.replace("\n", " ").strip()
613                                         
614                                         datetime = time.gmtime()
615
616                                         if DATE_HEADER:
617                                                 for datetype in DATE_HEADER_ORDER:
618                                                         kind = datetype+"_parsed"
619                                                         if kind in entry and entry[kind]: datetime = entry[kind]
620                                                 
621                                         link = entry.get('link', "")
622                                         
623                                         from_addr = getEmail(r.feed, entry)
624                                         
625                                         name = getName(r, entry)
626                                         fromhdr = '"'+ name + '" <' + from_addr + ">"
627                                         tohdr = (f.to or default_to)
628                                         subjecthdr = title
629                                         datehdr = time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime)
630                                         useragenthdr = "rss2email"
631                                         extraheaders = {'Date': datehdr, 'User-Agent': useragenthdr, 'X-RSS-Feed': f.url, 'X-RSS-ID': id}
632                                         if BONUS_HEADER != '':
633                                                 for hdr in BONUS_HEADER.strip().splitlines():
634                                                         pos = hdr.strip().find(':')
635                                                         if pos > 0:
636                                                                 extraheaders[hdr[:pos]] = hdr[pos+1:].strip()
637                                                         else:
638                                                                 print >>warn, "W: malformed BONUS HEADER", BONUS_HEADER 
639                                         
640                                         entrycontent = getContent(entry, HTMLOK=HTML_MAIL)
641                                         contenttype = 'plain'
642                                         content = ''
643                                         if USE_CSS_STYLING and HTML_MAIL:
644                                                 contenttype = 'html'
645                                                 content = "<html>\n" 
646                                                 content += '<head><style><!--' + STYLE_SHEET + '//--></style></head>\n'
647                                                 content += '<body>\n'
648                                                 content += '<div id="entry">\n'
649                                                 content += '<h1'
650                                                 content += ' class="header"'
651                                                 content += '><a href="'+link+'">'+subjecthdr+'</a></h1>\n\n'
652                                                 if ishtml(entrycontent):
653                                                         body = entrycontent[1].strip()
654                                                 else:
655                                                         body = entrycontent.strip()
656                                                 if body != '':  
657                                                         content += '<div id="body"><table><tr><td>\n' + body + '</td></tr></table></div>\n'
658                                                 content += '\n<p class="footer">URL: <a href="'+link+'">'+link+'</a>'
659                                                 if hasattr(entry,'enclosures'):
660                                                         for enclosure in entry.enclosures:
661                                                                 if (hasattr(enclosure, 'url') and enclosure.url != ""):
662                                                                         content += ('<br/>Enclosure: <a href="'+enclosure.url+'">'+enclosure.url+"</a>\n")
663                                                                 if (hasattr(enclosure, 'src') and enclosure.src != ""):
664                                                                         content += ('<br/>Enclosure: <a href="'+enclosure.src+'">'+enclosure.src+'</a><br/><img src="'+enclosure.src+'"\n')
665                                                 content += '</p></div>\n'
666                                                 content += "\n\n</body></html>"
667                                         else:   
668                                                 if ishtml(entrycontent):
669                                                         contenttype = 'html'
670                                                         content = "<html>\n" 
671                                                         content = ("<html><body>\n\n" + 
672                                                                    '<h1><a href="'+link+'">'+subjecthdr+'</a></h1>\n\n' +
673                                                                    entrycontent[1].strip() + # drop type tag (HACK: bad abstraction)
674                                                                    '<p>URL: <a href="'+link+'">'+link+'</a></p>' )
675                                                                    
676                                                         if hasattr(entry,'enclosures'):
677                                                                 for enclosure in entry.enclosures:
678                                                                         if enclosure.url != "":
679                                                                                 content += ('Enclosure: <a href="'+enclosure.url+'">'+enclosure.url+"</a><br/>\n")
680                                                         
681                                                         content += ("\n</body></html>")
682                                                 else:
683                                                         content = entrycontent.strip() + "\n\nURL: "+link
684                                                         if hasattr(entry,'enclosures'):
685                                                                 for enclosure in entry.enclosures:
686                                                                         if enclosure.url != "":
687                                                                                 content += ('\nEnclosure: ' + enclosure.url + "\n")
688
689                                         smtpserver = send(fromhdr, tohdr, subjecthdr, content, contenttype, extraheaders, smtpserver)
690                         
691                                         f.seen[frameid] = id
692                                         
693                                 f.etag, f.modified = r.get('etag', None), r.get('modified', None)
694                         except (KeyboardInterrupt, SystemExit):
695                                 raise
696                         except:
697                                 print >>warn, "=== rss2email encountered a problem with this feed ==="
698                                 print >>warn, "=== See the rss2email FAQ at http://www.allthingsrss.com/rss2email/ for assistance ==="
699                                 print >>warn, "=== If this occurs repeatedly, send this to lindsey@allthingsrss.com ==="
700                                 print >>warn, "E: could not parse", f.url
701                                 traceback.print_exc(file=warn)
702                                 print >>warn, "rss2email", __version__
703                                 print >>warn, "feedparser", feedparser.__version__
704                                 print >>warn, "html2text", h2t.__version__
705                                 print >>warn, "Python", sys.version
706                                 print >>warn, "=== END HERE ==="
707                                 continue
708
709         finally:                
710                 unlock(feeds, feedfileObject)
711                 if smtpserver:
712                         smtpserver.quit()
713
714 def list():
715         feeds, feedfileObject = load(lock=0)
716         default_to = ""
717         
718         if feeds and isstr(feeds[0]):
719                 default_to = feeds[0]; ifeeds = feeds[1:]; i=1
720                 print "default email:", default_to
721         else: ifeeds = feeds; i = 0
722         for f in ifeeds:
723                 print `i`+':', f.url, '('+(f.to or ('default: '+default_to))+')'
724                 if not (f.to or default_to):
725                         print "   W: Please define a default address with 'r2e email emailaddress'"
726                 i+= 1
727
def delete(n):
        """Remove the feed whose ID is n (as printed by list()) from the feedfile.

        n -- integer index into the stored feed list.  When slot 0 holds the
             default email address, n == 0 is refused; indexes outside the
             list (including negative ones) are reported as "no such feed".

        All messages go to the `warn` stream.  The feed list, changed or not,
        is always handed back to unlock().
        """
        feeds, feedfileObject = load()
        if (n == 0) and (feeds and isstr(feeds[0])):
                print >>warn, "W: ID has to be equal to or higher than 1"
        elif n < 0 or n >= len(feeds):
                # A negative index would otherwise wrap around and silently
                # delete a feed counted from the end of the list (or hit the
                # default-address string and fail on .url).
                print >>warn, "W: no such feed"
        else:
                print >>warn, "W: deleting feed %s" % feeds[n].url
                del feeds[n]
                # Unless the last entry was removed, later feeds moved down by one.
                if n != len(feeds):
                        print >>warn, "W: feed IDs have changed, list before deleting again"
        unlock(feeds, feedfileObject)
740         
741 def reset():
742         feeds, feedfileObject = load()
743         if feeds and isstr(feeds[0]):
744                 ifeeds = feeds[1:]
745         else: ifeeds = feeds
746         for f in ifeeds:
747                 if VERBOSE: print "Resetting %d already seen items" % len(f.seen)
748                 f.seen = {}
749                 f.etag = None
750                 f.modified = None
751         
752         unlock(feeds, feedfileObject)
753         
def email(addr):
        """Store addr as the default email address in the feedfile.

        The default address lives in slot 0 of the feed list: an existing one
        is overwritten, otherwise addr is placed in front of the feeds.
        """
        feeds, feedfileObject = load()
        if not (feeds and isstr(feeds[0])):
                # No default address stored yet.
                feeds = [addr] + feeds
        else:
                feeds[0] = addr
        unlock(feeds, feedfileObject)
759
760 if __name__ == '__main__':
761         args = sys.argv
762         try:
763                 if len(args) < 3: raise InputError, "insufficient args"
764                 feedfile, action, args = args[1], args[2], args[3:]
765                 
766                 if action == "run": 
767                         if args and args[0] == "--no-send":
768                                 def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None):
769                                         if VERBOSE: print 'Not sending:', unu(subject)
770
771                         if args and args[-1].isdigit(): run(int(args[-1]))
772                         else: run()
773
774                 elif action == "email":
775                         if not args:
776                                 raise InputError, "Action '%s' requires an argument" % action
777                         else:
778                                 email(args[0])
779
780                 elif action == "add": add(*args)
781
782                 elif action == "new": 
783                         if len(args) == 1: d = [args[0]]
784                         else: d = []
785                         pickle.dump(d, open(feedfile, 'w'))
786
787                 elif action == "list": list()
788
789                 elif action in ("help", "--help", "-h"): print __doc__
790
791                 elif action == "delete":
792                         if not args:
793                                 raise InputError, "Action '%s' requires an argument" % action
794                         elif args[0].isdigit():
795                                 delete(int(args[0]))
796                         else:
797                                 raise InputError, "Action '%s' requires a number as its argument" % action
798
799                 elif action == "reset": reset()
800
801                 else:
802                         raise InputError, "Invalid action"
803                 
804         except InputError, e:
805                 print "E:", e
806                 print
807                 print __doc__
808