From ba7568a01602af2097e8d44f3b3c16d09d25ca31 Mon Sep 17 00:00:00 2001 From: Aaron Swartz Date: Thu, 26 Aug 2004 12:00:00 +0000 Subject: [PATCH] Add rss2email v2.52 support html descriptions (tx Peter Vangorp), bonus header, override from, python2.1 support, unicode headers (tx Lars Wirzenius), remove extra comma, better error reporting. Downloaded from: http://web.archive.org/web/20050214084518/http://www.aaronsw.com/2002/rss2email/rss2email-2.52.py --- rss2email.py | 115 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 77 insertions(+), 38 deletions(-) diff --git a/rss2email.py b/rss2email.py index 0ee6aca..294144c 100644 --- a/rss2email.py +++ b/rss2email.py @@ -10,12 +10,11 @@ Usage: list delete n """ -__version__ = "2.511" +__version__ = "2.52" __author__ = "Aaron Swartz (me@aaronsw.com)" __copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2." -___contributors__ = ["Dean Jackson (dino@grorg.org)", - "Brian Lalor (blalor@ithacabands.org)", - "Joey Hess", 'Matej Cepl'] +___contributors__ = ["Dean Jackson", "Brian Lalor", "Joey Hess", + "Matej Cepl", "Martin 'Joey' Schulze"] ### Vaguely Customizable Options ### @@ -63,26 +62,35 @@ SMTP_SEND = 0 SMTP_SERVER = "smtp.yourisp.net:25" +# Set this to add a bonus header to all emails (start with '\n'). +BONUS_HEADER = '' +# Example: BONUS_HEADER = '\nApproved: joe@bob.org' + +# Set this to override From addresses. Keys are feed URLs, values are new titles. +OVERRIDE_FROM = {} + # Note: You can also override the send function. def send(fr, to, message): if SMTP_SEND: smtpserver.sendmail(fr, [to], message) else: - i, o = os.popen2(["/usr/sbin/sendmail", to]) - i.write(message) - i.close(); o.close() - del i, o + i, o = os.popen2(["/usr/sbin/sendmail", to]) + i.write(message) + i.close(); o.close() + del i, o + ## html2text options ## -# 1: Use Unicode characters -# 0: Use ASCII psuedo-replacements +# Use Unicode characters instead of their ascii psuedo-replacements UNICODE_SNOB = 0 -# 1: Put the links after each paragraph -# 0: Put all links at the end +# Put the links after each paragraph instead of at the end. LINKS_EACH_PARAGRAPH = 0 +# Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.) +BODY_WIDTH = 0 + ### Load the Options ### # Read options from config file if present. @@ -95,38 +103,43 @@ except: ### Import Modules ### -import cPickle as pickle, fcntl, md5, time, os, traceback, urllib2, sys +import cPickle as pickle, fcntl, md5, time, os, traceback, urllib2, sys, types import socket; socket_errors = [] for e in ['error', 'gaierror']: if hasattr(socket, e): socket_errors.append(getattr(socket, e)) -if QP_REQUIRED: import mimify; from StringIO import StringIO as SIO +import mimify; from StringIO import StringIO as SIO; mimify.CHARSET = 'utf-8' if SMTP_SEND: import smtplib; smtpserver = smtplib.SMTP(SMTP_SERVER) else: smtpserver = None import feedparser feedparser.USER_AGENT = "rss2email/"+__version__+ " +http://www.aaronsw.com/2002/rss2email/" -import html2text +import html2text as h2t -html2text.UNICODE_SNOB = UNICODE_SNOB -html2text.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH -html2text = html2text.html2text +h2t.UNICODE_SNOB = UNICODE_SNOB +h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH +h2t.BODY_WIDTH = BODY_WIDTH +html2text = h2t.html2text ### Utility Functions ### warn = sys.stderr def isstr(f): return isinstance(f, type('')) or isinstance(f, type(u'')) -def ishtml(t): return type(t) is tuple +def ishtml(t): return type(t) is type(()) def contains(a,b): return a.find(b) != -1 def unu(s): # I / freakin' hate / that unicode - if type(s) is unicode: return s.encode('utf-8') + if type(s) is types.UnicodeType: return s.encode('utf-8') else: return s def quote822(s): """Quote names in email according to RFC822.""" return '"' + unu(s).replace("\\", "\\\\").replace('"', '\\"') + '"' +def header7bit(s): + """QP_CORRUPT headers.""" + return mimify.mime_encode_header(s + ' ')[:-1] + ### Parsing Utilities ### def getContent(entry, HTMLOK=0): @@ -145,7 +158,10 @@ def getContent(entry, HTMLOK=0): # pick the one in the "best" language. # * HACK: hardcoded HTMLOK, should take a tuple of media types - if entry.get('content', []): + if entry.get('summary_detail', {}): + entry.content = entry.get('content', []) + [entry.summary_detail] + + if entry.content: if HTMLOK: for c in entry.content: if contains(c.type, 'html'): return ('HTML', c.value) @@ -160,11 +176,6 @@ def getContent(entry, HTMLOK=0): return entry.content[0].value - if entry.get('summary_detail', {}): - s = entry.summary_detail.value - if contains(entry.summary_detail.type, 'html'): s = html2text(s) - return s - return "" def getID(entry): @@ -177,18 +188,24 @@ def getID(entry): if 'link' in entry: return entry.link if 'title' in entry: return md5.new(unu(entry.title)).hexdigest() -def getName(feed, entry): +def getName(r, entry): """Get the best name.""" + + feed = r.feed + if r.url in OVERRIDE_FROM.keys(): + return unu(OVERRIDE_FROM[r.url]) name = feed.get('title', '') if 'name' in entry.get('author_detail', []): # normally {} but py2.1 - if name: name += ", " - name += entry.author_detail.name + if entry.author_detail.name: + if name: name += ", " + name += entry.author_detail.name elif 'name' in feed.get('author_detail', []): - if name: name += ", " - name += feed.author_detail.name + if feed.author_detail.name: + if name: name += ", " + name += feed.author_detail.name return name @@ -225,18 +242,18 @@ def load(lock=1): feedfileObject = open(feedfile, 'r') feeds = pickle.load(feedfileObject) if lock: - fcntl.flock(feedfileObject, fcntl.LOCK_EX) + fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_EX) #HACK: to deal with lock caching feedfileObject = open(feedfile, 'r') feeds = pickle.load(feedfileObject) - fcntl.flock(feedfileObject, fcntl.LOCK_EX) + fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_EX) return feeds, feedfileObject def unlock(feeds, feedfileObject): pickle.dump(feeds, open(feedfile+'.tmp', 'w')) os.rename(feedfile+'.tmp', feedfile) - fcntl.flock(feedfileObject, fcntl.LOCK_UN) + fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_UN) ### Program Functions ### @@ -292,6 +309,16 @@ def run(num=None): elif hasattr(socket, 'timeout') and exc_type == socket.timeout: print >>warn, "W: timed out on", f.url + + elif exc_type == IOError: + print >>warn, "W:", r.bozo_exception, f.url + + elif hasattr(feedparser, 'zlib') and exc_type == feedparser.zlib.error: + print >>warn, "W: broken compression", f.url + + elif exc_type in socket_errors: + exc_reason = r.bozo_exception.args[1] + print >>warn, "W:", exc_reason, f.url elif exc_type == urllib2.URLError: if r.bozo_exception.reason.__class__ in socket_errors: @@ -299,11 +326,17 @@ def run(num=None): else: exc_reason = r.bozo_exception.reason print >>warn, "W:", exc_reason, f.url + + elif exc_type == KeyboardInterrupt: + raise r.bozo_exception else: print >>warn, "=== SEND THE FOLLOWING TO rss2email@aaronsw.com ===" print >>warn, "E:", r.get("bozo_exception", "can't process"), f.url print >>warn, r + print >>warn, "feedparser", feedparser.__version__ + print >>warn, "html2text", h2t.__version__ + print >>warn, "Python", sys.version print >>warn, "=== END HERE ===" continue @@ -343,15 +376,17 @@ def run(num=None): content = getContent(entry, HTMLOK=HTML_MAIL) link = unu(entry.get('link', "")) + print entry.link from_addr = unu(getEmail(r.feed, entry)) message = ( - "From: " + quote822(getName(r.feed, entry)) + " <"+from_addr+">" + - "\nTo: " + unu(f.to or default_to) + # set a default email! - "\nSubject: " + title + + "From: " + quote822(header7bit(getName(r, entry))) + " <"+from_addr+">" + + "\nTo: " + header7bit(unu(f.to or default_to)) + # set a default email! + "\nSubject: " + header7bit(title) + "\nDate: " + time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime) + "\nUser-Agent: rss2email" + # really should be X-Mailer + BONUS_HEADER + "\nContent-Type: ") # but backwards-compatibility if ishtml(content): @@ -369,7 +404,6 @@ def run(num=None): message += '; charset="utf-8"\n\n' + content + "\n" if QP_REQUIRED: - mimify.CHARSET = 'utf-8' ins, outs = SIO(message), SIO() mimify.mimify(ins, outs) message = outs.getvalue() @@ -379,10 +413,15 @@ def run(num=None): f.seen[frameid] = id f.etag, f.modified = r.get('etag', None), r.get('modified', None) + except KeyboardInterrupt: + raise except: print >>warn, "=== SEND THE FOLLOWING TO rss2email@aaronsw.com ===" print >>warn, "E: could not parse", f.url traceback.print_exc(file=warn) + print >>warn, "feedparser", feedparser.__version__ + print >>warn, "html2text", h2t.__version__ + print >>warn, "Python", sys.version print>>warn, "=== END HERE ===" continue -- 2.26.2