rss2email.py

   1 #!/usr/bin/python
   2 """rss2email: get RSS feeds emailed to you
   3 http://rss2email.infogami.com
   4
   5 Usage:
   6   new [emailaddress] (create new feedfile)
   7   email newemailaddress (update default email)
   8   run [--no-send] [num]
   9   add feedurl [emailaddress]
  10   list
  11   reset
  12   delete n
  13 """
  14 __version__ = "2.66"
  15 __author__ = "Lindsey Smith (lindsey@allthingsrss.com)"
  16 __copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2 or 3."
  17 ___contributors__ = ["Dean Jackson", "Brian Lalor", "Joey Hess",
  18                      "Matej Cepl", "Martin 'Joey' Schulze",
  19                      "Marcel Ackermann (http://www.DreamFlasher.de)",
  20                      "Lindsey Smith", "Aaron Swartz (original author)" ]
  21
  22 import urllib2
  23 urllib2.install_opener(urllib2.build_opener())
  24
  25 ### Vaguely Customizable Options ###
  26
  27 # The email address messages are from by default:
  28 DEFAULT_FROM = "bozo@dev.null.invalid"
  29
  30 # 1: Send text/html messages when possible.
  31 # 0: Convert HTML to plain text.
  32 HTML_MAIL = 0
  33
  34 # 1: Only use the DEFAULT_FROM address.
  35 # 0: Use the email address specified by the feed, when possible.
  36 FORCE_FROM = 0
  37
  38 # 1: Receive one email per post.
  39 # 0: Receive an email every time a post changes.
  40 TRUST_GUID = 1
  41
  42 # 1: Generate Date header based on item's date, when possible.
  43 # 0: Generate Date header based on time sent.
  44 DATE_HEADER = 0
  45
  46 # A tuple consisting of some combination of
  47 # ('issued', 'created', 'modified', 'expired')
  48 # expressing ordered list of preference in dates
  49 # to use for the Date header of the email.
  50 DATE_HEADER_ORDER = ('modified', 'issued', 'created')
  51
  52 # 1: Apply Q-P conversion (required for some MUAs).
  53 # 0: Send message in 8-bits.
  54 # http://cr.yp.to/smtp/8bitmime.html
  55 #DEPRECATED
  56 QP_REQUIRED = 0
  57 #DEPRECATED
  58
  59 # 1: Name feeds as they're being processed.
  60 # 0: Keep quiet.
  61 VERBOSE = 0
  62
  63 # 1: Use the publisher's email if you can't find the author's.
  64 # 0: Just use the DEFAULT_FROM email instead.
  65 USE_PUBLISHER_EMAIL = 0
  66
  67 # 1: Use SMTP_SERVER to send mail.
  68 # 0: Call /usr/sbin/sendmail to send mail.
  69 SMTP_SEND = 0
  70
  71 SMTP_SERVER = "smtp.yourisp.net:25"
  72 AUTHREQUIRED = 0 # if you need to use SMTP AUTH set to 1
  73 SMTP_USER = 'username'  # for SMTP AUTH, set SMTP username here
  74 SMTP_PASS = 'password'  # for SMTP AUTH, set SMTP password here
  75
  76 # Set this to add a bonus header to all emails (start with '\n').
  77 BONUS_HEADER = ''
  78 # Example: BONUS_HEADER = '\nApproved: joe@bob.org'
  79
  80 # Set this to override From addresses. Keys are feed URLs, values are new titles.
  81 OVERRIDE_FROM = {}
  82
  83 # Set this to override the timeout (in seconds) for feed server response
  84 FEED_TIMEOUT = 60
  85
  86 # Optional CSS styling
  87 USE_CSS_STYLING = 0
  88 STYLE_SHEET='h1 {font: 18pt Georgia, "Times New Roman";} body {font: 12pt Arial;} a:link {font: 12pt Arial; font-weight: bold; color: #0000cc} blockquote {font-family: monospace; }  .header { background: #e0ecff; border-bottom: solid 4px #c3d9ff; padding: 5px; margin-top: 0px; color: red;} .header a { font-size: 20px; text-decoration: none; } .footer { background: #c3d9ff; border-top: solid 4px #c3d9ff; padding: 5px; margin-bottom: 0px; } #entry {border: solid 4px #c3d9ff; } #body { margin-left: 5px; margin-right: 5px; }'
  89
  90 # If you have an HTTP Proxy set this in the format 'http://your.proxy.here:8080/'
  91 PROXY=""
  92
  93 # To most correctly encode emails with international characters, we iterate through the list below and use the first character set that works
  94 # Eventually (and theoretically) ISO-8859-1 and UTF-8 are our catch-all failsafes
  95 CHARSET_LIST='US-ASCII', 'BIG5', 'ISO-2022-JP', 'ISO-8859-1', 'UTF-8'
  96
  97 from email.MIMEText import MIMEText
  98 from email.Header import Header
  99 from email.Utils import parseaddr, formataddr
 100
 101 # Note: You can also override the send function.
 102
 103 def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None):
 104         """Send an email.
 105
 106         All arguments should be Unicode strings (plain ASCII works as well).
 107
 108         Only the real name part of sender and recipient addresses may contain
 109         non-ASCII characters.
 110
 111         The email will be properly MIME encoded and delivered though SMTP to
 112         localhost port 25.  This is easy to change if you want something different.
 113
 114         The charset of the email will be the first one out of the list
 115         that can represent all the characters occurring in the email.
 116         """
 117
 118         # Header class is smart enough to try US-ASCII, then the charset we
 119         # provide, then fall back to UTF-8.
 120         header_charset = 'ISO-8859-1'
 121
 122         # We must choose the body charset manually
 123         for body_charset in CHARSET_LIST:
 124             try:
 125                 body.encode(body_charset)
 126             except (UnicodeError, LookupError):
 127                 pass
 128             else:
 129                 break
 130
 131         # Split real name (which is optional) and email address parts
 132         sender_name, sender_addr = parseaddr(sender)
 133         recipient_name, recipient_addr = parseaddr(recipient)
 134
 135         # We must always pass Unicode strings to Header, otherwise it will
 136         # use RFC 2047 encoding even on plain ASCII strings.
 137         sender_name = str(Header(unicode(sender_name), header_charset))
 138         recipient_name = str(Header(unicode(recipient_name), header_charset))
 139
 140         # Make sure email addresses do not contain non-ASCII characters
 141         sender_addr = sender_addr.encode('ascii')
 142         recipient_addr = recipient_addr.encode('ascii')
 143
 144         # Create the message ('plain' stands for Content-Type: text/plain)
 145         msg = MIMEText(body.encode(body_charset), contenttype, body_charset)
 146         msg['To'] = formataddr((recipient_name, recipient_addr))
 147         msg['Subject'] = Header(unicode(subject), header_charset)
 148         for hdr in extraheaders.keys():
 149                 try:
 150                         msg[hdr] = Header(unicode(extraheaders[hdr], header_charset))
 151                 except:
 152                         msg[hdr] = Header(extraheaders[hdr])
 153
 154         fromhdr = formataddr((sender_name, sender_addr))
 155         msg['From'] = fromhdr
 156
 157         msg_as_string = msg.as_string()
 158 #DEPRECATED     if QP_REQUIRED:
 159 #DEPRECATED             ins, outs = SIO(msg_as_string), SIO()
 160 #DEPRECATED             mimify.mimify(ins, outs)
 161 #DEPRECATED             msg_as_string = outs.getvalue()
 162
 163         if SMTP_SEND:
 164                 if not smtpserver:
 165                         import smtplib
 166
 167                         try:
 168                                 smtpserver = smtplib.SMTP(SMTP_SERVER)
 169                         except KeyboardInterrupt:
 170                                 raise
 171                         except Exception, e:
 172                                 print >>warn, ""
 173                                 print >>warn, ('Fatal error: could not connect to mail server "%s"' % SMTP_SERVER)
 174                                 print >>warn, ('Check your config.py file to confirm that SMTP_SERVER and other mail server settings are configured properly')
 175                                 if hasattr(e, 'reason'):
 176                                         print >>warn, "Reason:", e.reason
 177                                 sys.exit(1)
 178
 179                         if AUTHREQUIRED:
 180                                 try:
 181                                         smtpserver.ehlo()
 182                                         smtpserver.starttls()
 183                                         smtpserver.ehlo()
 184                                         smtpserver.login(SMTP_USER, SMTP_PASS)
 185                                 except KeyboardInterrupt:
 186                                         raise
 187                                 except Exception, e:
 188                                         print >>warn, ""
 189                                         print >>warn, ('Fatal error: could not authenticate with mail server "%s" as user "%s"' % (SMTP_SERVER, SMTP_USER))
 190                                         print >>warn, ('Check your config.py file to confirm that SMTP_SERVER and other mail server settings are configured properly')
 191                                         if hasattr(e, 'reason'):
 192                                                 print >>warn, "Reason:", e.reason
 193                                         sys.exit(1)
 194
 195                 smtpserver.sendmail(sender, recipient, msg_as_string)
 196                 return smtpserver
 197
 198         else:
 199                 try:
 200                         p = subprocess.Popen(["/usr/sbin/sendmail", recipient], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
 201                         p.communicate(msg_as_string)
 202                         status = p.returncode
 203                         assert status != None, "just a sanity check"
 204                         if status != 0:
 205                                 print >>warn, ""
 206                                 print >>warn, ('Fatal error: sendmail exited with code %s' % status)
 207                                 sys.exit(1)
 208                 except:
 209                         print '''Error attempting to send email via sendmail. Possibly you need to configure your config.py to use a SMTP server? Please refer to the rss2email documentation or website (http://rss2email.infogami.com) for complete documentation of config.py. The options below may suffice for configuring email:
 210 # 1: Use SMTP_SERVER to send mail.
 211 # 0: Call /usr/sbin/sendmail to send mail.
 212 SMTP_SEND = 0
 213
 214 SMTP_SERVER = "smtp.yourisp.net:25"
 215 AUTHREQUIRED = 0 # if you need to use SMTP AUTH set to 1
 216 SMTP_USER = 'username'  # for SMTP AUTH, set SMTP username here
 217 SMTP_PASS = 'password'  # for SMTP AUTH, set SMTP password here
 218 '''
 219                         sys.exit(1)
 220                 return None
 221
 222 ## html2text options ##
 223
 224 # Use Unicode characters instead of their ascii psuedo-replacements
 225 UNICODE_SNOB = 0
 226
 227 # Put the links after each paragraph instead of at the end.
 228 LINKS_EACH_PARAGRAPH = 0
 229
 230 # Wrap long lines at position. 0 for no wrapping. (Requires Python 2.3.)
 231 BODY_WIDTH = 0
 232
 233 ### Load the Options ###
 234
 235 # Read options from config file if present.
 236 import sys
 237 sys.path.insert(0,".")
 238 try:
 239         from config import *
 240 except:
 241         pass
 242
 243 warn = sys.stderr
 244
 245 if QP_REQUIRED:
 246         print >>warn, "QP_REQUIRED has been deprecated in rss2email."
 247
 248 ### Import Modules ###
 249
 250 import cPickle as pickle, time, os, traceback, sys, types, subprocess
 251 hash = ()
 252 try:
 253         import hashlib
 254         hash = hashlib.md5
 255 except ImportError:
 256         import md5
 257         hash = md5.new
 258
 259 unix = 0
 260 try:
 261         import fcntl
 262 # A pox on SunOS file locking methods
 263         if (sys.platform.find('sunos') == -1):
 264                 unix = 1
 265 except:
 266         pass
 267
 268 import socket; socket_errors = []
 269 for e in ['error', 'gaierror']:
 270         if hasattr(socket, e): socket_errors.append(getattr(socket, e))
 271
 272 #DEPRECATED import mimify
 273 #DEPRECATED from StringIO import StringIO as SIO
 274 #DEPRECATED mimify.CHARSET = 'utf-8'
 275
 276 import feedparser
 277 feedparser.USER_AGENT = "rss2email/"+__version__+ " +http://www.allthingsrss.com/rss2email/"
 278
 279 import html2text as h2t
 280
 281 h2t.UNICODE_SNOB = UNICODE_SNOB
 282 h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
 283 h2t.BODY_WIDTH = BODY_WIDTH
 284 html2text = h2t.html2text
 285
 286 ### Utility Functions ###
 287
 288 import threading
 289 class TimeoutError(Exception): pass
 290
 291 class InputError(Exception): pass
 292
 293 def timelimit(timeout, function):
 294 #    def internal(function):
 295         def internal2(*args, **kw):
 296             """
 297             from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/473878
 298             """
 299             class Calculator(threading.Thread):
 300                 def __init__(self):
 301                     threading.Thread.__init__(self)
 302                     self.result = None
 303                     self.error = None
 304
 305                 def run(self):
 306                     try:
 307                         self.result = function(*args, **kw)
 308                     except:
 309                         self.error = sys.exc_info()
 310
 311             c = Calculator()
 312             c.setDaemon(True) # don't hold up exiting
 313             c.start()
 314             c.join(timeout)
 315             if c.isAlive():
 316                 raise TimeoutError
 317             if c.error:
 318                 raise c.error[0], c.error[1]
 319             return c.result
 320         return internal2
 321 #    return internal
 322
 323
 324 def isstr(f): return isinstance(f, type('')) or isinstance(f, type(u''))
 325 def ishtml(t): return type(t) is type(())
 326 def contains(a,b): return a.find(b) != -1
 327 def unu(s): # I / freakin' hate / that unicode
 328         if type(s) is types.UnicodeType: return s.encode('utf-8')
 329         else: return s
 330
 331 ### Parsing Utilities ###
 332
 333 def getContent(entry, HTMLOK=0):
 334         """Select the best content from an entry, deHTMLizing if necessary.
 335         If raw HTML is best, an ('HTML', best) tuple is returned. """
 336
 337         # How this works:
 338         #  * We have a bunch of potential contents.
 339         #  * We go thru looking for our first choice.
 340         #    (HTML or text, depending on HTMLOK)
 341         #  * If that doesn't work, we go thru looking for our second choice.
 342         #  * If that still doesn't work, we just take the first one.
 343         #
 344         # Possible future improvement:
 345         #  * Instead of just taking the first one
 346         #    pick the one in the "best" language.
 347         #  * HACK: hardcoded HTMLOK, should take a tuple of media types
 348
 349         conts = entry.get('content', [])
 350
 351         if entry.get('summary_detail', {}):
 352                 conts += [entry.summary_detail]
 353
 354         if conts:
 355                 if HTMLOK:
 356                         for c in conts:
 357                                 if contains(c.type, 'html'): return ('HTML', c.value)
 358
 359                 if not HTMLOK: # Only need to convert to text if HTML isn't OK
 360                         for c in conts:
 361                                 if contains(c.type, 'html'):
 362                                         return html2text(c.value)
 363
 364                 for c in conts:
 365                         if c.type == 'text/plain': return c.value
 366
 367                 return conts[0].value
 368
 369         return ""
 370
 371 def getID(entry):
 372         """Get best ID from an entry."""
 373         if TRUST_GUID:
 374                 if 'id' in entry and entry.id: return entry.id
 375
 376         content = getContent(entry)
 377         if content and content != "\n": return hash(unu(content)).hexdigest()
 378         if 'link' in entry: return entry.link
 379         if 'title' in entry: return hash(unu(entry.title)).hexdigest()
 380
 381 def getName(r, entry):
 382         """Get the best name."""
 383
 384         feed = r.feed
 385         if hasattr(r, "url") and r.url in OVERRIDE_FROM.keys():
 386                 return OVERRIDE_FROM[r.url]
 387
 388         name = feed.get('title', '')
 389
 390         if 'name' in entry.get('author_detail', []): # normally {} but py2.1
 391                 if entry.author_detail.name:
 392                         if name: name += ": "
 393                         det=entry.author_detail.name
 394                         try:
 395                             name +=  entry.author_detail.name
 396                         except UnicodeDecodeError:
 397                             name +=  unicode(entry.author_detail.name, 'utf-8')
 398
 399         elif 'name' in feed.get('author_detail', []):
 400                 if feed.author_detail.name:
 401                         if name: name += ", "
 402                         name += feed.author_detail.name
 403
 404         return name
 405
 406 def getEmail(feed, entry):
 407         """Get the best email_address."""
 408
 409         if FORCE_FROM: return DEFAULT_FROM
 410
 411         if 'email' in entry.get('author_detail', []):
 412                 return entry.author_detail.email
 413
 414         if 'email' in feed.get('author_detail', []):
 415                 return feed.author_detail.email
 416
 417         #TODO: contributors
 418
 419         if USE_PUBLISHER_EMAIL:
 420                 if 'email' in feed.get('publisher_detail', []):
 421                         return feed.publisher_detail.email
 422
 423                 if feed.get("errorreportsto", ''):
 424                         return feed.errorreportsto
 425
 426         return DEFAULT_FROM
 427
 428 ### Simple Database of Feeds ###
 429
 430 class Feed:
 431         def __init__(self, url, to):
 432                 self.url, self.etag, self.modified, self.seen = url, None, None, {}
 433                 self.to = to
 434
 435 def load(lock=1):
 436         if not os.path.exists(feedfile):
 437                 print 'Feedfile "%s" does not exist.  If you\'re using r2e for the first time, you' % feedfile
 438                 print "have to run 'r2e new' first."
 439                 sys.exit(1)
 440         try:
 441                 feedfileObject = open(feedfile, 'r')
 442         except IOError, e:
 443                 print "Feedfile could not be opened: %s" % e
 444                 sys.exit(1)
 445         feeds = pickle.load(feedfileObject)
 446
 447         if lock:
 448                 locktype = 0
 449                 if unix:
 450                         locktype = fcntl.LOCK_EX
 451                         fcntl.flock(feedfileObject.fileno(), locktype)
 452                 #HACK: to deal with lock caching
 453                 feedfileObject = open(feedfile, 'r')
 454                 feeds = pickle.load(feedfileObject)
 455                 if unix:
 456                         fcntl.flock(feedfileObject.fileno(), locktype)
 457
 458         return feeds, feedfileObject
 459
 460 def unlock(feeds, feedfileObject):
 461         if not unix:
 462                 pickle.dump(feeds, open(feedfile, 'w'))
 463         else:
 464                 pickle.dump(feeds, open(feedfile+'.tmp', 'w'))
 465                 os.rename(feedfile+'.tmp', feedfile)
 466                 fcntl.flock(feedfileObject.fileno(), fcntl.LOCK_UN)
 467
 468 #@timelimit(FEED_TIMEOUT)
 469 def parse(url, etag, modified):
 470         if PROXY == '':
 471                 return feedparser.parse(url, etag, modified)
 472         else:
 473                 proxy = urllib2.ProxyHandler( {"http":PROXY} )
 474                 return feedparser.parse(url, etag, modified, handlers = [proxy])
 475
 476
 477 ### Program Functions ###
 478
 479 def add(*args):
 480         if len(args) == 2 and contains(args[1], '@') and not contains(args[1], '://'):
 481                 urls, to = [args[0]], args[1]
 482         else:
 483                 urls, to = args, None
 484
 485         feeds, feedfileObject = load()
 486         if (feeds and not isstr(feeds[0]) and to is None) or (not len(feeds) and to is None):
 487                 print "No email address has been defined. Please run 'r2e email emailaddress' or"
 488                 print "'r2e add url emailaddress'."
 489                 sys.exit(1)
 490         for url in urls: feeds.append(Feed(url, to))
 491         unlock(feeds, feedfileObject)
 492
 493 def run(num=None):
 494         feeds, feedfileObject = load()
 495         smtpserver = None
 496         try:
 497                 # We store the default to address as the first item in the feeds list.
 498                 # Here we take it out and save it for later.
 499                 default_to = ""
 500                 if feeds and isstr(feeds[0]): default_to = feeds[0]; ifeeds = feeds[1:]
 501                 else: ifeeds = feeds
 502
 503                 if num: ifeeds = [feeds[num]]
 504                 feednum = 0
 505
 506                 for f in ifeeds:
 507                         try:
 508                                 feednum += 1
 509                                 if VERBOSE: print >>warn, 'I: Processing [%d] "%s"' % (feednum, f.url)
 510                                 r = {}
 511                                 try:
 512                                         r = timelimit(FEED_TIMEOUT, parse)(f.url, f.etag, f.modified)
 513                                 except TimeoutError:
 514                                         print >>warn, 'W: feed [%d] "%s" timed out' % (feednum, f.url)
 515                                         continue
 516
 517                                 # Handle various status conditions, as required
 518                                 if 'status' in r:
 519                                         if r.status == 301: f.url = r['url']
 520                                         elif r.status == 410:
 521                                                 print >>warn, "W: feed gone; deleting", f.url
 522                                                 feeds.remove(f)
 523                                                 continue
 524
 525                                 http_status = r.get('status', 200)
 526                                 if VERBOSE > 1: print >>warn, "I: http status", http_status
 527                                 http_headers = r.get('headers', {
 528                                   'content-type': 'application/rss+xml',
 529                                   'content-length':'1'})
 530                                 exc_type = r.get("bozo_exception", Exception()).__class__
 531                                 if http_status != 304 and not r.entries and not r.get('version', ''):
 532                                         if http_status not in [200, 302]:
 533                                                 print >>warn, "W: error %d [%d] %s" % (http_status, feednum, f.url)
 534
 535                                         elif contains(http_headers.get('content-type', 'rss'), 'html'):
 536                                                 print >>warn, "W: looks like HTML [%d] %s"  % (feednum, f.url)
 537
 538                                         elif http_headers.get('content-length', '1') == '0':
 539                                                 print >>warn, "W: empty page [%d] %s" % (feednum, f.url)
 540
 541                                         elif hasattr(socket, 'timeout') and exc_type == socket.timeout:
 542                                                 print >>warn, "W: timed out on [%d] %s" % (feednum, f.url)
 543
 544                                         elif exc_type == IOError:
 545                                                 print >>warn, 'W: "%s" [%d] %s' % (r.bozo_exception, feednum, f.url)
 546
 547                                         elif hasattr(feedparser, 'zlib') and exc_type == feedparser.zlib.error:
 548                                                 print >>warn, "W: broken compression [%d] %s" % (feednum, f.url)
 549
 550                                         elif exc_type in socket_errors:
 551                                                 exc_reason = r.bozo_exception.args[1]
 552                                                 print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url)
 553
 554                                         elif exc_type == urllib2.URLError:
 555                                                 if r.bozo_exception.reason.__class__ in socket_errors:
 556                                                         exc_reason = r.bozo_exception.reason.args[1]
 557                                                 else:
 558                                                         exc_reason = r.bozo_exception.reason
 559                                                 print >>warn, "W: %s [%d] %s" % (exc_reason, feednum, f.url)
 560
 561                                         elif exc_type == AttributeError:
 562                                                 print >>warn, "W: %s [%d] %s" % (r.bozo_exception, feednum, f.url)
 563
 564                                         elif exc_type == KeyboardInterrupt:
 565                                                 raise r.bozo_exception
 566
 567                                         elif r.bozo:
 568                                                 print >>warn, 'E: error in [%d] "%s" feed (%s)' % (feednum, f.url, r.get("bozo_exception", "can't process"))
 569
 570                                         else:
 571                                                 print >>warn, "=== rss2email encountered a problem with this feed ==="
 572                                                 print >>warn, "=== See the rss2email FAQ at http://www.allthingsrss.com/rss2email/ for assistance ==="
 573                                                 print >>warn, "=== If this occurs repeatedly, send this to lindsey@allthingsrss.com ==="
 574                                                 print >>warn, "E:", r.get("bozo_exception", "can't process"), f.url
 575                                                 print >>warn, r
 576                                                 print >>warn, "rss2email", __version__
 577                                                 print >>warn, "feedparser", feedparser.__version__
 578                                                 print >>warn, "html2text", h2t.__version__
 579                                                 print >>warn, "Python", sys.version
 580                                                 print >>warn, "=== END HERE ==="
 581                                         continue
 582
 583                                 r.entries.reverse()
 584
 585                                 for entry in r.entries:
 586                                         id = getID(entry)
 587
 588                                         # If TRUST_GUID isn't set, we get back hashes of the content.
 589                                         # Instead of letting these run wild, we put them in context
 590                                         # by associating them with the actual ID (if it exists).
 591
 592                                         frameid = entry.get('id', id)
 593
 594                                         # If this item's ID is in our database
 595                                         # then it's already been sent
 596                                         # and we don't need to do anything more.
 597
 598                                         if f.seen.has_key(frameid) and f.seen[frameid] == id: continue
 599
 600                                         if not (f.to or default_to):
 601                                                 print "No default email address defined. Please run 'r2e email emailaddress'"
 602                                                 print "Ignoring feed %s" % f.url
 603                                                 break
 604
 605                                         if 'title_detail' in entry and entry.title_detail:
 606                                                 title = entry.title_detail.value
 607                                                 if contains(entry.title_detail.type, 'html'):
 608                                                         title = html2text(title)
 609                                         else:
 610                                                 title = getContent(entry)[:70]
 611
 612                                         title = title.replace("\n", " ").strip()
 613
 614                                         datetime = time.gmtime()
 615
 616                                         if DATE_HEADER:
 617                                                 for datetype in DATE_HEADER_ORDER:
 618                                                         kind = datetype+"_parsed"
 619                                                         if kind in entry and entry[kind]: datetime = entry[kind]
 620
 621                                         link = entry.get('link', "")
 622
 623                                         from_addr = getEmail(r.feed, entry)
 624
 625                                         name = getName(r, entry)
 626                                         fromhdr = '"'+ name + '" <' + from_addr + ">"
 627                                         tohdr = (f.to or default_to)
 628                                         subjecthdr = title
 629                                         datehdr = time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime)
 630                                         useragenthdr = "rss2email"
 631                                         extraheaders = {'Date': datehdr, 'User-Agent': useragenthdr, 'X-RSS-Feed': f.url, 'X-RSS-ID': id}
 632                                         if BONUS_HEADER != '':
 633                                                 for hdr in BONUS_HEADER.strip().splitlines():
 634                                                         pos = hdr.strip().find(':')
 635                                                         if pos > 0:
 636                                                                 extraheaders[hdr[:pos]] = hdr[pos+1:].strip()
 637                                                         else:
 638                                                                 print >>warn, "W: malformed BONUS HEADER", BONUS_HEADER
 639
 640                                         entrycontent = getContent(entry, HTMLOK=HTML_MAIL)
 641                                         contenttype = 'plain'
 642                                         content = ''
 643                                         if USE_CSS_STYLING and HTML_MAIL:
 644                                                 contenttype = 'html'
 645                                                 content = "<html>\n"
 646                                                 content += '<head><style><!--' + STYLE_SHEET + '//--></style></head>\n'
 647                                                 content += '<body>\n'
 648                                                 content += '<div id="entry">\n'
 649                                                 content += '<h1'
 650                                                 content += ' class="header"'
 651                                                 content += '><a href="'+link+'">'+subjecthdr+'</a></h1>\n\n'
 652                                                 if ishtml(entrycontent):
 653                                                         body = entrycontent[1].strip()
 654                                                 else:
 655                                                         body = entrycontent.strip()
 656                                                 if body != '':
 657                                                         content += '<div id="body"><table><tr><td>\n' + body + '</td></tr></table></div>\n'
 658                                                 content += '\n<p class="footer">URL: <a href="'+link+'">'+link+'</a>'
 659                                                 if hasattr(entry,'enclosures'):
 660                                                         for enclosure in entry.enclosures:
 661                                                                 if (hasattr(enclosure, 'url') and enclosure.url != ""):
 662                                                                         content += ('<br/>Enclosure: <a href="'+enclosure.url+'">'+enclosure.url+"</a>\n")
 663                                                                 if (hasattr(enclosure, 'src') and enclosure.src != ""):
 664                                                                         content += ('<br/>Enclosure: <a href="'+enclosure.src+'">'+enclosure.src+'</a><br/><img src="'+enclosure.src+'"\n')
 665                                                 content += '</p></div>\n'
 666                                                 content += "\n\n</body></html>"
 667                                         else:
 668                                                 if ishtml(entrycontent):
 669                                                         contenttype = 'html'
 670                                                         content = "<html>\n"
 671                                                         content = ("<html><body>\n\n" +
 672                                                                    '<h1><a href="'+link+'">'+subjecthdr+'</a></h1>\n\n' +
 673                                                                    entrycontent[1].strip() + # drop type tag (HACK: bad abstraction)
 674                                                                    '<p>URL: <a href="'+link+'">'+link+'</a></p>' )
 675
 676                                                         if hasattr(entry,'enclosures'):
 677                                                                 for enclosure in entry.enclosures:
 678                                                                         if enclosure.url != "":
 679                                                                                 content += ('Enclosure: <a href="'+enclosure.url+'">'+enclosure.url+"</a><br/>\n")
 680
 681                                                         content += ("\n</body></html>")
 682                                                 else:
 683                                                         content = entrycontent.strip() + "\n\nURL: "+link
 684                                                         if hasattr(entry,'enclosures'):
 685                                                                 for enclosure in entry.enclosures:
 686                                                                         if enclosure.url != "":
 687                                                                                 content += ('\nEnclosure: ' + enclosure.url + "\n")
 688
 689                                         smtpserver = send(fromhdr, tohdr, subjecthdr, content, contenttype, extraheaders, smtpserver)
 690
 691                                         f.seen[frameid] = id
 692
 693                                 f.etag, f.modified = r.get('etag', None), r.get('modified', None)
 694                         except (KeyboardInterrupt, SystemExit):
 695                                 raise
 696                         except:
 697                                 print >>warn, "=== rss2email encountered a problem with this feed ==="
 698                                 print >>warn, "=== See the rss2email FAQ at http://www.allthingsrss.com/rss2email/ for assistance ==="
 699                                 print >>warn, "=== If this occurs repeatedly, send this to lindsey@allthingsrss.com ==="
 700                                 print >>warn, "E: could not parse", f.url
 701                                 traceback.print_exc(file=warn)
 702                                 print >>warn, "rss2email", __version__
 703                                 print >>warn, "feedparser", feedparser.__version__
 704                                 print >>warn, "html2text", h2t.__version__
 705                                 print >>warn, "Python", sys.version
 706                                 print >>warn, "=== END HERE ==="
 707                                 continue
 708
 709         finally:
 710                 unlock(feeds, feedfileObject)
 711                 if smtpserver:
 712                         smtpserver.quit()
 713
 714 def list():
 715         feeds, feedfileObject = load(lock=0)
 716         default_to = ""
 717
 718         if feeds and isstr(feeds[0]):
 719                 default_to = feeds[0]; ifeeds = feeds[1:]; i=1
 720                 print "default email:", default_to
 721         else: ifeeds = feeds; i = 0
 722         for f in ifeeds:
 723                 print `i`+':', f.url, '('+(f.to or ('default: '+default_to))+')'
 724                 if not (f.to or default_to):
 725                         print "   W: Please define a default address with 'r2e email emailaddress'"
 726                 i+= 1
 727
 728 def delete(n):
 729         feeds, feedfileObject = load()
 730         if (n == 0) and (feeds and isstr(feeds[0])):
 731                 print >>warn, "W: ID has to be equal to or higher than 1"
 732         elif n >= len(feeds):
 733                 print >>warn, "W: no such feed"
 734         else:
 735                 print >>warn, "W: deleting feed %s" % feeds[n].url
 736                 feeds = feeds[:n] + feeds[n+1:]
 737                 if n != len(feeds):
 738                         print >>warn, "W: feed IDs have changed, list before deleting again"
 739         unlock(feeds, feedfileObject)
 740
 741 def reset():
 742         feeds, feedfileObject = load()
 743         if feeds and isstr(feeds[0]):
 744                 ifeeds = feeds[1:]
 745         else: ifeeds = feeds
 746         for f in ifeeds:
 747                 if VERBOSE: print "Resetting %d already seen items" % len(f.seen)
 748                 f.seen = {}
 749                 f.etag = None
 750                 f.modified = None
 751
 752         unlock(feeds, feedfileObject)
 753
 754 def email(addr):
 755         feeds, feedfileObject = load()
 756         if feeds and isstr(feeds[0]): feeds[0] = addr
 757         else: feeds = [addr] + feeds
 758         unlock(feeds, feedfileObject)
 759
 760 if __name__ == '__main__':
 761         args = sys.argv
 762         try:
 763                 if len(args) < 3: raise InputError, "insufficient args"
 764                 feedfile, action, args = args[1], args[2], args[3:]
 765
 766                 if action == "run":
 767                         if args and args[0] == "--no-send":
 768                                 def send(sender, recipient, subject, body, contenttype, extraheaders=None, smtpserver=None):
 769                                         if VERBOSE: print 'Not sending:', unu(subject)
 770
 771                         if args and args[-1].isdigit(): run(int(args[-1]))
 772                         else: run()
 773
 774                 elif action == "email":
 775                         if not args:
 776                                 raise InputError, "Action '%s' requires an argument" % action
 777                         else:
 778                                 email(args[0])
 779
 780                 elif action == "add": add(*args)
 781
 782                 elif action == "new":
 783                         if len(args) == 1: d = [args[0]]
 784                         else: d = []
 785                         pickle.dump(d, open(feedfile, 'w'))
 786
 787                 elif action == "list": list()
 788
 789                 elif action in ("help", "--help", "-h"): print __doc__
 790
 791                 elif action == "delete":
 792                         if not args:
 793                                 raise InputError, "Action '%s' requires an argument" % action
 794                         elif args[0].isdigit():
 795                                 delete(int(args[0]))
 796                         else:
 797                                 raise InputError, "Action '%s' requires a number as its argument" % action
 798
 799                 elif action == "reset": reset()
 800
 801                 else:
 802                         raise InputError, "Invalid action"
 803
 804         except InputError, e:
 805                 print "E:", e
 806                 print
 807                 print __doc__
 808