From: Aaron Swartz <me@aaronsw.com>
Date: Wed, 28 Jan 2004 12:00:00 +0000 (+0000)
Subject: Add rss2email v2.0
X-Git-Tag: v2.0
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=edc2ad5bf45ea8211eb277872515f252b30b8e44;p=rss2email.git

Add rss2email v2.0

Mostly rewritten.

Downloaded from:

  http://web.archive.org/web/20040416222743/http://www.aaronsw.com/2002/rss2email/rss2email-2.0.py
---

edc2ad5bf45ea8211eb277872515f252b30b8e44
diff --git a/rss2email.py b/rss2email.py
new file mode 100644
index 0000000..5602285
--- /dev/null
+++ b/rss2email.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+"""rss2email: get RSS feeds emailed to you
+http://www.aaronsw.com/2002/rss2email
+
+Usage: python rss2email.py feedfile action [options]
+  feedfile: name of the file to store feed info in
+  action [options]:
+	new (create new feedfile)
+	run
+	add url name from to
+	list
+	delete num
+"""
+__version__ = "2.0"
+__author__ = "Aaron Swartz (me@aaronsw.com)"
+__copyright__ = "(C) 2004 Aaron Swartz. GNU GPL 2."
+___contributors__ = ["Dean Jackson (dino@grorg.org)", 
+					 "Brian Lalor (blalor@ithacabands.org)"]
+
+### Vaguely Customizable Options ###
+
+# The email address messages are from by default:
+DEFAULT_FROM = "bozo@dev.null"
+
+# 1: Receive one email per post
+# 0: Receive an email every time a post changes
+TRUST_GUID = 1
+
+# 1: Treat the contents of <description> as HTML
+# 0: Send the contents of <description> as is, without conversion
+TREAT_DESCRIPTION_AS_HTML = 1
+
+# def send(fr, to, message):
+# 	import smtplib
+# 	s = smtplib.SMTP("vorpal.notabug.com:26")
+# 	s.sendmail(fr, to, message)
+
+def send(fr, to, message):
+	os.popen2(["/usr/sbin/sendmail", to])[0].write(message)
+
+### End of Options ###
+
+from html2text import html2text, expandEntities
+import feedparser
+import cPickle as pickle, fcntl, md5, time, os
+
+def e(obj, val):
+	x = expandEntities(obj[val])
+	if type(x) is unicode: x = x.encode('utf-8')
+	return x.strip()
+
+def getContent(item, url):
+	if item.has_key('content') and item['content']:
+		for c in item['content']:
+			if c['type'] == 'text/plain': return c['value']
+
+		for c in item['content']:
+			if c['type'].find('html') != -1:
+				return html2text(c['value'], c['base'])
+		
+		return item['content'][0]['value']
+			
+	if item.has_key('description'): 
+		if TREAT_DESCRIPTION_AS_HTML:
+			return html2text(item['description'], url)
+		else:
+			return item['description']
+	
+	if item.has_key('summary'): return item['summary']
+	return ""
+
+def getID(item, content):
+	if TRUST_GUID:
+		if item.has_key('id'): return item['id']
+
+	if content: return md5.new(content).hexdigest()
+	if item.has_key('link'): return item['link']
+	if item.has_key('title'): return md5.new(item['title']).hexdigest()
+
+class Feed:
+	def __init__(self, url, to):
+		self.url, self.etag, self.modified, self.seen = url, None, None, {}
+		self.to = to		
+
+def load():
+	ff2 = open(feedfile, 'r')
+	feeds = pickle.load(ff2)
+	fcntl.flock(ff2, fcntl.LOCK_EX)
+	return feeds, ff2
+
+def unlock(feeds, ff2):
+	pickle.dump(feeds, open(feedfile, 'w'))
+	fcntl.flock(ff2, fcntl.LOCK_UN)
+	
+def add(url, to):
+	feeds, ff2 = load()
+	feeds.append(Feed(url, to))
+	unlock(feeds, ff2)
+
+def run():
+	feeds, ff2 = load()	
+	for f in feeds:
+		result = feedparser.parse(f.url, f.etag, f.modified)
+		
+		if result.has_key('encoding'): enc = result['encoding']
+		else: enc = 'utf-8'
+		
+		c, ert = result['channel'], 'errorreportsto'
+		
+		headers = "From: "
+		if c.has_key('title'): headers += e(c, 'title')
+		if c.has_key(ert) and c[ert].startswith('mailto:'):
+			fr = c[ert][8:]
+		else:
+			fr = DEFAULT_FROM
+		
+		headers += '<'+fr+'>'
+		headers += "\nTo: " + f.to
+		headers += "\nContent-type: text/plain; charset=" + enc
+		
+		if not result['items'] and ((not result.has_key('status') or (result.has_key('status') and result['status'] != 304))):
+			print "W: no items; invalid feed? (" + f.url + ")"
+			continue
+	
+		for i in result['items']:
+			content = getContent(i, f.url)
+			id = getID(i, content)
+		
+			if i.has_key('link'): frameid = link = e(i, 'link')
+			else: frameid = id; link = None
+			
+			if f.seen.has_key(frameid) and f.seen[frameid] == id:
+				continue # have seen
+	
+			if i.has_key('title'): title = e(i, 'title')
+			else: title = content[:70].replace("\n", " ")
+				
+			message = (headers
+					   + "\nSubject: " + title
+					   + "\nDate:" + time.strftime("%a, %d %b %Y %H:%M:%S -0000", time.gmtime())
+					   + "\n")
+	
+			if link: message += "\nURL: " + link + "\n"
+			
+			message += "\n" + content
+			
+			send(fr, f.to, message)
+	
+			f.seen[frameid] = id
+			
+		f.etag, f.modified = result.get('etag', None), result.get('modified', None)
+	
+	unlock(feeds, ff2)
+
+def list():
+	feeds, ff2 = load()
+	i = 0
+	for f in feeds:
+		print `i`+':', f.url, '('+f.to+')'
+		i+= 1
+
+	unlock(feeds, ff2)
+
+def delete(n):
+	feeds, ff2 = load()
+	feeds = feeds[:n] + feeds[n+1:]
+	unlock(feeds, ff2)
+
+if __name__ == "__main__":
+	import sys
+	if len(sys.argv) < 3: print __doc__
+	else: 
+		feedfile, action = sys.argv[1], sys.argv[2]
+		
+		if action == "run": run()
+		elif action == "add": add(*sys.argv[3:])
+		elif action == "new": pickle.dump([], open(feedfile, 'w'))
+		elif action == "list": list()
+		elif action == "delete": delete(int(sys.argv[3]))
+		else:
+			print __doc__
+		
\ No newline at end of file