"Lindsey Smith (maintainer)", "Erik Hetzner", "Aaron Swartz (original author)" ]
import urllib2
+import BeautifulSoup
urllib2.install_opener(urllib2.build_opener())
### Vaguely Customizable Options ###
if not HTMLOK: # Only need to convert to text if HTML isn't OK
for c in conts:
if contains(c.type, 'html'):
- return html2text(c.value)
+ cleanerhtml = BeautifulSoup.BeautifulSoup(c.value)
+ return html2text(unicode(cleanerhtml))
for c in conts:
if c.type == 'text/plain': return c.value
return ""
def getID(entry):
- """Get best ID from an entry."""
+ """Get best ID from an entry.
+ NEEDS UNIT TESTS"""
if TRUST_GUID:
if 'id' in entry and entry.id:
# Newer versions of feedparser could return a dictionary
if 'link' in entry: return entry.link
if 'title' in entry: return hash(unu(entry.title)).hexdigest()
-def getName(r, entry):
+def getName(fullfeed, entry):
"""Get the best name.
NEEDS UNIT TESTS"""
if NO_FRIENDLY_NAME: return ''
- feed = r.feed
- if hasattr(r, "url") and r.url in OVERRIDE_FROM.keys():
- return OVERRIDE_FROM[r.url]
+ feedinfo = fullfeed.feed
+ if hasattr(fullfeed, "url") and fullfeed.url in OVERRIDE_FROM.keys():
+ return OVERRIDE_FROM[fullfeed.url]
- name = feed.get('title', '')
+ name = feedinfo.get('title', '')
if 'name' in entry.get('author_detail', []): # normally {} but py2.1
if entry.author_detail.name:
except UnicodeDecodeError:
name += unicode(entry.author_detail.name, 'utf-8')
- elif 'name' in feed.get('author_detail', []):
- if feed.author_detail.name:
+ elif 'name' in feedinfo.get('author_detail', []):
+ if feedinfo.author_detail.name:
if name: name += ", "
- name += feed.author_detail.name
+ name += feedinfo.author_detail.name
return name
return DEFAULT_EMAIL[r.url]
return DEFAULT_FROM
def getTags(entry):
    """Build a comma-separated tag line from an entry's tags.

    Collects the 'term' value of every tag found under ``entry['tags']``
    and joins them with ",".  Tags that have no 'term' key, or whose term
    is None or empty, are skipped so the join never sees a non-string
    value and the result contains no empty fields.

    Returns the joined string, or "" when the entry has no usable tags.
    """
    # 'tags' may be absent or present-but-None; treat both as "no tags".
    tags = entry.get('tags') or []
    # tag.get('term') replaces the Python-2-only has_key() check and also
    # filters out None/empty terms, which would otherwise break ",".join.
    taglist = [tag['term'] for tag in tags if tag.get('term')]
    # Joining an empty list yields "", matching the "no tags" result.
    return ",".join(taglist)
+
+
### Simple Database of Feeds ###
class Feed:
useragenthdr = "rss2email"
# Add post tags, if available
- tagline = ""
- if 'tags' in entry:
- tags = entry.get('tags')
- taglist = []
- if tags:
- for tag in tags:
- taglist.append(tag['term'])
- if taglist:
- tagline = ",".join(taglist)
-
+ tagline = getTags(entry)
+
extraheaders = {'Date': datehdr, 'User-Agent': useragenthdr, 'X-RSS-Feed': f.url, 'X-RSS-ID': id, 'X-RSS-URL': link, 'X-RSS-TAGS' : tagline}
if BONUS_HEADER != '':
for hdr in BONUS_HEADER.strip().splitlines():