From a55809611f9706e7004791c060bd7e5a90a2dcb9 Mon Sep 17 00:00:00 2001 From: "U-SEVEN\\lindsey" Date: Fri, 24 Jun 2011 11:07:22 -0700 Subject: [PATCH] Better attribute handling. Factored out tag handling into getTags() --- rss2email.py | 50 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/rss2email.py b/rss2email.py index 3299cd5..995750b 100644 --- a/rss2email.py +++ b/rss2email.py @@ -24,6 +24,7 @@ ___contributors__ = ["Dean Jackson", "Brian Lalor", "Joey Hess", "Lindsey Smith (maintainer)", "Erik Hetzner", "Aaron Swartz (original author)" ] import urllib2 +import BeautifulSoup urllib2.install_opener(urllib2.build_opener()) ### Vaguely Customizable Options ### @@ -382,7 +383,8 @@ def getContent(entry, HTMLOK=0): if not HTMLOK: # Only need to convert to text if HTML isn't OK for c in conts: if contains(c.type, 'html'): - return html2text(c.value) + cleanerhtml = BeautifulSoup.BeautifulSoup(c.value) + return html2text(unicode(cleanerhtml)) for c in conts: if c.type == 'text/plain': return c.value @@ -392,7 +394,8 @@ def getContent(entry, HTMLOK=0): return "" def getID(entry): - """Get best ID from an entry.""" + """Get best ID from an entry. + NEEDS UNIT TESTS""" if TRUST_GUID: if 'id' in entry and entry.id: # Newer versions of feedparser could return a dictionary @@ -406,17 +409,17 @@ def getID(entry): if 'link' in entry: return entry.link if 'title' in entry: return hash(unu(entry.title)).hexdigest() -def getName(r, entry): +def getName(fullfeed, entry): """Get the best name. NEEDS UNIT TESTS""" if NO_FRIENDLY_NAME: return '' - feed = r.feed - if hasattr(r, "url") and r.url in OVERRIDE_FROM.keys(): - return OVERRIDE_FROM[r.url] + feedinfo = fullfeed.feed + if hasattr(fullfeed, "url") and fullfeed.url in OVERRIDE_FROM.keys(): + return OVERRIDE_FROM[fullfeed.url] - name = feed.get('title', '') + name = feedinfo.get('title', '') if 'name' in entry.get('author_detail', []): # normally {} but py2.1 if entry.author_detail.name: @@ -427,10 +430,10 @@ def getName(r, entry): except UnicodeDecodeError: name += unicode(entry.author_detail.name, 'utf-8') - elif 'name' in feed.get('author_detail', []): - if feed.author_detail.name: + elif 'name' in feedinfo.get('author_detail', []): + if feedinfo.author_detail.name: if name: name += ", " - name += feed.author_detail.name + name += feedinfo.author_detail.name return name @@ -469,6 +472,21 @@ def getEmail(r, entry): return DEFAULT_EMAIL[r.url] return DEFAULT_FROM +def getTags(entry): + """If the entry has any tags, build a tagline and return as a string. Otherwise returns empty string""" + tagline = "" + if 'tags' in entry: + tags = entry.get('tags') + taglist = [] + if tags: + for tag in tags: + if tag.has_key('term'): taglist.append(tag['term']) + if taglist: + tagline = ",".join(taglist) + + return tagline + + ### Simple Database of Feeds ### class Feed: @@ -689,16 +707,8 @@ def run(num=None): useragenthdr = "rss2email" # Add post tags, if available - tagline = "" - if 'tags' in entry: - tags = entry.get('tags') - taglist = [] - if tags: - for tag in tags: - taglist.append(tag['term']) - if taglist: - tagline = ",".join(taglist) - + tagline = getTags(entry) + extraheaders = {'Date': datehdr, 'User-Agent': useragenthdr, 'X-RSS-Feed': f.url, 'X-RSS-ID': id, 'X-RSS-URL': link, 'X-RSS-TAGS' : tagline} if BONUS_HEADER != '': for hdr in BONUS_HEADER.strip().splitlines(): -- 2.26.2