From f1284d7eefafbdecd85c721eaf4fa16c03ffd30e Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sat, 28 Sep 2013 09:51:03 -0700 Subject: [PATCH] feed: Add 'default' argument to Feed._html2text for HTMLParseError This allows us to easily fall back on an unconverted string in the event that the input HTML is malformed. We already caught HTMLParseError when converting HTML to plain text for non-html mail, but we didn't catch it in Feed._get_entry_title. Now we gracefully handle the situation by treating the malformed HTML as plain text. --- rss2email/feed.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/rss2email/feed.py b/rss2email/feed.py index 3999b0c..16191e2 100644 --- a/rss2email/feed.py +++ b/rss2email/feed.py @@ -416,9 +416,14 @@ class Feed (object): not version): raise _error.ProcessingError(parsed=parsed, feed=feed) - def _html2text(self, html, baseurl=''): + def _html2text(self, html, baseurl='', default=None): self.config.setup_html2text(section=self.section) - return _html2text.html2text(html=html, baseurl=baseurl) + try: + return _html2text.html2text(html=html, baseurl=baseurl) + except _html_parser.HTMLParseError as e: + if default is not None: + return default + raise def _process_entry(self, parsed, entry): id_ = self._get_entry_id(entry) @@ -501,12 +506,12 @@ class Feed (object): if hasattr(entry, 'title_detail') and entry.title_detail: title = entry.title_detail.value if 'html' in entry.title_detail.type: - title = self._html2text(title) + title = self._html2text(title, default=title) else: content = self._get_entry_content(entry) value = content['value'] if content['type'] in ('text/html', 'application/xhtml+xml'): - value = self._html2text(value) + value = self._html2text(value, default=value) title = value[:70] title = title.replace('\n', ' ').strip() return title -- 2.26.2