+# -*- coding: utf-8 -*-
# Copyright (C) 2004-2013 Aaron Swartz
# Brian Lalor
# Dean Jackson
+# Dennis Keitzel <github@pinshot.net>
# Erik Hetzner
# Etienne Millon <me@emillon.org>
+# J. Lewis Muir <jlmuir@imca-cat.org>
# Joey Hess
# Lindsey Smith <lindsey.smith@gmail.com>
# Marcel Ackermann
del e # cleanup namespace
_SOCKET_ERRORS = tuple(_SOCKET_ERRORS)
+# Clear feedparser's preferred XML parser list, because drv_libxml2 raises:
+#   TypeError: 'str' does not support the buffer interface
+_feedparser.PREFERRED_XML_PARSERS = []
+
class Feed (object):
"""Utility class for feed manipulation and storage.
>>> feed.url
'http://example.com/feed.atom'
- Names can only contain ASCII letters, digits, and '._-'. Here the
+ Names can only contain letters, digits, and '._-'. Here the
invalid space causes an exception:
>>> Feed(name='invalid name')
...
rss2email.error.InvalidFeedName: invalid feed name 'invalid name'
+ However, you aren't restricted to ASCII letters:
+
+ >>> Feed(name='Αθήνα')
+ <Feed Αθήνα (None -> )>
+
You must define a URL:
>>> Feed(name='feed-without-a-url', to='a@b.com').run(send=False)
>>> CONFIG['DEFAULT']['to'] = ''
>>> test_section = CONFIG.pop('feed.test-feed')
+
"""
- _name_regexp = _re.compile('^[a-zA-Z0-9._-]+$')
# Feed names may contain any Unicode word character (letters, digits,
# underscore) plus '.' and '-'.  A raw string keeps the regex escapes
# from being parsed as (invalid) string escapes, and '\d' is dropped
# because '\w' already covers digits.
_name_regexp = _re.compile(r'^[\w.-]+$')
# saved/loaded from feed.dat using __getstate__/__setstate__.
_dynamic_attributes = [
elif isinstance(exc, _sax.SAXParseException):
_LOG.error('sax parsing error: {}: {}'.format(exc, self))
warned = True
+ elif (parsed.bozo and
+ isinstance(exc, _feedparser.CharacterEncodingOverride)):
+ _LOG.warning(
+ 'incorrectly declared encoding: {}: {}'.format(exc, self))
+ warned = True
elif parsed.bozo or exc:
if exc is None:
exc = "can't process"
not version):
raise _error.ProcessingError(parsed=parsed, feed=feed)
def _html2text(self, html, baseurl='', default=None):
    """Convert an HTML fragment to plain text with html2text.

    This feed's html2text settings are applied (through
    ``self.config.setup_html2text``) before the conversion runs.

    Parameters:
      html: the HTML markup to convert.
      baseurl: passed through to ``html2text`` as ``baseurl``.
      default: value returned instead of raising when the markup
        cannot be parsed.  With the default of ``None`` the parse
        error propagates to the caller.

    Raises ``HTMLParseError`` when parsing fails and no ``default``
    was supplied.
    """
    self.config.setup_html2text(section=self.section)
    # NOTE(review): assumes ``_html_parser`` is the stdlib
    # ``html.parser`` module.  HTMLParseError was removed from it in
    # Python 3.5, and naming a missing attribute in the ``except``
    # clause would raise AttributeError and mask the real exception.
    # An empty tuple matches nothing, so on such interpreters every
    # error simply propagates unchanged.
    parse_error = getattr(_html_parser, 'HTMLParseError', ())
    try:
        return _html2text.html2text(html=html, baseurl=baseurl)
    except parse_error:
        if default is None:
            raise
        return default
def _process_entry(self, parsed, entry):
id_ = self._get_entry_id(entry)
if hasattr(entry, 'title_detail') and entry.title_detail:
title = entry.title_detail.value
if 'html' in entry.title_detail.type:
- title = self._html2text(title)
+ title = self._html2text(title, default=title)
else:
content = self._get_entry_content(entry)
value = content['value']
if content['type'] in ('text/html', 'application/xhtml+xml'):
- value = self._html2text(value)
+ value = self._html2text(value, default=value)
title = value[:70]
title = title.replace('\n', ' ').strip()
return title
"""
if not self.name_format:
return ''
- data = {'feed': self}
+ data = {
+ 'feed': self,
+ 'feed-title': '<feed title>',
+ 'author': '<author>',
+ 'publisher': '<publisher>',
+ }
feed = parsed.feed
data['feed-title'] = feed.get('title', '')
for x in [entry, feed]: