+# -*- coding: utf-8 -*-
# Copyright (C) 2004-2013 Aaron Swartz
# Brian Lalor
# Dean Jackson
+# Dennis Keitzel <github@pinshot.net>
# Erik Hetzner
# Etienne Millon <me@emillon.org>
+# J. Lewis Muir <jlmuir@imca-cat.org>
# Joey Hess
# Lindsey Smith <lindsey.smith@gmail.com>
# Marcel Ackermann
"""
import collections as _collections
+from email.mime.message import MIMEMessage as _MIMEMessage
+from email.mime.multipart import MIMEMultipart as _MIMEMultipart
from email.utils import formataddr as _formataddr
import hashlib as _hashlib
import html.parser as _html_parser
from . import util as _util
-_feedparser.USER_AGENT = 'rss2email/{} +{}'.format(__version__, __url__)
+_USER_AGENT = 'rss2email/{} +{}'.format(__version__, __url__)
+_feedparser.USER_AGENT = _USER_AGENT
_urllib_request.install_opener(_urllib_request.build_opener())
_SOCKET_ERRORS = []
for e in ['error', 'herror', 'gaierror']:
del e # cleanup namespace
_SOCKET_ERRORS = tuple(_SOCKET_ERRORS)
+# Clear feedparser's preferred XML parser list, because drv_libxml2
+# raises:
+#   TypeError: 'str' does not support the buffer interface
+_feedparser.PREFERRED_XML_PARSERS = []
+
class Feed (object):
"""Utility class for feed manipulation and storage.
>>> feed.url
'http://example.com/feed.atom'
- Names can only contain ASCII letters, digits, and '._-'. Here the
+ Names can only contain letters, digits, and '._-'. Here the
invalid space causes an exception:
>>> Feed(name='invalid name')
...
rss2email.error.InvalidFeedName: invalid feed name 'invalid name'
+ However, you aren't restricted to ASCII letters:
+
+ >>> Feed(name='Αθήνα')
+ <Feed Αθήνα (None -> )>
+
You must define a URL:
>>> Feed(name='feed-without-a-url', to='a@b.com').run(send=False)
>>> CONFIG['DEFAULT']['to'] = ''
>>> test_section = CONFIG.pop('feed.test-feed')
+
"""
- _name_regexp = _re.compile('^[a-zA-Z0-9._-]+$')
+ _name_regexp = _re.compile('^[\w\d.-]+$')
# saved/loaded from feed.dat using __getstate__/__setstate__.
_dynamic_attributes = [
# hints for value conversion
_boolean_attributes = [
+ 'digest',
'force_from',
'use_publisher_email',
- 'friendly_name',
'active',
'date_header',
'trust_guid',
_function_attributes = [
'post_process',
+ 'digest_post_process',
]
def __init__(self, name=None, url=None, to=None, config=None):
elif isinstance(exc, _sax.SAXParseException):
_LOG.error('sax parsing error: {}: {}'.format(exc, self))
warned = True
+ elif (parsed.bozo and
+ isinstance(exc, _feedparser.CharacterEncodingOverride)):
+ _LOG.warning(
+ 'incorrectly declared encoding: {}: {}'.format(exc, self))
+ warned = True
elif parsed.bozo or exc:
if exc is None:
exc = "can't process"
not version):
raise _error.ProcessingError(parsed=parsed, feed=feed)
- def _html2text(self, html, baseurl=''):
+ def _html2text(self, html, baseurl='', default=None):
self.config.setup_html2text(section=self.section)
- return _html2text.html2text(html=html, baseurl=baseurl)
+ try:
+ return _html2text.html2text(html=html, baseurl=baseurl)
+ except _html_parser.HTMLParseError as e:
+ if default is not None:
+ return default
+ raise
def _process_entry(self, parsed, entry):
id_ = self._get_entry_id(entry)
extra_headers = _collections.OrderedDict((
('Date', self._get_entry_date(entry)),
('Message-ID', '<{}@dev.null.invalid>'.format(_uuid.uuid4())),
- ('User-Agent', 'rss2email'),
+ ('User-Agent', _USER_AGENT),
('X-RSS-Feed', self.url),
('X-RSS-ID', id_),
('X-RSS-URL', self._get_entry_link(entry)),
if hasattr(entry, 'title_detail') and entry.title_detail:
title = entry.title_detail.value
if 'html' in entry.title_detail.type:
- title = self._html2text(title)
+ title = self._html2text(title, default=title)
else:
content = self._get_entry_content(entry)
value = content['value']
if content['type'] in ('text/html', 'application/xhtml+xml'):
- value = self._html2text(value)
+ value = self._html2text(value, default=value)
title = value[:70]
title = title.replace('\n', ' ').strip()
return title
... '</feed>\\n'
... )
>>> entry = parsed.entries[0]
- >>> f.friendly_name = False
+ >>> f.name_format = ''
>>> f._get_entry_name(parsed, entry)
''
- >>> f.friendly_name = True
+ >>> f.name_format = '{author}'
>>> f._get_entry_name(parsed, entry)
'Example author'
+ >>> f.name_format = '{feed-title}: {author}'
+ >>> f._get_entry_name(parsed, entry)
+ ': Example author'
+ >>> f.name_format = '{author} ({feed.name})'
+ >>> f._get_entry_name(parsed, entry)
+ 'Example author (test-feed)'
"""
- if not self.friendly_name:
+ if not self.name_format:
return ''
- parts = ['']
+ data = {
+ 'feed': self,
+ 'feed-title': '<feed title>',
+ 'author': '<author>',
+ 'publisher': '<publisher>',
+ }
feed = parsed.feed
- parts.append(feed.get('title', ''))
+ data['feed-title'] = feed.get('title', '')
for x in [entry, feed]:
if 'name' in x.get('author_detail', []):
if x.author_detail.name:
- if ''.join(parts):
- parts.append(': ')
- parts.append(x.author_detail.name)
+ data['author'] = x.author_detail.name
break
- if not ''.join(parts) and self.use_publisher_email:
- if 'name' in feed.get('publisher_detail', []):
- if ''.join(parts):
- parts.append(': ')
- parts.append(feed.publisher_detail.name)
- return _html2text.unescape(''.join(parts))
+ if 'name' in feed.get('publisher_detail', []):
+ data['publisher'] = feed.publisher_detail.name
+ name = self.name_format.format(**data)
+ return _html2text.unescape(name)
def _validate_email(self, email, default=None):
"""Do a basic quality check on email address
lines.extend([
'</head>',
'<body>',
- '<div id="entry>',
+ '<div id="entry">',
'<h1 class="header"><a href="{}">{}</a></h1>'.format(
link, subject),
'<div id="body">',
if not self.to:
raise _error.NoToEmailAddress(feed=self)
parsed = self._fetch()
+
+ if self.digest:
+ digest = self._new_digest()
+ seen = []
+
for (guid, id_, sender, message) in self._process(parsed):
_LOG.debug('new message: {}'.format(message['Subject']))
- if send:
- self._send(sender=sender, message=message)
+ if self.digest:
+ seen.append((guid, id_))
+ self._append_to_digest(digest=digest, message=message)
+ else:
+ if send:
+ self._send(sender=sender, message=message)
+ if guid not in self.seen:
+ self.seen[guid] = {}
+ self.seen[guid]['id'] = id_
+
+ if self.digest and seen:
+ if self.digest_post_process:
+ digest = self.digest_post_process(
+ feed=self, parsed=parsed, seen=seen, message=digest)
+ if not digest:
+ return
+ self._send_digest(
+ digest=digest, seen=seen, sender=sender, send=send)
+
+ self.etag = parsed.get('etag', None)
+ self.modified = parsed.get('modified', None)
+
+ def _new_digest(self):
+ digest = _MIMEMultipart('digest')
+ digest['To'] = self.to # TODO: _Header(), _formataddr((recipient_name, recipient_addr))
+ digest['Subject'] = 'digest for {}'.format(self.name)
+ digest['Message-ID'] = '<{}@dev.null.invalid>'.format(_uuid.uuid4())
+ digest['User-Agent'] = _USER_AGENT
+ digest['X-RSS-Feed'] = self.url
+ return digest
+
+ def _append_to_digest(self, digest, message):
+ part = _MIMEMessage(message)
+ part.add_header('Content-Disposition', 'attachment')
+ digest.attach(part)
+
+ def _send_digest(self, digest, seen, sender, send=True):
+ """Send a digest message
+
+ The date is extracted from the last message in the digest
+ payload. We assume that this part exists. If you don't have
+ any messages in the digest, don't call this function.
+
+ digest: multipart message to finish and send; its From and
+ Date headers are set here.
+ seen: iterable of (guid, id_) pairs to record in self.seen.
+ sender: used as the From header and passed on to self._send.
+ send: when true, the digest is handed to self._send.
+ """
+ digest['From'] = sender # TODO: _Header(), _formataddr()...
+ # The wrapped message is taken as the first payload item of the
+ # last part (presumably a part added by _append_to_digest).
+ last_part = digest.get_payload()[-1]
+ last_message = last_part.get_payload()[0]
+ digest['Date'] = last_message['Date']
+
+ _LOG.debug('new digest for {}'.format(self))
+ if send:
+ self._send(sender=sender, message=digest)
+ # Remember the id of every entry listed in `seen`, keyed by guid.
+ for (guid, id_) in seen:
if guid not in self.seen:
self.seen[guid] = {}
self.seen[guid]['id'] = id_
- self.etag = parsed.get('etag', None)
- self.modified = parsed.get('modified', None)