From 3f9adb574906b421488c258f31b2e79cae65b1cf Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sat, 13 Apr 2013 18:05:03 -0400 Subject: [PATCH] feed: Add the digest setting for multi-entry email For high-volume feeds, some users want to receive a single email per Feed.run() instead of a separate email for each new entry in the feed. If you enable the new digest setting, the per-entry messages are packed into a single multipart/digest message instead of being mailed individually. The MIME details for digests are spelled out in RFC 2046 [1]. Peripheral changes: * Added rss2email.feed._USER_AGENT, to get version information into the User-Agent message headers and to avoid repeating myself. * Normalize multipart MIME boundaries for easier testing of multipart/digest messages. [1]: http://tools.ietf.org/html/rfc2046#section-5.1.5 Signed-off-by: W. Trevor King --- rss2email/config.py | 3 + rss2email/feed.py | 64 ++++++++++++-- test/gmane/2.config | 4 + test/gmane/2.expected | 191 ++++++++++++++++++++++++++++++++++++++++++ test/test.py | 14 +++- 5 files changed, 269 insertions(+), 7 deletions(-) create mode 100644 test/gmane/2.config create mode 100644 test/gmane/2.expected diff --git a/rss2email/config.py b/rss2email/config.py index 4e0dbc9..1a2faa0 100644 --- a/rss2email/config.py +++ b/rss2email/config.py @@ -81,6 +81,9 @@ CONFIG['DEFAULT'] = _collections.OrderedDict(( # True: Fetch, process, and email feeds. # False: Don't fetch, process, or email feeds ('active', str(True)), + # True: Send a single, multi-entry email per feed per rss2email run. + # False: Send a single email per entry. + ('digest', str(False)), # True: Generate Date header based on item's date, when possible. # False: Generate Date header based on time sent. ('date-header', str(False)), diff --git a/rss2email/feed.py b/rss2email/feed.py index 0e67d31..b2497d7 100644 --- a/rss2email/feed.py +++ b/rss2email/feed.py @@ -28,6 +28,8 @@ """ import collections as _collections +from email.mime.message import MIMEMessage as _MIMEMessage +from email.mime.multipart import MIMEMultipart as _MIMEMultipart from email.utils import formataddr as _formataddr import hashlib as _hashlib import html.parser as _html_parser @@ -52,7 +54,8 @@ from . import error as _error from . import util as _util -_feedparser.USER_AGENT = 'rss2email/{} +{}'.format(__version__, __url__) +_USER_AGENT = 'rss2email/{} +{}'.format(__version__, __url__) +_feedparser.USER_AGENT = _USER_AGENT _urllib_request.install_opener(_urllib_request.build_opener()) _SOCKET_ERRORS = [] for e in ['error', 'herror', 'gaierror']: @@ -158,6 +161,7 @@ class Feed (object): # hints for value conversion _boolean_attributes = [ + 'digest', 'force_from', 'use_publisher_email', 'friendly_name', @@ -428,7 +432,7 @@ class Feed (object): extra_headers = _collections.OrderedDict(( ('Date', self._get_entry_date(entry)), ('Message-ID', '<{}@dev.null.invalid>'.format(_uuid.uuid4())), - ('User-Agent', 'rss2email'), + ('User-Agent', _USER_AGENT), ('X-RSS-Feed', self.url), ('X-RSS-ID', id_), ('X-RSS-URL', self._get_entry_link(entry)), @@ -788,12 +792,60 @@ class Feed (object): if not self.to: raise _error.NoToEmailAddress(feed=self) parsed = self._fetch() + + if self.digest: + digest = self._new_digest() + seen = [] + for (guid, id_, sender, message) in self._process(parsed): _LOG.debug('new message: {}'.format(message['Subject'])) - if send: - self._send(sender=sender, message=message) + if self.digest: + seen.append((guid, id_)) + self._append_to_digest(digest=digest, message=message) + else: + if send: + self._send(sender=sender, message=message) + if guid not in self.seen: + self.seen[guid] = {} + self.seen[guid]['id'] = id_ + + if self.digest and seen: + self._send_digest( + digest=digest, seen=seen, sender=sender, send=send) + + self.etag = parsed.get('etag', None) + self.modified = parsed.get('modified', None) + + def _new_digest(self): + digest = _MIMEMultipart('digest') + digest['To'] = self.to # TODO: _Header(), _formataddr((recipient_name, recipient_addr)) + digest['Subject'] = 'digest for {}'.format(self.name) + digest['Message-ID'] = '<{}@dev.null.invalid>'.format(_uuid.uuid4()) + digest['User-Agent'] = _USER_AGENT + digest['X-RSS-Feed'] = self.url + return digest + + def _append_to_digest(self, digest, message): + part = _MIMEMessage(message) + part.add_header('Content-Disposition', 'attachment') + digest.attach(part) + + def _send_digest(self, digest, seen, sender, send=True): + """Send a digest message + + The date is extracted from the last message in the digest + payload. We assume that this part exists. If you don't have + any messages in the digest, don't call this function. + """ + digest['From'] = sender # TODO: _Header(), _formataddr()... + last_part = digest.get_payload()[-1] + last_message = last_part.get_payload()[0] + digest['Date'] = last_message['Date'] + + _LOG.debug('new digest for {}'.format(self)) + if send: + self._send(sender=sender, message=digest) + for (guid, id_) in seen: if guid not in self.seen: self.seen[guid] = {} self.seen[guid]['id'] = id_ - self.etag = parsed.get('etag', None) - self.modified = parsed.get('modified', None) diff --git a/test/gmane/2.config b/test/gmane/2.config new file mode 100644 index 0000000..e4abd7a --- /dev/null +++ b/test/gmane/2.config @@ -0,0 +1,4 @@ +[DEFAULT] +to = a@b.com +date-header = True +digest = True diff --git a/test/gmane/2.expected b/test/gmane/2.expected new file mode 100644 index 0000000..28ed04f --- /dev/null +++ b/test/gmane/2.expected @@ -0,0 +1,191 @@ +SENT BY: "gmane.mail.rss2email: W. Trevor King" +Content-Type: multipart/digest; boundary="===============...==" +MIME-Version: 1.0 +To: a@b.com +Subject: digest for test +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +From: "gmane.mail.rss2email: W. Trevor King" +Date: Tue, 13 Nov 2012 14:36:22 -0000 + +--===============...== +Content-Type: message/rfc822 +MIME-Version: 1.0 +Content-Disposition: attachment + +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: "gmane.mail.rss2email: W. Trevor King" +To: a@b.com +Subject: Re: new maintainer and mailing list for rss2email +Date: Mon, 12 Nov 2012 21:20:22 -0000 +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +X-RSS-ID: http://permalink.gmane.org/gmane.mail.rss2email/1 +X-RSS-URL: http://permalink.gmane.org/gmane.mail.rss2email/1 + +Alrighty, this is the first email on the list and also my first + + production mlmmj list, so I've CCed you both directly. Etienne, let + me know if you get the direct email but not the list email, in which + case I'll try and figure out what I've miss-configured ;). Lindsey, + I'll direct future rss2email stuff to my new list, so subscribe if + you're interested. + + On Mon, Nov 12, 2012 at 06:17:50PM +0100, Etienne Millon wrote: + + Wonderful. Let me know if you come up with anything during a + test-drive, and I'll get it in before the 3.0 release. + + + The 2.x config format is pure Python, which means the users can do + whatever they want there (including monkey-patching urllib2, changing + the rss2email version number, etc.). It's hard to imagine a robust + way to migrate everything a user may have done in there. + + + If you want to take a stab at it, I'll be happy to add it to a contrib + directory :). + + + Great :). + + On Mon, Nov 12, 2012 at 01:48:13PM -0500, W. Trevor King wrote: + + Done: https://github.com/wking/rss2email + + + + + +URL: http://permalink.gmane.org/gmane.mail.rss2email/1 +--===============...== +Content-Type: message/rfc822 +MIME-Version: 1.0 +Content-Disposition: attachment + +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: "gmane.mail.rss2email: Etienne Millon" +To: a@b.com +Subject: Re: new maintainer and mailing list for rss2email +Date: Tue, 13 Nov 2012 10:48:07 -0000 +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +X-RSS-ID: http://permalink.gmane.org/gmane.mail.rss2email/2 +X-RSS-URL: http://permalink.gmane.org/gmane.mail.rss2email/2 + +* W. Trevor King public.gmane.org> [121112 23:18]: + + It seems to work, though it may have been grouped together with my + MDA. I'll tell you if I don't receive a mail where I'm not CCed. + + + We're finalizing a release ATM, so it will be the perfect time to try + a new rss2email release in a couple of months. + + + The idea is more to migrate the low hanging fruits (maybe 95% of + users) so that they don't lose their config. I was thinking to just + eval() the config file and output the relevant variables to the new + format. We'll see how it turns out :) + + + Do you prefer taking pull requests there or as a discussion on the + mailing list (git send-email style) ? + + + + + +URL: http://permalink.gmane.org/gmane.mail.rss2email/2 +--===============...== +Content-Type: message/rfc822 +MIME-Version: 1.0 +Content-Disposition: attachment + +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: "gmane.mail.rss2email: W. Trevor King" +To: a@b.com +Subject: Re: new maintainer and mailing list for rss2email +Date: Tue, 13 Nov 2012 12:20:20 -0000 +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +X-RSS-ID: http://permalink.gmane.org/gmane.mail.rss2email/3 +X-RSS-URL: http://permalink.gmane.org/gmane.mail.rss2email/3 + + + send-email style, although I'll accept anything ;). + + + + + +URL: http://permalink.gmane.org/gmane.mail.rss2email/3 +--===============...== +Content-Type: message/rfc822 +MIME-Version: 1.0 +Content-Disposition: attachment + +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: "gmane.mail.rss2email: Etienne Millon" +To: a@b.com +Subject: Re: new maintainer and mailing list for rss2email +Date: Tue, 13 Nov 2012 12:42:13 -0000 +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +X-RSS-ID: http://permalink.gmane.org/gmane.mail.rss2email/4 +X-RSS-URL: http://permalink.gmane.org/gmane.mail.rss2email/4 + +* W. Trevor King public.gmane.org> [121113 13:21]: + + Ack. + + Also, confirming that the mailing list works. + + + + + +URL: http://permalink.gmane.org/gmane.mail.rss2email/4 +--===============...== +Content-Type: message/rfc822 +MIME-Version: 1.0 +Content-Disposition: attachment + +MIME-Version: 1.0 +Content-Type: text/plain; charset="us-ascii" +Content-Transfer-Encoding: 7bit +From: "gmane.mail.rss2email: W. Trevor King" +To: a@b.com +Subject: split massive package into modules +Date: Tue, 13 Nov 2012 14:36:22 -0000 +Message-ID: <...@dev.null.invalid> +User-Agent: rss2email/3.2 +https://github.com/wking/rss2email +X-RSS-Feed: gmane/feed.rss +X-RSS-ID: http://permalink.gmane.org/gmane.mail.rss2email/5 +X-RSS-URL: http://permalink.gmane.org/gmane.mail.rss2email/5 + +I just split the 1769-line rss2email.py module into a more manageable + + package with sub-modules: + + https://github.com/wking/rss2email/commit/066602efa088b4a89d67e23011613b4459db3c92 + + + + + +URL: http://permalink.gmane.org/gmane.mail.rss2email/5 +--===============...==-- diff --git a/test/test.py b/test/test.py index 79e1f8c..7ac74b3 100755 --- a/test/test.py +++ b/test/test.py @@ -23,6 +23,7 @@ del _stringio MESSAGE_ID_REGEXP = _re.compile( '^Message-ID: <[^@]*@dev.null.invalid>$', _re.MULTILINE) +BOUNDARY_REGEXP = _re.compile('===============[^=]+==') class Send (list): @@ -40,16 +41,27 @@ def clean_result(text): """Cleanup dynamic portions of the generated email headers >>> text = ( + ... 'Content-Type: multipart/digest;\\n' + ... ' boundary="===============7509425281347501533=="\\n' + ... 'MIME-Version: 1.0\\n' ... 'Date: Tue, 23 Aug 2011 15:57:37 -0000\\n' ... 'Message-ID: <9dff03db-f5a7@dev.null.invalid>\\n' ... 'User-Agent: rss2email\\n' ... ) >>> print(clean_result(text).rstrip()) + Content-Type: multipart/digest; + boundary="===============...==" + MIME-Version: 1.0 Date: Tue, 23 Aug 2011 15:57:37 -0000 Message-ID: <...@dev.null.invalid> User-Agent: rss2email """ - return MESSAGE_ID_REGEXP.sub('Message-ID: <...@dev.null.invalid>', text) + for regexp,replacement in [ + (MESSAGE_ID_REGEXP, 'Message-ID: <...@dev.null.invalid>'), + (BOUNDARY_REGEXP, '===============...=='), + ]: + text = regexp.sub(replacement, text) + return text def test(dirname=None, config_path=None, force=False): if dirname is None: -- 2.26.2