1 # Copyright (C) 2004-2013 Aaron Swartz
5 # Etienne Millon <me@emillon.org>
7 # Lindsey Smith <lindsey.smith@gmail.com>
9 # Martin 'Joey' Schulze
11 # W. Trevor King <wking@tremily.us>
13 # This file is part of rss2email.
15 # rss2email is free software: you can redistribute it and/or modify it under
16 # the terms of the GNU General Public License as published by the Free Software
17 # Foundation, either version 2 of the License, or (at your option) version 3 of
20 # rss2email is distributed in the hope that it will be useful, but WITHOUT ANY
21 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
22 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
24 # You should have received a copy of the GNU General Public License along with
25 # rss2email. If not, see <http://www.gnu.org/licenses/>.
27 """Define the ``Feed`` class for handling a single feed
30 import collections as _collections
31 from email.utils import formataddr as _formataddr
32 import hashlib as _hashlib
import re as _re
34 import socket as _socket
import time as _time
36 import urllib.error as _urllib_error
37 import urllib.request as _urllib_request
import uuid as _uuid
39 import xml.sax as _sax
40 import xml.sax.saxutils as _saxutils
42 import feedparser as _feedparser
43 import html2text as _html2text
46 from . import __version__
from . import __url__
47 from . import LOG as _LOG
48 from . import config as _config
49 from . import email as _email
50 from . import error as _error
51 from . import util as _util
# Identify ourselves to feed servers.
_feedparser.USER_AGENT = 'rss2email/{} +{}'.format(__version__, __url__)
# Install a default opener now so a ProxyHandler-based opener can be
# swapped in per-fetch later.
_urllib_request.install_opener(_urllib_request.build_opener())

# Collect the socket error classes available on this platform so network
# failures can be caught with a single `except _SOCKET_ERRORS:` clause.
# (The list must be initialized before the loop below appends to it.)
_SOCKET_ERRORS = []
for e in ['error', 'gaierror']:
    if hasattr(_socket, e):
        _SOCKET_ERRORS.append(getattr(_socket, e))
del e  # cleanup namespace
_SOCKET_ERRORS = tuple(_SOCKET_ERRORS)
65 """Utility class for feed manipulation and storage.
69 >>> from .config import CONFIG
72 ... name='test-feed', url='http://example.com/feed.atom', to='a@b.com')
74 test-feed (http://example.com/feed.atom -> a@b.com)
78 'user@rss2email.invalid'
80 >>> feed.from_email = 'a@b.com'
81 >>> feed.save_to_config()
82 >>> feed.config.write(sys.stdout) # doctest: +REPORT_UDIFF, +ELLIPSIS
84 from = user@rss2email.invalid
89 url = http://example.com/feed.atom
94 >>> feed.etag = 'dummy etag'
95 >>> string = pickle.dumps(feed)
96 >>> feed = pickle.loads(string)
97 >>> feed.load_from_config(config=CONFIG)
101 'http://example.com/feed.atom'
103 Names can only contain ASCII letters, digits, and '._-'. Here the
104 invalid space causes an exception:
106 >>> Feed(name='invalid name')
107 Traceback (most recent call last):
109 rss2email.error.InvalidFeedName: invalid feed name 'invalid name'
111 You must define a URL:
113 >>> Feed(name='feed-without-a-url', to='a@b.com').run(send=False)
114 Traceback (most recent call last):
116 rss2email.error.InvalidFeedConfig: invalid feed configuration {'url': None}
121 >>> CONFIG['DEFAULT']['to'] = ''
122 >>> test_section = CONFIG.pop('feed.test-feed')
# NOTE(review): this listing has dropped lines (the embedded original line
# numbers are non-contiguous), so several list literals below are shown
# truncated.
# Feed names may only contain ASCII letters, digits, and '._-'.
124 _name_regexp = _re.compile('^[a-zA-Z0-9._-]+$')
126 # saved/loaded from feed.dat using __getstate__/__setstate__.
127 _dynamic_attributes = [
134 ## saved/loaded from ConfigParser instance
135 # attributes that aren't in DEFAULT
136 _non_default_configured_attributes = [
139 # attributes that are in DEFAULT
140 _default_configured_attributes = [
141 key.replace('-', '_') for key in _config.CONFIG['DEFAULT'].keys()]
142 _default_configured_attributes[
143 _default_configured_attributes.index('from')
144 ] = 'from_email' # `from` is a Python keyword
145 # all attributes that are saved/loaded from .config
146 _configured_attributes = (
147 _non_default_configured_attributes + _default_configured_attributes)
148 # attribute name -> .config option
149 _configured_attribute_translations = dict(
150 (attr,attr) for attr in _non_default_configured_attributes)
151 _configured_attribute_translations.update(dict(
152 zip(_default_configured_attributes,
153 _config.CONFIG['DEFAULT'].keys())))
154 # .config option -> attribute name
155 _configured_attribute_inverse_translations = dict(
156 (v,k) for k,v in _configured_attribute_translations.items())
158 # hints for value conversion
159 _boolean_attributes = [
161 'use_publisher_email',
169 'links_after_each_paragraph',
174 _integer_attributes = [
184 def __init__(self, name=None, url=None, to=None, config=None):
# Validate the name, seed the dynamic attributes from their class-level
# defaults via __setstate__, then overlay any configured values.
185 self._set_name(name=name)
187 self.__setstate__(dict(
188 (attr, getattr(self, attr))
189 for attr in self._dynamic_attributes))
190 self.load_from_config(config=config)
# NOTE(review): the two returns below are the bodies of __str__ and
# __repr__ respectively; their `def` lines are missing from this listing.
197 return '{} ({} -> {})'.format(self.name, self.url, self.to)
200 return '<Feed {}>'.format(str(self))
# Pickle support: capture the per-feed dynamic state (the attributes
# stored in feed.dat rather than in the config file).
# NOTE(review): a line is missing between the docstring and the
# generator expression below (presumably `return dict(`).
202 def __getstate__(self):
203 "Save dynamic attributes"
205 (key,getattr(self,key)) for key in self._dynamic_attributes)
207 get_state = __getstate__ # make it publicly accessible
def __setstate__(self, state):
    "Restore dynamic attributes"
    # Refuse state dicts that don't cover exactly the dynamic-attribute
    # set -- a mismatch means the state came from an incompatible version.
    if sorted(state.keys()) != sorted(self._dynamic_attributes):
        raise ValueError(state)
    self._set_name(name=state['name'])
    self.__dict__.update(state)

set_state = __setstate__  # make it publicly accessible
219 def save_to_config(self):
220 "Save configured attributes"
221 data = _collections.OrderedDict()
222 default = self.config['DEFAULT']
# Serialize each configured attribute, writing only values that differ
# from DEFAULT (or that have no DEFAULT entry at all) into this feed's
# own config section.
223 for attr in self._configured_attributes:
224 key = self._configured_attribute_translations[attr]
225 value = getattr(self, attr)
226 if value is not None:
227 value = self._get_configured_option_value(
228 attribute=attr, value=value)
229 if (attr in self._non_default_configured_attributes or
230 value != default[key]):
# NOTE(review): the line storing the value (presumably
# `data[key] = value`) is missing from this listing.
232 self.config[self.section] = data
234 def load_from_config(self, config=None):
235 "Restore configured attributes"
# Fall back to the global CONFIG, and to the DEFAULT section when this
# feed has no section of its own.
237 config = _config.CONFIG
239 if self.section in self.config:
240 data = self.config[self.section]
242 data = self.config['DEFAULT']
# Cross-check the section's keys against the expected option names in
# both directions before applying anything.
243 keys = sorted(data.keys())
244 expected = sorted(self._configured_attribute_translations.values())
247 if (key not in keys and
248 key not in self._non_default_configured_attributes):
249 raise _error.InvalidFeedConfig(
250 setting=key, feed=self,
251 message='missing configuration key: {}'.format(key))
253 if key not in expected:
254 raise _error.InvalidFeedConfig(
255 setting=key, feed=self,
256 message='extra configuration key: {}'.format(key))
# Translate option names back to attribute names, convert the values,
# then install them on the instance.
258 (self._configured_attribute_inverse_translations[k],
259 self._get_configured_attribute_value(
260 attribute=self._configured_attribute_inverse_translations[k],
262 for k in data.keys())
263 for attr in self._non_default_configured_attributes:
266 self.__dict__.update(data)
def _get_configured_option_value(self, attribute, value):
    """Serialize an attribute's value into a config-file string.

    List-valued attributes become comma-separated strings; everything
    else is rendered with str().  Without the final str() fallback this
    method returned None for non-list values, corrupting the saved
    config.
    """
    if value and attribute in self._list_attributes:
        return ', '.join(value)
    return str(value)
def _get_configured_attribute_value(self, attribute, key, data):
    """Deserialize a config-file entry into an attribute value.

    The class-level hint lists pick the conversion: boolean, integer,
    comma-separated list, or (fallback) the raw string.
    """
    if attribute in self._boolean_attributes:
        return data.getboolean(key)
    elif attribute in self._integer_attributes:
        return data.getint(key)
    elif attribute in self._list_attributes:
        return [x.strip() for x in data[key].split(',')]
    # Fallback: plain string attributes are returned as-is.  Without
    # this, the method fell off the end and returned None for them.
    return data[key]
283 """Reset dynamic data
def _set_name(self, name):
    """Validate and store the feed name and derive its config section.

    Raises InvalidFeedName for names containing anything other than
    ASCII letters, digits, or '._-'.
    """
    if not self._name_regexp.match(name):
        raise _error.InvalidFeedName(name=name, feed=self)
    # This assignment was missing: the line below reads self.name.
    self.name = name
    self.section = 'feed.{}'.format(self.name)
# _fetch (its `def` line is missing from this listing): fetch and parse
# the feed with feedparser, honoring per-feed proxy and timeout settings
# and passing the cached ETag/modified values for conditional GETs.
296 """Fetch and parse a feed using feedparser.
299 ... name='test-feed',
300 ... url='http://feeds.feedburner.com/allthingsrss/hJBr')
301 >>> parsed = feed._fetch()
305 _LOG.info('fetch {}'.format(self))
307 raise _error.InvalidFeedConfig(setting='url', feed=self)
# proxy/timeout come from the feed's own section when present, else DEFAULT.
308 if self.section in self.config:
309 config = self.config[self.section]
311 config = self.config['DEFAULT']
312 proxy = config['proxy']
313 timeout = config.getint('feed-timeout')
316 kwargs['handlers'] = [_urllib_request.ProxyHandler({'http':proxy})]
# TimeLimitedFunction aborts the fetch after `timeout` seconds.
317 f = _util.TimeLimitedFunction(timeout, _feedparser.parse)
318 return f(self.url, self.etag, modified=self.modified, **kwargs)
320 def _process(self, parsed):
# Walk entries oldest-first so emails go out in chronological order.
# NOTE(review): the lines yielding the processed result are missing from
# this listing (presumably `if processed: yield processed`).
321 _LOG.info('process {}'.format(self))
322 self._check_for_errors(parsed)
323 for entry in reversed(parsed.entries):
324 _LOG.debug('processing {}'.format(entry.get('id', 'no-id')))
325 processed = self._process_entry(parsed=parsed, entry=entry)
329 def _check_for_errors(self, parsed):
# Inspect a feedparser result for HTTP-, network-, and parser-level
# problems, logging warnings and raising rss2email errors as needed.
331 status = getattr(parsed, 'status', 200)
332 _LOG.debug('HTTP status {}'.format(status))
# On a redirect, remember the new feed location.
334 _LOG.info('redirect {} from {} to {}'.format(
335 self.name, self.url, parsed['url']))
336 self.url = parsed['url']
337 elif status not in [200, 302, 304]:
338 raise _error.HTTPError(status=status, feed=self)
340 http_headers = parsed.get('headers', {})
342 _LOG.debug('HTTP headers: {}'.format(http_headers))
344 _LOG.warning('could not get HTTP headers: {}'.format(self))
# Heuristics for servers returning an HTML error page or an empty body.
347 if 'html' in http_headers.get('content-type', 'rss'):
348 _LOG.warning('looks like HTML: {}'.format(self))
350 if http_headers.get('content-length', '1') == '0':
351 _LOG.warning('empty page: {}'.format(self))
354 version = parsed.get('version', None)
356 _LOG.debug('feed version {}'.format(version))
358 _LOG.warning('unrecognized version: {}'.format(self))
# Classify feedparser's bozo_exception to give a precise error message.
361 exc = parsed.get('bozo_exception', None)
362 if isinstance(exc, _socket.timeout):
363 _LOG.error('timed out: {}'.format(self))
365 elif isinstance(exc, _SOCKET_ERRORS):
367 _LOG.error('{}: {}'.format(exc, self))
369 elif (hasattr(exc, 'reason') and
370 isinstance(exc.reason, _urllib_error.URLError)):
371 if isinstance(exc.reason, _SOCKET_ERRORS):
372 reason = exc.reason.args[1]
375 _LOG.error('{}: {}'.format(exc, self))
377 elif isinstance(exc, _feedparser.zlib.error):
378 _LOG.error('broken compression: {}'.format(self))
380 elif isinstance(exc, (IOError, AttributeError)):
381 _LOG.error('{}: {}'.format(exc, self))
383 elif isinstance(exc, KeyboardInterrupt):
385 elif isinstance(exc, _sax.SAXParseException):
386 _LOG.error('sax parsing error: {}: {}'.format(exc, self))
388 elif parsed.bozo or exc:
390 exc = "can't process"
391 _LOG.error('processing error: {}: {}'.format(exc, self))
# NOTE(review): `feed` is not defined anywhere in this method -- the
# raise below would hit a NameError; presumably this should be
# `feed=self`.  Confirm against upstream before changing.
395 status in [200, 302] and
396 not parsed.entries and
398 raise _error.ProcessingError(parsed=parsed, feed=feed)
400 def _process_entry(self, parsed, entry):
# Build a (guid, id, sender, message) tuple for one feed entry, or
# return None when the entry has already been seen.
401 id_ = self._get_entry_id(entry)
402 # If .trust_guid isn't set, we get back hashes of the content.
403 # Instead of letting these run wild, we put them in context
404 # by associating them with the actual ID (if it exists).
405 guid = entry.get('id', id_)
406 if isinstance(guid, dict):
# NOTE(review): dict.values() views are not subscriptable in Python 3;
# this should be e.g. `list(guid.values())[0]`.
407 guid = guid.values()[0]
408 if guid in self.seen:
409 if self.seen[guid]['id'] == id_:
410 _LOG.debug('already seen {}'.format(id_))
411 return # already seen
412 sender = self._get_entry_email(parsed=parsed, entry=entry)
413 subject = self._get_entry_title(entry)
414 extra_headers = _collections.OrderedDict((
415 ('Date', self._get_entry_date(entry)),
416 ('Message-ID', '<{}@dev.null.invalid>'.format(_uuid.uuid4())),
417 ('User-Agent', 'rss2email'),
418 ('X-RSS-Feed', self.url),
420 ('X-RSS-URL', self._get_entry_link(entry)),
421 ('X-RSS-TAGS', self._get_entry_tags(entry)),
423 for k,v in extra_headers.items(): # remove empty tags, etc.
# `bonus_header` lets the user add literal "Key: value" header lines.
426 if self.bonus_header:
427 for header in self.bonus_header.splitlines():
429 key,value = header.split(':', 1)
430 extra_headers[key.strip()] = value.strip()
433 'malformed bonus-header: {}'.format(
# Assemble the final email message from the processed content.
436 content = self._get_entry_content(entry)
437 content = self._process_entry_content(
438 entry=entry, content=content, subject=subject)
439 message = _email.get_message(
443 body=content['value'],
444 content_type=content['type'].split('/', 1)[1],
445 extra_headers=extra_headers,
447 section=self.section)
448 return (guid, id_, sender, message)
def _get_entry_id(self, entry):
    """Get the best ID from an entry.

    With `trust_guid` set, use the feed-supplied ID when present;
    otherwise (or when the entry has no ID) fall back to a SHA-1 hash
    of the entry content, link, or title, in that order of preference.
    """
    if self.trust_guid:
        if getattr(entry, 'id', None):
            # Newer versions of feedparser could return a dictionary
            if isinstance(entry.id, dict):
                # Python 3 fix: dict .values() views are not
                # subscriptable, so materialize before indexing.
                return list(entry.id.values())[0]
            return entry.id
    content = self._get_entry_content(entry)
    content_value = content['value'].strip()
    if content_value:
        return _hashlib.sha1(
            content_value.encode('unicode-escape')).hexdigest()
    elif getattr(entry, 'link', None):
        return _hashlib.sha1(
            entry.link.encode('unicode-escape')).hexdigest()
    elif getattr(entry, 'title', None):
        return _hashlib.sha1(
            entry.title.encode('unicode-escape')).hexdigest()
def _get_entry_link(self, entry):
    """Return the entry's link URL, or None when the entry lacks one."""
    link = entry.get('link', None)
    return link
473 def _get_entry_title(self, entry):
# Prefer the entry's own title (converted from HTML when necessary);
# otherwise fall back to a snippet of the entry content.
# NOTE(review): the fallback branch header (`else:`) and the final
# `return title` appear to be missing from this listing.
474 if hasattr(entry, 'title_detail') and entry.title_detail:
475 title = entry.title_detail.value
476 if 'html' in entry.title_detail.type:
477 title = _html2text.html2text(title)
479 content = self._get_entry_content(entry)
480 value = content['value']
481 if content['type'] in ('text/html', 'application/xhtml+xml'):
482 value = _html2text.html2text(value)
# Header values must be single-line.
484 title = title.replace('\n', ' ').strip()
487 def _get_entry_date(self, entry):
# Pick the first available parsed date following `date_header_order`
# (e.g. published/updated), defaulting to the current UTC time, and
# format it as an RFC 2822 Date header value.
488 datetime = _time.gmtime()
490 for datetype in self.date_header_order:
491 kind = datetype + '_parsed'
492 if entry.get(kind, None):
493 datetime = entry[kind]
495 return _time.strftime("%a, %d %b %Y %H:%M:%S -0000", datetime)
497 def _get_entry_name(self, parsed, entry):
# Build a human-friendly sender display name from the feed title and
# the entry/feed author details (the doctest below is truncated in
# this listing).
500 >>> import feedparser
501 >>> f = Feed(name='test-feed')
502 >>> parsed = feedparser.parse(
503 ... '<feed xmlns="http://www.w3.org/2005/Atom">\\n'
506 ... ' <name>Example author</name>\\n'
507 ... ' <email>me@example.com</email>\\n'
508 ... ' <url>http://example.com/</url>\\n'
513 >>> entry = parsed.entries[0]
514 >>> f.friendly_name = False
515 >>> f._get_entry_name(parsed, entry)
517 >>> f.friendly_name = True
518 >>> f._get_entry_name(parsed, entry)
521 if not self.friendly_name:
525 parts.append(feed.get('title', ''))
# Prefer the entry author's name, then the feed author's.
526 for x in [entry, feed]:
527 if 'name' in x.get('author_detail', []):
528 if x.author_detail.name:
531 parts.append(x.author_detail.name)
# Last resort: the publisher's name, when permitted by configuration.
533 if not ''.join(parts) and self.use_publisher_email:
534 if 'name' in feed.get('publisher_detail', []):
537 parts.append(feed.publisher_detail.name)
538 return _html2text.unescape(''.join(parts))
540 def _validate_email(self, email, default=None):
541 """Do a basic quality check on email address
543 Return `default` if the address doesn't appear to be
544 well-formed. If `default` is `None`, return
547 >>> f = Feed(name='test-feed')
548 >>> f._validate_email('valid@example.com', 'default@example.com')
550 >>> f._validate_email('invalid@', 'default@example.com')
551 'default@example.com'
552 >>> f._validate_email('@invalid', 'default@example.com')
553 'default@example.com'
554 >>> f._validate_email('invalid', 'default@example.com')
555 'default@example.com'
# A well-formed address has exactly one '@' with text on both sides.
# NOTE(review): the branch returning `default` (and the final
# `return email`) are missing from this listing; the visible
# `return self.from_email` is the default-is-None fallback.
557 parts = email.split('@')
558 if len(parts) != 2 or '' in parts:
560 return self.from_email
564 def _get_entry_address(self, parsed, entry):
565 """Get the best From email address ('<jdoe@a.com>')
567 If the best guess isn't well-formed (something@somthing.com),
568 use `self.from_email` instead.
# Preference order: entry author, feed author, then (only when
# use_publisher_email is set) publisher details or errorreportsto.
571 return self.from_email
573 if 'email' in entry.get('author_detail', []):
574 return self._validate_email(entry.author_detail.email)
575 elif 'email' in feed.get('author_detail', []):
576 return self._validate_email(feed.author_detail.email)
577 if self.use_publisher_email:
578 if 'email' in feed.get('publisher_detail', []):
579 return self._validate_email(feed.publisher_detail.email)
580 if feed.get('errorreportsto', None):
581 return self._validate_email(feed.errorreportsto)
582 _LOG.debug('no sender address found, fallback to default')
583 return self.from_email
def _get_entry_email(self, parsed, entry):
    """Get the best From email address ('John <jdoe@a.com>')."""
    # Combine the friendly display name and the bare address into an
    # RFC 5322 From value.
    return _formataddr((
        self._get_entry_name(parsed=parsed, entry=entry),
        self._get_entry_address(parsed=parsed, entry=entry),
    ))
592 def _get_entry_tags(self, entry):
593 """Add post tags, if available
595 >>> f = Feed(name='test-feed')
596 >>> f._get_entry_tags({
597 ... 'tags': [{'term': 'tag1',
599 ... 'label': None}]})
601 >>> f._get_entry_tags({
602 ... 'tags': [{'term': 'tag1',
607 ... 'label': None}]})
610 Test some troublesome cases. No tags:
612 >>> f._get_entry_tags({})
616 >>> f._get_entry_tags({'tags': []})
618 Tags without a ``term`` entry:
620 >>> f._get_entry_tags({
621 ... 'tags': [{'scheme': None,
622 ... 'label': None}]})
624 Tags with an empty term:
626 >>> f._get_entry_tags({
627 ... 'tags': [{'term': '',
629 ... 'label': None}]})
# Collect the non-empty tag terms and join them comma-separated.
631 taglist = [tag['term'] for tag in entry.get('tags', [])
632 if tag.get('term', '')]
634 return ','.join(taglist)
636 def _get_entry_content(self, entry):
637 """Select the best content from an entry.
639 Returns a feedparser content dict.
642 # * We have a bunch of potential contents.
643 # * We go thru looking for our first choice.
644 # (HTML or text, depending on self.html_mail)
645 # * If that doesn't work, we go thru looking for our second choice.
646 # * If that still doesn't work, we just take the first one.
648 # Possible future improvement:
649 # * Instead of just taking the first one
650 # pick the one in the "best" language.
651 # * HACK: hardcoded .html_mail, should take a tuple of media types
# Candidates are the entry's content list plus its summary, if any.
652 contents = list(entry.get('content', []))
653 if entry.get('summary_detail', None):
654 contents.append(entry.summary_detail)
# Media-type preference order depends on whether HTML mail is wanted.
656 types = ['text/html', 'text/plain']
658 types = ['text/plain', 'text/html']
659 for content_type in types:
660 for content in contents:
661 if content['type'] == content_type:
# Fallback when the entry supplies no content at all.
665 return {'type': 'text/plain', 'value': ''}
667 def _process_entry_content(self, entry, content, subject):
668 "Convert entry content to the requested format."
669 link = self._get_entry_link(entry)
# HTML branch: wrap the content in a header/body/footer page,
# optionally styled with the user's CSS.
676 if self.use_css and self.css:
678 ' <style type="text/css">',
686 '<h1 class="header"><a href="{}">{}</a></h1>'.format(
688 '<div id="body"><table><tr><td>',
# HTML content is embedded as-is; plain text is escaped first.
690 if content['type'] in ('text/html', 'application/xhtml+xml'):
691 lines.append(content['value'].strip())
693 lines.append(_saxutils.escape(content['value'].strip()))
694 lines.append('</td></tr></table></div>')
696 '<div class="footer">'
697 '<p>URL: <a href="{0}">{0}</a></p>'.format(link),
# Append enclosure and "via" links to the footer.
699 for enclosure in getattr(entry, 'enclosures', []):
700 if getattr(enclosure, 'url', None):
702 '<p>Enclosure: <a href="{0}">{0}</a></p>'.format(
704 if getattr(enclosure, 'src', None):
706 '<p>Enclosure: <a href="{0}">{0}</a></p>'.format(
709 '<p><img src="{}" /></p>'.format(enclosure.src))
710 for elink in getattr(entry, 'links', []):
711 if elink.get('rel', None) == 'via':
714 if elink.get('title', None):
715 title = elink['title']
716 lines.append('<p>Via <a href="{}">{}</a></p>'.format(
724 content['type'] = 'text/html'
725 content['value'] = '\n'.join(lines)
727 else: # not self.html_mail
# Plain-text branch: convert HTML via html2text when needed, then
# append URL, enclosure, and "via" lines.
728 if content['type'] in ('text/html', 'application/xhtml+xml'):
729 lines = [_html2text.html2text(content['value'])]
731 lines = [content['value']]
733 lines.append('URL: {}'.format(link))
734 for enclosure in getattr(entry, 'enclosures', []):
735 if getattr(enclosure, 'url', None):
736 lines.append('Enclosure: {}'.format(enclosure.url))
737 if getattr(enclosure, 'src', None):
738 lines.append('Enclosure: {}'.format(enclosure.src))
739 for elink in getattr(entry, 'links', []):
740 if elink.get('rel', None) == 'via':
743 if elink.get('title', None):
744 title = elink['title']
745 lines.append('Via: {} {}'.format(title, url))
746 content['type'] = 'text/plain'
747 content['value'] = '\n'.join(lines)
def _send(self, sender, message):
    """Send `message` to `self.to` via the rss2email email layer.

    Falls back to the DEFAULT config section when this feed has no
    section of its own (without the fallback assignment, the `if`
    below had an empty suite).
    """
    _LOG.info('send message for {}'.format(self))
    section = self.section
    if section not in self.config:
        section = 'DEFAULT'
    _email.send(sender=sender, recipient=self.to, message=message,
                config=self.config, section=section)
758 def run(self, send=True):
759 """Fetch and process the feed, mailing entry emails.
762 ... name='test-feed',
763 ... url='http://feeds.feedburner.com/allthingsrss/hJBr')
764 >>> def send(sender, message):
765 ... print('send from {}:'.format(sender))
766 ... print(message.as_string())
767 >>> feed._send = send
768 >>> feed.to = 'jdoe@dummy.invalid'
769 >>> #parsed = feed.run() # enable for debugging
# A `to` address is required before any entries are processed.
772 raise _error.NoToEmailAddress(feed=self)
773 parsed = self._fetch()
774 for (guid, id_, sender, message) in self._process(parsed):
775 _LOG.debug('new message: {}'.format(message['Subject']))
777 self._send(sender=sender, message=message)
# Record the entry as seen so it isn't mailed again, then cache the
# HTTP validators for the next conditional fetch.
778 if guid not in self.seen:
780 self.seen[guid]['id'] = id_
781 self.etag = parsed.get('etag', None)
782 self.modified = parsed.get('modified', None)