From: W. Trevor King
Date: Sun, 13 Oct 2013 21:54:29 +0000 (-0700)
Subject: command: Sluggify feed names on opmlimport
X-Git-Tag: v3.8~1^2~2
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=88cc8df510a55b6616dae274851110a7b162ee23;p=rss2email.git

command: Sluggify feed names on opmlimport

Gaëtan Harter writes [1]:

> Importing the following opml file fails with `invalid feed name
> 'Arch Linux: Recent news updates'`
>
> <?xml version="1.0" encoding="UTF-8"?>
> <opml version="1.0">
>     <head>
>         <title>Google reader export</title>
>     </head>
>     <body>
>         <outline text="Arch Linux: Recent news updates"
>             title="Arch Linux: Recent news updates" type="rss"
>             xmlUrl="http://www.archlinux.org/feeds/news/"
>             htmlUrl="https://www.archlinux.org/news/" />
>     </body>
> </opml>
>
> It fails because the `text` field is used directly as `name` for
> creating a Feed object.

ConfigParser can handle colons and accented characters in their section
names [2], but Feed._set_name checks names against Feed._name_regexp
which only allows ASCII letters, digits, periods, underscores, and the
hyphen-minus (U+002D). Add an inverse name_slug_regexp to opmlimport
that replaces any runs of illegal characters with a single hyphen-minus,
to avoid crashing if the text attribute contains anything illegal.

[1]: https://github.com/wking/rss2email/issues/24#issuecomment-26224593
[2]: http://docs.python.org/3/library/configparser.html#supported-ini-file-structure

Reported-by: Gaëtan Harter
Signed-off-by: W.
Trevor King --- diff --git a/rss2email/command.py b/rss2email/command.py index 54c0808..aca5de8 100644 --- a/rss2email/command.py +++ b/rss2email/command.py @@ -17,6 +17,7 @@ """rss2email commands """ +import re as _re import sys as _sys import xml.dom.minidom as _minidom import xml.sax.saxutils as _saxutils @@ -131,6 +132,7 @@ def opmlimport(feeds, args): raise _error.OPMLReadError() from e if args.file: f.close() + name_slug_regexp = _re.compile('[^a-zA-Z0-9._-]+') for feed in new_feeds: if feed.hasAttribute('xmlUrl'): url = _saxutils.unescape(feed.getAttribute('xmlUrl')) @@ -138,7 +140,7 @@ def opmlimport(feeds, args): if feed.hasAttribute('text'): text = _saxutils.unescape(feed.getAttribute('text')) if text != url: - name = text + name = name_slug_regexp.sub('-', text) feed = feeds.new_feed(name=name, url=url) _LOG.info('add new feed {}'.format(feed)) feeds.save()