From 88cc8df510a55b6616dae274851110a7b162ee23 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Sun, 13 Oct 2013 14:54:29 -0700 Subject: [PATCH] command: Sluggify feed names on opmlimport MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Gaëtan Harter writes [1]: > Importing the following opml file fails with `invalid feed name > 'Arch Linux: Recent news updates` > > <?xml version="1.0" encoding="UTF-8"?> > <opml version="1.0"> > <head> > <title>Google reader export</title> > </head> > <body> > <outline text="Arch Linux: Recent news updates" > title="Arch Linux: Recent news updates" type="rss" > xmlUrl="http://www.archlinux.org/feeds/news/" > htmlUrl="https://www.archlinux.org/news/" /> > </body> > </opml> > > It fails because the `text` field is used directly as `name` for > creating a Feed object. ConfigParser can handle colons and accented characters in their section names [2], but Feed._set_name checks names against Feed._name_regexp which only allows ASCII letters, digits, periods, underscores, and the hyphen-minus (U+002D). Add an inverse name_slug_regexp to opmlimport that replaces any runs of illegal characters with a single hyphen-minus, to avoid crashing if the text attribute contains anything illegal. [1]: https://github.com/wking/rss2email/issues/24#issuecomment-26224593 [2]: http://docs.python.org/3/library/configparser.html#supported-ini-file-structure Reported-by: Gaëtan Harter Signed-off-by: W. 
Trevor King --- rss2email/command.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rss2email/command.py b/rss2email/command.py index 54c0808..aca5de8 100644 --- a/rss2email/command.py +++ b/rss2email/command.py @@ -17,6 +17,7 @@ """rss2email commands """ +import re as _re import sys as _sys import xml.dom.minidom as _minidom import xml.sax.saxutils as _saxutils @@ -131,6 +132,7 @@ def opmlimport(feeds, args): raise _error.OPMLReadError() from e if args.file: f.close() + name_slug_regexp = _re.compile('[^a-zA-Z0-9._-]+') for feed in new_feeds: if feed.hasAttribute('xmlUrl'): url = _saxutils.unescape(feed.getAttribute('xmlUrl')) @@ -138,7 +140,7 @@ def opmlimport(feeds, args): if feed.hasAttribute('text'): text = _saxutils.unescape(feed.getAttribute('text')) if text != url: - name = text + name = name_slug_regexp.sub('-', text) feed = feeds.new_feed(name=name, url=url) _LOG.info('add new feed {}'.format(feed)) feeds.save() -- 2.26.2