# Copyright (C) 2009-2010, William Trevor King <wking@tremily.us>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""Produce `RFC 4287`_ compliant Atom 1.0 XML from the command line.

.. _RFC 4287: http://tools.ietf.org/html/rfc4287
"""
from optparse import OptionParser
import sys
import time

from lxml import etree
from lxml import objectify
from lxml.html import XHTML_NAMESPACE
# Release number written to the `version` attribute of <generator>.
__version__ = '0.2'
# Text content of the <generator> element of a new feed.
GENERATOR_NAME = u'atomgen [based on lxml]'

# XML namespace of Atom 1.0 elements.
ATOM_NAMESPACE = 'http://www.w3.org/2005/Atom'

# Clark-notation ("{namespace}") prefixes for building qualified tag names.
ATOM = '{%s}' % ATOM_NAMESPACE
XHTML = '{%s}' % XHTML_NAMESPACE

# Prefix -> namespace map passed to lxml when the <feed> root is created.
# The `None` key makes Atom the default (unprefixed) namespace.
NSMAP = {
    None: ATOM_NAMESPACE,
    'html': XHTML_NAMESPACE,
    }
45 def _id(url, time_published):
46 """Convert a URL to an Atom ID
48 Following Mark Pilgrim's suggestions_.
49 >>> _id('http://example.com/blog#5', 0)
50 u'tag:example.com,1970-01-01:/blog/5'
52 Tags conform to RFC4151 tag syntax. You're restricted to one post
53 per day with a single url.
55 .. _suggestions: http://diveintomark.org/archives/2004/05/28/howto-atom-id
56 .. _tag syntax: http://tools.ietf.org/html/rfc4151#section-2.1
58 # Discard everything before the domain name
60 if url.startswith(start):
61 url = url[len(start):]
62 # Change all # characters to /
63 url = url.replace(u'#', u'/')
64 # Extract the domain name
65 end_of_domain_index = url.find(u'/')
66 if end_of_domain_index == -1:
70 domain = url[0:end_of_domain_index]
71 trailer = url[end_of_domain_index:]
72 # Immediately after the domain name, insert a comma, year-month-date, colon
73 time_string = time.strftime("%Y-%m-%d", time.gmtime(time_published))
74 url = u"tag:%s,%s:%s" % (domain, time_string, trailer)
77 def _timestamp(seconds=None):
78 """Return an `RFC 3339`_ timestamp.
80 Complete date plus hours, minutes and seconds::
82 YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)
84 Where the the trailing `Z` designates times in UTC.
87 u'1970-01-01T00:00:00Z'
89 .. _RFC 3339: http://www.ietf.org/rfc/rfc3339.txt
91 utc = time.gmtime(seconds)
92 string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
93 return unicode(string)
class Command (object):
    """A command exposed via the command line.

    Subclasses set `name`, the word that selects the command, and
    implement `_get_parser` and `_run`.
    """
    name = None

    def run(self, argv):
        """Parse `argv` with this command's parser and execute the command.

        Returns whatever `_run` returns.
        """
        parser = self._get_parser()
        options,args = parser.parse_args(argv)
        return self._run(options, args)

    def _get_parser(self):
        """Return the option parser for this command's arguments."""
        raise NotImplementedError()

    def _run(self, options, args):
        """Carry out the command for parsed `options` and positional `args`."""
        raise NotImplementedError()
class NewFeedCommand (Command):
    """Create a new feed.

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...               '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...               '--author-email', 'wking@tremily.us',
    ...               'http://www.physics.drexel.edu/~wking/phys201'])
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8') # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@tremily.us</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
    </feed>
    <BLANKLINE>
    """
    name = 'new'

    def _get_parser(self):
        """Return the parser for the `new` command's options."""
        usage = ['%prog [general-options] new [options] URI',
                 '',
                 'Where',
                 ' URI is a URI used to generate a unique ID for the feed']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-t', '--title', dest='title', metavar='TITLE',
                          help='Feed title')
        parser.add_option('-a', '--author', dest='author', metavar='NAME',
                          help='Feed author name')
        parser.add_option('-u', '--author-uri', dest='author_uri',
                          metavar='URI', help='Feed author homepage URI')
        parser.add_option('-e', '--author-email', dest='author_email',
                          metavar='EMAIL', help='Feed author email address')
        return parser

    def _run(self, options, args):
        """Build and return a new `<feed>` element.

        `args[0]` is the URI the feed ID is derived from.  The title
        and author children are only created for options that were
        given.
        """
        uri = args[0]
        # One time value feeds both the ID's date and <updated>.
        tpub = time.time()

        feed = objectify.Element(ATOM + 'feed', nsmap=NSMAP)

        etree.SubElement(feed, ATOM + 'id')
        feed.id = _id(uri, tpub)

        if options.title:
            etree.SubElement(feed, ATOM + 'title')
            feed.title = options.title

        if options.author or options.author_email or options.author_uri:
            etree.SubElement(feed, ATOM + 'author')
            if options.author:
                etree.SubElement(feed.author, ATOM + 'name')
                feed.author.name = options.author
            if options.author_email:
                etree.SubElement(feed.author, ATOM + 'email')
                feed.author.email = options.author_email
            if options.author_uri:
                etree.SubElement(feed.author, ATOM + 'uri')
                feed.author.uri = options.author_uri

        etree.SubElement(feed, ATOM + 'generator')
        feed.generator = GENERATOR_NAME
        feed.generator.attrib['version'] = __version__

        etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)

        return feed
class AddEntryCommand (Command):
    """Add an entry to an existing feed.

    >>> from os import close, remove
    >>> from StringIO import StringIO
    >>> from tempfile import mkstemp

    First, create a feed to edit.

    >>> c = NewFeedCommand()
    >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
    ...               '--author-uri', 'http://www.physics.drexel.edu/~wking/',
    ...               '--author-email', 'wking@tremily.us',
    ...               'http://www.physics.drexel.edu/~wking/phys201'])
    >>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-')
    >>> close(fd)
    >>> root = etree.ElementTree(feed)
    >>> root.write(path)

    Now add an entry to that feed.

    >>> c = AddEntryCommand()
    >>> stdin = sys.stdin
    >>> sys.stdin = StringIO('Changes will be noted in this feed.')
    >>> feed = c.run(['--input', path, 'Feed purpose',
    ...               'http://www.physics.drexel.edu/~wking/phys201'])
    >>> sys.stdin = stdin
    >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
    ...     encoding='UTF-8') # doctest: +ELLIPSIS, +REPORT_UDIFF
    <?xml version='1.0' encoding='UTF-8'?>
    <feed xmlns="http://www.w3.org/2005/Atom">
      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
      <title>Physics 201</title>
      <author>
        <name>W. Trevor King</name>
        <email>wking@tremily.us</email>
        <uri>http://www.physics.drexel.edu/~wking/</uri>
      </author>
      <generator version="0.2">atomgen [based on lxml]</generator>
      <updated>...</updated>
      <entry>
        <title>Feed purpose</title>
        <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
        <link href="http://www.physics.drexel.edu/~wking/phys201"/>
        <published>...</published>
        <updated>...</updated>
        <content type="xhtml">
          <html:div xmlns:html="http://www.w3.org/1999/xhtml">Changes will be noted in this feed.</html:div>
        </content>
      </entry>
    </feed>
    <BLANKLINE>

    Note that we cannot move the html namespace declaration to the
    `<feed>` start tag until there is a way to update namespace maps
    on the fly.  See `lxml bug 555602`_.

    .. _lxml bug 555602: https://bugs.launchpad.net/lxml/+bug/555602

    Cleanup.

    >>> remove(path)
    """
    name = 'add'

    def _get_parser(self):
        """Return the parser for the `add` command's options."""
        usage = ['%prog [general-options] add [options] TITLE LINK',
                 '',
                 'Where',
                 ' TITLE is the title of the new entry',
                 ' LINK is the URI of that the entry refers to']
        parser = OptionParser(usage='\n'.join(usage))
        parser.disable_interspersed_args()
        parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
                          help=('Input file for generated feed '
                                '(defaults to stdin)'))
        parser.add_option('-c', '--content', dest='content', metavar='FILE',
                          help=('Input file for entry content '
                                '(defaults to stdin, unless input is stdin, '
                                'in which case this option is required.)'))
        return parser

    def _run(self, options, args):
        """Append a new `<entry>` to a parsed feed and return the feed root.

        `args[0]` is the entry title and `args[1]` the entry link.  The
        feed is read from `--input` or stdin, the entry content from
        `--content` or stdin.

        Raises `ValueError` when both feed and content would have to
        come from stdin.
        """
        title = unicode(args[0])
        link = unicode(args[1])

        parser = objectify.makeparser()

        if options.ifilename is None:
            # stdin can supply the feed or the content, not both.  An
            # explicit raise keeps the check alive under `python -O`.
            if options.content is None:
                raise ValueError('Need to use one of --input or --content')
            root = objectify.parse(sys.stdin, parser=parser)
        else:
            root = objectify.parse(options.ifilename, parser=parser)

        feed = root.getroot()

        if options.content is None:
            content = sys.stdin.read()
        else:
            # Close the content file promptly instead of leaking the handle.
            with open(options.content, 'r') as content_file:
                content = content_file.read()

        entry = etree.SubElement(feed, ATOM + 'entry')
        etree.SubElement(entry, ATOM + 'title')
        entry.title = title

        # One time value feeds the ID's date and every timestamp below.
        tpub = time.time()
        etree.SubElement(entry, ATOM + 'id')
        entry.id = _id(link, tpub)

        etree.SubElement(entry, ATOM + 'link')
        entry.link.attrib['href'] = link

        etree.SubElement(entry, ATOM + 'published')
        entry.published = _timestamp(tpub)

        etree.SubElement(entry, ATOM + 'updated')
        entry.updated = _timestamp(tpub)

        etree.SubElement(entry, ATOM + 'content')
        entry.content.attrib['type'] = 'xhtml'
        etree.SubElement(entry.content, XHTML + 'div')
        entry.content[XHTML + 'div'] = content

        # Create <updated> on the feed if it is missing, then stamp it
        # with the time of this entry.
        if not hasattr(feed, u'updated') :
            etree.SubElement(feed, ATOM + 'updated')
        feed.updated = _timestamp(tpub)

        # remove http://codespeak.net/lxml/objectify/pytype namespace
        objectify.deannotate(feed)
        etree.cleanup_namespaces(feed)

        return feed
334 if __name__ == "__main__" and True:
335 commands = [NewFeedCommand(), AddEntryCommand()]
336 command_dict = dict([(c.name, c) for c in commands])
337 usage = ['%prog [options] command [command-options]',
339 'Where command is one of']
340 usage.extend([' %s\t%s' % (c.name, c.__doc__.splitlines()[0])
343 parser = OptionParser(usage='\n'.join(usage))
344 parser.disable_interspersed_args()
345 parser.add_option('-o', '--output', dest='ofilename', metavar='FILE',
346 help='Output file for generated feed (defaults to stdout)')
347 parser.add_option('--test', dest='test', action='store_true',
348 help='Run the module test suite')
349 (options, args) = parser.parse_args()
351 if options.test == True:
355 command_name = args[0]
356 command = command_dict[command_name]
358 feed = command.run(args)
360 ostring = etree.tostring(
361 feed, pretty_print=True, xml_declaration=True, encoding='UTF-8')
362 if options.ofilename == None:
365 with file(options.ofilename, 'w') as of: