From: W. Trevor King Date: Sat, 20 Nov 2010 16:59:29 +0000 (-0500) Subject: Cleanups in atomgen.py and update for Amara 2.0a4. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c928ef284fd3b57af98515d8181ed70e0d8b3a19;p=mw2txt.git Cleanups in atomgen.py and update for Amara 2.0a4. --- diff --git a/posts/Atomgen/atomgen.py b/posts/Atomgen/atomgen.py old mode 100644 new mode 100755 index 428c649..ce6b79f --- a/posts/Atomgen/atomgen.py +++ b/posts/Atomgen/atomgen.py @@ -1,56 +1,57 @@ -#!/usr/bin/python +#!/usr/bin/env python # -# atomgen - Produce RFC4287 compliant Atom 1.0 xml from the command line. +# Copyright (C) 2009-2010, William Trevor King # -# Copyright (C) 2009, William Trevor King +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 3 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -# See the GNU General Public License for more details. +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. -# -# The author may be contacted at on the Internet, or -# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St., -# Philadelphia PA 19104, USA. -# -# RFC4287 can be found at http://tools.ietf.org/html/rfc4287 -# -# tested on Amara version 2.0a1 +# along with this program. If not, see . -from amara import bindery as AB -from amara import xml_print -from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, \ - XHTML_NAMESPACE +"""Produce `RFC 4287` compliant Atom 1.0 XML from the command line. + +Tested on Amara_ version 2.0a4. + +.. _RFC 4287: http://tools.ietf.org/html/rfc4287 +.. _Amara: http://wiki.xml3k.org/Amara2 +""" + +from optparse import OptionParser +import sys import time -GENERATOR_NAME = u'atomgen [amara] (based on atomixlib)' -VERSION = '0.1' +from amara import bindery as AB +from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, XHTML_NAMESPACE -def id(url, time_published): - """ - Convert a URL to an Atom ID following - http://diveintomark.org/archives/2004/05/28/howto-atom-id - >>> id('http://example.com/blog#5', 0) + +__version__ = '0.2' +GENERATOR_NAME = u'atomgen [based on amara2]' +XML_WRITE_KWARGS = {'writer':'xml-indent'} + + +def _id(url, time_published): + """Convert a URL to an Atom ID + + Following Mark Pilgrim's suggestions_. + >>> _id('http://example.com/blog#5', 0) u'tag:example.com,1970-01-01:/blog/5' - - Tags conform to RFC4151 tag syntax - http://tools.ietf.org/html/rfc4151#section-2.1 - - You're restricted to one post per day with a single url. + + Tags conform to RFC4151 tag syntax. You're restricted to one post + per day with a single url. + + .. _suggestions: http://diveintomark.org/archives/2004/05/28/howto-atom-id + .. _tag syntax: http://tools.ietf.org/html/rfc4151#section-2.1 """ # Discard everything before the domain name start = u"http://" - if url[0:len(start)] == start: + if url.startswith(start): url = url[len(start):] # Change all # characters to / url = url.replace(u'#', u'/') @@ -67,193 +68,279 @@ def id(url, time_published): url = u"tag:%s,%s:%s" % (domain, time_string, trailer) return url -def timestamp(seconds=None): - """ - >>> timestamp(0) +def _timestamp(seconds=None): + """Return an `RFC 3339`_ timestamp. + + Complete date plus hours, minutes and seconds:: + + YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z) + + Where the the trailing `Z` designates times in UTC. + + >>> _timestamp(0) u'1970-01-01T00:00:00Z' + + .. _RFC 3339: http://www.ietf.org/rfc/rfc3339.txt """ - # rfc3339 - # Complete date plus hours, minutes and seconds: - # YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z) - # Where the the trailing 'Z' designates times in UTC - # see http://www.ietf.org/rfc/rfc3339.txt utc = time.gmtime(seconds) string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc) return unicode(string) -## define some helper functions for the interface - -def new_feed(args): - from optparse import OptionParser +class Command (object): + """A command exposed via the command line.""" + name = None - parser = OptionParser(usage="""%prog [general-options] new [options] URI + def run(self, argv): + parser = self._get_parser() + options,args = parser.parse_args(argv) + return self._run(options, args) -Where - URI is a URI used to generate a unique ID for the feed -""".rstrip('\n')) - parser.disable_interspersed_args() - parser.add_option('-t', '--title', dest='title', metavar='TITLE', - help='Feed title') - parser.add_option('-a', '--author', dest='author', metavar='NAME', - help='Feed author name') - parser.add_option('-u', '--author-uri', dest='author_uri', metavar='URI', - help='Feed author homepage URI') - parser.add_option('-e', '--author-email', dest='author_email', - metavar='EMAIL', help='Feed author email address') - (options, args) = parser.parse_args(args) - uri = unicode(args[0]) + def _get_parser(self): + raise NotImplementedError() - doc = AB.nodes.entity_base() - doc.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'feed')) - - #doc.feed.xml_append(doc.xml_element_factory(None, u'link')) - #doc.feed.link.xml_attributes.setnode( \ - # doc.feed.entry.link.xml_attribute_factory(None, u'href', link) \ - # ) - - tpub = time.time() - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id')) - doc.feed.id = id(uri, tpub) - - if options.title != None: - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title')) - doc.feed.title = unicode(options.title) - - if options.author != None: - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'author')) - doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'name')) - doc.feed.author.name = unicode(options.author) + def _run(self, options, args): + raise NotImplementedError() + + +class NewFeedCommand (Command): + """Create a new feed + + >>> c = NewFeedCommand() + >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King', + ... '--author-uri', 'http://www.physics.drexel.edu/~wking/', + ... '--author-email', 'wking@drexel.edu', + ... 'http://www.physics.drexel.edu/~wking/phys201']) + >>> doc.xml_write(**XML_WRITE_KWARGS) + ... # doctest: +ELLIPSIS, +REPORT_UDIFF + + + tag:www.physics.drexel.edu,...:/~wking/phys201 + Physics 201 + + W. Trevor King + wking@drexel.edu + http://www.physics.drexel.edu/~wking/ + + atomgen [based on amara2] + ... + + """ + name = 'new' + + def _get_parser(self): + usage = ['%prog [general-options] new [options] URI', + '', + 'Where', + ' URI is a URI used to generate a unique ID for the feed'] + parser = OptionParser(usage='\n'.join(usage)) + parser.disable_interspersed_args() + parser.add_option('-t', '--title', dest='title', metavar='TITLE', + help='Feed title') + parser.add_option('-a', '--author', dest='author', metavar='NAME', + help='Feed author name') + parser.add_option('-u', '--author-uri', dest='author_uri', + metavar='URI', help='Feed author homepage URI') + parser.add_option('-e', '--author-email', dest='author_email', + metavar='EMAIL', help='Feed author email address') + return parser + + def _run(self, options, args): + uri = unicode(args[0]) + + doc = AB.nodes.entity_base() + doc.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'feed')) + + tpub = time.time() + doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id')) + doc.feed.id = _id(uri, tpub) + + if options.title != None: + doc.feed.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'title')) + doc.feed.title = unicode(options.title) + + if options.author != None: + doc.feed.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'author')) + doc.feed.author.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'name')) + doc.feed.author.name = unicode(options.author) if options.author_email: - doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'email')) + doc.feed.author.xml_append( + doc.xml_element_factory(ATOM_NAMESPACE, u'email')) doc.feed.author.email = unicode(options.author_email) if options.author_uri: - doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'uri')) + doc.feed.author.xml_append( + doc.xml_element_factory(ATOM_NAMESPACE, u'uri')) doc.feed.author.uri = unicode(options.author_uri) - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'generator')) - doc.feed.generator = u'%s' % GENERATOR_NAME - doc.feed.generator.xml_attributes.setnode( \ - doc.feed.generator.xml_attribute_factory(ATOM_NAMESPACE, u'version', VERSION) \ - ) + doc.feed.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'generator')) + doc.feed.generator = u'%s' % GENERATOR_NAME + doc.feed.generator.xml_attributes.setnode( + doc.feed.generator.xml_attribute_factory( + ATOM_NAMESPACE, u'version', __version__)) - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated')) - doc.feed.updated = timestamp(tpub) + doc.feed.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'updated')) + doc.feed.updated = _timestamp(tpub) - return doc + return doc -def add_entry(args): - from sys import stdin - from optparse import OptionParser - - parser = OptionParser(usage="""%prog [general-options] add [options] TITLE LINK -Where - TITLE is the title of the new entry - LINK is the URI of that the entry refers to -""".rstrip('\n')) - parser.disable_interspersed_args() - parser.add_option('-i', '--input', dest='ifilename', metavar='FILE', - help='Input file for generated feed (defaults to stdin)') - parser.add_option('-c', '--content', dest='content', metavar='FILE', - help='Input file for entry content (defaults to stdin, unless input is stdin, in which case this option is required.)') - (options, args) = parser.parse_args(args) - title = unicode(args[0]) - link = unicode(args[1]) - - if options.ifilename == None: - assert options.content != None, 'Need to use one of --input or --content' - doc = AB.parse(sys.stdin) - else: - doc = AB.parse(options.ifilename) - - if options.content == None: - content = unicode(sys.stdin.read()) - else: - content = file(options.content, 'r').read().decode('utf-8') - # convert content out of unicode. Avoids ?bug? in - # generator.ax_amara.construct_xhtml_text calls - # amara.bindery.xml_append_fragment which gives - # ValueError: String must be of type string, not unicode - content = str(content) - - new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry') - doc.feed.xml_append(new_entry) +class AddEntryCommand (Command): + """Add an entry to an existing feed. - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title')) - new_entry.title = title + >>> from os import close, remove + >>> from StringIO import StringIO + >>> from tempfile import mkstemp - tpub = time.time() - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id')) - new_entry.id = id(link, tpub) + First, create a feed to edit. - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'link')) - new_entry.link.xml_attributes.setnode( \ - new_entry.link.xml_attribute_factory(ATOM_NAMESPACE, u'href', link) \ - ) + >>> c = NewFeedCommand() + >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King', + ... '--author-uri', 'http://www.physics.drexel.edu/~wking/', + ... '--author-email', 'wking@drexel.edu', + ... 'http://www.physics.drexel.edu/~wking/phys201']) + >>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-') + >>> close(fd) + >>> with open(path, 'w') as f: + ... doc.xml_write(stream=f, **XML_WRITE_KWARGS) - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'published')) - new_entry.published = timestamp(tpub) - - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated')) - new_entry.updated = timestamp(tpub) + Now add an entry to that feed. + + >>> c = AddEntryCommand() + >>> stdin = sys.stdin + >>> sys.stdin = StringIO('Changes will be noted in this feed.') + >>> doc = c.run(['--input', path, 'Feed purpose', + ... 'http://www.physics.drexel.edu/~wking/phys201']) + >>> sys.stdin = stdin + >>> doc.xml_write(**XML_WRITE_KWARGS) + + Cleanup. + + >>> remove(path) + """ + name = 'add' + + def _get_parser(self): + usage = ['%prog [general-options] add [options] TITLE LINK', + '', + 'Where', + ' TITLE is the title of the new entry', + ' LINK is the URI of that the entry refers to'] + parser = OptionParser(usage='\n'.join(usage)) + parser.disable_interspersed_args() + parser.add_option('-i', '--input', dest='ifilename', metavar='FILE', + help=('Input file for generated feed ' + '(defaults to stdin)')) + parser.add_option('-c', '--content', dest='content', metavar='FILE', + help=('Input file for entry content ' + '(defaults to stdin, unless input is stdin, ' + 'in which case this option is required.)')) + return parser + + def _run(self, options, args): + title = unicode(args[0]) + link = unicode(args[1]) - new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'content')) - new_entry.content.xml_attributes.setnode( \ - new_entry.content.xml_attribute_factory(ATOM_NAMESPACE, u'type', u'xhtml') \ - ) - new_entry.content.xml_append(doc.xml_element_factory(XHTML_NAMESPACE, u'div')) - #print new_entry.content.xml_attributes - #.getnode(XHTML_NAMESPACE, u'div').xml_prefix = u'html' - #new_entry.content.xml_prefix = u'html' - new_entry.content.div = content + if options.ifilename == None: + assert options.content != None, ( + 'Need to use one of --input or --content') + doc = AB.parse(sys.stdin) + else: + doc = AB.parse(options.ifilename) - if not hasattr(doc.feed, u'updated') : - doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated')) - doc.feed.updated = timestamp(tpub) + if options.content == None: + content = unicode(sys.stdin.read()) + else: + content = file(options.content, 'r').read().decode('utf-8') + + # convert content out of unicode. Avoids ?bug? in + # generator.ax_amara.construct_xhtml_text calls + # amara.bindery.xml_append_fragment which gives + # ValueError: String must be of type string, not unicode + #content = str(content) + + new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry') + print >> sys.stderr, options.ifilename + print >> sys.stderr, open(options.ifilename, 'r').read() + print >> sys.stderr, dir(doc) + doc.feed.xml_append(new_entry) + + new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title')) + new_entry.title = title + + tpub = time.time() + new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id')) + new_entry.id = _id(link, tpub) + + new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'link')) + new_entry.link.xml_attributes.setnode( + new_entry.link.xml_attribute_factory( + ATOM_NAMESPACE, u'href', link)) + + new_entry.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'published')) + new_entry.published = _timestamp(tpub) + + new_entry.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'updated')) + new_entry.updated = _timestamp(tpub) + + new_entry.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'content')) + new_entry.content.xml_attributes.setnode( + new_entry.content.xml_attribute_factory( + ATOM_NAMESPACE, u'type', u'xhtml')) + new_entry.content.xml_append(doc.xml_element_factory( + XHTML_NAMESPACE, u'div')) + new_entry.content.div = content + + if not hasattr(doc.feed, u'updated') : + doc.feed.xml_append(doc.xml_element_factory( + ATOM_NAMESPACE, u'updated')) + doc.feed.updated = _timestamp(tpub) + + return doc - return doc def test(): import doctest doctest.testmod() + if __name__ == "__main__" and True: - import sys - from optparse import OptionParser - - parser = OptionParser(usage="""%prog [options] command [command-options] + commands = [NewFeedCommand(), AddEntryCommand()] + command_dict = dict([(c.name, c) for c in commands]) + usage = ['%prog [options] command [command-options]', + '', + 'Where command is one of'] + usage.extend([' %s\t%s' % (c.name, c.__doc__.splitlines()[0]) + for c in commands]) -Where command is one of - new Create a new feed - add Add an entry to an existing feed -""".rstrip('\n')) + parser = OptionParser(usage='\n'.join(usage)) parser.disable_interspersed_args() parser.add_option('-o', '--output', dest='ofilename', metavar='FILE', help='Output file for generated feed (defaults to stdout)') parser.add_option('--test', dest='test', action='store_true', help='Run the module test suite') (options, args) = parser.parse_args() - + if options.test == True: test() sys.exit(0) - command = args[0] + command_name = args[0] + command = command_dict[command_name] args = args[1:] - - if command == "new": - f = new_feed(args) - elif command == "add": - f = add_entry(args) - else: - raise Exception, "Unrecognized command: %s" % command + doc = command.run(args) if options.ofilename == None: - xml_print(f, indent=True) + doc.xml_write(**XML_WRITE_KWARGS) print # add trailing endline else: - of = file(options.ofilename, 'w') - xml_print(f, stream=of, indent=True) - print >> of, '' # add trailing endline - of.close() + with file(options.ofilename, 'w') as of: + doc.xml_write(stream=of, **XML_WRITE_KWARGS) + print >> of, '' # add trailing endline