From: W. Trevor King <wking@drexel.edu>
Date: Sat, 20 Nov 2010 16:59:29 +0000 (-0500)
Subject: Cleanups in atomgen.py and update for Amara 2.0a4.
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c928ef284fd3b57af98515d8181ed70e0d8b3a19;p=mw2txt.git

Cleanups in atomgen.py and update for Amara 2.0a4.
---

diff --git a/posts/Atomgen/atomgen.py b/posts/Atomgen/atomgen.py
old mode 100644
new mode 100755
index 428c649..ce6b79f
--- a/posts/Atomgen/atomgen.py
+++ b/posts/Atomgen/atomgen.py
@@ -1,56 +1,57 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 #
-# atomgen - Produce RFC4287 compliant Atom 1.0 xml from the command line.
+# Copyright (C) 2009-2010, William Trevor King <wking@drexel.edu>
 #
-# Copyright (C) 2009, William Trevor King
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
 #
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License as
-# published by the Free Software Foundation; either version 3 of the
-# License, or (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the GNU General Public License for more details.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
 #
 # You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-# 02111-1307, USA.
-#
-# The author may be contacted at <wking@drexel.edu> on the Internet, or
-# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St.,
-# Philadelphia PA 19104, USA.
-#
-# RFC4287 can be found at http://tools.ietf.org/html/rfc4287
-#
-# tested on Amara version 2.0a1
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from amara import bindery as AB
-from amara import xml_print
-from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, \
-    XHTML_NAMESPACE
+"""Produce `RFC 4287`_ compliant Atom 1.0 XML from the command line.
+
+Tested on Amara_ version 2.0a4.
+
+.. _RFC 4287: http://tools.ietf.org/html/rfc4287
+.. _Amara: http://wiki.xml3k.org/Amara2
+"""
+
+from optparse import OptionParser
+import sys
 import time
 
-GENERATOR_NAME = u'atomgen [amara] (based on atomixlib)'
-VERSION = '0.1'
+from amara import bindery as AB
+from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, XHTML_NAMESPACE
 
-def id(url, time_published):
-    """
-    Convert a URL to an Atom ID following 
-      http://diveintomark.org/archives/2004/05/28/howto-atom-id
-    >>> id('http://example.com/blog#5', 0)
+
+__version__ = '0.2'
+GENERATOR_NAME = u'atomgen [based on amara2]'
+XML_WRITE_KWARGS = {'writer':'xml-indent'}
+
+
+def _id(url, time_published):
+    """Convert a URL to an Atom ID
+
+    Following Mark Pilgrim's suggestions_.
+    >>> _id('http://example.com/blog#5', 0)
     u'tag:example.com,1970-01-01:/blog/5'
-    
-    Tags conform to RFC4151 tag syntax
-      http://tools.ietf.org/html/rfc4151#section-2.1
-    
-    You're restricted to one post per day with a single url.
+
+    Tags conform to RFC4151 `tag syntax`_.  You're restricted to one
+    post per day with a single url.
+
+    .. _suggestions: http://diveintomark.org/archives/2004/05/28/howto-atom-id
+    .. _tag syntax: http://tools.ietf.org/html/rfc4151#section-2.1
     """
     # Discard everything before the domain name
     start = u"http://"
-    if url[0:len(start)] == start:
+    if url.startswith(start):
         url = url[len(start):]
     # Change all # characters to /
     url = url.replace(u'#', u'/')
@@ -67,193 +68,279 @@ def id(url, time_published):
     url = u"tag:%s,%s:%s" % (domain, time_string, trailer)
     return url
 
-def timestamp(seconds=None):
-    """
-    >>> timestamp(0)
+def _timestamp(seconds=None):
+    """Return an `RFC 3339`_ timestamp.
+
+    Complete date plus hours, minutes and seconds::
+
+        YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)
+
+    Where the trailing `Z` designates times in UTC.
+
+    >>> _timestamp(0)
     u'1970-01-01T00:00:00Z'
+
+    .. _RFC 3339: http://www.ietf.org/rfc/rfc3339.txt
     """
-    # rfc3339
-    # Complete date plus hours, minutes and seconds:
-    #    YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)
-    # Where the the trailing 'Z' designates times in UTC
-    # see http://www.ietf.org/rfc/rfc3339.txt
     utc = time.gmtime(seconds)
     string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
     return unicode(string)
 
 
-## define some helper functions for the interface
-
-def new_feed(args):
-    from optparse import OptionParser
+class Command (object):
+    """A command exposed via the command line."""
+    name = None
 
-    parser = OptionParser(usage="""%prog [general-options] new [options] URI
+    def run(self, argv):
+        parser = self._get_parser()
+        options,args = parser.parse_args(argv)
+        return self._run(options, args)
 
-Where
-  URI is a URI used to generate a unique ID for the feed
-""".rstrip('\n'))
-    parser.disable_interspersed_args()
-    parser.add_option('-t', '--title', dest='title', metavar='TITLE',
-                      help='Feed title')
-    parser.add_option('-a', '--author', dest='author', metavar='NAME',
-                      help='Feed author name')
-    parser.add_option('-u', '--author-uri', dest='author_uri', metavar='URI',
-                      help='Feed author homepage URI')
-    parser.add_option('-e', '--author-email', dest='author_email',
-                      metavar='EMAIL', help='Feed author email address')
-    (options, args) = parser.parse_args(args)
-    uri = unicode(args[0])
+    def _get_parser(self):
+        raise NotImplementedError()
     
-    doc = AB.nodes.entity_base()
-    doc.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'feed'))
-
-    #doc.feed.xml_append(doc.xml_element_factory(None, u'link'))
-    #doc.feed.link.xml_attributes.setnode( \
-    #    doc.feed.entry.link.xml_attribute_factory(None, u'href', link) \
-    #    )
-
-    tpub = time.time()
-    doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
-    doc.feed.id = id(uri, tpub)
-
-    if options.title != None:
-        doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title'))
-        doc.feed.title = unicode(options.title)
-
-    if options.author != None:
-        doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'author'))
-        doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'name'))
-        doc.feed.author.name = unicode(options.author)
+    def _run(self, options, args):
+        raise NotImplementedError()
+
+
+class NewFeedCommand (Command):
+    """Create a new feed
+
+    >>> c = NewFeedCommand()
+    >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
+    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
+    ...     '--author-email', 'wking@drexel.edu',
+    ...     'http://www.physics.drexel.edu/~wking/phys201'])
+    >>> doc.xml_write(**XML_WRITE_KWARGS)
+    ... # doctest: +ELLIPSIS, +REPORT_UDIFF
+    <?xml version="1.0" encoding="UTF-8"?>
+    <feed xmlns="http://www.w3.org/2005/Atom">
+      <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
+      <title>Physics 201</title>
+      <author>
+        <name>W. Trevor King</name>
+        <email>wking@drexel.edu</email>
+        <uri>http://www.physics.drexel.edu/~wking/</uri>
+      </author>
+      <generator version="0.2">atomgen [based on amara2]</generator>
+      <updated>...</updated>
+    </feed>
+    """
+    name = 'new'
+
+    def _get_parser(self):
+        usage = ['%prog [general-options] new [options] URI',
+                 '',
+                 'Where',
+                 '  URI is a URI used to generate a unique ID for the feed']
+        parser = OptionParser(usage='\n'.join(usage))
+        parser.disable_interspersed_args()
+        parser.add_option('-t', '--title', dest='title', metavar='TITLE',
+                          help='Feed title')
+        parser.add_option('-a', '--author', dest='author', metavar='NAME',
+                          help='Feed author name')
+        parser.add_option('-u', '--author-uri', dest='author_uri',
+                          metavar='URI', help='Feed author homepage URI')
+        parser.add_option('-e', '--author-email', dest='author_email',
+                          metavar='EMAIL', help='Feed author email address')
+        return parser
+
+    def _run(self, options, args):
+        uri = unicode(args[0])
+
+        doc = AB.nodes.entity_base()
+        doc.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'feed'))
+
+        tpub = time.time()
+        doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
+        doc.feed.id = _id(uri, tpub)
+
+        if options.title != None:
+            doc.feed.xml_append(doc.xml_element_factory(
+                    ATOM_NAMESPACE, u'title'))
+            doc.feed.title = unicode(options.title)
+
+        if options.author != None:
+            doc.feed.xml_append(doc.xml_element_factory(
+                    ATOM_NAMESPACE, u'author'))
+            doc.feed.author.xml_append(doc.xml_element_factory(
+                    ATOM_NAMESPACE, u'name'))
+            doc.feed.author.name = unicode(options.author)
         if options.author_email:
-            doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'email'))
+            doc.feed.author.xml_append(
+                doc.xml_element_factory(ATOM_NAMESPACE, u'email'))
             doc.feed.author.email = unicode(options.author_email)
         if options.author_uri:
-            doc.feed.author.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'uri'))
+            doc.feed.author.xml_append(
+                doc.xml_element_factory(ATOM_NAMESPACE, u'uri'))
             doc.feed.author.uri = unicode(options.author_uri)
 
-    doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'generator'))
-    doc.feed.generator = u'%s' % GENERATOR_NAME
-    doc.feed.generator.xml_attributes.setnode( \
-        doc.feed.generator.xml_attribute_factory(ATOM_NAMESPACE, u'version', VERSION) \
-        )
+        doc.feed.xml_append(doc.xml_element_factory(
+                ATOM_NAMESPACE, u'generator'))
+        doc.feed.generator = u'%s' % GENERATOR_NAME
+        doc.feed.generator.xml_attributes.setnode(
+            doc.feed.generator.xml_attribute_factory(
+                ATOM_NAMESPACE, u'version', __version__))
 
-    doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated'))
-    doc.feed.updated = timestamp(tpub)
+        doc.feed.xml_append(doc.xml_element_factory(
+                ATOM_NAMESPACE, u'updated'))
+        doc.feed.updated = _timestamp(tpub)
 
-    return doc
+        return doc
 
-def add_entry(args):
-    from sys import stdin
-    from optparse import OptionParser
-    
-    parser = OptionParser(usage="""%prog [general-options] add [options] TITLE LINK
 
-Where
-  TITLE is the title of the new entry
-  LINK is the URI of that the entry refers to
-""".rstrip('\n'))
-    parser.disable_interspersed_args()
-    parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
-                      help='Input file for generated feed (defaults to stdin)')
-    parser.add_option('-c', '--content', dest='content', metavar='FILE',
-                      help='Input file for entry content (defaults to stdin, unless input is stdin, in which case this option is required.)')
-    (options, args) = parser.parse_args(args)
-    title = unicode(args[0])
-    link = unicode(args[1])
-    
-    if options.ifilename == None:
-        assert options.content != None, 'Need to use one of --input or --content'
-        doc = AB.parse(sys.stdin)
-    else:
-        doc = AB.parse(options.ifilename)
-    
-    if options.content == None:
-        content = unicode(sys.stdin.read())
-    else:
-        content = file(options.content, 'r').read().decode('utf-8')
-    # convert content out of unicode.  Avoids ?bug? in
-    # generator.ax_amara.construct_xhtml_text calls
-    # amara.bindery.xml_append_fragment which gives
-    #   ValueError: String must be of type string, not unicode
-    content = str(content)
-    
-    new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry')
-    doc.feed.xml_append(new_entry)
+class AddEntryCommand (Command):
+    """Add an entry to an existing feed.
 
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title'))
-    new_entry.title = title
+    >>> from os import close, remove
+    >>> from StringIO import StringIO
+    >>> from tempfile import mkstemp
 
-    tpub = time.time()
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
-    new_entry.id = id(link, tpub)
+    First, create a feed to edit.
 
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'link'))
-    new_entry.link.xml_attributes.setnode( \
-        new_entry.link.xml_attribute_factory(ATOM_NAMESPACE, u'href', link) \
-        )
+    >>> c = NewFeedCommand()
+    >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
+    ...     '--author-uri', 'http://www.physics.drexel.edu/~wking/',
+    ...     '--author-email', 'wking@drexel.edu',
+    ...     'http://www.physics.drexel.edu/~wking/phys201'])
+    >>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-')
+    >>> close(fd)
+    >>> with open(path, 'w') as f:
+    ...     doc.xml_write(stream=f, **XML_WRITE_KWARGS)
 
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'published'))
-    new_entry.published = timestamp(tpub)
-    
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated'))
-    new_entry.updated = timestamp(tpub)
+    Now add an entry to that feed.
+
+    >>> c = AddEntryCommand()
+    >>> stdin = sys.stdin
+    >>> sys.stdin = StringIO('Changes will be noted in this feed.')
+    >>> doc = c.run(['--input', path, 'Feed purpose',
+    ...     'http://www.physics.drexel.edu/~wking/phys201'])
+    >>> sys.stdin = stdin
+    >>> doc.xml_write(**XML_WRITE_KWARGS)
+
+    Cleanup.
+
+    >>> remove(path)
+    """
+    name = 'add'
+
+    def _get_parser(self):
+        usage = ['%prog [general-options] add [options] TITLE LINK',
+                 '',
+                 'Where',
+                 '  TITLE is the title of the new entry',
+                 '  LINK is the URI of that the entry refers to']
+        parser = OptionParser(usage='\n'.join(usage))
+        parser.disable_interspersed_args()
+        parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
+                          help=('Input file for generated feed '
+                                '(defaults to stdin)'))
+        parser.add_option('-c', '--content', dest='content', metavar='FILE',
+                          help=('Input file for entry content '
+                                '(defaults to stdin, unless input is stdin, '
+                                'in which case this option is required.)'))
+        return parser
+
+    def _run(self, options, args):
+        title = unicode(args[0])
+        link = unicode(args[1])
 
-    new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'content'))
-    new_entry.content.xml_attributes.setnode( \
-        new_entry.content.xml_attribute_factory(ATOM_NAMESPACE, u'type', u'xhtml') \
-        )
-    new_entry.content.xml_append(doc.xml_element_factory(XHTML_NAMESPACE, u'div'))
-    #print new_entry.content.xml_attributes
-    #.getnode(XHTML_NAMESPACE, u'div').xml_prefix = u'html'
-    #new_entry.content.xml_prefix = u'html'
-    new_entry.content.div = content
+        if options.ifilename == None:
+            assert options.content != None, (
+                'Need to use one of --input or --content')
+            doc = AB.parse(sys.stdin)
+        else:
+            doc = AB.parse(options.ifilename)
 
-    if not hasattr(doc.feed, u'updated') :
-        doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'updated'))
-    doc.feed.updated = timestamp(tpub)
+        if options.content == None:
+            content = unicode(sys.stdin.read())
+        else:
+            content = file(options.content, 'r').read().decode('utf-8')
+
+        # convert content out of unicode.  Avoids ?bug? in
+        # generator.ax_amara.construct_xhtml_text calls
+        # amara.bindery.xml_append_fragment which gives
+        #   ValueError: String must be of type string, not unicode
+        #content = str(content)
+
+        new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry')
+        print >> sys.stderr, options.ifilename
+        print >> sys.stderr, open(options.ifilename, 'r').read()
+        print >> sys.stderr, dir(doc)
+        doc.feed.xml_append(new_entry)
+
+        new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title'))
+        new_entry.title = title
+
+        tpub = time.time()
+        new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
+        new_entry.id = _id(link, tpub)
+
+        new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'link'))
+        new_entry.link.xml_attributes.setnode(
+            new_entry.link.xml_attribute_factory(
+                ATOM_NAMESPACE, u'href', link))
+
+        new_entry.xml_append(doc.xml_element_factory(
+                ATOM_NAMESPACE, u'published'))
+        new_entry.published = _timestamp(tpub)
+
+        new_entry.xml_append(doc.xml_element_factory(
+                ATOM_NAMESPACE, u'updated'))
+        new_entry.updated = _timestamp(tpub)
+
+        new_entry.xml_append(doc.xml_element_factory(
+                ATOM_NAMESPACE, u'content'))
+        new_entry.content.xml_attributes.setnode(
+            new_entry.content.xml_attribute_factory(
+                ATOM_NAMESPACE, u'type', u'xhtml'))
+        new_entry.content.xml_append(doc.xml_element_factory(
+                XHTML_NAMESPACE, u'div'))
+        new_entry.content.div = content
+
+        if not hasattr(doc.feed, u'updated') :
+            doc.feed.xml_append(doc.xml_element_factory(
+                    ATOM_NAMESPACE, u'updated'))
+        doc.feed.updated = _timestamp(tpub)
+
+        return doc
 
-    return doc
 
 def test():
     import doctest
     doctest.testmod()
 
+
 if __name__ == "__main__" and True:
-    import sys
-    from optparse import OptionParser
-    
-    parser = OptionParser(usage="""%prog [options] command [command-options]
+    commands = [NewFeedCommand(), AddEntryCommand()]
+    command_dict = dict([(c.name, c) for c in commands])
+    usage = ['%prog [options] command [command-options]',
+             '',
+             'Where command is one of']
+    usage.extend(['  %s\t%s' % (c.name, c.__doc__.splitlines()[0])
+                  for c in commands])
 
-Where command is one of
-  new   Create a new feed
-  add   Add an entry to an existing feed
-""".rstrip('\n'))
+    parser = OptionParser(usage='\n'.join(usage))
     parser.disable_interspersed_args()
     parser.add_option('-o', '--output', dest='ofilename', metavar='FILE',
                       help='Output file for generated feed (defaults to stdout)')
     parser.add_option('--test', dest='test', action='store_true',
                       help='Run the module test suite')
     (options, args) = parser.parse_args()
-    
+
     if options.test == True:
         test()
         sys.exit(0)
 
-    command = args[0]
+    command_name = args[0]
+    command = command_dict[command_name]
     args = args[1:]
-
-    if command == "new":
-        f = new_feed(args)
-    elif command == "add":
-        f = add_entry(args)
-    else:
-        raise Exception, "Unrecognized command: %s" % command
+    doc = command.run(args)
 
     if options.ofilename == None:
-        xml_print(f, indent=True)
+        doc.xml_write(**XML_WRITE_KWARGS)
         print # add trailing endline
     else:
-        of = file(options.ofilename, 'w')
-        xml_print(f, stream=of, indent=True)
-        print >> of, '' # add trailing endline
-        of.close()
+        with file(options.ofilename, 'w') as of:
+            doc.xml_write(stream=of, **XML_WRITE_KWARGS)
+            print >> of, '' # add trailing endline