From: W. Trevor King Date: Sat, 20 Nov 2010 14:13:58 +0000 (-0500) Subject: Bring atomgen example and source into this repository. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=83fe36d7a4bf9f1d83d4e0e89e081e2a6d6740c1;p=mw2txt.git Bring atomgen example and source into this repository. --- diff --git a/posts/Atomgen.mdwn b/posts/Atomgen.mdwn index 7a7b99d..169fa22 100644 --- a/posts/Atomgen.mdwn +++ b/posts/Atomgen.mdwn @@ -1,29 +1,50 @@ [[!meta title="Atomgen"]] [[!meta date="2009-03-28 05:48:23"]] -I've been using [Atom -feeds](http://en.wikipedia.org/wiki/Atom_%28standard%29) to publish -announcements to my course websites recently. The ‘please check this -page daily for changes’ directives I saw on a few other pages were too -painful to bear ;). I've written a little PHP script to convert the -atom.xml into XHTML so non-atom students can still read the -announcements online, and I've been using -[rss2email](http://rss2email.infogami.com/) in a cron job and a -[procmail](http://www.procmail.org/) rule to automatically email the -class any new announcements I post in the feed. It works rather well -I think, even if noone ends up actually looking at the feed ;). - -Anyhow, I wrote up a little command line wrapper around Sylvain -Hellegouarch's [Atomixlib](http://pypi.python.org/pypi/atomixlib/), -which is based on [Amara](http://xml3k.org/Amara/) or -[ElementTree](http://pypi.python.org/pypi/elementtree/). However, -Sylvain seems to be falling behind as ElementTree and Amara continue -to evolve, so I recently rewrote my script to run off -[Amara2](http://xml3k.org/Amara2) directly, since Amara's _bindery_ -interface is wonderful (well, for data manipulation anyway. + +I've been using [Atom feeds][] to publish announcements to my course +websites recently. The ‘please check this page daily for changes’ +directives I saw on a few other pages were too painful to bear ;). 
+I've written a little PHP script to convert the atom.xml into XHTML so +non-atom students can still read the announcements online, and I've +been using [rss2email][] in a cron job and a [procmail][] rule to +automatically email the class any new announcements I post in the +feed. It works rather well I think, even if no one ends up actually +looking at the feed ;). + +Anyhow, I wrote up a little command line wrapper ([[atomgen.py]]) +around Sylvain Hellegouarch's [Atomixlib][], which is based on +[Amara][] or [ElementTree][]. However, Sylvain seems to be falling +behind as ElementTree and Amara continue to evolve, so I recently +rewrote my script to run off [Amara2][] directly, since Amara's +*bindery* interface is wonderful (well, for data manipulation anyway. Attributes, prefixes, and tree construction don't make much sense to me yet...). -You can grab my script and see some simple usage examples on my [code -page](http://www.physics.drexel.edu/~wking/code/#atomgen). + atomgen -o atom.xml new --title 'Physics 201' --author 'W. Trevor King' \ + http://www.physics.drexel.edu/~wking/phys201 + echo "Changes to the Phys201 website will be noted in this feed." | \ + atomgen -o atom.xml add -i atom.xml 'Feed purpose' \ + http://www.physics.drexel.edu/~wking/phys201 + +If your distro does not package Amara, install it with + + sudo apt-get install python-setuptools + easy_install --prefix=~ amara + +or use [[pip|Distributing Python]]. Gentoo packages Amara-1.2.0.2, +but there are Amara2 ebuilds in Tiziano Müller's [dev-zero][] +[overlay][]. 
+ +[Atom feeds]: http://en.wikipedia.org/wiki/Atom_%28standard%29 +[rss2email]: http://rss2email.infogami.com/ +[procmail]: http://www.procmail.org/ +[Atomixlib]: http://pypi.python.org/pypi/atomixlib/ +[Amara]: http://xml3k.org/Amara/ +[Amara2]: http://xml3k.org/Amara2/ +[ElementTree]: http://pypi.python.org/pypi/elementtree/ +[dev-zero]: http://git.overlays.gentoo.org/gitweb/?p=dev/dev-zero.git +[overlay]: http://www.gentoo.org/proj/en/overlays/userguide.xml +[[!tag tags/blogging]] [[!tag tags/programming]] +[[!tag tags/python]] diff --git a/posts/Atomgen/atomgen.py b/posts/Atomgen/atomgen.py new file mode 100644 index 0000000..428c649 --- /dev/null +++ b/posts/Atomgen/atomgen.py @@ -0,0 +1,259 @@ +#!/usr/bin/python +# +# atomgen - Produce RFC4287 compliant Atom 1.0 xml from the command line. +# +# Copyright (C) 2009, William Trevor King +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. +# +# The author may be contacted at on the Internet, or +# write to Trevor King, Drexel University, Physics Dept., 3141 Chestnut St., +# Philadelphia PA 19104, USA. 
#
# RFC4287 can be found at http://tools.ietf.org/html/rfc4287
#
# tested on Amara version 2.0a1

import sys
import time
from optparse import OptionParser

from amara import bindery as AB
from amara import xml_print
from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, \
    XHTML_NAMESPACE

GENERATOR_NAME = u'atomgen [amara] (based on atomixlib)'
VERSION = '0.1'


# NOTE: this function shadows the builtin ``id``; the name is kept
# because it is part of the module's existing interface.
def id(url, time_published):
    """
    Convert a URL to an Atom ID following
      http://diveintomark.org/archives/2004/05/28/howto-atom-id
    >>> id('http://example.com/blog#5', 0)
    u'tag:example.com,1970-01-01:/blog/5'

    Any leading ``scheme://`` is discarded, not just ``http://``:
    >>> id('https://example.com/blog#5', 0)
    u'tag:example.com,1970-01-01:/blog/5'

    Tags conform to RFC4151 tag syntax
      http://tools.ietf.org/html/rfc4151#section-2.1

    You're restricted to one post per day with a single url.

    url            -- the URL to convert
    time_published -- publication time in seconds since the epoch; only
                      its UTC calendar date ends up in the tag

    Returns the tag URI as a unicode string.
    """
    # Discard everything before the domain name.  The separator only
    # counts as a scheme delimiter when no '/' precedes it, so a '://'
    # buried in a path or query string is left alone.
    scheme_sep = u'://'
    sep_index = url.find(scheme_sep)
    if sep_index != -1 and url.find(u'/') == sep_index + 1:
        url = url[sep_index + len(scheme_sep):]
    # Change all # characters to /
    url = url.replace(u'#', u'/')
    # Extract the domain name
    end_of_domain_index = url.find(u'/')
    if end_of_domain_index == -1:
        domain = url
        trailer = u''
    else:
        domain = url[:end_of_domain_index]
        trailer = url[end_of_domain_index:]
    # Immediately after the domain name, insert a comma, year-month-date, colon
    time_string = time.strftime("%Y-%m-%d", time.gmtime(time_published))
    return u"tag:%s,%s:%s" % (domain, time_string, trailer)


def timestamp(seconds=None):
    """
    Format a time as an RFC 3339 UTC timestamp.

    >>> timestamp(0)
    u'1970-01-01T00:00:00Z'

    seconds -- seconds since the epoch; None (the default) means "now",
               which is how time.gmtime() treats a None argument.
    """
    # rfc3339
    # Complete date plus hours, minutes and seconds:
    #    YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30Z)
    # Where the trailing 'Z' designates times in UTC
    # see http://www.ietf.org/rfc/rfc3339.txt
    utc = time.gmtime(seconds)
    string = time.strftime('%Y-%m-%dT%H:%M:%SZ', utc)
    return unicode(string)


## define some helper functions for the interface

def _append_element(doc, parent, name, namespace=ATOM_NAMESPACE):
    """Create element `name` with `doc`'s factory, append it to `parent`
    and return it."""
    element = doc.xml_element_factory(namespace, name)
    parent.xml_append(element)
    return element


def _set_attribute(element, name, value, namespace=ATOM_NAMESPACE):
    """Attach attribute `name`=`value` to `element`."""
    # NOTE(review): RFC 4287 declares href/type/version as unqualified
    # attributes, while this (like the code it replaces) creates them in
    # the Atom namespace.  Confirm against Amara's serialisation whether
    # `namespace` should be None here.
    element.xml_attributes.setnode(
        element.xml_attribute_factory(namespace, name, value))


def new_feed(args):
    """
    Build a new, entry-less Atom feed document.

    args -- command line arguments following the `new` command: optional
            --title/--author/--author-uri/--author-email settings, then
            the URI used to generate the feed's ID.

    Returns the Amara bindery document.  Exits through
    OptionParser.error() when the URI argument is missing.
    """
    parser = OptionParser(usage="""%prog [general-options] new [options] URI

Where
  URI is a URI used to generate a unique ID for the feed
""".rstrip('\n'))
    parser.disable_interspersed_args()
    parser.add_option('-t', '--title', dest='title', metavar='TITLE',
                      help='Feed title')
    parser.add_option('-a', '--author', dest='author', metavar='NAME',
                      help='Feed author name')
    parser.add_option('-u', '--author-uri', dest='author_uri', metavar='URI',
                      help='Feed author homepage URI')
    parser.add_option('-e', '--author-email', dest='author_email',
                      metavar='EMAIL', help='Feed author email address')
    (options, args) = parser.parse_args(args)
    if not args:
        # Report a usage error instead of an IndexError traceback.
        parser.error('missing required URI argument')
    uri = unicode(args[0])

    doc = AB.nodes.entity_base()
    _append_element(doc, doc, u'feed')

    # TODO: optionally emit a feed-level <link href="..."/> element.

    tpub = time.time()
    _append_element(doc, doc.feed, u'id')
    doc.feed.id = id(uri, tpub)

    if options.title is not None:
        _append_element(doc, doc.feed, u'title')
        doc.feed.title = unicode(options.title)

    if options.author is not None:
        _append_element(doc, doc.feed, u'author')
        _append_element(doc, doc.feed.author, u'name')
        doc.feed.author.name = unicode(options.author)
        # email/uri live inside <author>, which RFC 4287 requires to
        # carry a name, so they are only emitted alongside --author.
        if options.author_email:
            _append_element(doc, doc.feed.author, u'email')
            doc.feed.author.email = unicode(options.author_email)
        if options.author_uri:
            _append_element(doc, doc.feed.author, u'uri')
            doc.feed.author.uri = unicode(options.author_uri)

    _append_element(doc, doc.feed, u'generator')
    doc.feed.generator = u'%s' % GENERATOR_NAME
    _set_attribute(doc.feed.generator, u'version', VERSION)

    _append_element(doc, doc.feed, u'updated')
    doc.feed.updated = timestamp(tpub)

    return doc


def add_entry(args):
    """
    Parse an existing feed and append a new entry to it.

    args -- command line arguments following the `add` command: optional
            --input/--content file names, then TITLE and LINK.

    The feed is read from --input (default stdin) and the entry content
    from --content (default stdin); stdin cannot serve both, so at least
    one of the two options must be given.

    Returns the updated Amara bindery document.  Exits through
    OptionParser.error() on missing arguments or options.
    """
    parser = OptionParser(usage="""%prog [general-options] add [options] TITLE LINK

Where
  TITLE is the title of the new entry
  LINK is the URI of that the entry refers to
""".rstrip('\n'))
    parser.disable_interspersed_args()
    parser.add_option('-i', '--input', dest='ifilename', metavar='FILE',
                      help='Input file for generated feed (defaults to stdin)')
    parser.add_option('-c', '--content', dest='content', metavar='FILE',
                      help='Input file for entry content (defaults to stdin, unless input is stdin, in which case this option is required.)')
    (options, args) = parser.parse_args(args)
    if len(args) < 2:
        parser.error('missing required TITLE and LINK arguments')
    title = unicode(args[0])
    link = unicode(args[1])

    if options.ifilename is None:
        if options.content is None:
            # An explicit check: an `assert` would vanish under `python -O`.
            parser.error('Need to use one of --input or --content')
        doc = AB.parse(sys.stdin)
    else:
        doc = AB.parse(options.ifilename)

    if options.content is None:
        content = unicode(sys.stdin.read())
    else:
        content_file = open(options.content, 'r')
        try:
            content = content_file.read().decode('utf-8')
        finally:
            content_file.close()
    # convert content out of unicode.  Avoids ?bug? in
    #   generator.ax_amara.construct_xhtml_text calls
    #   amara.bindery.xml_append_fragment which gives
    #   ValueError: String must be of type string, not unicode
    # NOTE(review): str() on a unicode object only succeeds for ASCII
    # text; non-ASCII content raises UnicodeEncodeError here.  Whether
    # Amara accepts UTF-8 encoded bytes instead needs confirming.
    content = str(content)

    new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry')
    doc.feed.xml_append(new_entry)

    _append_element(doc, new_entry, u'title')
    new_entry.title = title

    tpub = time.time()
    _append_element(doc, new_entry, u'id')
    new_entry.id = id(link, tpub)

    _append_element(doc, new_entry, u'link')
    _set_attribute(new_entry.link, u'href', link)

    _append_element(doc, new_entry, u'published')
    new_entry.published = timestamp(tpub)

    _append_element(doc, new_entry, u'updated')
    new_entry.updated = timestamp(tpub)

    _append_element(doc, new_entry, u'content')
    _set_attribute(new_entry.content, u'type', u'xhtml')
    _append_element(doc, new_entry.content, u'div', XHTML_NAMESPACE)
    new_entry.content.div = content

    # The feed changes whenever an entry is added; create <updated>
    # first if the parsed feed did not have one.
    if not hasattr(doc.feed, u'updated'):
        _append_element(doc, doc.feed, u'updated')
    doc.feed.updated = timestamp(tpub)

    return doc


def test():
    """Run the doctests embedded in this module."""
    import doctest
    doctest.testmod()


def main(argv=None):
    """
    Command line entry point.

    argv -- argument list without the program name; None (the default)
            makes optparse fall back to sys.argv[1:].
    """
    parser = OptionParser(usage="""%prog [options] command [command-options]

Where command is one of
  new   Create a new feed
  add   Add an entry to an existing feed
""".rstrip('\n'))
    # Stop at the command name so its own options reach the sub-parser.
    parser.disable_interspersed_args()
    parser.add_option('-o', '--output', dest='ofilename', metavar='FILE',
                      help='Output file for generated feed (defaults to stdout)')
    parser.add_option('--test', dest='test', action='store_true',
                      help='Run the module test suite')
    (options, args) = parser.parse_args(argv)

    if options.test:
        test()
        sys.exit(0)

    if not args:
        parser.error('missing command')
    command = args[0]
    args = args[1:]

    if command == "new":
        f = new_feed(args)
    elif command == "add":
        f = add_entry(args)
    else:
        parser.error("Unrecognized command: %s" % command)

    if options.ofilename is None:
        xml_print(f, indent=True)
        sys.stdout.write('\n')  # add trailing endline
    else:
        # The feed is fully built before the output file is opened, so
        # `-o atom.xml add -i atom.xml` can rewrite a feed in place.
        of = open(options.ofilename, 'w')
        try:
            xml_print(f, stream=of, indent=True)
            of.write('\n')  # add trailing endline
        finally:
            of.close()


if __name__ == "__main__":
    main()