"""Produce `RFC 4287` compliant Atom 1.0 XML from the command line.
-Tested on Amara_ version 2.0a4.
-
.. _RFC 4287: http://tools.ietf.org/html/rfc4287
-.. _Amara: http://wiki.xml3k.org/Amara2
"""
from optparse import OptionParser
import sys
import time
-from amara import bindery as AB
-from amara.namespaces import ATOM_NAMESPACE, XML_NAMESPACE, XHTML_NAMESPACE
-
+from lxml import etree
+from lxml import objectify
+from lxml.html import XHTML_NAMESPACE
__version__ = '0.2'
-GENERATOR_NAME = u'atomgen [based on amara2]'
-XML_WRITE_KWARGS = {'writer':'xml-indent'}
+GENERATOR_NAME = u'atomgen [based on lxml]'
+
+ATOM_NAMESPACE = 'http://www.w3.org/2005/Atom'
+
+ATOM = '{%s}' % ATOM_NAMESPACE
+XHTML = '{%s}' % XHTML_NAMESPACE
+
+NSMAP = {
+ None : ATOM_NAMESPACE,
+ 'html': XHTML_NAMESPACE,
+ }
def _id(url, time_published):
"""Create a new feed
>>> c = NewFeedCommand()
- >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
+ >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
... '--author-uri', 'http://www.physics.drexel.edu/~wking/',
... '--author-email', 'wking@drexel.edu',
... 'http://www.physics.drexel.edu/~wking/phys201'])
- >>> doc.xml_write(**XML_WRITE_KWARGS)
- ... # doctest: +ELLIPSIS, +REPORT_UDIFF
- <?xml version="1.0" encoding="UTF-8"?>
+ >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
+ ... encoding='UTF-8') # doctest: +ELLIPSIS, +REPORT_UDIFF
+ <?xml version='1.0' encoding='UTF-8'?>
<feed xmlns="http://www.w3.org/2005/Atom">
<id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
<title>Physics 201</title>
<email>wking@drexel.edu</email>
<uri>http://www.physics.drexel.edu/~wking/</uri>
</author>
- <generator version="0.2">atomgen [based on amara2]</generator>
+ <generator version="0.2">atomgen [based on lxml]</generator>
<updated>...</updated>
</feed>
+ <BLANKLINE>
"""
name = 'new'
return parser
def _run(self, options, args):
- uri = unicode(args[0])
+ uri = args[0]
- doc = AB.nodes.entity_base()
- doc.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'feed'))
+ feed = objectify.Element(ATOM + 'feed', nsmap=NSMAP)
tpub = time.time()
- doc.feed.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
- doc.feed.id = _id(uri, tpub)
-
- if options.title != None:
- doc.feed.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'title'))
- doc.feed.title = unicode(options.title)
-
- if options.author != None:
- doc.feed.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'author'))
- doc.feed.author.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'name'))
- doc.feed.author.name = unicode(options.author)
+ etree.SubElement(feed, ATOM + 'id')
+ feed.id = _id(uri, tpub)
+
+ if options.title:
+ etree.SubElement(feed, ATOM + 'title')
+ feed.title = options.title
+
+ if options.author or options.author_email or options.author_uri:
+ etree.SubElement(feed, ATOM + 'author')
+ if options.author:
+ etree.SubElement(feed.author, ATOM + 'name')
+ feed.author.name = options.author
if options.author_email:
- doc.feed.author.xml_append(
- doc.xml_element_factory(ATOM_NAMESPACE, u'email'))
- doc.feed.author.email = unicode(options.author_email)
+ etree.SubElement(feed.author, ATOM + 'email')
+ feed.author.email = options.author_email
if options.author_uri:
- doc.feed.author.xml_append(
- doc.xml_element_factory(ATOM_NAMESPACE, u'uri'))
- doc.feed.author.uri = unicode(options.author_uri)
+ etree.SubElement(feed.author, ATOM + 'uri')
+ feed.author.uri = options.author_uri
+
+ etree.SubElement(feed, ATOM + 'generator')
+ feed.generator = GENERATOR_NAME
+ feed.generator.attrib['version'] = __version__
- doc.feed.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'generator'))
- doc.feed.generator = u'%s' % GENERATOR_NAME
- doc.feed.generator.xml_attributes.setnode(
- doc.feed.generator.xml_attribute_factory(
- ATOM_NAMESPACE, u'version', __version__))
+ etree.SubElement(feed, ATOM + 'updated')
+ feed.updated = _timestamp(tpub)
- doc.feed.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'updated'))
- doc.feed.updated = _timestamp(tpub)
+ # remove http://codespeak.net/lxml/objectify/pytype namespace
+ objectify.deannotate(feed)
+ etree.cleanup_namespaces(feed)
- return doc
+ return feed
class AddEntryCommand (Command):
First, create a feed to edit.
>>> c = NewFeedCommand()
- >>> doc = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
+ >>> feed = c.run(['--title', 'Physics 201', '--author', 'W. Trevor King',
... '--author-uri', 'http://www.physics.drexel.edu/~wking/',
... '--author-email', 'wking@drexel.edu',
... 'http://www.physics.drexel.edu/~wking/phys201'])
>>> fd,path = mkstemp(suffix='.atom', prefix='atomgen-')
>>> close(fd)
- >>> with open(path, 'w') as f:
- ... doc.xml_write(stream=f, **XML_WRITE_KWARGS)
+ >>> root = etree.ElementTree(feed)
+ >>> root.write(path)
Now add an entry to that feed.
>>> c = AddEntryCommand()
>>> stdin = sys.stdin
>>> sys.stdin = StringIO('Changes will be noted in this feed.')
- >>> doc = c.run(['--input', path, 'Feed purpose',
+ >>> feed = c.run(['--input', path, 'Feed purpose',
... 'http://www.physics.drexel.edu/~wking/phys201'])
>>> sys.stdin = stdin
- >>> doc.xml_write(**XML_WRITE_KWARGS)
+ >>> print etree.tostring(feed, pretty_print=True, xml_declaration=True,
+ ... encoding='UTF-8') # doctest: +ELLIPSIS, +REPORT_UDIFF
+ <?xml version='1.0' encoding='UTF-8'?>
+ <feed xmlns="http://www.w3.org/2005/Atom">
+ <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
+ <title>Physics 201</title>
+ <author>
+ <name>W. Trevor King</name>
+ <email>wking@drexel.edu</email>
+ <uri>http://www.physics.drexel.edu/~wking/</uri>
+ </author>
+ <generator version="0.2">atomgen [based on lxml]</generator>
+ <updated>...</updated>
+ <entry>
+ <title>Feed purpose</title>
+ <id>tag:www.physics.drexel.edu,...:/~wking/phys201</id>
+ <link href="http://www.physics.drexel.edu/~wking/phys201"/>
+ <published>...</published>
+ <updated>...</updated>
+ <content type="xhtml">
+ <html:div xmlns:html="http://www.w3.org/1999/xhtml">Changes will be noted in this feed.</html:div>
+ </content>
+ </entry>
+ </feed>
+ <BLANKLINE>
+
+ Note that we cannot move the html namespace declaration to the
+ `<feed>` start tag until there is a way to update namespace maps
+ on the fly. See `lxml bug 555602`_.
+
+ .. _lxml bug 555602: https://bugs.launchpad.net/lxml/+bug/555602
Cleanup.
title = unicode(args[0])
link = unicode(args[1])
+ parser = etree.XMLParser(remove_blank_text=True)
+
if options.ifilename == None:
assert options.content != None, (
'Need to use one of --input or --content')
- doc = AB.parse(sys.stdin)
+ root = objectify.parse(sys.stdin, parser=parser)
else:
- doc = AB.parse(options.ifilename)
+ root = objectify.parse(options.ifilename, parser=parser)
+
+ feed = root.getroot()
if options.content == None:
- content = unicode(sys.stdin.read())
+ content = sys.stdin.read()
else:
- content = file(options.content, 'r').read().decode('utf-8')
-
- # convert content out of unicode. Avoids ?bug? in
- # generator.ax_amara.construct_xhtml_text calls
- # amara.bindery.xml_append_fragment which gives
- # ValueError: String must be of type string, not unicode
- #content = str(content)
+ content = file(options.content, 'r').read()
- new_entry = doc.xml_element_factory(ATOM_NAMESPACE, u'entry')
- print >> sys.stderr, options.ifilename
- print >> sys.stderr, open(options.ifilename, 'r').read()
- print >> sys.stderr, dir(doc)
- doc.feed.xml_append(new_entry)
-
- new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'title'))
- new_entry.title = title
+ entry = etree.SubElement(feed, ATOM + 'entry')
+ etree.SubElement(entry, ATOM + 'title')
+ entry.title = title
tpub = time.time()
- new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'id'))
- new_entry.id = _id(link, tpub)
+ etree.SubElement(entry, ATOM + 'id')
+ entry.id = _id(link, tpub)
+
+ etree.SubElement(entry, ATOM + 'link')
+ entry.link.attrib['href'] = link
- new_entry.xml_append(doc.xml_element_factory(ATOM_NAMESPACE, u'link'))
- new_entry.link.xml_attributes.setnode(
- new_entry.link.xml_attribute_factory(
- ATOM_NAMESPACE, u'href', link))
+ etree.SubElement(entry, ATOM + 'published')
+ entry.published = _timestamp(tpub)
- new_entry.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'published'))
- new_entry.published = _timestamp(tpub)
+ etree.SubElement(entry, ATOM + 'updated')
+ entry.updated = _timestamp(tpub)
- new_entry.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'updated'))
- new_entry.updated = _timestamp(tpub)
+ etree.SubElement(entry, ATOM + 'content')
+ entry.content.attrib['type'] = 'xhtml'
+ etree.SubElement(entry.content, XHTML + 'div')
+ entry.content[XHTML + 'div'] = content
- new_entry.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'content'))
- new_entry.content.xml_attributes.setnode(
- new_entry.content.xml_attribute_factory(
- ATOM_NAMESPACE, u'type', u'xhtml'))
- new_entry.content.xml_append(doc.xml_element_factory(
- XHTML_NAMESPACE, u'div'))
- new_entry.content.div = content
+ if not hasattr(feed, u'updated') :
+ etree.SubElement(feed, ATOM + 'updated')
+ feed.updated = _timestamp(tpub)
- if not hasattr(doc.feed, u'updated') :
- doc.feed.xml_append(doc.xml_element_factory(
- ATOM_NAMESPACE, u'updated'))
- doc.feed.updated = _timestamp(tpub)
+ # remove http://codespeak.net/lxml/objectify/pytype namespace
+ objectify.deannotate(feed)
+ etree.cleanup_namespaces(feed)
- return doc
+ return feed
def test():
command_name = args[0]
command = command_dict[command_name]
args = args[1:]
- doc = command.run(args)
+ feed = command.run(args)
+ ostring = etree.tostring(
+ feed, pretty_print=True, xml_declaration=True, encoding='UTF-8')
if options.ofilename == None:
- doc.xml_write(**XML_WRITE_KWARGS)
- print # add trailing endline
+ print ostring,
else:
with file(options.ofilename, 'w') as of:
- doc.xml_write(stream=of, **XML_WRITE_KWARGS)
- print >> of, '' # add trailing endline
+ of.write(ostring)