Add portage.xml.metadata to parse metadata.xml
authorSebastian Luther <SebastianLuther@gmx.de>
Mon, 6 Sep 2010 12:33:10 +0000 (14:33 +0200)
committerZac Medico <zmedico@gentoo.org>
Mon, 6 Sep 2010 17:06:43 +0000 (10:06 -0700)
This is a slightly modified version of gentoolkit.metadata

pym/portage/xml/__init__.py [new file with mode: 0644]
pym/portage/xml/metadata.py [new file with mode: 0644]

diff --git a/pym/portage/xml/__init__.py b/pym/portage/xml/__init__.py
new file mode 100644 (file)
index 0000000..21a391a
--- /dev/null
@@ -0,0 +1,2 @@
+# Copyright 2010 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
diff --git a/pym/portage/xml/metadata.py b/pym/portage/xml/metadata.py
new file mode 100644 (file)
index 0000000..70208f8
--- /dev/null
@@ -0,0 +1,338 @@
+# Copyright 2010 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+"""Provides an easy-to-use python interface to Gentoo's metadata.xml file.
+
+       Example usage:
+               >>> from portage.xml.metadata import MetaDataXML
+               >>> pkg_md = MetaDataXML('/usr/portage/app-misc/gourmet/metadata.xml')
+               >>> pkg_md
+               <MetaDataXML '/usr/portage/app-misc/gourmet/metadata.xml'>
+               >>> pkg_md.herds()
+               ['no-herd']
+               >>> for maint in pkg_md.maintainers():
+               ...     print "{0} ({1})".format(maint.email, maint.name)
+               ...
+               nixphoeni@gentoo.org (Joe Sapp)
+               >>> for flag in pkg_md.use():
+               ...     print flag.name, "->", flag.description
+               ...
+               rtf -> Enable export to RTF
+               gnome-print -> Enable printing support using gnome-print
+               >>> upstream = pkg_md.upstream()
+               >>> upstream
+               [<_Upstream {'docs': [], 'remoteid': [], 'maintainer':
+                [<_Maintainer 'Thomas_Hinkle@alumni.brown.edu'>], 'bugtracker': [],
+                'changelog': []}>]
+               >>> upstream[0].maintainer[0].name
+               'Thomas Mills Hinkle'
+"""
+
+__all__ = ('MetaDataXML',)
+
+try:
+       import xml.etree.cElementTree as etree
+except ImportError:
+       import xml.etree.ElementTree as etree
+
+import re
+import portage
+from portage import os
+
+class _Maintainer(object):
+       """An object for representing one maintainer.
+
+       @type email: str or None
+       @ivar email: Maintainer's email address. Used for both Gentoo and upstream.
+       @type name: str or None
+       @ivar name: Maintainer's name. Used for both Gentoo and upstream.
+       @type description: str or None
+       @ivar description: Description of what a maintainer does. Gentoo only.
+       @type restrict: str or None
+       @ivar restrict: e.g. &gt;=portage-2.2 means only maintains versions
+               of Portage greater than 2.2. Should be DEPEND string with < and >
+               converted to &lt; and &gt; respectively. 
+       @type status: str or None
+       @ivar status: If set, either 'active' or 'inactive'. Upstream only.
+       """
+
+       def __init__(self, node):
+               self.email = None
+               self.name = None
+               self.description = None
+               self.restrict = node.get('restrict')
+               self.status = node.get('status')
+               maint_attrs = node.getchildren()
+               for attr in maint_attrs:
+                       setattr(self, attr.tag, attr.text)
+
+       def __repr__(self):
+               return "<%s %r>" % (self.__class__.__name__, self.email)
+
+
+class _Useflag(object):
+       """An object for representing one USE flag.
+
+       @todo: Is there any way to have a keyword option to leave in
+               <pkg> and <cat> for later processing?
+       @type name: str or None
+       @ivar name: USE flag
+       @type restrict: str or None
+       @ivar restrict: e.g. &gt;=portage-2.2 means flag is only avaiable in
+               versions greater than 2.2
+       @type description: str
+       @ivar description: description of the USE flag
+       """
+
+       def __init__(self, node):
+               self.name = node.get('name')
+               self.restrict = node.get('restrict')
+               _desc = ''
+               if node.text:
+                       _desc = node.text
+               for child in node.getchildren():
+                       _desc += child.text if child.text else ''
+                       _desc += child.tail if child.tail else ''
+               # This takes care of tabs and newlines left from the file
+               self.description = re.sub('\s+', ' ', _desc)
+
+       def __repr__(self):
+               return "<%s %r>" % (self.__class__.__name__, self.name)
+
+
+class _Upstream(object):
+       """An object for representing one package's upstream.
+
+       @type maintainers: list
+       @ivar maintainers: L{_Maintainer} objects for each upstream maintainer
+       @type changelogs: list
+       @ivar changelogs: URLs to upstream's ChangeLog file in str format
+       @type docs: list
+       @ivar docs: Sequence of tuples containing URLs to upstream documentation
+               in the first slot and 'lang' attribute in the second, e.g.,
+               [('http.../docs/en/tut.html', None), ('http.../doc/fr/tut.html', 'fr')]
+       @type bugtrackers: list
+       @ivar bugtrackers: URLs to upstream's bugtracker. May also contain an email
+               address if prepended with 'mailto:'
+       @type remoteids: list
+       @ivar remoteids: Sequence of tuples containing the project's hosting site
+               name in the first slot and the project's ID name or number for that
+               site in the second, e.g., [('sourceforge', 'systemrescuecd')]
+       """
+
+       def __init__(self, node):
+               self.node = node
+               self.maintainers = self.upstream_maintainers()
+               self.changelogs = self.upstream_changelogs()
+               self.docs = self.upstream_documentation()
+               self.bugtrackers = self.upstream_bugtrackers()
+               self.remoteids = self.upstream_remoteids()
+
+       def __repr__(self):
+               return "<%s %r>" % (self.__class__.__name__, self.__dict__)
+
+       def upstream_bugtrackers(self):
+               """Retrieve upstream bugtracker location from xml node."""
+               return [e.text for e in self.node.findall('bugs-to')]
+
+       def upstream_changelogs(self):
+               """Retrieve upstream changelog location from xml node."""
+               return [e.text for e in self.node.findall('changelog')]
+
+       def upstream_documentation(self):
+               """Retrieve upstream documentation location from xml node."""
+               result = []
+               for elem in self.node.findall('doc'):
+                       lang = elem.get('lang')
+                       result.append((elem.text, lang))
+               return result
+       
+       def upstream_maintainers(self):
+               """Retrieve upstream maintainer information from xml node."""
+               return [_Maintainer(m) for m in self.node.findall('maintainer')]
+
+       def upstream_remoteids(self):
+               """Retrieve upstream remote ID from xml node."""
+               return [(e.text, e.get('type')) for e in self.node.findall('remote-id')]
+
+
+class MetaDataXML(object):
+       """Access metadata.xml"""
+
+       def __init__(self, metadata_xml_path, settings=None):
+               """Parse a valid metadata.xml file.
+
+               @type metadata_xml_path: str
+               @param metadata_xml_path: path to a valid metadata.xml file
+               @raise IOError: if C{metadata_xml_path} can not be read
+               """
+
+               self.metadata_xml_path = metadata_xml_path
+               self.settings = settings
+               self._xml_tree = None
+
+               if self.settings is None:
+                       self.settings = portage.settings
+
+               try:
+                       self._xml_tree = etree.parse(metadata_xml_path)
+               except ImportError:
+                       pass
+
+               # Used for caching
+               self._herdstree = None
+               self._descriptions = None
+               self._maintainers = None
+               self._herds = None
+               self._useflags = None
+               self._upstream = None
+
+       def __repr__(self):
+               return "<%s %r>" % (self.__class__.__name__, self.metadata_xml_path)
+
+       def _get_herd_email(self, herd):
+               """Get a herd's email address.
+
+               @type herd: str
+               @param herd: herd whose email you want
+               @rtype: str or None
+               @return: email address or None if herd is not in herds.xml
+               @raise IOError: if $PORTDIR/metadata/herds.xml can not be read
+               """
+
+               if self._herdstree is None:
+                       herds_path = os.path.join(self.settings['PORTDIR'], 'metadata/herds.xml')
+                       try:
+                               self._herdstree = etree.parse(herds_path)
+                       except (ImportError, IOError, SyntaxError):
+                               return None
+
+               # Some special herds are not listed in herds.xml
+               if herd in ('no-herd', 'maintainer-wanted', 'maintainer-needed'):
+                       return None
+
+               for node in self._herdstree.getiterator('herd'):
+                       if node.findtext('name') == herd:
+                               return node.findtext('email')
+
+       def herds(self, include_email=False):
+               """Return a list of text nodes for <herd>.
+
+               @type include_email: bool
+               @keyword include_email: if True, also look up the herd's email
+               @rtype: tuple
+               @return: if include_email is False, return a list of strings;
+                        if include_email is True, return a list of tuples containing:
+                                        [('herd1', 'herd1@gentoo.org'), ('no-herd', None);
+               """
+               if self._herds is None:
+                       if self._xml_tree is None:
+                               self._herds = tuple()
+                       else:
+                               herds = []
+                               for elem in self._xml_tree.findall('herd'):
+                                       text = elem.text
+                                       if text is None:
+                                               text = ''
+                                       if include_email:
+                                               herd_mail = self._get_herd_email(text)
+                                               herds.append((text, herd_mail))
+                                       else:
+                                               herds.append(text)
+                               self._herds = tuple(herds)
+
+               return self._herds
+
+       def descriptions(self):
+               """Return a list of text nodes for <longdescription>.
+
+               @rtype: list
+               @return: package description in string format
+               @todo: Support the C{lang} attribute
+               """
+               if self._descriptions is None:
+                       if self._xml_tree is None:
+                               self._descriptions = tuple()
+                       else:
+                               self._descriptions = tuple(e.text \
+                                       for e in self._xml_tree.findall("longdescription"))
+
+               return self._descriptions
+
+       def maintainers(self):
+               """Get maintainers' name, email and description.
+
+               @rtype: list
+               @return: a sequence of L{_Maintainer} objects in document order.
+               """
+
+               if self._maintainers is None:
+                       if self._xml_tree is None:
+                               self._maintainers = tuple()
+                       else:
+                               self._maintainers = tuple(_Maintainer(node) \
+                                       for node in self._xml_tree.findall('maintainer'))
+
+               return self._maintainers
+
+       def use(self):
+               """Get names and descriptions for USE flags defined in metadata.
+
+               @rtype: list
+               @return: a sequence of L{_Useflag} objects in document order.
+               """
+
+               if self._useflags is None:
+                       if self._xml_tree is None:
+                               self._useflags = tuple()
+                       else:
+                               self._useflags = tuple(_Useflag(node) \
+                                       for node in self._xml_tree.getiterator('flag'))
+
+               return self._useflags
+
+       def upstream(self):
+               """Get upstream contact information.
+
+               @rtype: list
+               @return: a sequence of L{_Upstream} objects in document order.
+               """
+
+               if self._upstream is None:
+                       if self._xml_tree is None:
+                               self._useflags = tuple()
+                       else:
+                               self._upstream = tuple(_Upstream(node) \
+                                       for node in self._xml_tree.findall('upstream'))
+
+               return self._upstream
+
+       def format_maintainer_string(self):
+               """Format string containing maintainers and herds (emails if possible).
+               Used by emerge to display maintainer information.
+               Entries are sorted according to the rules stated on the bug wranglers page.
+
+               @rtype: String
+               @return: a string containing maintainers and herds
+               """
+               maintainers = []
+               for maintainer in self.maintainers():
+                       if maintainer.email is None:
+                               maintainers.append(maintainer.name)
+                       else:
+                               maintainers.append(maintainer.email)
+
+               for herd, email in self.herds(include_email=True):
+                       if email is None:
+                               maintainers.append(herd)
+                       else:
+                               maintainers.append(email)
+
+               maint_str = ""
+               if maintainers:
+                       maint_str = maintainers[0]
+                       maintainers = maintainers[1:]
+               if maintainers:
+                       maint_str += " " + ",".join(maintainers)
+
+               return maint_str