From 9b2d0beb3200553380889395e296a6bf961dd0d3 Mon Sep 17 00:00:00 2001 From: Zac Medico Date: Sun, 3 Mar 2013 09:59:21 -0800 Subject: [PATCH] repoman: check metadata.xml xml decl, bug #328113 --- bin/repoman | 93 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 22 deletions(-) diff --git a/bin/repoman b/bin/repoman index 5618cf198..8c49c06e5 100755 --- a/bin/repoman +++ b/bin/repoman @@ -508,6 +508,9 @@ suspect_virtual = { "dev-libs/libusb-compat":"virtual/libusb", } +metadata_xml_encoding = 'UTF-8' +metadata_xml_declaration = '<?xml version="1.0" encoding="%s"?>' % \ + (metadata_xml_encoding,) metadata_doctype_name = 'pkgmetadata' metadata_dtd_uri = 'http://www.gentoo.org/dtd/metadata.dtd' # force refetch if the local copy creation time is older than this @@ -1274,17 +1277,38 @@ for k, v in repoman_settings.thirdpartymirrors().items(): v += "/" thirdpartymirrors[v] = k +class _XMLParser(xml.etree.ElementTree.XMLParser): + + def __init__(self, data, **kwargs): + xml.etree.ElementTree.XMLParser.__init__(self, **kwargs) + self._portage_data = data + if hasattr(self, 'parser'): + self._base_XmlDeclHandler = self.parser.XmlDeclHandler + self.parser.XmlDeclHandler = self._portage_XmlDeclHandler + self._base_StartDoctypeDeclHandler = \ + self.parser.StartDoctypeDeclHandler + self.parser.StartDoctypeDeclHandler = \ + self._portage_StartDoctypeDeclHandler + + def _portage_XmlDeclHandler(self, version, encoding, standalone): + if self._base_XmlDeclHandler is not None: + self._base_XmlDeclHandler(version, encoding, standalone) + self._portage_data["XML_DECLARATION"] = (version, encoding, standalone) + + def _portage_StartDoctypeDeclHandler(self, doctypeName, systemId, publicId, + has_internal_subset): + if self._base_StartDoctypeDeclHandler is not None: + self._base_StartDoctypeDeclHandler(doctypeName, systemId, publicId, + has_internal_subset) + self._portage_data["DOCTYPE"] = (doctypeName, systemId, publicId) + class
_MetadataTreeBuilder(xml.etree.ElementTree.TreeBuilder): """ Implements doctype() as required to avoid deprecation warnings with >=python-2.7. """ - def __init__(self, data): - xml.etree.ElementTree.TreeBuilder.__init__(self) - self._portage_data = data - def doctype(self, name, pubid, system): - self._portage_data["DOCTYPE"] = (name, pubid, system) + pass try: herd_base = make_herd_base(os.path.join(repoman_settings["PORTDIR"], "metadata/herds.xml")) @@ -1644,43 +1668,68 @@ for x in effective_scanlist: else: metadata_bad = False xml_info = {} + xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder()) # read metadata.xml into memory try: _metadata_xml = xml.etree.ElementTree.parse( _unicode_encode(os.path.join(checkdir, "metadata.xml"), encoding=_encodings['fs'], errors='strict'), - parser=xml.etree.ElementTree.XMLParser( - target=_MetadataTreeBuilder(xml_info))) + parser=xml_parser) except (ExpatError, SyntaxError, EnvironmentError) as e: metadata_bad = True stats["metadata.bad"] += 1 fails["metadata.bad"].append("%s/metadata.xml: %s" % (x, e)) del e else: - if sys.hexversion < 0x2070000 or \ + if not hasattr(xml_parser, 'parser') or \ + sys.hexversion < 0x2070000 or \ (sys.hexversion > 0x3000000 and sys.hexversion < 0x3020000): # doctype is not parsed with python 2.6 or 3.1 pass - elif "DOCTYPE" not in xml_info: - metadata_bad = True - stats["metadata.bad"] += 1 - fails["metadata.bad"].append("%s/metadata.xml: %s" % (x, - "DOCTYPE is missing")) else: - doctype_name, doctype_pubid, doctype_system = \ - xml_info["DOCTYPE"] - if doctype_system != metadata_dtd_uri: + if "XML_DECLARATION" not in xml_info: stats["metadata.bad"] += 1 fails["metadata.bad"].append("%s/metadata.xml: " - "DOCTYPE: SYSTEM should refer to '%s', not '%s'" % - (x, metadata_dtd_uri, doctype_system)) + "xml declaration is missing on first line, " + "should be '%s'" % (x, metadata_xml_declaration)) + else: + xml_version, xml_encoding, xml_standalone = \ + xml_info["XML_DECLARATION"] + if 
xml_encoding is None or \ + xml_encoding.upper() != metadata_xml_encoding: + stats["metadata.bad"] += 1 + if xml_encoding is None: + encoding_problem = "but it is undefined" + else: + encoding_problem = "not '%s'" % xml_encoding + fails["metadata.bad"].append("%s/metadata.xml: " + "xml declaration encoding should be '%s', %s" % + (x, metadata_xml_encoding, encoding_problem)) - if doctype_name != metadata_doctype_name: + if "DOCTYPE" not in xml_info: + metadata_bad = True stats["metadata.bad"] += 1 - fails["metadata.bad"].append("%s/metadata.xml: " - "DOCTYPE: name should be '%s', not '%s'" % - (x, metadata_doctype_name, doctype_name)) + fails["metadata.bad"].append("%s/metadata.xml: %s" % (x, + "DOCTYPE is missing")) + else: + doctype_name, doctype_system, doctype_pubid = \ + xml_info["DOCTYPE"] + if doctype_system != metadata_dtd_uri: + stats["metadata.bad"] += 1 + if doctype_system is None: + system_problem = "but it is undefined" + else: + system_problem = "not '%s'" % doctype_system + fails["metadata.bad"].append("%s/metadata.xml: " + "DOCTYPE: SYSTEM should refer to '%s', %s" % + (x, metadata_dtd_uri, system_problem)) + + if doctype_name != metadata_doctype_name: + stats["metadata.bad"] += 1 + fails["metadata.bad"].append("%s/metadata.xml: " + "DOCTYPE: name should be '%s', not '%s'" % + (x, metadata_doctype_name, doctype_name)) # load USE flags from metadata.xml try: -- 2.26.2