Handle unicode encoding when dumping to stdout and start migration to using StringIO

author fuzzyray <fuzzyray@gentoo.org>

Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)

committer fuzzyray <fuzzyray@gentoo.org>

Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)
author fuzzyray <fuzzyray@gentoo.org>
Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)
committer fuzzyray <fuzzyray@gentoo.org>
Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)
diff --git a/src/glsa-check/glsa-check b/src/glsa-check/glsa-check

index 5150d2c741fb93556108ff88a360348bb04efcc3..24e2dcf2b266f991f5f073b9f8c2a87a0bf35a70 100755 (executable)
--- a/src/glsa-check/glsa-check
+++ b/src/glsa-check/glsa-check
@@ -5,6 +5,7 @@
  
  import os
  import sys
+import codecs
  sys.path.insert(0, "/usr/lib/gentoolkit/pym")
  try:
         import portage
@@ -197,7 +198,9 @@ for p in params[:]:
  
  glsalist.extend([g for g in params if g not in glsalist])
  
-def summarylist(myglsalist, fd1=sys.stdout, fd2=sys.stderr):
+def summarylist(myglsalist, fd1=sys.stdout, fd2=sys.stderr, encoding="utf-8"):
+       fd1 = codecs.getwriter(encoding)(fd1)
+       fd2 = codecs.getwriter(encoding)(fd2)
         if not quiet:
                 fd2.write(white("[A]")+" means this GLSA was already applied,\n")
                 fd2.write(green("[U]")+" means the system is not affected and\n")
diff --git a/src/glsa-check/glsa.py b/src/glsa-check/glsa.py

index de6f48dd07b81663566dcc87e7e5e24fdcb1f76d..ec2a6799e5267f51d1d0283a20ec7eadcf4eb03b 100644 (file)
--- a/src/glsa-check/glsa.py
+++ b/src/glsa-check/glsa.py
@@ -16,8 +16,10 @@ __author__ = "Marius Mauch <genone@gentoo.org>"
  import os
  import sys
  import urllib
+import codecs
  import re
  import xml.dom.minidom
+from StringIO import StringIO
  
  if sys.version_info[0:2] < (2, 3):
         raise NotImplementedError("Python versions below 2.3 have broken XML code " \
@@ -175,7 +177,7 @@ def getListElements(listnode):
                 rValue.append(getText(li, format="strip"))
         return rValue
  
-def getText(node, format):
+def getText(node, format, textfd = None):
         """
         This is the main parser function. It takes a node and traverses
         recursive over the subnodes, getting the text of each (and the
@@ -193,50 +195,58 @@ def getText(node, format):
                                         replaces multiple spaces with one space.
                                         I{xml} does some more formatting, depending on the
                                         type of the encountered nodes.
+       @type   textfd: writable file-like object
+       @param  textfd: the file-like object to write the output to
         @rtype:         String
         @return:        the (formatted) content of the node and its subnodes
+                       except if textfd was not none
         """
-       rValue = ""
+       if not textfd:
+               textfd = StringIO()
+               returnNone = False
+       else:
+               returnNone = True
         if format in ["strip", "keep"]:
                 if node.nodeName in ["uri", "mail"]:
-                       rValue += node.childNodes[0].data+": "+node.getAttribute("link")
+                       textfd.write(node.childNodes[0].data+": "+node.getAttribute("link"))
                 else:
                         for subnode in node.childNodes:
                                 if subnode.nodeName == "#text":
-                                       rValue += subnode.data
+                                       textfd.write(subnode.data)
                                 else:
-                                       rValue += getText(subnode, format)
-       else:
+                                       getText(subnode, format, textfd)
+       else: # format = "xml"
                 for subnode in node.childNodes:
                         if subnode.nodeName == "p":
                                 for p_subnode in subnode.childNodes:
                                         if p_subnode.nodeName == "#text":
-                                               rValue += p_subnode.data.strip()
+                                               textfd.write(p_subnode.data.strip())
                                         elif p_subnode.nodeName in ["uri", "mail"]:
-                                               rValue += p_subnode.childNodes[0].data
-                                               rValue += " ( "+p_subnode.getAttribute("link")+" )"
-                               rValue += NEWLINE_ESCAPE
+                                               textfd.write(p_subnode.childNodes[0].data)
+                                               textfd.write(" ( "+p_subnode.getAttribute("link")+" )")
+                               textfd.write(NEWLINE_ESCAPE)
                         elif subnode.nodeName == "ul":
                                 for li in getListElements(subnode):
-                                       rValue += "-"+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" "
+                                       textfd.write("-"+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" ")
                         elif subnode.nodeName == "ol":
                                 i = 0
                                 for li in getListElements(subnode):
                                         i = i+1
-                                       rValue += str(i)+"."+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" "
+                                       textfd.write(str(i)+"."+SPACE_ESCAPE+li+NEWLINE_ESCAPE+" ")
                         elif subnode.nodeName == "code":
-                               rValue += getText(subnode, format="keep").replace("\n", NEWLINE_ESCAPE)
-                               if rValue[-1*len(NEWLINE_ESCAPE):] != NEWLINE_ESCAPE:
-                                       rValue += NEWLINE_ESCAPE
+                               textfd.write(getText(subnode, format="keep").lstrip().replace("\n", NEWLINE_ESCAPE))
+                               textfd.write(NEWLINE_ESCAPE)
                         elif subnode.nodeName == "#text":
-                               rValue += subnode.data
+                               textfd.write(subnode.data)
                         else:
                                 raise GlsaFormatException("Invalid Tag found: ", subnode.nodeName)
+       if returnNone:
+               return None
+       rValue = textfd.getvalue()
         if format == "strip":
                 rValue = rValue.strip(" \n\t")
                 rValue = re.sub("[\s]{2,}", " ", rValue)
-       # Hope that the utf conversion doesn't break anything else
-       return rValue.encode("utf_8")
+       return rValue
  
  def getMultiTagsText(rootnode, tagname, format):
         """
@@ -591,16 +601,17 @@ class Glsa:
                 self.services = self.affected.getElementsByTagName("service")
                 return None
  
-       def dump(self, outstream=sys.stdout):
+       def dump(self, outstream=sys.stdout, encoding="utf-8"):
                 """
                 Dumps a plaintext representation of this GLSA to I{outfile} or 
                 B{stdout} if it is ommitted. You can specify an alternate
-               I{encoding} if needed (default is latin1).
+               I{encoding} if needed (default is utf-8).
                 
                 @type   outstream: File
                 @param  outfile: Stream that should be used for writing
                                                  (defaults to sys.stdout)
                 """
+               outstream = codecs.getwriter(encoding)(outstream)
                 width = int(self.config["PRINTWIDTH"])
                 outstream.write(center("GLSA %s: \n%s" % (self.nr, self.title), width)+"\n")
                 outstream.write((width*"=")+"\n")
author	fuzzyray <fuzzyray@gentoo.org>
	Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)
committer	fuzzyray <fuzzyray@gentoo.org>
	Wed, 20 May 2009 21:10:44 +0000 (21:10 -0000)
src/glsa-check/glsa-check		patch \| blob \| history
src/glsa-check/glsa.py		patch \| blob \| history