repoman: unroll escaped lines so we can check the entirety of it

author Mike Frysinger <vapier@gentoo.org>

Thu, 24 May 2012 04:05:30 +0000 (00:05 -0400)

committer Mike Frysinger <vapier@gentoo.org>

Fri, 25 May 2012 16:20:12 +0000 (12:20 -0400)
author Mike Frysinger <vapier@gentoo.org>
Thu, 24 May 2012 04:05:30 +0000 (00:05 -0400)
committer Mike Frysinger <vapier@gentoo.org>
Fri, 25 May 2012 16:20:12 +0000 (12:20 -0400)
diff --git a/pym/repoman/checks.py b/pym/repoman/checks.py

index 77df603a27a18f9f8f766bd4537f800515bd8587..a413968e86a9e3393b280d858dee288c7875bad5 100644 (file)
--- a/pym/repoman/checks.py
+++ b/pym/repoman/checks.py
@@ -5,6 +5,7 @@
  """This module contains functions used in Repoman to ascertain the quality
  and correctness of an ebuild."""
  
+import codecs
  import re
  import time
  import repoman.errors as errors
@@ -692,8 +693,11 @@ _here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$')
  _ignore_comment_re = re.compile(r'^\s*#')
  
  def run_checks(contents, pkg):
+       unicode_escape_codec = codecs.lookup('unicode_escape')
+       unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
         checks = _constant_checks
         here_doc_delim = None
+       multiline = None
  
         for lc in checks:
                 lc.new(pkg)
@@ -707,19 +711,56 @@ def run_checks(contents, pkg):
                         here_doc = _here_doc_re.match(line)
                         if here_doc is not None:
                                 here_doc_delim = re.compile(r'^\s*%s$' % here_doc.group(1))
+               if here_doc_delim is not None:
+                       continue
+
+               # Unroll multiline escaped strings so that we can check things:
+               #               inherit foo bar \
+               #                       moo \
+               #                       cow
+               # This will merge these lines like so:
+               #               inherit foo bar         moo     cow
+               try:
+                       # A continued line ends in the two bytes: <\> <\n>.  Decoding
+                       # that directly makes python treat the <\n> as being escaped
+                       # and eat the single <\>, which makes it hard for us to detect.
+                       # Instead, strip the newline (which we know all lines have), and
+                       # append a <0>.  Then when python decodes it, if the line ended
+                       # in a <\>, we'll end up with a <\0> marker to key off of.  This
+                       # shouldn't be a problem with any valid ebuild ...
+                       line_escaped = unicode_escape(line.rstrip('\n') + '0')
+               except SystemExit:
+                       raise
+               except:
+                       # Who knows what kind of crazy crap an ebuild will have
+                       # in it -- don't allow it to kill us.
+                       line_escaped = line
+               if multiline:
+                       # Chop off the \ and \n bytes from the previous line.
+                       multiline = multiline[:-2] + line
+                       if not line_escaped.endswith('\0'):
+                               line = multiline
+                               num = multinum
+                               multiline = None
+                       else:
+                               continue
+               else:
+                       if line_escaped.endswith('\0'):
+                               multinum = num
+                               multiline = line
+                               continue
  
-               if here_doc_delim is None:
-                       # We're not in a here-document.
-                       is_comment = _ignore_comment_re.match(line) is not None
-                       for lc in checks:
-                               if is_comment and lc.ignore_comment:
-                                       continue
-                               if lc.check_eapi(pkg.metadata['EAPI']):
-                                       ignore = lc.ignore_line
-                                       if not ignore or not ignore.match(line):
-                                               e = lc.check(num, line)
-                                               if e:
-                                                       yield lc.repoman_check_name, e % (num + 1)
+               # Finally we have a full line to parse.
+               is_comment = _ignore_comment_re.match(line) is not None
+               for lc in checks:
+                       if is_comment and lc.ignore_comment:
+                               continue
+                       if lc.check_eapi(pkg.metadata['EAPI']):
+                               ignore = lc.ignore_line
+                               if not ignore or not ignore.match(line):
+                                       e = lc.check(num, line)
+                                       if e:
+                                               yield lc.repoman_check_name, e % (num + 1)
  
         for lc in checks:
                 i = lc.end()
author	Mike Frysinger <vapier@gentoo.org>
	Thu, 24 May 2012 04:05:30 +0000 (00:05 -0400)
committer	Mike Frysinger <vapier@gentoo.org>
	Fri, 25 May 2012 16:20:12 +0000 (12:20 -0400)