Convert all the checks in repoman.checks to use a new LineCheck
authorZac Medico <zmedico@gentoo.org>
Sat, 10 Nov 2007 03:12:09 +0000 (03:12 -0000)
committerZac Medico <zmedico@gentoo.org>
Sat, 10 Nov 2007 03:12:09 +0000 (03:12 -0000)
interface that takes a single line as an argument. This has at
least a few of benefits:

 * Eliminates lots of redundant code
 * Error messages are ordered by line number across all checks

The performance is slightly worse due to the increased number
of method calls, but it's not really noticeable in comparison
to the time consumed by dependency checks.

Thanks to Petteri Räty <betelgeuse@gentoo.org> for the initial
patch which I only made a few minor modifications to.

svn path=/main/trunk/; revision=8483

bin/repoman
pym/repoman/checks.py

index 85d27d20af658361f8476a9db84b296f88f2915c..ca7530b9f72d06238256bc6c267a5de0235c87be 100755 (executable)
@@ -24,7 +24,7 @@ from commands import getstatusoutput
 from fileinput import input
 from grp import getgrnam
 from itertools import izip
-from stat import S_ISDIR, ST_CTIME, ST_GID, ST_MTIME
+from stat import S_ISDIR, ST_CTIME
 
 try:
        import cPickle as pickle
@@ -44,13 +44,11 @@ except ImportError:
 del os.environ["PORTAGE_LEGACY_GLOBALS"]
 
 try:
-       from repoman.checks import EbuildWhitespace, EbuildHeader, EbuildQuote, \
-               EbuildAssignment, EbuildNestedDie, EbuildUselessDodoc, EbuildUselessCdS
+       from repoman.checks import run_checks
 except ImportError:
        from os import path as osp
        sys.path.insert(0, osp.join(osp.dirname(osp.dirname(osp.realpath(__file__))), 'pym'))
-       from repoman.checks import EbuildWhitespace, EbuildHeader, EbuildQuote, \
-               EbuildAssignment, EbuildNestedDie, EbuildUselessDodoc, EbuildUselessCdS
+       from repoman.checks import run_checks
 
 import portage.checksum
 import portage.const
@@ -1364,35 +1362,16 @@ for x in scanlist:
                                for mybad in mybadrestrict:
                                        fails["RESTRICT.invalid"].append(x+"/"+y+".ebuild: %s" % mybad)
                # Syntax Checks
-               path = checkdir + '/' + y + '.ebuild'
-               myear = time.gmtime(os.stat(path)[ST_MTIME])[0]
-               f = open(path, 'rb')
+               relative_path = os.path.join(x, y + ".ebuild")
+               full_path = os.path.join(repodir, relative_path)
+               f = open(full_path, 'rb')
                try:
-                       contents = f.readlines()
+                       for check_name, e in run_checks(f, os.stat(full_path).st_mtime):
+                               stats[check_name] += 1
+                               fails[check_name].append(relative_path + ': %s' % e)
                finally:
                        f.close()
                        del f
-               for check in (EbuildWhitespace, EbuildQuote,
-                       EbuildAssignment, EbuildUselessDodoc, EbuildUselessCdS):
-                       c = check(contents)
-                       errors = c.Run()
-                       for e in errors:
-                               stats[c.repoman_check_name] += 1
-                               fails[c.repoman_check_name].append(x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-               del check
-               check = EbuildHeader(contents, str(myear))
-               errors = check.Run()
-               for e in errors:
-                       stats[check.repoman_check_name] += 1
-                       fails[check.repoman_check_name].append(x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-               del check
-               check = EbuildNestedDie(contents)
-               errors = check.Run()
-               for e in errors:
-                       stats[check.repoman_check_name] += 1
-                       fails[check.repoman_check_name].append(
-                               x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-               del check, errors, path, contents, myear
 
                if options.force:
                        # The dep_check() calls are the most expensive QA test. If --force
index b6c430569407d4df160b6f3b590e12f6ebe6680a..63a09750120a8d35c42c66a96fcf3af8d672c94f 100644 (file)
@@ -11,38 +11,20 @@ from repoman.errors import COPYRIGHT_ERROR, LICENSE_ERROR, CVS_HEADER_ERROR, \
        LEADING_SPACES_ERROR, READONLY_ASSIGNMENT_ERROR, TRAILING_WHITESPACE_ERROR, \
        MISSING_QUOTES_ERROR, NESTED_DIE_ERROR, REDUNDANT_CD_S_ERROR
 
+class LineCheck(object):
+       """Run a check on a line of an ebuild."""
+       """A regular expression to determine whether to ignore the line"""
+       ignore_line = False
 
-class ContentCheckException(Exception):
-       """Parent class for exceptions relating to invalid content"""
-       
-       def __init__(self, str):
-               Exception.__init__(self, str)
-
-
-class ContentCheck(object):
-       """Given a file-like object, run checks over it's content
-       
-       Args:
-         contents - A file-like object, preferably a StringIO instance
-       
-       Raises:
-               ContentCheckException or a subclass
-       """
-       repoman_check_name = None
-       
-       def __init__(self, contents):
-               self.contents = contents
-               pass
-       
-       def Run(self):
-               """Run the check against the contents, return a sequence of errors
-                  of the form ((line, error),...)
-               """
+       def check(self, num, line):
+               """Run the check on line and return error if there is one"""
                pass
 
-
-class EbuildHeader(ContentCheck):
+class EbuildHeader(LineCheck):
        """Ensure ebuilds have proper headers
+               Copyright header errors
+               CVS header errors
+               License header errors
        
        Args:
                modification_year - Year the ebuild was last modified
@@ -56,35 +38,24 @@ class EbuildHeader(ContentCheck):
        gentoo_license = r'# Distributed under the terms of the GNU General Public License v2'
        cvs_header = re.compile(r'^#\s*\$Header.*\$$')
 
-       def __init__(self, contents, modification_year):
-               ContentCheck.__init__(self, contents)
-               self.modification_year = modification_year
+       def __init__(self, st_mtime):
+               self.modification_year = str(time.gmtime(st_mtime)[0])
                self.gentoo_copyright_re = re.compile(self.gentoo_copyright % self.modification_year)
 
-       def Run(self):
-               """Locate simple header mistakes in an ebuild
-               Copyright header errors
-               CVS header errors
-               License header errors
-               """
-               errors = []
-               for num, line in enumerate(self.contents):
-                       if num == 0:
-                               match = self.gentoo_copyright_re.match(line)
-                               if not match:
-                                       errors.append((num + 1, COPYRIGHT_ERROR))
-                       if num == 1 and line.strip() != self.gentoo_license:
-                               errors.append((num + 1, LICENSE_ERROR))
-                       if num == 2:
-                               match = self.cvs_header.match(line)
-                               if not match:
-                                       errors.append((num + 1, CVS_HEADER_ERROR))
-                       if num > 2:
-                               return errors
-               return errors
+       def check(self, num, line):
+               if num > 2:
+                       return
+               elif num == 0:
+                       if not self.gentoo_copyright_re.match(line):
+                               return COPYRIGHT_ERROR
+               elif num == 1 and line.strip() != self.gentoo_license:
+                       return LICENSE_ERROR
+               elif num == 2:
+                       if not self.cvs_header.match(line):
+                               return CVS_HEADER_ERROR
 
 
-class EbuildWhitespace(ContentCheck):
+class EbuildWhitespace(LineCheck):
        """Ensure ebuilds have proper whitespacing"""
 
        repoman_check_name = 'ebuild.minorsyn'
@@ -93,28 +64,13 @@ class EbuildWhitespace(ContentCheck):
        leading_spaces = re.compile(r'^[\S\t]')
        trailing_whitespace = re.compile(r'.*([\S]$)')  
 
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
-
-       def Run(self):
-               """Locate simple whitespace errors
-               Lines with leading spaces or trailing whitespace
-               """
-               errors = []
-               for num, line in enumerate(self.contents):
-                       match = self.ignore_line.match(line)
-                       if match:
-                               continue
-                       match = self.leading_spaces.match(line)
-                       if not match:
-                               errors.append((num + 1, LEADING_SPACES_ERROR))
-                       match = self.trailing_whitespace.match(line)
-                       if not match:
-                               errors.append((num + 1, TRAILING_WHITESPACE_ERROR))
-               return errors
-
+       def check(self, num, line):
+               if not self.leading_spaces.match(line):
+                       return LEADING_SPACES_ERROR
+               if not self.trailing_whitespace.match(line):
+                       return TRAILING_WHITESPACE_ERROR
 
-class EbuildQuote(ContentCheck):
+class EbuildQuote(LineCheck):
        """Ensure ebuilds have valid quoting around things like D,FILESDIR, etc..."""
 
        repoman_check_name = 'ebuild.minorsyn'
@@ -127,58 +83,44 @@ class EbuildQuote(ContentCheck):
        cond_begin =  re.compile(r'(^|\s+)\[\[($|\\$|\s+)')
        cond_end =  re.compile(r'(^|\s+)\]\]($|\\$|\s+)')
        
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
-
-       def Run(self):
-               """Locate simple errors in ebuilds:
-               Missing quotes around variables that may contain spaces
-               """
+       def check(self, num, line):
+               if self.var_reference.search(line) is None:
+                       return
+               # There can be multiple matches / violations on a single line. We
+               # have to make sure none of the matches are violators. Once we've
+               # found one violator, any remaining matches on the same line can
+               # be ignored.
+               pos = 0
+               while pos <= len(line) - 1:
+                       missing_quotes = self.missing_quotes.search(line, pos)
+                       if not missing_quotes:
+                               break
+                       # If the last character of the previous match is a whitespace
+                       # character, that character may be needed for the next
+                       # missing_quotes match, so search overlaps by 1 character.
+                       group = missing_quotes.group()
+                       pos = missing_quotes.end() - 1
+
+                       # Filter out some false positives that can
+                       # get through the missing_quotes regex.
+                       if self.var_reference.search(group) is None:
+                               continue
 
-               errors = []
-               for num, line in enumerate(self.contents):
-                       if self.ignore_line.match(line) is not None:
+                       # This is an attempt to avoid false positives without getting
+                       # too complex, while possibly allowing some (hopefully
+                       # unlikely) violations to slip through. We just assume
+                       # everything is correct if the there is a ' [[ ' or a ' ]] '
+                       # anywhere in the whole line (possibly continued over one
+                       # line).
+                       if self.cond_begin.search(line) is not None:
                                continue
-                       if self.var_reference.search(line) is None:
+                       if self.cond_end.search(line) is not None:
                                continue
-                       # There can be multiple matches / violations on a single line. We
-                       # have to make sure none of the matches are violators. Once we've
-                       # found one violator, any remaining matches on the same line can
-                       # be ignored.
-                       pos = 0
-                       while pos <= len(line) - 1:
-                               missing_quotes = self.missing_quotes.search(line, pos)
-                               if not missing_quotes:
-                                       break
-                               # If the last character of the previous match is a whitespace
-                               # character, that character may be needed for the next
-                               # missing_quotes match, so search overlaps by 1 character.
-                               group = missing_quotes.group()
-                               pos = missing_quotes.end() - 1
-
-                               # Filter out some false positives that can
-                               # get through the missing_quotes regex.
-                               if self.var_reference.search(group) is None:
-                                       continue
 
-                               # This is an attempt to avoid false positives without getting
-                               # too complex, while possibly allowing some (hopefully
-                               # unlikely) violations to slip through. We just assume
-                               # everything is correct if the there is a ' [[ ' or a ' ]] '
-                               # anywhere in the whole line (possibly continued over one
-                               # line).
-                               if self.cond_begin.search(line) is not None:
-                                       continue
-                               if self.cond_end.search(line) is not None:
-                                       continue
+                       # Any remaining matches on the same line can be ignored.
+                       return MISSING_QUOTES_ERROR
 
-                               errors.append((num + 1, MISSING_QUOTES_ERROR))
-                               # Any remaining matches on the same line can be ignored.
-                               break
-               return errors
-
-
-class EbuildAssignment(ContentCheck):
+class EbuildAssignment(LineCheck):
        """Ensure ebuilds don't assign to readonly variables."""
 
        repoman_check_name = 'variable.readonly'
@@ -187,80 +129,65 @@ class EbuildAssignment(ContentCheck):
        line_continuation = re.compile(r'([^#]*\S)(\s+|\t)\\$')
        ignore_line = re.compile(r'(^$)|(^(\t)*#)')
 
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
-
-       def Run(self):
-               """Locate simple errors in ebuilds:
-               Assigning to read-only variables.
-               """
+       def __init__(self):
+               self.previous_line = None
 
-               errors = []
-               previous_line = None
-               # enumerate is 0 indexed, so add one when necesseary
-               for num, line in enumerate(self.contents):
-                       match = self.ignore_line.match(line)
-                       if match:
-                               continue
-                       match = self.readonly_assignment.match(line)
-                       if match and (not previous_line or not self.line_continuation.match(previous_line)):
-                               errors.append((num + 1, READONLY_ASSIGNMENT_ERROR))
-                       previous_line = line
-               return errors
+       def check(self, num, line):
+               match = self.readonly_assignment.match(line)
+               e = None
+               if match and (not self.previous_line or not self.line_continuation.match(self.previous_line)):
+                       e = READONLY_ASSIGNMENT_ERROR
+               self.previous_line = line
+               return e
 
-class EbuildNestedDie(ContentCheck):
+class EbuildNestedDie(LineCheck):
        """Check ebuild for nested die statements (die statements in subshells"""
        
        repoman_check_name = 'ebuild.nesteddie'
        nesteddie_re = re.compile(r'^[^#]*\([^)]*\bdie\b')
        
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
-
-       def Run(self):
-               errors = []
-               for num, line in enumerate(self.contents):
-                       match = self.nesteddie_re.match(line)
-                       if match:
-                               errors.append((num + 1, NESTED_DIE_ERROR))
-               return errors
+       def check(self, num, line):
+               if self.nesteddie_re.match(line):
+                       return NESTED_DIE_ERROR
 
-class EbuildUselessDodoc(ContentCheck):
+class EbuildUselessDodoc(LineCheck):
        """Check ebuild for useless files in dodoc arguments."""
        repoman_check_name = 'ebuild.minorsyn'
        uselessdodoc_re = re.compile(
                r'^\s*dodoc(\s+|\s+.*\s+)(ABOUT-NLS|COPYING|LICENSE)($|\s)')
 
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
+       def check(self, num, line):
+               match = self.uselessdodoc_re.match(line)
+               if match:
+                       return "Useless dodoc '%s'" % (match.group(2), ) + " on line: %d"
 
-       def Run(self):
-               errors = []
-               uselessdodoc_re = self.uselessdodoc_re
-               for num, line in enumerate(self.contents):
-                       match = uselessdodoc_re.match(line)
-                       if match:
-                               errors.append((num + 1, "Useless dodoc '%s'" % \
-                                       (match.group(2), ) + " on line: %d"))
-               return errors
-
-class EbuildUselessCdS(ContentCheck):
+class EbuildUselessCdS(LineCheck):
        """Check for redundant cd ${S} statements"""
        repoman_check_name = 'ebuild.minorsyn'
        method_re = re.compile(r'^\s*src_(compile|install|test)\s*\(\)')
        cds_re = re.compile(r'^\s*cd\s+("\$(\{S\}|S)"|\$(\{S\}|S))\s')
 
-       def __init__(self, contents):
-               ContentCheck.__init__(self, contents)
-
-       def Run(self):
-               errors = []
-               check_next_line = False
-               for num, line in enumerate(self.contents):
-                       if check_next_line:
-                               check_next_line = False
-                               if self.cds_re.match(line):
-                                       errors.append((num + 1, REDUNDANT_CD_S_ERROR))
-                       elif self.method_re.match(line):
-                               check_next_line = True
-               return errors
+       def __init__(self):
+               self.check_next_line = False
+
+       def check(self, num, line):
+               if self.check_next_line:
+                       self.check_next_line = False
+                       if self.cds_re.match(line):
+                               return REDUNDANT_CD_S_ERROR
+               elif self.method_re.match(line):
+                       self.check_next_line = True
+
+def run_checks(contents, st_mtime):
+       checks = []
+       for c in (EbuildWhitespace, EbuildQuote, EbuildAssignment,
+                       EbuildUselessDodoc, EbuildUselessCdS, EbuildNestedDie):
+               checks.append(c())
+       checks.append(EbuildHeader(st_mtime))
+       for num, line in enumerate(contents):
+               for lc in checks:
+                       ignore = lc.ignore_line
+                       if not ignore or not ignore.match(line):
+                               e = lc.check(num, line)
+                               if e:
+                                       yield lc.repoman_check_name, e % (num + 1)