From 219ea251208f4ffa1e7eca9fd815c8c66c3499f5 Mon Sep 17 00:00:00 2001
From: Zac Medico <zmedico@gentoo.org>
Date: Sat, 10 Nov 2007 03:12:09 +0000
Subject: [PATCH] Convert all the checks in repoman.checks to use a new
 LineCheck interface that takes a single line as an argument. This has at
 least a few of benefits:
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

 * Eliminates lots of redundant code
 * Error messages are ordered by line number across all checks

The performance is slightly worse due to the increased number
of method calls, but it's not really noticeable in comparison
to the time consumed by dependency checks.

Thanks to Petteri RÃ¤ty <betelgeuse@gentoo.org> for the initial
patch which I only made a few minor modifications to.

svn path=/main/trunk/; revision=8483
---
 bin/repoman           |  39 ++----
 pym/repoman/checks.py | 285 ++++++++++++++++--------------------------
 2 files changed, 115 insertions(+), 209 deletions(-)

diff --git a/bin/repoman b/bin/repoman
index 85d27d20a..ca7530b9f 100755
--- a/bin/repoman
+++ b/bin/repoman
@@ -24,7 +24,7 @@ from commands import getstatusoutput
 from fileinput import input
 from grp import getgrnam
 from itertools import izip
-from stat import S_ISDIR, ST_CTIME, ST_GID, ST_MTIME
+from stat import S_ISDIR, ST_CTIME
 
 try:
 	import cPickle as pickle
@@ -44,13 +44,11 @@ except ImportError:
 del os.environ["PORTAGE_LEGACY_GLOBALS"]
 
 try:
-	from repoman.checks import EbuildWhitespace, EbuildHeader, EbuildQuote, \
-		EbuildAssignment, EbuildNestedDie, EbuildUselessDodoc, EbuildUselessCdS
+	from repoman.checks import run_checks
 except ImportError:
 	from os import path as osp
 	sys.path.insert(0, osp.join(osp.dirname(osp.dirname(osp.realpath(__file__))), 'pym'))
-	from repoman.checks import EbuildWhitespace, EbuildHeader, EbuildQuote, \
-		EbuildAssignment, EbuildNestedDie, EbuildUselessDodoc, EbuildUselessCdS
+	from repoman.checks import run_checks
 
 import portage.checksum
 import portage.const
@@ -1364,35 +1362,16 @@ for x in scanlist:
 				for mybad in mybadrestrict:
 					fails["RESTRICT.invalid"].append(x+"/"+y+".ebuild: %s" % mybad)
 		# Syntax Checks
-		path = checkdir + '/' + y + '.ebuild'
-		myear = time.gmtime(os.stat(path)[ST_MTIME])[0]
-		f = open(path, 'rb')
+		relative_path = os.path.join(x, y + ".ebuild")
+		full_path = os.path.join(repodir, relative_path)
+		f = open(full_path, 'rb')
 		try:
-			contents = f.readlines()
+			for check_name, e in run_checks(f, os.stat(full_path).st_mtime):
+				stats[check_name] += 1
+				fails[check_name].append(relative_path + ': %s' % e)
 		finally:
 			f.close()
 			del f
-		for check in (EbuildWhitespace, EbuildQuote,
-			EbuildAssignment, EbuildUselessDodoc, EbuildUselessCdS):
-			c = check(contents)
-			errors = c.Run()
-			for e in errors:
-				stats[c.repoman_check_name] += 1
-				fails[c.repoman_check_name].append(x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-		del check
-		check = EbuildHeader(contents, str(myear))
-		errors = check.Run()
-		for e in errors:
-			stats[check.repoman_check_name] += 1
-			fails[check.repoman_check_name].append(x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-		del check
-		check = EbuildNestedDie(contents)
-		errors = check.Run()
-		for e in errors:
-			stats[check.repoman_check_name] += 1
-			fails[check.repoman_check_name].append(
-				x + '/' + y + '.ebuild: %s' % e[1] % e[0])
-		del check, errors, path, contents, myear
 
 		if options.force:
 			# The dep_check() calls are the most expensive QA test. If --force
diff --git a/pym/repoman/checks.py b/pym/repoman/checks.py
index b6c430569..63a097501 100644
--- a/pym/repoman/checks.py
+++ b/pym/repoman/checks.py
@@ -11,38 +11,20 @@ from repoman.errors import COPYRIGHT_ERROR, LICENSE_ERROR, CVS_HEADER_ERROR, \
 	LEADING_SPACES_ERROR, READONLY_ASSIGNMENT_ERROR, TRAILING_WHITESPACE_ERROR, \
 	MISSING_QUOTES_ERROR, NESTED_DIE_ERROR, REDUNDANT_CD_S_ERROR
 
+class LineCheck(object):
+	"""Run a check on a line of an ebuild."""
+	"""A regular expression to determine whether to ignore the line"""
+	ignore_line = False
 
-class ContentCheckException(Exception):
-	"""Parent class for exceptions relating to invalid content"""
-	
-	def __init__(self, str):
-		Exception.__init__(self, str)
-
-
-class ContentCheck(object):
-	"""Given a file-like object, run checks over it's content
-	
-	Args:
-	  contents - A file-like object, preferably a StringIO instance
-	
-	Raises:
-		ContentCheckException or a subclass
-	"""
-	repoman_check_name = None
-	
-	def __init__(self, contents):
-		self.contents = contents
-		pass
-	
-	def Run(self):
-		"""Run the check against the contents, return a sequence of errors
-		   of the form ((line, error),...)
-		"""
+	def check(self, num, line):
+		"""Run the check on line and return error if there is one"""
 		pass
 
-
-class EbuildHeader(ContentCheck):
+class EbuildHeader(LineCheck):
 	"""Ensure ebuilds have proper headers
+		Copyright header errors
+		CVS header errors
+		License header errors
 	
 	Args:
 		modification_year - Year the ebuild was last modified
@@ -56,35 +38,24 @@ class EbuildHeader(ContentCheck):
 	gentoo_license = r'# Distributed under the terms of the GNU General Public License v2'
 	cvs_header = re.compile(r'^#\s*\$Header.*\$$')
 
-	def __init__(self, contents, modification_year):
-		ContentCheck.__init__(self, contents)
-		self.modification_year = modification_year
+	def __init__(self, st_mtime):
+		self.modification_year = str(time.gmtime(st_mtime)[0])
 		self.gentoo_copyright_re = re.compile(self.gentoo_copyright % self.modification_year)
 
-	def Run(self):
-		"""Locate simple header mistakes in an ebuild
-		Copyright header errors
-		CVS header errors
-		License header errors
-		"""
-		errors = []
-		for num, line in enumerate(self.contents):
-			if num == 0:
-				match = self.gentoo_copyright_re.match(line)
-				if not match:
-					errors.append((num + 1, COPYRIGHT_ERROR))
-			if num == 1 and line.strip() != self.gentoo_license:
-				errors.append((num + 1, LICENSE_ERROR))
-			if num == 2:
-				match = self.cvs_header.match(line)
-				if not match:
-					errors.append((num + 1, CVS_HEADER_ERROR))
-			if num > 2:
-				return errors
-		return errors
+	def check(self, num, line):
+		if num > 2:
+			return
+		elif num == 0:
+			if not self.gentoo_copyright_re.match(line):
+				return COPYRIGHT_ERROR
+		elif num == 1 and line.strip() != self.gentoo_license:
+			return LICENSE_ERROR
+		elif num == 2:
+			if not self.cvs_header.match(line):
+				return CVS_HEADER_ERROR
 
 
-class EbuildWhitespace(ContentCheck):
+class EbuildWhitespace(LineCheck):
 	"""Ensure ebuilds have proper whitespacing"""
 
 	repoman_check_name = 'ebuild.minorsyn'
@@ -93,28 +64,13 @@ class EbuildWhitespace(ContentCheck):
 	leading_spaces = re.compile(r'^[\S\t]')
 	trailing_whitespace = re.compile(r'.*([\S]$)')	
 
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
-
-	def Run(self):
-		"""Locate simple whitespace errors
-		Lines with leading spaces or trailing whitespace
-		"""
-		errors = []
-		for num, line in enumerate(self.contents):
-			match = self.ignore_line.match(line)
-			if match:
-				continue
-			match = self.leading_spaces.match(line)
-			if not match:
-				errors.append((num + 1, LEADING_SPACES_ERROR))
-			match = self.trailing_whitespace.match(line)
-			if not match:
-				errors.append((num + 1, TRAILING_WHITESPACE_ERROR))
-		return errors
-
+	def check(self, num, line):
+		if not self.leading_spaces.match(line):
+			return LEADING_SPACES_ERROR
+		if not self.trailing_whitespace.match(line):
+			return TRAILING_WHITESPACE_ERROR
 
-class EbuildQuote(ContentCheck):
+class EbuildQuote(LineCheck):
 	"""Ensure ebuilds have valid quoting around things like D,FILESDIR, etc..."""
 
 	repoman_check_name = 'ebuild.minorsyn'
@@ -127,58 +83,44 @@ class EbuildQuote(ContentCheck):
 	cond_begin =  re.compile(r'(^|\s+)\[\[($|\\$|\s+)')
 	cond_end =  re.compile(r'(^|\s+)\]\]($|\\$|\s+)')
 	
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
-
-	def Run(self):
-		"""Locate simple errors in ebuilds:
-		Missing quotes around variables that may contain spaces
-		"""
+	def check(self, num, line):
+		if self.var_reference.search(line) is None:
+			return
+		# There can be multiple matches / violations on a single line. We
+		# have to make sure none of the matches are violators. Once we've
+		# found one violator, any remaining matches on the same line can
+		# be ignored.
+		pos = 0
+		while pos <= len(line) - 1:
+			missing_quotes = self.missing_quotes.search(line, pos)
+			if not missing_quotes:
+				break
+			# If the last character of the previous match is a whitespace
+			# character, that character may be needed for the next
+			# missing_quotes match, so search overlaps by 1 character.
+			group = missing_quotes.group()
+			pos = missing_quotes.end() - 1
+
+			# Filter out some false positives that can
+			# get through the missing_quotes regex.
+			if self.var_reference.search(group) is None:
+				continue
 
-		errors = []
-		for num, line in enumerate(self.contents):
-			if self.ignore_line.match(line) is not None:
+			# This is an attempt to avoid false positives without getting
+			# too complex, while possibly allowing some (hopefully
+			# unlikely) violations to slip through. We just assume
+			# everything is correct if the there is a ' [[ ' or a ' ]] '
+			# anywhere in the whole line (possibly continued over one
+			# line).
+			if self.cond_begin.search(line) is not None:
 				continue
-			if self.var_reference.search(line) is None:
+			if self.cond_end.search(line) is not None:
 				continue
-			# There can be multiple matches / violations on a single line. We
-			# have to make sure none of the matches are violators. Once we've
-			# found one violator, any remaining matches on the same line can
-			# be ignored.
-			pos = 0
-			while pos <= len(line) - 1:
-				missing_quotes = self.missing_quotes.search(line, pos)
-				if not missing_quotes:
-					break
-				# If the last character of the previous match is a whitespace
-				# character, that character may be needed for the next
-				# missing_quotes match, so search overlaps by 1 character.
-				group = missing_quotes.group()
-				pos = missing_quotes.end() - 1
-
-				# Filter out some false positives that can
-				# get through the missing_quotes regex.
-				if self.var_reference.search(group) is None:
-					continue
 
-				# This is an attempt to avoid false positives without getting
-				# too complex, while possibly allowing some (hopefully
-				# unlikely) violations to slip through. We just assume
-				# everything is correct if the there is a ' [[ ' or a ' ]] '
-				# anywhere in the whole line (possibly continued over one
-				# line).
-				if self.cond_begin.search(line) is not None:
-					continue
-				if self.cond_end.search(line) is not None:
-					continue
+			# Any remaining matches on the same line can be ignored.
+			return MISSING_QUOTES_ERROR
 
-				errors.append((num + 1, MISSING_QUOTES_ERROR))
-				# Any remaining matches on the same line can be ignored.
-				break
-		return errors
-
-
-class EbuildAssignment(ContentCheck):
+class EbuildAssignment(LineCheck):
 	"""Ensure ebuilds don't assign to readonly variables."""
 
 	repoman_check_name = 'variable.readonly'
@@ -187,80 +129,65 @@ class EbuildAssignment(ContentCheck):
 	line_continuation = re.compile(r'([^#]*\S)(\s+|\t)\\$')
 	ignore_line = re.compile(r'(^$)|(^(\t)*#)')
 
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
-
-	def Run(self):
-		"""Locate simple errors in ebuilds:
-		Assigning to read-only variables.
-		"""
+	def __init__(self):
+		self.previous_line = None
 
-		errors = []
-		previous_line = None
-		# enumerate is 0 indexed, so add one when necesseary
-		for num, line in enumerate(self.contents):
-			match = self.ignore_line.match(line)
-			if match:
-				continue
-			match = self.readonly_assignment.match(line)
-			if match and (not previous_line or not self.line_continuation.match(previous_line)):
-				errors.append((num + 1, READONLY_ASSIGNMENT_ERROR))
-			previous_line = line
-		return errors
+	def check(self, num, line):
+		match = self.readonly_assignment.match(line)
+		e = None
+		if match and (not self.previous_line or not self.line_continuation.match(self.previous_line)):
+			e = READONLY_ASSIGNMENT_ERROR
+		self.previous_line = line
+		return e
 
-class EbuildNestedDie(ContentCheck):
+class EbuildNestedDie(LineCheck):
 	"""Check ebuild for nested die statements (die statements in subshells"""
 	
 	repoman_check_name = 'ebuild.nesteddie'
 	nesteddie_re = re.compile(r'^[^#]*\([^)]*\bdie\b')
 	
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
-
-	def Run(self):
-		errors = []
-		for num, line in enumerate(self.contents):
-			match = self.nesteddie_re.match(line)
-			if match:
-				errors.append((num + 1, NESTED_DIE_ERROR))
-		return errors
+	def check(self, num, line):
+		if self.nesteddie_re.match(line):
+			return NESTED_DIE_ERROR
 
-class EbuildUselessDodoc(ContentCheck):
+class EbuildUselessDodoc(LineCheck):
 	"""Check ebuild for useless files in dodoc arguments."""
 	repoman_check_name = 'ebuild.minorsyn'
 	uselessdodoc_re = re.compile(
 		r'^\s*dodoc(\s+|\s+.*\s+)(ABOUT-NLS|COPYING|LICENSE)($|\s)')
 
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
+	def check(self, num, line):
+		match = self.uselessdodoc_re.match(line)
+		if match:
+			return "Useless dodoc '%s'" % (match.group(2), ) + " on line: %d"
 
-	def Run(self):
-		errors = []
-		uselessdodoc_re = self.uselessdodoc_re
-		for num, line in enumerate(self.contents):
-			match = uselessdodoc_re.match(line)
-			if match:
-				errors.append((num + 1, "Useless dodoc '%s'" % \
-					(match.group(2), ) + " on line: %d"))
-		return errors
-
-class EbuildUselessCdS(ContentCheck):
+class EbuildUselessCdS(LineCheck):
 	"""Check for redundant cd ${S} statements"""
 	repoman_check_name = 'ebuild.minorsyn'
 	method_re = re.compile(r'^\s*src_(compile|install|test)\s*\(\)')
 	cds_re = re.compile(r'^\s*cd\s+("\$(\{S\}|S)"|\$(\{S\}|S))\s')
 
-	def __init__(self, contents):
-		ContentCheck.__init__(self, contents)
-
-	def Run(self):
-		errors = []
-		check_next_line = False
-		for num, line in enumerate(self.contents):
-			if check_next_line:
-				check_next_line = False
-				if self.cds_re.match(line):
-					errors.append((num + 1, REDUNDANT_CD_S_ERROR))
-			elif self.method_re.match(line):
-				check_next_line = True
-		return errors
+	def __init__(self):
+		self.check_next_line = False
+
+	def check(self, num, line):
+		if self.check_next_line:
+			self.check_next_line = False
+			if self.cds_re.match(line):
+				return REDUNDANT_CD_S_ERROR
+		elif self.method_re.match(line):
+			self.check_next_line = True
+
+def run_checks(contents, st_mtime):
+	checks = []
+	for c in (EbuildWhitespace, EbuildQuote, EbuildAssignment,
+			EbuildUselessDodoc, EbuildUselessCdS, EbuildNestedDie):
+		checks.append(c())
+	checks.append(EbuildHeader(st_mtime))
+	for num, line in enumerate(contents):
+		for lc in checks:
+			ignore = lc.ignore_line
+			if not ignore or not ignore.match(line):
+				e = lc.check(num, line)
+				if e:
+					yield lc.repoman_check_name, e % (num + 1)
-- 
2.26.2