From: stevenknight Date: Thu, 17 Apr 2003 13:15:34 +0000 (+0000) Subject: Update to the right Java parser. (Charles Crain) X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=693721051e3cfecb21df0c63637ee00b05119ff6;p=scons.git Update to the right Java parser. (Charles Crain) git-svn-id: http://scons.tigris.org/svn/scons/trunk@649 fdb21ef1-2011-0410-befe-b5e4ea1792b1 --- diff --git a/src/engine/SCons/Tool/javac.py b/src/engine/SCons/Tool/javac.py index 60233b32..dbee6562 100644 --- a/src/engine/SCons/Tool/javac.py +++ b/src/engine/SCons/Tool/javac.py @@ -44,25 +44,61 @@ java_parsing = 1 if java_parsing: # Parse Java files for class names. # - # This is a really simple and cool parser from Charles Crain + # This is a really cool parser from Charles Crain # that finds appropriate class names in Java source. - _reToken = re.compile(r'[^\\]([\'"])|([\{\}])|' + - r'(?:^|[\{\}\s;])((?:class|interface)'+ - r'\s+[A-Za-z_]\w*)|' + - r'(new\s+[A-Za-z_]\w*\s*\([^\)]*\)\s*\{)|' + - r'(//[^\r\n]*)|(/\*|\*/)') + # A regular expression that will find, in a Java file, + # any alphanumeric token (keyword, class name, specifier); open or + # close brackets; a single-line comment "//"; the multi-line comment + # begin and end tokens /* and */; single or double quotes; and + # single or double quotes preceded by a backslash. 
+ _reToken = re.compile(r'(//[^\r\n]*|\\[\'"]|[\'"\{\}]|[A-Za-z_][\w\.]*|' + + r'/\*|\*/)') class OuterState: + """The initial state for parsing a Java file for classes, + interfaces, and anonymous inner classes.""" def __init__(self): self.listClasses = [] self.listOutputs = [] self.stackBrackets = [] self.brackets = 0 self.nextAnon = 1 + self.package = None + + def __getClassState(self): + try: + return self.classState + except AttributeError: + ret = ClassState(self) + self.classState = ret + return ret + + def __getPackageState(self): + try: + return self.packageState + except AttributeError: + ret = PackageState(self) + self.packageState = ret + return ret + + def __getAnonClassState(self): + try: + return self.anonState + except AttributeError: + ret = SkipState(1, AnonClassState(self)) + self.anonState = ret + return ret + + def __getSkipState(self): + try: + return self.skipState + except AttributeError: + ret = SkipState(1, self) + self.skipState = ret + return ret def parseToken(self, token): - #print token if token[:2] == '//': pass # ignore comment elif token == '/*': @@ -76,61 +112,101 @@ if java_parsing: self.listOutputs.append(string.join(self.listClasses, '$')) self.listClasses.pop() self.stackBrackets.pop() - elif token == '"': - return IgnoreState('"', self) - elif token == "'": - return IgnoreState("'", self) - elif token[:3] == "new": + elif token == '"' or token == "'": + return IgnoreState(token, self) + elif token == "new": # anonymous inner class if len(self.listClasses) > 0: - clazz = self.listClasses[0] - self.listOutputs.append('%s$%d' % (clazz, self.nextAnon)) - self.brackets = self.brackets + 1 - self.nextAnon = self.nextAnon + 1 - elif token[:5] == 'class': + return self.__getAnonClassState() + return self.__getSkipState() # Skip the class name + elif token == 'class' or token == 'interface': if len(self.listClasses) == 0: self.nextAnon = 1 - self.listClasses.append(string.join(string.split(token[6:]))) - 
self.stackBrackets.append(self.brackets) - elif token[:9] == 'interface': - if len(self.listClasses) == 0: - self.nextAnon = 1 - self.listClasses.append(string.join(string.split(token[10:]))) self.stackBrackets.append(self.brackets) + return self.__getClassState() + elif token == 'package': + return self.__getPackageState() return self + def addAnonClass(self): + """Add an anonymous inner class""" + clazz = self.listClasses[0] + self.listOutputs.append('%s$%d' % (clazz, self.nextAnon)) + self.brackets = self.brackets + 1 + self.nextAnon = self.nextAnon + 1 + + def setPackage(self, package): + self.package = package + + class AnonClassState: + """A state that looks for anonymous inner classes.""" + def __init__(self, outer_state): + # outer_state is always an instance of OuterState + self.outer_state = outer_state + self.tokens_to_find = 2 + def parseToken(self, token): + # This is an anonymous class if and only if the next token is a bracket + if token == '{': + self.outer_state.addAnonClass() + return self.outer_state + + class SkipState: + """A state that will skip a specified number of tokens before + reverting to the previous state.""" + def __init__(self, tokens_to_skip, old_state): + self.tokens_to_skip = tokens_to_skip + self.old_state = old_state + def parseToken(self, token): + self.tokens_to_skip = self.tokens_to_skip - 1 + if self.tokens_to_skip < 1: + return self.old_state + return self + + class ClassState: + """A state we go into when we hit a class or interface keyword.""" + def __init__(self, outer_state): + # outer_state is always an instance of OuterState + self.outer_state = outer_state + def parseToken(self, token): + # the only token we get should be the name of the class. 
+ self.outer_state.listClasses.append(token) + return self.outer_state + class IgnoreState: + """A state that will ignore all tokens until it gets to a + specified token.""" def __init__(self, ignore_until, old_state): self.ignore_until = ignore_until self.old_state = old_state def parseToken(self, token): - if token == self.ignore_until: + if self.ignore_until == token: return self.old_state return self - def parse_java(file): - contents = open(file, 'r').read() - - # Is there a more efficient way to do this than to split - # the contents like this? - pkg_dir = None - for line in string.split(contents, "\n"): - if line[:7] == 'package': - pkg = string.split(line)[1] - if pkg[-1] == ';': - pkg = pkg[:-1] - pkg_dir = apply(os.path.join, string.split(pkg, '.')) - break + class PackageState: + """The state we enter when we encounter the package keyword. + We assume the next token will be the package name.""" + def __init__(self, outer_state): + # outer_state is always an instance of OuterState + self.outer_state = outer_state + def parseToken(self, token): + self.outer_state.setPackage(token) + return self.outer_state + def parse_java(fn): + """Parse a .java file and return a 2-tuple: the package directory, + plus a list of .class files that compiling that .java file will + produce""" + package = None initial = OuterState() currstate = initial - for matches in _reToken.findall(contents): + for token in _reToken.findall(open(fn, 'r').read()): # The regex produces a bunch of groups, but only one will # have anything in it. - token = filter(lambda x: x, matches)[0] currstate = currstate.parseToken(token) - - return pkg_dir, initial.listOutputs + if initial.package: + package = string.replace(initial.package, '.', os.sep) + return (package, initial.listOutputs) else: # Don't actually parse Java files for class names.