Update to the right Java parser. (Charles Crain)
author stevenknight <stevenknight@fdb21ef1-2011-0410-befe-b5e4ea1792b1>
Thu, 17 Apr 2003 13:15:34 +0000 (13:15 +0000)
committer stevenknight <stevenknight@fdb21ef1-2011-0410-befe-b5e4ea1792b1>
Thu, 17 Apr 2003 13:15:34 +0000 (13:15 +0000)
git-svn-id: http://scons.tigris.org/svn/scons/trunk@649 fdb21ef1-2011-0410-befe-b5e4ea1792b1

src/engine/SCons/Tool/javac.py

index 60233b32584af990fae30a4049410dfefa857ff4..dbee6562fc5173f4bbe5aaabbcb4a79d4c444e66 100644 (file)
@@ -44,25 +44,61 @@ java_parsing = 1
 if java_parsing:
     # Parse Java files for class names.
     #
-    # This is a really simple and cool parser from Charles Crain
+    # This is a really cool parser from Charles Crain
     # that finds appropriate class names in Java source.
 
-    _reToken = re.compile(r'[^\\]([\'"])|([\{\}])|' +
-                          r'(?:^|[\{\}\s;])((?:class|interface)'+
-                          r'\s+[A-Za-z_]\w*)|' +
-                          r'(new\s+[A-Za-z_]\w*\s*\([^\)]*\)\s*\{)|' +
-                          r'(//[^\r\n]*)|(/\*|\*/)')
+    # A regular expression that will find, in a java file,
+    # any alphanumeric token (keyword, class name, specifier); open or
+    # close brackets; a single-line comment "//"; the multi-line comment
+    # begin and end tokens /* and */; single or double quotes; and
+    # single or double quotes preceded by a backslash.
+    _reToken = re.compile(r'(//[^\r\n]*|\\[\'"]|[\'"\{\}]|[A-Za-z_][\w\.]*|' +
+                          r'/\*|\*/)')
 
     class OuterState:
+        """The initial state for parsing a Java file for classes,
+        interfaces, and anonymous inner classes."""
         def __init__(self):
             self.listClasses = []
             self.listOutputs = []
             self.stackBrackets = []
             self.brackets = 0
             self.nextAnon = 1
+            self.package = None
+
+        def __getClassState(self):
+            try:
+                return self.classState
+            except AttributeError:
+                ret = ClassState(self)
+                self.classState = ret
+                return ret
+
+        def __getPackageState(self):
+            try:
+                return self.packageState
+            except AttributeError:
+                ret = PackageState(self)
+                self.packageState = ret
+                return ret
+
+        def __getAnonClassState(self):
+            try:
+                return self.anonState
+            except AttributeError:
+                ret = SkipState(1, AnonClassState(self))
+                self.anonState = ret
+                return ret
+
+        def __getSkipState(self):
+            try:
+                return self.skipState
+            except AttributeError:
+                ret = SkipState(1, self)
+                self.skipState = ret
+                return ret
 
         def parseToken(self, token):
-            #print token
             if token[:2] == '//':
                 pass # ignore comment
             elif token == '/*':
@@ -76,61 +112,101 @@ if java_parsing:
                     self.listOutputs.append(string.join(self.listClasses, '$'))
                     self.listClasses.pop()
                     self.stackBrackets.pop()
-            elif token == '"':
-                return IgnoreState('"', self)
-            elif token == "'":
-                return IgnoreState("'", self)
-            elif token[:3] == "new":
+            elif token == '"' or token == "'":
+                return IgnoreState(token, self)
+            elif token == "new":
                 # anonymous inner class
                 if len(self.listClasses) > 0:
-                    clazz = self.listClasses[0]
-                    self.listOutputs.append('%s$%d' % (clazz, self.nextAnon))
-                    self.brackets = self.brackets + 1
-                    self.nextAnon = self.nextAnon + 1
-            elif token[:5] == 'class':
+                    return self.__getAnonClassState()
+                return self.__getSkipState() # Skip the class name
+            elif token == 'class' or token == 'interface':
                 if len(self.listClasses) == 0:
                     self.nextAnon = 1
-                self.listClasses.append(string.join(string.split(token[6:])))
-                self.stackBrackets.append(self.brackets)
-            elif token[:9] == 'interface':
-                if len(self.listClasses) == 0:
-                    self.nextAnon = 1
-                self.listClasses.append(string.join(string.split(token[10:])))
                 self.stackBrackets.append(self.brackets)
+                return self.__getClassState()
+            elif token == 'package':
+                return self.__getPackageState()
             return self
 
+        def addAnonClass(self):
+            """Add an anonymous inner class"""
+            clazz = self.listClasses[0]
+            self.listOutputs.append('%s$%d' % (clazz, self.nextAnon))
+            self.brackets = self.brackets + 1
+            self.nextAnon = self.nextAnon + 1
+
+        def setPackage(self, package):
+            self.package = package
+
+    class AnonClassState:
+        """A state that looks for anonymous inner classes."""
+        def __init__(self, outer_state):
+            # outer_state is always an instance of OuterState
+            self.outer_state = outer_state
+            self.tokens_to_find = 2
+        def parseToken(self, token):
+            # This is an anonymous class if and only if the next token is a bracket
+            if token == '{':
+                self.outer_state.addAnonClass()
+            return self.outer_state
+
+    class SkipState:
+        """A state that will skip a specified number of tokens before
+        reverting to the previous state."""
+        def __init__(self, tokens_to_skip, old_state):
+            self.tokens_to_skip = tokens_to_skip
+            self.old_state = old_state
+        def parseToken(self, token):
+            self.tokens_to_skip = self.tokens_to_skip - 1
+            if self.tokens_to_skip < 1:
+                return self.old_state
+            return self
+
+    class ClassState:
+        """A state we go into when we hit a class or interface keyword."""
+        def __init__(self, outer_state):
+            # outer_state is always an instance of OuterState
+            self.outer_state = outer_state
+        def parseToken(self, token):
+            # the only token we get should be the name of the class.
+            self.outer_state.listClasses.append(token)
+            return self.outer_state
+
     class IgnoreState:
+        """A state that will ignore all tokens until it gets to a
+        specified token."""
         def __init__(self, ignore_until, old_state):
             self.ignore_until = ignore_until
             self.old_state = old_state
         def parseToken(self, token):
-            if token == self.ignore_until:
+            if self.ignore_until == token:
                 return self.old_state
             return self
 
-    def parse_java(file):
-        contents = open(file, 'r').read()
-
-        # Is there a more efficient way to do this than to split
-        # the contents like this?
-        pkg_dir = None
-        for line in string.split(contents, "\n"):
-            if line[:7] == 'package':
-                pkg = string.split(line)[1]
-                if pkg[-1] == ';':
-                    pkg = pkg[:-1]
-                pkg_dir = apply(os.path.join, string.split(pkg, '.'))
-                break
+    class PackageState:
+        """The state we enter when we encounter the package keyword.
+        We assume the next token will be the package name."""
+        def __init__(self, outer_state):
+            # outer_state is always an instance of OuterState
+            self.outer_state = outer_state
+        def parseToken(self, token):
+            self.outer_state.setPackage(token)
+            return self.outer_state
 
+    def parse_java(fn):
+        """Parse a .java file and return a tuple of package directory,
+        plus a list of .class files that compiling that .java file will
+        produce"""
+        package = None
         initial = OuterState()
         currstate = initial
-        for matches in _reToken.findall(contents):
+        for token in _reToken.findall(open(fn, 'r').read()):
             # The regex produces a bunch of groups, but only one will
             # have anything in it.
-            token = filter(lambda x: x, matches)[0]
             currstate = currstate.parseToken(token)
-
-        return pkg_dir, initial.listOutputs
+        if initial.package:
+            package = string.replace(initial.package, '.', os.sep)
+        return (package, initial.listOutputs)
 
 else:
     # Don't actually parse Java files for class names.