import os
import os.path
import re
-import string
java_parsing = 1
+default_java_version = '1.4'
+
if java_parsing:
# Parse Java files for class names.
#
# This is a really cool parser from Charles Crain
# that finds appropriate class names in Java source.
- # A regular expression that will find, in a java file: newlines;
- # any alphanumeric token (keyword, class name, specifier); open or
- # close brackets; a single-line comment "//"; the multi-line comment
- # begin and end tokens /* and */; single or double quotes;
- # single or double quotes preceeded by a backslash; array
- # declarations "[]".
- _reToken = re.compile(r'(\n|\\\\|//|\\[\'"]|[\'"\{\}]|[A-Za-z_][\w\.]*|' +
+ # A regular expression that will find, in a java file:
+ # newlines;
+ # double-backslashes;
+ # a single-line comment "//";
+ # single or double quotes preceded by a backslash;
+ # single quotes, double quotes, open or close braces, semi-colons,
+ # periods, open or close parentheses;
+ # floating-point numbers;
+ # any alphanumeric token (keyword, class name, specifier);
+ # any alphanumeric token surrounded by angle brackets (generics);
+ # the multi-line comment begin and end tokens /* and */;
+ # array declarations "[]".
+ _reToken = re.compile(r'(\n|\\\\|//|\\[\'"]|[\'"\{\}\;\.\(\)]|' +
+ r'\d*\.\d*|[A-Za-z_][\w\$\.]*|<[A-Za-z_]\w+>|' +
r'/\*|\*/|\[\])')
class OuterState:
"""The initial state for parsing a Java file for classes,
interfaces, and anonymous inner classes."""
- def __init__(self):
+ def __init__(self, version=default_java_version):
+
+ if not version in ('1.1', '1.2', '1.3','1.4', '1.5', '1.6',
+ '5', '6'):
+ msg = "Java version %s not supported" % version
+ raise NotImplementedError(msg)
+
+ self.version = version
self.listClasses = []
self.listOutputs = []
self.stackBrackets = []
self.brackets = 0
self.nextAnon = 1
+ self.localClasses = []
+ self.stackAnonClassBrackets = []
+ self.anonStacksStack = [[0]]
self.package = None
+ def trace(self):
+ pass
+
def __getClassState(self):
try:
return self.classState
try:
return self.anonState
except AttributeError:
+ self.outer_state = self
ret = SkipState(1, AnonClassState(self))
self.anonState = ret
return ret
ret = SkipState(1, self)
self.skipState = ret
return ret
+
+ def __getAnonStack(self):
+ return self.anonStacksStack[-1]
+
+ def openBracket(self):
+ self.brackets = self.brackets + 1
+
+ def closeBracket(self):
+ self.brackets = self.brackets - 1
+ if len(self.stackBrackets) and \
+ self.brackets == self.stackBrackets[-1]:
+ self.listOutputs.append('$'.join(self.listClasses))
+ self.localClasses.pop()
+ self.listClasses.pop()
+ self.anonStacksStack.pop()
+ self.stackBrackets.pop()
+ if len(self.stackAnonClassBrackets) and \
+ self.brackets == self.stackAnonClassBrackets[-1]:
+ self.__getAnonStack().pop()
+ self.stackAnonClassBrackets.pop()
def parseToken(self, token):
if token[:2] == '//':
elif token == '/*':
return IgnoreState('*/', self)
elif token == '{':
- self.brackets = self.brackets + 1
+ self.openBracket()
elif token == '}':
- self.brackets = self.brackets - 1
- if len(self.stackBrackets) and \
- self.brackets == self.stackBrackets[-1]:
- self.listOutputs.append(string.join(self.listClasses, '$'))
- self.listClasses.pop()
- self.stackBrackets.pop()
- elif token == '"' or token == "'":
+ self.closeBracket()
+ elif token in [ '"', "'" ]:
return IgnoreState(token, self)
elif token == "new":
# anonymous inner class
return self.__getClassState()
elif token == 'package':
return self.__getPackageState()
+ elif token == '.':
+ # Skip the attribute, it might be named "class", in which
+ # case we don't want to treat the following token as
+ # an inner class name...
+ return self.__getSkipState()
return self
def addAnonClass(self):
"""Add an anonymous inner class"""
- clazz = self.listClasses[0]
- self.listOutputs.append('%s$%d' % (clazz, self.nextAnon))
- self.brackets = self.brackets + 1
+ if self.version in ('1.1', '1.2', '1.3', '1.4'):
+ clazz = self.listClasses[0]
+ self.listOutputs.append('%s$%d' % (clazz, self.nextAnon))
+ elif self.version in ('1.5', '1.6', '5', '6'):
+ self.stackAnonClassBrackets.append(self.brackets)
+ className = []
+ className.extend(self.listClasses)
+ self.__getAnonStack()[-1] = self.__getAnonStack()[-1] + 1
+ for anon in self.__getAnonStack():
+ className.append(str(anon))
+ self.listOutputs.append('$'.join(className))
+
self.nextAnon = self.nextAnon + 1
+ self.__getAnonStack().append(0)
def setPackage(self, package):
self.package = package
class AnonClassState:
"""A state that looks for anonymous inner classes."""
- def __init__(self, outer_state):
+ def __init__(self, old_state):
# outer_state is always an instance of OuterState
- self.outer_state = outer_state
- self.tokens_to_find = 2
+ self.outer_state = old_state.outer_state
+ self.old_state = old_state
+ self.brace_level = 0
def parseToken(self, token):
- # This is an anonymous class if and only if the next token
- # is a bracket
+ # This is an anonymous class if and only if the next
+ # non-whitespace token is an opening brace '{'. Everything between
+ # parentheses should be parsed as normal Java code.
+ if token[:2] == '//':
+ return IgnoreState('\n', self)
+ elif token == '/*':
+ return IgnoreState('*/', self)
+ elif token == '\n':
+ return self
+ elif token[0] == '<' and token[-1] == '>':
+ return self
+ elif token == '(':
+ self.brace_level = self.brace_level + 1
+ return self
+ if self.brace_level > 0:
+ if token == 'new':
+ # look further for anonymous inner class
+ return SkipState(1, AnonClassState(self))
+ elif token in [ '"', "'" ]:
+ return IgnoreState(token, self)
+ elif token == ')':
+ self.brace_level = self.brace_level - 1
+ return self
if token == '{':
self.outer_state.addAnonClass()
- elif token in ['"', "'"]:
- return IgnoreState(token, self)
- return self.outer_state
+ return self.old_state.parseToken(token)
class SkipState:
"""A state that will skip a specified number of tokens before
# the next non-whitespace token should be the name of the class
if token == '\n':
return self
+ # If that's an inner class which is declared in a method, it
+ # requires an index prepended to the class-name, e.g.
+ # 'Foo$1Inner' (Tigris Issue 2087)
+ if self.outer_state.localClasses and \
+ self.outer_state.stackBrackets[-1] > \
+ self.outer_state.stackBrackets[-2]+1:
+ locals = self.outer_state.localClasses[-1]
+ try:
+ idx = locals[token]
+ locals[token] = locals[token]+1
+ except KeyError:
+ locals[token] = 1
+ token = str(locals[token]) + token
+ self.outer_state.localClasses.append({})
self.outer_state.listClasses.append(token)
+ self.outer_state.anonStacksStack.append([0])
return self.outer_state
class IgnoreState:
self.outer_state.setPackage(token)
return self.outer_state
- def parse_java_file(fn):
- return parse_java(open(fn, 'r').read())
+ def parse_java_file(fn, version=default_java_version):
+     """Read the .java file named fn and parse it with parse_java().
+
+     version is forwarded unchanged to parse_java(); the return value
+     is whatever parse_java() returns for the file's contents.
+     """
+     # Close the file explicitly instead of leaving the handle to the
+     # garbage collector; plain try/finally avoids relying on the
+     # 'with' statement.
+     f = open(fn, 'r')
+     try:
+         contents = f.read()
+     finally:
+         f.close()
+     return parse_java(contents, version)
- def parse_java(contents):
+ def parse_java(contents, version=default_java_version, trace=None):
"""Parse a .java file and return a double of package directory,
plus a list of .class files that compiling that .java file will
produce"""
+ # version is handed to OuterState, which raises NotImplementedError
+ # for a version it does not support and uses it to choose how
+ # anonymous inner classes are named.
+ # trace, when given, is called as trace(token, currstate) after each
+ # token has been handed to the current state.
+ # package stays None when the source never set a package.
package = None
- initial = OuterState()
+ initial = OuterState(version)
currstate = initial
for token in _reToken.findall(contents):
# The regex produces a bunch of groups, but only one will
# have anything in it.
currstate = currstate.parseToken(token)
+ # Optional hook: receives the token and the state returned for it.
+ if trace: trace(token, currstate)
if initial.package:
- package = string.replace(initial.package, '.', os.sep)
+ package = initial.package.replace('.', os.sep)
return (package, initial.listOutputs)
else:
the path to the file is the same as the package name.
"""
return os.path.split(file)
+
+# Local Variables:
+# tab-width:4
+# indent-tabs-mode:nil
+# End:
+# vim: set expandtab tabstop=4 shiftwidth=4: