6 import cPickle as pickle
15 cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=object)
17 from Cython import Plex, Utils
18 from Cython.Plex.Scanners import Scanner
19 from Cython.Plex.Errors import UnrecognizedInput
20 from Errors import CompileError, error
21 from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT
23 from StringEncoding import EncodedString
# Older Plex releases have no _version attribute; treat that as
# "development version" (None), which disables lexicon pickling below.
try:
    plex_version = Plex._version
except AttributeError:
    plex_version = None
#print "Plex version:", plex_version ###
# Tunable knobs for scanner debugging and the lexicon pickle cache.
scanner_debug_flags = 0        # NOTE(review): debug bitmask, presumably passed to Plex -- confirm at use site
scanner_dump_file = None       # NOTE(review): target file for scanner dumps, if any -- confirm at use site
binary_lexicon_pickle = 1      # pickle protocol used when caching the lexicon
notify_lexicon_unpickling = 0  # print progress/timing while unpickling the lexicon
notify_lexicon_pickling = 1    # print progress/timing while pickling the lexicon
41 #-----------------------------------------------------------------
def hash_source_file(path):
    # Try to calculate a hash code for the given source file.
    # Returns an empty string if the file cannot be accessed.
    #print "Hashing", path ###
    try:
        # hashlib is the modern home of md5; fall back to the legacy
        # md5 module on very old Pythons.
        try:
            from hashlib import md5 as new_md5
        except ImportError:
            from md5 import new as new_md5
        f = open(path, "r")
        try:
            text = f.read()
        finally:
            f.close()
    except IOError as e:
        print("Unable to hash scanner source file (%s)" % e)
        return ""
    # Normalise spaces/tabs. We don't know what sort of
    # space-tab substitution the file may have been
    # through, so we replace all spans of spaces and
    # tabs by a single space.
    text = re.sub("[ \t]+", " ", text)
    hash = new_md5(text.encode("ASCII")).hexdigest()
    return hash
def open_pickled_lexicon(expected_hash):
    # Try to open pickled lexicon file and verify that
    # it matches the source file. Returns the opened
    # file if successful, otherwise None. ???
    f = None
    result = None
    if os.path.exists(lexicon_pickle):
        try:
            f = open(lexicon_pickle, "rb")
            actual_hash = pickle.load(f)
            if actual_hash == expected_hash:
                # Hand the still-open file back so the caller can
                # unpickle the lexicon itself from the same stream.
                result = f
                f = None
            else:
                print("Lexicon hash mismatch:") ###
                print("   expected " + expected_hash) ###
                print("   got      " + actual_hash) ###
        except (IOError, pickle.UnpicklingError) as e:
            print("Warning: Unable to read pickled lexicon " + lexicon_pickle)
            print("Exception: " + str(e))
    if f:
        f.close()
    return result
def try_to_unpickle_lexicon():
    # Load the cached lexicon pickle if its stored hash matches the
    # current Lexicon.py; on any failure leave the global lexicon unset
    # so it gets regenerated.
    global lexicon, lexicon_pickle, lexicon_hash
    dir = os.path.dirname(__file__)
    source_file = os.path.join(dir, "Lexicon.py")
    lexicon_hash = hash_source_file(source_file)
    lexicon_pickle = os.path.join(dir, "Lexicon.pickle")
    f = open_pickled_lexicon(lexicon_hash)
    if f:
        if notify_lexicon_unpickling:
            t0 = time()
            print("Unpickling lexicon...")
        try:
            lexicon = pickle.load(f)
        except Exception as e:
            # A stale or corrupt pickle is not fatal -- fall back to
            # regenerating the lexicon from source.
            print("WARNING: Exception while loading lexicon pickle, regenerating")
            print(e)
            lexicon = None
        f.close()
        if notify_lexicon_unpickling:
            t1 = time()
            print("Done (%.2f seconds)" % (t1 - t0))
def create_new_lexicon():
    # Build the lexicon from scratch via make_lexicon() and store it in
    # the module-global `lexicon`.
    global lexicon
    t0 = time()
    print("Creating lexicon...")
    lexicon = make_lexicon()
    t1 = time()
    print("Done (%.2f seconds)" % (t1 - t0))
def pickle_lexicon():
    # Cache the lexicon: write the source hash first, then the lexicon
    # itself, so open_pickled_lexicon() can validate before unpickling.
    f = None
    try:
        f = open(lexicon_pickle, "wb")
    except IOError:
        print("Warning: Unable to save pickled lexicon in " + lexicon_pickle)
    if f:
        if notify_lexicon_pickling:
            t0 = time()
            print("Pickling lexicon...")
        pickle.dump(lexicon_hash, f, binary_lexicon_pickle)
        pickle.dump(lexicon, f, binary_lexicon_pickle)
        f.close()
        if notify_lexicon_pickling:
            t1 = time()
            print("Done (%.2f seconds)" % (t1 - t0))
def get_lexicon():
    # Lazily obtain the scanner lexicon: prefer the pickled cache (only
    # trusted when running against a released Plex, i.e. plex_version is
    # None), otherwise rebuild from Lexicon.py and re-cache.
    global lexicon
    if not lexicon and plex_version is None:
        try_to_unpickle_lexicon()
    if not lexicon:
        create_new_lexicon()
        if plex_version is None:
            pickle_lexicon()
    return lexicon
153 #------------------------------------------------------------------
# Words the scanner must report as keyword tokens rather than IDENTs.
# (Kept as a list; build_resword_dict() turns it into a lookup dict.)
reserved_words = [
    "global", "include", "ctypedef", "cdef", "def", "class",
    "print", "del", "pass", "break", "continue", "return",
    "raise", "import", "exec", "try", "except", "finally",
    "while", "if", "elif", "else", "for", "in", "assert",
    "and", "or", "not", "is", "lambda", "from", "yield",
    "cimport", "by", "with", "cpdef", "DEF", "IF", "ELIF", "ELSE"
]
class Method(object):
    """Callable Plex action that dispatches to a named method of the
    scanner (stream) it is invoked with."""

    def __init__(self, name):
        self.name = name      # method name looked up on the stream in __call__
        self.__name__ = name  # for Plex tracing

    def __call__(self, stream, text):
        return getattr(stream, self.name)(text)
173 #------------------------------------------------------------------
def build_resword_dict():
    # Map every reserved word to None: the dict exists purely for fast
    # keyword membership tests by the scanner.
    return dict.fromkeys(reserved_words)

cython.declare(resword_dict=object)
resword_dict = build_resword_dict()
184 #------------------------------------------------------------------
class CompileTimeScope(object):
    """A nested namespace for compile-time (DEF/IF) values.

    Lookups fall back to the enclosing (outer) scope when a name is not
    found locally; membership tests (`in`) check only this scope.
    """

    def __init__(self, outer = None):
        self.entries = {}
        self.outer = outer

    def declare(self, name, value):
        self.entries[name] = value

    def lookup_here(self, name):
        # Raises KeyError if the name is not declared in this scope.
        return self.entries[name]

    def __contains__(self, name):
        return name in self.entries

    def lookup(self, name):
        try:
            return self.lookup_here(name)
        except KeyError:
            outer = self.outer
            if outer:
                return outer.lookup(name)
            else:
                # Not found anywhere up the chain.
                raise
def initial_compile_time_env():
    # Create the initial DEF/IF environment: platform uname values in a
    # base scope, a safe subset of builtins on top of that, and an empty
    # user scope chained onto both.
    benv = CompileTimeScope()
    names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
             'UNAME_VERSION', 'UNAME_MACHINE')
    for name, value in zip(names, platform.uname()):
        benv.declare(name, value)
    try:
        import __builtin__ as builtins
    except ImportError:
        # Python 3 renamed the module.
        import builtins
    names = ('False', 'True',
             'abs', 'bool', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate',
             'float', 'hash', 'hex', 'int', 'len', 'list', 'long', 'map', 'max', 'min',
             'oct', 'ord', 'pow', 'range', 'reduce', 'repr', 'round', 'slice', 'str',
             'sum', 'tuple', 'xrange', 'zip')
    for name in names:
        try:
            benv.declare(name, getattr(builtins, name))
        except AttributeError:
            # Builtin absent in this Python version (e.g. cmp/long/xrange
            # on Python 3) -- simply omit it.
            pass
    denv = CompileTimeScope(benv)
    return denv
232 #------------------------------------------------------------------
class SourceDescriptor(object):
    """
    A SourceDescriptor should be considered immutable.
    """
    _escaped_description = None
    _cmp_name = ''

    def __str__(self):
        assert False # To catch all places where a descriptor is used directly as a filename

    def get_escaped_description(self):
        # Cache an ASCII-safe version of the description (non-ASCII
        # characters replaced with '?').
        if self._escaped_description is None:
            self._escaped_description = \
                self.get_description().encode('ASCII', 'replace').decode("ASCII")
        return self._escaped_description

    def __gt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name > other._cmp_name
        except AttributeError:
            return False

    def __lt__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name < other._cmp_name
        except AttributeError:
            return False

    def __le__(self, other):
        # this is only used to provide some sort of order
        try:
            return self._cmp_name <= other._cmp_name
        except AttributeError:
            return False
class FileSourceDescriptor(SourceDescriptor):
    """
    Represents a code source. A code source is a more generic abstraction
    for a "filename" (as sometimes the code doesn't come from a file).
    Instances of code sources are passed to Scanner.__init__ as the
    optional name argument and will be passed back when asking for
    the position()-tuple.
    """
    def __init__(self, filename):
        self.filename = filename
        self._cmp_name = filename  # used by SourceDescriptor ordering

    def get_lines(self):
        return Utils.open_source_file(self.filename)

    def get_description(self):
        return self.filename

    def get_filenametable_entry(self):
        return self.filename

    def __eq__(self, other):
        return isinstance(other, FileSourceDescriptor) and self.filename == other.filename

    def __hash__(self):
        return hash(self.filename)

    def __repr__(self):
        return "<FileSourceDescriptor:%s>" % self.filename
class StringSourceDescriptor(SourceDescriptor):
    """
    Instances of this class can be used instead of a filenames if the
    code originates from a string object.
    """
    def __init__(self, name, code):
        self.name = name
        # Re-append the newline that split() strips from each line.
        self.codelines = [x + "\n" for x in code.split("\n")]
        self._cmp_name = name  # used by SourceDescriptor ordering

    def get_lines(self):
        return self.codelines

    def get_description(self):
        return self.name

    def get_filenametable_entry(self):
        return "stringsource"

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, StringSourceDescriptor) and self.name == other.name

    def __repr__(self):
        return "<StringSourceDescriptor:%s>" % self.name
328 #------------------------------------------------------------------
330 class PyrexScanner(Scanner):
331 # context Context Compilation context
332 # included_files [string] Files included with 'include' statement
333 # compile_time_env dict Environment for conditional compilation
334 # compile_time_eval boolean In a true conditional compilation context
335 # compile_time_expr boolean In a compile-time expression context
337 def __init__(self, file, filename, parent_scanner = None,
338 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
339 Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
341 self.context = parent_scanner.context
342 self.included_files = parent_scanner.included_files
343 self.compile_time_env = parent_scanner.compile_time_env
344 self.compile_time_eval = parent_scanner.compile_time_eval
345 self.compile_time_expr = parent_scanner.compile_time_expr
347 self.context = context
348 self.included_files = scope.included_files
349 self.compile_time_env = initial_compile_time_env()
350 self.compile_time_eval = 1
351 self.compile_time_expr = 0
352 self.parse_comments = parse_comments
353 self.source_encoding = source_encoding
354 self.trace = trace_scanner
355 self.indentation_stack = [0]
356 self.indentation_char = None
357 self.bracket_nesting_level = 0
362 def commentline(self, text):
363 if self.parse_comments:
364 self.produce('commentline', text)
366 def current_level(self):
367 return self.indentation_stack[-1]
369 def open_bracket_action(self, text):
370 self.bracket_nesting_level = self.bracket_nesting_level + 1
373 def close_bracket_action(self, text):
374 self.bracket_nesting_level = self.bracket_nesting_level - 1
    def newline_action(self, text):
        # Emit NEWLINE only outside brackets: a newline inside (), []
        # or {} is an implicit line continuation.
        if self.bracket_nesting_level == 0:
            self.produce('NEWLINE', '')
389 def begin_string_action(self, text):
390 if text[:1] in string_prefixes:
392 if text[:1] in raw_prefixes:
394 self.begin(self.string_states[text])
395 self.produce('BEGIN_STRING')
397 def end_string_action(self, text):
399 self.produce('END_STRING')
    def unclosed_string_action(self, text):
        # Hit end of input inside a string literal: close the string
        # state first so scanning can continue, then report the error.
        self.end_string_action(text)
        self.error("Unclosed string literal")
405 def indentation_action(self, text):
407 # Indentation within brackets should be ignored.
408 #if self.bracket_nesting_level > 0:
410 # Check that tabs and spaces are being used consistently.
413 #print "Scanner.indentation_action: indent with", repr(c) ###
414 if self.indentation_char is None:
415 self.indentation_char = c
416 #print "Scanner.indentation_action: setting indent_char to", repr(c)
418 if self.indentation_char != c:
419 self.error("Mixed use of tabs and spaces")
420 if text.replace(c, "") != "":
421 self.error("Mixed use of tabs and spaces")
422 # Figure out how many indents/dedents to do
423 current_level = self.current_level()
424 new_level = len(text)
425 #print "Changing indent level from", current_level, "to", new_level ###
426 if new_level == current_level:
428 elif new_level > current_level:
429 #print "...pushing level", new_level ###
430 self.indentation_stack.append(new_level)
431 self.produce('INDENT', '')
433 while new_level < self.current_level():
434 #print "...popping level", self.indentation_stack[-1] ###
435 self.indentation_stack.pop()
436 self.produce('DEDENT', '')
437 #print "...current level now", self.current_level() ###
438 if new_level != self.current_level():
439 self.error("Inconsistent indentation")
441 def eof_action(self, text):
442 while len(self.indentation_stack) > 1:
443 self.produce('DEDENT', '')
444 self.indentation_stack.pop()
445 self.produce('EOF', '')
449 sy, systring = self.read()
450 except UnrecognizedInput:
451 self.error("Unrecognized character")
453 if systring in resword_dict:
456 systring = EncodedString(systring)
457 systring.encoding = self.source_encoding
459 self.systring = systring
460 if False: # debug_scanner:
461 _, line, col = self.position()
462 if not self.systring or self.sy == self.systring:
465 t = "%s %s" % (self.sy, self.systring)
466 print("--- %3d %2d %s" % (line, col, t))
468 def put_back(self, sy, systring):
469 self.unread(self.sy, self.systring)
471 self.systring = systring
    def unread(self, token, value):
        # Prepend a token to the pending-token queue so the next read()
        # returns it first.
        # This method should be added to Plex
        self.queue.insert(0, (token, value))
477 def error(self, message, pos = None, fatal = True):
479 pos = self.position()
480 if self.sy == 'INDENT':
481 err = error(pos, "Possible inconsistent indentation")
482 err = error(pos, message)
485 def expect(self, what, message = None):
489 self.expected(what, message)
491 def expect_keyword(self, what, message = None):
492 if self.sy == IDENT and self.systring == what:
495 self.expected(what, message)
497 def expected(self, what, message = None):
501 self.error("Expected '%s'" % what)
503 def expect_indent(self):
504 self.expect('INDENT',
505 "Expected an increase in indentation level")
507 def expect_dedent(self):
508 self.expect('DEDENT',
509 "Expected a decrease in indentation level")
511 def expect_newline(self, message = "Expected a newline"):
512 # Expect either a newline or end of file
514 self.expect('NEWLINE', message)