1 # cython: infer_types=True, language_level=3
11 cython.declare(EncodedString=object, string_prefixes=object, raw_prefixes=object, IDENT=unicode,
12 print_function=object)
14 from Cython import Plex, Utils
15 from Cython.Plex.Scanners import Scanner
16 from Cython.Plex.Errors import UnrecognizedInput
17 from Errors import CompileError, error
18 from Lexicon import string_prefixes, raw_prefixes, make_lexicon, IDENT
19 from Future import print_function
21 from StringEncoding import EncodedString
25 scanner_debug_flags = 0
26 scanner_dump_file = None
33 lexicon = make_lexicon()
36 #------------------------------------------------------------------
39 "global", "nonlocal", "def", "class", "print", "del", "pass", "break",
40 "continue", "return", "raise", "import", "exec", "try",
41 "except", "finally", "while", "if", "elif", "else", "for",
42 "in", "assert", "and", "or", "not", "is", "in", "lambda",
43 "from", "yield", "with", "nonlocal",
46 pyx_reserved_words = py_reserved_words + [
47 "include", "ctypedef", "cdef", "cpdef",
48 "cimport", "DEF", "IF", "ELIF", "ELSE"
53 def __init__(self, name):
55 self.__name__ = name # for Plex tracing
def __call__(self, stream, text):
    """Look up the attribute named ``self.name`` on *stream* and call it with *text*.

    Returns whatever that attribute returns.
    """
    handler = getattr(stream, self.name)
    return handler(text)
60 #------------------------------------------------------------------
62 class CompileTimeScope(object):
64 def __init__(self, outer = None):
def declare(self, name, value):
    """Bind *name* to *value* in this scope, replacing any existing entry."""
    table = self.entries
    table[name] = value
def lookup_here(self, name):
    """Return the value bound to *name* in this scope's own entries.

    Raises KeyError (from the underlying mapping lookup) when *name*
    is not present in ``self.entries``.
    """
    table = self.entries
    return table[name]
def __contains__(self, name):
    """Report whether *name* is present in this scope's own entries."""
    table = self.entries
    return name in table
77 def lookup(self, name):
79 return self.lookup_here(name)
83 return outer.lookup(name)
87 def initial_compile_time_env():
88 benv = CompileTimeScope()
89 names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
90 'UNAME_VERSION', 'UNAME_MACHINE')
91 for name, value in zip(names, platform.uname()):
92 benv.declare(name, value)
94 import __builtin__ as builtins
97 names = ('False', 'True',
98 'abs', 'bool', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate',
99 'float', 'hash', 'hex', 'int', 'len', 'list', 'long', 'map', 'max', 'min',
100 'oct', 'ord', 'pow', 'range', 'reduce', 'repr', 'round', 'slice', 'str',
101 'sum', 'tuple', 'xrange', 'zip')
104 benv.declare(name, getattr(builtins, name))
105 except AttributeError:
108 denv = CompileTimeScope(benv)
111 #------------------------------------------------------------------
113 class SourceDescriptor(object):
115 A SourceDescriptor should be considered immutable.
119 _escaped_description = None
122 assert False # To catch all places where a descriptor is used directly as a filename
def set_file_type_from_name(self, filename):
    """Set ``self._file_type`` from the extension of *filename*.

    The extensions '.pyx', '.pxd' and '.py' map to the extension
    without its leading dot; anything else (including no extension)
    falls back to 'pyx'.  The match is case-sensitive.
    """
    _, extension = os.path.splitext(filename)
    if extension in ('.pyx', '.pxd', '.py'):
        # Drop the leading dot.
        self._file_type = extension[1:]
    else:
        self._file_type = 'pyx'
def is_cython_file(self):
    """Return True when the recorded file type is 'pyx' or 'pxd'."""
    file_type = self._file_type
    return file_type == 'pyx' or file_type == 'pxd'
def is_python_file(self):
    """Return True when the recorded file type is 'py'."""
    file_type = self._file_type
    return file_type == 'py'
def get_escaped_description(self):
    """Return the description with non-ASCII characters replaced by '?'.

    The result is computed once from ``self.get_description()`` and
    cached in ``self._escaped_description`` for later calls.
    """
    escaped = self._escaped_description
    if escaped is None:
        description = self.get_description()
        # Round-trip through ASCII so unencodable characters become '?'.
        ascii_bytes = description.encode('ASCII', 'replace')
        escaped = ascii_bytes.decode("ASCII")
        self._escaped_description = escaped
    return escaped
140 def __gt__(self, other):
141 # this is only used to provide some sort of order
143 return self._cmp_name > other._cmp_name
144 except AttributeError:
147 def __lt__(self, other):
148 # this is only used to provide some sort of order
150 return self._cmp_name < other._cmp_name
151 except AttributeError:
154 def __le__(self, other):
155 # this is only used to provide some sort of order
157 return self._cmp_name <= other._cmp_name
158 except AttributeError:
161 class FileSourceDescriptor(SourceDescriptor):
163 Represents a code source. A code source is a more generic abstraction
164 for a "filename" (as sometimes the code doesn't come from a file).
165 Instances of code sources are passed to Scanner.__init__ as the
166 optional name argument and will be passed back when asking for
167 the position()-tuple.
def __init__(self, filename, path_description=None):
    """Set up a descriptor for the source file *filename*.

    *filename* is first passed through ``Utils.decode_filename``; the
    result is stored as both ``filename`` and the comparison key
    ``_cmp_name``.  *path_description*, when truthy, is used as the
    description; otherwise the decoded filename is.
    """
    decoded = Utils.decode_filename(filename)
    self.filename = decoded
    self._cmp_name = decoded
    if path_description:
        self.path_description = path_description
    else:
        self.path_description = decoded
    self.set_file_type_from_name(decoded)
def get_lines(self, encoding=None, error_handling=None):
    """Open this descriptor's file through ``Utils.open_source_file``.

    *encoding* and *error_handling* are forwarded unchanged; the
    return value is whatever ``Utils.open_source_file`` returns.
    """
    source_path = self.filename
    return Utils.open_source_file(
        source_path,
        encoding=encoding,
        error_handling=error_handling,
        # newline normalisation is costly before Py2.6
        require_normalised_newlines=False)
def get_description(self):
    """Return the description stored in ``self.path_description``."""
    description = self.path_description
    return description
186 def get_filenametable_entry(self):
def __eq__(self, other):
    """Equal only to another FileSourceDescriptor with the same filename."""
    if not isinstance(other, FileSourceDescriptor):
        return False
    return self.filename == other.filename
193 return hash(self.filename)
196 return "<FileSourceDescriptor:%s>" % self.filename
198 class StringSourceDescriptor(SourceDescriptor):
    Instances of this class can be used instead of a filename if the
201 code originates from a string object.
205 def __init__(self, name, code):
207 #self.set_file_type_from_name(name)
208 self.codelines = [x + "\n" for x in code.split("\n")]
209 self._cmp_name = name
211 def get_lines(self, encoding=None, error_handling=None):
213 return self.codelines
215 return [ line.encode(encoding, error_handling).decode(encoding)
216 for line in self.codelines ]
218 def get_description(self):
def get_filenametable_entry(self):
    """Return the fixed label "stringsource" for string-based sources."""
    entry = "stringsource"
    return entry
225 return hash(self.name)
def __eq__(self, other):
    """Equal only to another StringSourceDescriptor with the same name."""
    if not isinstance(other, StringSourceDescriptor):
        return False
    return self.name == other.name
231 return "<StringSourceDescriptor:%s>" % self.name
233 #------------------------------------------------------------------
235 class PyrexScanner(Scanner):
236 # context Context Compilation context
237 # included_files [string] Files included with 'include' statement
238 # compile_time_env dict Environment for conditional compilation
239 # compile_time_eval boolean In a true conditional compilation context
240 # compile_time_expr boolean In a compile-time expression context
242 def __init__(self, file, filename, parent_scanner = None,
243 scope = None, context = None, source_encoding=None, parse_comments=True, initial_pos=None):
244 Scanner.__init__(self, get_lexicon(), file, filename, initial_pos)
246 self.context = parent_scanner.context
247 self.included_files = parent_scanner.included_files
248 self.compile_time_env = parent_scanner.compile_time_env
249 self.compile_time_eval = parent_scanner.compile_time_eval
250 self.compile_time_expr = parent_scanner.compile_time_expr
252 self.context = context
253 self.included_files = scope.included_files
254 self.compile_time_env = initial_compile_time_env()
255 self.compile_time_eval = 1
256 self.compile_time_expr = 0
257 self.parse_comments = parse_comments
258 self.source_encoding = source_encoding
259 if filename.is_python_file():
260 self.in_python_file = True
261 self.keywords = cython.set(py_reserved_words)
263 self.in_python_file = False
264 self.keywords = cython.set(pyx_reserved_words)
265 self.trace = trace_scanner
266 self.indentation_stack = [0]
267 self.indentation_char = None
268 self.bracket_nesting_level = 0
def commentline(self, text):
    """Produce a 'commentline' token carrying *text*.

    Does nothing when ``self.parse_comments`` is false.
    """
    if not self.parse_comments:
        return
    self.produce('commentline', text)
def current_level(self):
    """Return the last (innermost) entry of the indentation stack."""
    stack = self.indentation_stack
    return stack[-1]
280 def open_bracket_action(self, text):
281 self.bracket_nesting_level = self.bracket_nesting_level + 1
284 def close_bracket_action(self, text):
285 self.bracket_nesting_level = self.bracket_nesting_level - 1
288 def newline_action(self, text):
289 if self.bracket_nesting_level == 0:
291 self.produce('NEWLINE', '')
300 def begin_string_action(self, text):
301 if text[:1] in string_prefixes:
303 if text[:1] in raw_prefixes:
305 self.begin(self.string_states[text])
306 self.produce('BEGIN_STRING')
308 def end_string_action(self, text):
310 self.produce('END_STRING')
def unclosed_string_action(self, text):
    # Run the normal end-of-string handling first, then report the
    # problem through self.error; the order of the two calls matters.
    self.end_string_action(text)
    self.error("Unclosed string literal")
316 def indentation_action(self, text):
318 # Indentation within brackets should be ignored.
319 #if self.bracket_nesting_level > 0:
321 # Check that tabs and spaces are being used consistently.
324 #print "Scanner.indentation_action: indent with", repr(c) ###
325 if self.indentation_char is None:
326 self.indentation_char = c
327 #print "Scanner.indentation_action: setting indent_char to", repr(c)
329 if self.indentation_char != c:
330 self.error("Mixed use of tabs and spaces")
331 if text.replace(c, "") != "":
332 self.error("Mixed use of tabs and spaces")
333 # Figure out how many indents/dedents to do
334 current_level = self.current_level()
335 new_level = len(text)
336 #print "Changing indent level from", current_level, "to", new_level ###
337 if new_level == current_level:
339 elif new_level > current_level:
340 #print "...pushing level", new_level ###
341 self.indentation_stack.append(new_level)
342 self.produce('INDENT', '')
344 while new_level < self.current_level():
345 #print "...popping level", self.indentation_stack[-1] ###
346 self.indentation_stack.pop()
347 self.produce('DEDENT', '')
348 #print "...current level now", self.current_level() ###
349 if new_level != self.current_level():
350 self.error("Inconsistent indentation")
def eof_action(self, text):
    """Close every open indentation level, then produce 'EOF'.

    One 'DEDENT' token is produced and one stack entry popped for each
    level above the bottom entry of ``self.indentation_stack``; the
    bottom entry itself is left in place.
    """
    open_levels = len(self.indentation_stack) - 1
    for _ in range(open_levels):
        self.produce('DEDENT', '')
        self.indentation_stack.pop()
    self.produce('EOF', '')
360 sy, systring = self.read()
361 except UnrecognizedInput:
362 self.error("Unrecognized character")
364 if systring in self.keywords:
365 if systring == u'print' and print_function in self.context.future_directives:
366 self.keywords.discard('print')
367 systring = EncodedString(systring)
368 elif systring == u'exec' and self.context.language_level >= 3:
369 self.keywords.discard('exec')
370 systring = EncodedString(systring)
374 systring = EncodedString(systring)
376 self.systring = systring
377 if False: # debug_scanner:
378 _, line, col = self.position()
379 if not self.systring or self.sy == self.systring:
382 t = "%s %s" % (self.sy, self.systring)
383 print("--- %3d %2d %s" % (line, col, t))
386 saved = self.sy, self.systring
388 next = self.sy, self.systring
390 self.sy, self.systring = saved
393 def put_back(self, sy, systring):
394 self.unread(self.sy, self.systring)
396 self.systring = systring
def unread(self, token, value):
    """Push the pair ``(token, value)`` onto the front of ``self.queue``."""
    # This method should be added to Plex
    entry = (token, value)
    self.queue.insert(0, entry)
402 def error(self, message, pos = None, fatal = True):
404 pos = self.position()
405 if self.sy == 'INDENT':
406 err = error(pos, "Possible inconsistent indentation")
407 err = error(pos, message)
410 def expect(self, what, message = None):
414 self.expected(what, message)
416 def expect_keyword(self, what, message = None):
417 if self.sy == IDENT and self.systring == what:
420 self.expected(what, message)
422 def expected(self, what, message = None):
427 found = self.systring
430 self.error("Expected '%s', found '%s'" % (what, found))
def expect_indent(self):
    """Call ``self.expect`` for an 'INDENT' token with a fixed message."""
    message = "Expected an increase in indentation level"
    self.expect('INDENT', message)
def expect_dedent(self):
    """Call ``self.expect`` for a 'DEDENT' token with a fixed message."""
    message = "Expected a decrease in indentation level"
    self.expect('DEDENT', message)
440 def expect_newline(self, message = "Expected a newline"):
441 # Expect either a newline or end of file
443 self.expect('NEWLINE', message)