pre-Py3k fixes
[cython.git] / Cython / Compiler / Scanning.py
1 #
2 #   Pyrex Scanner
3 #
4
5 #import pickle
6 import cPickle as pickle
7
8 import os
9 import platform
10 import stat
11 import sys
12 from time import time
13
14 from Cython import Plex
15 from Cython.Plex import Scanner
16 from Cython.Plex.Errors import UnrecognizedInput
17 from Errors import CompileError, error
18 from Lexicon import string_prefixes, make_lexicon
19
# Version advertised by the installed Plex through its '_version'
# attribute, or None if it has none. get_lexicon() only uses the
# on-disk lexicon pickle when this is None.
plex_version = getattr(Plex, '_version', None)
#print "Plex version:", plex_version ###

# Non-zero: PyrexScanner.next() prints every token it reads.
debug_scanner = 0
# Copied into PyrexScanner.trace when a scanner is created.
trace_scanner = 0
# NOTE(review): the next two settings are not referenced in this part
# of the file -- confirm where (or whether) they are still used.
scanner_debug_flags = 0
scanner_dump_file = None
# Passed as the third argument to pickle.dump() in pickle_lexicon().
binary_lexicon_pickle = 1
# Non-zero: print progress and timing while loading the pickled lexicon.
notify_lexicon_unpickling = 0
# Non-zero: print progress and timing while saving the pickled lexicon.
notify_lexicon_pickling = 1

# Module-level cache for the lexicon; filled in by get_lexicon().
lexicon = None
32
33 #-----------------------------------------------------------------
34
def hash_source_file(path):
    # Try to calculate a hash code for the given source file.
    #
    # The file contents are normalised before hashing: line endings
    # are converted to "\n" and every span of spaces and tabs is
    # replaced by a single space, because we don't know what sort of
    # space-tab substitution the file may have been through.
    #
    # Returns the MD5 hex digest of the normalised text, or an empty
    # string if the file cannot be accessed.
    import re
    try:
        from hashlib import md5 as new_md5
    except ImportError:
        # Python versions without hashlib still ship the md5 module.
        from md5 import new as new_md5
    # 'f' has to be bound before the try block: otherwise a failing
    # open() would make the finally clause raise UnboundLocalError
    # instead of letting the function return "".
    f = None
    try:
        try:
            # Read the raw bytes and normalise the line endings by hand
            # below. This yields the same text as universal-newline
            # mode ("rU") without relying on the "U" flag, which newer
            # Python versions reject.
            f = open(path, "rb")
            data = f.read()
        except IOError:
            e = sys.exc_info()[1]
            print("Unable to hash scanner source file (%s)" % e)
            return ""
    finally:
        if f is not None:
            f.close()
    # Latin-1 maps every byte to exactly one character and back, so
    # this round trip lets the same substitutions run unchanged on
    # byte strings and unicode strings alike.
    text = data.decode("latin-1")
    text = re.sub("\r\n?", "\n", text)
    text = re.sub("[ \t]+", " ", text)
    return new_md5(text.encode("latin-1")).hexdigest()
57
def open_pickled_lexicon(expected_hash):
    # Try to open the pickled lexicon file (module global
    # 'lexicon_pickle') and verify that the hash stored at its start
    # matches expected_hash.
    #
    # Returns the opened file, positioned just after the hash, if the
    # hashes match; otherwise returns None. The file is closed again
    # before returning None.
    if not os.path.exists(lexicon_pickle):
        return None
    f = None
    result = None
    try:
        f = open(lexicon_pickle, "rb")
        actual_hash = pickle.load(f)
        if actual_hash == expected_hash:
            # Hand the open file over to the caller.
            result = f
            f = None
        else:
            # %s formatting rather than '+', so that a stored hash
            # which is not a string cannot raise a TypeError here.
            print("Lexicon hash mismatch:")
            print("   expected %s" % (expected_hash,))
            print("   got     %s" % (actual_hash,))
    except Exception:
        # The pickle is only a cache. Unpickling a truncated or
        # damaged file can raise EOFError, UnpicklingError and others
        # besides IOError; treat all of them as "no usable cache"
        # so that the caller regenerates the lexicon.
        print("Warning: Unable to read pickled lexicon " + lexicon_pickle)
        print(sys.exc_info()[1])
    if f is not None:
        f.close()
    return result
81
def try_to_unpickle_lexicon():
    # Try to load the lexicon from the "Lexicon.pickle" file that
    # lives next to this module.
    #
    # Sets the module globals 'lexicon_hash' (hash of Lexicon.py) and
    # 'lexicon_pickle' (path of the pickle file) as a side effect, and
    # sets 'lexicon' if a pickle with a matching hash could be loaded.
    # On any failure 'lexicon' is left untouched so that the caller
    # can fall back to building a new lexicon.
    global lexicon, lexicon_pickle, lexicon_hash
    compiler_dir = os.path.dirname(__file__)
    source_file = os.path.join(compiler_dir, "Lexicon.py")
    lexicon_hash = hash_source_file(source_file)
    lexicon_pickle = os.path.join(compiler_dir, "Lexicon.pickle")
    f = open_pickled_lexicon(expected_hash = lexicon_hash)
    if not f:
        return
    if notify_lexicon_unpickling:
        t0 = time()
        print("Unpickling lexicon...")
    try:
        try:
            lexicon = pickle.load(f)
        except Exception:
            # The pickle is only a cache: a truncated or damaged file
            # must not abort the compilation.
            print("Warning: Unable to read pickled lexicon " + lexicon_pickle)
            print(sys.exc_info()[1])
            return
    finally:
        # Close the file whether or not unpickling succeeded.
        f.close()
    if notify_lexicon_unpickling:
        t1 = time()
        print("Done (%.2f seconds)" % (t1 - t0))
98
def create_new_lexicon():
    # Build the lexicon from scratch with make_lexicon(), store it in
    # the module global 'lexicon' and report how long that took.
    global lexicon
    started = time()
    print("Creating lexicon...")
    lexicon = make_lexicon()
    elapsed = time() - started
    print("Done (%.2f seconds)" % elapsed)
106
def pickle_lexicon():
    # Save the current lexicon to the file named by the module global
    # 'lexicon_pickle'. The source hash ('lexicon_hash') is written
    # first, followed by the lexicon itself.
    #
    # If the file cannot be opened for writing, a warning is printed
    # and nothing is saved. If writing fails part-way, the exception
    # still propagates, but the file is closed and the incomplete
    # pickle is removed first.
    try:
        f = open(lexicon_pickle, "wb")
    except IOError:
        print("Warning: Unable to save pickled lexicon in " + lexicon_pickle)
        return
    if notify_lexicon_pickling:
        t0 = time()
        print("Pickling lexicon...")
    complete = False
    try:
        pickle.dump(lexicon_hash, f, binary_lexicon_pickle)
        pickle.dump(lexicon, f, binary_lexicon_pickle)
        complete = True
    finally:
        f.close()
        if not complete:
            # A truncated file would carry a valid hash but an
            # unreadable lexicon, so do not leave it behind.
            try:
                os.remove(lexicon_pickle)
            except OSError:
                # Best-effort cleanup only; the original error from
                # pickle.dump() is the one that matters.
                pass
    if notify_lexicon_pickling:
        t1 = time()
        print("Done (%.2f seconds)" % (t1 - t0))
123
def get_lexicon():
    # Return the module-wide lexicon, obtaining it on first use.
    #
    # The on-disk pickle is only consulted, and only written, when
    # plex_version is None. If no lexicon could be unpickled, a new
    # one is created.
    global lexicon
    use_pickle = plex_version is None
    if not lexicon and use_pickle:
        try_to_unpickle_lexicon()
    if lexicon:
        return lexicon
    create_new_lexicon()
    if use_pickle:
        pickle_lexicon()
    return lexicon
133     
134 #------------------------------------------------------------------
135
# Identifiers that the scanner reports as keywords rather than as
# 'IDENT' tokens (see build_resword_dict and PyrexScanner.next).
# Each word is listed exactly once.
reserved_words = [
    "global", "include", "ctypedef", "cdef", "def", "class",
    "print", "del", "pass", "break", "continue", "return",
    "raise", "import", "exec", "try", "except", "finally",
    "while", "if", "elif", "else", "for", "in", "assert",
    "and", "or", "not", "is", "lambda", "from",
    "NULL", "cimport", "by", "with", "cpdef", "DEF", "IF", "ELIF", "ELSE"
]
144
class Method:
    # Callable that forwards to the method called 'name' on whatever
    # stream object it is invoked with.

    def __init__(self, name):
        self.__name__ = name # for Plex tracing
        self.name = name

    def __call__(self, stream, text):
        # Look the method up on the stream at call time, then call it
        # with the text.
        bound_method = getattr(stream, self.name)
        return bound_method(text)
153
154 #------------------------------------------------------------------
155
def build_resword_dict():
    # Return a new dictionary that maps every entry of reserved_words
    # to 1, for fast membership tests.
    return dict.fromkeys(reserved_words, 1)
161
162 #------------------------------------------------------------------
163
class CompileTimeScope(object):
    # A table of name -> value entries with an optional enclosing
    # ('outer') scope that is consulted when a name is not found here.

    def __init__(self, outer = None):
        self.outer = outer
        self.entries = {}

    def declare(self, name, value):
        # Bind name to value in this scope, replacing any earlier entry.
        self.entries[name] = value

    def lookup_here(self, name):
        # Return the value bound to name in this scope only.
        # Raises KeyError if the name is not declared here.
        return self.entries[name]

    def lookup(self, name):
        # Return the value bound to name in this scope or, failing
        # that, in the chain of outer scopes. Raises KeyError if no
        # scope declares the name.
        try:
            return self.lookup_here(name)
        except KeyError:
            enclosing = self.outer
            if not enclosing:
                raise
            return enclosing.lookup(name)
183             else:
184                 raise
185
def initial_compile_time_env():
    # Build the initial environment for compile-time expressions.
    #
    # Returns an empty CompileTimeScope whose outer scope holds the
    # UNAME_* values taken from platform.uname() and a fixed selection
    # of builtin names.
    benv = CompileTimeScope()
    names = ('UNAME_SYSNAME', 'UNAME_NODENAME', 'UNAME_RELEASE',
        'UNAME_VERSION', 'UNAME_MACHINE')
    # zip() stops at the shorter sequence, so any further fields
    # returned by platform.uname() are ignored.
    for name, value in zip(names, platform.uname()):
        benv.declare(name, value)
    try:
        import __builtin__ as builtins
    except ImportError:
        # The module is called 'builtins' on Python 3.
        import builtins
    names = ('False', 'True',
        'abs', 'bool', 'chr', 'cmp', 'complex', 'dict', 'divmod', 'enumerate',
        'float', 'hash', 'hex', 'int', 'len', 'list', 'long', 'map', 'max', 'min',
        'oct', 'ord', 'pow', 'range', 'reduce', 'repr', 'round', 'slice', 'str',
        'sum', 'tuple', 'xrange', 'zip')
    for name in names:
        # Some of these (cmp, long, reduce, xrange) are not builtins
        # on every Python version; declare only what is available.
        if hasattr(builtins, name):
            benv.declare(name, getattr(builtins, name))
    denv = CompileTimeScope(benv)
    return denv
202
203 #------------------------------------------------------------------
204
class PyrexScanner(Scanner):
    #  Scanner for Pyrex source, built on the Plex Scanner class.
    #
    #  context            Context  Compilation context
    #  type_names         set      Identifiers to be treated as type names
    #  compile_time_env   dict     Environment for conditional compilation
    #  compile_time_eval  boolean  In a true conditional compilation context
    #  compile_time_expr  boolean  In a compile-time expression context

    resword_dict = build_resword_dict()

    def __init__(self, file, filename, parent_scanner = None,
            type_names = None, context = None):
        # Set up a scanner over 'file'. When a parent scanner is given,
        # its context, type names and compile-time state are shared;
        # otherwise they come from the arguments and fresh defaults.
        Scanner.__init__(self, get_lexicon(), file, filename)
        if parent_scanner:
            for attr in ('context', 'type_names', 'compile_time_env',
                    'compile_time_eval', 'compile_time_expr'):
                setattr(self, attr, getattr(parent_scanner, attr))
        else:
            self.context = context
            self.type_names = type_names
            self.compile_time_env = initial_compile_time_env()
            self.compile_time_eval = 1
            self.compile_time_expr = 0
        self.trace = trace_scanner
        self.indentation_stack = [0]
        self.indentation_char = None
        self.bracket_nesting_level = 0
        self.begin('INDENT')
        self.sy = ''
        # Read the first token so that self.sy / self.systring are valid.
        self.next()

    def current_level(self):
        # Indentation width of the innermost open block.
        stack = self.indentation_stack
        return stack[-1]

    def open_bracket_action(self, text):
        # Scanner action: one more level of bracket nesting.
        self.bracket_nesting_level += 1
        return text

    def close_bracket_action(self, text):
        # Scanner action: one less level of bracket nesting.
        self.bracket_nesting_level -= 1
        return text

    def newline_action(self, text):
        # Scanner action: a newline only ends a statement when it is
        # not inside brackets.
        if self.bracket_nesting_level != 0:
            return
        self.begin('INDENT')
        self.produce('NEWLINE', '')

    # Scanner state to enter for each kind of opening quote.
    string_states = {
        "'": 'SQ_STRING',
        '"': 'DQ_STRING',
        "'''": 'TSQ_STRING',
        '"""': 'TDQ_STRING',
    }

    def begin_string_action(self, text):
        # Scanner action: start of a string literal. A leading string
        # prefix character is dropped so that only the quotes select
        # the scanner state.
        quote = text
        if quote[:1] in string_prefixes:
            quote = quote[1:]
        state = self.string_states[quote]
        self.begin(state)
        self.produce('BEGIN_STRING')

    def end_string_action(self, text):
        # Scanner action: end of a string literal; back to the
        # default state.
        self.begin('')
        self.produce('END_STRING')

    def unclosed_string_action(self, text):
        # Scanner action: finish the string as usual, then report
        # the error.
        self.end_string_action(text)
        self.error("Unclosed string literal")

    def indentation_action(self, text):
        # Scanner action: 'text' is the leading whitespace of a line.
        # Checks that tabs and spaces are used consistently and
        # produces the INDENT / DEDENT tokens needed to reach the new
        # indentation level.
        self.begin('')
        # NOTE: a check that skipped indentation handling while inside
        # brackets used to sit here and is disabled.
        if text:
            indent_char = text[0]
            if self.indentation_char is None:
                # The first indented line fixes the indentation character.
                self.indentation_char = indent_char
            elif self.indentation_char != indent_char:
                self.error("Mixed use of tabs and spaces")
            if text.replace(indent_char, ""):
                self.error("Mixed use of tabs and spaces")
        # Figure out how many indents/dedents to do.
        old_width = self.current_level()
        new_width = len(text)
        if new_width == old_width:
            return
        if new_width > old_width:
            self.indentation_stack.append(new_width)
            self.produce('INDENT', '')
            return
        while new_width < self.current_level():
            self.indentation_stack.pop()
            self.produce('DEDENT', '')
        # A dedent has to land exactly on an enclosing level.
        if new_width != self.current_level():
            self.error("Inconsistent indentation")

    def eof_action(self, text):
        # Scanner action: close every block that is still open, then
        # produce EOF.
        stack = self.indentation_stack
        while len(stack) > 1:
            self.produce('DEDENT', '')
            stack.pop()
        self.produce('EOF', '')

    def next(self):
        # Advance to the next token, storing its symbol in self.sy and
        # its text in self.systring. Identifiers that are reserved
        # words become their own symbol.
        try:
            token, value = self.read()
        except UnrecognizedInput:
            self.error("Unrecognized character")
        if token == 'IDENT' and value in self.resword_dict:
            token = value
        self.sy = token
        self.systring = value
        if not debug_scanner:
            return
        _, line, col = self.position()
        if self.systring and self.sy != self.systring:
            shown = "%s %s" % (self.sy, self.systring)
        else:
            shown = self.sy
        print("--- %3d %2d %s" % (line, col, shown))

    def put_back(self, sy, systring):
        # Push the current token back onto the input and make
        # (sy, systring) the current token instead.
        self.unread(self.sy, self.systring)
        self.systring = systring
        self.sy = sy

    def unread(self, token, value):
        # This method should be added to Plex
        pending = (token, value)
        self.queue.insert(0, pending)

    def add_type_name(self, name):
        # Record 'name' as an identifier to be treated as a type name.
        self.type_names[name] = 1

    def looking_at_type_name(self):
        # True if the current token is an identifier known as a
        # type name.
        if self.sy != 'IDENT':
            return False
        return self.systring in self.type_names

    def error(self, message, pos = None):
        # Report 'message' at 'pos' (default: the current position)
        # through the module-level error() function and raise what
        # it returns.
        if pos is None:
            pos = self.position()
        if self.sy == 'INDENT':
            error(pos, "Possible inconsistent indentation")
        err = error(pos, message)
        raise err

    def expect(self, what, message = None):
        # Skip over the current token if its symbol is 'what';
        # otherwise report an error.
        if self.sy != what:
            self.expected(what, message)
        else:
            self.next()

    def expect_keyword(self, what, message = None):
        # Like expect(), but for a word that is scanned as a plain
        # identifier.
        if self.sy != 'IDENT' or self.systring != what:
            self.expected(what, message)
        else:
            self.next()

    def expected(self, what, message):
        # Report that 'what' was expected, preferring the caller's
        # message when one is given.
        self.error(message or "Expected '%s'" % what)

    def expect_indent(self):
        self.expect('INDENT',
            "Expected an increase in indentation level")

    def expect_dedent(self):
        self.expect('DEDENT',
            "Expected a decrease in indentation level")

    def expect_newline(self, message = "Expected a newline"):
        # Expect either a newline or end of file
        if self.sy == 'EOF':
            return
        self.expect('NEWLINE', message)