From: Robert Bradshaw Date: Tue, 25 Mar 2008 09:19:38 +0000 (-0700) Subject: Basic control flow state processing X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=670cda8e459657d9c6b02504d2dd1414fc89d4b0;p=cython.git Basic control flow state processing --- diff --git a/Cython/Compiler/ControlFlow.py b/Cython/Compiler/ControlFlow.py new file mode 100644 index 00000000..7bce1de5 --- /dev/null +++ b/Cython/Compiler/ControlFlow.py @@ -0,0 +1,158 @@ +import bisect + +# This module keeps track of arbitrary "states" at any point of the code. +# A state is considered known if every path to the given point agrees on +# its state, otherwise it is None (i.e. unknown). + +# It might be useful to be able to "freeze" the set of states by pushing +# all state changes to the tips of the trees for fast reading. Perhaps this +# could be done on get_state, clearing the cache on set_state (assuming +# incoming is immutable). + + +class ControlFlow: + + def __init__(self, start_pos, incoming, parent): + self.start_pos = start_pos + self.incoming = incoming + if parent is None and incoming is not None: + parent = incoming.parent + self.parent = parent + self.tip = {} + self.end_pos = ((),) + + def start_branch(self, pos): + self.end_pos = pos + branch_point = BranchingControlFlow(pos, self) + if self.parent is not None: + self.parent.branches[-1] = branch_point + return branch_point.branches[0] + + def next_branch(self, pos): + self.end_pos = pos + return self.parent.new_branch(pos) + + def finish_branch(self, pos): + self.end_pos = pos + self.parent.end_pos = pos + return LinearControlFlow(pos, self.parent) + + def get_state(self, pos, item): + return self.get_pos_state(pos, item)[1] + + def get_pos_state(self, pos, item): + # do some caching + if pos > self.end_pos: + try: + return self.tip[item] + except KeyError: + self.tip[item] = pos_state = self._get_pos_state(pos, item) + return pos_state + else: + return self._get_pos_state(pos, item) + +class LinearControlFlow(ControlFlow): + + def __init__(self, start_pos=(), incoming=None, parent=None): + ControlFlow.__init__(self, start_pos, incoming, parent) + self.events = {} + + def set_state(self, pos, item, state): + if self.tip.has_key(item): + del self.tip[item] + if pos < self.start_pos: + if self.incoming is not None: + self.incoming.set_state(pos, item, state) + else: + if self.events.has_key(item): + event_list = self.events[item] + else: + event_list = [] + self.events[item] = event_list + bisect.insort(event_list, (pos, state)) + + + def _get_pos_state(self, pos, item): + if pos > self.start_pos: + if self.events.has_key(item): + event_list = self.events[item] + for event in event_list[::-1]: + if event[0] < pos: + return event + + if self.incoming is not None: + return self.incoming.get_pos_state(pos, item) + + else: + return None, None + + def to_string(self, indent, limit=None): + + if len(self.events) == 0: + s = indent + "[no state changes]" + + else: + all = [] + for item, event_list in self.events.items(): + for pos, state in event_list: + all.append((indent, pos, item, state)) + all.sort() + all = ["%s%s: %s <- %s" % data for data in all] + s = "\n".join(all) + if self.incoming is not limit and self.incoming is not None: + s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s) + return s + + +class BranchingControlFlow(ControlFlow): + + def __init__(self, start_pos, incoming, parent=None): + ControlFlow.__init__(self, start_pos, incoming, parent) + self.branches = [LinearControlFlow(start_pos, incoming, parent=self)] + self.branch_starts = [start_pos] + + def set_state(self, pos, item, state): + + if self.tip.has_key(item): + del self.tip[item] + + if pos < self.start_pos: + self.incoming.set_state(pos, item, state) + else: + for branch_pos, branch in zip(self.branch_starts[::-1], self.branches[::-1]): + if pos >= branch_pos: + branch.set_state(pos, item, state) + return + + def _get_pos_state(self, pos, item): + if pos <= self.start_pos: + return self.incoming.get_pos_state(pos, item) + elif pos < self.end_pos: + for branch_pos, branch in zip(self.branch_starts[::-1], self.branches[::-1]): + if pos >= branch_pos: + return branch.get_pos_state(pos, item) + else: + last_pos, last_state = self.branches[0].get_pos_state(pos, item) + if last_state is None: + return None, None + for branch in self.branches[1:]: + other_pos, other_state = branch.get_pos_state(pos, item) + if other_state is None or other_state != last_state: + return None, None + elif last_pos is not other_pos: + last_pos = max(last_pos, other_pos) + return last_pos, last_state + + def new_branch(self, pos): + self.branches.append(LinearControlFlow(pos, self.incoming, parent=self)) + self.branch_starts.append(pos) + return self.branches[-1] + + def to_string(self, indent, limit=None): + join = "\n%sor\n" % indent + s = join.join([branch.to_string(indent+" ", limit=self.incoming) for branch in self.branches]) + if self.incoming is not limit and self.incoming is not None: + s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s) + return s + + \ No newline at end of file diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index f681d87b..a7b0a33c 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -817,6 +817,7 @@ class NameNode(AtomicExprNode): self.entry = env.lookup_here(self.name) if not self.entry: self.entry = env.declare_var(self.name, py_object_type, self.pos) + env.control_flow.set_state(self.pos, (self.name, 'initalized'), True) if self.entry.is_declared_generic: self.result_ctype = py_object_type @@ -944,7 +945,13 @@ class NameNode(AtomicExprNode): self.result_code, namespace, self.entry.name, - code.error_goto_if_null(self.result_code, self.pos))) + code.error_goto_if_null(self.result_code, self.pos))) + elif entry.is_local: + assigned = entry.scope.control_flow.get_state(self.pos, (entry.name, 'initalized')) + if assigned is False: + error(self.pos, "local variable '%s' referenced before assignment" % entry.name) + elif assigned is None: + code.putln('/* check %s */' % entry.cname) def generate_assignment_code(self, rhs, code): #print "NameNode.generate_assignment_code:", self.name ### diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 41466882..01a32844 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -14,6 +14,7 @@ from Symtab import ModuleScope, LocalScope, \ StructOrUnionScope, PyClassScope, CClassScope from Cython.Utils import open_new_file, replace_suffix import Options +import ControlFlow from DebugFlags import debug_disposal_code @@ -119,15 +120,19 @@ class Node: # - # There are 3 phases of parse tree processing, applied in order to + # There are 4 phases of parse tree processing, applied in order to # all the statements in a given scope-block: # + # (0) analyse_control_flow + # Create the control flow tree into which state can be asserted and + # queried. + # # (1) analyse_declarations # Make symbol table entries for all declarations at the current # level, both explicit (def, cdef, etc.) and implicit (assignment # to an otherwise undeclared name). # - # (2) analyse_expressions + # (2) analyse_expressions # Determine the result types of expressions and fill in the # 'type' attribute of each ExprNode. Insert coercion nodes into the # tree where needed to convert to and from Python objects. @@ -135,12 +140,15 @@ class Node: # in the 'result_code' attribute of each ExprNode with a C code # fragment. # - # (3) generate_code + # (3) generate_code # Emit C code for all declarations, statements and expressions. # Recursively applies the 3 processing phases to the bodies of # functions. # + def analyse_control_flow(self, env): + pass + def analyse_declarations(self, env): pass @@ -156,6 +164,26 @@ class Node: # mro does the wrong thing if isinstance(self, BlockNode): self.body.annotate(code) + + def end_pos(self): + try: + return self._end_pos + except AttributeError: + children = [acc.get() for acc in self.get_child_accessors()] + if len(children) == 0: + self._end_pos = self.pos + else: + # Sometimes lists, sometimes nodes + flat = [] + for child in children: + if child is None: + pass + elif isinstance(child, list): + flat += child + else: + flat.append(child) + self._end_pos = max([child.end_pos() for child in flat]) + return self._end_pos class BlockNode: @@ -215,6 +243,10 @@ class StatListNode(Node): child_attrs = ["stats"] + def analyse_control_flow(self, env): + for stat in self.stats: + stat.analyse_control_flow(env) + def analyse_declarations(self, env): #print "StatListNode.analyse_declarations" ### for stat in self.stats: @@ -311,11 +343,12 @@ class CDeclaratorNode(Node): class CNameDeclaratorNode(CDeclaratorNode): - # name string The Pyrex name being declared - # cname string or None C name, if specified + # name string The Pyrex name being declared + # cname string or None C name, if specified + # rhs ExprNode or None the value assigned on declaration child_attrs = [] - + def analyse(self, base_type, env): self.type = base_type return self, base_type @@ -323,6 +356,7 @@ class CNameDeclaratorNode(CDeclaratorNode): def analyse_expressions(self, env): self.entry = env.lookup(self.name) if self.rhs is not None: + env.control_flow.set_state(self.rhs.end_pos(), (self.entry.name, 'initalized'), True) self.entry.used = 1 if self.type.is_pyobject: self.entry.init_to_none = False @@ -770,6 +804,7 @@ class FuncDefNode(StatNode, BlockNode): code.init_labels() self.declare_arguments(lenv) transforms.run('before_analyse_function', self, env=env, lenv=lenv, genv=genv) + self.body.analyse_control_flow(lenv) self.body.analyse_declarations(lenv) self.body.analyse_expressions(lenv) transforms.run('after_analyse_function', self, env=env, lenv=lenv, genv=genv) @@ -865,7 +900,9 @@ class FuncDefNode(StatNode, BlockNode): # ----- Return cleanup code.put_label(code.return_label) code.put_var_decrefs(lenv.var_entries, used_only = 1) - code.put_var_decrefs(lenv.arg_entries) + for entry in lenv.arg_entries: +# if len(entry.assignments) > 0: + code.put_var_decref(entry) self.put_stararg_decrefs(code) if acquire_gil: code.putln("PyGILState_Release(_save);") @@ -894,7 +931,8 @@ class FuncDefNode(StatNode, BlockNode): # This is necessary, because if the argument is # assigned to, it will be decrefed. for entry in env.arg_entries: - code.put_var_incref(entry) +# if len(entry.assignments) > 0: + code.put_var_incref(entry) def generate_optarg_wrapper_function(self, env, code): pass @@ -2714,6 +2752,15 @@ class IfStatNode(StatNode): child_attrs = ["if_clauses", "else_clause"] + def analyse_control_flow(self, env): + env.start_branching(self.pos) + for if_clause in self.if_clauses: + if_clause.analyse_control_flow(env) + env.next_branch(if_clause.end_pos()) + if self.else_clause: + self.else_clause.analyse_control_flow(env) + env.finish_branching(self.end_pos()) + def analyse_declarations(self, env): for if_clause in self.if_clauses: if_clause.analyse_declarations(env) @@ -2752,6 +2799,9 @@ class IfClauseNode(Node): child_attrs = ["condition", "body"] + def analyse_control_flow(self, env): + self.body.analyse_control_flow(env) + def analyse_declarations(self, env): self.condition.analyse_declarations(env) self.body.analyse_declarations(env) @@ -2779,8 +2829,19 @@ class IfClauseNode(Node): self.condition.annotate(code) self.body.annotate(code) + +class LoopNode: + + def analyse_control_flow(self, env): + env.start_branching(self.pos) + self.body.analyse_control_flow(env) + env.next_branch(self.body.end_pos()) + if self.else_clause: + self.else_clause.analyse_control_flow(env) + env.finish_branching(self.end_pos()) + -class WhileStatNode(StatNode): +class WhileStatNode(LoopNode, StatNode): # while statement # # condition ExprNode @@ -2788,7 +2849,7 @@ class WhileStatNode(StatNode): # else_clause StatNode child_attrs = ["condition", "body", "else_clause"] - + def analyse_declarations(self, env): self.body.analyse_declarations(env) if self.else_clause: @@ -2835,7 +2896,7 @@ def ForStatNode(pos, **kw): else: return ForFromStatNode(pos, **kw) -class ForInStatNode(StatNode): +class ForInStatNode(LoopNode, StatNode): # for statement # # target ExprNode @@ -2943,7 +3004,7 @@ class ForInStatNode(StatNode): self.item.annotate(code) -class ForFromStatNode(StatNode): +class ForFromStatNode(LoopNode, StatNode): # for name from expr rel name rel expr # # target NameNode @@ -2962,12 +3023,6 @@ class ForFromStatNode(StatNode): # py_loopvar_node PyTempNode or None child_attrs = ["target", "bound1", "bound2", "step", "body", "else_clause", "py_loopvar_node"] - def analyse_declarations(self, env): - self.target.analyse_target_declaration(env) - self.body.analyse_declarations(env) - if self.else_clause: - self.else_clause.analyse_declarations(env) - def analyse_expressions(self, env): import ExprNodes self.target.analyse_target_types(env) @@ -3084,8 +3139,21 @@ class TryExceptStatNode(StatNode): # else_clause StatNode or None # cleanup_list [Entry] temps to clean up on error - child_attrs = ["body", "except_clauses", "else_clause", "cleanup_list"] + child_attrs = ["body", "except_clauses", "else_clause"] + def analyse_control_flow(self, env): + env.start_branching(self.pos) + self.body.analyse_control_flow(env) + for except_clause in self.except_clauses: + env.next_branch(except_clause.pos) + except_clause.analyse_control_flow(env) + if self.else_clause: + env.next_branch(self.except_clauses[-1].end_pos()) + # the else cause it executed only when the try clause finishes + env.control_flow.incoming = env.control_flow.parent.branches[0] + self.else_clause.analyse_control_flow(env) + env.finish_branching(self.end_pos()) + def analyse_declarations(self, env): self.body.analyse_declarations(env) for except_clause in self.except_clauses: @@ -3239,7 +3307,7 @@ class TryFinallyStatNode(StatNode): # exception on entry to the finally block and restore # it on exit. - child_attrs = ["body", "finally_clause", "cleanup_list"] + child_attrs = ["body", "finally_clause"] preserve_exception = 1 @@ -3248,6 +3316,13 @@ class TryFinallyStatNode(StatNode): # continue in the try block, since we have no problem # handling it. + def analyse_control_flow(self, env): + env.start_branching(self.pos) + self.body.analyse_control_flow(env) + env.next_branch(self.body.end_pos()) + self.finally_clause.analyse_control_flow(env) + env.finish_branching(self.end_pos()) + def analyse_declarations(self, env): self.body.analyse_declarations(env) self.finally_clause.analyse_declarations(env) diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 8227618c..273536b7 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -1162,10 +1162,13 @@ def p_try_statement(s): if s.sy == 'else': s.next() else_clause = p_suite(s) - return Nodes.TryExceptStatNode(pos, + body = Nodes.TryExceptStatNode(pos, body = body, except_clauses = except_clauses, else_clause = else_clause) - elif s.sy == 'finally': + if s.sy != 'finally': + return body + # try-except-finally is equivalent to nested try-except/try-finally + if s.sy == 'finally': s.next() finally_clause = p_suite(s) return Nodes.TryFinallyStatNode(pos, diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index ab48aae0..955d4459 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -3,6 +3,8 @@ # import re +import bisect + from Errors import warning, error, InternalError import Options import Naming @@ -12,6 +14,7 @@ import TypeSlots from TypeSlots import \ pyfunction_signature, pymethod_signature, \ get_special_method_signature, get_property_accessor_signature +import ControlFlow import __builtin__ identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$") @@ -43,6 +46,7 @@ class Entry: # setter_cname string C func for setting or deleting property # is_self_arg boolean Is the "self" arg of an exttype method # is_arg boolean Is the arg of a method + # is_local boolean Is a local variable # is_readonly boolean Can't be assigned to # func_cname string C func implementing Python func # pos position Source position where declared @@ -91,6 +95,7 @@ class Entry: setter_cname = None is_self_arg = 0 is_arg = 0 + is_local = 0 is_declared_generic = 0 is_readonly = 0 func_cname = None @@ -117,8 +122,8 @@ class Entry: self.type = type self.pos = pos self.init = init - - + + class Scope: # name string Unqualified name # outer_scope Scope or None Enclosing scope @@ -145,6 +150,7 @@ class Scope: # qualified_name string "modname" or "modname.classname" # pystring_entries [Entry] String const entries newly used as # Python strings in this scope + # control_flow ControlFlow Used for keeping track of environment state is_py_class_scope = 0 is_c_class_scope = 0 @@ -187,7 +193,17 @@ class Scope: self.num_to_entry = {} self.obj_to_entry = {} self.pystring_entries = [] + self.control_flow = ControlFlow.LinearControlFlow() + + def start_branching(self, pos): + self.control_flow = self.control_flow.start_branch(pos) + def next_branch(self, pos): + self.control_flow = self.control_flow.next_branch(pos) + + def finish_branching(self, pos): + self.control_flow = self.control_flow.finish_branch(pos) + def __str__(self): return "<%s %s>" % (self.__class__.__name__, self.qualified_name) @@ -233,6 +249,7 @@ class Scope: if name: entry.qualified_name = self.qualify_name(name) dict[name] = entry + entry.scope = self return entry def qualify_name(self, name): @@ -341,6 +358,7 @@ class Scope: entry = self.declare(name, cname, type, pos) entry.is_variable = 1 entry.visibility = visibility + self.control_flow.set_state((), (name, 'initalized'), False) return entry def declare_builtin(self, name, pos): @@ -1025,6 +1043,7 @@ class LocalScope(Scope): entry = Scope.declare_var(self, name, type, pos, cname, visibility, is_cdef) entry.init_to_none = type.is_pyobject + entry.is_local = 1 self.var_entries.append(entry) return entry