Basic control flow state processing
authorRobert Bradshaw <robertwb@math.washington.edu>
Tue, 25 Mar 2008 09:19:38 +0000 (02:19 -0700)
committerRobert Bradshaw <robertwb@math.washington.edu>
Tue, 25 Mar 2008 09:19:38 +0000 (02:19 -0700)
Cython/Compiler/ControlFlow.py [new file with mode: 0644]
Cython/Compiler/ExprNodes.py
Cython/Compiler/Nodes.py
Cython/Compiler/Parsing.py
Cython/Compiler/Symtab.py

diff --git a/Cython/Compiler/ControlFlow.py b/Cython/Compiler/ControlFlow.py
new file mode 100644 (file)
index 0000000..7bce1de
--- /dev/null
@@ -0,0 +1,158 @@
+import bisect
+
+# This module keeps track of arbitrary "states" at any point of the code. 
+# A state is considered known if every path to the given point agrees on
+# its state, otherwise it is None (i.e. unknown). 
+
+# It might be useful to be able to "freeze" the set of states by pushing 
+# all state changes to the tips of the trees for fast reading. Perhaps this
+# could be done on get_state, clearing the cache on set_state (assuming 
+# incoming is immutable). 
+
+
+class ControlFlow:
+
+    def __init__(self, start_pos, incoming, parent):
+        self.start_pos = start_pos
+        self.incoming = incoming
+        if parent is None and incoming is not None:
+            parent = incoming.parent
+        self.parent = parent
+        self.tip = {}
+        self.end_pos = ((),)
+        
+    def start_branch(self, pos):
+        self.end_pos = pos
+        branch_point = BranchingControlFlow(pos, self)
+        if self.parent is not None:
+            self.parent.branches[-1] = branch_point
+        return branch_point.branches[0]
+    
+    def next_branch(self, pos):
+        self.end_pos = pos
+        return self.parent.new_branch(pos)
+        
+    def finish_branch(self, pos):
+        self.end_pos = pos
+        self.parent.end_pos = pos
+        return LinearControlFlow(pos, self.parent)
+        
+    def get_state(self, pos, item):
+        return self.get_pos_state(pos, item)[1]
+        
+    def get_pos_state(self, pos, item):
+        # do some caching
+        if pos > self.end_pos:
+            try:
+                return self.tip[item]
+            except KeyError:
+                self.tip[item] = pos_state = self._get_pos_state(pos, item)
+                return pos_state
+        else:
+            return self._get_pos_state(pos, item)
+        
+class LinearControlFlow(ControlFlow):
+
+    def __init__(self, start_pos=(), incoming=None, parent=None):
+        ControlFlow.__init__(self, start_pos, incoming, parent)
+        self.events = {}
+            
+    def set_state(self, pos, item, state):
+        if self.tip.has_key(item):
+            del self.tip[item]
+        if pos < self.start_pos:
+            if self.incoming is not None:
+                self.incoming.set_state(pos, item, state)
+        else:
+            if self.events.has_key(item):
+                event_list = self.events[item]
+            else:
+                event_list = []
+                self.events[item] = event_list
+            bisect.insort(event_list, (pos, state))
+            
+        
+    def _get_pos_state(self, pos, item):
+        if pos > self.start_pos:
+            if self.events.has_key(item):
+                event_list = self.events[item]
+                for event in event_list[::-1]:
+                    if event[0] < pos:
+                        return event
+                        
+        if self.incoming is not None:
+            return self.incoming.get_pos_state(pos, item)
+        
+        else:
+            return None, None
+            
+    def to_string(self, indent, limit=None):
+    
+        if len(self.events) == 0:
+            s = indent + "[no state changes]"
+            
+        else:
+            all = []
+            for item, event_list in self.events.items():
+                for pos, state in event_list:
+                    all.append((indent, pos, item, state))
+            all.sort()
+            all = ["%s%s: %s <- %s" % data for data in all]
+            s = "\n".join(all)
+        if self.incoming is not limit and self.incoming is not None:
+            s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s)
+        return s
+    
+    
+class BranchingControlFlow(ControlFlow):
+    
+    def __init__(self, start_pos, incoming, parent=None):
+        ControlFlow.__init__(self, start_pos, incoming, parent)
+        self.branches = [LinearControlFlow(start_pos, incoming, parent=self)]
+        self.branch_starts = [start_pos]
+        
+    def set_state(self, pos, item, state):
+    
+        if self.tip.has_key(item):
+            del self.tip[item]
+        
+        if pos < self.start_pos:
+            self.incoming.set_state(pos, item, state)
+        else:
+            for branch_pos, branch in zip(self.branch_starts[::-1], self.branches[::-1]):
+                if pos >= branch_pos:
+                    branch.set_state(pos, item, state)
+                    return
+    
+    def _get_pos_state(self, pos, item):
+        if pos <= self.start_pos:
+            return self.incoming.get_pos_state(pos, item)
+        elif pos < self.end_pos:
+            for branch_pos, branch in zip(self.branch_starts[::-1], self.branches[::-1]):
+                if pos >= branch_pos:
+                    return branch.get_pos_state(pos, item)
+        else:
+            last_pos, last_state = self.branches[0].get_pos_state(pos, item)
+            if last_state is None:
+                return None, None
+            for branch in self.branches[1:]:
+                other_pos, other_state = branch.get_pos_state(pos, item)
+                if other_state is None or other_state != last_state:
+                    return None, None
+                elif last_pos is not other_pos:
+                    last_pos = max(last_pos, other_pos)
+            return last_pos, last_state
+
+    def new_branch(self, pos):
+        self.branches.append(LinearControlFlow(pos, self.incoming, parent=self))
+        self.branch_starts.append(pos)
+        return self.branches[-1]
+
+    def to_string(self, indent, limit=None):
+        join = "\n%sor\n" % indent
+        s = join.join([branch.to_string(indent+"    ", limit=self.incoming) for branch in self.branches])
+        if self.incoming is not limit and self.incoming is not None:
+            s = "%s\n%s" % (self.incoming.to_string(indent, limit=limit), s)
+        return s
+        
+    
\ No newline at end of file
index f681d87bb2d64839889357f94ecebcda8fe5c7f7..a7b0a33cc2a68a1d3b04f0e565f2b152531b758e 100644 (file)
@@ -817,6 +817,7 @@ class NameNode(AtomicExprNode):
         self.entry = env.lookup_here(self.name)
         if not self.entry:
             self.entry = env.declare_var(self.name, py_object_type, self.pos)
+        env.control_flow.set_state(self.pos, (self.name, 'initalized'), True)
         if self.entry.is_declared_generic:
             self.result_ctype = py_object_type
     
@@ -944,7 +945,13 @@ class NameNode(AtomicExprNode):
                     self.result_code,
                     namespace, 
                     self.entry.name,
-                    code.error_goto_if_null(self.result_code, self.pos)))              
+                    code.error_goto_if_null(self.result_code, self.pos)))
+        elif entry.is_local:
+            assigned = entry.scope.control_flow.get_state(self.pos, (entry.name, 'initalized'))
+            if assigned is False:
+                error(self.pos, "local variable '%s' referenced before assignment" % entry.name)
+            elif assigned is None:
+                code.putln('/* check %s */' % entry.cname)
 
     def generate_assignment_code(self, rhs, code):
         #print "NameNode.generate_assignment_code:", self.name ###
index 4146688287f6b948579d354528dac38df2e0492f..01a3284474cbc3375c9c134c2e5caa51e4b8bc7e 100644 (file)
@@ -14,6 +14,7 @@ from Symtab import ModuleScope, LocalScope, \
     StructOrUnionScope, PyClassScope, CClassScope
 from Cython.Utils import open_new_file, replace_suffix
 import Options
+import ControlFlow
 
 from DebugFlags import debug_disposal_code
 
@@ -119,15 +120,19 @@ class Node:
     
     
     #
-    #  There are 3 phases of parse tree processing, applied in order to
+    #  There are 4 phases of parse tree processing, applied in order to
     #  all the statements in a given scope-block:
     #
+    #  (0) analyse_control_flow
+    #        Create the control flow tree into which state can be asserted and
+    #        queried.
+    #
     #  (1) analyse_declarations
     #        Make symbol table entries for all declarations at the current
     #        level, both explicit (def, cdef, etc.) and implicit (assignment
     #        to an otherwise undeclared name).
     #
-    #          (2)     analyse_expressions
+    #  (2) analyse_expressions
     #         Determine the result types of expressions and fill in the
     #         'type' attribute of each ExprNode. Insert coercion nodes into the
     #         tree where needed to convert to and from Python objects. 
@@ -135,12 +140,15 @@ class Node:
     #         in the 'result_code' attribute of each ExprNode with a C code
     #         fragment.
     #
-    #   (3) generate_code
+    #  (3) generate_code
     #         Emit C code for all declarations, statements and expressions.
     #         Recursively applies the 3 processing phases to the bodies of
     #         functions.
     #
     
+    def analyse_control_flow(self, env):
+        pass
+    
     def analyse_declarations(self, env):
         pass
     
@@ -156,6 +164,26 @@ class Node:
         # mro does the wrong thing
         if isinstance(self, BlockNode):
             self.body.annotate(code)
+            
+    def end_pos(self):
+        try:
+            return self._end_pos
+        except AttributeError:
+            children = [acc.get() for acc in self.get_child_accessors()]
+            if len(children) == 0:
+                self._end_pos = self.pos
+            else:
+                # Sometimes lists, sometimes nodes
+                flat = []
+                for child in children:
+                    if child is None:
+                        pass
+                    elif isinstance(child, list):
+                        flat += child
+                    else:
+                        flat.append(child)
+                self._end_pos = max([child.end_pos() for child in flat])
+            return self._end_pos
 
 
 class BlockNode:
@@ -215,6 +243,10 @@ class StatListNode(Node):
     
     child_attrs = ["stats"]
     
+    def analyse_control_flow(self, env):
+        for stat in self.stats:
+            stat.analyse_control_flow(env)
+
     def analyse_declarations(self, env):
         #print "StatListNode.analyse_declarations" ###
         for stat in self.stats:
@@ -311,11 +343,12 @@ class CDeclaratorNode(Node):
 
 
 class CNameDeclaratorNode(CDeclaratorNode):
-    #  name   string           The Pyrex name being declared
-    #  cname  string or None   C name, if specified
+    #  name   string             The Pyrex name being declared
+    #  cname  string or None     C name, if specified
+    #  rhs    ExprNode or None   the value assigned on declaration
     
     child_attrs = []
-
+    
     def analyse(self, base_type, env):
         self.type = base_type
         return self, base_type
@@ -323,6 +356,7 @@ class CNameDeclaratorNode(CDeclaratorNode):
     def analyse_expressions(self, env):
         self.entry = env.lookup(self.name)
         if self.rhs is not None:
+            env.control_flow.set_state(self.rhs.end_pos(), (self.entry.name, 'initalized'), True)
             self.entry.used = 1
             if self.type.is_pyobject:
                 self.entry.init_to_none = False
@@ -770,6 +804,7 @@ class FuncDefNode(StatNode, BlockNode):
         code.init_labels()
         self.declare_arguments(lenv)
         transforms.run('before_analyse_function', self, env=env, lenv=lenv, genv=genv)
+        self.body.analyse_control_flow(lenv)
         self.body.analyse_declarations(lenv)
         self.body.analyse_expressions(lenv)
         transforms.run('after_analyse_function', self, env=env, lenv=lenv, genv=genv)
@@ -865,7 +900,9 @@ class FuncDefNode(StatNode, BlockNode):
         # ----- Return cleanup
         code.put_label(code.return_label)
         code.put_var_decrefs(lenv.var_entries, used_only = 1)
-        code.put_var_decrefs(lenv.arg_entries)
+        for entry in lenv.arg_entries:
+#            if len(entry.assignments) > 0:
+                code.put_var_decref(entry)
         self.put_stararg_decrefs(code)
         if acquire_gil:
             code.putln("PyGILState_Release(_save);")
@@ -894,7 +931,8 @@ class FuncDefNode(StatNode, BlockNode):
         # This is necessary, because if the argument is
         # assigned to, it will be decrefed.
         for entry in env.arg_entries:
-            code.put_var_incref(entry)
+#            if len(entry.assignments) > 0:
+                code.put_var_incref(entry)
 
     def generate_optarg_wrapper_function(self, env, code):
         pass
@@ -2714,6 +2752,15 @@ class IfStatNode(StatNode):
 
     child_attrs = ["if_clauses", "else_clause"]
     
+    def analyse_control_flow(self, env):
+        env.start_branching(self.pos)
+        for if_clause in self.if_clauses:
+            if_clause.analyse_control_flow(env)
+            env.next_branch(if_clause.end_pos())
+        if self.else_clause:
+            self.else_clause.analyse_control_flow(env)
+        env.finish_branching(self.end_pos())
+
     def analyse_declarations(self, env):
         for if_clause in self.if_clauses:
             if_clause.analyse_declarations(env)
@@ -2752,6 +2799,9 @@ class IfClauseNode(Node):
     
     child_attrs = ["condition", "body"]
 
+    def analyse_control_flow(self, env):
+        self.body.analyse_control_flow(env)
+        
     def analyse_declarations(self, env):
         self.condition.analyse_declarations(env)
         self.body.analyse_declarations(env)
@@ -2779,8 +2829,19 @@ class IfClauseNode(Node):
         self.condition.annotate(code)
         self.body.annotate(code)
         
+        
+class LoopNode:
+    
+    def analyse_control_flow(self, env):
+        env.start_branching(self.pos)
+        self.body.analyse_control_flow(env)
+        env.next_branch(self.body.end_pos())
+        if self.else_clause:
+            self.else_clause.analyse_control_flow(env)
+        env.finish_branching(self.end_pos())
+
     
-class WhileStatNode(StatNode):
+class WhileStatNode(LoopNode, StatNode):
     #  while statement
     #
     #  condition    ExprNode
@@ -2788,7 +2849,7 @@ class WhileStatNode(StatNode):
     #  else_clause  StatNode
 
     child_attrs = ["condition", "body", "else_clause"]
-    
+
     def analyse_declarations(self, env):
         self.body.analyse_declarations(env)
         if self.else_clause:
@@ -2835,7 +2896,7 @@ def ForStatNode(pos, **kw):
     else:
         return ForFromStatNode(pos, **kw)
 
-class ForInStatNode(StatNode):
+class ForInStatNode(LoopNode, StatNode):
     #  for statement
     #
     #  target        ExprNode
@@ -2943,7 +3004,7 @@ class ForInStatNode(StatNode):
         self.item.annotate(code)
 
 
-class ForFromStatNode(StatNode):
+class ForFromStatNode(LoopNode, StatNode):
     #  for name from expr rel name rel expr
     #
     #  target        NameNode
@@ -2962,12 +3023,6 @@ class ForFromStatNode(StatNode):
     #  py_loopvar_node    PyTempNode or None
     child_attrs = ["target", "bound1", "bound2", "step", "body", "else_clause", "py_loopvar_node"]
     
-    def analyse_declarations(self, env):
-        self.target.analyse_target_declaration(env)
-        self.body.analyse_declarations(env)
-        if self.else_clause:
-            self.else_clause.analyse_declarations(env)
-
     def analyse_expressions(self, env):
         import ExprNodes
         self.target.analyse_target_types(env)
@@ -3084,8 +3139,21 @@ class TryExceptStatNode(StatNode):
     #  else_clause      StatNode or None
     #  cleanup_list     [Entry]            temps to clean up on error
 
-    child_attrs = ["body", "except_clauses", "else_clause", "cleanup_list"]
+    child_attrs = ["body", "except_clauses", "else_clause"]
     
+    def analyse_control_flow(self, env):
+        env.start_branching(self.pos)
+        self.body.analyse_control_flow(env)
+        for except_clause in self.except_clauses:
+            env.next_branch(except_clause.pos)
+            except_clause.analyse_control_flow(env)
+        if self.else_clause:
+            env.next_branch(self.except_clauses[-1].end_pos())
+            # the else cause it executed only when the try clause finishes
+            env.control_flow.incoming = env.control_flow.parent.branches[0]
+            self.else_clause.analyse_control_flow(env)
+        env.finish_branching(self.end_pos())
+
     def analyse_declarations(self, env):
         self.body.analyse_declarations(env)
         for except_clause in self.except_clauses:
@@ -3239,7 +3307,7 @@ class TryFinallyStatNode(StatNode):
     #  exception on entry to the finally block and restore
     #  it on exit.
 
-    child_attrs = ["body", "finally_clause", "cleanup_list"]
+    child_attrs = ["body", "finally_clause"]
     
     preserve_exception = 1
     
@@ -3248,6 +3316,13 @@ class TryFinallyStatNode(StatNode):
     # continue in the try block, since we have no problem
     # handling it.
     
+    def analyse_control_flow(self, env):
+        env.start_branching(self.pos)
+        self.body.analyse_control_flow(env)
+        env.next_branch(self.body.end_pos())
+        self.finally_clause.analyse_control_flow(env)
+        env.finish_branching(self.end_pos())
+
     def analyse_declarations(self, env):
         self.body.analyse_declarations(env)
         self.finally_clause.analyse_declarations(env)
index 8227618c3ced8d5683b7ba69f5158ae78b814219..273536b771cf1bd5453518cc1f4eafb9bae73e81 100644 (file)
@@ -1162,10 +1162,13 @@ def p_try_statement(s):
         if s.sy == 'else':
             s.next()
             else_clause = p_suite(s)
-        return Nodes.TryExceptStatNode(pos,
+        body = Nodes.TryExceptStatNode(pos,
             body = body, except_clauses = except_clauses,
             else_clause = else_clause)
-    elif s.sy == 'finally':
+        if s.sy != 'finally':
+            return body
+        # try-except-finally is equivalent to nested try-except/try-finally
+    if s.sy == 'finally':
         s.next()
         finally_clause = p_suite(s)
         return Nodes.TryFinallyStatNode(pos,
index ab48aae049c3406dfa076014f7ea2bcf8cba0b85..955d4459baf8b3516f64c4ffb9465fb66e1ff53b 100644 (file)
@@ -3,6 +3,8 @@
 #
 
 import re
+import bisect
+
 from Errors import warning, error, InternalError
 import Options
 import Naming
@@ -12,6 +14,7 @@ import TypeSlots
 from TypeSlots import \
     pyfunction_signature, pymethod_signature, \
     get_special_method_signature, get_property_accessor_signature
+import ControlFlow
 import __builtin__
 
 identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$")
@@ -43,6 +46,7 @@ class Entry:
     # setter_cname     string          C func for setting or deleting property
     # is_self_arg      boolean    Is the "self" arg of an exttype method
     # is_arg           boolean    Is the arg of a method
+    # is_local         boolean    Is a local variable
     # is_readonly      boolean    Can't be assigned to
     # func_cname       string     C func implementing Python func
     # pos              position   Source position where declared
@@ -91,6 +95,7 @@ class Entry:
     setter_cname = None
     is_self_arg = 0
     is_arg = 0
+    is_local = 0
     is_declared_generic = 0
     is_readonly = 0
     func_cname = None
@@ -117,8 +122,8 @@ class Entry:
         self.type = type
         self.pos = pos
         self.init = init
-
-
+        
+        
 class Scope:
     # name              string             Unqualified name
     # outer_scope       Scope or None      Enclosing scope
@@ -145,6 +150,7 @@ class Scope:
     # qualified_name    string             "modname" or "modname.classname"
     # pystring_entries  [Entry]            String const entries newly used as
     #                                        Python strings in this scope
+    # control_flow     ControlFlow  Used for keeping track of environment state
 
     is_py_class_scope = 0
     is_c_class_scope = 0
@@ -187,7 +193,17 @@ class Scope:
         self.num_to_entry = {}
         self.obj_to_entry = {}
         self.pystring_entries = []
+        self.control_flow = ControlFlow.LinearControlFlow()
+        
+    def start_branching(self, pos):
+        self.control_flow = self.control_flow.start_branch(pos)
     
+    def next_branch(self, pos):
+        self.control_flow = self.control_flow.next_branch(pos)
+        
+    def finish_branching(self, pos):
+        self.control_flow = self.control_flow.finish_branch(pos)
+        
     def __str__(self):
         return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
     
@@ -233,6 +249,7 @@ class Scope:
         if name:
             entry.qualified_name = self.qualify_name(name)
             dict[name] = entry
+        entry.scope = self
         return entry
     
     def qualify_name(self, name):
@@ -341,6 +358,7 @@ class Scope:
         entry = self.declare(name, cname, type, pos)
         entry.is_variable = 1
         entry.visibility = visibility
+        self.control_flow.set_state((), (name, 'initalized'), False)
         return entry
         
     def declare_builtin(self, name, pos):
@@ -1025,6 +1043,7 @@ class LocalScope(Scope):
         entry = Scope.declare_var(self, name, type, pos, 
             cname, visibility, is_cdef)
         entry.init_to_none = type.is_pyobject
+        entry.is_local = 1
         self.var_entries.append(entry)
         return entry