Added tip-to-root/root-to-tip flexibility to hooke.util.graph
authorW. Trevor King <wking@drexel.edu>
Fri, 7 May 2010 16:13:39 +0000 (12:13 -0400)
committerW. Trevor King <wking@drexel.edu>
Fri, 7 May 2010 16:13:39 +0000 (12:13 -0400)
The old implementation only did tip-to-root.  The new default is
root-to-tip.

hooke/util/graph.py

index 9e5ee95..c2302e0 100644 (file)
@@ -41,12 +41,26 @@ class Node (list):
 
     We can find the cycle path.
 
-    >>> print d.path(d)
+    >>> print d.parent_path(d)
     [b, a, d]
+
+    After a run through :meth:`Graph.set_children`, we can also
+    list children
+
+    >>> g = Graph([a, b, c, d])
+    >>> g.set_children()
+    >>> print a.children
+    [b, c]
+
+    And descendents.
+
+    >>> print [node for node in a.descendents(depth_first=True)]
+    [b, d, a, c]
     """
     def __init__(self, parents=[], data=None):
         list.__init__(self, parents)
         self.data = data
+        self.children = []
 
     def __cmp__(self, other):
         return -cmp(self.data, other.data)
@@ -68,15 +82,16 @@ class Node (list):
     def __repr__(self):
         return self.__str__()
 
-    def ancestors(self, depth_first=False):
-        """Generate all ancestors.
+    def traverse(self, next, depth_first=False):
+        """Iterate through all nodes returned by `next(node)`.
 
-        Will only yield each ancestor once, even in the case of
+        Will only yield each traversed node once, even in the case of
         diamond inheritance, etc.
 
-        Breadth first by default.
+        Breadth first by default.  Set `depth_first==True` for a
+        depth first search.
         """
-        stack = list(self)
+        stack = list(next(self))
         popped = []
         while len(stack) > 0:
             node = stack.pop(0)
@@ -85,55 +100,105 @@ class Node (list):
             popped.append(node)
             yield node
             if depth_first == True:
-                for parent in reversed(node):
-                    stack.insert(0, parent)
+                for target in reversed(next(node)):
+                    stack.insert(0, target)
             else:
-                stack.extend(node)
-    def path(self, node):
-        """Return the shortest list of parents connecting `self` to
-        `node`.
+                stack.extend(next(node))
+
+    def ancestors(self, depth_first=False):
+        """Generate all ancestors.
+
+        This is a small wrapper around :meth:`traverse`.
+        """
+        next = lambda node : node  # list node's parents
+        for node in self.traverse(next=next, depth_first=depth_first):
+            yield node
+
+    def descendents(self, depth_first=False):
+        """Generate all decendents.
+
+        This is a small wrapper around :meth:`traverse`.
+        """
+        next = lambda node : node.children
+        for node in self.traverse(next=next, depth_first=depth_first):
+            yield node
+
+    def path(self, next, node):
+        """Return the shortest list of nodes connecting `self` to
+        `node` via `next(node)`.
         """
         if node in self:
             return [node]
-        stack = list(self)
+        stack = list(next(self))
         paths = dict((id(n), [n]) for n in stack)
         while len(stack) > 0:
             n = stack.pop(0)
             n_path = paths[id(n)]
-            for parent in n:
-                if id(parent) in paths:
+            for target in next(n):
+                if id(target) in paths:
                     continue
-                p_path = list(n_path)
-                p_path.append(parent)
-                if id(parent) == id(node):
-                    return p_path
-                stack.append(parent)
-                paths[id(parent)] = p_path
-
-class _GraphRowEntry (list):
-    """A node in the graph with outstanding children.
-
-    `columns` is a list of the indexes of outstanding columns which
-    will eventually attach to those children.
-    """
-    def __init__(self, node, columns=[]):
-        list.__init__(self, columns)
-        self.node = node
+                t_path = list(n_path)
+                t_path.append(target)
+                if id(target) == id(node):
+                    return t_path
+                stack.append(target)
+                paths[id(target)] = t_path
+
+    def parent_path(self, node):
+        """Return the shortest list of nodes connecting `self` to
+        its parent `node`.
+
+        This is a small wrapper around :meth:`path`.
+        """
+        next = lambda node : node  # list node's parents
+        return self.path(next, node)
 
-class GraphRow (list):
-    """A graph row generator.
+    def child_path(self, node):
+        """Return the shortest list of nodes connecting `self` to
+        its child `node`.
 
-    Contains a list of :class:`_GraphRowEntry`\s (added with
-    :meth:`insert`), and generates the current line with
-    :meth:`__str__`.  You should generate a graph with repeated
-    calls::
+        This is a small wrapper around :meth:`path`.
+        """
+        next = lambda node : node.children
+        return self.path(next, node)
 
-        g = GraphRow()
-        for node in nodes:
-            g.insert(node)
-            print str(g)
 
-    The string rendering can be cusomized by changing `g.chars`.
+class GraphRow (object):
+    """Represent the state of a single row in a graph.
+
+    Generated by :class:`GraphRowGenerator`, printed with
+    :class:`GraphRowPrinter`.
+
+    :attr:`node` is the active node and :attr:`active` is its branch
+    column index.  :attr:`width` is the number of current branch
+    columns.
+
+    :attr:`born`, :attr:`dead`, and :attr:`inherited` are lists of
+    `(branch_column_index, target_node)` pairs.  `dead` lists nodes
+    from previous rows whose branches complete on this row,
+    `inherited` lists nodes from previous rows whose branches continue
+    through this row, and `born` list nodes whose branches start on
+    this row.
+    """
+    def __init__(self, node, active=-1, dead=None, inherited=None, born=None,
+                 tip_to_root=False):
+        self.node = node
+        self.active = active
+        if dead == None:
+            dead = []
+        self.dead = dead
+        if inherited == None:
+            inherited = []
+        self.inherited = inherited
+        if born == None:
+            born = []
+        self.born = born
+        self.tip_to_root = tip_to_root
+
+class GraphRowPrinter (object):
+    """Customizable printing for :class:`GraphRow`.
+
+    The string rendering can be customized by changing :attr:`chars`.
     Control over the branch columns:
 
     ================= ===========================================
@@ -149,116 +214,83 @@ class GraphRow (list):
     `sep: split/join` separate split/join branch columns from the active node
     `sep: default`    separate all remaining branch columns
     ================= =======================================================
-
-    For the split/join branch columns, "born" and "died" are defined
-    from the point of view of `GraphRow`.  Because trees are printed
-    tip-to-root (:meth:`Graph.topological_sort`), "born" runs are
-    determined by the active node's parents (which have not yet been
-    printed), and "dead" runs by its children (which have been
-    printed).
     """
-    def __init__(self):
-        list.__init__(self)
-        self._width = 0
-        self._last_width = 0
-        self._old_dead = []
-        self._next_old_dead = []
-        self.chars = {
-            'node: both tip and root': 'b',
-            'node: root': 'r',
-            'node: tip': 't',
-            'node: regular': '*',
-            'split/join: born and died left of active': '>',
-            'split/join: born and died right of active': '<',
-            'split/join: born left of active': '/',
-            'split/join: born right of active': '\\',
-            'split/join: died left of active': '\\',
-            'split/join: died right of active': '/',
-            'run: blank': ' ',
-            'run: connected': '|',
-            'sep: split/join': '-',
-            'sep: default': ' ',               
-            }
-    def insert(self, node):
-        """Add a new node to the graph.
-
-        Nodes should be inserted in tip-to-root topological order
-        (i.e. node must be inserted before any of its parents).
-        """
-        self._node = node
-        self._last_width = self._width
-        self._old_dead = self._next_old_dead
-        self._born = []
-        self._dead = []
-        if len(self) > 0 and len(self[-1].node) == 0:
-            # remove last entry's active column if it has no parents
-            self[-1].pop(0) # (it' already in _old_dead via _next_old_dead)
-        self._children = [(i,e) for i,e in enumerate(self) if node in e.node]
-        for i,e in self._children:
-            self._dead.append(e.pop(0)) # TODO: sharing branches vs. current 1 per child
-        exhausted = [] # _GraphRowEntry\s with no futher parents
-        for e in self:
-            if len(e) == 0:
-                exhausted.append(e)
-        for e in exhausted:
-            self.remove(e)
-        self._dead.sort()
-        remaining = max(len(node), 1)
-        slots = sorted(self._old_dead + self._dead)
-        self._born.extend(slots[:remaining])
-        remaining -= len(slots)
-        self._born.extend(range(self._last_width, self._last_width+remaining))
-        self._active = self._born[0]
-        self.append(_GraphRowEntry(node, columns=self._born))
-        self._width = max([e[-1] for e in self])+1
-        self._next_old_dead = [i for i in slots[len(self._born):] if i < self._width]
-        if len(node) == 0:
-            self._next_old_dead.insert(0, self._active)
-
-    def __str__(self):
-        """Render the current line of the graph as a string.
+    def __init__(self, chars=None):
+        if chars == None:
+            chars = {
+                'node: both tip and root': 'b',
+                'node: root': 'r',
+                'node: tip': 't',
+                'node: regular': '*',
+                'split/join: born and died left of active': '>',
+                'split/join: born and died right of active': '<',
+                'split/join: born left of active': '/',
+                'split/join: born right of active': '\\',
+                'split/join: died left of active': '\\',
+                'split/join: died right of active': '/',
+                'run: blank': ' ',
+                'run: connected': '|',
+                'sep: split/join': '-',
+                'sep: default': ' ',               
+                }
+        self.chars = chars
+    def __call__(self, graph_row):
+        """Render the :class:`GraphRow` instance `graph_row` as a
+        string.
         """
-        left_connects = [self._born[0]]
-        right_connects = [self._born[-1]]
-        if len(self._dead) > 0:
-            left_connects.append(self._dead[0])
-            right_connects.append(self._dead[-1])
-        left_connect = min(left_connects)
-        right_connect = max(right_connects)
+        dead = [i for i,node in graph_row.dead]
+        inherited = [i for i,node in graph_row.inherited]
+        born = [i for i,node in graph_row.born]
+        right_connect = max(graph_row.active,
+                            max(born+[-1]), # +[-1] protects against empty born
+                            max(dead+[-1]))
+        left_connect = min(graph_row.active,
+                           min(born+[right_connect]),
+                           min(dead+[right_connect]))
+        max_col = max(right_connect, max(inherited+[-1]))
         string = []
-        for i in range(max(self._width, self._last_width)):
-            if i == self._active:
-                if len(self._node) == 0:
-                    if len(self._children) == 0:
+        for i in range(max_col + 1):
+            # Get char, the node or branch column character.
+            if i == graph_row.active:
+                if len(born) == 0:
+                    if len(dead) == 0:
                         char = self.chars['node: both tip and root']
-                    else:
+                    elif graph_row.tip_to_root == True:
+                        # The dead are children
+                        char = self.chars['node: root']
+                    else: # The dead are parents
+                        char = self.chars['node: tip']
+                elif len(dead) == 0:
+                    if graph_row.tip_to_root == True:
+                        # The born are parents
+                        char = self.chars['node: tip']
+                    else: # The born are children
                         char = self.chars['node: root']
-                elif len(self._children) == 0:
-                    char = self.chars['node: tip']
                 else:
                     char = self.chars['node: regular']
-            elif i in self._born:
-                if i in self._dead:
-                    if i < self._active:
+            elif i in born:
+                if i in dead: # born and died
+                    if i < graph_row.active:
                         char = self.chars[
                             'split/join: born and died left of active']
                     else:
                         char = self.chars[
                             'split/join: born and died right of active']
-                else:
-                    if i < self._active:
+                else: # just born
+                    if i < graph_row.active:
                         char = self.chars['split/join: born left of active']
                     else:
                         char = self.chars['split/join: born right of active']
-            elif i in self._dead:
-                if i < self._active:
+            elif i in dead: # just died
+                if i < graph_row.active:
                     char = self.chars['split/join: died left of active']
                 else:
                     char = self.chars['split/join: died right of active']
-            elif i in self._old_dead:
-                char = self.chars['run: blank']
-            else:
+            elif i in inherited:
                 char = self.chars['run: connected']
+            else:
+                char = self.chars['run: blank']
+            # Get sep, the separation character.
             if i < left_connect or i >= right_connect:
                 sep = self.chars['sep: default']
             else:
@@ -268,6 +300,72 @@ class GraphRow (list):
             string.extend([char, sep])
         return ''.join(string)[:-1] # [-1] strips final sep
 
+class GraphRowGenerator (list):
+    """A :class:`GraphRow` generator.
+
+    Contains a list of :class:`GraphRow`\s (added with
+    :meth:`insert`(:class:`hooke.util.graph.Node`)).  You should
+    generate a graph with repeated calls::
+
+        tip_to_root = True
+        g = GraphRowGenerator(tip_to_root=tip_to_root)
+        p = GraphRowPrinter(tip_to_root=tip_to_root)
+        for node in nodes:
+            g.insert(node)
+            print p(g[-1])
+
+    For the split/join branch columns, "born" and "dead" are defined
+    from the point of view of `GraphRow`.  For root-to-tip ordering
+    (`tip_to_root==False`, the default), "born" runs are determined
+    by the active node's children (which have yet to be printed) and
+    "dead" runs by its parents (which have been printed).  If
+    `tip_to_root==True`, swap "children" and "parents" in the above
+    sentence.
+    """
+    def __init__(self, tip_to_root=False):
+        list.__init__(self)
+        self.tip_to_root = tip_to_root
+    def insert(self, node):
+        """Add a new node to the graph.
+
+        If `tip_to_root==True`, nodes should be inserted in
+        tip-to-root topological order (i.e. node must be inserted
+        before any of its parents).
+
+        If `tip_to_root==False`, nodes must be inserted before any
+        of their children.
+        """
+        if len(self) == 0:
+            previous = GraphRow(node=None, active=-1)
+        else:
+            previous = self[-1]
+        current = GraphRow(node=node, active=-1, tip_to_root=self.tip_to_root)
+        if self.tip_to_root == True: # children die, parents born
+            dead_nodes = list(current.node.children)
+            born_nodes = list(current.node)
+        else: # root-to-tip: parents die, children born
+            dead_nodes = list(current.node)
+            born_nodes = list(current.node.children)
+        # Mark the dead and inherited branch columns
+        for i,node in previous.inherited + previous.born:
+            if node in dead_nodes or node == current.node:
+                current.dead.append((i, node))
+            else:
+                current.inherited.append((i, node))
+        # Place born and active branch columns
+        num_born = max(len(born_nodes), 1) # 1 to ensure slot for active node
+        remaining = num_born # number of nodes left to place
+        used_slots = [i for i,n in current.inherited]
+        old_max = max(used_slots+[-1]) # +[-1] in case used_slots is empty
+        slots = sorted([i for i in range(old_max+1) if i not in used_slots])
+        remaining -= len(slots)
+        slots.extend(range(old_max+1, old_max+1+remaining))
+        current.active = slots[0]
+        current.born = zip(slots, born_nodes)
+        # TODO: sharing branches vs. current 1 per child
+        self.append(current)
+
+
 class Graph (list):
     """A directed, acyclic graph structure.
 
@@ -289,18 +387,28 @@ class Graph (list):
     >>> n.h.append(n.e)
     >>> n.i.extend([n.f, n.g, n.h])
     >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
-    >>> g.topological_sort()
+    >>> g.topological_sort(tip_to_root=True)
     >>> print [node for node in g]
     [i, h, g, f, e, d, c, b, a]
     >>> print g.ascii_graph()
-    t-\-\ i
+    r-\-\ a
+    | | * b
+    | * | c
+    * | | d
+    *-<-< e
+    | | * f
+    | * | g
     * | | h
+    t-/-/ i
+    >>> print g.ascii_graph(tip_to_root=True)
+    t-\-\ i
+    | | * h
     | * | g
-    | | * f
+    * | | f
     *-<-< e
-    * | | d
+    | | * d
     | * | c
-    | | * b
+    * | | b
     r-/-/ a
 
     >>> for char in ['a','b','c','d','e','f','g','h']:
@@ -313,14 +421,14 @@ class Graph (list):
     >>> n.g.extend([n.e, n.f])
     >>> n.h.extend([n.c, n.g])
     >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h])
-    >>> print g.ascii_graph()
+    >>> print g.ascii_graph(tip_to_root=True)
     t-\ h
-    *-|-\ g
-    *-|-|-\ f
-    | | * | e
-    *-|-/ | d
-    | *   | c
-    | *---/ b
+    | *-\ g
+    | | *-\ f
+    | * | | e
+    | *-|-/ d
+    * | | c
+    *-|-/ b
     r-/ a
 
     >>> for char in ['a','b','c','d','e','f','g','h','i']:
@@ -329,16 +437,16 @@ class Graph (list):
     ...     nx = getattr(n, char)
     ...     n.i.append(nx)
     >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
-    >>> print g.ascii_graph()
+    >>> print g.ascii_graph(tip_to_root=True)
     t-\-\-\-\-\-\-\ i
-    r | | | | | | | h
-    r-/ | | | | | | g
-    r---/ | | | | | f
-    r-----/ | | | | e
-    r-------/ | | | d
-    r---------/ | | c
-    r-----------/ | b
-    r-------------/ a
+    | | | | | | | r h
+    | | | | | | r g
+    | | | | | r f
+    | | | | r e
+    | | | r d
+    | | r c
+    | r b
+    r a
 
     >>> for char in ['a','b','c','d','e','f','g','h','i']:
     ...     setattr(n, char, Node(data=char))
@@ -346,7 +454,7 @@ class Graph (list):
     ...     nx = getattr(n, char)
     ...     nx.append(n.a)
     >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
-    >>> print g.ascii_graph()
+    >>> print g.ascii_graph(tip_to_root=True)
     t i
     | t h
     | | t g
@@ -366,16 +474,16 @@ class Graph (list):
     >>> n.h.extend([n.a, n.c, n.d, n.g])
     >>> n.i.extend([n.a, n.b, n.c, n.g])
     >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
-    >>> print g.ascii_graph()
+    >>> print g.ascii_graph(tip_to_root=True)
     t-\-\-\ i
     | | | | t-\-\-\ h
-    *-|-|-|-<-|-|-|-\ g
-    *-|-|-|-|-|-|-|-|-\-\ f
-    *-|-|-|-< | | | | | | e
-    | | | | | *-|-|-|-/ | d
-    r-/-|-|-|-|-/-|-|---/ c
-    r---/-|-|-|---|-/ b
-    r-----/-/-/---/ a
+    | | | *-|-|-|-<-\ g
+    | | | | | | | | *-\-\ f
+    | | | | | | | *-|-|-< e
+    | | | | | | *-|-|-/ | d
+    | | r-|-|-/-|-|-/---/ c
+    | r---/ |   | | b
+    r-------/---/-/ a
 
     Ok, enough pretty graphs ;).  Here's an example of cycle
     detection.
@@ -396,19 +504,29 @@ class Graph (list):
       b
       a
     """
-    def topological_sort(self):
+    def set_children(self):
+        """Fill out each node's :attr:`Node.children` list.
+        """
+        for node in self:
+            for parent in node:
+                if node not in parent.children:
+                    parent.children.append(node)
+
+    def topological_sort(self, tip_to_root=False):
         """Algorithm from git's commit.c `sort_in_topological_order`_.
 
-        Ordering is tip-to-root.
+        Default ordering is root-to-tip.  Set `tip_to_root=True` for
+        tip-to-root.
 
         In situations where topological sorting is ambiguous, the
-        nodes are sorted using the inverse (since we use tip-to-root
-        ordering) of their comparison functions (__cmp__, __lt__,
-        ...).
+        nodes are sorted using the node comparison functions (__cmp__,
+        __lt__, ...).  If `tip_to_root==True`, the inverse
+        comparison functions are used.
 
         .. _sort_in_topological_order:
           http://git.kernel.org/?p=git/git.git;a=blob;f=commit.c;h=731191e63bd39a89a8ea4ed0390c49d5605cdbed;hb=HEAD#l425
         """
+        # sort tip-to-root first, then reverse if neccessary
         for node in self:
             node._outcount = 0
         for node in self:
@@ -430,39 +548,43 @@ class Graph (list):
             raise CyclicGraphError(
                 '%d of %d elements not reachable from tips'
                 % (orig_len - final_len, orig_len))
+        if tip_to_root == False:
+            self.reverse()
 
     def check_for_cycles(self):
         """Check for cyclic references.
         """
         for node in self:
             if node in node.ancestors():
-                path = node.path(node)
+                path = node.parent_path(node)
                 raise CyclicGraphError(
                     'cycle detected:\n  %s'
                     % '\n  '.join([repr(node)]+[repr(node) for node in path]))
 
-    def graph_rows(self, graph_row=None):
+    def graph_rows(self, tip_to_root=False):
         """Generate a sequence of (`graph_row`, `node`) tuples.
 
-        Preforms :meth:`topological_sort` internally.
-
-        If `graph_row` is `None`, a new instance of :class:`GraphRow`
-        is used.
+        Preforms :meth:`set_children` and :meth:`topological_sort`
+        internally.
         """
-        if graph_row == None:
-            graph_row = GraphRow()
-        self.topological_sort()
+        graph_row_generator = GraphRowGenerator(tip_to_root=tip_to_root)
+        self.set_children()
+        self.topological_sort(tip_to_root=tip_to_root)
         for node in self:
-            graph_row.insert(node)
-            yield (graph_row, node)
+            graph_row_generator.insert(node)
+            yield (graph_row_generator[-1], node)
 
-    def ascii_graph(self, string_fn=str):
+    def ascii_graph(self, graph_row_printer=None, string_fn=str,
+                    tip_to_root=False):
         """Print an ascii graph on the left with `string_fn(node)` on
-        the right.
+        the right.  If `graph_row_printer` is `None`, a default
+        instance of :class:`GraphRowPrinter` will be used.
 
         See the class docstring for example output.
         """
+        if graph_row_printer == None:
+            graph_row_printer = GraphRowPrinter()
         graph = []
-        for row,node in self.graph_rows():
-            graph.append('%s %s' % (str(row), string_fn(node)))
+        for row,node in self.graph_rows(tip_to_root=tip_to_root):
+            graph.append('%s %s' % (graph_row_printer(row), string_fn(node)))
         return '\n'.join(graph)