Added the graph module I'd developed for Bugs-Everywhere
authorW. Trevor King <wking@drexel.edu>
Thu, 6 May 2010 12:37:17 +0000 (08:37 -0400)
committerW. Trevor King <wking@drexel.edu>
Thu, 6 May 2010 12:37:17 +0000 (08:37 -0400)
hooke/util/__init__.py [new file with mode: 0644]
hooke/util/graph.py [new file with mode: 0644]

diff --git a/hooke/util/__init__.py b/hooke/util/__init__.py
new file mode 100644 (file)
index 0000000..8b1bf74
--- /dev/null
@@ -0,0 +1,10 @@
+# COPYRIGHT
+
+"""The util module contains assorted utilities not directly associated
+with Hooke functionality.
+
+To facilitate faster loading, submodules are not imported by default.
+The available submodules are:
+
+* :mod:`hooke.util.graph`
+"""
diff --git a/hooke/util/graph.py b/hooke/util/graph.py
new file mode 100644 (file)
index 0000000..bf7432e
--- /dev/null
@@ -0,0 +1,466 @@
+# COPYRIGHT
+
+"""Define :class:`Graph`, a directed, acyclic graph structure.
+"""
+
+import bisect
+
+class CyclicGraphError (ValueError):
+    pass
+
+class Node (list):
+    """A node/element in a graph.
+
+    Contains a list of the node's parents, and stores the node's
+    `data`.
+
+    Examples
+    --------
+
+    >>> a = Node(data='a')
+    >>> b = Node(parents=[a], data='b')
+    >>> c = Node(parents=[a], data='c')
+    >>> d = Node(parents=[b, c], data='d')
+    >>> str(d)
+    'd'
+
+    We can list all of a node's ancestors.
+
+    >>> print [node for node in d.ancestors()]
+    [b, c, a]
+    >>> print [node for node in d.ancestors(depth_first=True)]
+    [b, a, c]
+
+    Ancestors works with cycles.
+
+    >>> a.append(d)
+    >>> print [node for node in d.ancestors()]
+    [b, c, a, d]
+
+    We can find the cycle path.
+
+    >>> print d.path(d)
+    [b, a, d]
+    """
+    def __init__(self, parents=[], data=None):
+        list.__init__(self, parents)
+        self.data = data
+
+    def __cmp__(self, other):
+        return -cmp(self.data, other.data)
+    def __eq__(self, other):
+        return self.__cmp__(other) == 0
+    def __ne__(self, other):
+        return self.__cmp__(other) != 0
+    def __lt__(self, other):
+        return self.__cmp__(other) < 0
+    def __gt__(self, other):
+        return self.__cmp__(other) > 0
+    def __le__(self, other):
+        return self.__cmp__(other) <= 0
+    def __ge__(self, other):
+        return self.__cmp__(other) >= 0
+
+    def __str__(self):
+        return str(self.data)
+    def __repr__(self):
+        return self.__str__()
+
+    def ancestors(self, depth_first=False):
+        """Generate all ancestors.
+
+        Will only yield each ancestor once, even in the case of
+        diamond inheritance, etc.
+
+        Breadth first by default.
+        """
+        stack = list(self)
+        popped = []
+        while len(stack) > 0:
+            node = stack.pop(0)
+            if node in popped:
+                continue
+            popped.append(node)
+            yield node
+            if depth_first == True:
+                for parent in reversed(node):
+                    stack.insert(0, parent)
+            else:
+                stack.extend(node)
+    def path(self, node):
+        """Return the shortest list of parents connecting `self` to
+        `node`.
+        """
+        if node in self:
+            return [node]
+        stack = list(self)
+        paths = dict((id(n), [n]) for n in stack)
+        while len(stack) > 0:
+            n = stack.pop(0)
+            n_path = paths[id(n)]
+            for parent in n:
+                if id(parent) in paths:
+                    continue
+                p_path = list(n_path)
+                p_path.append(parent)
+                if id(parent) == id(node):
+                    return p_path
+                stack.append(parent)
+                paths[id(parent)] = p_path
+
+class _GraphRowEntry (list):
+    """A node in the graph with outstanding children.
+
+    `columns` is a list of the indexes of outstanding columns which
+    will eventually attach to those children.
+    """
+    def __init__(self, node, columns=[]):
+        list.__init__(self, columns)
+        self.node = node
+
+class GraphRow (list):
+    """A graph row generator.
+
+    Contains a list of :class:`_GraphRowEntry`\s (added with
+    :meth:`insert`), and generates the current line with
+    :meth:`__str__`.  You should generate a graph with repeated
+    calls::
+
+        g = GraphRow()
+        for node in nodes:
+            g.insert(node)
+            print str(g)
+
+    The string rendering can be cusomized by changing `g.chars`.
+    Control over the branch columns:
+
+    ================= ===========================================
+    `node: ...`       the active (most recently inserted) node
+    `split/join: ...` branching/merging runs from the active node
+    `run: connected`  connect a branch to its eventual node
+    `run: blank`      place-holder for extinct runs
+    ================= ===========================================
+
+    Branch columns are seperated by separator columns:
+
+    ================= =======================================================
+    `sep: split/join` separate split/join branch columns from the active node
+    `sep: default`    separate all remaining branch columns
+    ================= =======================================================
+
+    For the split/join branch columns, "born" and "died" are defined
+    from the point of view of `GraphRow`.  Because trees are printed
+    tip-to-root (:meth:`Graph.topological_sort`), "born" runs are
+    determined by the active node's parents (which have not yet been
+    printed), and "dead" runs by its children (which have been
+    printed).
+    """
+    def __init__(self):
+        list.__init__(self)
+        self._width = 0
+        self._last_width = 0
+        self._old_dead = []
+        self._next_old_dead = []
+        self.chars = {
+            'node: both tip and root': 'b',
+            'node: root': 'r',
+            'node: tip': 't',
+            'node: regular': '*',
+            'split/join: born and died left of active': '>',
+            'split/join: born and died right of active': '<',
+            'split/join: born left of active': '/',
+            'split/join: born right of active': '\\',
+            'split/join: died left of active': '\\',
+            'split/join: died right of active': '/',
+            'run: blank': ' ',
+            'run: connected': '|',
+            'sep: split/join': '-',
+            'sep: default': ' ',               
+            }
+    def insert(self, node):
+        """Add a new node to the graph.
+
+        Nodes should be inserted in tip-to-root topological order
+        (i.e. node must be inserted before any of its parents).
+        """
+        self._node = node
+        self._last_width = self._width
+        self._old_dead = self._next_old_dead
+        self._born = []
+        self._dead = []
+        if len(self) > 0 and len(self[-1].node) == 0:
+            # remove last entry's active column if it has no parents
+            self[-1].pop(0) # (it' already in _old_dead via _next_old_dead)
+        self._children = [(i,e) for i,e in enumerate(self) if node in e.node]
+        for i,e in self._children:
+            self._dead.append(e.pop(0)) # TODO: sharing branches vs. current 1 per child
+        exhausted = [] # _GraphRowEntry\s with no futher parents
+        for e in self:
+            if len(e) == 0:
+                exhausted.append(e)
+        for e in exhausted:
+            self.remove(e)
+        self._dead.sort()
+        remaining = max(len(node), 1)
+        slots = sorted(self._old_dead + self._dead)
+        self._born.extend(slots[:remaining])
+        remaining -= len(slots)
+        self._born.extend(range(self._last_width, self._last_width+remaining))
+        self._active = self._born[0]
+        self.append(_GraphRowEntry(node, columns=self._born))
+        self._width = max([e[-1] for e in self])+1
+        self._next_old_dead = [i for i in slots[len(self._born):] if i < self._width]
+        if len(node) == 0:
+            self._next_old_dead.insert(0, self._active)
+
+    def __str__(self):
+        """Render the current line of the graph as a string.
+        """
+        left_connects = [self._born[0]]
+        right_connects = [self._born[-1]]
+        if len(self._dead) > 0:
+            left_connects.append(self._dead[0])
+            right_connects.append(self._dead[-1])
+        left_connect = min(left_connects)
+        right_connect = max(right_connects)
+        string = []
+        for i in range(max(self._width, self._last_width)):
+            if i == self._active:
+                if len(self._node) == 0:
+                    if len(self._children) == 0:
+                        char = self.chars['node: both tip and root']
+                    else:
+                        char = self.chars['node: root']
+                elif len(self._children) == 0:
+                    char = self.chars['node: tip']
+                else:
+                    char = self.chars['node: regular']
+            elif i in self._born:
+                if i in self._dead:
+                    if i < self._active:
+                        char = self.chars[
+                            'split/join: born and died left of active']
+                    else:
+                        char = self.chars[
+                            'split/join: born and died right of active']
+                else:
+                    if i < self._active:
+                        char = self.chars['split/join: born left of active']
+                    else:
+                        char = self.chars['split/join: born right of active']
+            elif i in self._dead:
+                if i < self._active:
+                    char = self.chars['split/join: died left of active']
+                else:
+                    char = self.chars['split/join: died right of active']
+            elif i in self._old_dead:
+                char = self.chars['run: blank']
+            else:
+                char = self.chars['run: connected']
+            if i < left_connect or i >= right_connect:
+                sep = self.chars['sep: default']
+            else:
+                sep = self.chars['sep: split/join']
+                if char == self.chars['run: blank']:
+                    char = self.chars['sep: split/join']
+            string.extend([char, sep])
+        return ''.join(string)[:-1] # [-1] strips final sep
+
+class Graph (list):
+    """A directed, acyclic graph structure.
+
+    Contains methods for sorting and printing graphs.
+
+    Examples
+    --------
+
+    >>> class Nodes (object): pass
+    >>> n = Nodes()
+    >>> for char in ['a','b','c','d','e','f','g','h','i']:
+    ...     setattr(n, char, Node(data=char))
+    >>> n.b.append(n.a)
+    >>> n.c.append(n.a)
+    >>> n.d.append(n.a)
+    >>> n.e.extend([n.b, n.c, n.d])
+    >>> n.f.append(n.e)
+    >>> n.g.append(n.e)
+    >>> n.h.append(n.e)
+    >>> n.i.extend([n.f, n.g, n.h])
+    >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
+    >>> g.topological_sort()
+    >>> print [node for node in g]
+    [i, h, g, f, e, d, c, b, a]
+    >>> print g.ascii_graph()
+    t-\-\ i
+    * | | h
+    | * | g
+    | | * f
+    *-<-< e
+    * | | d
+    | * | c
+    | | * b
+    r-/-/ a
+
+    >>> for char in ['a','b','c','d','e','f','g','h']:
+    ...     setattr(n, char, Node(data=char))
+    >>> n.b.append(n.a)
+    >>> n.c.append(n.b)
+    >>> n.d.append(n.a)
+    >>> n.e.append(n.d)
+    >>> n.f.extend([n.b, n.d])
+    >>> n.g.extend([n.e, n.f])
+    >>> n.h.extend([n.c, n.g])
+    >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h])
+    >>> print g.ascii_graph()
+    t-\ h
+    *-|-\ g
+    *-|-|-\ f
+    | | * | e
+    *-|-/ | d
+    | *   | c
+    | *---/ b
+    r-/ a
+
+    >>> for char in ['a','b','c','d','e','f','g','h','i']:
+    ...     setattr(n, char, Node(data=char))
+    >>> for char in ['a', 'b','c','d','e','f','g','h']:
+    ...     nx = getattr(n, char)
+    ...     n.i.append(nx)
+    >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
+    >>> print g.ascii_graph()
+    t-\-\-\-\-\-\-\ i
+    r | | | | | | | h
+    r-/ | | | | | | g
+    r---/ | | | | | f
+    r-----/ | | | | e
+    r-------/ | | | d
+    r---------/ | | c
+    r-----------/ | b
+    r-------------/ a
+
+    >>> for char in ['a','b','c','d','e','f','g','h','i']:
+    ...     setattr(n, char, Node(data=char))
+    >>> for char in ['b','c','d','e','f','g','h','i']:
+    ...     nx = getattr(n, char)
+    ...     nx.append(n.a)
+    >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
+    >>> print g.ascii_graph()
+    t i
+    | t h
+    | | t g
+    | | | t f
+    | | | | t e
+    | | | | | t d
+    | | | | | | t c
+    | | | | | | | t b
+    r-/-/-/-/-/-/-/ a
+
+    >>> for char in ['a','b','c','d','e','f','g','h','i']:
+    ...     setattr(n, char, Node(data=char))
+    >>> n.d.append(n.a)
+    >>> n.e.extend([n.a, n.c])
+    >>> n.f.extend([n.c, n.d, n.e])
+    >>> n.g.extend([n.b, n.e, n.f])
+    >>> n.h.extend([n.a, n.c, n.d, n.g])
+    >>> n.i.extend([n.a, n.b, n.c, n.g])
+    >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
+    >>> print g.ascii_graph()
+    t-\-\-\ i
+    | | | | t-\-\-\ h
+    *-|-|-|-<-|-|-|-\ g
+    *-|-|-|-|-|-|-|-|-\-\ f
+    *-|-|-|-< | | | | | | e
+    | | | | | *-|-|-|-/ | d
+    r-/-|-|-|-|-/-|-|---/ c
+    r---/-|-|-|---|-/ b
+    r-----/-/-/---/ a
+
+    Ok, enough pretty graphs ;).  Here's an example of cycle
+    detection.
+
+    >>> for char in ['a','b','c','d']:
+    ...     setattr(n, char, Node(data=char))
+    >>> n.b.append(n.a)
+    >>> n.c.append(n.a)
+    >>> n.d.extend([n.b, n.c])
+    >>> n.a.append(n.d)
+    >>> g = Graph([n.a,n.b,n.c,n.d])
+    >>> g.check_for_cycles()
+    Traceback (most recent call last):
+      ...
+    CyclicGraphError: cycle detected:
+      a
+      d
+      b
+      a
+    """
+    def topological_sort(self):
+        """Algorithm from git's commit.c `sort_in_topological_order`_.
+
+        Ordering is tip-to-root.
+
+        In situations where topological sorting is ambiguous, the
+        nodes are sorted using the inverse (since we use tip-to-root
+        ordering) of their comparison functions (__cmp__, __lt__,
+        ...).
+
+        .. _sort_in_topological_order:
+          http://git.kernel.org/?p=git/git.git;a=blob;f=commit.c;h=731191e63bd39a89a8ea4ed0390c49d5605cdbed;hb=HEAD#l425
+        """
+        for node in self:
+            node._outcount = 0
+        for node in self:
+            for parent in node:
+                parent._outcount += 1
+        tips = sorted([node for node in self if node._outcount == 0])
+        orig_len = len(self)
+        del self[:]
+        while len(tips) > 0:
+            node = tips.pop(0)
+            for parent in node:
+                parent._outcount -= 1
+                if parent._outcount == 0:
+                    bisect.insort(tips, parent)
+            node._outcount = -1
+            self.append(node)
+        final_len = len(self)
+        if final_len != orig_len:
+            raise CyclicGraphError(
+                '%d of %d elements not reachable from tips'
+                % (orig_len - final_len, orig_len))
+
+    def check_for_cycles(self):
+        """Check for cyclic references.
+        """
+        for node in self:
+            if node in node.ancestors():
+                path = node.path(node)
+                raise CyclicGraphError(
+                    'cycle detected:\n  %s'
+                    % '\n  '.join([repr(node)]+[repr(node) for node in path]))
+
+    def graph_rows(self, graph_row=None):
+        """Generate a sequence of (`graph_row`, `node`) tuples.
+
+        Preforms :meth:`topological_sort` internally.
+
+        If `graph_row` is `None`, a new instance of :class:`GraphRow`
+        is used.
+        """
+        if graph_row == None:
+            graph_row = GraphRow()
+        self.topological_sort()
+        for node in self:
+            graph_row.insert(node)
+            yield (graph_row, node)
+
+    def ascii_graph(self, string_fn=str):
+        """Print an ascii graph on the left with `string_fn(node)` on
+        the right.
+
+        See the class docstring for example output.
+        """
+        graph = []
+        for row,node in self.graph_rows():
+            graph.append('%s %s' % (str(row), string_fn(node)))
+        return '\n'.join(graph)