From e3d22aebdae671c6bf0fcc4131ad3d5744b19699 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 6 May 2010 08:37:17 -0400 Subject: [PATCH] Added the graph module I'd developed for Bugs-Everywhere --- hooke/util/__init__.py | 10 + hooke/util/graph.py | 466 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 476 insertions(+) create mode 100644 hooke/util/__init__.py create mode 100644 hooke/util/graph.py diff --git a/hooke/util/__init__.py b/hooke/util/__init__.py new file mode 100644 index 0000000..8b1bf74 --- /dev/null +++ b/hooke/util/__init__.py @@ -0,0 +1,10 @@ +# COPYRIGHT + +"""The util module contains assorted utilities not directly associated +with Hooke functionality. + +To facilitate faster loading, submodules are not imported by default. +The available submodules are: + +* :mod:`hooke.util.graph` +""" diff --git a/hooke/util/graph.py b/hooke/util/graph.py new file mode 100644 index 0000000..bf7432e --- /dev/null +++ b/hooke/util/graph.py @@ -0,0 +1,466 @@ +# COPYRIGHT + +"""Define :class:`Graph`, a directed, acyclic graph structure. +""" + +import bisect + +class CyclicGraphError (ValueError): + pass + +class Node (list): + """A node/element in a graph. + + Contains a list of the node's parents, and stores the node's + `data`. + + Examples + -------- + + >>> a = Node(data='a') + >>> b = Node(parents=[a], data='b') + >>> c = Node(parents=[a], data='c') + >>> d = Node(parents=[b, c], data='d') + >>> str(d) + 'd' + + We can list all of a node's ancestors. + + >>> print [node for node in d.ancestors()] + [b, c, a] + >>> print [node for node in d.ancestors(depth_first=True)] + [b, a, c] + + Ancestors works with cycles. + + >>> a.append(d) + >>> print [node for node in d.ancestors()] + [b, c, a, d] + + We can find the cycle path. + + >>> print d.path(d) + [b, a, d] + """ + def __init__(self, parents=[], data=None): + list.__init__(self, parents) + self.data = data + + def __cmp__(self, other): + return -cmp(self.data, other.data) + def __eq__(self, other): + return self.__cmp__(other) == 0 + def __ne__(self, other): + return self.__cmp__(other) != 0 + def __lt__(self, other): + return self.__cmp__(other) < 0 + def __gt__(self, other): + return self.__cmp__(other) > 0 + def __le__(self, other): + return self.__cmp__(other) <= 0 + def __ge__(self, other): + return self.__cmp__(other) >= 0 + + def __str__(self): + return str(self.data) + def __repr__(self): + return self.__str__() + + def ancestors(self, depth_first=False): + """Generate all ancestors. + + Will only yield each ancestor once, even in the case of + diamond inheritance, etc. + + Breadth first by default. + """ + stack = list(self) + popped = [] + while len(stack) > 0: + node = stack.pop(0) + if node in popped: + continue + popped.append(node) + yield node + if depth_first == True: + for parent in reversed(node): + stack.insert(0, parent) + else: + stack.extend(node) + def path(self, node): + """Return the shortest list of parents connecting `self` to + `node`. + """ + if node in self: + return [node] + stack = list(self) + paths = dict((id(n), [n]) for n in stack) + while len(stack) > 0: + n = stack.pop(0) + n_path = paths[id(n)] + for parent in n: + if id(parent) in paths: + continue + p_path = list(n_path) + p_path.append(parent) + if id(parent) == id(node): + return p_path + stack.append(parent) + paths[id(parent)] = p_path + +class _GraphRowEntry (list): + """A node in the graph with outstanding children. + + `columns` is a list of the indexes of outstanding columns which + will eventually attach to those children. + """ + def __init__(self, node, columns=[]): + list.__init__(self, columns) + self.node = node + +class GraphRow (list): + """A graph row generator. + + Contains a list of :class:`_GraphRowEntry`\s (added with + :meth:`insert`), and generates the current line with + :meth:`__str__`. You should generate a graph with repeated + calls:: + + g = GraphRow() + for node in nodes: + g.insert(node) + print str(g) + + The string rendering can be cusomized by changing `g.chars`. + Control over the branch columns: + + ================= =========================================== + `node: ...` the active (most recently inserted) node + `split/join: ...` branching/merging runs from the active node + `run: connected` connect a branch to its eventual node + `run: blank` place-holder for extinct runs + ================= =========================================== + + Branch columns are seperated by separator columns: + + ================= ======================================================= + `sep: split/join` separate split/join branch columns from the active node + `sep: default` separate all remaining branch columns + ================= ======================================================= + + For the split/join branch columns, "born" and "died" are defined + from the point of view of `GraphRow`. Because trees are printed + tip-to-root (:meth:`Graph.topological_sort`), "born" runs are + determined by the active node's parents (which have not yet been + printed), and "dead" runs by its children (which have been + printed). + """ + def __init__(self): + list.__init__(self) + self._width = 0 + self._last_width = 0 + self._old_dead = [] + self._next_old_dead = [] + self.chars = { + 'node: both tip and root': 'b', + 'node: root': 'r', + 'node: tip': 't', + 'node: regular': '*', + 'split/join: born and died left of active': '>', + 'split/join: born and died right of active': '<', + 'split/join: born left of active': '/', + 'split/join: born right of active': '\\', + 'split/join: died left of active': '\\', + 'split/join: died right of active': '/', + 'run: blank': ' ', + 'run: connected': '|', + 'sep: split/join': '-', + 'sep: default': ' ', + } + def insert(self, node): + """Add a new node to the graph. + + Nodes should be inserted in tip-to-root topological order + (i.e. node must be inserted before any of its parents). + """ + self._node = node + self._last_width = self._width + self._old_dead = self._next_old_dead + self._born = [] + self._dead = [] + if len(self) > 0 and len(self[-1].node) == 0: + # remove last entry's active column if it has no parents + self[-1].pop(0) # (it' already in _old_dead via _next_old_dead) + self._children = [(i,e) for i,e in enumerate(self) if node in e.node] + for i,e in self._children: + self._dead.append(e.pop(0)) # TODO: sharing branches vs. current 1 per child + exhausted = [] # _GraphRowEntry\s with no futher parents + for e in self: + if len(e) == 0: + exhausted.append(e) + for e in exhausted: + self.remove(e) + self._dead.sort() + remaining = max(len(node), 1) + slots = sorted(self._old_dead + self._dead) + self._born.extend(slots[:remaining]) + remaining -= len(slots) + self._born.extend(range(self._last_width, self._last_width+remaining)) + self._active = self._born[0] + self.append(_GraphRowEntry(node, columns=self._born)) + self._width = max([e[-1] for e in self])+1 + self._next_old_dead = [i for i in slots[len(self._born):] if i < self._width] + if len(node) == 0: + self._next_old_dead.insert(0, self._active) + + def __str__(self): + """Render the current line of the graph as a string. + """ + left_connects = [self._born[0]] + right_connects = [self._born[-1]] + if len(self._dead) > 0: + left_connects.append(self._dead[0]) + right_connects.append(self._dead[-1]) + left_connect = min(left_connects) + right_connect = max(right_connects) + string = [] + for i in range(max(self._width, self._last_width)): + if i == self._active: + if len(self._node) == 0: + if len(self._children) == 0: + char = self.chars['node: both tip and root'] + else: + char = self.chars['node: root'] + elif len(self._children) == 0: + char = self.chars['node: tip'] + else: + char = self.chars['node: regular'] + elif i in self._born: + if i in self._dead: + if i < self._active: + char = self.chars[ + 'split/join: born and died left of active'] + else: + char = self.chars[ + 'split/join: born and died right of active'] + else: + if i < self._active: + char = self.chars['split/join: born left of active'] + else: + char = self.chars['split/join: born right of active'] + elif i in self._dead: + if i < self._active: + char = self.chars['split/join: died left of active'] + else: + char = self.chars['split/join: died right of active'] + elif i in self._old_dead: + char = self.chars['run: blank'] + else: + char = self.chars['run: connected'] + if i < left_connect or i >= right_connect: + sep = self.chars['sep: default'] + else: + sep = self.chars['sep: split/join'] + if char == self.chars['run: blank']: + char = self.chars['sep: split/join'] + string.extend([char, sep]) + return ''.join(string)[:-1] # [-1] strips final sep + +class Graph (list): + """A directed, acyclic graph structure. + + Contains methods for sorting and printing graphs. + + Examples + -------- + + >>> class Nodes (object): pass + >>> n = Nodes() + >>> for char in ['a','b','c','d','e','f','g','h','i']: + ... setattr(n, char, Node(data=char)) + >>> n.b.append(n.a) + >>> n.c.append(n.a) + >>> n.d.append(n.a) + >>> n.e.extend([n.b, n.c, n.d]) + >>> n.f.append(n.e) + >>> n.g.append(n.e) + >>> n.h.append(n.e) + >>> n.i.extend([n.f, n.g, n.h]) + >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) + >>> g.topological_sort() + >>> print [node for node in g] + [i, h, g, f, e, d, c, b, a] + >>> print g.ascii_graph() + t-\-\ i + * | | h + | * | g + | | * f + *-<-< e + * | | d + | * | c + | | * b + r-/-/ a + + >>> for char in ['a','b','c','d','e','f','g','h']: + ... setattr(n, char, Node(data=char)) + >>> n.b.append(n.a) + >>> n.c.append(n.b) + >>> n.d.append(n.a) + >>> n.e.append(n.d) + >>> n.f.extend([n.b, n.d]) + >>> n.g.extend([n.e, n.f]) + >>> n.h.extend([n.c, n.g]) + >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h]) + >>> print g.ascii_graph() + t-\ h + *-|-\ g + *-|-|-\ f + | | * | e + *-|-/ | d + | * | c + | *---/ b + r-/ a + + >>> for char in ['a','b','c','d','e','f','g','h','i']: + ... setattr(n, char, Node(data=char)) + >>> for char in ['a', 'b','c','d','e','f','g','h']: + ... nx = getattr(n, char) + ... n.i.append(nx) + >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) + >>> print g.ascii_graph() + t-\-\-\-\-\-\-\ i + r | | | | | | | h + r-/ | | | | | | g + r---/ | | | | | f + r-----/ | | | | e + r-------/ | | | d + r---------/ | | c + r-----------/ | b + r-------------/ a + + >>> for char in ['a','b','c','d','e','f','g','h','i']: + ... setattr(n, char, Node(data=char)) + >>> for char in ['b','c','d','e','f','g','h','i']: + ... nx = getattr(n, char) + ... nx.append(n.a) + >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) + >>> print g.ascii_graph() + t i + | t h + | | t g + | | | t f + | | | | t e + | | | | | t d + | | | | | | t c + | | | | | | | t b + r-/-/-/-/-/-/-/ a + + >>> for char in ['a','b','c','d','e','f','g','h','i']: + ... setattr(n, char, Node(data=char)) + >>> n.d.append(n.a) + >>> n.e.extend([n.a, n.c]) + >>> n.f.extend([n.c, n.d, n.e]) + >>> n.g.extend([n.b, n.e, n.f]) + >>> n.h.extend([n.a, n.c, n.d, n.g]) + >>> n.i.extend([n.a, n.b, n.c, n.g]) + >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) + >>> print g.ascii_graph() + t-\-\-\ i + | | | | t-\-\-\ h + *-|-|-|-<-|-|-|-\ g + *-|-|-|-|-|-|-|-|-\-\ f + *-|-|-|-< | | | | | | e + | | | | | *-|-|-|-/ | d + r-/-|-|-|-|-/-|-|---/ c + r---/-|-|-|---|-/ b + r-----/-/-/---/ a + + Ok, enough pretty graphs ;). Here's an example of cycle + detection. + + >>> for char in ['a','b','c','d']: + ... setattr(n, char, Node(data=char)) + >>> n.b.append(n.a) + >>> n.c.append(n.a) + >>> n.d.extend([n.b, n.c]) + >>> n.a.append(n.d) + >>> g = Graph([n.a,n.b,n.c,n.d]) + >>> g.check_for_cycles() + Traceback (most recent call last): + ... + CyclicGraphError: cycle detected: + a + d + b + a + """ + def topological_sort(self): + """Algorithm from git's commit.c `sort_in_topological_order`_. + + Ordering is tip-to-root. + + In situations where topological sorting is ambiguous, the + nodes are sorted using the inverse (since we use tip-to-root + ordering) of their comparison functions (__cmp__, __lt__, + ...). + + .. _sort_in_topological_order: + http://git.kernel.org/?p=git/git.git;a=blob;f=commit.c;h=731191e63bd39a89a8ea4ed0390c49d5605cdbed;hb=HEAD#l425 + """ + for node in self: + node._outcount = 0 + for node in self: + for parent in node: + parent._outcount += 1 + tips = sorted([node for node in self if node._outcount == 0]) + orig_len = len(self) + del self[:] + while len(tips) > 0: + node = tips.pop(0) + for parent in node: + parent._outcount -= 1 + if parent._outcount == 0: + bisect.insort(tips, parent) + node._outcount = -1 + self.append(node) + final_len = len(self) + if final_len != orig_len: + raise CyclicGraphError( + '%d of %d elements not reachable from tips' + % (orig_len - final_len, orig_len)) + + def check_for_cycles(self): + """Check for cyclic references. + """ + for node in self: + if node in node.ancestors(): + path = node.path(node) + raise CyclicGraphError( + 'cycle detected:\n %s' + % '\n '.join([repr(node)]+[repr(node) for node in path])) + + def graph_rows(self, graph_row=None): + """Generate a sequence of (`graph_row`, `node`) tuples. + + Preforms :meth:`topological_sort` internally. + + If `graph_row` is `None`, a new instance of :class:`GraphRow` + is used. + """ + if graph_row == None: + graph_row = GraphRow() + self.topological_sort() + for node in self: + graph_row.insert(node) + yield (graph_row, node) + + def ascii_graph(self, string_fn=str): + """Print an ascii graph on the left with `string_fn(node)` on + the right. + + See the class docstring for example output. + """ + graph = [] + for row,node in self.graph_rows(): + graph.append('%s %s' % (str(row), string_fn(node))) + return '\n'.join(graph) -- 2.26.2