From 3a9f4aeedbb6df963248e53251a76469272ed2ad Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Fri, 7 May 2010 12:13:39 -0400 Subject: [PATCH] Added tip-to-root/root-to-tip flexibility to hooke.util.graph The old implementation only did tip-to-root. The new default is root-to-tip. --- hooke/util/graph.py | 488 +++++++++++++++++++++++++++----------------- 1 file changed, 305 insertions(+), 183 deletions(-) diff --git a/hooke/util/graph.py b/hooke/util/graph.py index 9e5ee95..c2302e0 100644 --- a/hooke/util/graph.py +++ b/hooke/util/graph.py @@ -41,12 +41,26 @@ class Node (list): We can find the cycle path. - >>> print d.path(d) + >>> print d.parent_path(d) [b, a, d] + + After a run through :meth:`Graph.set_children`, we can also + list children + + >>> g = Graph([a, b, c, d]) + >>> g.set_children() + >>> print a.children + [b, c] + + And descendents. + + >>> print [node for node in a.descendents(depth_first=True)] + [b, d, a, c] """ def __init__(self, parents=[], data=None): list.__init__(self, parents) self.data = data + self.children = [] def __cmp__(self, other): return -cmp(self.data, other.data) @@ -68,15 +82,16 @@ class Node (list): def __repr__(self): return self.__str__() - def ancestors(self, depth_first=False): - """Generate all ancestors. + def traverse(self, next, depth_first=False): + """Iterate through all nodes returned by `next(node)`. - Will only yield each ancestor once, even in the case of + Will only yield each traversed node once, even in the case of diamond inheritance, etc. - Breadth first by default. + Breadth first by default. Set `depth_first==True` for a + depth first search. """ - stack = list(self) + stack = list(next(self)) popped = [] while len(stack) > 0: node = stack.pop(0) @@ -85,55 +100,105 @@ class Node (list): popped.append(node) yield node if depth_first == True: - for parent in reversed(node): - stack.insert(0, parent) + for target in reversed(next(node)): + stack.insert(0, target) else: - stack.extend(node) - def path(self, node): - """Return the shortest list of parents connecting `self` to - `node`. + stack.extend(next(node)) + + def ancestors(self, depth_first=False): + """Generate all ancestors. + + This is a small wrapper around :meth:`traverse`. + """ + next = lambda node : node # list node's parents + for node in self.traverse(next=next, depth_first=depth_first): + yield node + + def descendents(self, depth_first=False): + """Generate all decendents. + + This is a small wrapper around :meth:`traverse`. + """ + next = lambda node : node.children + for node in self.traverse(next=next, depth_first=depth_first): + yield node + + def path(self, next, node): + """Return the shortest list of nodes connecting `self` to + `node` via `next(node)`. """ if node in self: return [node] - stack = list(self) + stack = list(next(self)) paths = dict((id(n), [n]) for n in stack) while len(stack) > 0: n = stack.pop(0) n_path = paths[id(n)] - for parent in n: - if id(parent) in paths: + for target in next(n): + if id(target) in paths: continue - p_path = list(n_path) - p_path.append(parent) - if id(parent) == id(node): - return p_path - stack.append(parent) - paths[id(parent)] = p_path - -class _GraphRowEntry (list): - """A node in the graph with outstanding children. - - `columns` is a list of the indexes of outstanding columns which - will eventually attach to those children. - """ - def __init__(self, node, columns=[]): - list.__init__(self, columns) - self.node = node + t_path = list(n_path) + t_path.append(target) + if id(target) == id(node): + return t_path + stack.append(target) + paths[id(target)] = t_path + + def parent_path(self, node): + """Return the shortest list of nodes connecting `self` to + its parent `node`. + + This is a small wrapper around :meth:`path`. + """ + next = lambda node : node # list node's parents + return self.path(next, node) -class GraphRow (list): - """A graph row generator. + def child_path(self, node): + """Return the shortest list of nodes connecting `self` to + its child `node`. - Contains a list of :class:`_GraphRowEntry`\s (added with - :meth:`insert`), and generates the current line with - :meth:`__str__`. You should generate a graph with repeated - calls:: + This is a small wrapper around :meth:`path`. + """ + next = lambda node : node.children + return self.path(next, node) - g = GraphRow() - for node in nodes: - g.insert(node) - print str(g) - The string rendering can be cusomized by changing `g.chars`. +class GraphRow (object): + """Represent the state of a single row in a graph. + + Generated by :class:`GraphRowGenerator`, printed with + :class:`GraphRowPrinter`. + + :attr:`node` is the active node and :attr:`active` is its branch + column index. :attr:`width` is the number of current branch + columns. + + :attr:`born`, :attr:`dead`, and :attr:`inherited` are lists of + `(branch_column_index, target_node)` pairs. `dead` lists nodes + from previous rows whose branches complete on this row, + `inherited` lists nodes from previous rows whose branches continue + through this row, and `born` list nodes whose branches start on + this row. + """ + def __init__(self, node, active=-1, dead=None, inherited=None, born=None, + tip_to_root=False): + self.node = node + self.active = active + if dead == None: + dead = [] + self.dead = dead + if inherited == None: + inherited = [] + self.inherited = inherited + if born == None: + born = [] + self.born = born + self.tip_to_root = tip_to_root + +class GraphRowPrinter (object): + """Customizable printing for :class:`GraphRow`. + + The string rendering can be customized by changing :attr:`chars`. Control over the branch columns: ================= =========================================== @@ -149,116 +214,83 @@ class GraphRow (list): `sep: split/join` separate split/join branch columns from the active node `sep: default` separate all remaining branch columns ================= ======================================================= - - For the split/join branch columns, "born" and "died" are defined - from the point of view of `GraphRow`. Because trees are printed - tip-to-root (:meth:`Graph.topological_sort`), "born" runs are - determined by the active node's parents (which have not yet been - printed), and "dead" runs by its children (which have been - printed). """ - def __init__(self): - list.__init__(self) - self._width = 0 - self._last_width = 0 - self._old_dead = [] - self._next_old_dead = [] - self.chars = { - 'node: both tip and root': 'b', - 'node: root': 'r', - 'node: tip': 't', - 'node: regular': '*', - 'split/join: born and died left of active': '>', - 'split/join: born and died right of active': '<', - 'split/join: born left of active': '/', - 'split/join: born right of active': '\\', - 'split/join: died left of active': '\\', - 'split/join: died right of active': '/', - 'run: blank': ' ', - 'run: connected': '|', - 'sep: split/join': '-', - 'sep: default': ' ', - } - def insert(self, node): - """Add a new node to the graph. - - Nodes should be inserted in tip-to-root topological order - (i.e. node must be inserted before any of its parents). - """ - self._node = node - self._last_width = self._width - self._old_dead = self._next_old_dead - self._born = [] - self._dead = [] - if len(self) > 0 and len(self[-1].node) == 0: - # remove last entry's active column if it has no parents - self[-1].pop(0) # (it' already in _old_dead via _next_old_dead) - self._children = [(i,e) for i,e in enumerate(self) if node in e.node] - for i,e in self._children: - self._dead.append(e.pop(0)) # TODO: sharing branches vs. current 1 per child - exhausted = [] # _GraphRowEntry\s with no futher parents - for e in self: - if len(e) == 0: - exhausted.append(e) - for e in exhausted: - self.remove(e) - self._dead.sort() - remaining = max(len(node), 1) - slots = sorted(self._old_dead + self._dead) - self._born.extend(slots[:remaining]) - remaining -= len(slots) - self._born.extend(range(self._last_width, self._last_width+remaining)) - self._active = self._born[0] - self.append(_GraphRowEntry(node, columns=self._born)) - self._width = max([e[-1] for e in self])+1 - self._next_old_dead = [i for i in slots[len(self._born):] if i < self._width] - if len(node) == 0: - self._next_old_dead.insert(0, self._active) - - def __str__(self): - """Render the current line of the graph as a string. + def __init__(self, chars=None): + if chars == None: + chars = { + 'node: both tip and root': 'b', + 'node: root': 'r', + 'node: tip': 't', + 'node: regular': '*', + 'split/join: born and died left of active': '>', + 'split/join: born and died right of active': '<', + 'split/join: born left of active': '/', + 'split/join: born right of active': '\\', + 'split/join: died left of active': '\\', + 'split/join: died right of active': '/', + 'run: blank': ' ', + 'run: connected': '|', + 'sep: split/join': '-', + 'sep: default': ' ', + } + self.chars = chars + def __call__(self, graph_row): + """Render the :class:`GraphRow` instance `graph_row` as a + string. """ - left_connects = [self._born[0]] - right_connects = [self._born[-1]] - if len(self._dead) > 0: - left_connects.append(self._dead[0]) - right_connects.append(self._dead[-1]) - left_connect = min(left_connects) - right_connect = max(right_connects) + dead = [i for i,node in graph_row.dead] + inherited = [i for i,node in graph_row.inherited] + born = [i for i,node in graph_row.born] + right_connect = max(graph_row.active, + max(born+[-1]), # +[-1] protects against empty born + max(dead+[-1])) + left_connect = min(graph_row.active, + min(born+[right_connect]), + min(dead+[right_connect])) + max_col = max(right_connect, max(inherited+[-1])) string = [] - for i in range(max(self._width, self._last_width)): - if i == self._active: - if len(self._node) == 0: - if len(self._children) == 0: + for i in range(max_col + 1): + # Get char, the node or branch column character. + if i == graph_row.active: + if len(born) == 0: + if len(dead) == 0: char = self.chars['node: both tip and root'] - else: + elif graph_row.tip_to_root == True: + # The dead are children + char = self.chars['node: root'] + else: # The dead are parents + char = self.chars['node: tip'] + elif len(dead) == 0: + if graph_row.tip_to_root == True: + # The born are parents + char = self.chars['node: tip'] + else: # The born are children char = self.chars['node: root'] - elif len(self._children) == 0: - char = self.chars['node: tip'] else: char = self.chars['node: regular'] - elif i in self._born: - if i in self._dead: - if i < self._active: + elif i in born: + if i in dead: # born and died + if i < graph_row.active: char = self.chars[ 'split/join: born and died left of active'] else: char = self.chars[ 'split/join: born and died right of active'] - else: - if i < self._active: + else: # just born + if i < graph_row.active: char = self.chars['split/join: born left of active'] else: char = self.chars['split/join: born right of active'] - elif i in self._dead: - if i < self._active: + elif i in dead: # just died + if i < graph_row.active: char = self.chars['split/join: died left of active'] else: char = self.chars['split/join: died right of active'] - elif i in self._old_dead: - char = self.chars['run: blank'] - else: + elif i in inherited: char = self.chars['run: connected'] + else: + char = self.chars['run: blank'] + # Get sep, the separation character. if i < left_connect or i >= right_connect: sep = self.chars['sep: default'] else: @@ -268,6 +300,72 @@ class GraphRow (list): string.extend([char, sep]) return ''.join(string)[:-1] # [-1] strips final sep +class GraphRowGenerator (list): + """A :class:`GraphRow` generator. + + Contains a list of :class:`GraphRow`\s (added with + :meth:`insert`(:class:`hooke.util.graph.Node`)). You should + generate a graph with repeated calls:: + + tip_to_root = True + g = GraphRowGenerator(tip_to_root=tip_to_root) + p = GraphRowPrinter(tip_to_root=tip_to_root) + for node in nodes: + g.insert(node) + print p(g[-1]) + + For the split/join branch columns, "born" and "dead" are defined + from the point of view of `GraphRow`. For root-to-tip ordering + (`tip_to_root==False`, the default), "born" runs are determined + by the active node's children (which have yet to be printed) and + "dead" runs by its parents (which have been printed). If + `tip_to_root==True`, swap "children" and "parents" in the above + sentence. + """ + def __init__(self, tip_to_root=False): + list.__init__(self) + self.tip_to_root = tip_to_root + def insert(self, node): + """Add a new node to the graph. + + If `tip_to_root==True`, nodes should be inserted in + tip-to-root topological order (i.e. node must be inserted + before any of its parents). + + If `tip_to_root==False`, nodes must be inserted before any + of their children. + """ + if len(self) == 0: + previous = GraphRow(node=None, active=-1) + else: + previous = self[-1] + current = GraphRow(node=node, active=-1, tip_to_root=self.tip_to_root) + if self.tip_to_root == True: # children die, parents born + dead_nodes = list(current.node.children) + born_nodes = list(current.node) + else: # root-to-tip: parents die, children born + dead_nodes = list(current.node) + born_nodes = list(current.node.children) + # Mark the dead and inherited branch columns + for i,node in previous.inherited + previous.born: + if node in dead_nodes or node == current.node: + current.dead.append((i, node)) + else: + current.inherited.append((i, node)) + # Place born and active branch columns + num_born = max(len(born_nodes), 1) # 1 to ensure slot for active node + remaining = num_born # number of nodes left to place + used_slots = [i for i,n in current.inherited] + old_max = max(used_slots+[-1]) # +[-1] in case used_slots is empty + slots = sorted([i for i in range(old_max+1) if i not in used_slots]) + remaining -= len(slots) + slots.extend(range(old_max+1, old_max+1+remaining)) + current.active = slots[0] + current.born = zip(slots, born_nodes) + # TODO: sharing branches vs. current 1 per child + self.append(current) + + class Graph (list): """A directed, acyclic graph structure. @@ -289,18 +387,28 @@ class Graph (list): >>> n.h.append(n.e) >>> n.i.extend([n.f, n.g, n.h]) >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) - >>> g.topological_sort() + >>> g.topological_sort(tip_to_root=True) >>> print [node for node in g] [i, h, g, f, e, d, c, b, a] >>> print g.ascii_graph() - t-\-\ i + r-\-\ a + | | * b + | * | c + * | | d + *-<-< e + | | * f + | * | g * | | h + t-/-/ i + >>> print g.ascii_graph(tip_to_root=True) + t-\-\ i + | | * h | * | g - | | * f + * | | f *-<-< e - * | | d + | | * d | * | c - | | * b + * | | b r-/-/ a >>> for char in ['a','b','c','d','e','f','g','h']: @@ -313,14 +421,14 @@ class Graph (list): >>> n.g.extend([n.e, n.f]) >>> n.h.extend([n.c, n.g]) >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h]) - >>> print g.ascii_graph() + >>> print g.ascii_graph(tip_to_root=True) t-\ h - *-|-\ g - *-|-|-\ f - | | * | e - *-|-/ | d - | * | c - | *---/ b + | *-\ g + | | *-\ f + | * | | e + | *-|-/ d + * | | c + *-|-/ b r-/ a >>> for char in ['a','b','c','d','e','f','g','h','i']: @@ -329,16 +437,16 @@ class Graph (list): ... nx = getattr(n, char) ... n.i.append(nx) >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) - >>> print g.ascii_graph() + >>> print g.ascii_graph(tip_to_root=True) t-\-\-\-\-\-\-\ i - r | | | | | | | h - r-/ | | | | | | g - r---/ | | | | | f - r-----/ | | | | e - r-------/ | | | d - r---------/ | | c - r-----------/ | b - r-------------/ a + | | | | | | | r h + | | | | | | r g + | | | | | r f + | | | | r e + | | | r d + | | r c + | r b + r a >>> for char in ['a','b','c','d','e','f','g','h','i']: ... setattr(n, char, Node(data=char)) @@ -346,7 +454,7 @@ class Graph (list): ... nx = getattr(n, char) ... nx.append(n.a) >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) - >>> print g.ascii_graph() + >>> print g.ascii_graph(tip_to_root=True) t i | t h | | t g @@ -366,16 +474,16 @@ class Graph (list): >>> n.h.extend([n.a, n.c, n.d, n.g]) >>> n.i.extend([n.a, n.b, n.c, n.g]) >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i]) - >>> print g.ascii_graph() + >>> print g.ascii_graph(tip_to_root=True) t-\-\-\ i | | | | t-\-\-\ h - *-|-|-|-<-|-|-|-\ g - *-|-|-|-|-|-|-|-|-\-\ f - *-|-|-|-< | | | | | | e - | | | | | *-|-|-|-/ | d - r-/-|-|-|-|-/-|-|---/ c - r---/-|-|-|---|-/ b - r-----/-/-/---/ a + | | | *-|-|-|-<-\ g + | | | | | | | | *-\-\ f + | | | | | | | *-|-|-< e + | | | | | | *-|-|-/ | d + | | r-|-|-/-|-|-/---/ c + | r---/ | | | b + r-------/---/-/ a Ok, enough pretty graphs ;). Here's an example of cycle detection. @@ -396,19 +504,29 @@ class Graph (list): b a """ - def topological_sort(self): + def set_children(self): + """Fill out each node's :attr:`Node.children` list. + """ + for node in self: + for parent in node: + if node not in parent.children: + parent.children.append(node) + + def topological_sort(self, tip_to_root=False): """Algorithm from git's commit.c `sort_in_topological_order`_. - Ordering is tip-to-root. + Default ordering is root-to-tip. Set `tip_to_root=True` for + tip-to-root. In situations where topological sorting is ambiguous, the - nodes are sorted using the inverse (since we use tip-to-root - ordering) of their comparison functions (__cmp__, __lt__, - ...). + nodes are sorted using the node comparison functions (__cmp__, + __lt__, ...). If `tip_to_root==True`, the inverse + comparison functions are used. .. _sort_in_topological_order: http://git.kernel.org/?p=git/git.git;a=blob;f=commit.c;h=731191e63bd39a89a8ea4ed0390c49d5605cdbed;hb=HEAD#l425 """ + # sort tip-to-root first, then reverse if neccessary for node in self: node._outcount = 0 for node in self: @@ -430,39 +548,43 @@ class Graph (list): raise CyclicGraphError( '%d of %d elements not reachable from tips' % (orig_len - final_len, orig_len)) + if tip_to_root == False: + self.reverse() def check_for_cycles(self): """Check for cyclic references. """ for node in self: if node in node.ancestors(): - path = node.path(node) + path = node.parent_path(node) raise CyclicGraphError( 'cycle detected:\n %s' % '\n '.join([repr(node)]+[repr(node) for node in path])) - def graph_rows(self, graph_row=None): + def graph_rows(self, tip_to_root=False): """Generate a sequence of (`graph_row`, `node`) tuples. - Preforms :meth:`topological_sort` internally. - - If `graph_row` is `None`, a new instance of :class:`GraphRow` - is used. + Preforms :meth:`set_children` and :meth:`topological_sort` + internally. """ - if graph_row == None: - graph_row = GraphRow() - self.topological_sort() + graph_row_generator = GraphRowGenerator(tip_to_root=tip_to_root) + self.set_children() + self.topological_sort(tip_to_root=tip_to_root) for node in self: - graph_row.insert(node) - yield (graph_row, node) + graph_row_generator.insert(node) + yield (graph_row_generator[-1], node) - def ascii_graph(self, string_fn=str): + def ascii_graph(self, graph_row_printer=None, string_fn=str, + tip_to_root=False): """Print an ascii graph on the left with `string_fn(node)` on - the right. + the right. If `graph_row_printer` is `None`, a default + instance of :class:`GraphRowPrinter` will be used. See the class docstring for example output. """ + if graph_row_printer == None: + graph_row_printer = GraphRowPrinter() graph = [] - for row,node in self.graph_rows(): - graph.append('%s %s' % (str(row), string_fn(node))) + for row,node in self.graph_rows(tip_to_root=tip_to_root): + graph.append('%s %s' % (graph_row_printer(row), string_fn(node))) return '\n'.join(graph) -- 2.26.2