1 # Copyright (C) 2010-2012 W. Trevor King <wking@drexel.edu>
3 # This file is part of Hooke.
5 # Hooke is free software: you can redistribute it and/or modify it under the
6 # terms of the GNU Lesser General Public License as published by the Free
7 # Software Foundation, either version 3 of the License, or (at your option) any
10 # Hooke is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 # A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with Hooke. If not, see <http://www.gnu.org/licenses/>.
18 """Define :class:`Graph`, a directed, acyclic graph structure.
19 :class:`Graph`\s are composed of :class:`Node`\s, also defined by this
25 class CyclicGraphError (ValueError):
29 """A node/element in a graph.
31 Contains a list of the node's parents, and stores the node's
37 >>> a = Node(data='a')
38 >>> b = Node(parents=[a], data='b')
39 >>> c = Node(parents=[a], data='c')
40 >>> d = Node(parents=[b, c], data='d')
44 We can list all of a node's ancestors.
46 >>> print [node for node in d.ancestors()]
48 >>> print [node for node in d.ancestors(depth_first=True)]
51 Ancestors works with cycles.
54 >>> print [node for node in d.ancestors()]
57 We can find the cycle path.
59 >>> print d.parent_path(d)
62 After a run through :meth:`Graph.set_children`, we can also
65 >>> g = Graph([a, b, c, d])
72 >>> print [node for node in a.descendents(depth_first=True)]
75 def __init__(self, parents=[], data=None):
76 list.__init__(self, parents)
80 def __cmp__(self, other):
81 return -cmp(self.data, other.data)
82 def __eq__(self, other):
83 return self.__cmp__(other) == 0
84 def __ne__(self, other):
85 return self.__cmp__(other) != 0
86 def __lt__(self, other):
87 return self.__cmp__(other) < 0
88 def __gt__(self, other):
89 return self.__cmp__(other) > 0
90 def __le__(self, other):
91 return self.__cmp__(other) <= 0
92 def __ge__(self, other):
93 return self.__cmp__(other) >= 0
100 def traverse(self, next, depth_first=False):
101 """Iterate through all nodes returned by `next(node)`.
103 Will only yield each traversed node once, even in the case of
104 diamond inheritance, etc.
106 Breadth first by default. Set `depth_first==True` for a
109 stack = list(next(self))
111 while len(stack) > 0:
117 if depth_first == True:
118 for target in reversed(next(node)):
119 stack.insert(0, target)
121 stack.extend(next(node))
123 def ancestors(self, depth_first=False):
124 """Generate all ancestors.
126 This is a small wrapper around :meth:`traverse`.
128 next = lambda node : node # list node's parents
129 for node in self.traverse(next=next, depth_first=depth_first):
132 def descendents(self, depth_first=False):
133 """Generate all decendents.
135 This is a small wrapper around :meth:`traverse`.
137 next = lambda node : node.children
138 for node in self.traverse(next=next, depth_first=depth_first):
141 def path(self, next, node):
142 """Return the shortest list of nodes connecting `self` to
143 `node` via `next(node)`.
147 stack = list(next(self))
148 paths = dict((id(n), [n]) for n in stack)
149 while len(stack) > 0:
151 n_path = paths[id(n)]
152 for target in next(n):
153 if id(target) in paths:
155 t_path = list(n_path)
156 t_path.append(target)
157 if id(target) == id(node):
160 paths[id(target)] = t_path
162 def parent_path(self, node):
163 """Return the shortest list of nodes connecting `self` to
166 This is a small wrapper around :meth:`path`.
168 next = lambda node : node # list node's parents
169 return self.path(next, node)
171 def child_path(self, node):
172 """Return the shortest list of nodes connecting `self` to
175 This is a small wrapper around :meth:`path`.
177 next = lambda node : node.children
178 return self.path(next, node)
181 class GraphRow (object):
182 """Represent the state of a single row in a graph.
184 Generated by :class:`GraphRowGenerator`, printed with
185 :class:`GraphRowPrinter`.
187 :attr:`node` is the active node and :attr:`active` is its branch
188 column index. :attr:`width` is the number of current branch
191 :attr:`born`, :attr:`dead`, and :attr:`inherited` are lists of
192 `(branch_column_index, target_node)` pairs. `dead` lists nodes
193 from previous rows whose branches complete on this row,
194 `inherited` lists nodes from previous rows whose branches continue
195 through this row, and `born` list nodes whose branches start on
198 def __init__(self, node, active=-1, dead=None, inherited=None, born=None,
205 if inherited == None:
207 self.inherited = inherited
211 self.tip_to_root = tip_to_root
213 class GraphRowPrinter (object):
214 """Customizable printing for :class:`GraphRow`.
216 The string rendering can be customized by changing :attr:`chars`.
217 Control over the branch columns:
219 ================= ===========================================
220 `node: ...` the active (most recently inserted) node
221 `split/join: ...` branching/merging runs from the active node
222 `run: connected` connect a branch to its eventual node
223 `run: blank` place-holder for extinct runs
224 ================= ===========================================
226 Branch columns are seperated by separator columns:
228 ================= =======================================================
229 `sep: split/join` separate split/join branch columns from the active node
230 `sep: default` separate all remaining branch columns
231 ================= =======================================================
233 def __init__(self, chars=None):
236 'node: both tip and root': 'b',
239 'node: regular': '*',
240 'split/join: born and died left of active': '>',
241 'split/join: born and died right of active': '<',
242 'split/join: born left of active': '/',
243 'split/join: born right of active': '\\',
244 'split/join: died left of active': '\\',
245 'split/join: died right of active': '/',
247 'run: connected': '|',
248 'sep: split/join': '-',
252 def __call__(self, graph_row):
253 """Render the :class:`GraphRow` instance `graph_row` as a
256 dead = [i for i,node in graph_row.dead]
257 inherited = [i for i,node in graph_row.inherited]
258 born = [i for i,node in graph_row.born]
259 right_connect = max(graph_row.active,
260 max(born+[-1]), # +[-1] protects against empty born
262 left_connect = min(graph_row.active,
263 min(born+[right_connect]),
264 min(dead+[right_connect]))
265 max_col = max(right_connect, max(inherited+[-1]))
267 for i in range(max_col + 1):
268 # Get char, the node or branch column character.
269 if i == graph_row.active:
272 char = self.chars['node: both tip and root']
273 elif graph_row.tip_to_root == True:
274 # The dead are children
275 char = self.chars['node: root']
276 else: # The dead are parents
277 char = self.chars['node: tip']
279 if graph_row.tip_to_root == True:
280 # The born are parents
281 char = self.chars['node: tip']
282 else: # The born are children
283 char = self.chars['node: root']
285 char = self.chars['node: regular']
287 if i in dead: # born and died
288 if i < graph_row.active:
290 'split/join: born and died left of active']
293 'split/join: born and died right of active']
295 if i < graph_row.active:
296 char = self.chars['split/join: born left of active']
298 char = self.chars['split/join: born right of active']
299 elif i in dead: # just died
300 if i < graph_row.active:
301 char = self.chars['split/join: died left of active']
303 char = self.chars['split/join: died right of active']
305 char = self.chars['run: connected']
307 char = self.chars['run: blank']
308 # Get sep, the separation character.
309 if i < left_connect or i >= right_connect:
310 sep = self.chars['sep: default']
312 sep = self.chars['sep: split/join']
313 if char == self.chars['run: blank']:
314 char = self.chars['sep: split/join']
315 string.extend([char, sep])
316 return ''.join(string)[:-1] # [-1] strips final sep
318 class GraphRowGenerator (list):
319 """A :class:`GraphRow` generator.
321 Contains a list of :class:`GraphRow`\s (added with
322 :meth:`insert`(:class:`hooke.util.graph.Node`)). You should
323 generate a graph with repeated calls::
326 g = GraphRowGenerator(tip_to_root=tip_to_root)
327 p = GraphRowPrinter(tip_to_root=tip_to_root)
332 For the split/join branch columns, "born" and "dead" are defined
333 from the point of view of `GraphRow`. For root-to-tip ordering
334 (`tip_to_root==False`, the default), "born" runs are determined
335 by the active node's children (which have yet to be printed) and
336 "dead" runs by its parents (which have been printed). If
337 `tip_to_root==True`, swap "children" and "parents" in the above
340 def __init__(self, tip_to_root=False):
342 self.tip_to_root = tip_to_root
343 def insert(self, node):
344 """Add a new node to the graph.
346 If `tip_to_root==True`, nodes should be inserted in
347 tip-to-root topological order (i.e. node must be inserted
348 before any of its parents).
350 If `tip_to_root==False`, nodes must be inserted before any
354 previous = GraphRow(node=None, active=-1)
357 current = GraphRow(node=node, active=-1, tip_to_root=self.tip_to_root)
358 if self.tip_to_root == True: # children die, parents born
359 dead_nodes = list(current.node.children)
360 born_nodes = list(current.node)
361 else: # root-to-tip: parents die, children born
362 dead_nodes = list(current.node)
363 born_nodes = list(current.node.children)
364 # Mark the dead and inherited branch columns
365 for i,node in previous.inherited + previous.born:
366 if node in dead_nodes or node == current.node:
367 current.dead.append((i, node))
369 current.inherited.append((i, node))
370 # Place born and active branch columns
371 num_born = max(len(born_nodes), 1) # 1 to ensure slot for active node
372 remaining = num_born # number of nodes left to place
373 used_slots = [i for i,n in current.inherited]
374 old_max = max(used_slots+[-1]) # +[-1] in case used_slots is empty
375 slots = sorted([i for i in range(old_max+1) if i not in used_slots])
376 remaining -= len(slots)
377 slots.extend(range(old_max+1, old_max+1+remaining))
378 current.active = slots[0]
379 current.born = zip(slots, born_nodes)
380 # TODO: sharing branches vs. current 1 per child
385 """A directed, acyclic graph structure.
387 Contains methods for sorting and printing graphs.
392 >>> class Nodes (object): pass
394 >>> for char in ['a','b','c','d','e','f','g','h','i']:
395 ... setattr(n, char, Node(data=char))
399 >>> n.e.extend([n.b, n.c, n.d])
403 >>> n.i.extend([n.f, n.g, n.h])
404 >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
405 >>> g.topological_sort(tip_to_root=True)
406 >>> print [node for node in g]
407 [i, h, g, f, e, d, c, b, a]
408 >>> print g.ascii_graph()
418 >>> print g.ascii_graph(tip_to_root=True)
429 >>> for char in ['a','b','c','d','e','f','g','h']:
430 ... setattr(n, char, Node(data=char))
435 >>> n.f.extend([n.b, n.d])
436 >>> n.g.extend([n.e, n.f])
437 >>> n.h.extend([n.c, n.g])
438 >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h])
439 >>> print g.ascii_graph(tip_to_root=True)
449 >>> for char in ['a','b','c','d','e','f','g','h','i']:
450 ... setattr(n, char, Node(data=char))
451 >>> for char in ['a', 'b','c','d','e','f','g','h']:
452 ... nx = getattr(n, char)
454 >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
455 >>> print g.ascii_graph(tip_to_root=True)
466 >>> for char in ['a','b','c','d','e','f','g','h','i']:
467 ... setattr(n, char, Node(data=char))
468 >>> for char in ['b','c','d','e','f','g','h','i']:
469 ... nx = getattr(n, char)
471 >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
472 >>> print g.ascii_graph(tip_to_root=True)
483 >>> for char in ['a','b','c','d','e','f','g','h','i']:
484 ... setattr(n, char, Node(data=char))
486 >>> n.e.extend([n.a, n.c])
487 >>> n.f.extend([n.c, n.d, n.e])
488 >>> n.g.extend([n.b, n.e, n.f])
489 >>> n.h.extend([n.a, n.c, n.d, n.g])
490 >>> n.i.extend([n.a, n.b, n.c, n.g])
491 >>> g = Graph([n.a,n.b,n.c,n.d,n.e,n.f,n.g,n.h,n.i])
492 >>> print g.ascii_graph(tip_to_root=True)
496 | | | | | | | | *-\-\ f
497 | | | | | | | *-|-|-< e
498 | | | | | | *-|-|-/ | d
499 | | r-|-|-/-|-|-/---/ c
503 Ok, enough pretty graphs ;). Here's an example of cycle
506 >>> for char in ['a','b','c','d']:
507 ... setattr(n, char, Node(data=char))
510 >>> n.d.extend([n.b, n.c])
512 >>> g = Graph([n.a,n.b,n.c,n.d])
513 >>> g.check_for_cycles()
514 Traceback (most recent call last):
516 CyclicGraphError: cycle detected:
522 def set_children(self):
523 """Fill out each node's :attr:`Node.children` list.
527 if node not in parent.children:
528 parent.children.append(node)
530 def topological_sort(self, tip_to_root=False):
531 """Algorithm from git's commit.c `sort_in_topological_order`_.
533 Default ordering is root-to-tip. Set `tip_to_root=True` for
536 In situations where topological sorting is ambiguous, the
537 nodes are sorted using the node comparison functions (__cmp__,
538 __lt__, ...). If `tip_to_root==True`, the inverse
539 comparison functions are used.
541 .. _sort_in_topological_order:
542 http://git.kernel.org/?p=git/git.git;a=blob;f=commit.c;h=731191e63bd39a89a8ea4ed0390c49d5605cdbed;hb=HEAD#l425
544 # sort tip-to-root first, then reverse if neccessary
549 parent._outcount += 1
550 tips = sorted([node for node in self if node._outcount == 0])
556 parent._outcount -= 1
557 if parent._outcount == 0:
558 bisect.insort(tips, parent)
561 final_len = len(self)
562 if final_len != orig_len:
563 raise CyclicGraphError(
564 '%d of %d elements not reachable from tips'
565 % (orig_len - final_len, orig_len))
566 if tip_to_root == False:
569 def check_for_cycles(self):
570 """Check for cyclic references.
573 if node in node.ancestors():
574 path = node.parent_path(node)
575 raise CyclicGraphError(
576 'cycle detected:\n %s'
577 % '\n '.join([repr(node)]+[repr(node) for node in path]))
579 def graph_rows(self, tip_to_root=False):
580 """Generate a sequence of (`graph_row`, `node`) tuples.
582 Preforms :meth:`set_children` and :meth:`topological_sort`
585 graph_row_generator = GraphRowGenerator(tip_to_root=tip_to_root)
587 self.topological_sort(tip_to_root=tip_to_root)
589 graph_row_generator.insert(node)
590 yield (graph_row_generator[-1], node)
592 def ascii_graph(self, graph_row_printer=None, string_fn=str,
594 """Print an ascii graph on the left with `string_fn(node)` on
595 the right. If `graph_row_printer` is `None`, a default
596 instance of :class:`GraphRowPrinter` will be used.
598 See the class docstring for example output.
600 if graph_row_printer == None:
601 graph_row_printer = GraphRowPrinter()
603 for row,node in self.graph_rows(tip_to_root=tip_to_root):
604 graph.append('%s %s' % (graph_row_printer(row), string_fn(node)))
605 return '\n'.join(graph)