From e3b364ab477de66743d239a72ca72fbd45510075 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Thu, 30 Sep 2010 12:46:28 -0400 Subject: [PATCH] My initial updates to depgraph2dot.py and py2depgraph.py. Perks: * module blacklist/whitelist via INVISIBLE_MODS and VISIBLE_PATHS * general refactoring --- README | 13 ++ depgraph2dot.py | 491 +++++++++++++++++++++++++++++++++++------------- py2depgraph.py | 9 +- 3 files changed, 379 insertions(+), 134 deletions(-) create mode 100644 README diff --git a/README b/README new file mode 100644 index 0000000..cbc6e5f --- /dev/null +++ b/README @@ -0,0 +1,13 @@ +Originals from: "Toby Dickenson" +http://tarind.com/depgraph.html + +Generate python module dependency graphs using dot (graphviz package) +$ python py2depgraph.py path/to/my/script.py | python depgraph2dot.py | dot -T png -o depgraph.png + +I added some code to also save the path of any modules, +so it would be easier to only print modules with particular paths, +or to make the boundary between python and other languages more clear. + +Now the default is to only print modules I wrote or those involving comedi, +print shared extension modules in blue, +and the shared libraries in green. diff --git a/depgraph2dot.py b/depgraph2dot.py index bbebf18..594e486 100644 --- a/depgraph2dot.py +++ b/depgraph2dot.py @@ -1,4 +1,7 @@ -# Copyright 2004 Toby Dickenson +#!/usr/bin/python +# +# Copyright 2004 Toby Dickenson +# Copyright 2008-2010 W. Trevor King # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -19,178 +22,404 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+import sys, getopt, colorsys, imp, md5, re, pprint +from os import popen, getuid # for finding C extension dependencies with system calls +from pwd import getpwuid -import sys, getopt, colorsys, imp, md5 +USER=getpwuid(getuid())[0] # get effective user name -class pydepgraphdot: +INVISIBLE_MODS=('__future__','copy','doctest','glob','optparse','os','qt','re', + 'StringIO','string','sys','textwrap','time','types','unittest') +INVISIBLE_PATHS=(r'.*',) +VISIBLE_PATHS=(r'.*%s.*' % USER,r'.*comedi.*') - def main(self,argv): - opts,args = getopt.getopt(argv,'',['mono']) - self.colored = 1 - for o,v in opts: - if o=='--mono': - self.colored = 0 - self.render() +def _pathmatch(regexp_tuple, path) : + "Check if a regexp in regexp tuple matches the string path" + for regexp in regexp_tuple : + if re.match(regexp, path) != None : + return True + return False - def fix(self,s): - # Convert a module name to a syntactically correct node name - return s.replace('.','_') - - def render(self): - p,t = self.get_data() - - # normalise our input data - for k,d in p.items(): - for v in d.keys(): - if not p.has_key(v): - p[v] = {} - - f = self.get_output_file() - - f.write('digraph G {\n') - #f.write('concentrate = true;\n') - #f.write('ordering = out;\n') - f.write('ranksep=1.0;\n') - f.write('node [style=filled,fontname=Helvetica,fontsize=10];\n') - allkd = p.items() - allkd.sort() - for k,d in allkd: - tk = t.get(k) - if self.use(k,tk): - allv = d.keys() - allv.sort() - for v in allv: - tv = t.get(v) - if self.use(v,tv) and not self.toocommon(v,tv): - f.write('%s -> %s' % ( self.fix(k),self.fix(v) ) ) - self.write_attributes(f,self.edge_attributes(k,v)) - f.write(';\n') - f.write(self.fix(k)) - self.write_attributes(f,self.node_attributes(k,tk)) - f.write(';\n') - f.write('}\n') - - def write_attributes(self,f,a): - if a: - f.write(' [') - f.write(','.join(a)) - f.write(']') - - def node_attributes(self,k,type): +class hooks (object) : + """ + Modules show up if visible_mod_test(...) 
== True. + + """ + def __init__(self, + invisible_mods=INVISIBLE_MODS, + invisible_paths=INVISIBLE_PATHS, + visible_paths=VISIBLE_PATHS, + link_outside_visited_nodes=True, + ignore_builtins=True) : + self._invisible_mods = invisible_mods + self._invisible_paths = invisible_paths + self._visible_paths = visible_paths + self._link_outside_visited_nodes = link_outside_visited_nodes + self._ignore_builtins = ignore_builtins + self._entered_bonus_nodes = {} # a dict of bonus nodes already printed + self._debug = False + def continue_test(self) : + return True + def visible_mod_test(self, mod_name, dep_dict, type, path, + check_external_link=True) : + """ + Return true if this module is interesting and should be drawn. + Return false if it should be completely omitted. + """ + if self._invisible_name(mod_name) == True : + if self._debug : print "\t\tinvisible module", mod_name + return False + if self._link_outside_visited_nodes == False \ + and check_external_link == True \ + and mod_name != '__main__' : + if self.follow_edge_test('__main__', imp.PY_SOURCE, './dummy.py', + mod_name, type, path) == False: + return False # don't draw nodes we wouldn't visit + return True + def follow_edge_test(self, module_name, type, path, + dname, dtype, dpath): + if self._debug : + print "\ttesting edge from %s %s %s to %s %s %s" \ + % (module_name, type, path, dname, dtype, dpath) + if self.visible_mod_test(dname, None, dtype, dpath, + check_external_link=False) == False : + if self._debug : print "\t\tinvisible target module" + return False # don't draw edges to invisible modules + elif dname == '__main__': + # references *to* __main__ are never interesting. 
omitting them means + # that main floats to the top of the page + if self._debug : print "\t\ttarget is __main__" + return False + elif self._invisible_path(path) == True and module_name != '__main__' : + # the path for __main__ seems to be its filename + if self._debug : print "\t\tinvisible module parent path", path + return False # don't draw edges from invisible path modules + elif self._link_outside_visited_nodes == False \ + and self._invisible_path(dpath) == True : + if self._debug : print "\t\tinvisible module path", dpath + return False # don't draw edges to invisible path modules + elif dtype == imp.PKG_DIRECTORY: + # don't draw edges to packages. + if self._debug : print "\t\tpackage" + return False + return True + def _invisible_name(self, mod_name) : + if mod_name in self._invisible_mods : + # nearly all modules use all of these... more or less. + # They add nothing to our diagram. + return True + return False + def _invisible_path(self, path) : + """ + Paths are visible by default. Adding a regexp to invisible_paths hides + matching paths, unless the path matches a regexp in visible_paths, in + which case it is again visible.
+ """ + if path == None and self._ignore_builtins : + return True # ignore modules without paths (builtins, etc) + if (_pathmatch(self._invisible_paths, path) + and not _pathmatch(self._visible_paths, path)): + return True + return False + +class dotformat (object) : + def __init__(self, colored=True, hooks_instance=None) : + if hooks_instance != None : + self._hooks = hooks_instance + else : + self._hooks = hooks() + self._colored = colored + def header(self): + return ('digraph G {\n' + #' concentrate = true;\n' + #' ordering = out;\n' + ' ranksep=1.0;\n' + ' node [style=filled,fontname=Helvetica,fontsize=10];\n') + def footer(self): + return '}\n' + def module(self, mod_name, dep_dict, type, path) : + name = self._fix_name(mod_name) a = [] - a.append('label="%s"' % self.label(k)) - if self.colored: - a.append('fillcolor="%s"' % self.color(k,type)) + if mod_name == '__main__' : + # the path for __main__ seems to be it's filename + a.append('label="%s"' % self._label(path)) + else : + a.append('label="%s"' % self._label(mod_name)) + if self._colored: + a.append('fillcolor="%s"' % self._color(mod_name,type)) else: a.append('fillcolor=white') - if self.toocommon(k,type): + if self._hooks._invisible_path(path): + # for printing `invisible' modules a.append('peripheries=2') - return a - - def edge_attributes(self,k,v): + return self._dot_node(name, a) + def edge(self, mod_name, dep_dict, type, path, + dep_name, dep_type, dep_path) : + name = self._fix_name(mod_name) + target = self._fix_name(dep_name) a = [] - weight = self.weight(k,v) + weight = self._weight(mod_name,dep_name) if weight!=1: a.append('weight=%d' % weight) - length = self.alien(k,v) + length = self._alien(mod_name) if length: a.append('minlen=%d' % length) - return a - - def get_data(self): - t = eval(sys.stdin.read()) - return t['depgraph'],t['types'] - - def get_output_file(self): - return sys.stdout - - def use(self,s,type): - # Return true if this module is interesting and should be drawn. 
Return false - # if it should be completely omitted. This is a default policy - please override. - if s in ('os','sys','qt','time','__future__','types','re','string'): - # nearly all modules use all of these... more or less. They add nothing to - # our diagram. - return 0 - if s.startswith('encodings.'): - return 0 - if s=='__main__': - return 1 - if self.toocommon(s,type): - # A module where we dont want to draw references _to_. Dot doesnt handle these - # well, so it is probably best to not draw them at all. - return 0 - return 1 + return self._dot_edge(name, target, a) - def toocommon(self,s,type): - # Return true if references to this module are uninteresting. Such references - # do not get drawn. This is a default policy - please override. - # - if s=='__main__': - # references *to* __main__ are never interesting. omitting them means - # that main floats to the top of the page - return 1 - if type==imp.PKG_DIRECTORY: - # dont draw references to packages. - return 1 - return 0 - - def weight(self,a,b): + def _fix_name(self, mod_name): + # Convert a module name to a syntactically correct node name + return mod_name.replace('.','_') + def _label(self,s): + # Convert a module name to a formatted node label. + return '\\.\\n'.join(s.split('.')) + def _weight(self, mod_name, target_name): # Return the weight of the dependency from a to b. Higher weights # usually have shorter straighter edges. Return 1 if it has normal weight. # A value of 4 is usually good for ensuring that a related pair of modules - # are drawn next to each other. This is a default policy - please override. + # are drawn next to each other. # - if b.split('.')[-1].startswith('_'): + if target_name.split('.')[-1].startswith('_'): # A module that starts with an underscore. 
You need a special reason to # import these (for example random imports _random), so draw them close # together return 4 return 1 - - def alien(self,a,b): + def _alien(self, mod_name): # Return non-zero if references to this module are strange, and should be drawn # extra-long. the value defines the length, in rank. This is also good for putting some - # vertical space between seperate subsystems. This is a default policy - please override. - # + # vertical space between seperate subsystems. return 0 - - def label(self,s): - # Convert a module name to a formatted node label. This is a default policy - please override. - # - return '\\.\\n'.join(s.split('.')) - - def color(self,s,type): - # Return the node color for this module name. This is a default policy - please override. - # + def _color(self, mod_name, type): + # Return the node color for this module name. + if type == imp.C_EXTENSION: + # make C extensions bluegreen + # bluegreen is at 180 deg, see http://en.wikipedia.org/wiki/Image:HueScale.svg + r,g,b = colorsys.hsv_to_rgb(180.0/360.0, .2, 1) + return '#%02x%02x%02x' % (r*255,g*255,b*255) # Calculate a color systematically based on the hash of the module name. Modules in the # same package have the same color. 
Unpackaged modules are grey - t = self.normalise_module_name_for_hash_coloring(s,type) - return self.color_from_name(t) - - def normalise_module_name_for_hash_coloring(self,s,type): + t = self._normalise_module_name_for_hash_coloring(mod_name,type) + return self._color_from_name(t) + def _normalise_module_name_for_hash_coloring(self,mod_name,type): if type==imp.PKG_DIRECTORY: - return s + return mod_name else: - i = s.rfind('.') + i = mod_name.rfind('.') if i<0: return '' else: - return s[:i] - - def color_from_name(self,name): + return mod_name[:i] + def _color_from_name(self,name): n = md5.md5(name).digest() hf = float(ord(n[0])+ord(n[1])*0xff)/0xffff sf = float(ord(n[2]))/0xff vf = float(ord(n[3]))/0xff r,g,b = colorsys.hsv_to_rgb(hf, 0.3+0.6*sf, 0.8+0.2*vf) return '#%02x%02x%02x' % (r*256,g*256,b*256) + + # abstract out most of the dot language for head and edge declarations + def _dot_node(self, name, attrs) : + string = ' %s' % self._fix_name(name) + string += self._attribute_string(attrs) + string += ';\n' + return string + def _dot_edge(self, source, target, attrs) : + string = ' %s -> %s' % (source, target) + string += self._attribute_string(attrs) + string += ';\n' + return string + def _attribute_string(self, attributes): + string = '' + if attributes: + string += ' [%s]' % (','.join(attributes)) + return string +class dotformat_Cext (dotformat) : + # support for listing C-language extension code. 
+ _visible_paths = VISIBLE_PATHS + def module(self, mod_name, dep_dict, type, path) : + name = self._fix_name(mod_name) + a = [] + if mod_name == '__main__' : + # the path for __main__ seems to be it's filename + a.append('label="%s"' % self._label(path)) + else : + a.append('label="%s"' % self._label(mod_name)) + if self._colored: + a.append('fillcolor="%s"' % self._color(mod_name,type)) + else: + a.append('fillcolor=white') + if self._hooks._invisible_path(path): + # for printing `invisible' modules + a.append('peripheries=2') + string = self._dot_node(name, a) + #print "type %s:\t%s\t(%s)" % (mod_name, type, imp.C_EXTENSION) + if type == imp.C_EXTENSION: + string += self._Cext_depend_dotstring(mod_name, path) + return string + def _Cext_depend_dotstring(self, mod_name, path) : + deps = self._Cext_depends(mod_name, path) + string = "" + for dep in deps : + edge_attrs = self._Cext_edge_attributes(mod_name, dep) + string += self._dot_node(dep, self._Cext_node_attributes(dep)) + string += self._dot_edge(mod_name, dep, edge_attrs) + return string + def _Cext_depends(self, s, path): + "Return a list of dependencies for a shared object file" + # make sure the extension is a shared object file (sanity check) + ret = [] + if path.find('.so') != len(path)-len('.so'): + return ret + for line in popen('ldd %s' % path, 'r') : + try: # ldd line: soname [=> path] (address) + soname = line.split('=>')[0].strip() + sopath = line.split('=>')[1].split('(')[0].strip() + except IndexError: + continue # irregular dependency (kernel?) + if _pathmatch(self._visible_paths, path) : + ret.append(soname) + return ret -def main(): - pydepgraphdot().main(sys.argv[1:]) + def _Cext_edge_attributes(self, mod_name, dep_name): + return [] # nothing for now... 
-if __name__=='__main__': - main() + def _Cext_node_attributes(self, dep_name): + a = [] + a.append('label="%s"' % self._label(dep_name)) + if self._colored: + a.append('fillcolor="%s"' % self._Cext_depcolor(dep_name)) + else: + a.append('fillcolor=white') + return a + + def _Cext_depcolor(self, dep_name): + # make extension dependencies green + r,g,b = colorsys.hsv_to_rgb(120.0/360.0, .2, 1) # green is at 120 deg, see http://en.wikipedia.org/wiki/Image:HueScale.svg + return '#%02x%02x%02x' % (r*255,g*255,b*255) + + + + +class pydepgraphdot (object) : + def __init__(self, hooks_instance=None, dotformat_instance=None) : + if dotformat_instance != None : + self._dotformat = dotformat_instance + else : + self._dotformat = dotformat() + if hooks_instance != None : + self._hooks = hooks_instance + else : + self._hooks = hooks() + self.reset() + self._debug=False + + def render(self, root_module='__main__'): + depgraph,types,paths = self.get_data() + if root_module != None : + self.add_module_target(root_module) + + depgraph,type,paths = self.fill_missing_deps(depgraph, types, paths) + + f = self.get_output_file() + + f.write(self._dotformat.header()) + + while True : + if self._hooks.continue_test() == False : + if self.debug : print '\t\tcontinue_test() False' + break + mod = self.next_module_target() + if mod == None : + if self._debug : print '\t\tout of modules' + break # out of modules + # I don't know anything about the underlying implementation, + # but I assume `key in dict` is more efficient than `key in list` + # because dicts are inherently hashed. + # That's my excuse for passing around deps with dummy values. 
+ deps = depgraph[mod] + type = types[mod] + path = paths[mod] + if self._hooks.visible_mod_test(mod, deps, type, path) == False : + if self._debug : print '\t\tinvisible module' + continue + f.write(self._dotformat.module(mod, deps, type, path)) + ds = deps.keys() # now we want a consistent ordering, + ds.sort() # so pull out the keys and sort them + for d in ds : + if self._hooks.follow_edge_test(mod, type, path, + d, types[d], paths[d]) : + if self._debug : print '\t\tfollow to %s' % d + #print "%s, %s, %s, %s, %s, %s, %s" % (mod, deps, type, path, d, types[d], paths[d]) + f.write(self._dotformat.edge(mod, deps, type, path, + d, types[d], paths[d])) + self.add_module_target(d) + else : + if self._debug : print "\t\tdon't follow to %s" % d + + f.write(self._dotformat.footer()) + + # data processing methods (input, output, checking) + def get_data(self): + t = eval(sys.stdin.read()) + return t['depgraph'],t['types'],t['paths'] + def get_output_file(self): + return sys.stdout + def fill_missing_deps(self, depgraph, types, paths) : + # normalize our input data + for mod,deps in depgraph.items(): # module and its dependencies + for dep in deps.keys(): + if not depgraph.has_key(dep): + # if dep not listed in depgraph somehow... + # add it in, with no further dependencies + depgraph[dep] = {} + # add dummies to types and paths too, if necessary + if not dep in types : + types[dep] = None + if not dep in paths : + paths[dep] = None + if self._debug : + print "Adding dummy entry for missing module '%s'" \ + % dep + return (depgraph, types, paths) + + # keep a list of modules for a breadth-first search.
+ def reset(self) : + # create stacks of nodes for traversing the mesh + self._modules_todo = [] + self._modules_entered = [] + def add_module_target(self, target_module) : + if not target_module in self._modules_entered : + # add to the back of the stack + if self._debug : print '\tpush', target_module + self._modules_todo.append(target_module) + self._modules_entered.append(target_module) + # otherwise, it's already on the list, so don't worry about it. + def next_module_target(self) : + if len(self._modules_todo) > 0 : + if self._debug : print '\tpop', self._modules_todo[0] + return self._modules_todo.pop(0) # remove from front of the list + else : + return None # no more modules! we're done. + + +def main(): + opts,args = getopt.getopt(sys.argv,'',['mono']) + colored = True + for o,v in opts: + if o=='--mono': + colored = False + # Fanciness with shared hooks instance so we can do slick things like + # printing all modules just inside an invisible zone, since we'll need + # the dotformatter to know which nodes are visible. + hk = hooks(link_outside_visited_nodes=False) + #hk._debug = True + dt = dotformat_Cext(colored=colored, hooks_instance=hk) + py = pydepgraphdot(hooks_instance=hk, dotformat_instance=dt) + #py._debug = True + py.render() +if __name__=='__main__': + main() diff --git a/py2depgraph.py b/py2depgraph.py index 5e97e06..f2bfe29 100644 --- a/py2depgraph.py +++ b/py2depgraph.py @@ -1,4 +1,5 @@ -# Copyright 2004 Toby Dickenson +# Copyright 2004 Toby Dickenson +# Copyright 2008-2010 W.
Trevor King # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -26,6 +27,7 @@ class mymf(modulefinder.ModuleFinder): def __init__(self,*args,**kwargs): self._depgraph = {} self._types = {} + self._paths = {} self._last_caller = None modulefinder.ModuleFinder.__init__(self,*args,**kwargs) @@ -47,6 +49,7 @@ class mymf(modulefinder.ModuleFinder): r = modulefinder.ModuleFinder.load_module(self, fqname, fp, pathname, (suffix, mode, type)) if r is not None: self._types[r.__name__] = type + self._paths[r.__name__] = pathname return r @@ -56,7 +59,7 @@ def main(argv): exclude = [] mf = mymf(path,debug,exclude) mf.run_script(argv[0]) - pprint.pprint({'depgraph':mf._depgraph,'types':mf._types}) + pprint.pprint({'depgraph':mf._depgraph,'types':mf._types,'paths':mf._paths}) if __name__=='__main__': - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:]) -- 2.26.2