From: W. Trevor King Date: Fri, 28 Sep 2012 04:12:25 +0000 (-0400) Subject: posts:yacc2dot: edit the post, update script to Python 3, and rename to .py. X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=f375ab82577db4f046a065a7623fc1269a542c1c;p=blog.git posts:yacc2dot: edit the post, update script to Python 3, and rename to .py. --- diff --git a/posts/yacc2dot.mdwn b/posts/yacc2dot.mdwn index cea1306..5edf5ce 100644 --- a/posts/yacc2dot.mdwn +++ b/posts/yacc2dot.mdwn @@ -31,9 +31,8 @@ meander through the [Bison manual][bison]. Tools ----- -The following graphics were generated from the `y.output` -files output by `yacc` using [my yacc2dot script](#yacc2dot) and -[Graphviz dot][graphviz]. +The following graphics were generated from the `y.output` files output +by `yacc` using [[yacc2dot.py]] script and [Graphviz dot][graphviz]. Example 1: Single, explicit addition ------------------------------------ @@ -134,13 +133,6 @@ which generates the following `yacc` output and `dot` image: alt="Visualization of implicit_multiplication grammar" title="Visualization of implicit multiplication grammar" ]] - -yacc2dot --------- - -I'll post my `yacc2dot` code once I polish it up a little bit. -Take a look at [yaccviso][] if you're impatient. - [DB]: http://www.dabeaz.com/ply/ply.html#ply_nn23 [SJ]: http://dinosaur.compilertools.net/yacc/index.html diff --git a/posts/yacc2dot/yacc2dot b/posts/yacc2dot/yacc2dot deleted file mode 100755 index 35c40ee..0000000 --- a/posts/yacc2dot/yacc2dot +++ /dev/null @@ -1,584 +0,0 @@ -#!/usr/bin/python -# -# convert yacc's verbose output to a dot-language diagram - -from sys import stdin, stdout, stderr - -DOT_HEAD = """ -digraph yacc { - - epsilon = 0.01; - nodesep = 0.2; /* inches */ - - node [shape = record]; - -""" - -DOT_GOTO_EDGE = """ - edge [color = blue]; -""" - -DOT_REDUCE_EDGE = """ - edge [color = red]; -""" - -DOT_TAIL = "}\n" - - - - -class action (object) : - """ - Parse the text defining a given rule: - >>> a = action(' TEXT shift, and go to state 1') - >>> print a.key - TEXT - >>> print a.action - shift - >>> a = action(' $default accept') - >>> print a.action - accept - >>> a = action(' $default reduce using rule 1 (sum)') - >>> print a.action - reduce - >>> print a.reduce_rule - 1 - >>> a = action(' PLUS shift, and go to state 3') - >>> print a.goto - 3 - >>> a = action(' sum go to state 2') - >>> print a.action - go - >>> print a.goto - 2 - """ - def __init__(self, string) : - self.rule = string - split = string.split() - self.key = split[0] - if split[1][-1] == ',' : - self.action = split[1][0:-1] - elif split[1][0] == '[': - self.action = 'IGNORED' - else : - self.action = split[1] - - if self.action == 'reduce' : - self.reduce_rule = int(split[-2]) - elif self.action == 'shift' : - self.goto = int(split[-1]) - elif self.action == 'go' : - self.goto = int(split[-1]) - def __str__(self) : - if self.action == 'go' or self.action == 'shift': - details = ', goto %d' % self.goto - elif self.action == 'reduce' : - details = ', rule %d' % self.reduce_rule - elif self.action == 'accept' : - details = '' - elif self.action == 'IGNORED' : - details = '' - else : - raise Exception, 'wierd action %s' % self.action - - return '' % (self.key, self.action, details) - def __repr__(self) : - return self.__str__() - -class state (object) : - """ - Parse the text defining a given state: - >>> state_text = '''state 0 - ... - ... 0 $accept: . authors $end - ... - ... TEXT shift, and go to state 1 - ... LBRACE shift, and go to state 2 - ... - ... authors go to state 3 - ... author go to state 4 - ... text go to state 5 - ... - ... ''' - >>> s = state(state_text) - >>> print s.id - 0 - >>> print s.rules - [(0, '$accept: . authors $end')] - >>> print s.tok_act - [, ] - >>> print s.rule_act - [, , ] - >>> print s.goes_to() - [1, 2, 3, 4, 5] - """ - def __init__(self, state_text, debug=False) : - # because the output of `yacc -v grammar.y' is so regular, - # we'll sneak by with a HACK parsing job ;). - self.id = None - self.rules = [] # list of "rule string"s ###(, , [], pos) - self.tok_act = [] - self.rule_act = [] - lookfor = 'state' - hadtext = False # so consecutive blank lines don't keep switching lookfor state - for line in state_text.splitlines() : - if debug : print >> stderr, '%s : %s' % (lookfor, line) - split = line.split() - if len(split) < 1 : # blank line - if hadtext == False : - continue # ignore - elif lookfor == 'rules' : - lookfor = 'tok-act' - continue - elif lookfor == 'tok-act' : - lookfor = 'rule-act' - continue - elif split[0] == 'state' : # 'state ' - assert lookfor == 'state', "Multiple states in '%s'" % state_text - assert len(split) == 2, "%d items in state line '%s'" % line - self.id = int(split[1]) # store as self.id - lookfor = 'rules' - hadtext = False - continue - elif lookfor == 'rules' : # e.g ' 2 expr: expr . PLUS expr' - rule_id = int(split[0]) - rule_text = split[1:] - if rule_text[0] == '|' : # e.g. second line of - # expr: X - # | expr PLUS expr - rule_text.pop(0) - prev_rule = self.rules[len(self.rules)-1] - p_text_split = prev_rule[1].split() - rule_text.insert(0, p_text_split[0]) - self.rules.append((rule_id, " ".join(rule_text))) - elif lookfor == 'tok-act' : - self.tok_act.append(action(line)) - elif lookfor == 'rule-act' : - self.rule_act.append(action(line)) - else : - raise Exception, "Unrecognized lookfor '%s' for line '%s'" % (lookfor, line) - hadtext = True - def __cmp__(self, other) : - return cmp(self.id, other.id) - def __str__(self) : - return '' % self.id - def __repr__(self) : - return self.__str__() - def goes_to(self) : - "Return a list of integer states that this node goes to (not reduces to)." - ret = [] - for a in self.tok_act : - if hasattr(a, 'goto') : - ret.append(a.goto) - for a in self.rule_act : - if hasattr(a, 'goto') : - ret.append(a.goto) - return ret - -class tnode (object) : - """ - Tree structure, with backlinks. - >>> root = tnode("root") - >>> root.add_child("child0") - >>> root.add_child("child1") - >>> root.child[0].add_child("child00") - >>> root.child[0].add_child("child01") - >>> print root.child[0].child - [, ] - >>> print root.child[0].parent - - >>> print root.find("child01") - - """ - def __init__(self, value=None, parent=None) : - self.value = value - self.parent = parent - self.child = [] - def add_child(self, child_value) : - self.child.append(tnode(child_value, self)) - def build(self, fn_get_children) : - "use gn_get_children(value) = [values' children] to recursively build the tree" - children = fn_get_children(self.value) - for child in children : - self.add_child(child) - for cnode in self.child : - cnode.build(fn_get_children) - def find(self, value) : - "depth first search" - if self.value == value : - return self - else : - for cnode in self.child : - match = cnode.find(value) - if match != None : - return match - return None - def __str__(self) : - string = '' % str(self.value) - return string - def __repr__(self) : - return self.__str__() - -class yaccout (object) : - """ - Parse the output of `yacc -v file.y' - For example, for sum.y = - %token X - %token PLUS - %% - sum : X PLUS X - We get: - >>> yacc_text = ''' - ... Grammar - ... - ... 0 $accept: sum $end - ... - ... 1 sum: X PLUS X - ... - ... - ... Terminals, with rules where they appear - ... - ... $end (0) 0 - ... error (256) - ... X (258) 1 - ... PLUS (259) 1 - ... - ... - ... Nonterminals, with rules where they appear - ... - ... $accept (5) - ... on left: 0 - ... sum (6) - ... on left: 1, on right: 0 - ... - ... - ... state 0 - ... - ... 0 $accept: . sum $end - ... - ... X shift, and go to state 1 - ... - ... sum go to state 2 - ... - ... - ... state 1 - ... - ... 1 sum: X . PLUS X - ... - ... PLUS shift, and go to state 3 - ... - ... - ... state 2 - ... - ... 0 $accept: sum . $end - ... - ... $end shift, and go to state 4 - ... - ... - ... state 3 - ... - ... 1 sum: X PLUS . X - ... - ... X shift, and go to state 5 - ... - ... - ... state 4 - ... - ... 0 $accept: sum $end . - ... - ... $default accept - ... - ... - ... state 5 - ... - ... 1 sum: X PLUS X . - ... - ... $default reduce using rule 1 (sum) - ... ''' - >>> y = yaccout(yacc_text) - >>> print y.states - [, , , , , ] - >>> print y.states[0].tok_act - [] - >>> print y.root.child[0].child - [>] - >>> print y.root.child[0].parent - > - """ - def __init__(self, yacc_text, debug=False): - # because the output of `yacc -v grammar.y' is so regular, - # we'll sneak by with a HACK parsing job ;). - self.read_states(yacc_text, debug=debug) - #self.gen_tree() - #self.reduce_link(debug=debug) - def read_states(self, yacc_text, debug=False): - "read in all the states" - self.states = [] - lookfor = 'first state' - for line in yacc_text.splitlines() : - split = line.split(' ') - if lookfor == 'first state' : - if split[0] != 'state' : - continue - else : - lookfor = 'states' - state_text = [line] - continue - else : - if split[0] == 'state' : # we've hit the next state - self.states.append(state('\n'.join(state_text), debug=debug)) - state_text = [line] - continue - else : - state_text.append(line) - # end of file, so process the last state - self.states.append(state('\n'.join(state_text), debug=debug)) - def state_goes_to(self, state) : - ret = [] - for id in state.goes_to() : - for s in self.states : - if s.id == id : - ret.append(s) - return ret - def gen_tree(self): - "generate the state dependency tree" - self.root = tnode(self.states[0]) - self.root.build(self.state_goes_to) - def reduce_action(self, state, action, debug): - if action.action == 'reduce' : - if debug : print >> stderr, 'reduce from %s with rule %d::' % (state, action.reduce_rule), - # find the rule text - rule = None - for r in state.rules : - if r[0] == action.reduce_rule : - rule = r[1] # e.g. 'sum: X PLUS X .' - if debug : print >> stderr, rule - # find the number of args in the rule - split = rule.split() - args = len(split) - 2 # 2 for 'sum:' and '.' - # find the state in the tnode tree - tnode = self.root.find(state) - for i in range(args) : # take arg steps up the tree - tnode = tnode.parent - tstate = tnode.value # reduction target state - action.reduce_to = tstate.id # state we reduce to - action.reduce_targ = split[0][0:-1] # rule we reduce to 'sum' - i = 0 - for a in tstate.tok_act : - if a.key == action.reduce_targ : - action.reduce_targ_i = i - i += 1 - for a in tstate.rule_act : - if a.key == action.reduce_targ : - action.reduce_targ_i = i - i += 1 - if debug : print >> stderr, 'to state %d' % action.reduce_to - def reduce_link(self, debug=False): - "generate the reduce_to backlinks" - for state in self.states : - for a in state.tok_act : - self.reduce_action(state, a, debug) - for a in state.rule_act : - self.reduce_action(state, a, debug) - - -def dot_state (state) : - """ - Print a dot node for a given state. - Node type must be 'record'. - layout: - state %d - rules - ... - a0 | a1 | ... - >>> state_text = '''state 0 - ... - ... 0 $accept: . authors $end - ... - ... TEXT shift, and go to state 1 - ... LBRACE shift, and go to state 2 - ... - ... authors go to state 3 - ... author go to state 4 - ... text go to state 5 - ... - ... ''' - >>> s = state(state_text) - >>> print dot_state(s), - state0 [label = "{ state 0 | { TEXT | LBRACE | authors | author | text } }"]; - """ - label = '{ state %d | ' % state.id - for rule in state.rules : - label += '(%d) %s\\n' % (rule[0], rule[1]) - label += ' | {' - a = 0 - for action in state.tok_act[:-1] : - label += ' %s |' % (a, action.key,) - a += 1 - if len(state.tok_act) > 0 : - label += ' %s' % (a, state.tok_act[-1].key) - a += 1 - for action in state.rule_act : - label += ' | %s' % (a, action.key) - a += 1 - label += ' } }' - - string = ' state%d [label = "%s"];\n' % (state.id, label) - return string - -def dot_goto(id, a, action) : - """ - Print dot links for a given action (action a for state id). - >>> a = action(' TEXT shift, and go to state 1') - >>> print dot_goto(5,8,a), - state5:a8 -> state1:s; - >>> a = action(' $default reduce using rule 1 (sum)') - >>> print dot_goto(0,1,a), - - >>> a = action(' sum go to state 2') - >>> print dot_goto(0,1,a), - state0:a1 -> state2:s; - """ - if hasattr(action, 'goto') : - string = ' state%d:a%d -> state%d:s;\n' % (id, a, action.goto) - else : - string = '' - return string - - -def dot_gotos(state) : - """ - Print dot links for a given state. - >>> state_text = '''state 0 - ... - ... 0 $accept: . authors $end - ... - ... TEXT shift, and go to state 1 - ... LBRACE reduce using rule 1 (braces) - ... - ... authors go to state 3 - ... author go to state 4 - ... text go to state 5 - ... - ... ''' - >>> s = state(state_text) - >>> print dot_gotos(s), - state0:a0 -> state1:s; - state0:a2 -> state3:s; - state0:a3 -> state4:s; - state0:a4 -> state5:s; - """ - string = "" - a = 0 - for action in state.tok_act : - string += dot_goto(state.id, a, action) - a += 1 - for action in state.rule_act : - string += dot_goto(state.id, a, action) - a += 1 - return string - -def dot_reduce(id, a, action) : - """ - Print dot reduce links for a reduction action (action a for state id). - """ - if action.action == 'reduce' : - string = ' state%d:a%d -> state%d:a%d;\n' % (id, a, action.reduce_to, action.reduce_targ_i) - else : - string = '' - return string - - -def dot_reduces(state) : - """ - Print dot reduce links for a given state. - """ - string = "" - a = 0 - for action in state.tok_act : - string += dot_reduce(state.id, a, action) - a += 1 - for action in state.rule_act : - string += dot_reduce(state.id, a, action) - a += 1 - return string - -def yacc2dot(yaccout) : - string = DOT_HEAD - string += "\n" - for state in yaccout.states : - string += dot_state(state) - string += "\n" - string += DOT_GOTO_EDGE - for state in yaccout.states : - string += dot_gotos(state) - string += "\n" - string += DOT_REDUCE_EDGE - #for state in yaccout.states : - # string += dot_reduces(state) - string += "\n" - string += DOT_TAIL - return string - -def open_IOfiles(ifilename=None, ofilename=None, debug=False): - if ifilename : - if debug : print >> stderr, "open input file '%s'" % ifilename - ifile = file(ifilename, 'r') - else : - ifile = stdin - if ofilename : - if debug : print >> stderr, "open output file '%s'" % ofilename - ofile = file(ofilename, 'w') - else : - ofile = stdout - return (ifile, ofile) - -def close_IOfiles(ifilename=None, ifile=stdin, - ofilename=None, ofile=stdout, - debug=False): - if ifilename : - if debug : print >> stderr, "close input file '%s'" % ifilename - ifile.close() - if ofilename : - if debug : print >> stderr, "close output file '%s'" % ofilename - ofile.close() - -def _test(): - import doctest - doctest.testmod() - -if __name__ == "__main__": - from optparse import OptionParser - - parser = OptionParser(usage="usage: %prog [options]", version="%prog 0.1") - - parser.add_option('-f', '--input-file', dest="ifilename", - help="Read input from FILE (default stdin)", - type='string', metavar="FILE") - parser.add_option('-o', '--output-file', dest="ofilename", - help="Write output to FILE (default stdout)", - type='string', metavar="FILE") - parser.add_option('-t', '--test', dest="test", - help="Run the yacc2dot test suite", - action="store_true", default=False) - parser.add_option('-v', '--verbose', dest="verbose", - help="Print lots of debugging information", - action="store_true", default=False) - - (options, args) = parser.parse_args() - parser.destroy() - - ifile,ofile = open_IOfiles(options.ifilename, options.ofilename, - options.verbose) - - if options.test : - _test() - else : - text = ifile.read() - y = yaccout(text, options.verbose) - dot = yacc2dot(y) - print >> ofile, dot - - close_IOfiles(options.ifilename, ifile, - options.ofilename, ofile, options.verbose) - diff --git a/posts/yacc2dot/yacc2dot.py b/posts/yacc2dot/yacc2dot.py new file mode 100755 index 0000000..938c137 --- /dev/null +++ b/posts/yacc2dot/yacc2dot.py @@ -0,0 +1,675 @@ +#!/usr/bin/python3 +# +# Copyright (C) 2010-2012 W. Trevor King +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +"""Convert yacc's verbose output to a dot-language diagram +""" + +import functools as _functools +import logging as _logging +import sys as _sys + + +__version__ = '0.2' + +LOG = _logging.getLogger('yacc2dot') +LOG.setLevel(_logging.ERROR) +LOG.addHandler(_logging.StreamHandler()) + + +class Action (object): + """ + Parse the text defining a given rule: + >>> a = Action(' TEXT shift, and go to state 1') + >>> print(a.key) + TEXT + >>> print(a.action) + shift + >>> a = Action(' $default accept') + >>> print(a.action) + accept + >>> a = Action(' $default reduce using rule 1 (sum)') + >>> print(a.action) + reduce + >>> print(a.reduce_rule) + 1 + >>> a = Action(' PLUS shift, and go to state 3') + >>> print(a.goto) + 3 + >>> a = Action(' sum go to state 2') + >>> print(a.action) + go + >>> print(a.goto) + 2 + """ + def __init__(self, string): + self.rule = string + split = string.split() + self.key = split[0] + if split[1][-1] == ',': + self.action = split[1][0:-1] + elif split[1][0] == '[': + self.action = 'IGNORED' + else: + self.action = split[1] + + if self.action == 'reduce': + self.reduce_rule = int(split[-2]) + elif self.action == 'shift': + self.goto = int(split[-1]) + elif self.action == 'go': + self.goto = int(split[-1]) + + def __str__(self): + if self.action == 'go' or self.action == 'shift': + details = ', goto {}'.format(self.goto) + elif self.action == 'reduce': + details = ', rule {}'.format(self.reduce_rule) + elif self.action == 'accept': + details = '' + elif self.action == 'IGNORED': + details = '' + else: + raise Exception('weird action {}'.format(self.action)) + + return '<{} {}, {}{}>'.format( + self.__class__.__name__, self.key, self.action, details) + + def __repr__(self): + return self.__str__() + + +@_functools.total_ordering +class State (object): + """ + Parse the text defining a given state: + >>> state_text = '''state 0 + ... + ... 0 $accept: . authors $end + ... + ... TEXT shift, and go to state 1 + ... LBRACE shift, and go to state 2 + ... + ... authors go to state 3 + ... author go to state 4 + ... text go to state 5 + ... + ... ''' + >>> s = State(state_text) + >>> print(s.id) + 0 + >>> print(s.rules) + [(0, '$accept: . authors $end')] + >>> print(s.tok_act) + [, ] + >>> print(s.rule_act) + [, , ] + >>> print(s.goes_to()) + [1, 2, 3, 4, 5] + """ + def __init__(self, state_text): + # because the output of `yacc -v grammar.y' is so regular, + # we'll sneak by with a HACK parsing job ;). + self.id = None + # list of "rule string"s ###(, , [], pos) + self.rules = [] + self.tok_act = [] + self.rule_act = [] + lookfor = 'state' + # so consecutive blank lines don't keep switching lookfor state + hadtext = False + for line in state_text.splitlines(): + LOG.debug('{} : {}'.format(lookfor, line)) + split = line.split() + if len(split) < 1 : # blank line + if hadtext == False: + continue # ignore + elif lookfor == 'rules': + lookfor = 'tok-act' + continue + elif lookfor == 'tok-act': + lookfor = 'rule-act' + continue + elif split[0] == 'state' : # 'state ' + assert lookfor == 'state', "Multiple states in '%s'" % state_text + assert len(split) == 2, "%d items in state line '%s'" % line + self.id = int(split[1]) # store as self.id + lookfor = 'rules' + hadtext = False + continue + elif lookfor == 'rules' : # e.g ' 2 expr: expr . PLUS expr' + rule_id = int(split[0]) + rule_text = split[1:] + if rule_text[0] == '|' : # e.g. second line of + # expr: X + # | expr PLUS expr + rule_text.pop(0) + prev_rule = self.rules[len(self.rules)-1] + p_text_split = prev_rule[1].split() + rule_text.insert(0, p_text_split[0]) + self.rules.append((rule_id, " ".join(rule_text))) + elif lookfor == 'tok-act': + self.tok_act.append(Action(line)) + elif lookfor == 'rule-act': + self.rule_act.append(Action(line)) + else: + raise Exception( + "Unrecognized lookfor '{}' for line '{}'".format( + lookfor, line)) + hadtext = True + + def __eq__(self, other): + if other is None: + return False + return self.id == other.id + + def __lt__(self, other): + if other is None: + return False + return self.id < other.id + + def __hash__(self): + return self.id + + def __str__(self): + return '<{} {}>'.format(self.__class__.__name__, self.id) + + def __repr__(self): + return self.__str__() + + def goes_to(self): + """Return a list of integer states this node goes to + + This is not the same as the states the node reduces to. + """ + ret = [] + for a in self.tok_act: + if hasattr(a, 'goto'): + ret.append(a.goto) + for a in self.rule_act: + if hasattr(a, 'goto'): + ret.append(a.goto) + return ret + + +@_functools.total_ordering +class TreeNode (object): + """Tree structure, with backlinks. + + >>> root = TreeNode('root') + >>> root.add_child('child0') + >>> root.add_child('child1') + >>> root.child[0].add_child('child00') + >>> root.child[0].add_child('child01') + >>> print(root.child[0].child) + [, ] + >>> print(root.child[0].parent) + + >>> print(root.find('child01')) + + """ + def __init__(self, value=None, parent=None): + self.value = value + self.parent = parent + self.child = [] + + def __eq__(self, other): + if other is None: + return False + return self.value == other.value + + def __lt__(self, other): + if other is None: + return False + return self.value < other.value + + def __str__(self): + return '<{} {}>'.format(self.__class__.__name__, self.value) + + def __repr__(self): + return self.__str__() + + def add_child(self, child_value, built): + if child_value in built: + self.child.append(built[child_value]) + else: + self.child.append(TreeNode(child_value, self)) + + def build(self, fn_get_children, built=None): + """Recursively build the tree + + Using ``gn_get_children(value) = [child values]``. + """ + if built is None: + built = {} + handled = set() + children = fn_get_children(self.value) + for child in children: + self.add_child(child, built=built) + for cnode in self.child: + if cnode.value not in built: + built[cnode.value] = cnode + cnode.build(fn_get_children, built=built) + + def find(self, value): + "depth first search" + if self.value == value: + return self + else: + for cnode in self.child: + match = cnode.find(value) + if match != None: + return match + return None + + +class YaccParser (object): + """Parse the output of `yacc -v file.y' + + For example, for sum.y = + %token X + %token PLUS + %% + sum : X PLUS X + + We get: + >>> yacc_text = ''' + ... Grammar + ... + ... 0 $accept: sum $end + ... + ... 1 sum: X PLUS X + ... + ... + ... Terminals, with rules where they appear + ... + ... $end (0) 0 + ... error (256) + ... X (258) 1 + ... PLUS (259) 1 + ... + ... + ... Nonterminals, with rules where they appear + ... + ... $accept (5) + ... on left: 0 + ... sum (6) + ... on left: 1, on right: 0 + ... + ... + ... state 0 + ... + ... 0 $accept: . sum $end + ... + ... X shift, and go to state 1 + ... + ... sum go to state 2 + ... + ... + ... state 1 + ... + ... 1 sum: X . PLUS X + ... + ... PLUS shift, and go to state 3 + ... + ... + ... state 2 + ... + ... 0 $accept: sum . $end + ... + ... $end shift, and go to state 4 + ... + ... + ... state 3 + ... + ... 1 sum: X PLUS . X + ... + ... X shift, and go to state 5 + ... + ... + ... state 4 + ... + ... 0 $accept: sum $end . + ... + ... $default accept + ... + ... + ... state 5 + ... + ... 1 sum: X PLUS X . + ... + ... $default reduce using rule 1 (sum) + ... ''' + >>> y = YaccParser(yacc_text) + >>> print(y.states) + [, , , , , ] + >>> print(y.states[0].tok_act) + [] + >>> print(y.root.child[0].child) + [>] + >>> print(y.root.child[0].parent) + > + """ + def __init__(self, yacc_text): + # because the output of `yacc -v grammar.y' is so regular, + # we'll sneak by with a HACK parsing job ;). + self.read_states(yacc_text) + self.gen_tree() + self.reduce_link() + + def read_states(self, yacc_text): + "read in all the states" + self.states = [] + lookfor = 'first state' + for line in yacc_text.splitlines(): + split = line.split(' ') + if lookfor == 'first state': + if split[0] != 'state': + continue + else: + lookfor = 'states' + state_text = [line] + continue + else: + if split[0] == 'state' : # we've hit the next state + self.states.append(State('\n'.join(state_text))) + state_text = [line] + continue + else: + state_text.append(line) + # end of file, so process the last state + self.states.append(State('\n'.join(state_text))) + + def state_goes_to(self, state): + ret = [] + for id in state.goes_to(): + for s in self.states: + if s.id == id: + ret.append(s) + return ret + + def gen_tree(self): + "generate the state dependency tree" + self.root = TreeNode(self.states[0]) + self.root.build(self.state_goes_to) + + def reduce_action(self, state, action): + if action.action == 'reduce': + # find the rule text + rule = None + for r in state.rules: + if r[0] == action.reduce_rule: + rule = r[1] # e.g. 'sum: X PLUS X .' + LOG.debug('reduce from {} with rule {}: {}'.format( + state, action.reduce_rule, rule)) + # find the number of args in the rule + split = rule.split() + args = len(split) - 2 # 2 for 'sum:' and '.' + # find the state in the tnode tree + tnode = self.root.find(state) + for i in range(args) : # take arg steps up the tree + tnode = tnode.parent + tstate = tnode.value # reduction target state + action.reduce_to = tstate.id # state we reduce to + action.reduce_targ = split[0][0:-1] # rule we reduce to 'sum' + i = 0 + for a in tstate.tok_act: + if a.key == action.reduce_targ: + action.reduce_targ_i = i + i += 1 + for a in tstate.rule_act: + if a.key == action.reduce_targ: + action.reduce_targ_i = i + i += 1 + LOG.debug('to state {}'.format(action.reduce_to)) + + def reduce_link(self): + "generate the reduce_to backlinks" + for state in self.states: + for a in state.tok_act: + self.reduce_action(state, a) + for a in state.rule_act: + self.reduce_action(state, a) + + +class DotPrinter (object): + head = ( + 'digraph yacc {', + '', + ' epsilon = 0.01;', + ' nodesep = 0.2; /* inches */', + '', + ' node [shape = record];', + ) + + goto_edge = ( + ' edge [color = blue];', + ) + + reduce_edge = ( + ' edge [color = red];', + ) + + tail = ( + '}', + ) + + def state(self, state): + """Print a dot node for a given state. + + Node type must be 'record'. + layout: + state %d + rules + ... + a0 | a1 | ... + + >>> state_text = '''state 0 + ... + ... 0 $accept: . authors $end + ... + ... TEXT shift, and go to state 1 + ... LBRACE shift, and go to state 2 + ... + ... authors go to state 3 + ... author go to state 4 + ... text go to state 5 + ... + ... ''' + >>> s = State(state_text) + >>> p = DotPrinter() + >>> print(p.state(s)) + state0 [label = "{ state 0 | (0) $accept: . authors $end\\n | { TEXT | LBRACE | authors | author | text } }"]; + """ + label = ['{{ state {} | '.format(state.id)] + for id,target in state.rules: + label.append('({}) {}\\n'.format(id, target)) + label.append(' | {') + a = 0 + for action in state.tok_act[:-1]: + label.append(' {} |'.format(a, action.key)) + a += 1 + if len(state.tok_act) > 0: + label.append(' {}'.format(a, state.tok_act[-1].key)) + a += 1 + for action in state.rule_act: + label.append(' | {}'.format(a, action.key)) + a += 1 + label.append(' } }') + return ' state{} [label = "{}"];'.format(state.id, ''.join(label)) + + def goto(self, id, a, action): + """Print dot links for a given action + + Action a for state id + + >>> a = Action(' TEXT shift, and go to state 1') + >>> p = DotPrinter() + >>> print(p.goto(5,8,a)) + state5:a8 -> state1:s; + >>> a = Action(' $default reduce using rule 1 (sum)') + >>> print(p.goto(0,1,a)) + None + >>> a = Action(' sum go to state 2') + >>> print(p.goto(0,1,a)) + state0:a1 -> state2:s; + """ + if hasattr(action, 'goto'): + return ' state{}:a{} -> state{}:s;'.format(id, a, action.goto) + return None + + def gotos(self, state): + """Print dot links for a given state. + + >>> state_text = '''state 0 + ... + ... 0 $accept: . authors $end + ... + ... TEXT shift, and go to state 1 + ... LBRACE reduce using rule 1 (braces) + ... + ... authors go to state 3 + ... author go to state 4 + ... text go to state 5 + ... + ... ''' + >>> s = State(state_text) + >>> p = DotPrinter() + >>> print(p.gotos(s)) + state0:a0 -> state1:s; + state0:a2 -> state3:s; + state0:a3 -> state4:s; + state0:a4 -> state5:s; + """ + ret = [] + a = 0 + for action in state.tok_act: + ret.append(self.goto(state.id, a, action)) + a += 1 + for action in state.rule_act: + ret.append(self.goto(state.id, a, action)) + a += 1 + return '\n'.join(x for x in ret if x) + + def reduce(self, id, a, action): + """Print dot reduce links for a reduction action + + Action a for state id. + """ + if action.action == 'reduce': + return ' state{}:a{} -> state{}:a{};'.format( + id, a, action.reduce_to, action.reduce_targ_i) + return None + + def reduces(self, state): + """Print dot reduce links for a given state. + """ + ret = [] + a = 0 + for action in state.tok_act: + ret.append(self.reduce(state.id, a, action)) + a += 1 + for action in state.rule_act: + ret.append(self.reduce(state.id, a, action)) + a += 1 + return '\n'.join(x for x in ret if x) + + def __call__(self, yaccout): + lines = list(self.head) + lines.append('') + for state in yaccout.states: + lines.append(self.state(state)) + lines.append('') + lines.extend(self.goto_edge) + for state in yaccout.states: + x = self.gotos(state) + if x: + lines.append(x) + lines.append('') + lines.extend(self.reduce_edge) + for state in yaccout.states: + x = self.reduces(state) + if x: + lines.append(x) + lines.append('') + lines.extend(self.tail) + lines.append('') + return '\n'.join(lines) + + +def open_files(ifilename=None, ofilename=None): + if ifilename: + LOG.debug("open input file '{}'".format(ifilename)) + ifile = file(ifilename, 'r') + else: + ifile = _sys.stdin + if ofilename: + LOG.debug("open output file '{}'".format(ofilename)) + ofile = file(ofilename, 'w') + else: + ofile = _sys.stdout + return (ifile, ofile) + +def close_files(ifilename=None, ifile=_sys.stdin, + ofilename=None, ofile=_sys.stdout): + if ifilename: + LOG.debug("close input file '{}'".format(ifilename)) + ifile.close() + if ofilename: + LOG.debug("close output file '{}'".format(ofilename)) + ofile.close() + +def _test(): + import doctest + doctest.testmod() + + +if __name__ == "__main__": + from argparse import ArgumentParser + + parser = ArgumentParser( + description=__doc__, version=__version__) + + parser.add_argument( + '-f', '--input-file', dest='ifilename', metavar='FILE', + help='Read input from FILE (default stdin)') + parser.add_argument( + '-o', '--output-file', dest='ofilename', metavar='FILE', + help='Write output to FILE (default stdout)') + parser.add_argument( + '-t', '--test', dest='test', action='store_const', const=True, + help='Run the yacc2dot test suite') + parser.add_argument( + '-V', '--verbose', dest='verbose', action='count', default=0, + help='Increment verbosity') + + args = parser.parse_args() + + if args.verbose: + LOG.setLevel(_logging.DEBUG) + + ifile,ofile = open_files(args.ifilename, args.ofilename) + try: + if args.test: + _test() + else: + text = ifile.read() + y = YaccParser(text) + p = DotPrinter() + dot = p(y) + ofile.write(dot) + finally: + close_files(args.ifilename, ifile, args.ofilename, ofile)