Cython/Plex/Machines.py

   1 #=======================================================================
   2 #
   3 #   Python Lexical Analyser
   4 #
   5 #   Classes for building NFAs and DFAs
   6 #
   7 #=======================================================================
   8
   9 import string
  10 import sys
  11 from sys import maxint
  12 from types import TupleType
  13
  14 from Transitions import TransitionMap
  15
  16 LOWEST_PRIORITY = -sys.maxint
  17
  18 class Machine:
  19   """A collection of Nodes representing an NFA or DFA."""
  20   states = None         # [Node]
  21   next_state_number = 1
  22   initial_states = None # {(name, bol): Node}
  23
  24   def __init__(self):
  25     self.states = []
  26     self.initial_states = {}
  27
  28   def __del__(self):
  29     #print "Destroying", self ###
  30     for state in self.states:
  31       state.destroy()
  32
  33   def new_state(self):
  34     """Add a new state to the machine and return it."""
  35     s = Node()
  36     n = self.next_state_number
  37     self.next_state_number = n + 1
  38     s.number = n
  39     self.states.append(s)
  40     return s
  41
  42   def new_initial_state(self, name):
  43     state = self.new_state()
  44     self.make_initial_state(name, state)
  45     return state
  46
  47   def make_initial_state(self, name, state):
  48     self.initial_states[name] = state
  49
  50   def get_initial_state(self, name):
  51     return self.initial_states[name]
  52
  53   def dump(self, file):
  54     file.write("Plex.Machine:\n")
  55     if self.initial_states is not None:
  56       file.write("   Initial states:\n")
  57       for (name, state) in self.initial_states.items():
  58         file.write("      '%s': %d\n" % (name, state.number))
  59     for s in self.states:
  60       s.dump(file)
  61
  62 class Node:
  63   """A state of an NFA or DFA."""
  64   transitions = None       # TransitionMap
  65   action = None            # Action
  66   action_priority = None   # integer
  67   number = 0               # for debug output
  68   epsilon_closure = None   # used by nfa_to_dfa()
  69
  70   def __init__(self):
  71     # Preinitialise the list of empty transitions, because
  72     # the nfa-to-dfa algorithm needs it
  73     #self.transitions = {'':[]}
  74     self.transitions = TransitionMap()
  75     self.action_priority = LOWEST_PRIORITY
  76
  77   def destroy(self):
  78     #print "Destroying", self ###
  79     self.transitions = None
  80     self.action = None
  81     self.epsilon_closure = None
  82
  83   def add_transition(self, event, new_state):
  84     self.transitions.add(event, new_state)
  85
  86   def link_to(self, state):
  87     """Add an epsilon-move from this state to another state."""
  88     self.add_transition('', state)
  89
  90   def set_action(self, action, priority):
  91     """Make this an accepting state with the given action. If
  92     there is already an action, choose the action with highest
  93     priority."""
  94     if priority > self.action_priority:
  95       self.action = action
  96       self.action_priority = priority
  97
  98   def get_action(self):
  99     return self.action
 100
 101   def get_action_priority(self):
 102     return self.action_priority
 103
 104 #       def merge_actions(self, other_state):
 105 #               """Merge actions of other state into this state according
 106 #    to their priorities."""
 107 #               action = other_state.get_action()
 108 #               priority = other_state.get_action_priority()
 109 #               self.set_action(action, priority)
 110
 111   def is_accepting(self):
 112     return self.action is not None
 113
 114   def __str__(self):
 115     return "State %d" % self.number
 116
 117   def dump(self, file):
 118     import string
 119     # Header
 120     file.write("   State %d:\n" % self.number)
 121     # Transitions
 122 #               self.dump_transitions(file)
 123     self.transitions.dump(file)
 124     # Action
 125     action = self.action
 126     priority = self.action_priority
 127     if action is not None:
 128       file.write("      %s [priority %d]\n" % (action, priority))
 129
 130
 131 class FastMachine:
 132   """
 133   FastMachine is a deterministic machine represented in a way that
 134   allows fast scanning.
 135   """
 136   initial_states = None # {state_name:state}
 137   states = None         # [state]
 138                         # where state = {event:state, 'else':state, 'action':Action}
 139   next_number = 1       # for debugging
 140
 141   new_state_template = {
 142     '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
 143   }
 144
 145   def __init__(self, old_machine = None):
 146     self.initial_states = initial_states = {}
 147     self.states = []
 148     if old_machine:
 149       self.old_to_new = old_to_new = {}
 150       for old_state in old_machine.states:
 151         new_state = self.new_state()
 152         old_to_new[old_state] = new_state
 153       for name, old_state in old_machine.initial_states.items():
 154         initial_states[name] = old_to_new[old_state]
 155       for old_state in old_machine.states:
 156         new_state = old_to_new[old_state]
 157         for event, old_state_set in old_state.transitions.items():
 158           if old_state_set:
 159             new_state[event] = old_to_new[old_state_set.keys()[0]]
 160           else:
 161             new_state[event] = None
 162         new_state['action'] = old_state.action
 163
 164   def __del__(self):
 165     for state in self.states:
 166       state.clear()
 167
 168   def new_state(self, action = None):
 169     number = self.next_number
 170     self.next_number = number + 1
 171     result = self.new_state_template.copy()
 172     result['number'] = number
 173     result['action'] = action
 174     self.states.append(result)
 175     return result
 176
 177   def make_initial_state(self, name, state):
 178     self.initial_states[name] = state
 179
 180   def add_transitions(self, state, event, new_state):
 181     if type(event) == TupleType:
 182       code0, code1 = event
 183       if code0 == -maxint:
 184         state['else'] = new_state
 185       elif code1 <> maxint:
 186         while code0 < code1:
 187           state[chr(code0)] = new_state
 188           code0 = code0 + 1
 189     else:
 190       state[event] = new_state
 191
 192   def get_initial_state(self, name):
 193     return self.initial_states[name]
 194
 195   def dump(self, file):
 196     file.write("Plex.FastMachine:\n")
 197     file.write("   Initial states:\n")
 198     for name, state in self.initial_states.items():
 199       file.write("      %s: %s\n" % (repr(name), state['number']))
 200     for state in self.states:
 201       self.dump_state(state, file)
 202
 203   def dump_state(self, state, file):
 204     import string
 205     # Header
 206     file.write("   State %d:\n" % state['number'])
 207     # Transitions
 208     self.dump_transitions(state, file)
 209     # Action
 210     action = state['action']
 211     if action is not None:
 212       file.write("      %s\n" % action)
 213
 214   def dump_transitions(self, state, file):
 215     chars_leading_to_state = {}
 216     special_to_state = {}
 217     for (c, s) in state.items():
 218       if len(c) == 1:
 219         chars = chars_leading_to_state.get(id(s), None)
 220         if chars is None:
 221           chars = []
 222           chars_leading_to_state[id(s)] = chars
 223         chars.append(c)
 224       elif len(c) <= 4:
 225         special_to_state[c] = s
 226     ranges_to_state = {}
 227     for state in self.states:
 228       char_list = chars_leading_to_state.get(id(state), None)
 229       if char_list:
 230         ranges = self.chars_to_ranges(char_list)
 231         ranges_to_state[ranges] = state
 232     ranges_list = ranges_to_state.keys()
 233     ranges_list.sort()
 234     for ranges in ranges_list:
 235       key = self.ranges_to_string(ranges)
 236       state = ranges_to_state[ranges]
 237       file.write("      %s --> State %d\n" % (key, state['number']))
 238     for key in ('bol', 'eol', 'eof', 'else'):
 239       state = special_to_state.get(key, None)
 240       if state:
 241         file.write("      %s --> State %d\n" % (key, state['number']))
 242
 243   def chars_to_ranges(self, char_list):
 244     char_list.sort()
 245     i = 0
 246     n = len(char_list)
 247     result = []
 248     while i < n:
 249       c1 = ord(char_list[i])
 250       c2 = c1
 251       i = i + 1
 252       while i < n and ord(char_list[i]) == c2 + 1:
 253         i = i + 1
 254         c2 = c2 + 1
 255       result.append((chr(c1), chr(c2)))
 256     return tuple(result)
 257
 258   def ranges_to_string(self, range_list):
 259     return string.join(map(self.range_to_string, range_list), ",")
 260
 261   def range_to_string(self, (c1, c2)):
 262     if c1 == c2:
 263       return repr(c1)
 264     else:
 265       return "%s..%s" % (repr(c1), repr(c2))
 266 ##
 267 ## (Superseded by Machines.FastMachine)
 268 ##
 269 ## class StateTableMachine:
 270 ##   """
 271 ##   StateTableMachine is an alternative representation of a Machine
 272 ##   that can be run more efficiently.
 273 ##   """
 274 ##   initial_states = None # {state_name:state_index}
 275 ##   states = None # [([state] indexed by char code, Action)]
 276
 277 ##   special_map = {'bol':256, 'eol':257, 'eof':258}
 278
 279 ##   def __init__(self, m):
 280 ##     """
 281 ##     Initialise StateTableMachine from Machine |m|.
 282 ##     """
 283 ##     initial_states = self.initial_states = {}
 284 ##     states = self.states = [None]
 285 ##     old_to_new = {}
 286 ##     i = 1
 287 ##     for old_state in m.states:
 288 ##       new_state = ([0] * 259, old_state.get_action())
 289 ##       states.append(new_state)
 290 ##       old_to_new[old_state] = i # new_state
 291 ##       i = i + 1
 292 ##     for name, old_state in m.initial_states.items():
 293 ##       initial_states[name] = old_to_new[old_state]
 294 ##     for old_state in m.states:
 295 ##       new_state_index = old_to_new[old_state]
 296 ##       new_table = states[new_state_index][0]
 297 ##       transitions = old_state.transitions
 298 ##       for c, old_targets in transitions.items():
 299 ##         if old_targets:
 300 ##           old_target = old_targets[0]
 301 ##           new_target_index = old_to_new[old_target]
 302 ##           if len(c) == 1:
 303 ##             a = ord(c)
 304 ##           else:
 305 ##             a = self.special_map[c]
 306 ##           new_table[a] = states[new_target_index]
 307
 308 ##   def dump(self, f):
 309 ##     f.write("Plex.StateTableMachine:\n")
 310 ##     f.write("    Initial states:\n")
 311 ##     for name, index in self.initial_states.items():
 312 ##       f.write("        %s: State %d\n" % (
 313 ##         repr(name), id(self.states[index])))
 314 ##     for i in xrange(1, len(self.states)):
 315 ##       table, action = self.states[i]
 316 ##       f.write("    State %d:" % i)
 317 ##       if action:
 318 ##         f.write("%s" % action)
 319 ##       f.write("\n")
 320 ##       f.write("        %s\n" % map(id,table))
 321
 322
 323
 324
 325
 326