fb9ba717d80f5bc40a76b84b4027fe64c73c684d
[cython.git] / Cython / Plex / Machines.py
1 #=======================================================================
2 #
3 #   Python Lexical Analyser
4 #
5 #   Classes for building NFAs and DFAs
6 #
7 #=======================================================================
8
9 import string
10 import sys
11 from sys import maxint
12 from types import TupleType
13
14 from Transitions import TransitionMap
15
16 LOWEST_PRIORITY = -sys.maxint
17
18 class Machine:
19   """A collection of Nodes representing an NFA or DFA."""
20   states = None         # [Node]
21   next_state_number = 1
22   initial_states = None # {(name, bol): Node}
23
24   def __init__(self):
25     self.states = []
26     self.initial_states = {}
27
28   def __del__(self):
29     #print "Destroying", self ###
30     for state in self.states:
31       state.destroy()
32
33   def new_state(self):
34     """Add a new state to the machine and return it."""
35     s = Node()
36     n = self.next_state_number
37     self.next_state_number = n + 1
38     s.number = n
39     self.states.append(s)
40     return s
41
42   def new_initial_state(self, name):
43     state = self.new_state()
44     self.make_initial_state(name, state)
45     return state
46
47   def make_initial_state(self, name, state):
48     self.initial_states[name] = state
49
50   def get_initial_state(self, name):
51     return self.initial_states[name]
52   
53   def dump(self, file):
54     file.write("Plex.Machine:\n")
55     if self.initial_states is not None:
56       file.write("   Initial states:\n")
57       for (name, state) in self.initial_states.items():
58         file.write("      '%s': %d\n" % (name, state.number))
59     for s in self.states:
60       s.dump(file)
61
62 class Node:
63   """A state of an NFA or DFA."""
64   transitions = None       # TransitionMap
65   action = None            # Action
66   action_priority = None   # integer
67   number = 0               # for debug output
68   epsilon_closure = None   # used by nfa_to_dfa()
69
70   def __init__(self):
71     # Preinitialise the list of empty transitions, because
72     # the nfa-to-dfa algorithm needs it
73     #self.transitions = {'':[]}
74     self.transitions = TransitionMap()
75     self.action_priority = LOWEST_PRIORITY
76
77   def destroy(self):
78     #print "Destroying", self ###
79     self.transitions = None
80     self.action = None
81     self.epsilon_closure = None
82
83   def add_transition(self, event, new_state):
84     self.transitions.add(event, new_state)
85   
86   def link_to(self, state):
87     """Add an epsilon-move from this state to another state."""
88     self.add_transition('', state)
89
90   def set_action(self, action, priority):
91     """Make this an accepting state with the given action. If 
92     there is already an action, choose the action with highest
93     priority."""
94     if priority > self.action_priority:
95       self.action = action
96       self.action_priority = priority
97
98   def get_action(self):
99     return self.action
100
101   def get_action_priority(self):
102     return self.action_priority
103
104 #       def merge_actions(self, other_state):
105 #               """Merge actions of other state into this state according
106 #    to their priorities."""
107 #               action = other_state.get_action()
108 #               priority = other_state.get_action_priority()
109 #               self.set_action(action, priority)
110
111   def is_accepting(self):
112     return self.action is not None
113
114   def __str__(self):
115     return "State %d" % self.number
116
117   def dump(self, file):
118     import string
119     # Header
120     file.write("   State %d:\n" % self.number)
121     # Transitions
122 #               self.dump_transitions(file)
123     self.transitions.dump(file)
124     # Action
125     action = self.action
126     priority = self.action_priority
127     if action is not None:
128       file.write("      %s [priority %d]\n" % (action, priority))
129   
130
131 class FastMachine:
132   """
133   FastMachine is a deterministic machine represented in a way that
134   allows fast scanning.
135   """
136   initial_states = None # {state_name:state}
137   states = None         # [state]
138                         # where state = {event:state, 'else':state, 'action':Action}
139   next_number = 1       # for debugging
140   
141   new_state_template = {
142     '':None, 'bol':None, 'eol':None, 'eof':None, 'else':None
143   }
144   
145   def __init__(self, old_machine = None):
146     self.initial_states = initial_states = {}
147     self.states = []
148     if old_machine:
149       self.old_to_new = old_to_new = {}
150       for old_state in old_machine.states:
151         new_state = self.new_state()
152         old_to_new[old_state] = new_state
153       for name, old_state in old_machine.initial_states.items():
154         initial_states[name] = old_to_new[old_state]
155       for old_state in old_machine.states:
156         new_state = old_to_new[old_state]
157         for event, old_state_set in old_state.transitions.items():
158           if old_state_set:
159             new_state[event] = old_to_new[old_state_set.keys()[0]]
160           else:
161             new_state[event] = None
162         new_state['action'] = old_state.action
163   
164   def __del__(self):
165     for state in self.states:
166       state.clear()
167   
168   def new_state(self, action = None):
169     number = self.next_number
170     self.next_number = number + 1
171     result = self.new_state_template.copy()
172     result['number'] = number
173     result['action'] = action
174     self.states.append(result)
175     return result
176   
177   def make_initial_state(self, name, state):
178     self.initial_states[name] = state
179   
180   def add_transitions(self, state, event, new_state):
181     if type(event) == TupleType:
182       code0, code1 = event
183       if code0 == -maxint:
184         state['else'] = new_state
185       elif code1 <> maxint:
186         while code0 < code1:
187           state[chr(code0)] = new_state
188           code0 = code0 + 1
189     else:
190       state[event] = new_state
191   
192   def get_initial_state(self, name):
193     return self.initial_states[name]
194   
195   def dump(self, file):
196     file.write("Plex.FastMachine:\n")
197     file.write("   Initial states:\n")
198     for name, state in self.initial_states.items():
199       file.write("      %s: %s\n" % (repr(name), state['number']))
200     for state in self.states:
201       self.dump_state(state, file)
202
203   def dump_state(self, state, file):
204     import string
205     # Header
206     file.write("   State %d:\n" % state['number'])
207     # Transitions
208     self.dump_transitions(state, file)
209     # Action
210     action = state['action']
211     if action is not None:
212       file.write("      %s\n" % action)
213   
214   def dump_transitions(self, state, file):
215     chars_leading_to_state = {}
216     special_to_state = {}
217     for (c, s) in state.items():
218       if len(c) == 1:
219         chars = chars_leading_to_state.get(id(s), None)
220         if chars is None:
221           chars = []
222           chars_leading_to_state[id(s)] = chars
223         chars.append(c)
224       elif len(c) <= 4:
225         special_to_state[c] = s
226     ranges_to_state = {}
227     for state in self.states:
228       char_list = chars_leading_to_state.get(id(state), None)
229       if char_list:
230         ranges = self.chars_to_ranges(char_list)
231         ranges_to_state[ranges] = state
232     ranges_list = ranges_to_state.keys()
233     ranges_list.sort()
234     for ranges in ranges_list:
235       key = self.ranges_to_string(ranges)
236       state = ranges_to_state[ranges]
237       file.write("      %s --> State %d\n" % (key, state['number']))
238     for key in ('bol', 'eol', 'eof', 'else'):
239       state = special_to_state.get(key, None)
240       if state:
241         file.write("      %s --> State %d\n" % (key, state['number']))
242
243   def chars_to_ranges(self, char_list):
244     char_list.sort()
245     i = 0
246     n = len(char_list)
247     result = []
248     while i < n:
249       c1 = ord(char_list[i])
250       c2 = c1
251       i = i + 1
252       while i < n and ord(char_list[i]) == c2 + 1:
253         i = i + 1
254         c2 = c2 + 1
255       result.append((chr(c1), chr(c2)))
256     return tuple(result)
257   
258   def ranges_to_string(self, range_list):
259     return string.join(map(self.range_to_string, range_list), ",")
260   
261   def range_to_string(self, (c1, c2)):
262     if c1 == c2:
263       return repr(c1)
264     else:
265       return "%s..%s" % (repr(c1), repr(c2))
266 ##
267 ## (Superseded by Machines.FastMachine)
268 ##
269 ## class StateTableMachine:
270 ##   """
271 ##   StateTableMachine is an alternative representation of a Machine
272 ##   that can be run more efficiently.
273 ##   """
274 ##   initial_states = None # {state_name:state_index}
275 ##   states = None # [([state] indexed by char code, Action)] 
276   
277 ##   special_map = {'bol':256, 'eol':257, 'eof':258}
278   
279 ##   def __init__(self, m):
280 ##     """
281 ##     Initialise StateTableMachine from Machine |m|.
282 ##     """
283 ##     initial_states = self.initial_states = {}
284 ##     states = self.states = [None]
285 ##     old_to_new = {}
286 ##     i = 1
287 ##     for old_state in m.states:
288 ##       new_state = ([0] * 259, old_state.get_action())
289 ##       states.append(new_state)
290 ##       old_to_new[old_state] = i # new_state
291 ##       i = i + 1
292 ##     for name, old_state in m.initial_states.items():
293 ##       initial_states[name] = old_to_new[old_state]
294 ##     for old_state in m.states:
295 ##       new_state_index = old_to_new[old_state]
296 ##       new_table = states[new_state_index][0]
297 ##       transitions = old_state.transitions
298 ##       for c, old_targets in transitions.items():
299 ##         if old_targets:
300 ##           old_target = old_targets[0]
301 ##           new_target_index = old_to_new[old_target]
302 ##           if len(c) == 1:
303 ##             a = ord(c)
304 ##           else:
305 ##             a = self.special_map[c]
306 ##           new_table[a] = states[new_target_index]
307
308 ##   def dump(self, f):
309 ##     f.write("Plex.StateTableMachine:\n")
310 ##     f.write("    Initial states:\n")
311 ##     for name, index in self.initial_states.items():
312 ##       f.write("        %s: State %d\n" % (
313 ##         repr(name), id(self.states[index])))
314 ##     for i in xrange(1, len(self.states)):
315 ##       table, action = self.states[i]
316 ##       f.write("    State %d:" % i)
317 ##       if action:
318 ##         f.write("%s" % action)
319 ##       f.write("\n")
320 ##       f.write("        %s\n" % map(id,table))
321       
322       
323       
324       
325
326