#
#=======================================================================
+import cython
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+
import Errors
from Regexps import BOL, EOL, EOF
-class Scanner:
+# Unique sentinel used as the default for state.get() in
+# run_machine_inlined(): a missing transition is then detected with an
+# identity test ("is NOT_FOUND") instead of comparing against -1.
+NOT_FOUND = object()
+
+class Scanner(object):
"""
A Scanner is used to read tokens from a stream of characters
using the token set specified by a Plex.Lexicon.
position() --> (name, line, col)
Returns the position of the last token read using the
read() method.
-
+
begin(state_name)
Causes scanner to change state.
-
+
produce(value [, text])
Causes return of a token value to the caller of the
Scanner.
"""
- lexicon = None # Lexicon
- stream = None # file-like object
- name = ''
- buffer = ''
- buf_start_pos = 0 # position in input of start of buffer
- next_pos = 0 # position in input of next char to read
- cur_pos = 0 # position in input of current char
- cur_line = 1 # line number of current char
- cur_line_start = 0 # position in input of start of current line
- start_pos = 0 # position in input of start of token
- start_line = 0 # line number of start of token
- start_col = 0 # position in line of start of token
- text = None # text of last token read
- initial_state = None # Node
- state_name = '' # Name of initial state
- queue = None # list of tokens to be returned
- trace = 0
+# lexicon = None # Lexicon
+# stream = None # file-like object
+# name = ''
+# buffer = ''
+# buf_start_pos = 0 # position in input of start of buffer
+# next_pos = 0 # position in input of next char to read
+# cur_pos = 0 # position in input of current char
+# cur_line = 1 # line number of current char
+# cur_line_start = 0 # position in input of start of current line
+# start_pos = 0 # position in input of start of token
+# start_line = 0 # line number of start of token
+# start_col = 0 # position in line of start of token
+# text = None # text of last token read
+# initial_state = None # Node
+# state_name = '' # Name of initial state
+# queue = None # list of tokens to be returned
+# trace = 0
- def __init__(self, lexicon, stream, name = ''):
+ def __init__(self, lexicon, stream, name = '', initial_pos = None):
"""
Scanner(lexicon, stream, name = '')
+ Scanner(lexicon, stream, name = '', initial_pos = None)
|name| is optional, and may be the name of the file being
scanned or any other identifying string.
+ |initial_pos| is optional; when given, its items [1] and [2]
+ seed the starting line number and column offset. Item [0] is
+ not read here. (Presumably a (name, line, col) tuple like the
+ one position() returns -- confirm against callers.)
"""
+ # Per-instance defaults; these replace the class-level attributes
+ # that this change comments out.
+ self.trace = 0
+
+ self.buffer = u''
+ self.buf_start_pos = 0
+ self.next_pos = 0
+ self.cur_pos = 0
+ self.cur_line = 1
+ self.start_pos = 0
+ self.start_line = 0
+ self.start_col = 0
+ self.text = None
+ self.state_name = None
+
self.lexicon = lexicon
self.stream = stream
self.name = name
self.cur_line_start = 0
self.cur_char = BOL
self.input_state = 1
+ # read() computes the column as cur_pos - cur_line_start, so a
+ # negative line start makes columns begin at initial_pos[2].
+ if initial_pos is not None:
+ self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
def read(self):
"""
self.start_pos = self.cur_pos
self.start_line = self.cur_line
self.start_col = self.cur_pos - self.cur_line_start
-# if self.trace:
-# action = self.run_machine()
-# else:
-# action = self.run_machine_inlined()
action = self.run_machine_inlined()
- if action:
+ if action is not None:
if self.trace:
print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos))
- base = self.buf_start_pos
- text = self.buffer[self.start_pos - base : self.cur_pos - base]
+ text = self.buffer[self.start_pos - self.buf_start_pos :
+ self.cur_pos - self.buf_start_pos]
return (text, action)
else:
if self.cur_pos == self.start_pos:
- if self.cur_char == EOL:
+ if self.cur_char is EOL:
self.next_char()
- if not self.cur_char or self.cur_char == EOF:
- return ('', None)
+ if self.cur_char is None or self.cur_char is EOF:
+ return (u'', None)
raise Errors.UnrecognizedInput(self, self.state_name)
-
- def run_machine(self):
- """
- Run the machine until no more transitions are possible.
- """
- self.state = self.initial_state
- self.backup_state = None
- while self.transition():
- pass
- return self.back_up()
-
+
def run_machine_inlined(self):
"""
Inlined version of run_machine for speed.
buffer = self.buffer
buf_start_pos = self.buf_start_pos
buf_len = len(buffer)
- backup_state = None
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ None, 0, 0, 0, u'', 0, 0
trace = self.trace
while 1:
if trace: #TRACE#
# Begin inlined self.save_for_backup()
#action = state.action #@slow
action = state['action'] #@fast
- if action:
- backup_state = (
- action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
+ if action is not None:
+ b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+ action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
# End inlined self.save_for_backup()
c = cur_char
#new_state = state.new_state(c) #@slow
- new_state = state.get(c, -1) #@fast
- if new_state == -1: #@fast
+ new_state = state.get(c, NOT_FOUND) #@fast
+ if new_state is NOT_FOUND: #@fast
new_state = c and state.get('else') #@fast
if new_state:
if trace: #TRACE#
c = buffer[buf_index]
next_pos = next_pos + 1
else:
- c = ''
+ c = u''
# End inlined: c = self.read_char()
- if c == '\n':
+ if c == u'\n':
cur_char = EOL
input_state = 2
elif not c:
else:
cur_char = c
elif input_state == 2:
- cur_char = '\n'
+ cur_char = u'\n'
input_state = 3
elif input_state == 3:
cur_line = cur_line + 1
cur_char = EOF
input_state = 5
else: # input_state = 5
- cur_char = ''
+ cur_char = u''
# End inlined self.next_char()
else: # not new_state
if trace: #TRACE#
print("blocked") #TRACE#
# Begin inlined: action = self.back_up()
- if backup_state:
- (action, cur_pos, cur_line, cur_line_start,
- cur_char, input_state, next_pos) = backup_state
+ if b_action is not None:
+ (action, cur_pos, cur_line, cur_line_start,
+ cur_char, input_state, next_pos) = \
+ (b_action, b_cur_pos, b_cur_line, b_cur_line_start,
+ b_cur_char, b_input_state, b_next_pos)
else:
action = None
break # while 1
self.cur_line_start = cur_line_start
self.cur_char = cur_char
self.input_state = input_state
- self.next_pos = next_pos
+ self.next_pos = next_pos
if trace: #TRACE#
- if action: #TRACE#
- print("Doing " + action) #TRACE#
+ if action is not None: #TRACE#
+ print("Doing %s" % action) #TRACE#
return action
-
-# def transition(self):
-# self.save_for_backup()
-# c = self.cur_char
-# new_state = self.state.new_state(c)
-# if new_state:
-# if self.trace:
-# print "Scanner: read: State %d: %s --> State %d" % (
-# self.state.number, repr(c), new_state.number)
-# self.state = new_state
-# self.next_char()
-# return 1
-# else:
-# if self.trace:
-# print "Scanner: read: State %d: %s --> blocked" % (
-# self.state.number, repr(c))
-# return 0
-
-# def save_for_backup(self):
-# action = self.state.get_action()
-# if action:
-# if self.trace:
-# print "Scanner: read: Saving backup point at", self.cur_pos
-# self.backup_state = (
-# action, self.cur_pos, self.cur_line, self.cur_line_start,
-# self.cur_char, self.input_state, self.next_pos)
-
-# def back_up(self):
-# backup_state = self.backup_state
-# if backup_state:
-# (action, self.cur_pos, self.cur_line, self.cur_line_start,
-# self.cur_char, self.input_state, self.next_pos) = backup_state
-# if self.trace:
-# print "Scanner: read: Backing up to", self.cur_pos
-# return action
-# else:
-# return None
-
+
def next_char(self):
input_state = self.input_state
if self.trace:
if input_state == 1:
self.cur_pos = self.next_pos
c = self.read_char()
- if c == '\n':
+ if c == u'\n':
self.cur_char = EOL
self.input_state = 2
elif not c:
else:
self.cur_char = c
elif input_state == 2:
- self.cur_char = '\n'
+ self.cur_char = u'\n'
self.input_state = 3
elif input_state == 3:
self.cur_line = self.cur_line + 1
self.cur_char = EOF
self.input_state = 5
else: # input_state = 5
- self.cur_char = ''
+ self.cur_char = u''
if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
-
-# def read_char(self):
-# """
-# Get the next input character, filling the buffer if necessary.
-# Returns '' at end of file.
-# """
-# next_pos = self.next_pos
-# buf_index = next_pos - self.buf_start_pos
-# if buf_index == len(self.buffer):
-# discard = self.start_pos - self.buf_start_pos
-# data = self.stream.read(0x1000)
-# self.buffer = self.buffer[discard:] + data
-# self.buf_start_pos = self.buf_start_pos + discard
-# buf_index = buf_index - discard
-# if not data:
-# return ''
-# c = self.buffer[buf_index]
-# self.next_pos = next_pos + 1
-# return c
-
+
def position(self):
"""
Return a tuple (name, line, col) representing the location of
the last token read using the read() method.
line and col are the start_line / start_col values that read()
records before it runs the machine.
"""
return (self.name, self.start_line, self.start_col)
+ def get_position(self):
+ """Python accessible wrapper around position(), only for error reporting.
+
+ Returns exactly what position() returns: the (name, line, col)
+ tuple for the start of the last token read.
+ NOTE(review): presumably needed because position() is not callable
+ from Python once this module is compiled with Cython -- confirm
+ against the accompanying declarations.
+ """
+ return self.position()
+
def begin(self, state_name):
"""Set the current state of the scanner to the named state."""
self.initial_state = (
Override this method if you want something to be done at
end of file.
"""
-
-# For backward compatibility:
-setattr(Scanner, "yield", Scanner.produce)