fix scanner level error reporting in Plex/Errors.py
[cython.git] / Cython / Plex / Scanners.py
index acf4b83bc13973d698c0176cf91f8af3a630060e..315742f309de1ad94f6cebbf6e485cd004d62f55 100644 (file)
@@ -7,10 +7,15 @@
 #
 #=======================================================================
 
+import cython
+cython.declare(BOL=object, EOL=object, EOF=object, NOT_FOUND=object)
+
 import Errors
 from Regexps import BOL, EOL, EOF
 
-class Scanner:
+NOT_FOUND = object()
+
+class Scanner(object):
   """
   A Scanner is used to read tokens from a stream of characters
   using the token set specified by a Plex.Lexicon.
@@ -32,35 +37,35 @@ class Scanner:
     position() --> (name, line, col)
       Returns the position of the last token read using the
       read() method.
-    
+
     begin(state_name)
       Causes scanner to change state.
-    
+
     produce(value [, text])
       Causes return of a token value to the caller of the
       Scanner.
 
   """
 
-  lexicon = None        # Lexicon
-  stream = None         # file-like object
-  name = ''
-  buffer = ''
-  buf_start_pos = 0     # position in input of start of buffer
-  next_pos = 0          # position in input of next char to read
-  cur_pos = 0           # position in input of current char
-  cur_line = 1          # line number of current char
-  cur_line_start = 0    # position in input of start of current line
-  start_pos = 0         # position in input of start of token
-  start_line = 0        # line number of start of token
-  start_col = 0         # position in line of start of token
-  text = None           # text of last token read
-  initial_state = None  # Node
-  state_name = ''       # Name of initial state
-  queue = None          # list of tokens to be returned
-  trace = 0
+#  lexicon = None        # Lexicon
+#  stream = None         # file-like object
+#  name = ''
+#  buffer = ''
+#  buf_start_pos = 0     # position in input of start of buffer
+#  next_pos = 0          # position in input of next char to read
+#  cur_pos = 0           # position in input of current char
+#  cur_line = 1          # line number of current char
+#  cur_line_start = 0    # position in input of start of current line
+#  start_pos = 0         # position in input of start of token
+#  start_line = 0        # line number of start of token
+#  start_col = 0         # position in line of start of token
+#  text = None           # text of last token read
+#  initial_state = None  # Node
+#  state_name = ''       # Name of initial state
+#  queue = None          # list of tokens to be returned
+#  trace = 0
 
-  def __init__(self, lexicon, stream, name = ''):
+  def __init__(self, lexicon, stream, name = '', initial_pos = None):
     """
     Scanner(lexicon, stream, name = '')
 
@@ -73,6 +78,19 @@ class Scanner:
       |name| is optional, and may be the name of the file being
       scanned or any other identifying string.
     """
+    self.trace = 0
+
+    self.buffer = u''
+    self.buf_start_pos = 0
+    self.next_pos = 0
+    self.cur_pos = 0
+    self.cur_line = 1
+    self.start_pos = 0
+    self.start_line = 0
+    self.start_col = 0
+    self.text = None
+    self.state_name = None
+
     self.lexicon = lexicon
     self.stream = stream
     self.name = name
@@ -84,6 +102,8 @@ class Scanner:
     self.cur_line_start = 0
     self.cur_char = BOL
     self.input_state = 1
+    if initial_pos is not None:
+        self.cur_line, self.cur_line_start = initial_pos[1], -initial_pos[2]
 
   def read(self):
     """
@@ -115,36 +135,22 @@ class Scanner:
     self.start_pos = self.cur_pos
     self.start_line = self.cur_line
     self.start_col = self.cur_pos - self.cur_line_start
-#              if self.trace:
-#                      action = self.run_machine()
-#              else:
-#                      action = self.run_machine_inlined()
     action = self.run_machine_inlined()
-    if action:
+    if action is not None:
       if self.trace:
         print("Scanner: read: Performing %s %d:%d" % (
           action, self.start_pos, self.cur_pos))
-      base = self.buf_start_pos
-      text = self.buffer[self.start_pos - base : self.cur_pos - base]
+      text = self.buffer[self.start_pos - self.buf_start_pos :
+                         self.cur_pos   - self.buf_start_pos]
       return (text, action)
     else:
       if self.cur_pos == self.start_pos:
-        if self.cur_char == EOL:
+        if self.cur_char is EOL:
           self.next_char()
-        if not self.cur_char or self.cur_char == EOF:
-          return ('', None)
+        if self.cur_char is None or self.cur_char is EOF:
+          return (u'', None)
       raise Errors.UnrecognizedInput(self, self.state_name)
-  
-  def run_machine(self):
-    """
-    Run the machine until no more transitions are possible.
-    """
-    self.state = self.initial_state
-    self.backup_state = None
-    while self.transition():
-      pass
-    return self.back_up()
-  
+
   def run_machine_inlined(self):
     """
     Inlined version of run_machine for speed.
@@ -159,7 +165,8 @@ class Scanner:
     buffer = self.buffer
     buf_start_pos = self.buf_start_pos
     buf_len = len(buffer)
-    backup_state = None
+    b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+              None, 0, 0, 0, u'', 0, 0
     trace = self.trace
     while 1:
       if trace: #TRACE#
@@ -168,14 +175,14 @@ class Scanner:
       # Begin inlined self.save_for_backup()
       #action = state.action #@slow
       action = state['action'] #@fast
-      if action:
-        backup_state = (
-          action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
+      if action is not None:
+        b_action, b_cur_pos, b_cur_line, b_cur_line_start, b_cur_char, b_input_state, b_next_pos = \
+                  action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos
       # End inlined self.save_for_backup()
       c = cur_char
       #new_state = state.new_state(c) #@slow
-      new_state = state.get(c, -1) #@fast
-      if new_state == -1: #@fast
+      new_state = state.get(c, NOT_FOUND) #@fast
+      if new_state is NOT_FOUND: #@fast
         new_state = c and state.get('else') #@fast
       if new_state:
         if trace: #TRACE#
@@ -202,9 +209,9 @@ class Scanner:
               c = buffer[buf_index]
               next_pos = next_pos + 1
             else:
-              c = ''
+              c = u''
           # End inlined: c = self.read_char()
-          if c == '\n':
+          if c == u'\n':
             cur_char = EOL
             input_state = 2
           elif not c:
@@ -213,7 +220,7 @@ class Scanner:
           else:
             cur_char = c
         elif input_state == 2:
-          cur_char = '\n'
+          cur_char = u'\n'
           input_state = 3
         elif input_state == 3:
           cur_line = cur_line + 1
@@ -224,15 +231,17 @@ class Scanner:
           cur_char = EOF
           input_state = 5
         else: # input_state = 5
-          cur_char = ''
+          cur_char = u''
         # End inlined self.next_char()
       else: # not new_state
         if trace: #TRACE#
           print("blocked")  #TRACE#
         # Begin inlined: action = self.back_up()
-        if backup_state:
-          (action, cur_pos, cur_line, cur_line_start, 
-            cur_char, input_state, next_pos) = backup_state
+        if b_action is not None:
+          (action, cur_pos, cur_line, cur_line_start,
+           cur_char, input_state, next_pos) = \
+                   (b_action, b_cur_pos, b_cur_line, b_cur_line_start,
+                    b_cur_char, b_input_state, b_next_pos)
         else:
           action = None
         break # while 1
@@ -242,49 +251,12 @@ class Scanner:
     self.cur_line_start = cur_line_start
     self.cur_char = cur_char
     self.input_state = input_state
-    self.next_pos       = next_pos
+    self.next_pos     = next_pos
     if trace: #TRACE#
-      if action: #TRACE#
-        print("Doing " + action) #TRACE#
+      if action is not None: #TRACE#
+        print("Doing %s" % action) #TRACE#
     return action
-    
-#      def transition(self):
-#              self.save_for_backup()
-#              c = self.cur_char
-#              new_state = self.state.new_state(c)
-#              if new_state:
-#                      if self.trace:
-#                              print "Scanner: read: State %d: %s --> State %d" % (
-#                                      self.state.number, repr(c), new_state.number)
-#                      self.state = new_state
-#                      self.next_char()
-#                      return 1
-#              else:
-#                      if self.trace:
-#                              print "Scanner: read: State %d: %s --> blocked" % (
-#                                      self.state.number, repr(c))
-#                      return 0
-  
-#      def save_for_backup(self):
-#              action = self.state.get_action()
-#              if action:
-#                      if self.trace:
-#                              print "Scanner: read: Saving backup point at", self.cur_pos
-#                      self.backup_state = (
-#                              action, self.cur_pos, self.cur_line, self.cur_line_start, 
-#                              self.cur_char, self.input_state, self.next_pos)
-  
-#      def back_up(self):
-#              backup_state = self.backup_state
-#              if backup_state:
-#                      (action, self.cur_pos, self.cur_line, self.cur_line_start, 
-#                              self.cur_char, self.input_state, self.next_pos) = backup_state
-#                      if self.trace:
-#                              print "Scanner: read: Backing up to", self.cur_pos
-#                      return action
-#              else:
-#                      return None
-  
+
   def next_char(self):
     input_state = self.input_state
     if self.trace:
@@ -292,7 +264,7 @@ class Scanner:
     if input_state == 1:
       self.cur_pos = self.next_pos
       c = self.read_char()
-      if c == '\n':
+      if c == u'\n':
         self.cur_char = EOL
         self.input_state = 2
       elif not c:
@@ -301,7 +273,7 @@ class Scanner:
       else:
         self.cur_char = c
     elif input_state == 2:
-      self.cur_char = '\n'
+      self.cur_char = u'\n'
       self.input_state = 3
     elif input_state == 3:
       self.cur_line = self.cur_line + 1
@@ -312,29 +284,10 @@ class Scanner:
       self.cur_char = EOF
       self.input_state = 5
     else: # input_state = 5
-      self.cur_char = ''
+      self.cur_char = u''
     if self.trace:
       print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
-    
-#      def read_char(self):
-#              """
-#    Get the next input character, filling the buffer if necessary.
-#    Returns '' at end of file.
-#    """
-#              next_pos = self.next_pos
-#              buf_index = next_pos - self.buf_start_pos
-#              if buf_index == len(self.buffer):
-#                      discard = self.start_pos - self.buf_start_pos
-#                      data = self.stream.read(0x1000)
-#                      self.buffer = self.buffer[discard:] + data
-#                      self.buf_start_pos = self.buf_start_pos + discard
-#                      buf_index = buf_index - discard
-#                      if not data:
-#                              return ''
-#              c = self.buffer[buf_index]
-#              self.next_pos = next_pos + 1
-#              return c
-  
+
   def position(self):
     """
     Return a tuple (name, line, col) representing the location of
@@ -346,6 +299,11 @@ class Scanner:
     """
     return (self.name, self.start_line, self.start_col)
 
+  def get_position(self):
+    """Python-accessible wrapper around position(); intended only for error reporting.
+    """
+    return self.position()
+
   def begin(self, state_name):
     """Set the current state of the scanner to the named state."""
     self.initial_state = (
@@ -372,6 +330,3 @@ class Scanner:
     Override this method if you want something to be done at
     end of file.
     """
-
-# For backward compatibility:
-setattr(Scanner, "yield", Scanner.produce)