From f6fdecace68d0d8eb3b0b5c424ce4e3b890a6472 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Thu, 12 Mar 2009 20:24:28 +0100 Subject: [PATCH] some more cythonisation in Plex scanner classes (15% faster for lxml) --- Cython/Plex/Scanners.pxd | 21 +++++----- Cython/Plex/Scanners.py | 84 +++++----------------------------------- 2 files changed, 21 insertions(+), 84 deletions(-) diff --git a/Cython/Plex/Scanners.pxd b/Cython/Plex/Scanners.pxd index 58c9a670..fe09cff0 100644 --- a/Cython/Plex/Scanners.pxd +++ b/Cython/Plex/Scanners.pxd @@ -6,28 +6,29 @@ cdef class Scanner: cdef public stream cdef public name cdef public buffer - cdef public long buf_start_pos - cdef public long next_pos - cdef public long cur_pos - cdef public long cur_line - cdef public long cur_line_start - cdef public long start_pos - cdef public long start_line - cdef public long start_col + cdef public Py_ssize_t buf_start_pos + cdef public Py_ssize_t next_pos + cdef public Py_ssize_t cur_pos + cdef public Py_ssize_t cur_line + cdef public Py_ssize_t cur_line_start + cdef public Py_ssize_t start_pos + cdef public Py_ssize_t start_line + cdef public Py_ssize_t start_col cdef public text cdef public initial_state # int? cdef public state_name cdef public list queue cdef public bint trace cdef public cur_char - cdef public input_state + cdef public int input_state cdef public level @cython.locals(input_state=long) cpdef next_char(self) cpdef read(self) - cpdef position(self) + cpdef tuple scan_a_token(self) + cpdef tuple position(self) @cython.locals(cur_pos=cython.long, cur_line=cython.long, cur_line_start=cython.long, input_state=cython.long, diff --git a/Cython/Plex/Scanners.py b/Cython/Plex/Scanners.py index c76ec706..c6d511ed 100644 --- a/Cython/Plex/Scanners.py +++ b/Cython/Plex/Scanners.py @@ -75,6 +75,8 @@ class Scanner: |name| is optional, and may be the name of the file being scanned or any other identifying string. """ + self.trace = 0 + self.buffer = '' self.buf_start_pos = 0 self.next_pos = 0 @@ -135,7 +137,7 @@ class Scanner: # else: # action = self.run_machine_inlined() action = self.run_machine_inlined() - if action: + if action is not None: if self.trace: print("Scanner: read: Performing %s %d:%d" % ( action, self.start_pos, self.cur_pos)) @@ -144,21 +146,11 @@ class Scanner: return (text, action) else: if self.cur_pos == self.start_pos: - if self.cur_char == EOL: + if self.cur_char is EOL: self.next_char() - if not self.cur_char or self.cur_char == EOF: + if self.cur_char is None or self.cur_char is EOF: return ('', None) raise Errors.UnrecognizedInput(self, self.state_name) - - def run_machine(self): - """ - Run the machine until no more transitions are possible. - """ - self.state = self.initial_state - self.backup_state = None - while self.transition(): - pass - return self.back_up() def run_machine_inlined(self): """ @@ -183,7 +175,7 @@ class Scanner: # Begin inlined self.save_for_backup() #action = state.action #@slow action = state['action'] #@fast - if action: + if action is not None: backup_state = ( action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos) # End inlined self.save_for_backup() @@ -245,7 +237,7 @@ class Scanner: if trace: #TRACE# print("blocked") #TRACE# # Begin inlined: action = self.back_up() - if backup_state: + if backup_state is not None: (action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos) = backup_state else: @@ -259,46 +251,9 @@ class Scanner: self.input_state = input_state self.next_pos = next_pos if trace: #TRACE# - if action: #TRACE# - print("Doing " + action) #TRACE# + if action is not None: #TRACE# + print("Doing %s" % action) #TRACE# return action - -# def transition(self): -# self.save_for_backup() -# c = self.cur_char -# new_state = self.state.new_state(c) -# if new_state: -# if self.trace: -# print "Scanner: read: State %d: %s --> State %d" % ( -# self.state.number, repr(c), new_state.number) -# self.state = new_state -# self.next_char() -# return 1 -# else: -# if self.trace: -# print "Scanner: read: State %d: %s --> blocked" % ( -# self.state.number, repr(c)) -# return 0 - -# def save_for_backup(self): -# action = self.state.get_action() -# if action: -# if self.trace: -# print "Scanner: read: Saving backup point at", self.cur_pos -# self.backup_state = ( -# action, self.cur_pos, self.cur_line, self.cur_line_start, -# self.cur_char, self.input_state, self.next_pos) - -# def back_up(self): -# backup_state = self.backup_state -# if backup_state: -# (action, self.cur_pos, self.cur_line, self.cur_line_start, -# self.cur_char, self.input_state, self.next_pos) = backup_state -# if self.trace: -# print "Scanner: read: Backing up to", self.cur_pos -# return action -# else: -# return None def next_char(self): input_state = self.input_state @@ -330,26 +285,7 @@ class Scanner: self.cur_char = '' if self.trace: print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))) - -# def read_char(self): -# """ -# Get the next input character, filling the buffer if necessary. -# Returns '' at end of file. -# """ -# next_pos = self.next_pos -# buf_index = next_pos - self.buf_start_pos -# if buf_index == len(self.buffer): -# discard = self.start_pos - self.buf_start_pos -# data = self.stream.read(0x1000) -# self.buffer = self.buffer[discard:] + data -# self.buf_start_pos = self.buf_start_pos + discard -# buf_index = buf_index - discard -# if not data: -# return '' -# c = self.buffer[buf_index] -# self.next_pos = next_pos + 1 -# return c - + def position(self): """ Return a tuple (name, line, col) representing the location of -- 2.26.2