From: Stefan Behnel Date: Tue, 13 May 2008 10:13:27 +0000 (+0200) Subject: merged intern table with general string table to support unicode string interning... X-Git-Tag: 0.9.8rc1~37^2~90 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=c4ba39e12e8893223c235ac3f92617960f298aa4;p=cython.git merged intern table with general string table to support unicode string interning in Py3 --- diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index 0481e3cc..1e11b256 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -221,12 +221,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln("/* Implementation of %s */" % env.qualified_name) self.generate_const_definitions(env, code) self.generate_interned_num_decls(env, code) - self.generate_interned_name_decls(env, code) self.generate_py_string_decls(env, code) self.generate_cached_builtins_decls(env, code) self.body.generate_function_definitions(env, code, options.transforms) code.mark_pos(None) - self.generate_interned_name_table(env, code) self.generate_py_string_table(env, code) self.generate_typeobj_definitions(env, code) self.generate_method_table(env, code) @@ -1362,47 +1360,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "{0, 0, 0, 0, 0}") code.putln( "};") - - def generate_interned_name_table(self, env, code): - code.mark_pos(None) - items = env.intern_map.items() - if items: - items.sort() - code.putln("") - code.putln( - "static __Pyx_InternTabEntry %s[] = {" % - Naming.intern_tab_cname) - for (name, cname) in items: - code.putln( - '{&%s, "%s"},' % ( - cname, - name)) - code.putln( - "{0, 0}") - code.putln( - "};") - + def generate_py_string_table(self, env, code): entries = env.all_pystring_entries if entries: code.putln("") + for entry in entries: + if entry.is_interned: + code.putln('static char %s[] = "%s";' % ( + entry.cname, entry.init)) + code.putln("") code.putln( "static __Pyx_StringTabEntry %s[] = {" % Naming.stringtab_cname) for entry in entries: code.putln( - "{&%s, %s, sizeof(%s), %d}," % ( + "{&%s, %s, sizeof(%s), %d, %d}," % ( entry.pystring_cname, entry.cname, entry.cname, - entry.type.is_unicode + entry.type.is_unicode, + entry.is_interned )) code.putln( - "{0, 0, 0, 0}") + "{0, 0, 0, 0, 0}") code.putln( "};") - def generate_filename_init_prototype(self, code): code.putln(""); code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname) @@ -1546,12 +1530,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): entry.cname, entry.init, code.error_goto_if_null(entry.cname, self.pos))) - if env.intern_map: - env.use_utility_code(Nodes.init_intern_tab_utility_code); - code.putln( - "if (__Pyx_InternStrings(%s) < 0) %s;" % ( - Naming.intern_tab_cname, - code.error_goto(self.pos))) def generate_string_init_code(self, env, code): if env.all_pystring_entries: diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index e434fe35..9eca7a2d 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -231,19 +231,6 @@ class BlockNode: if not entry.is_interned: code.put_var_declaration(entry, static = 1) - def generate_interned_name_decls(self, env, code): - # Flush accumulated interned names from the global scope - # and generate declarations for them. - genv = env.global_scope() - intern_map = genv.intern_map - names = genv.interned_names - if names: - code.putln("") - for name in names: - code.putln( - "static PyObject *%s;" % intern_map[name]) - del names[:] - def generate_py_string_decls(self, env, code): entries = env.pystring_entries if entries: @@ -878,7 +865,6 @@ class FuncDefNode(StatNode, BlockNode): # if we supported them, which we probably won't. # ----- Top-level constants used by this function self.generate_interned_num_decls(lenv, code) - self.generate_interned_name_decls(lenv, code) self.generate_py_string_decls(lenv, code) self.generate_cached_builtins_decls(lenv, code) #code.putln("") @@ -3729,8 +3715,7 @@ utility_function_predeclarations = \ #define INLINE #endif -typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/ -typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/ +typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern;} __Pyx_StringTabEntry; /*proto*/ """ + """ @@ -4306,27 +4291,6 @@ done: #------------------------------------------------------------------------------------ -init_intern_tab_utility_code = [ -""" -static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/ -""",""" -static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) { - while (t->p) { - #if PY_MAJOR_VERSION < 3 - *t->p = PyString_InternFromString(t->s); - #else - *t->p = PyString_FromString(t->s); - #endif - if (!*t->p) - return -1; - ++t; - } - return 0; -} -"""] - -#------------------------------------------------------------------------------------ - init_string_tab_utility_code = [ """ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ @@ -4337,10 +4301,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { #if PY_MAJOR_VERSION < 3 *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); #else - *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } #endif } else { - *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + #if PY_MAJOR_VERSION < 3 + if (t->intern) + *t->p = PyString_InternFromString(t->s); + else + #endif + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); } if (!*t->p) return -1; diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index 5e067ddf..3b0bffbd 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -15,7 +15,7 @@ from TypeSlots import \ import ControlFlow import __builtin__ -identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$") +possible_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match class Entry: # A symbol table entry in a Scope or ModuleNamespace. @@ -64,7 +64,6 @@ class Entry: # type is an extension type # as_module None Module scope, if a cimported module # is_inherited boolean Is an inherited attribute of an extension type - # #interned_cname string C name of interned name string # pystring_cname string C name of Python version of string literal # is_interned boolean For string const entries, value is interned # used boolean @@ -104,7 +103,6 @@ class Entry: in_cinclude = 0 as_module = None is_inherited = 0 - #interned_cname = None pystring_cname = None is_interned = 0 used = 0 @@ -204,10 +202,10 @@ class Scope: def __str__(self): return "<%s %s>" % (self.__class__.__name__, self.qualified_name) - + def intern(self, name): return self.global_scope().intern(name) - + def qualifying_scope(self): return self.parent_scope @@ -465,14 +463,12 @@ class Scope: # Python identifier, it will be interned. if not entry.pystring_cname: value = entry.init - if not entry.type.is_unicode and identifier_pattern.match(value): - entry.pystring_cname = self.intern(value) + if possible_identifier(value): entry.is_interned = 1 - else: - entry.pystring_cname = entry.cname + "p" - self.pystring_entries.append(entry) - self.global_scope().all_pystring_entries.append(entry) - + entry.pystring_cname = entry.cname + "p" + self.pystring_entries.append(entry) + self.global_scope().all_pystring_entries.append(entry) + def add_py_num(self, value): # Add an entry for an int constant. cname = "%s%s" % (Naming.interned_num_prefix, value) @@ -678,7 +674,6 @@ class ModuleScope(Scope): # pxd_file_loaded boolean Corresponding .pxd file has been processed # cimported_modules [ModuleScope] Modules imported with cimport # intern_map {string : string} Mapping from Python names to interned strs - # interned_names [string] Interned names pending generation of declarations # interned_nums [int/long] Interned numeric constants # all_pystring_entries [Entry] Python string consts from all scopes # types_imported {PyrexType : 1} Set of types for which import code generated @@ -706,7 +701,6 @@ class ModuleScope(Scope): self.pxd_file_loaded = 0 self.cimported_modules = [] self.intern_map = {} - self.interned_names = [] self.interned_nums = [] self.interned_objs = [] self.all_pystring_entries = [] @@ -743,15 +737,11 @@ class ModuleScope(Scope): else: entry.is_builtin = 1 return entry - + def intern(self, name): - intern_map = self.intern_map - cname = intern_map.get(name) - if not cname: - cname = Naming.interned_prefix + name - intern_map[name] = cname - self.interned_names.append(name) - return cname + string_entry = self.add_string_const(name) + self.add_py_string(string_entry) + return string_entry.pystring_cname def find_module(self, module_name, pos): # Find a module in the import namespace, interpreting @@ -832,8 +822,6 @@ class ModuleScope(Scope): "Non-cdef global variable is not a generic Python object") entry.is_pyglobal = 1 entry.namespace_cname = self.module_cname - #if Options.intern_names: - # entry.interned_cname = self.intern(name) else: entry.is_cglobal = 1 self.var_entries.append(entry) @@ -1151,8 +1139,6 @@ class PyClassScope(ClassScope): cname, visibility, is_cdef) entry.is_pyglobal = 1 entry.namespace_cname = self.class_obj_cname - #if Options.intern_names: - # entry.interned_cname = self.intern(name) return entry def allocate_temp(self, type):