merged intern table with general string table to support unicode string interning...
author Stefan Behnel <scoder@users.berlios.de>
Tue, 13 May 2008 10:13:27 +0000 (12:13 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Tue, 13 May 2008 10:13:27 +0000 (12:13 +0200)
Cython/Compiler/ModuleNode.py
Cython/Compiler/Nodes.py
Cython/Compiler/Symtab.py

index 0481e3cc36447eaac130d82ff1180f361171816b..1e11b256e8d68ad279a7f72e8d7a9d0d64eb0449 100644 (file)
@@ -221,12 +221,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
         code.putln("/* Implementation of %s */" % env.qualified_name)
         self.generate_const_definitions(env, code)
         self.generate_interned_num_decls(env, code)
-        self.generate_interned_name_decls(env, code)
         self.generate_py_string_decls(env, code)
         self.generate_cached_builtins_decls(env, code)
         self.body.generate_function_definitions(env, code, options.transforms)
         code.mark_pos(None)
-        self.generate_interned_name_table(env, code)
         self.generate_py_string_table(env, code)
         self.generate_typeobj_definitions(env, code)
         self.generate_method_table(env, code)
@@ -1362,47 +1360,33 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
                     "{0, 0, 0, 0, 0}")
             code.putln(
                 "};")
-    
-    def generate_interned_name_table(self, env, code):
-        code.mark_pos(None)
-        items = env.intern_map.items()
-        if items:
-            items.sort()
-            code.putln("")
-            code.putln(
-                "static __Pyx_InternTabEntry %s[] = {" %
-                    Naming.intern_tab_cname)
-            for (name, cname) in items:
-                code.putln(
-                    '{&%s, "%s"},' % (
-                        cname,
-                        name))
-            code.putln(
-                "{0, 0}")
-            code.putln(
-                "};")
-    
+
     def generate_py_string_table(self, env, code):
         entries = env.all_pystring_entries
         if entries:
             code.putln("")
+            for entry in entries:
+                if entry.is_interned:
+                    code.putln('static char %s[] = "%s";' % (
+                            entry.cname, entry.init))
+            code.putln("")
             code.putln(
                 "static __Pyx_StringTabEntry %s[] = {" %
                     Naming.stringtab_cname)
             for entry in entries:
                 code.putln(
-                    "{&%s, %s, sizeof(%s), %d}," % (
+                    "{&%s, %s, sizeof(%s), %d, %d}," % (
                         entry.pystring_cname,
                         entry.cname,
                         entry.cname,
-                        entry.type.is_unicode
+                        entry.type.is_unicode,
+                        entry.is_interned
                         ))
             code.putln(
-                "{0, 0, 0, 0}")
+                "{0, 0, 0, 0, 0}")
             code.putln(
                 "};")
 
-
     def generate_filename_init_prototype(self, code):
         code.putln("");
         code.putln("static void %s(void); /*proto*/" % Naming.fileinit_cname)
@@ -1546,12 +1530,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
                 entry.cname,
                 entry.init,
                 code.error_goto_if_null(entry.cname, self.pos)))
-        if env.intern_map:
-            env.use_utility_code(Nodes.init_intern_tab_utility_code);
-            code.putln(
-                "if (__Pyx_InternStrings(%s) < 0) %s;" % (
-                    Naming.intern_tab_cname,
-                    code.error_goto(self.pos)))
     
     def generate_string_init_code(self, env, code):
         if env.all_pystring_entries:
index e434fe35fdc5dab07bc83e36cdb15aeaa671c7fd..9eca7a2daf78a5e66efe64e2170cc828c941ced2 100644 (file)
@@ -231,19 +231,6 @@ class BlockNode:
                 if not entry.is_interned:
                     code.put_var_declaration(entry, static = 1)
     
-    def generate_interned_name_decls(self, env, code):
-        #  Flush accumulated interned names from the global scope
-        #  and generate declarations for them.
-        genv = env.global_scope()
-        intern_map = genv.intern_map
-        names = genv.interned_names
-        if names:
-            code.putln("")
-            for name in names:
-                code.putln(
-                    "static PyObject *%s;" % intern_map[name])
-            del names[:]
-    
     def generate_py_string_decls(self, env, code):
         entries = env.pystring_entries
         if entries:
@@ -878,7 +865,6 @@ class FuncDefNode(StatNode, BlockNode):
         # if we supported them, which we probably won't.
         # ----- Top-level constants used by this function
         self.generate_interned_num_decls(lenv, code)
-        self.generate_interned_name_decls(lenv, code)
         self.generate_py_string_decls(lenv, code)
         self.generate_cached_builtins_decls(lenv, code)
         #code.putln("")
@@ -3729,8 +3715,7 @@ utility_function_predeclarations = \
 #define INLINE 
 #endif
 
-typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/
-typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
+typedef struct {PyObject **p; char *s; long n; char is_unicode; char intern;} __Pyx_StringTabEntry; /*proto*/
 
 """ + """
 
@@ -4306,27 +4291,6 @@ done:
 
 #------------------------------------------------------------------------------------
 
-init_intern_tab_utility_code = [
-"""
-static int __Pyx_InternStrings(__Pyx_InternTabEntry *t); /*proto*/
-""","""
-static int __Pyx_InternStrings(__Pyx_InternTabEntry *t) {
-    while (t->p) {
-        #if PY_MAJOR_VERSION < 3
-        *t->p = PyString_InternFromString(t->s);
-        #else
-        *t->p = PyString_FromString(t->s);
-        #endif
-        if (!*t->p)
-            return -1;
-        ++t;
-    }
-    return 0;
-}
-"""]
-
-#------------------------------------------------------------------------------------
-
 init_string_tab_utility_code = [
 """
 static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
@@ -4337,10 +4301,19 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
             #if PY_MAJOR_VERSION < 3
             *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
             #else
-            *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+            if (t->intern) {
+                *t->p = PyUnicode_InternFromString(t->s);
+            } else {
+                *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+            }
             #endif
         } else {
-            *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+            #if PY_MAJOR_VERSION < 3
+            if (t->intern)
+                *t->p = PyString_InternFromString(t->s);
+            else
+            #endif
+                *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
         }
         if (!*t->p)
             return -1;
index 5e067ddfb196bd62e8223b4ae9af0795c564f3cb..3b0bffbdb4b521b1cdb9ede2d3182ca7e75da4d8 100644 (file)
@@ -15,7 +15,7 @@ from TypeSlots import \
 import ControlFlow
 import __builtin__
 
-identifier_pattern = re.compile(r"[A-Za-z_][A-Za-z0-9_]*$")
+possible_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match
 
 class Entry:
     # A symbol table entry in a Scope or ModuleNamespace.
@@ -64,7 +64,6 @@ class Entry:
     #                                 type is an extension type
     # as_module        None       Module scope, if a cimported module
     # is_inherited     boolean    Is an inherited attribute of an extension type
-    # #interned_cname   string     C name of interned name string
     # pystring_cname   string     C name of Python version of string literal
     # is_interned      boolean    For string const entries, value is interned
     # used             boolean
@@ -104,7 +103,6 @@ class Entry:
     in_cinclude = 0
     as_module = None
     is_inherited = 0
-    #interned_cname = None
     pystring_cname = None
     is_interned = 0
     used = 0
@@ -204,10 +202,10 @@ class Scope:
         
     def __str__(self):
         return "<%s %s>" % (self.__class__.__name__, self.qualified_name)
-    
+
     def intern(self, name):
         return self.global_scope().intern(name)
-    
+
     def qualifying_scope(self):
         return self.parent_scope
     
@@ -465,14 +463,12 @@ class Scope:
         # Python identifier, it will be interned.
         if not entry.pystring_cname:
             value = entry.init
-            if not entry.type.is_unicode and identifier_pattern.match(value):
-                entry.pystring_cname = self.intern(value)
+            if possible_identifier(value):
                 entry.is_interned = 1
-            else:
-                entry.pystring_cname = entry.cname + "p"
-                self.pystring_entries.append(entry)
-                self.global_scope().all_pystring_entries.append(entry)
-                
+            entry.pystring_cname = entry.cname + "p"
+            self.pystring_entries.append(entry)
+            self.global_scope().all_pystring_entries.append(entry)
+
     def add_py_num(self, value):
         # Add an entry for an int constant.
         cname = "%s%s" % (Naming.interned_num_prefix, value)
@@ -678,7 +674,6 @@ class ModuleScope(Scope):
     # pxd_file_loaded      boolean            Corresponding .pxd file has been processed
     # cimported_modules    [ModuleScope]      Modules imported with cimport
     # intern_map           {string : string}  Mapping from Python names to interned strs
-    # interned_names       [string]           Interned names pending generation of declarations
     # interned_nums        [int/long]         Interned numeric constants
     # all_pystring_entries [Entry]            Python string consts from all scopes
     # types_imported       {PyrexType : 1}    Set of types for which import code generated
@@ -706,7 +701,6 @@ class ModuleScope(Scope):
         self.pxd_file_loaded = 0
         self.cimported_modules = []
         self.intern_map = {}
-        self.interned_names = []
         self.interned_nums = []
         self.interned_objs = []
         self.all_pystring_entries = []
@@ -743,15 +737,11 @@ class ModuleScope(Scope):
         else:
             entry.is_builtin = 1
         return entry
-    
+
     def intern(self, name):
-        intern_map = self.intern_map
-        cname = intern_map.get(name)
-        if not cname:
-            cname = Naming.interned_prefix + name
-            intern_map[name] = cname
-            self.interned_names.append(name)
-        return cname
+        string_entry = self.add_string_const(name)
+        self.add_py_string(string_entry)
+        return string_entry.pystring_cname
 
     def find_module(self, module_name, pos):
         # Find a module in the import namespace, interpreting
@@ -832,8 +822,6 @@ class ModuleScope(Scope):
                     "Non-cdef global variable is not a generic Python object")
             entry.is_pyglobal = 1
             entry.namespace_cname = self.module_cname
-            #if Options.intern_names:
-            #  entry.interned_cname = self.intern(name)
         else:
             entry.is_cglobal = 1
             self.var_entries.append(entry)
@@ -1151,8 +1139,6 @@ class PyClassScope(ClassScope):
             cname, visibility, is_cdef)
         entry.is_pyglobal = 1
         entry.namespace_cname = self.class_obj_cname
-        #if Options.intern_names:
-        #      entry.interned_cname = self.intern(name)
         return entry
 
     def allocate_temp(self, type):