From 5530fe1dad19d3d065bd5ae6e6f0b0efa9d47037 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 18 Mar 2009 21:44:34 +0100 Subject: [PATCH] almost complete refactoring of constant allocation to move it into the code generation phase --- Cython/Compiler/Code.py | 336 ++++++++++++++++++++++++++-------- Cython/Compiler/ExprNodes.py | 113 ++++++------ Cython/Compiler/ModuleNode.py | 19 +- Cython/Compiler/Naming.py | 1 + Cython/Compiler/Nodes.py | 67 ++++--- Cython/Compiler/Symtab.py | 101 ---------- tests/errors/e_strcoerce.pyx | 9 +- 7 files changed, 366 insertions(+), 280 deletions(-) diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index aa77a037..121a87b7 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -2,15 +2,16 @@ # Pyrex - Code output module # +import re import codecs import Naming import Options -from Cython.Utils import open_new_file, open_source_file +import StringEncoding +from Cython import Utils from PyrexTypes import py_object_type, typecast from TypeSlots import method_coexist from Scanning import SourceDescriptor from Cython.StringIOTree import StringIOTree -import DebugFlags try: set except NameError: @@ -48,6 +49,8 @@ class FunctionState(object): self.temps_used_type = {} # name -> (type, manage_ref) self.temp_counter = 0 + # labels + def new_label(self, name=None): n = self.label_counter self.label_counter = n + 1 @@ -107,6 +110,8 @@ class FunctionState(object): def label_used(self, lbl): return lbl in self.labels_used + # temp handling + def allocate_temp(self, type, manage_ref): """ Allocates a temporary (which may create a new one or get a previously @@ -196,6 +201,81 @@ class FunctionState(object): if manage_ref for cname in freelist] + +class IntConst(object): + """Global info about a Python integer constant held by GlobalState. + """ + # cname string + # value int + # is_long boolean + + def __init__(self, cname, value, is_long): + self.cname = cname + self.value = value + self.is_long = is_long + +possible_identifier = re.compile(ur"(?![0-9])\w+$", re.U).match +nice_identifier = re.compile('^[a-zA-Z0-0_]+$').match + +class StringConst(object): + """Global info about a C string constant held by GlobalState. + """ + # cname string + # text EncodedString or BytesLiteral + # py_strings {(identifier, encoding) : PyStringConst} + + def __init__(self, cname, text, byte_string): + self.cname = cname + self.text = text + self.escaped_value = StringEncoding.escape_byte_string(byte_string) + self.py_strings = None + + def get_py_string_const(self, encoding, identifier=None): + py_strings = self.py_strings + text = self.text + if encoding is not None: + encoding = encoding.upper() + + key = (bool(identifier), encoding) + if py_strings is not None and key in py_strings: + py_string = py_strings[key] + else: + if py_strings is None: + self.py_strings = {} + is_unicode = encoding is None + intern = bool(identifier or ( + identifier is None and possible_identifier(text))) + if intern: + prefix = Naming.interned_str_prefix + else: + prefix = Naming.py_const_prefix + pystring_cname = "%s%s%s_%s" % ( + prefix, + is_unicode and 'u' or 'b', + identifier and 'i' or '', + self.cname[len(Naming.const_prefix):]) + + py_string = PyStringConst( + pystring_cname, is_unicode, bool(identifier), intern) + self.py_strings[key] = py_string + + return py_string + +class PyStringConst(object): + """Global info about a Python string constant held by GlobalState. + """ + # cname string + # unicode boolean + # intern boolean + # identifier boolean + + def __init__(self, cname, is_unicode, identifier=False, intern=False): + self.cname = cname + self.identifier = identifier + self.unicode = is_unicode + self.intern = intern + + class GlobalState(object): # filename_table {string : int} for finding filename table indexes # filename_list [string] filenames in filename table order @@ -212,6 +292,9 @@ class GlobalState(object): # check if constants are already added). # In time, hopefully the literals etc. will be # supplied directly instead. + # + # const_cname_counter int global counter for constant identifiers + # # interned_strings @@ -232,10 +315,13 @@ class GlobalState(object): self.input_file_contents = {} self.used_utility_code = set() self.declared_cnames = {} - self.pystring_table_needed = False self.in_utility_code_generation = False self.emit_linenums = emit_linenums + self.const_cname_counter = 1 + self.string_const_index = {} + self.int_const_index = {} + def initwriters(self, rootwriter): self.utilprotowriter = rootwriter.new_writer() self.utildefwriter = rootwriter.new_writer() @@ -257,10 +343,6 @@ class GlobalState(object): self.cleanupwriter.putln("") self.cleanupwriter.putln("static void __Pyx_CleanupGlobals(void) {") - self.pystring_table.putln("") - self.pystring_table.putln("static __Pyx_StringTabEntry %s[] = {" % - Naming.stringtab_cname) - # # Global constants, interned objects, etc. # @@ -270,16 +352,7 @@ class GlobalState(object): def close_global_decls(self): # This is called when it is known that no more global declarations will # declared (but can be called before or after insert_XXX). - if self.pystring_table_needed: - self.pystring_table.putln("{0, 0, 0, 0, 0, 0}") - self.pystring_table.putln("};") - import Nodes - self.use_utility_code(Nodes.init_string_tab_utility_code) - self.initwriter.putln( - "if (__Pyx_InitStrings(%s) < 0) %s;" % ( - Naming.stringtab_cname, - self.initwriter.error_goto(self.module_pos))) - + self.generate_const_declarations() if Options.cache_builtins: w = self.init_cached_builtins_writer w.putln("return 0;") @@ -300,8 +373,7 @@ class GlobalState(object): w.exit_cfunc_scope() def insert_initcode_into(self, code): - if self.pystring_table_needed: - code.insert(self.pystring_table) + code.insert(self.pystring_table) if Options.cache_builtins: code.insert(self.init_cached_builtins_writer) code.insert(self.initwriter) @@ -312,6 +384,137 @@ class GlobalState(object): def put_pyobject_decl(self, entry): self.decls_writer.putln("static PyObject *%s;" % entry.cname) + # constant handling at code generation time + + def get_int_const(self, str_value, longness=False): + longness = bool(longness or Utils.long_literal(str_value)) + try: + c = self.int_const_index[(str_value, longness)] + except KeyError: + c = self.new_int_const(str_value, longness) + return c + + def get_string_const(self, text): + # return a C string constant, creating a new one if necessary + if text.is_unicode: + byte_string = text.utf8encode() + else: + byte_string = text.byteencode() + try: + c = self.string_const_index[byte_string] + except KeyError: + c = self.new_string_const(text, byte_string) + return c + + def get_py_string_const(self, text, identifier=None): + # return a Python string constant, creating a new one if necessary + c_string = self.get_string_const(text) + py_string = c_string.get_py_string_const(text.encoding, identifier) + return py_string + + def new_string_const(self, text, byte_string): + cname = self.new_string_const_cname(text) + c = StringConst(cname, text, byte_string) + self.string_const_index[byte_string] = c + return c + + def new_int_const(self, value, longness): + cname = self.new_int_const_cname(value, longness) + c = IntConst(cname, value, longness) + self.int_const_index[(value, longness)] = c + return c + + def new_string_const_cname(self, value, intern=None): + # Create a new globally-unique nice name for a C string constant. + if len(value) < 20 and nice_identifier(value): + return "%s%s" % (Naming.const_prefix, value) + else: + return self.new_const_cname() + + def new_int_const_cname(self, value, longness): + if longness: + value += 'L' + cname = "%s%s" % (Naming.interned_num_prefix, value) + cname = cname.replace('-', 'neg_').replace('.','_') + return cname + + def new_const_cname(self, prefix=''): + n = self.const_cname_counter + self.const_cname_counter += 1 + return "%s%s%d" % (Naming.const_prefix, prefix, n) + + def add_cached_builtin_decl(self, entry): + if Options.cache_builtins: + if self.should_declare(entry.cname, entry): + interned_cname = self.get_py_string_const(entry.name, True).cname + self.put_pyobject_decl(entry) + self.init_cached_builtins_writer.putln('%s = __Pyx_GetName(%s, %s); if (!%s) %s' % ( + entry.cname, + Naming.builtins_cname, + interned_cname, + entry.cname, + self.init_cached_builtins_writer.error_goto(entry.pos))) + + def generate_const_declarations(self): + self.generate_string_constants() + self.generate_int_constants() + + def generate_string_constants(self): + c_consts = [ (len(c.cname), c.cname, c) + for c in self.string_const_index.itervalues() ] + c_consts.sort() + py_strings = [] + for _, cname, c in c_consts: + self.decls_writer.putln('static char %s[] = "%s";' % ( + cname, c.escaped_value)) + if c.py_strings is not None: + for py_string in c.py_strings.itervalues(): + py_strings.append((c.cname, len(py_string.cname), py_string)) + + if py_strings: + import Nodes + self.use_utility_code(Nodes.init_string_tab_utility_code) + + py_strings.sort() + self.pystring_table.putln("") + self.pystring_table.putln("static __Pyx_StringTabEntry %s[] = {" % + Naming.stringtab_cname) + for c_cname, _, py_string in py_strings: + self.decls_writer.putln( + "static PyObject *%s;" % py_string.cname) + self.pystring_table.putln( + "{&%s, %s, sizeof(%s), %d, %d, %d}," % ( + py_string.cname, + c_cname, + c_cname, + py_string.unicode, + py_string.intern, + py_string.identifier + )) + self.pystring_table.putln("{0, 0, 0, 0, 0, 0}") + self.pystring_table.putln("};") + + self.initwriter.putln( + "if (__Pyx_InitStrings(%s) < 0) %s;" % ( + Naming.stringtab_cname, + self.initwriter.error_goto(self.module_pos))) + + def generate_int_constants(self): + consts = [ (len(c.value), c.value, c.is_long, c) + for c in self.int_const_index.itervalues() ] + consts.sort() + for _, value, longness, c in consts: + cname = c.cname + self.decls_writer.putln("static PyObject *%s;" % cname) + if longness: + function = '%s = PyLong_FromString((char *)"%s", 0, 0); %s;' + else: + function = "%s = PyInt_FromLong(%s); %s;" + self.initwriter.putln(function % ( + cname, + value, + self.initwriter.error_goto_if_null(cname, self.module_pos))) + # The functions below are there in a transition phase only # and will be deprecated. They are called from Nodes.BlockNode. # The copy&paste duplication is intentional in order to be able @@ -327,55 +530,6 @@ class GlobalState(object): self.declared_cnames[cname] = entry return True - def add_const_definition(self, entry): - if self.should_declare(entry.cname, entry): - self.decls_writer.put_var_declaration(entry, static = 1) - - def add_interned_string_decl(self, entry): - if self.should_declare(entry.cname, entry): - self.decls_writer.put_var_declaration(entry, static = 1) - self.add_py_string_decl(entry) - - def add_py_string_decl(self, entry): - if self.should_declare(entry.pystring_cname, entry): - self.decls_writer.putln("static PyObject *%s;" % entry.pystring_cname) - self.pystring_table_needed = True - self.pystring_table.putln("{&%s, %s, sizeof(%s), %d, %d, %d}," % ( - entry.pystring_cname, - entry.cname, - entry.cname, - entry.type.is_unicode, - entry.is_interned, - entry.is_identifier - )) - - def add_interned_num_decl(self, entry): - if self.should_declare(entry.cname, entry): - if entry.init[-1] == "L": - self.initwriter.putln('%s = PyLong_FromString((char *)"%s", 0, 0); %s;' % ( - entry.cname, - entry.init[:-1], # strip 'L' for Py3 compatibility - self.initwriter.error_goto_if_null(entry.cname, self.module_pos))) - else: - self.initwriter.putln("%s = PyInt_FromLong(%s); %s;" % ( - entry.cname, - entry.init, - self.initwriter.error_goto_if_null(entry.cname, self.module_pos))) - - self.put_pyobject_decl(entry) - - def add_cached_builtin_decl(self, entry): - if Options.cache_builtins: - if self.should_declare(entry.cname, entry): - self.put_pyobject_decl(entry) - self.init_cached_builtins_writer.putln('%s = __Pyx_GetName(%s, %s); if (!%s) %s' % ( - entry.cname, - Naming.builtins_cname, - entry.interned_cname, - entry.cname, - self.init_cached_builtins_writer.error_goto(entry.pos))) - - # # File name state # @@ -475,6 +629,7 @@ def funccontext_property(name): setattr(self.funcstate, name, value) return property(get, set) + class CCodeWriter(object): """ Utility class to output C code. @@ -596,6 +751,25 @@ class CCodeWriter(object): def exit_cfunc_scope(self): self.funcstate = None + # constant handling + + def get_py_num(self, str_value, longness): + return self.globalstate.get_int_const(str_value, longness).cname + + def get_string_const(self, text): + return self.globalstate.get_string_const(text).cname + + def get_py_string_const(self, text, identifier=None): + return self.globalstate.get_py_string_const(text, identifier).cname + + def intern(self, text): + return self.get_py_string_const(text) + + def intern_identifier(self, text): + return self.get_py_string_const(text, True) + + # code generation + def putln(self, code = ""): if self.marker and self.bol: self.emit_marker() @@ -732,6 +906,18 @@ class CCodeWriter(object): else: self.putln("%s;" % decl) + def put_h_guard(self, guard): + self.putln("#ifndef %s" % guard) + self.putln("#define %s" % guard) + + def unlikely(self, cond): + if Options.gcc_branch_hints: + return 'unlikely(%s)' % cond + else: + return cond + + # Python objects and reference counting + def entry_as_pyobject(self, entry): type = entry.type if (not entry.is_self_arg and not entry.type.is_complete() @@ -876,20 +1062,12 @@ class CCodeWriter(object): doc_code, term)) + # error handling + def put_error_if_neg(self, pos, value): # return self.putln("if (unlikely(%s < 0)) %s" % (value, self.error_goto(pos))) # TODO this path is almost _never_ taken, yet this macro makes is slower! return self.putln("if (%s < 0) %s" % (value, self.error_goto(pos))) - def put_h_guard(self, guard): - self.putln("#ifndef %s" % guard) - self.putln("#define %s" % guard) - - def unlikely(self, cond): - if Options.gcc_branch_hints: - return 'unlikely(%s)' % cond - else: - return cond - def set_error_info(self, pos): if Options.c_line_in_traceback: cinfo = " %s = %s;" % (Naming.clineno_cname, Naming.line_c_macro) @@ -937,7 +1115,7 @@ class PyrexCodeWriter(object): # level int indentation level def __init__(self, outfile_name): - self.f = open_new_file(outfile_name) + self.f = Utils.open_new_file(outfile_name) self.level = 0 def putln(self, code): diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index c6e3ddd9..919bb2de 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -541,7 +541,7 @@ class ExprNode(Node): src_type = self.type src_is_py_type = src_type.is_pyobject dst_is_py_type = dst_type.is_pyobject - + if dst_type.is_pyobject: if not src.type.is_pyobject: src = CoerceToPyTypeNode(src, env) @@ -857,7 +857,6 @@ class IntNode(ConstNode): # Arrange for a Python version of the number to be pre-allocated # when coercing to a Python type. if dst_type.is_pyobject: - self.entry = env.get_py_num(self.value, self.longness) self.type = PyrexTypes.py_object_type # We still need to perform normal coerce_to processing on the # result, because we might be coercing to an extension type, @@ -868,11 +867,14 @@ class IntNode(ConstNode): self.type = PyrexTypes.c_bint_type return self - def calculate_result_code(self): + def generate_evaluation_code(self, code): if self.type.is_pyobject: - return self.entry.cname + self.result_code = code.get_py_num(self.value, self.longness) else: - return str(self.value) + self.unsigned + self.longness + self.result_code = str(self.value) + self.unsigned + self.longness + + def calculate_result_code(self): + return self.result_code def calculate_constant_result(self): self.constant_result = int(self.value, 0) @@ -903,16 +905,11 @@ class FloatNode(ConstNode): class StringNode(ConstNode): - # entry Symtab.Entry - type = PyrexTypes.c_char_ptr_type def compile_time_value(self, denv): return self.value - - def analyse_types(self, env): - self.entry = env.add_string_const(self.value) - + def analyse_as_type(self, env): type = PyrexTypes.parse_basic_type(self.value) if type is not None: @@ -924,17 +921,17 @@ class StringNode(ConstNode): sizeof_node.analyse_types(env) if isinstance(sizeof_node, SizeofTypeNode): return sizeof_node.arg_type - + def coerce_to(self, dst_type, env): if dst_type == PyrexTypes.c_char_ptr_type: self.type = PyrexTypes.c_char_ptr_type return self - + if dst_type.is_int: - if not self.type.is_pyobject and len(self.entry.init) == 1: + if not self.type.is_pyobject and len(self.value) == 1: return CharNode(self.pos, value=self.value) else: - error(self.pos, "Only coerce single-character ascii strings can be used as ints.") + error(self.pos, "Only single-character byte strings can be coerced into ints.") return self # Arrange for a Python version of the string to be pre-allocated # when coercing to a Python type. @@ -948,40 +945,38 @@ class StringNode(ConstNode): return ConstNode.coerce_to(node, dst_type, env) def as_py_string_node(self, env): - # Return a new StringNode with the same entry as this node + # Return a new StringNode with the same value as this node # but whose type is a Python type instead of a C type. - entry = self.entry - env.add_py_string(entry) - return StringNode(self.pos, value = self.value, entry = entry, type = py_object_type) - - def calculate_result_code(self): + return StringNode(self.pos, value = self.value, type = py_object_type) + + def generate_evaluation_code(self, code): if self.type.is_pyobject: - return self.entry.pystring_cname + self.result_code = code.get_py_string_const(self.value) else: - return self.entry.cname + self.result_code = code.get_string_const(self.value) + + def calculate_result_code(self): + return self.result_code class UnicodeNode(PyConstNode): - # entry Symtab.Entry - type = unicode_type - - def analyse_types(self, env): - self.entry = env.add_string_const(self.value) - env.add_py_string(self.entry) - - def calculate_result_code(self): - return self.entry.pystring_cname - def _coerce_to(self, dst_type, env): - if not dst_type.is_pyobject: - node = StringNode(self.pos, entry = entry, type = py_object_type) - return ConstNode.coerce_to(node, dst_type, env) + def coerce_to(self, dst_type, env): + if dst_type.is_pyobject: + return self else: + error(self.pos, "Unicode objects do not support coercion to C types.") return self - # We still need to perform normal coerce_to processing on the - # result, because we might be coercing to an extension type, - # in which case a type test node will be needed. + + def generate_evaluation_code(self, code): + if self.type.is_pyobject: + self.result_code = code.get_py_string_const(self.value) + else: + self.result_code = code.get_string_const(self.value) + + def calculate_result_code(self): + return self.result_code def compile_time_value(self, env): return self.value @@ -992,11 +987,14 @@ class IdentifierStringNode(ConstNode): # keyword arguments in a call, or for imported names type = PyrexTypes.py_object_type - def analyse_types(self, env): - self.cname = env.intern_identifier(self.value) + def generate_evaluation_code(self, code): + if self.type.is_pyobject: + self.result_code = code.get_py_string_const(self.value, True) + else: + self.result_code = code.get_string_const(self.value) def calculate_result_code(self): - return self.cname + return self.result_code class LongNode(AtomicNewTempExprNode): @@ -1058,7 +1056,6 @@ class NameNode(AtomicExprNode): # name string Python name of the variable # # entry Entry Symbol table entry - # interned_cname string is_name = True is_cython_module = False @@ -1200,10 +1197,6 @@ class NameNode(AtomicExprNode): entry = self.entry type = entry.type self.type = type - if entry.is_pyglobal or entry.is_builtin: - assert type.is_pyobject, "Python global or builtin not a Python object" - self.interned_cname = self.entry.interned_cname = \ - env.intern_identifier(self.entry.name) def check_identifier_kind(self): #print "NameNode.check_identifier_kind:", self.entry.name ### @@ -1270,6 +1263,8 @@ class NameNode(AtomicExprNode): if entry.is_builtin and Options.cache_builtins: return # Lookup already cached elif entry.is_pyglobal or entry.is_builtin: + assert entry.type.is_pyobject, "Python global or builtin not a Python object" + interned_cname = code.intern_identifier(self.entry.name) if entry.is_builtin: namespace = Naming.builtins_cname else: # entry.is_pyglobal @@ -1278,7 +1273,7 @@ class NameNode(AtomicExprNode): '%s = __Pyx_GetName(%s, %s); %s' % ( self.result(), namespace, - self.interned_cname, + interned_cname, code.error_goto_if_null(self.result(), self.pos))) code.put_gotref(self.py_result()) @@ -1305,6 +1300,8 @@ class NameNode(AtomicExprNode): # is_pyglobal seems to be True for module level-globals only. # We use this to access class->tp_dict if necessary. if entry.is_pyglobal: + assert entry.type.is_pyobject, "Python global or builtin not a Python object" + interned_cname = code.intern_identifier(self.entry.name) namespace = self.entry.scope.namespace_cname if entry.is_member: # if the entry is a member we have to cheat: SetAttr does not work @@ -1312,7 +1309,7 @@ class NameNode(AtomicExprNode): code.put_error_if_neg(self.pos, 'PyDict_SetItem(%s->tp_dict, %s, %s)' % ( namespace, - self.interned_cname, + interned_cname, rhs.py_result())) rhs.generate_disposal_code(code) rhs.free_temps(code) @@ -1323,7 +1320,7 @@ class NameNode(AtomicExprNode): code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % ( namespace, - self.interned_cname, + interned_cname, rhs.py_result())) if debug_disposal_code: print("NameNode.generate_assignment_code:") @@ -2630,7 +2627,6 @@ class AttributeNode(NewTempExprNode): # member string C name of struct member # is_called boolean Function call is being done on result # entry Entry Symbol table entry of attribute - # interned_attr_cname string C name of interned attribute name is_attribute = 1 subexprs = ['obj'] @@ -2830,7 +2826,6 @@ class AttributeNode(NewTempExprNode): if obj_type.is_pyobject: self.type = py_object_type self.is_py_attr = 1 - self.interned_attr_cname = env.intern_identifier(self.attribute) else: if not obj_type.is_error: error(self.pos, @@ -2879,12 +2874,13 @@ class AttributeNode(NewTempExprNode): return "%s%s%s" % (obj_code, self.op, self.member) def generate_result_code(self, code): + interned_attr_cname = code.intern_identifier(self.attribute) if self.is_py_attr: code.putln( '%s = PyObject_GetAttr(%s, %s); %s' % ( self.result(), self.obj.py_result(), - self.interned_attr_cname, + interned_attr_cname, code.error_goto_if_null(self.result(), self.pos))) code.put_gotref(self.py_result()) else: @@ -2896,12 +2892,13 @@ class AttributeNode(NewTempExprNode): self.put_nonecheck(code) def generate_assignment_code(self, rhs, code): + interned_attr_cname = code.intern_identifier(self.attribute) self.obj.generate_evaluation_code(code) if self.is_py_attr: code.put_error_if_neg(self.pos, 'PyObject_SetAttr(%s, %s, %s)' % ( self.obj.py_result(), - self.interned_attr_cname, + interned_attr_cname, rhs.py_result())) rhs.generate_disposal_code(code) rhs.free_temps(code) @@ -2928,12 +2925,13 @@ class AttributeNode(NewTempExprNode): self.obj.free_temps(code) def generate_deletion_code(self, code): + interned_attr_cname = code.intern_identifier(self.attribute) self.obj.generate_evaluation_code(code) if self.is_py_attr: code.put_error_if_neg(self.pos, 'PyObject_DelAttr(%s, %s)' % ( self.obj.py_result(), - self.interned_attr_cname)) + interned_attr_cname)) else: error(self.pos, "Cannot delete C attribute of extension type") self.obj.generate_disposal_code(code) @@ -3552,7 +3550,6 @@ class ClassNode(ExprNode): # a name, tuple of bases and class dictionary. # # name EncodedString Name of the class - # cname string Class name as a Python string # bases ExprNode Base class tuple # dict ExprNode Class dict (not owned by this node) # doc ExprNode or None Doc string @@ -3561,7 +3558,6 @@ class ClassNode(ExprNode): subexprs = ['bases', 'doc'] def analyse_types(self, env): - self.cname = env.intern_identifier(self.name) self.bases.analyse_types(env) if self.doc: self.doc.analyse_types(env) @@ -3574,6 +3570,7 @@ class ClassNode(ExprNode): gil_message = "Constructing Python class" def generate_result_code(self, code): + cname = code.intern_identifier(self.name) if self.doc: code.put_error_if_neg(self.pos, 'PyDict_SetItemString(%s, "__doc__", %s)' % ( @@ -3584,7 +3581,7 @@ class ClassNode(ExprNode): self.result(), self.bases.py_result(), self.dict.py_result(), - self.cname, + cname, self.module_name, code.error_goto_if_null(self.result(), self.pos))) code.put_gotref(self.py_result()) diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index 1d519228..5027a361 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -261,7 +261,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): self.generate_py_string_decls(env, code) code.globalstate.insert_global_var_declarations_into(code) - + self.generate_cached_builtins_decls(env, code) self.body.generate_function_definitions(env, code) code.mark_pos(None) @@ -550,13 +550,6 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): code.putln('static const char * %s= %s;' % (Naming.cfilenm_cname, Naming.file_c_macro)) code.putln('static const char *%s;' % Naming.filename_cname) code.putln('static const char **%s;' % Naming.filetable_cname) - if env.doc: - docstr = env.doc - if not isinstance(docstr, str): - docstr = docstr.utf8encode() - code.putln('') - code.putln('static char %s[] = "%s";' % ( - env.doc_cname, escape_byte_string(docstr))) env.use_utility_code(streq_utility_code) @@ -1491,12 +1484,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): "static struct PyGetSetDef %s[] = {" % env.getset_table_cname) for entry in env.property_entries: + if entry.doc: + doc_code = "__Pyx_DOCSTR(%s)" % code.get_string_const(entry.doc) + else: + doc_code = "0" code.putln( '{(char *)"%s", %s, %s, %s, 0},' % ( entry.name, entry.getter_cname or "0", entry.setter_cname or "0", - entry.doc_cname or "0")) + doc_code)) code.putln( "{0, 0, 0, 0, 0}") code.putln( @@ -1719,7 +1716,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): def generate_pymoduledef_struct(self, env, code): if env.doc: - doc = "__Pyx_DOCSTR(%s)" % env.doc_cname + doc = "__Pyx_DOCSTR(%s)" % code.get_string_const(env.doc) else: doc = "0" code.putln("") @@ -1741,7 +1738,7 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): # Generate code to create the module object and # install the builtins. if env.doc: - doc = env.doc_cname + doc = "__Pyx_DOCSTR(%s)" % code.get_string_const(env.doc) else: doc = "0" code.putln("#if PY_MAJOR_VERSION < 3") diff --git a/Cython/Compiler/Naming.py b/Cython/Compiler/Naming.py index bfed035e..2f0ad92c 100644 --- a/Cython/Compiler/Naming.py +++ b/Cython/Compiler/Naming.py @@ -27,6 +27,7 @@ label_prefix = pyrex_prefix + "L" pymethdef_prefix = pyrex_prefix + "mdef_" methtab_prefix = pyrex_prefix + "methods_" memtab_prefix = pyrex_prefix + "members_" +interned_str_prefix = pyrex_prefix + "n_" interned_num_prefix = pyrex_prefix + "int_" objstruct_prefix = pyrex_prefix + "obj_" typeptr_prefix = pyrex_prefix + "ptype_" diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 9328dc85..c38d233d 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -489,7 +489,7 @@ class CArrayDeclaratorNode(CDeclaratorNode): self.dimension.analyse_const_expression(env) if not self.dimension.type.is_int: error(self.dimension.pos, "Array dimension not integer") - size = self.dimension.result() + size = self.dimension.compile_time_value(env) try: size = int(size) except ValueError: @@ -581,7 +581,7 @@ class CFuncDeclaratorNode(CDeclaratorNode): "Exception value must be a Python exception or cdef function with no arguments.") exc_val = self.exception_value else: - exc_val = self.exception_value.result() + exc_val = self.exception_value.compile_time_value(env) if not return_type.assignable_from(self.exception_value.type): error(self.exception_value.pos, "Exception value incompatible with function return type") @@ -642,6 +642,17 @@ class CArgDeclNode(Node): else: return self.name_declarator, self.type + def prepare_default_result_code(self, code): + if self.default: + if self.default.is_literal: + # FIXME: IS IT OK TO CALL THIS HERE??? + self.default.generate_evaluation_code(code) + self.default_result_code = self.default.result() + if self.default.type != self.type and not self.type.is_int: + self.default_result_code = self.type.cast_code(self.default_result_code) + else: + self.default_result_code = self.default_entry.cname + def annotate(self, code): if self.default: self.default.annotate(code) @@ -934,7 +945,7 @@ class CEnumDefItemNode(StatNode): if not self.value.type.is_int: self.value = self.value.coerce_to(PyrexTypes.c_int_type, env) self.value.analyse_const_expression(env) - value = self.value.result() + value = self.value.compile_time_value(env) else: value = self.name entry = env.declare_const(self.name, enum_entry.type, @@ -989,21 +1000,17 @@ class FuncDefNode(StatNode, BlockNode): arg.default = arg.default.coerce_to(arg.type, genv) if arg.default.is_literal: arg.default_entry = arg.default - arg.default_result_code = arg.default.calculate_result_code() - if arg.default.type != arg.type and not arg.type.is_int: - arg.default_result_code = arg.type.cast_code(arg.default_result_code) else: arg.default.allocate_temps(genv) arg.default_entry = genv.add_default_value(arg.type) if arg.type.is_pyobject: arg.default_entry.init = 0 arg.default_entry.used = 1 - arg.default_result_code = arg.default_entry.cname else: error(arg.pos, "This argument cannot have a default value") arg.default = None - + def need_gil_acquisition(self, lenv): return 0 @@ -1350,8 +1357,6 @@ class CFuncDefNode(FuncDefNode): self.entry.as_variable = self.py_func.entry # Reset scope entry the above cfunction env.entries[name] = self.entry - self.py_func.interned_attr_cname = env.intern_identifier( - self.py_func.entry.name) if not env.is_module_scope or Options.lookup_module_cpdef: self.override = OverrideCheckNode(self.pos, py_func = self.py_func) self.body = StatListNode(self.pos, stats=[self.override, self.body]) @@ -1433,7 +1438,9 @@ class CFuncDefNode(FuncDefNode): def generate_argument_declarations(self, env, code): for arg in self.args: if arg.default: - code.putln('%s = %s;' % (arg.type.declaration_code(arg.cname), arg.default_result_code)) + arg.prepare_default_result_code(code) + code.putln('%s = %s;' % ( + arg.type.declaration_code(arg.cname), arg.default_result_code)) def generate_keyword_list(self, code): pass @@ -1793,10 +1800,6 @@ class DefNode(FuncDefNode): arg.entry = self.declare_argument(env, arg) arg.entry.used = 1 arg.entry.is_self_arg = arg.is_self_arg - if not arg.is_self_arg: - arg.name_entry = env.get_string_const( - arg.name, identifier = True) - env.add_py_string(arg.name_entry, identifier = True) if arg.hdr_type: if arg.is_self_arg or \ (arg.type.is_extension_type and not arg.hdr_type.is_extension_type): @@ -1878,6 +1881,8 @@ class DefNode(FuncDefNode): code.putln("PyObject *%s = 0;" % arg.hdr_cname) else: code.put_var_declaration(arg.entry) + if arg.default: + arg.prepare_default_result_code(code) def generate_keyword_list(self, code): if self.signature_has_generic_args() and \ @@ -1887,7 +1892,8 @@ class DefNode(FuncDefNode): Naming.pykwdlist_cname) for arg in self.args: if arg.is_generic: - code.put('&%s,' % arg.name_entry.pystring_cname) + pystring_cname = code.intern_identifier(arg.name) + code.put('&%s,' % pystring_cname) code.putln("0};") def generate_argument_parsing_code(self, env, code): @@ -2073,10 +2079,11 @@ class DefNode(FuncDefNode): code.putln('} else {') for i, arg in enumerate(kw_only_args): if not arg.default: + pystring_cname = code.intern_identifier(arg.name) # required keyword-only argument missing code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' % ( self.name.utf8encode(), - arg.name_entry.pystring_cname)) + pystring_cname)) code.putln(code.error_goto(self.pos)) break @@ -2224,19 +2231,20 @@ class DefNode(FuncDefNode): code.putln('default:') else: code.putln('case %2d:' % i) + pystring_cname = code.intern_identifier(arg.name) if arg.default: if arg.kw_only: # handled separately below continue code.putln('if (kw_args > %d) {' % num_required_args) code.putln('PyObject* value = PyDict_GetItem(%s, %s);' % ( - Naming.kwds_cname, arg.name_entry.pystring_cname)) + Naming.kwds_cname, pystring_cname)) code.putln('if (unlikely(value)) { values[%d] = value; kw_args--; }' % i) code.putln('}') else: num_required_args -= 1 code.putln('values[%d] = PyDict_GetItem(%s, %s);' % ( - i, Naming.kwds_cname, arg.name_entry.pystring_cname)) + i, Naming.kwds_cname, pystring_cname)) code.putln('if (likely(values[%d])) kw_args--;' % i); if i < min_positional_args: if i == 0: @@ -2256,7 +2264,7 @@ class DefNode(FuncDefNode): elif arg.kw_only: code.putln('else {') code.put('__Pyx_RaiseKeywordRequired("%s", %s); ' %( - self.name.utf8encode(), arg.name_entry.pystring_cname)) + self.name.utf8encode(), pystring_cname)) code.putln(code.error_goto(self.pos)) code.putln('}') if max_positional_args > 0: @@ -2276,9 +2284,10 @@ class DefNode(FuncDefNode): code.putln('while (kw_args > 0) {') code.putln('PyObject* value;') for i, arg in optional_args: + pystring_cname = code.intern_identifier(arg.name) code.putln( 'value = PyDict_GetItem(%s, %s);' % ( - Naming.kwds_cname, arg.name_entry.pystring_cname)) + Naming.kwds_cname, pystring_cname)) code.putln( 'if (value) { values[%d] = value; if (!(--kw_args)) break; }' % i) code.putln('break;') @@ -2445,6 +2454,7 @@ class OverrideCheckNode(StatNode): self.body.analyse_expressions(env) def generate_execution_code(self, code): + interned_attr_cname = code.intern_identifier(self.py_func.entry.name) # Check to see if we are an extension type if self.py_func.is_module_scope: self_arg = "((PyObject *)%s)" % Naming.module_cname @@ -2459,10 +2469,12 @@ class OverrideCheckNode(StatNode): code.putln("else if (unlikely(Py_TYPE(%s)->tp_dictoffset != 0)) {" % self_arg) err = code.error_goto_if_null(self.func_node.result(), self.pos) # need to get attribute manually--scope would return cdef method - code.putln("%s = PyObject_GetAttr(%s, %s); %s" % (self.func_node.result(), self_arg, self.py_func.interned_attr_cname, err)) + code.putln("%s = PyObject_GetAttr(%s, %s); %s" % ( + self.func_node.result(), self_arg, interned_attr_cname, err)) code.put_gotref(self.func_node.py_result()) is_builtin_function_or_method = 'PyCFunction_Check(%s)' % self.func_node.result() - is_overridden = '(PyCFunction_GET_FUNCTION(%s) != (void *)&%s)' % (self.func_node.result(), self.py_func.entry.func_cname) + is_overridden = '(PyCFunction_GET_FUNCTION(%s) != (void *)&%s)' % ( + self.func_node.result(), self.py_func.entry.func_cname) code.putln('if (!%s || %s) {' % (is_builtin_function_or_method, is_overridden)) self.body.generate_execution_code(code) code.putln('}') @@ -2732,10 +2744,6 @@ class PropertyNode(StatNode): def analyse_declarations(self, env): entry = env.declare_property(self.name, self.doc, self.pos) if entry: - if self.doc and Options.docstrings: - doc_entry = env.get_string_const( - self.doc, identifier = False) - entry.doc_cname = doc_entry.cname self.body.analyse_declarations(entry.scope) def analyse_expressions(self, env): @@ -4709,7 +4717,7 @@ class FromImportStatNode(StatNode): else: coerced_item = self.item.coerce_to(target.type, env) self.interned_items.append( - (env.intern_identifier(name), target, coerced_item)) + (name, target, coerced_item)) #target.release_target_temp(env) # was release_temp ?!? self.module.release_temp(env) self.item.release_temp(env) @@ -4722,7 +4730,8 @@ class FromImportStatNode(StatNode): Naming.import_star, self.module.py_result(), code.error_goto(self.pos))) - for cname, target, coerced_item in self.interned_items: + for name, target, coerced_item in self.interned_items: + cname = code.intern_identifier(name) code.putln( '%s = PyObject_GetAttr(%s, %s); %s' % ( self.item.result(), diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index 58b57c85..8ce80ff1 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -248,9 +248,6 @@ class Scope(object): def __str__(self): return "<%s %s>" % (self.__class__.__name__, self.qualified_name) - def intern_identifier(self, name): - return self.global_scope().intern_identifier(name) - def qualifying_scope(self): return self.parent_scope @@ -521,98 +518,6 @@ class Scope(object): if entry and entry.is_type: return entry.type - def add_string_const(self, value, identifier = False): - # Add an entry for a string constant. - if identifier: - cname = self.new_string_const_cname(value) - else: - cname = self.new_const_cname() - if value.is_unicode: - c_type = PyrexTypes.c_utf8_char_array_type - value = value.utf8encode() - else: - c_type = PyrexTypes.c_char_array_type - value = value.byteencode() - entry = Entry("", cname, c_type, init = value) - entry.used = 1 - self.const_entries.append(entry) - return entry - - def get_string_const(self, value, identifier = False): - # Get entry for string constant. Returns an existing - # one if possible, otherwise creates a new one. - genv = self.global_scope() - if identifier: - string_map = genv.identifier_to_entry - else: - string_map = genv.string_to_entry - entry = string_map.get(value) - if not entry: - entry = self.add_string_const(value, identifier) - entry.is_identifier = identifier - string_map[value] = entry - return entry - - def add_py_string(self, entry, identifier = None): - # If not already done, allocate a C name for a Python version of - # a string literal, and add it to the list of Python strings to - # be created at module init time. If the string resembles a - # Python identifier, it will be interned. - if entry.pystring_cname: - return - value = entry.init - entry.pystring_cname = Naming.py_const_prefix + entry.cname[len(Naming.const_prefix):] - self.pystring_entries.append(entry) - self.global_scope().all_pystring_entries.append(entry) - if identifier or (identifier is None and possible_identifier(value)): - entry.is_interned = 1 - self.global_scope().new_interned_string_entries.append(entry) - - def add_py_num(self, value): - # Add an entry for an int constant. - cname = "%s%s" % (Naming.interned_num_prefix, value) - cname = cname.replace('-', 'neg_').replace('.','_') - entry = Entry("", cname, py_object_type, init = value) - entry.used = 1 - entry.is_interned = 1 - self.const_entries.append(entry) - self.interned_nums.append(entry) - return entry - - def get_py_num(self, value, longness): - # Get entry for int constant. Returns an existing - # one if possible, otherwise creates a new one. - if longness or Utils.long_literal(value): - value += "L" - genv = self.global_scope() - entry = genv.num_to_entry.get(value) - if not entry: - entry = genv.add_py_num(value) - genv.num_to_entry[value] = entry - genv.pynum_entries.append(entry) - return entry - - def get_py_obj(self, obj, c_prefix=''): - # Get entry for a generic constant. Returns an existing - # one if possible, otherwise creates a new one. - genv = self.global_scope() - entry = genv.obj_to_entry.get(obj) - if not entry: - entry = genv.add_py_num(obj, c_prefix) - genv.obj_to_entry[obj] = entry - return entry - - def new_string_const_cname(self, value): - # Create a new globally-unique nice name for a string constant. - if len(value) < 20 and nice_identifier(value): - return "%s%s" % (Naming.const_prefix, value) - else: - return self.global_scope().new_const_cname() - - def new_const_cname(self): - # Create a new globally-unique name for a constant. - return self.global_scope().new_const_cname() - def allocate_temp(self, type): # Allocate a temporary variable of the given type from the # free list if available, otherwise create a new one. @@ -870,11 +775,6 @@ class ModuleScope(Scope): entry.is_builtin = 1 return entry - def intern_identifier(self, name): - string_entry = self.get_string_const(name, identifier = True) - self.add_py_string(string_entry, identifier = 1) - return string_entry.pystring_cname - def find_module(self, module_name, pos): # Find a module in the import namespace, interpreting # relative imports relative to this module's parent. @@ -1416,7 +1316,6 @@ class CClassScope(ClassScope): # I keep it in for now. is_member should be enough # later on self.namespace_cname = "(PyObject *)%s" % self.parent_type.typeptr_cname - entry.interned_cname = self.intern_identifier(name) return entry diff --git a/tests/errors/e_strcoerce.pyx b/tests/errors/e_strcoerce.pyx index 89968d76..187a4eef 100644 --- a/tests/errors/e_strcoerce.pyx +++ b/tests/errors/e_strcoerce.pyx @@ -2,7 +2,12 @@ cdef int c1 = "t" # works cdef int c2 = "te" # fails cdef int cx = "test" # fails +cdef int x1 = "\xFF" # works +cdef int x2 = u"\xFF" # fails + + _ERRORS = u""" -2:14: Only coerce single-character ascii strings can be used as ints. -3:14: Only coerce single-character ascii strings can be used as ints. +2:14: Only single-character byte strings can be coerced into ints. +3:14: Only single-character byte strings can be coerced into ints. +6:14: Unicode objects do not support coercion to C types. """ -- 2.26.2