class UnicodeNode(PyConstNode):
    #  A Python unicode string literal.
    #
    #  entry       Symtab.Entry    set by analyse_types()

    type = PyrexTypes.c_unicode_type

    def analyse_types(self, env):
        # Register the literal's value as a string constant in the
        # scope, then ask the scope for a Python string for that entry.
        self.entry = env.add_string_const(self.value)
        env.add_py_string(self.entry)

    def calculate_result_code(self):
        # The result is the C name of the entry's Python string object.
        return self.entry.pystring_cname

    def _coerce_to(self, dst_type, env):
        # NOTE(review): the leading underscore means this method does not
        # override coerce_to(); presumably it is parked on purpose - confirm
        # before renaming it.
        if dst_type.is_pyobject:
            # NOTE(review): an earlier comment here said that normal
            # coerce_to processing is still needed when coercing to an
            # extension type (a type test node would be required).  This
            # branch returns the node unchanged and does not do that.
            return self
        # Delegate the conversion to a StringNode that shares this node's
        # entry.  The entry must be read from self: there is no local or
        # parameter named 'entry' in this method.
        node = StringNode(self.pos, entry = self.entry, type = py_object_type)
        return ConstNode.coerce_to(node, dst_type, env)
class UnicodeType(BuiltinObjectType):
    #  The builtin Python 'unicode' type.
    #
    #  is_string / is_unicode   both set to 1 for this type
    #  parsetuple_format        format code "O"

    is_string = 1
    is_unicode = 1

    parsetuple_format = "O"

    def __init__(self):
        BuiltinObjectType.__init__(self, "unicode", "PyUnicodeObject")

    def literal_code(self, value):
        # Produce a double-quoted literal from a str value, escaped with
        # Utils.escape_byte_string().
        assert isinstance(value, str)
        escaped = Utils.escape_byte_string(value)
        return '"%s"' % escaped

    def declaration_code(self, entity_code,
            for_display = 0, dll_linkage = None, pyrex = 0):
        # Outside of pyrex/display mode the entity is declared as a
        # 'char' array; otherwise the base declaration under the type's
        # own name is used.
        if not (pyrex or for_display):
            char_decl = public_decl("char", dll_linkage)
            return "%s %s[]" % (char_decl, entity_code)
        return self.base_declaration_code(self.name, entity_code)