From 31536bc9847ae146e18816f574534fe4dc24641f Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sat, 10 Oct 2009 16:50:31 +0200 Subject: [PATCH] error reporting on string type coercion --- Cython/Compiler/ExprNodes.py | 77 ++++++++++++++++++++++------- tests/errors/string_assignments.pyx | 65 ++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 17 deletions(-) create mode 100644 tests/errors/string_assignments.pyx diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 4c12f1cb..b10fb2a3 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -32,6 +32,22 @@ class NotConstant(object): pass # just for the name not_a_constant = NotConstant() constant_value_not_set = object() +# error messages when coercing from key[0] to key[1] +find_coercion_error = { + # string related errors + (Builtin.unicode_type, Builtin.bytes_type) : "Cannot convert Unicode string to 'bytes' implicitly, encoding required.", + (Builtin.unicode_type, Builtin.str_type) : "Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.", + (Builtin.unicode_type, PyrexTypes.c_char_ptr_type) : "Unicode objects do not support coercion to C types.", + (Builtin.bytes_type, Builtin.unicode_type) : "Cannot convert 'bytes' object to unicode implicitly, decoding required", + (Builtin.bytes_type, Builtin.str_type) : "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.", + (Builtin.str_type, Builtin.unicode_type) : "str objects do not support coercion to unicode, use a unicode string literal instead (u'')", + (Builtin.str_type, Builtin.bytes_type) : "Cannot convert 'str' to 'bytes' implicitly. This is not portable.", + (Builtin.str_type, PyrexTypes.c_char_ptr_type) : "'str' objects do not support coercion to C types.", + (PyrexTypes.c_char_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required", + (PyrexTypes.c_uchar_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required", + }.get + + class ExprNode(Node): # subexprs [string] Class var holding names of subexpr node attrs # type PyrexType Type of the result @@ -516,6 +532,9 @@ class ExprNode(Node): src_is_py_type = src_type.is_pyobject dst_is_py_type = dst_type.is_pyobject + if self.check_for_coercion_error(dst_type): + return self + if dst_type.is_pyobject: if not src.type.is_pyobject: src = CoerceToPyTypeNode(src, env) @@ -534,10 +553,24 @@ class ExprNode(Node): # is enough, but Cython gets confused when the types are # in different pxi files. if not (str(src.type) == str(dst_type) or dst_type.assignable_from(src_type)): - error(self.pos, "Cannot assign type '%s' to '%s'" % - (src.type, dst_type)) + self.fail_assignment(dst_type) return src + def fail_assignment(self, dst_type): + error(self.pos, "Cannot assign type '%s' to '%s'" % (self.type, dst_type)) + + def check_for_coercion_error(self, dst_type, fail=False, default=None): + if fail and not default: + default = "Cannot assign type '%(FROM)s' to '%(TO)s'" + message = find_coercion_error((self.type, dst_type), default) + if message is not None: + error(self.pos, message % {'FROM': self.type, 'TO': dst_type}) + return True + if fail: + self.fail_assignment(dst_type) + return True + return False + def coerce_to_pyobject(self, env): return self.coerce_to(PyrexTypes.py_object_type, env) @@ -799,10 +832,17 @@ class BytesNode(ConstNode): return self return CharNode(self.pos, value=self.value) - if dst_type.is_pyobject and not self.type.is_pyobject: - node = self.as_py_string_node(env) - else: - node = self + node = self + if not self.type.is_pyobject: + if dst_type in (py_object_type, Builtin.bytes_type): + node = self.as_py_string_node(env) + else: + self.fail_assignment(dst_type) + return self + elif dst_type.is_pyobject and dst_type is not py_object_type: + self.check_for_coercion_error(dst_type, fail=True) + return self + # We still need to perform normal coerce_to processing on the # result, because we might be coercing to an extension type, # in which case a type test node will be needed. @@ -832,13 +872,16 @@ class UnicodeNode(PyConstNode): # value EncodedString type = unicode_type - + def coerce_to(self, dst_type, env): - if dst_type.is_pyobject: - return self - else: + if dst_type is self.type: + pass + elif not dst_type.is_pyobject: error(self.pos, "Unicode objects do not support coercion to C types.") - return self + elif dst_type is not py_object_type: + if not self.check_for_coercion_error(dst_type): + self.fail_assignment(dst_type) + return self def generate_evaluation_code(self, code): self.result_code = code.get_py_string_const(self.value) @@ -859,18 +902,18 @@ class StringNode(PyConstNode): # # value BytesLiteral - type = PyrexTypes.py_object_type + type = Builtin.str_type def coerce_to(self, dst_type, env): - if dst_type is Builtin.unicode_type: - error(self.pos, "str objects do not support coercion to unicode, use a unicode string literal instead (u'')") + if dst_type is Builtin.str_type: return self if dst_type is Builtin.bytes_type: + # special case: bytes = 'str literal' return BytesNode(self.pos, value=self.value) - elif dst_type.is_pyobject: - return self - else: + elif not dst_type.is_pyobject: return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env) + self.check_for_coercion_error(dst_type) + return self def generate_evaluation_code(self, code): self.result_code = code.get_py_string_const(self.value, True) diff --git a/tests/errors/string_assignments.pyx b/tests/errors/string_assignments.pyx new file mode 100644 index 00000000..5a906396 --- /dev/null +++ b/tests/errors/string_assignments.pyx @@ -0,0 +1,65 @@ +# coding: ASCII + +# ok: +cdef char* c1 = "abc" +cdef bytes b1 = "abc" +cdef str s1 = "abc" + +cdef unicode u1 = u"abc" + +cdef bytes b2 = b"abc" +cdef char* c2 = b"abc" + +cdef bytes b3 = c1 +cdef char* c3 = b1 + +cdef object o1 = "abc" +cdef object o2 = b"abc" +cdef object o3 = u"abc" + +o4 = c1 +o5 = b1 +o6 = s1 +o7 = u1 + +# errors: +cdef char* c_f1 = u"abc" +cdef char* c_f2 = u1 +cdef char* c_f3 = s1 + +cdef bytes b_f1 = u"abc" +cdef bytes b_f2 = u1 +cdef bytes b_f3 = s1 + +cdef str s_f1 = b"abc" +cdef str s_f2 = b1 +cdef str s_f3 = u"abc" +cdef str s_f4 = u1 + +cdef unicode u_f1 = "abc" +cdef unicode u_f2 = s1 +cdef unicode u_f3 = b"abc" +cdef unicode u_f4 = b1 +cdef unicode u_f5 = c1 + + +_ERRORS = u""" +26:20: Unicode objects do not support coercion to C types. +27:22: Unicode objects do not support coercion to C types. +28:22: 'str' objects do not support coercion to C types. + +30:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required. +31:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required. +32:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable. + +34:17: Cannot assign type 'char *' to 'str object' +35:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3. +36:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding. +37:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding. + +39:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'') +40:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'') +41:20: Cannot assign type 'char *' to 'unicode object' +42:22: Cannot convert 'bytes' object to unicode implicitly, decoding required +43:22: Cannot convert 'char*' to unicode implicitly, decoding required +""" -- 2.26.2