From: Stefan Behnel Date: Tue, 15 Sep 2009 11:26:07 +0000 (+0200) Subject: test and fix for unicode.encode() transformation X-Git-Tag: 0.12.alpha0~199 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=bb09b855f8567414bfee9ec07fcf9aebcb27730a;p=cython.git test and fix for unicode.encode() transformation --- diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py index 71b0eeda..261f6c14 100644 --- a/Cython/Compiler/Optimize.py +++ b/Cython/Compiler/Optimize.py @@ -788,7 +788,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform): PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType( Builtin.bytes_type, [ - PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None), + PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None), PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None), ], @@ -796,7 +796,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform): PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType( Builtin.bytes_type, [ - PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None), + PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None), ], exception_value = "NULL") @@ -880,11 +880,11 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform): self.PyUnicode_AsXyzString_func_type, 'encode', is_unbound_method, [string_node]) - if len(args) == 2: return self._substitute_method_call( node, "PyUnicode_AsEncodedString", self.PyUnicode_AsEncodedString_func_type, - 'encode', is_unbound_method, [string_node, encoding_node]) + 'encode', is_unbound_method, + [string_node, encoding_node, null_node]) return self._substitute_method_call( node, "PyUnicode_AsEncodedString", diff --git a/tests/run/unicodeencode.pyx b/tests/run/unicodeencode.pyx new file mode 100644 index 00000000..ae3e0fa7 --- /dev/null +++ b/tests/run/unicodeencode.pyx @@ -0,0 +1,52 @@ +# -*- coding: utf-8 -*- + +__doc__ = u""" +>>> len(u) +15 +>>> default == 'abcdefg'.encode() +True +>>> isinstance(utf8, _bytes) +True +>>> utf8 == u.encode('UTF-8') +True +>>> isinstance(utf8_strict, _bytes) +True +>>> utf8_strict == u.encode('UTF-8', 'strict') +True +>>> isinstance(ascii_replace, _bytes) +True +>>> ascii_replace == u.encode('ASCII', 'replace') +True +>>> isinstance(cp850_strict, _bytes) +True +>>> cp850_strict == u.encode('cp850', 'strict') +True +>>> isinstance(latin1, _bytes) +True +>>> latin1 == u.encode('latin-1') +True +>>> isinstance(latin1_constant, _bytes) +True +>>> latin1_constant == latin1 +True +""" + +_bytes = bytes + +cdef unicode text = u'abcäöüöéèâÁÀABC' + +u = text + +default = u'abcdefg'.encode() + +utf8 = text.encode(u'UTF-8') + +utf8_strict = text.encode(u'UTF-8', u'strict') + +ascii_replace = text.encode(u'ASCII', u'replace') + +cp850_strict = text.encode(u'cp850', u'strict') + +latin1 = text.encode(u'latin-1') + +latin1_constant = u'abcäöüöéèâÁÀABC'.encode('latin1')