test and fix for unicode.encode() transformation
author Stefan Behnel <scoder@users.berlios.de>
Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
Cython/Compiler/Optimize.py
tests/run/unicodeencode.pyx [new file with mode: 0644]

index 71b0eeda4910637e28f62ca67cf178f80fe3c3d4..261f6c148b9f68facf4293f6f277984b540eb8db 100644 (file)
@@ -788,7 +788,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
 
     PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
         Builtin.bytes_type, [
-            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
             PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
             PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
             ],
@@ -796,7 +796,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
 
     PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
         Builtin.bytes_type, [
-            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
             ],
         exception_value = "NULL")
 
@@ -880,11 +880,11 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
                         self.PyUnicode_AsXyzString_func_type,
                         'encode', is_unbound_method, [string_node])
 
-        if len(args) == 2:
             return self._substitute_method_call(
                 node, "PyUnicode_AsEncodedString",
                 self.PyUnicode_AsEncodedString_func_type,
-                'encode', is_unbound_method, [string_node, encoding_node])
+                'encode', is_unbound_method,
+                [string_node, encoding_node, null_node])
 
         return self._substitute_method_call(
             node, "PyUnicode_AsEncodedString",
diff --git a/tests/run/unicodeencode.pyx b/tests/run/unicodeencode.pyx
new file mode 100644 (file)
index 0000000..ae3e0fa
--- /dev/null
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+__doc__ = u"""
+>>> len(u)
+15
+>>> default == 'abcdefg'.encode()
+True
+>>> isinstance(utf8, _bytes)
+True
+>>> utf8 == u.encode('UTF-8')
+True
+>>> isinstance(utf8_strict, _bytes)
+True
+>>> utf8_strict == u.encode('UTF-8', 'strict')
+True
+>>> isinstance(ascii_replace, _bytes)
+True
+>>> ascii_replace == u.encode('ASCII', 'replace')
+True
+>>> isinstance(cp850_strict, _bytes)
+True
+>>> cp850_strict == u.encode('cp850', 'strict')
+True
+>>> isinstance(latin1, _bytes)
+True
+>>> latin1 == u.encode('latin-1')
+True
+>>> isinstance(latin1_constant, _bytes)
+True
+>>> latin1_constant == latin1
+True
+"""
+
+_bytes = bytes
+
+cdef unicode text = u'abcäöüöéèâÁÀABC'
+
+u = text
+
+default = u'abcdefg'.encode()
+
+utf8 = text.encode(u'UTF-8')
+
+utf8_strict = text.encode(u'UTF-8', u'strict')
+
+ascii_replace = text.encode(u'ASCII', u'replace')
+
+cp850_strict = text.encode(u'cp850', u'strict')
+
+latin1 = text.encode(u'latin-1')
+
+latin1_constant = u'abcäöüöéèâÁÀABC'.encode('latin1')