test and fix for unicode.encode() transformation

author Stefan Behnel <scoder@users.berlios.de>

Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)

committer Stefan Behnel <scoder@users.berlios.de>

Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
author Stefan Behnel <scoder@users.berlios.de>
Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py

index 71b0eeda4910637e28f62ca67cf178f80fe3c3d4..261f6c148b9f68facf4293f6f277984b540eb8db 100644 (file)
--- a/Cython/Compiler/Optimize.py
+++ b/Cython/Compiler/Optimize.py
@@ -788,7 +788,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
  
      PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
          Builtin.bytes_type, [
-            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
              PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_char_ptr_type, None),
              PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_char_ptr_type, None),
              ],
@@ -796,7 +796,7 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
  
      PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
          Builtin.bytes_type, [
-            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
              ],
          exception_value = "NULL")
  
@@ -880,11 +880,11 @@ class OptimizeBuiltinCalls(Visitor.VisitorTransform):
                          self.PyUnicode_AsXyzString_func_type,
                          'encode', is_unbound_method, [string_node])
  
-        if len(args) == 2:
              return self._substitute_method_call(
                  node, "PyUnicode_AsEncodedString",
                  self.PyUnicode_AsEncodedString_func_type,
-                'encode', is_unbound_method, [string_node, encoding_node])
+                'encode', is_unbound_method,
+                [string_node, encoding_node, null_node])
  
          return self._substitute_method_call(
              node, "PyUnicode_AsEncodedString",
diff --git a/tests/run/unicodeencode.pyx b/tests/run/unicodeencode.pyx

new file mode 100644 (file)

index 0000000..ae3e0fa
--- /dev/null
+++ b/tests/run/unicodeencode.pyx
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+
+__doc__ = u"""
+>>> len(u)
+15
+>>> default == 'abcdefg'.encode()
+True
+>>> isinstance(utf8, _bytes)
+True
+>>> utf8 == u.encode('UTF-8')
+True
+>>> isinstance(utf8_strict, _bytes)
+True
+>>> utf8_strict == u.encode('UTF-8', 'strict')
+True
+>>> isinstance(ascii_replace, _bytes)
+True
+>>> ascii_replace == u.encode('ASCII', 'replace')
+True
+>>> isinstance(cp850_strict, _bytes)
+True
+>>> cp850_strict == u.encode('cp850', 'strict')
+True
+>>> isinstance(latin1, _bytes)
+True
+>>> latin1 == u.encode('latin-1')
+True
+>>> isinstance(latin1_constant, _bytes)
+True
+>>> latin1_constant == latin1
+True
+"""
+
+_bytes = bytes
+
+cdef unicode text = u'abcäöüöéèâÁÀABC'
+
+u = text
+
+default = u'abcdefg'.encode()
+
+utf8 = text.encode(u'UTF-8')
+
+utf8_strict = text.encode(u'UTF-8', u'strict')
+
+ascii_replace = text.encode(u'ASCII', u'replace')
+
+cp850_strict = text.encode(u'cp850', u'strict')
+
+latin1 = text.encode(u'latin-1')
+
+latin1_constant = u'abcäöüöéèâÁÀABC'.encode('latin1')
author	Stefan Behnel <scoder@users.berlios.de>
	Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
committer	Stefan Behnel <scoder@users.berlios.de>
	Tue, 15 Sep 2009 11:26:07 +0000 (13:26 +0200)
Cython/Compiler/Optimize.py		patch | blob | history
tests/run/unicodeencode.pyx	[new file with mode: 0644]	patch | blob