From 99eac7ae52760e94dd2234c6a4e89dbd8af31420 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sun, 6 Dec 2009 11:22:59 +0100 Subject: [PATCH] support charptr.decode() with non-literal encodings and fix some temp usage issues; test case split --HG-- rename : tests/run/carray_slicing.pyx => tests/run/charptr_decode.pyx --- Cython/Compiler/Optimize.py | 44 ++++++++++------- tests/run/carray_slicing.pyx | 57 +--------------------- tests/run/charptr_decode.pyx | 94 ++++++++++++++++++++++++++++++++++++ 3 files changed, 121 insertions(+), 74 deletions(-) create mode 100644 tests/run/charptr_decode.pyx diff --git a/Cython/Compiler/Optimize.py b/Cython/Compiler/Optimize.py index 57864d5f..69099b0d 100644 --- a/Cython/Compiler/Optimize.py +++ b/Cython/Compiler/Optimize.py @@ -1312,7 +1312,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): else: if start.type.is_pyobject: start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.env_stack[-1]) - if not start.is_simple: + if stop: start = UtilNodes.LetRefNode(start) temps.append(start) string_node = ExprNodes.AddNode(pos=start.pos, @@ -1334,7 +1334,7 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): return node if not stop: - if start or not string_node.is_simple: + if start or not string_node.is_name: string_node = UtilNodes.LetRefNode(string_node) temps.append(string_node) stop = ExprNodes.PythonCapiCallNode( @@ -1359,7 +1359,9 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): encoding, encoding_node, error_handling, error_handling_node = parameters # try to find a specific encoder function - codec_name = self._find_special_codec_name(encoding) + codec_name = None + if encoding is not None: + codec_name = self._find_special_codec_name(encoding) if codec_name is not None: decode_function = "PyUnicode_Decode%s" % codec_name node = ExprNodes.PythonCapiCallNode( @@ -1397,29 +1399,35 @@ class OptimizeBuiltinCalls(Visitor.EnvTransform): encoding_node = args[1] if isinstance(encoding_node, ExprNodes.CoerceToPyTypeNode): encoding_node = encoding_node.arg - if not isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode, - ExprNodes.BytesNode)): + if isinstance(encoding_node, (ExprNodes.UnicodeNode, ExprNodes.StringNode, + ExprNodes.BytesNode)): + encoding = encoding_node.value + encoding_node = ExprNodes.BytesNode(encoding_node.pos, value=encoding, + type=PyrexTypes.c_char_ptr_type) + elif encoding_node.type.is_string: + encoding = None + else: return None - encoding = encoding_node.value - encoding_node = ExprNodes.BytesNode(encoding_node.pos, value=encoding, - type=PyrexTypes.c_char_ptr_type) null_node = ExprNodes.NullNode(pos) if len(args) == 3: error_handling_node = args[2] if isinstance(error_handling_node, ExprNodes.CoerceToPyTypeNode): error_handling_node = error_handling_node.arg - if not isinstance(error_handling_node, - (ExprNodes.UnicodeNode, ExprNodes.StringNode, - ExprNodes.BytesNode)): - return None - error_handling = error_handling_node.value - if error_handling == 'strict': - error_handling_node = null_node + if isinstance(error_handling_node, + (ExprNodes.UnicodeNode, ExprNodes.StringNode, + ExprNodes.BytesNode)): + error_handling = error_handling_node.value + if error_handling == 'strict': + error_handling_node = null_node + else: + error_handling_node = ExprNodes.BytesNode( + error_handling_node.pos, value=error_handling, + type=PyrexTypes.c_char_ptr_type) + elif error_handling_node.type.is_string: + error_handling = None else: - error_handling_node = ExprNodes.BytesNode( - error_handling_node.pos, value=error_handling, - type=PyrexTypes.c_char_ptr_type) + return None else: error_handling = 'strict' error_handling_node = null_node diff --git a/tests/run/carray_slicing.pyx b/tests/run/carray_slicing.pyx index 6f7cf3d2..89e30512 100644 --- a/tests/run/carray_slicing.pyx +++ b/tests/run/carray_slicing.pyx @@ -13,61 +13,6 @@ def slice_charptr_end(): """ return cstring[:1], cstring[:3], cstring[:9] -@cython.test_assert_path_exists("//PythonCapiCallNode") -@cython.test_fail_if_path_exists("//AttributeNode") -def slice_charptr_decode(): - """ - >>> print(str(slice_charptr_decode()).replace("u'", "'")) - ('a', 'abc', 'abcABCqtp') - """ - return (cstring[:1].decode('UTF-8'), - cstring[:3].decode('UTF-8'), - cstring[:9].decode('UTF-8')) - -@cython.test_assert_path_exists("//PythonCapiCallNode") -@cython.test_fail_if_path_exists("//AttributeNode") -def slice_charptr_decode_slice2(): - """ - >>> print(str(slice_charptr_decode_slice2()).replace("u'", "'")) - ('a', 'bc', 'tp') - """ - return (cstring[0:1].decode('UTF-8'), - cstring[1:3].decode('UTF-8'), - cstring[7:9].decode('UTF-8')) - -@cython.test_assert_path_exists("//PythonCapiCallNode") -@cython.test_fail_if_path_exists("//AttributeNode") -def slice_charptr_decode_strlen(): - """ - >>> print(str(slice_charptr_decode_strlen()).replace("u'", "'")) - ('abcABCqtp', 'bcABCqtp', '') - """ - return (cstring.decode('UTF-8'), - cstring[1:].decode('UTF-8'), - cstring[9:].decode('UTF-8')) - -@cython.test_assert_path_exists("//PythonCapiCallNode") -@cython.test_fail_if_path_exists("//AttributeNode") -def slice_charptr_decode_unbound(): - """ - >>> print(str(slice_charptr_decode_unbound()).replace("u'", "'")) - ('a', 'abc', 'abcABCqtp') - """ - return (bytes.decode(cstring[:1], 'UTF-8'), - bytes.decode(cstring[:3], 'UTF-8', 'replace'), - bytes.decode(cstring[:9], 'UTF-8')) - -@cython.test_assert_path_exists("//PythonCapiCallNode") -@cython.test_fail_if_path_exists("//AttributeNode") -def slice_charptr_decode_errormode(): - """ - >>> print(str(slice_charptr_decode_errormode()).replace("u'", "'")) - ('a', 'abc', 'abcABCqtp') - """ - return (cstring[:1].decode('UTF-8', 'strict'), - cstring[:3].decode('UTF-8', 'replace'), - cstring[:9].decode('UTF-8', 'unicode_escape')) - @cython.test_assert_path_exists("//ForFromStatNode", "//ForFromStatNode//SliceIndexNode") @cython.test_fail_if_path_exists("//ForInStatNode") @@ -117,7 +62,7 @@ def slice_charptr_for_loop_c(): @cython.test_fail_if_path_exists("//ForInStatNode") def slice_charptr_for_loop_c_dynamic_bounds(): """ - >>> slice_charptr_for_loop_c() + >>> slice_charptr_for_loop_c_dynamic_bounds() ['a', 'b', 'c'] ['b', 'c', 'A', 'B'] ['B', 'C', 'q', 't', 'p'] diff --git a/tests/run/charptr_decode.pyx b/tests/run/charptr_decode.pyx new file mode 100644 index 00000000..4194f792 --- /dev/null +++ b/tests/run/charptr_decode.pyx @@ -0,0 +1,94 @@ + +cimport cython + +############################################################ +# tests for char* slicing + +cdef char* cstring = "abcABCqtp" + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode(): + """ + >>> print(str(slice_charptr_decode()).replace("u'", "'")) + ('a', 'abc', 'abcABCqtp') + """ + return (cstring[:1].decode('UTF-8'), + cstring[:3].decode('UTF-8'), + cstring[:9].decode('UTF-8')) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode_unknown_encoding(): + """ + >>> print(str(slice_charptr_decode_unknown_encoding()).replace("u'", "'")) + ('abcABCqtp', 'abcABCqtp', 'abc', 'abcABCqt') + """ + cdef char* enc = 'UTF-8' + cdef char* error_handling = 'strict' + return (cstring.decode(enc), + cstring.decode(enc, error_handling), + cstring[:3].decode(enc), + cstring[:8].decode(enc, error_handling)) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode_slice2(): + """ + >>> print(str(slice_charptr_decode_slice2()).replace("u'", "'")) + ('a', 'bc', 'tp') + """ + return (cstring[0:1].decode('UTF-8'), + cstring[1:3].decode('UTF-8'), + cstring[7:9].decode('UTF-8')) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode_strlen(): + """ + >>> print(str(slice_charptr_decode_strlen()).replace("u'", "'")) + ('abcABCqtp', 'bcABCqtp', '') + """ + return (cstring.decode('UTF-8'), + cstring[1:].decode('UTF-8'), + cstring[9:].decode('UTF-8')) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode_unbound(): + """ + >>> print(str(slice_charptr_decode_unbound()).replace("u'", "'")) + ('a', 'abc', 'abcABCqtp') + """ + return (bytes.decode(cstring[:1], 'UTF-8'), + bytes.decode(cstring[:3], 'UTF-8', 'replace'), + bytes.decode(cstring[:9], 'UTF-8')) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_decode_errormode(): + """ + >>> print(str(slice_charptr_decode_errormode()).replace("u'", "'")) + ('a', 'abc', 'abcABCqtp') + """ + return (cstring[:1].decode('UTF-8', 'strict'), + cstring[:3].decode('UTF-8', 'replace'), + cstring[:9].decode('UTF-8', 'unicode_escape')) + +@cython.test_assert_path_exists("//PythonCapiCallNode") +@cython.test_fail_if_path_exists("//AttributeNode") +def slice_charptr_dynamic_bounds(): + """ + >>> print(str(slice_charptr_dynamic_bounds()).replace("u'", "'")) + ('abc', 'abc', 'bcAB', 'BCqtp') + """ + return (cstring[:return3()].decode('UTF-8'), + cstring[0:return3()].decode('UTF-8'), + cstring[return1():return5()].decode('UTF-8'), + cstring[return4():return9()].decode('UTF-8')) + +cdef return1(): return 1 +cdef return3(): return 3 +cdef return4(): return 4 +cdef return5(): return 5 +cdef return9(): return 9 -- 2.26.2