From 1aeb2d9b23c4cf49d9e413399cb371c8bb3f93a9 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Wed, 12 May 2010 19:11:00 +0200 Subject: [PATCH] prevent redundant coercion from Py_UNICODE to a unicode string when the subscript index is a Python object - this case is no longer optimised --- Cython/Compiler/ExprNodes.py | 35 ++++++++++++------------ tests/run/unicode_indexing.pyx | 50 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 8bc156b1..6cb86bc2 100755 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -1897,9 +1897,14 @@ class IndexNode(ExprNode): base_type = self.base.infer_type(env) if base_type.is_ptr or base_type.is_array: return base_type.base_type - elif base_type is Builtin.unicode_type: + elif base_type is Builtin.unicode_type and self.index.infer_type(env).is_int: # Py_UNICODE will automatically coerce to a unicode string - # if required, so this is safe + # if required, so this is safe. We only infer Py_UNICODE + # when the index is a C integer type. Otherwise, we may + # need to use normal Python item access, in which case + # it's faster to return the one-char unicode string than + # to receive it, throw it away, and potentially rebuild it + # on a subsequent PyObject coercion. return PyrexTypes.c_py_unicode_type else: # TODO: Handle buffers (hopefully without too much redundancy). @@ -1988,9 +1993,9 @@ class IndexNode(ExprNode): else: self.index = self.index.coerce_to_pyobject(env) self.is_temp = 1 - if base_type is unicode_type: + if self.index.type.is_int and base_type is unicode_type: # Py_UNICODE will automatically coerce to a unicode string - # if required, so this is safe + # if required, so this is fast and safe self.type = PyrexTypes.c_py_unicode_type else: self.type = py_object_type @@ -2128,13 +2133,10 @@ class IndexNode(ExprNode): code.error_goto(self.pos))) code.put_gotref(self.py_result()) elif self.type is PyrexTypes.c_py_unicode_type and self.base.type is unicode_type: + assert self.index.type.is_int + index_code = self.index.result() + function = "__Pyx_GetItemInt_Unicode" code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code) - if self.index.type.is_int: - index_code = self.index.result() - function = "__Pyx_GetItemInt_Unicode" - else: - index_code = self.index.py_result() - function = "__Pyx_GetItemInt_Unicode_Generic" code.putln( "%s = %s(%s, %s%s); if (unlikely(%s == (Py_UNICODE)-1)) %s;" % ( self.result(), @@ -6784,18 +6786,17 @@ static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, } static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, PyObject* j) { - PyObject *r; Py_UNICODE uchar; + PyObject *uchar_string; if (!j) return (Py_UNICODE)-1; - r = PyObject_GetItem(ustring, j); + uchar_string = PyObject_GetItem(ustring, j); Py_DECREF(j); - if (!r) return (Py_UNICODE)-1; - uchar = PyUnicode_AS_UNICODE(r)[0]; - Py_DECREF(r); + if (!uchar_string) return (Py_UNICODE)-1; + uchar = PyUnicode_AS_UNICODE(uchar_string)[0]; + Py_DECREF(uchar_string); return uchar; } -''', -) +''') getitem_int_utility_code = UtilityCode( proto = """ diff --git a/tests/run/unicode_indexing.pyx b/tests/run/unicode_indexing.pyx index 505f30e1..db1b5b69 100644 --- a/tests/run/unicode_indexing.pyx +++ b/tests/run/unicode_indexing.pyx @@ -26,6 +26,26 @@ def index(unicode ustring, Py_ssize_t i): return ustring[i] +@cython.test_assert_path_exists("//IndexNode") +@cython.test_fail_if_path_exists("//CoerceToPyTypeNode") +def index_pyindex(unicode ustring, i): + """ + >>> index(ustring, 0) == 'a' + True + >>> index(ustring, 2) == 'e' + True + >>> index(ustring, -1) == '6' + True + >>> index(ustring, -len(ustring)) == 'a' + True + + >>> index(ustring, len(ustring)) + Traceback (most recent call last): + IndexError: string index out of range + """ + return ustring[i] + + @cython.test_assert_path_exists("//CoerceToPyTypeNode", "//IndexNode") @@ -219,3 +239,33 @@ def index_add(unicode ustring, Py_ssize_t i, Py_ssize_t j): True """ return ustring[i] + ustring[j] + + +@cython.test_assert_path_exists("//CoerceToPyTypeNode", + "//IndexNode", + "//InPlaceAssignmentNode", + "//CoerceToPyTypeNode//IndexNode") +@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode") +def index_concat_loop(unicode ustring): + """ + >>> index_concat_loop(ustring) == ustring + True + """ + cdef int i + cdef unicode s = u'' + for i in range(len(ustring)): + s += ustring[i] + return s + + +@cython.test_assert_path_exists("//CoerceToPyTypeNode", + "//IndexNode", + "//CoerceToPyTypeNode//IndexNode") +@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode") +def index_join_loop(unicode ustring): + """ + >>> index_join_loop(ustring) == ustring + True + """ + cdef int i + return u''.join([ ustring[i] for i in range(len(ustring)) ]) -- 2.26.2