prevent redundant coercion from Py_UNICODE to a unicode string when the subscript...

author Stefan Behnel <scoder@users.berlios.de>

Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)

committer Stefan Behnel <scoder@users.berlios.de>

Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)
author Stefan Behnel <scoder@users.berlios.de>
Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py

index 8bc156b1628ee968ae02955eba2994c5f296484c..6cb86bc2b9f1bc032ff5e5a953f993bd178ba90a 100755 (executable)
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -1897,9 +1897,14 @@ class IndexNode(ExprNode):
          base_type = self.base.infer_type(env)
          if base_type.is_ptr or base_type.is_array:
              return base_type.base_type
-        elif base_type is Builtin.unicode_type:
+        elif base_type is Builtin.unicode_type and self.index.infer_type(env).is_int:
              # Py_UNICODE will automatically coerce to a unicode string
-            # if required, so this is safe
+            # if required, so this is safe. We only infer Py_UNICODE
+            # when the index is a C integer type. Otherwise, we may
+            # need to use normal Python item access, in which case
+            # it's faster to return the one-char unicode string than
+            # to receive it, throw it away, and potentially rebuild it
+            # on a subsequent PyObject coercion.
              return PyrexTypes.c_py_unicode_type
          else:
              # TODO: Handle buffers (hopefully without too much redundancy).
@@ -1988,9 +1993,9 @@ class IndexNode(ExprNode):
                  else:
                      self.index = self.index.coerce_to_pyobject(env)
                      self.is_temp = 1
-                if base_type is unicode_type:
+                if self.index.type.is_int and base_type is unicode_type:
                      # Py_UNICODE will automatically coerce to a unicode string
-                    # if required, so this is safe
+                    # if required, so this is fast and safe
                      self.type = PyrexTypes.c_py_unicode_type
                  else:
                      self.type = py_object_type
@@ -2128,13 +2133,10 @@ class IndexNode(ExprNode):
                          code.error_goto(self.pos)))
                  code.put_gotref(self.py_result())
              elif self.type is PyrexTypes.c_py_unicode_type and self.base.type is unicode_type:
+                assert self.index.type.is_int
+                index_code = self.index.result()
+                function = "__Pyx_GetItemInt_Unicode"
                  code.globalstate.use_utility_code(getitem_int_pyunicode_utility_code)
-                if self.index.type.is_int:
-                    index_code = self.index.result()
-                    function = "__Pyx_GetItemInt_Unicode"
-                else:
-                    index_code = self.index.py_result()
-                    function = "__Pyx_GetItemInt_Unicode_Generic"
                  code.putln(
                      "%s = %s(%s, %s%s); if (unlikely(%s == (Py_UNICODE)-1)) %s;" % (
                          self.result(),
@@ -6784,18 +6786,17 @@ static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring,
  }
  
  static CYTHON_INLINE Py_UNICODE __Pyx_GetItemInt_Unicode_Generic(PyObject* ustring, PyObject* j) {
-    PyObject *r;
      Py_UNICODE uchar;
+    PyObject *uchar_string;
      if (!j) return (Py_UNICODE)-1;
-    r = PyObject_GetItem(ustring, j);
+    uchar_string = PyObject_GetItem(ustring, j);
      Py_DECREF(j);
-    if (!r) return (Py_UNICODE)-1;
-    uchar = PyUnicode_AS_UNICODE(r)[0];
-    Py_DECREF(r);
+    if (!uchar_string) return (Py_UNICODE)-1;
+    uchar = PyUnicode_AS_UNICODE(uchar_string)[0];
+    Py_DECREF(uchar_string);
      return uchar;
  }
-''',
-)
+''')
  
  getitem_int_utility_code = UtilityCode(
  proto = """
diff --git a/tests/run/unicode_indexing.pyx b/tests/run/unicode_indexing.pyx

index 505f30e15b1d8072a5d902d5f06133a17ee41bb1..db1b5b6987497f442aa0bb8ec2ed51d8a92369db 100644 (file)
--- a/tests/run/unicode_indexing.pyx
+++ b/tests/run/unicode_indexing.pyx
@@ -26,6 +26,26 @@ def index(unicode ustring, Py_ssize_t i):
      return ustring[i]
  
  
+@cython.test_assert_path_exists("//IndexNode")
+@cython.test_fail_if_path_exists("//CoerceToPyTypeNode")
+def index_pyindex(unicode ustring, i):
+    """
+    >>> index(ustring, 0) == 'a'
+    True
+    >>> index(ustring, 2) == 'e'
+    True
+    >>> index(ustring, -1) == '6'
+    True
+    >>> index(ustring, -len(ustring)) == 'a'
+    True
+
+    >>> index(ustring, len(ustring))
+    Traceback (most recent call last):
+    IndexError: string index out of range
+    """
+    return ustring[i]
+
+
  
  @cython.test_assert_path_exists("//CoerceToPyTypeNode",
                                  "//IndexNode")
@@ -219,3 +239,33 @@ def index_add(unicode ustring, Py_ssize_t i, Py_ssize_t j):
      True
      """
      return ustring[i] + ustring[j]
+
+
+@cython.test_assert_path_exists("//CoerceToPyTypeNode",
+                                "//IndexNode",
+                                "//InPlaceAssignmentNode",
+                                "//CoerceToPyTypeNode//IndexNode")
+@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
+def index_concat_loop(unicode ustring):
+    """
+    >>> index_concat_loop(ustring) == ustring
+    True
+    """
+    cdef int i
+    cdef unicode s = u''
+    for i in range(len(ustring)):
+        s += ustring[i]
+    return s
+
+
+@cython.test_assert_path_exists("//CoerceToPyTypeNode",
+                                "//IndexNode",
+                                "//CoerceToPyTypeNode//IndexNode")
+@cython.test_fail_if_path_exists("//IndexNode//CoerceToPyTypeNode")
+def index_join_loop(unicode ustring):
+    """
+    >>> index_join_loop(ustring) == ustring
+    True
+    """
+    cdef int i
+    return u''.join([ ustring[i] for i in range(len(ustring)) ])
author	Stefan Behnel <scoder@users.berlios.de>
	Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)
committer	Stefan Behnel <scoder@users.berlios.de>
	Wed, 12 May 2010 17:11:00 +0000 (19:11 +0200)
Cython/Compiler/ExprNodes.py		patch \| blob \| history
tests/run/unicode_indexing.pyx		patch \| blob \| history