From: Stefan Behnel Date: Wed, 8 Sep 2010 09:31:54 +0000 (+0200) Subject: fix type inference for sliced builtins X-Git-Tag: 0.14.alpha0~344 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=13327ba0ae9be8ff982eee679e5dc6113c6670b9;p=cython.git fix type inference for sliced builtins --- diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index d12f48a8..ab267126 100755 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -1942,29 +1942,39 @@ class IndexNode(ExprNode): return self.base.type_dependencies(env) def infer_type(self, env): - is_slice = isinstance(self.index, SliceNode) - if isinstance(self.base, BytesNode): - if is_slice: + base_type = self.base.infer_type(env) + if isinstance(self.index, SliceNode): + # slicing! + if base_type.is_string: return bytes_type + elif base_type in (unicode_type, bytes_type, str_type, list_type, tuple_type): + # slicing these returns the same type + return base_type else: - return py_object_type # Py2/3 return different types - base_type = self.base.infer_type(env) - if base_type.is_ptr or base_type.is_array: - return base_type.base_type - elif base_type is unicode_type and self.index.infer_type(env).is_int: - # Py_UNICODE will automatically coerce to a unicode string - # if required, so this is safe. We only infer Py_UNICODE - # when the index is a C integer type. Otherwise, we may - # need to use normal Python item access, in which case - # it's faster to return the one-char unicode string than - # to receive it, throw it away, and potentially rebuild it - # on a subsequent PyObject coercion. - return PyrexTypes.c_py_unicode_type - elif base_type in (str_type, unicode_type): - # these types will always return their own type on Python indexing/slicing - return base_type - elif is_slice and base_type in (bytes_type, list_type, tuple_type): - # slicing these returns the same type + # TODO: Handle buffers (hopefully without too much redundancy). + return py_object_type + + if isinstance(self.base, BytesNode): + # Py2/3 return different types on indexing bytes objects + # and we can't be sure if we are slicing, so we can't do + # any better than this: + return py_object_type + + if self.index.infer_type(env).is_int or isinstance(self.index, (IntNode, LongNode)): + # indexing! + if base_type is unicode_type: + # Py_UNICODE will automatically coerce to a unicode string + # if required, so this is safe. We only infer Py_UNICODE + # when the index is a C integer type. Otherwise, we may + # need to use normal Python item access, in which case + # it's faster to return the one-char unicode string than + # to receive it, throw it away, and potentially rebuild it + # on a subsequent PyObject coercion. + return PyrexTypes.c_py_unicode_type + elif base_type.is_ptr or base_type.is_array: + return base_type.base_type + if base_type is unicode_type: + # this type always returns its own type on Python indexing/slicing return base_type else: # TODO: Handle buffers (hopefully without too much redundancy). @@ -1993,11 +2003,12 @@ class IndexNode(ExprNode): self.type = PyrexTypes.error_type return - if isinstance(self.index, IntNode) and Utils.long_literal(self.index.value): + is_slice = isinstance(self.index, SliceNode) + if not is_slice and isinstance(self.index, IntNode) and Utils.long_literal(self.index.value): self.index = self.index.coerce_to_pyobject(env) - + # Handle the case where base is a literal char* (and we expect a string, not an int) - if isinstance(self.base, BytesNode): + if isinstance(self.base, BytesNode) or is_slice: self.base = self.base.coerce_to_pyobject(env) skip_child_analysis = False @@ -2069,6 +2080,8 @@ class IndexNode(ExprNode): # Py_UNICODE will automatically coerce to a unicode string # if required, so this is fast and safe self.type = PyrexTypes.c_py_unicode_type + elif is_slice and base_type in (bytes_type, str_type, unicode_type, list_type, tuple_type): + self.type = base_type else: self.type = py_object_type else: diff --git a/tests/run/type_inference.pyx b/tests/run/type_inference.pyx index 49549871..b78f70e2 100644 --- a/tests/run/type_inference.pyx +++ b/tests/run/type_inference.pyx @@ -60,10 +60,20 @@ def slicing(): assert typeof(b) == "char *", typeof(b) b1 = b[1:2] assert typeof(b1) == "bytes object", typeof(b1) + b2 = b[1:2:2] + assert typeof(b2) == "bytes object", typeof(b2) u = u"xyz" assert typeof(u) == "unicode object", typeof(u) u1 = u[1:2] assert typeof(u1) == "unicode object", typeof(u1) + u2 = u[1:2:2] + assert typeof(u2) == "unicode object", typeof(u2) + s = "xyz" + assert typeof(s) == "str object", typeof(s) + s1 = s[1:2] + assert typeof(s1) == "str object", typeof(s1) + s2 = s[1:2:2] + assert typeof(s2) == "str object", typeof(s2) L = [1,2,3] assert typeof(L) == "list object", typeof(L) L1 = L[1:2] @@ -84,11 +94,15 @@ def indexing(): b = b"abc" assert typeof(b) == "char *", typeof(b) b1 = b[1] - assert typeof(b1) == "char", typeof(b1) # FIXME: bytes object ?? + assert typeof(b1) == "char", typeof(b1) # FIXME: Python object ?? u = u"xyz" assert typeof(u) == "unicode object", typeof(u) u1 = u[1] assert typeof(u1) == "Py_UNICODE", typeof(u1) + s = "xyz" + assert typeof(s) == "str object", typeof(s) + s1 = s[1] + assert typeof(s1) == "Python object", typeof(s1) L = [1,2,3] assert typeof(L) == "list object", typeof(L) L1 = L[1] @@ -267,7 +281,7 @@ def loop_over_bytes(): def loop_over_str(): """ >>> print( loop_over_str() ) - str object + Python object """ cdef str string = 'abcdefg' for c in string: