From 61e1905556d7a3e687d7ef49f2939ad8558a79f9 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sun, 25 Apr 2010 18:14:58 +0200 Subject: [PATCH] coerce Py_UNICODE to and from single character unicode strings by default --- Cython/Compiler/PyrexTypes.py | 79 ++++++++++++++++++++++------------- tests/run/py_unicode_type.pyx | 62 +++++++++++++++++++++------ 2 files changed, 98 insertions(+), 43 deletions(-) diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py index 37e1cd04..e21e2558 100755 --- a/Cython/Compiler/PyrexTypes.py +++ b/Cython/Compiler/PyrexTypes.py @@ -881,17 +881,60 @@ class CBIntType(CIntType): class CPyUnicodeIntType(CIntType): # Py_UNICODE - # Conversion from a unicode string to Py_UNICODE at runtime is not - # currently supported and may never be - we only convert from and - # to integers here. The maximum value for a Py_UNICODE is - # 1114111, so PyInt_FromLong() will do just fine here. + # Py_UNICODE coerces from and to single character unicode strings, + # but we also allow Python integers as input. The value range for + # Py_UNICODE is 0..1114111, which is checked when converting from + # an integer value. - to_py_function = "PyInt_FromLong" - from_py_function = "__Pyx_PyInt_AsPy_UNICODE" + to_py_function = "PyUnicode_FromOrdinal" + from_py_function = "__Pyx_PyObject_AsPy_UNICODE" + + def create_from_py_utility_code(self, env): + env.use_utility_code(pyobject_as_py_unicode_utility_code) + return True def sign_and_name(self): return "Py_UNICODE" +pyobject_as_py_unicode_utility_code = UtilityCode( +proto=''' +static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*); +''', +impl=''' +static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) { + static long maxval = 0; + long ival; + if (PyUnicode_Check(x)) { + if (unlikely(PyUnicode_GET_SIZE(x) != 1)) { + PyErr_Format(PyExc_ValueError, + "only single character unicode strings can be converted to Py_UNICODE, got length " + #if PY_VERSION_HEX < 0x02050000 + "%d", + #else + "%zd", + #endif + PyUnicode_GET_SIZE(x)); + return (Py_UNICODE)-1; + } + return PyUnicode_AS_UNICODE(x)[0]; + } + if (unlikely(!maxval)) + maxval = (long)PyUnicode_GetMax(); + ival = __Pyx_PyInt_AsLong(x); + if (unlikely(ival < 0)) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_OverflowError, + "cannot convert negative value to Py_UNICODE"); + return (Py_UNICODE)-1; + } else if (unlikely(ival > maxval)) { + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to Py_UNICODE"); + return (Py_UNICODE)-1; + } + return (Py_UNICODE)ival; +} +''') + class CPySSizeTType(CIntType): @@ -2512,10 +2555,6 @@ type_conversion_predeclarations = """ static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); -#ifdef Py_USING_UNICODE -static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject*); -#endif - static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*); @@ -2580,26 +2619,6 @@ static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { return res; } -#ifdef Py_USING_UNICODE -static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject* x) { - long ival = __Pyx_PyInt_AsLong(x); - static long maxval = 0; - if (unlikely(!maxval)) - maxval = (long)PyUnicode_GetMax(); - if (unlikely(ival < 0)) { - if (!PyErr_Occurred()) - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to Py_UNICODE"); - return (Py_UNICODE)-1; - } else if (unlikely(ival > maxval)) { - PyErr_SetString(PyExc_OverflowError, - "value too large to convert to Py_UNICODE"); - return (Py_UNICODE)-1; - } - return (Py_UNICODE)ival; -} -#endif - static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { Py_ssize_t ival; PyObject* x = PyNumber_Index(b); diff --git a/tests/run/py_unicode_type.pyx b/tests/run/py_unicode_type.pyx index 41fe97d7..ff0bba10 100644 --- a/tests/run/py_unicode_type.pyx +++ b/tests/run/py_unicode_type.pyx @@ -3,7 +3,6 @@ cdef Py_UNICODE char_ASCII = u'A' cdef Py_UNICODE char_KLINGON = u'\uF8D2' - def compare_ASCII(): """ >>> compare_ASCII() @@ -39,31 +38,68 @@ def index_literal(int i): >>> index_literal(4) == '5' True """ - # runtime casts are not currently supported - #return (u"12345"[i]) return u"12345"[i] -def unicode_cardinal(Py_UNICODE i): +def index_literal_pyunicode(int i): + """ + >>> index_literal_pyunicode(0) == '1' + True + >>> index_literal_pyunicode(-5) == '1' + True + >>> index_literal_pyunicode(2) == '3' + True + >>> index_literal_pyunicode(4) == '5' + True + """ + return (u"12345"[i]) + + +from cpython.unicode cimport PyUnicode_FromOrdinal +import sys + +u0 = u'\x00' +u1 = u'\x01' +umax = PyUnicode_FromOrdinal(sys.maxunicode) + +def unicode_ordinal(Py_UNICODE i): """ - >>> import sys + >>> ord(unicode_ordinal(0)) == 0 + True + >>> ord(unicode_ordinal(1)) == 1 + True + >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode + True - >>> unicode_cardinal(0) - 0 - >>> unicode_cardinal(1) - 1 - >>> unicode_cardinal(sys.maxunicode) == sys.maxunicode + >>> ord(unicode_ordinal(u0)) == 0 + True + >>> ord(unicode_ordinal(u1)) == 1 + True + >>> ord(unicode_ordinal(umax)) == sys.maxunicode True - - >>> unicode_cardinal(-1) #doctest: +ELLIPSIS + Value too small: + >>> unicode_ordinal(-1) #doctest: +ELLIPSIS Traceback (most recent call last): ... OverflowError: ... - >>> unicode_cardinal(sys.maxunicode+1) #doctest: +ELLIPSIS + Value too large: + >>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS Traceback (most recent call last): ... OverflowError: ... + + Less than one character: + >>> unicode_ordinal(u0[:0]) + Traceback (most recent call last): + ... + ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0 + + More than one character: + >>> unicode_ordinal(u0+u1) + Traceback (most recent call last): + ... + ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2 """ return i -- 2.26.2