From: Stefan Behnel Date: Mon, 31 Jan 2011 07:56:52 +0000 (+0100) Subject: support surrogate pair to Py_UCS4 coercion only in 16 bit Unicode builds X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=79f7d7f3f097285f1f06639056e5aa92b1dbef39;p=cython.git support surrogate pair to Py_UCS4 coercion only in 16 bit Unicode builds --- diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py index ca997ee8..07142349 100755 --- a/Cython/Compiler/PyrexTypes.py +++ b/Cython/Compiler/PyrexTypes.py @@ -958,7 +958,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { if (PyUnicode_Check(x)) { if (likely(PyUnicode_GET_SIZE(x) == 1)) { return PyUnicode_AS_UNICODE(x)[0]; - } else if (PyUnicode_GET_SIZE(x) == 2) { + } + #if Py_UNICODE_SIZE == 2 + else if (PyUnicode_GET_SIZE(x) == 2) { Py_UCS4 high_val = PyUnicode_AS_UNICODE(x)[0]; if (high_val >= 0xD800 && high_val <= 0xDBFF) { Py_UCS4 low_val = PyUnicode_AS_UNICODE(x)[1]; @@ -967,8 +969,9 @@ static CYTHON_INLINE Py_UCS4 __Pyx_PyObject_AsPy_UCS4(PyObject* x) { } } } + #endif PyErr_Format(PyExc_ValueError, - "only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length " + "only single character unicode strings can be converted to Py_UCS4, got length " #if PY_VERSION_HEX < 0x02050000 "%d", #else diff --git a/tests/run/py_ucs4_type.pyx b/tests/run/py_ucs4_type.pyx index 930333d0..b5790dce 100644 --- a/tests/run/py_ucs4_type.pyx +++ b/tests/run/py_ucs4_type.pyx @@ -68,13 +68,13 @@ def unicode_ordinal(Py_UCS4 i): >>> unicode_ordinal(u0[:0]) Traceback (most recent call last): ... - ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 0 + ValueError: only single character unicode strings can be converted to Py_UCS4, got length 0 More than one character: >>> unicode_ordinal(u0+u1) Traceback (most recent call last): ... - ValueError: only single character unicode strings or surrogate pairs can be converted to Py_UCS4, got length 2 + ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2 """ return i