class CPyUnicodeIntType(CIntType):
# Py_UNICODE
- # Conversion from a unicode string to Py_UNICODE at runtime is not
- # currently supported and may never be - we only convert from and
- # to integers here. The maximum value for a Py_UNICODE is
- # 1114111, so PyInt_FromLong() will do just fine here.
+ # Py_UNICODE coerces from and to single character unicode strings,
+ # but we also allow Python integers as input. The value range for
+ # Py_UNICODE is 0..1114111, which is checked when converting from
+ # an integer value.
- to_py_function = "PyInt_FromLong"
- from_py_function = "__Pyx_PyInt_AsPy_UNICODE"
+ to_py_function = "PyUnicode_FromOrdinal"
+ from_py_function = "__Pyx_PyObject_AsPy_UNICODE"
+
+ def create_from_py_utility_code(self, env):
+ env.use_utility_code(pyobject_as_py_unicode_utility_code)
+ return True
def sign_and_name(self):
return "Py_UNICODE"
+pyobject_as_py_unicode_utility_code = UtilityCode(
+proto='''
+static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject*);
+''',
+impl='''
+static CYTHON_INLINE Py_UNICODE __Pyx_PyObject_AsPy_UNICODE(PyObject* x) {
+ static long maxval = 0;
+ long ival;
+ if (PyUnicode_Check(x)) {
+ if (unlikely(PyUnicode_GET_SIZE(x) != 1)) {
+ PyErr_Format(PyExc_ValueError,
+ "only single character unicode strings can be converted to Py_UNICODE, got length "
+ #if PY_VERSION_HEX < 0x02050000
+ "%d",
+ #else
+ "%zd",
+ #endif
+ PyUnicode_GET_SIZE(x));
+ return (Py_UNICODE)-1;
+ }
+ return PyUnicode_AS_UNICODE(x)[0];
+ }
+ if (unlikely(!maxval))
+ maxval = (long)PyUnicode_GetMax();
+ ival = __Pyx_PyInt_AsLong(x);
+ if (unlikely(ival < 0)) {
+ if (!PyErr_Occurred())
+ PyErr_SetString(PyExc_OverflowError,
+ "cannot convert negative value to Py_UNICODE");
+ return (Py_UNICODE)-1;
+ } else if (unlikely(ival > maxval)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to Py_UNICODE");
+ return (Py_UNICODE)-1;
+ }
+ return (Py_UNICODE)ival;
+}
+''')
+
class CPySSizeTType(CIntType):
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
-#ifdef Py_USING_UNICODE
-static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject*);
-#endif
-
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
return res;
}
-#ifdef Py_USING_UNICODE
-static CYTHON_INLINE Py_UNICODE __Pyx_PyInt_AsPy_UNICODE(PyObject* x) {
- long ival = __Pyx_PyInt_AsLong(x);
- static long maxval = 0;
- if (unlikely(!maxval))
- maxval = (long)PyUnicode_GetMax();
- if (unlikely(ival < 0)) {
- if (!PyErr_Occurred())
- PyErr_SetString(PyExc_OverflowError,
- "can't convert negative value to Py_UNICODE");
- return (Py_UNICODE)-1;
- } else if (unlikely(ival > maxval)) {
- PyErr_SetString(PyExc_OverflowError,
- "value too large to convert to Py_UNICODE");
- return (Py_UNICODE)-1;
- }
- return (Py_UNICODE)ival;
-}
-#endif
-
static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
Py_ssize_t ival;
PyObject* x = PyNumber_Index(b);
cdef Py_UNICODE char_ASCII = u'A'
cdef Py_UNICODE char_KLINGON = u'\uF8D2'
-
def compare_ASCII():
"""
>>> compare_ASCII()
>>> index_literal(4) == '5'
True
"""
- # runtime casts are not currently supported
- #return <Py_UNICODE>(u"12345"[i])
return u"12345"[i]
-def unicode_cardinal(Py_UNICODE i):
+def index_literal_pyunicode(int i):
+ """
+ >>> index_literal_pyunicode(0) == '1'
+ True
+ >>> index_literal_pyunicode(-5) == '1'
+ True
+ >>> index_literal_pyunicode(2) == '3'
+ True
+ >>> index_literal_pyunicode(4) == '5'
+ True
+ """
+ return <Py_UNICODE>(u"12345"[i])
+
+
+from cpython.unicode cimport PyUnicode_FromOrdinal
+import sys
+
+u0 = u'\x00'
+u1 = u'\x01'
+umax = PyUnicode_FromOrdinal(sys.maxunicode)
+
+def unicode_ordinal(Py_UNICODE i):
"""
- >>> import sys
+ >>> ord(unicode_ordinal(0)) == 0
+ True
+ >>> ord(unicode_ordinal(1)) == 1
+ True
+ >>> ord(unicode_ordinal(sys.maxunicode)) == sys.maxunicode
+ True
- >>> unicode_cardinal(0)
- 0
- >>> unicode_cardinal(1)
- 1
- >>> unicode_cardinal(sys.maxunicode) == sys.maxunicode
+ >>> ord(unicode_ordinal(u0)) == 0
+ True
+ >>> ord(unicode_ordinal(u1)) == 1
+ True
+ >>> ord(unicode_ordinal(umax)) == sys.maxunicode
True
-
- >>> unicode_cardinal(-1) #doctest: +ELLIPSIS
+ Value too small:
+ >>> unicode_ordinal(-1) #doctest: +ELLIPSIS
Traceback (most recent call last):
...
OverflowError: ...
- >>> unicode_cardinal(sys.maxunicode+1) #doctest: +ELLIPSIS
+ Value too large:
+ >>> unicode_ordinal(sys.maxunicode+1) #doctest: +ELLIPSIS
Traceback (most recent call last):
...
OverflowError: ...
+
+ Less than one character:
+ >>> unicode_ordinal(u0[:0])
+ Traceback (most recent call last):
+ ...
+ ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 0
+
+ More than one character:
+ >>> unicode_ordinal(u0+u1)
+ Traceback (most recent call last):
+ ...
+ ValueError: only single character unicode strings can be converted to Py_UNICODE, got length 2
"""
return i