def coerce_to(self, dst_type, env):
if dst_type.is_int:
if not self.can_coerce_to_char_literal():
- error(self.pos, "Only single-character strings can be coerced into ints.")
+ error(self.pos, "Only single-character string literals can be coerced into ints.")
+ return self
+ if dst_type is PyrexTypes.c_py_unicode_type:
+ error(self.pos, "Bytes literals cannot coerce to Py_UNICODE, use a unicode literal instead.")
return self
return CharNode(self.pos, value=self.value)
def coerce_to(self, dst_type, env):
if dst_type is self.type:
pass
+ elif dst_type is PyrexTypes.c_py_unicode_type:
+ if not self.can_coerce_to_char_literal():
+ error(self.pos, "Only single-character Unicode string literals can be coerced into Py_UNICODE.")
+ return self
+ int_value = ord(self.value)
+ return IntNode(self.pos, value=int_value, constant_result=int_value)
elif not dst_type.is_pyobject:
- error(self.pos, "Unicode objects do not support coercion to C types.")
+ error(self.pos, "Unicode literals do not support coercion to C types other than Py_UNICODE.")
elif dst_type is not py_object_type:
if not self.check_for_coercion_error(dst_type):
self.fail_assignment(dst_type)
return self
+ def can_coerce_to_char_literal(self):
+ return len(self.value) == 1
+
def generate_evaluation_code(self, code):
self.result_code = code.get_py_string_const(self.value)
type1_can_be_int = False
type2_can_be_int = False
- if isinstance(operand1, (StringNode, BytesNode)) \
+ if isinstance(operand1, (StringNode, BytesNode, UnicodeNode)) \
and operand1.can_coerce_to_char_literal():
type1_can_be_int = True
- if isinstance(operand2, (StringNode, BytesNode)) \
+ if isinstance(operand2, (StringNode, BytesNode, UnicodeNode)) \
and operand2.can_coerce_to_char_literal():
type2_can_be_int = True
return node
PyUnicode_AS_UNICODE_func_type = PyrexTypes.CFuncType(
- PyrexTypes.CPtrType(PyrexTypes.c_uint_type), [ # FIXME: return type is actually Py_UNICODE*
+ PyrexTypes.CPtrType(PyrexTypes.c_py_unicode_type), [
PyrexTypes.CFuncTypeArg("s", Builtin.unicode_type, None)
])
special_basic_c_types = {
# name : (signed, longness)
+ "Py_UNICODE" : (0, 0),
"Py_ssize_t" : (2, 0),
"size_t" : (0, 0),
}
return 'int'
+class CPyUnicodeIntType(CIntType):
+ # Py_UNICODE
+
+ # Conversion from a unicode string to Py_UNICODE at runtime is not
+ # currently supported and may never be - we only convert from and
+ # to integers here. The maximum value for a Py_UNICODE is
+ # 1114111, so PyInt_FromLong() will do just fine here.
+
+ to_py_function = "PyInt_FromLong"
+
+ def sign_and_name(self):
+ return "Py_UNICODE"
+
+
class CPySSizeTType(CIntType):
to_py_function = "PyInt_FromSsize_t"
rank_to_type_name = (
"char", # 0
"short", # 1
- "int", # 2
- "long", # 3
- "Py_ssize_t", # 4
- "size_t", # 5
- "PY_LONG_LONG", # 6
- "float", # 7
- "double", # 8
- "long double", # 9
+ "Py_UNICODE", # 2
+ "int", # 3
+ "long", # 4
+ "Py_ssize_t", # 5
+ "size_t", # 6
+ "PY_LONG_LONG", # 7
+ "float", # 8
+ "double", # 9
+ "long double", # 10
)
py_object_type = PyObjectType()
c_uchar_type = CIntType(0, 0)
c_ushort_type = CIntType(1, 0)
-c_uint_type = CIntType(2, 0)
-c_ulong_type = CIntType(3, 0)
-c_ulonglong_type = CIntType(6, 0)
+c_py_unicode_type = CPyUnicodeIntType(2, 0)
+c_uint_type = CIntType(3, 0)
+c_ulong_type = CIntType(4, 0)
+c_ulonglong_type = CIntType(7, 0)
c_char_type = CIntType(0, 1)
c_short_type = CIntType(1, 1)
-c_int_type = CIntType(2, 1)
-c_long_type = CIntType(3, 1)
-c_longlong_type = CIntType(6, 1)
+c_int_type = CIntType(3, 1)
+c_long_type = CIntType(4, 1)
+c_longlong_type = CIntType(7, 1)
c_schar_type = CIntType(0, 2)
c_sshort_type = CIntType(1, 2)
-c_sint_type = CIntType(2, 2)
-c_slong_type = CIntType(3, 2)
-c_slonglong_type = CIntType(6, 2)
+c_sint_type = CIntType(3, 2)
+c_slong_type = CIntType(4, 2)
+c_slonglong_type = CIntType(7, 2)
-c_bint_type = CBIntType(2, 1)
-c_py_ssize_t_type = CPySSizeTType(4, 2)
-c_size_t_type = CSizeTType(5, 0)
+c_bint_type = CBIntType(3, 1)
+c_py_ssize_t_type = CPySSizeTType(5, 2)
+c_size_t_type = CSizeTType(6, 0)
-c_float_type = CFloatType(7, math_h_modifier='f')
-c_double_type = CFloatType(8)
-c_longdouble_type = CFloatType(9, math_h_modifier='l')
+c_float_type = CFloatType(8, math_h_modifier='f')
+c_double_type = CFloatType(9)
+c_longdouble_type = CFloatType(10, math_h_modifier='l')
c_float_complex_type = CComplexType(c_float_type)
c_double_complex_type = CComplexType(c_double_type)
c_py_ssize_t_ptr_type = CPtrType(c_py_ssize_t_type)
c_size_t_ptr_type = CPtrType(c_size_t_type)
-c_returncode_type = CIntType(2, 1, is_returncode = 1)
+c_returncode_type = CIntType(3, 1, is_returncode = 1)
c_anon_enum_type = CAnonEnumType(-1, 1)
# the Py_buffer type is defined in Builtin.py
(1, 0, "bint"): c_bint_type,
(0, 0, "size_t") : c_size_t_type,
(2, 0, "Py_ssize_t"): c_py_ssize_t_type,
+ (0, 0, "Py_UNICODE"): c_py_unicode_type,
(1, 0, "float"): c_float_type,
(1, 0, "double"): c_double_type,
signed = 2
elif name == 'size_t':
signed = 0
+ elif name == 'Py_UNICODE':
+ signed = 0
else:
if name.startswith('u'):
name = name[1:]
# Predefined types
-int_types = ['char', 'short', 'int', 'long', 'longlong', 'Py_ssize_t', 'size_t']
+int_types = ['char', 'short', 'Py_UNICODE', 'int', 'long', 'longlong', 'Py_ssize_t', 'size_t']
float_types = ['longdouble', 'double', 'float']
complex_types = ['longdoublecomplex', 'doublecomplex', 'floatcomplex', 'complex']
other_types = ['bint', 'void']
for name in int_types:
gs[name] = typedef(py_int)
- if not name.endswith('size_t'):
+ if name != 'Py_UNICODE' and not name.endswith('size_t'):
gs['u'+name] = typedef(py_int)
gs['s'+name] = typedef(py_int)
cdef int x1 = "\xFF" # works
cdef int x2 = "\u0FFF" # fails
-cdef int x3 = u"\xFF" # fails
+cdef Py_UNICODE u1 = u"\xFF" # works
+cdef int u3 = u"\xFF" # fails
-_ERRORS = u"""
-2:14: Only single-character strings can be coerced into ints.
-3:14: Only single-character strings can be coerced into ints.
-6:15: Only single-character strings can be coerced into ints.
-7:14: Unicode objects do not support coercion to C types.
+
+_ERRORS = """
+2:14: Only single-character string literals can be coerced into ints.
+3:14: Only single-character string literals can be coerced into ints.
+6:15: Only single-character string literals can be coerced into ints.
+9:14: Unicode literals do not support coercion to C types other than Py_UNICODE.
"""
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+
+cdef Py_UNICODE char_ASCII = u'A'
+cdef Py_UNICODE char_KLINGON = u'\uF8D2'
+
+def char_too_long_ASCII():
+ cdef Py_UNICODE c = u'AB'
+
+def char_too_long_Unicode():
+ cdef Py_UNICODE c = u'A\uF8D2'
+
+def char_too_long_bytes():
+ cdef Py_UNICODE c = b'AB'
+
+def char_too_long_latin1():
+    cdef Py_UNICODE char_bytes_latin1 = b'ö'
+
+
+_ERRORS = """
+7:24: Only single-character Unicode string literals can be coerced into Py_UNICODE.
+10:24: Only single-character Unicode string literals can be coerced into Py_UNICODE.
+13:24: Only single-character string literals can be coerced into ints.
+16:40: Bytes literals cannot coerce to Py_UNICODE, use a unicode literal instead.
+"""
cdef list l_f3 = u1
_ERRORS = u"""
-25:20: Unicode objects do not support coercion to C types.
+25:20: Unicode literals do not support coercion to C types other than Py_UNICODE.
26:22: Unicode objects do not support coercion to C types.
27:22: 'str' objects do not support coercion to C types (use 'bytes'?).
'C'
"""
for c in s:
- if c == 'C':
+ if c == b'C':
return 'C'
else:
return 'X'
"""
cdef char c
for c in s:
- if c == 'C':
+ if c == b'C':
return 'C'
else:
return 'X'
-def for_int_in_unicode(unicode s):
+def for_pyunicode_in_unicode(unicode s):
"""
- >>> for_int_in_unicode(unicode_abc)
+ >>> for_pyunicode_in_unicode(unicode_abc)
'X'
- >>> for_int_in_unicode(unicode_ABC)
+ >>> for_pyunicode_in_unicode(unicode_ABC)
'C'
"""
- cdef int c
+ cdef Py_UNICODE c
for c in s:
- if c == 'C':
+ if c == u'C':
return 'C'
else:
return 'X'
--- /dev/null
+# -*- coding: iso-8859-1 -*-
+
+cdef Py_UNICODE char_ASCII = u'A'
+cdef Py_UNICODE char_KLINGON = u'\uF8D2'
+
+
+def compare_ASCII():
+ """
+ >>> compare_ASCII()
+ True
+ False
+ False
+ """
+ print(char_ASCII == u'A')
+ print(char_ASCII == u'B')
+ print(char_ASCII == u'\uF8D2')
+
+
+def compare_KLINGON():
+ """
+    >>> compare_KLINGON()
+ True
+ False
+ False
+ """
+ print(char_KLINGON == u'\uF8D2')
+ print(char_KLINGON == u'A')
+ print(char_KLINGON == u'B')
+
+
+def index_literal(int i):
+ """
+ >>> index_literal(0) == '1'
+ True
+ >>> index_literal(-5) == '1'
+ True
+ >>> index_literal(2) == '3'
+ True
+ >>> index_literal(4) == '5'
+ True
+ """
+ # runtime casts are not currently supported
+ #return <Py_UNICODE>(u"12345"[i])
+ return u"12345"[i]