# to be rebuilt next time pyrexc is run.
#
-string_prefixes = "cCrR"
+string_prefixes = "cCrRuU"
def make_lexicon():
from Cython.Plex import \
"static __Pyx_StringTabEntry %s[] = {" %
Naming.stringtab_cname)
for entry in entries:
+ # The 4th initializer fills __Pyx_StringTabEntry.is_unicode: the bool from
+ # isinstance() is formatted with %d, so it is emitted as 1 or 0.
code.putln(
- "{&%s, %s, sizeof(%s)}," % (
+ "{&%s, %s, sizeof(%s), %d}," % (
entry.pystring_cname,
entry.cname,
- entry.cname))
+ entry.cname,
+ isinstance(entry.init, unicode)
+ ))
code.putln(
- "{0, 0, 0}")
+ "{0, 0, 0, 0}")
code.putln(
"};")
typedef struct {const char *s; const void **p;} __Pyx_CApiTabEntry; /*proto*/
typedef struct {PyObject **p; char *s;} __Pyx_InternTabEntry; /*proto*/
-typedef struct {PyObject **p; char *s; long n;} __Pyx_StringTabEntry; /*proto*/
+typedef struct {PyObject **p; char *s; long n; int is_unicode;} __Pyx_StringTabEntry; /*proto*/
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
static INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
""","""
static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
while (t->p) {
- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ if (t->is_unicode) {
+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+ } else {
+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ }
if (!*t->p)
return -1;
++t;
def p_string_literal(s):
# A single string or char literal.
- # Returns (kind, value) where kind in ('', 'c', 'r')
+ # Returns (kind, value) where kind in ('', 'c', 'r', 'u')
if s.sy == 'STRING':
value = unquote(s.systring)
s.next()
pos = s.position()
#is_raw = s.systring[:1].lower() == "r"
kind = s.systring[:1].lower()
- if kind not in "cr":
+ if kind not in "cru":
kind = ''
chars = []
while 1:
systr = s.systring
if len(systr) == 1 and systr in "'\"\n":
chars.append('\\')
+ if kind == 'u' and not isinstance(systr, unicode):
+ systr = systr.decode("UTF-8")
chars.append(systr)
elif sy == 'ESCAPE':
systr = s.systring
chars.append('\\x0' + systr[2:])
elif c == '\n':
pass
+ elif c == 'u':
+ chars.append(systr)
else:
chars.append(r'\\' + systr[1:])
elif sy == 'NEWLINE':
"Unexpected token %r:%r in string literal" %
(sy, s.systring))
s.next()
- value = join(chars, '')
+ if kind == 'u':
+ value = u''.join(chars)
+ else:
+ value = ''.join(chars)
#print "p_string_literal: value =", repr(value) ###
return kind, value
from_py_function = "PyString_AsString"
def literal_code(self, value):
+ # Return value wrapped in double quotes. A unicode value is first
+ # encoded to UTF-8; any other value is interpolated unchanged.
+ # NOTE(review): no escaping is done here -- presumably the parser has
+ # already escaped quotes/newlines in value; confirm against callers.
+ if isinstance(value, unicode):
+ value = value.encode("UTF-8")
return '"%s"' % value