two_hex = hexdigit + hexdigit
four_hex = two_hex + two_hex
escapeseq = Str("\\") + (two_oct | three_oct | two_hex |
- Str('u') + four_hex | Str('x') + two_hex | AnyChar)
+ Str('u') + four_hex | Str('x') + two_hex |
+ Str('U') + four_hex + four_hex | AnyChar)
deco = Str("@")
bra = Any("([{")
# Pyrex Parser
#
-import os, re
+import os
+import re
+import sys
from types import ListType, TupleType
from Scanning import PyrexScanner, FileSourceDescriptor
import Nodes
chars.append(systr)
elif c == '\n':
pass
- elif c in 'ux':
- if kind == 'u':
- try:
- chars.append(
- systr.encode("ASCII").decode('unicode_escape'))
- except UnicodeDecodeError:
+ elif c in 'Uux':
+ if kind == 'u' or c == 'x':
+ chrval = int(systr[2:], 16)
+ if chrval > sys.maxunicode:
s.error("Invalid unicode escape '%s'" % systr,
pos = pos)
- elif c == 'x':
- chars.append('\\x0' + systr[2:])
+ strval = unichr(chrval)
else:
- chars.append(systr)
+ # \u and \U escapes in plain byte strings are kept verbatim, not unescaped
+ strval = systr
+ chars.append(strval.replace('\\', '\\\\'))
else:
chars.append(r'\\' + systr[1:])
elif sy == 'NEWLINE':
--- /dev/null
+__doc__ = u"""
+
+>>> py_strings = [
+... '\\x1234',
+... '\\x0A12\\x0C34',
+... '\\x0A57',
+... 'abc\\x12def',
+... u'\\u1234',
+... u'\\U00041234',
+... b'\\u1234',
+... b'\\U00041234',
+... ]
+
+>>> for i, (py_string, c_string) in enumerate(zip(py_strings, c_strings)):
+... assert py_string == c_string, "%d: %r != %r" % (i, py_string, c_string)
+
+"""
+
+import sys  # only used for the interpreter version check below
+if sys.version_info[0] < 3:  # major version below 3: rewrite " b'" to " '" in the doctest text
+ __doc__ = __doc__.replace(" b'", " '")
+else:  # major version 3 or later: rewrite " u'" to " '" in the doctest text
+ __doc__ = __doc__.replace(" u'", " '")
+
+c_strings = [  # compiled literals; the doctest zips these with py_strings and asserts pairwise equality
+'\x1234',
+'\x0A12\x0C34',
+'\x0A57',
+'abc\x12def',
+u'\u1234',
+u'\U00041234',
+b'\u1234',
+b'\U00041234',
+]