From 71a599407e0f40e5cf050c9d681309e888fee988 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sun, 10 Aug 2008 20:37:12 +0200 Subject: [PATCH] Support long unicode escapes ('\U...'); fix unicode escape handling in byte strings; unescape \xXY in string literals, because in C a hex escape would otherwise absorb any trailing hex digits - output string escaping will do the right thing --- Cython/Compiler/Lexicon.py | 3 ++- Cython/Compiler/Parsing.py | 21 +++++++++++---------- tests/run/strescapes.pyx | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 tests/run/strescapes.pyx diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index dade469e..e3f1be9a 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ -64,7 +64,8 @@ def make_lexicon(): two_hex = hexdigit + hexdigit four_hex = two_hex + two_hex escapeseq = Str("\\") + (two_oct | three_oct | two_hex | - Str('u') + four_hex | Str('x') + two_hex | AnyChar) + Str('u') + four_hex | Str('x') + two_hex | + Str('U') + four_hex + four_hex | AnyChar) deco = Str("@") bra = Any("([{") diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 1f733893..298fc092 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -2,7 +2,9 @@ # Pyrex Parser # -import os, re +import os +import re +import sys from types import ListType, TupleType from Scanning import PyrexScanner, FileSourceDescriptor import Nodes @@ -604,18 +606,17 @@ def p_string_literal(s): chars.append(systr) elif c == '\n': pass - elif c in 'ux': - if kind == 'u': - try: - chars.append( - systr.encode("ASCII").decode('unicode_escape')) - except UnicodeDecodeError: + elif c in 'Uux': + if kind == 'u' or c == 'x': + chrval = int(systr[2:], 16) + if chrval > sys.maxunicode: s.error("Invalid unicode escape '%s'" % systr, pos = pos) - elif c == 'x': - chars.append('\\x0' + systr[2:]) + strval = unichr(chrval) else: - chars.append(systr) + # unicode escapes in 
plain byte strings are not unescaped + strval = systr + chars.append(strval.replace('\\', '\\\\')) else: chars.append(r'\\' + systr[1:]) elif sy == 'NEWLINE': diff --git a/tests/run/strescapes.pyx b/tests/run/strescapes.pyx new file mode 100644 index 00000000..a88df304 --- /dev/null +++ b/tests/run/strescapes.pyx @@ -0,0 +1,34 @@ +__doc__ = u""" + +>>> py_strings = [ +... '\\x1234', +... '\\x0A12\\x0C34', +... '\\x0A57', +... 'abc\\x12def', +... u'\\u1234', +... u'\\U00041234', +... b'\\u1234', +... b'\\U00041234', +... ] + +>>> for i, (py_string, c_string) in enumerate(zip(py_strings, c_strings)): +... assert py_string == c_string, "%d: %r != %r" % (i, py_string, c_string) + +""" + +import sys +if sys.version_info[0] < 3: + __doc__ = __doc__.replace(" b'", " '") +else: + __doc__ = __doc__.replace(" u'", " '") + +c_strings = [ +'\x1234', +'\x0A12\x0C34', +'\x0A57', +'abc\x12def', +u'\u1234', +u'\U00041234', +b'\u1234', +b'\U00041234', +] -- 2.26.2