From: Stefan Behnel
Date: Thu, 6 Mar 2008 10:39:10 +0000 (+0100)
Subject: fixes for escape sequences in unicode literals
X-Git-Tag: 0.9.6.14~29^2~22^2
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=a7bd92e24c19944bba5b6227aea486ad12fefcfe;p=cython.git

fixes for escape sequences in unicode literals
---

diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py
index 89d7b43f..cd9cfdb9 100644
--- a/Cython/Compiler/Lexicon.py
+++ b/Cython/Compiler/Lexicon.py
@@ -61,7 +61,9 @@ def make_lexicon():
     two_oct = octdigit + octdigit
     three_oct = octdigit + octdigit + octdigit
     two_hex = hexdigit + hexdigit
-    escapeseq = Str("\\") + (two_oct | three_oct | two_hex | AnyChar)
+    four_hex = two_hex + two_hex
+    escapeseq = Str("\\") + (two_oct | three_oct | two_hex |
+                             Str('u') + four_hex | Str('x') + two_hex | AnyChar)
 
     bra = Any("([{")
     ket = Any(")]}")
diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py
index 62670782..8227618c 100644
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -565,12 +565,19 @@ def p_string_literal(s):
                 c = systr[1]
                 if c in "'\"\\abfnrtv01234567":
                     chars.append(systr)
-                elif c == 'x':
-                    chars.append('\\x0' + systr[2:])
                 elif c == '\n':
                     pass
-                elif c == 'u':
-                    chars.append(systr)
+                elif c in 'ux':
+                    if kind == 'u':
+                        try:
+                            chars.append(systr.decode('unicode_escape'))
+                        except UnicodeDecodeError:
+                            s.error("Invalid unicode escape '%s'" % systr,
+                                    pos = pos)
+                    elif c == 'x':
+                        chars.append('\\x0' + systr[2:])
+                    else:
+                        chars.append(systr)
                 else:
                     chars.append(r'\\' + systr[1:])
             elif sy == 'NEWLINE':
@@ -585,8 +592,6 @@ def p_string_literal(s):
                     (sy, s.systring))
         s.next()
     value = ''.join(chars)
-    if kind == 'u':
-        value = value.decode('raw_unicode_escape')
     #print "p_string_literal: value =", repr(value) ###
     return kind, value
 