fixes for escape sequences in unicode literals
author Stefan Behnel <scoder@users.berlios.de>
Thu, 6 Mar 2008 10:39:10 +0000 (11:39 +0100)
committer Stefan Behnel <scoder@users.berlios.de>
Thu, 6 Mar 2008 10:39:10 +0000 (11:39 +0100)
Cython/Compiler/Lexicon.py
Cython/Compiler/Parsing.py

index 89d7b43fddc5d2e84524731bd72246a2106b8040..cd9cfdb9666e50ca64b430a05a56a2a1dde6eecc 100644 (file)
@@ -61,7 +61,9 @@ def make_lexicon():
     two_oct = octdigit + octdigit
     three_oct = octdigit + octdigit + octdigit
     two_hex = hexdigit + hexdigit
-    escapeseq = Str("\\") + (two_oct | three_oct | two_hex | AnyChar)
+    four_hex = two_hex + two_hex
+    escapeseq = Str("\\") + (two_oct | three_oct | two_hex |
+                             Str('u') + four_hex | Str('x') + two_hex | AnyChar)
     
     bra = Any("([{")
     ket = Any(")]}")
index 62670782f9c9cb9080c63586bc1d3bb3e74923fd..8227618c3ced8d5683b7ba69f5158ae78b814219 100644 (file)
@@ -565,12 +565,19 @@ def p_string_literal(s):
                 c = systr[1]
                 if c in "'\"\\abfnrtv01234567":
                     chars.append(systr)
-                elif c == 'x':
-                    chars.append('\\x0' + systr[2:])
                 elif c == '\n':
                     pass
-                elif c == 'u':
-                    chars.append(systr)
+                elif c in 'ux':
+                    if kind == 'u':
+                        try:
+                            chars.append(systr.decode('unicode_escape'))
+                        except UnicodeDecodeError:
+                            s.error("Invalid unicode escape '%s'" % systr,
+                                    pos = pos)
+                    elif c == 'x':
+                        chars.append('\\x0' + systr[2:])
+                    else:
+                        chars.append(systr)
                 else:
                     chars.append(r'\\' + systr[1:])
         elif sy == 'NEWLINE':
@@ -585,8 +592,6 @@ def p_string_literal(s):
                     (sy, s.systring))
     s.next()
     value = ''.join(chars)
-    if kind == 'u':
-        value = value.decode('raw_unicode_escape')
     #print "p_string_literal: value =", repr(value) ###
     return kind, value