reject invalid hex/unicode escape sequences instead of letting them crash the compiler
author Stefan Behnel <scoder@users.berlios.de>
Sat, 15 Jan 2011 16:56:42 +0000 (17:56 +0100)
committer Stefan Behnel <scoder@users.berlios.de>
Sat, 15 Jan 2011 16:56:42 +0000 (17:56 +0100)
Cython/Compiler/Parsing.py
tests/errors/invalid_hex_escape0.pyx [new file with mode: 0644]
tests/errors/invalid_hex_escape1.pyx [new file with mode: 0644]
tests/errors/invalid_uescape.pyx [new file with mode: 0644]
tests/errors/invalid_uescape0.pyx [new file with mode: 0644]
tests/errors/invalid_uescape2.pyx [new file with mode: 0644]
tests/run/strliterals.pyx

diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py
index 938683d4e75fdb29c7988b698a9b5312eaab654b..1b39d7cbe2c491233de9ce4b99192f94dec6bc5f 100644 (file)
@@ -782,13 +782,19 @@ def p_string_literal(s, kind_override=None):
                 elif c == u'\n':
                     pass
                 elif c == u'x':
-                    chars.append_charval( int(systr[2:], 16) )
+                    if len(systr) == 4:
+                        chars.append_charval( int(systr[2:], 16) )
+                    else:
+                        s.error("Invalid hex escape '%s'" % systr, pos=s.position())
                 elif c in u'Uu':
                     if kind in ('u', ''):
-                        chrval = int(systr[2:], 16)
-                        if chrval > 1114111: # sys.maxunicode:
-                            s.error("Invalid unicode escape '%s'" % systr,
-                                    pos = pos)
+                        if len(systr) in (6,10):
+                            chrval = int(systr[2:], 16)
+                            if chrval > 1114111: # sys.maxunicode:
+                                s.error("Invalid unicode escape '%s'" % systr,
+                                        pos = pos)
+                        else:
+                            s.error("Invalid unicode escape '%s'" % systr, pos=s.position())
                     else:
                         # unicode escapes in plain byte strings are not unescaped
                         chrval = None
diff --git a/tests/errors/invalid_hex_escape0.pyx b/tests/errors/invalid_hex_escape0.pyx
new file mode 100644 (file)
index 0000000..ea5fb92
--- /dev/null
@@ -0,0 +1,6 @@
+
+'\x'
+
+_ERRORS = '''
+2:1: Invalid hex escape '\x'
+'''
diff --git a/tests/errors/invalid_hex_escape1.pyx b/tests/errors/invalid_hex_escape1.pyx
new file mode 100644 (file)
index 0000000..3e47eff
--- /dev/null
@@ -0,0 +1,6 @@
+
+'\x1'
+
+_ERRORS = '''
+2:1: Invalid hex escape '\x'
+'''
diff --git a/tests/errors/invalid_uescape.pyx b/tests/errors/invalid_uescape.pyx
new file mode 100644 (file)
index 0000000..a203665
--- /dev/null
@@ -0,0 +1,6 @@
+
+u'\uXYZ'
+
+_ERRORS = '''
+2:2: Invalid unicode escape '\u'
+'''
diff --git a/tests/errors/invalid_uescape0.pyx b/tests/errors/invalid_uescape0.pyx
new file mode 100644 (file)
index 0000000..1aa5235
--- /dev/null
@@ -0,0 +1,6 @@
+
+u'\u'
+
+_ERRORS = '''
+2:1: Invalid unicode escape '\u'
+'''
diff --git a/tests/errors/invalid_uescape2.pyx b/tests/errors/invalid_uescape2.pyx
new file mode 100644 (file)
index 0000000..e91561f
--- /dev/null
@@ -0,0 +1,6 @@
+
+u'\u12'
+
+_ERRORS = '''
+2:1: Invalid unicode escape '\u'
+'''
diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx
index 395125d1defa4c6fcdb8a995810d2cbd5d8ec2ff..5194668e13beb0b34aac9338527d21ce8112aa93 100644 (file)
@@ -125,6 +125,13 @@ __doc__ = ur"""
     >>> len(uresc)
     9
 
+    >>> bytes_uescape
+    b'\\u1234\\U12345678\\u\\u1\\u12\\uX'
+    >>> bytes_uescape == b'\\u1234\\U12345678\\u\\u1\\u12\\uX'
+    True
+    >>> len(bytes_uescape)
+    28
+
     >>> newlines == "Aaa\n"
     True
     
@@ -165,6 +172,8 @@ sresc =  r'\12\'\"\\'
 bresc = br'\12\'\"\\'
 uresc = ur'\12\'\"\\'
 
+bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX'
+
 newlines = "Aaa\n"
 
 # T640, long literals with escapes