From: Stefan Behnel Date: Sat, 15 Jan 2011 16:56:42 +0000 (+0100) Subject: reject invalid hex/unicode escape sequences instead of letting them crash the compiler X-Git-Tag: 0.14.1rc0~6 X-Git-Url: http://git.tremily.us/gitweb.cgi?a=commitdiff_plain;h=5cdb7ba3d86bbe9b4e7956823848e779cd6e32f7;p=cython.git reject invalid hex/unicode escape sequences instead of letting them crash the compiler --- diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 938683d4..1b39d7cb 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -782,13 +782,19 @@ def p_string_literal(s, kind_override=None): elif c == u'\n': pass elif c == u'x': - chars.append_charval( int(systr[2:], 16) ) + if len(systr) == 4: + chars.append_charval( int(systr[2:], 16) ) + else: + s.error("Invalid hex escape '%s'" % systr, pos=s.position()) elif c in u'Uu': if kind in ('u', ''): - chrval = int(systr[2:], 16) - if chrval > 1114111: # sys.maxunicode: - s.error("Invalid unicode escape '%s'" % systr, - pos = pos) + if len(systr) in (6,10): + chrval = int(systr[2:], 16) + if chrval > 1114111: # sys.maxunicode: + s.error("Invalid unicode escape '%s'" % systr, + pos = pos) + else: + s.error("Invalid unicode escape '%s'" % systr, pos=s.position()) else: # unicode escapes in plain byte strings are not unescaped chrval = None diff --git a/tests/errors/invalid_hex_escape0.pyx b/tests/errors/invalid_hex_escape0.pyx new file mode 100644 index 00000000..ea5fb925 --- /dev/null +++ b/tests/errors/invalid_hex_escape0.pyx @@ -0,0 +1,6 @@ + +'\x' + +_ERRORS = ''' +2:1: Invalid hex escape '\x' +''' diff --git a/tests/errors/invalid_hex_escape1.pyx b/tests/errors/invalid_hex_escape1.pyx new file mode 100644 index 00000000..3e47eff8 --- /dev/null +++ b/tests/errors/invalid_hex_escape1.pyx @@ -0,0 +1,6 @@ + +'\x1' + +_ERRORS = ''' +2:1: Invalid hex escape '\x' +''' diff --git a/tests/errors/invalid_uescape.pyx b/tests/errors/invalid_uescape.pyx new file mode 100644 index 00000000..a2036659 --- /dev/null +++ b/tests/errors/invalid_uescape.pyx @@ -0,0 +1,6 @@ + +u'\uXYZ' + +_ERRORS = ''' +2:2: Invalid unicode escape '\u' +''' diff --git a/tests/errors/invalid_uescape0.pyx b/tests/errors/invalid_uescape0.pyx new file mode 100644 index 00000000..1aa52350 --- /dev/null +++ b/tests/errors/invalid_uescape0.pyx @@ -0,0 +1,6 @@ + +u'\u' + +_ERRORS = ''' +2:1: Invalid unicode escape '\u' +''' diff --git a/tests/errors/invalid_uescape2.pyx b/tests/errors/invalid_uescape2.pyx new file mode 100644 index 00000000..e91561f4 --- /dev/null +++ b/tests/errors/invalid_uescape2.pyx @@ -0,0 +1,6 @@ + +u'\u12' + +_ERRORS = ''' +2:1: Invalid unicode escape '\u' +''' diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx index 395125d1..5194668e 100644 --- a/tests/run/strliterals.pyx +++ b/tests/run/strliterals.pyx @@ -125,6 +125,13 @@ __doc__ = ur""" >>> len(uresc) 9 + >>> bytes_uescape + b'\\u1234\\U12345678\\u\\u1\\u12\\uX' + >>> bytes_uescape == b'\\u1234\\U12345678\\u\\u1\\u12\\uX' + True + >>> len(bytes_uescape) + 28 + >>> newlines == "Aaa\n" True @@ -165,6 +172,8 @@ sresc = r'\12\'\"\\' bresc = br'\12\'\"\\' uresc = ur'\12\'\"\\' +bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX' + newlines = "Aaa\n" # T640, long literals with escapes