From 5cdb7ba3d86bbe9b4e7956823848e779cd6e32f7 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sat, 15 Jan 2011 17:56:42 +0100 Subject: [PATCH] reject invalid hex/unicode escape sequences instead of letting them crash the compiler --- Cython/Compiler/Parsing.py | 16 +++++++++++----- tests/errors/invalid_hex_escape0.pyx | 6 ++++++ tests/errors/invalid_hex_escape1.pyx | 6 ++++++ tests/errors/invalid_uescape.pyx | 6 ++++++ tests/errors/invalid_uescape0.pyx | 6 ++++++ tests/errors/invalid_uescape2.pyx | 6 ++++++ tests/run/strliterals.pyx | 9 +++++++++ 7 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 tests/errors/invalid_hex_escape0.pyx create mode 100644 tests/errors/invalid_hex_escape1.pyx create mode 100644 tests/errors/invalid_uescape.pyx create mode 100644 tests/errors/invalid_uescape0.pyx create mode 100644 tests/errors/invalid_uescape2.pyx diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 938683d4..1b39d7cb 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -782,13 +782,19 @@ def p_string_literal(s, kind_override=None): elif c == u'\n': pass elif c == u'x': - chars.append_charval( int(systr[2:], 16) ) + if len(systr) == 4: + chars.append_charval( int(systr[2:], 16) ) + else: + s.error("Invalid hex escape '%s'" % systr, pos=s.position()) elif c in u'Uu': if kind in ('u', ''): - chrval = int(systr[2:], 16) - if chrval > 1114111: # sys.maxunicode: - s.error("Invalid unicode escape '%s'" % systr, - pos = pos) + if len(systr) in (6,10): + chrval = int(systr[2:], 16) + if chrval > 1114111: # sys.maxunicode: + s.error("Invalid unicode escape '%s'" % systr, + pos = pos) + else: + s.error("Invalid unicode escape '%s'" % systr, pos=s.position()) else: # unicode escapes in plain byte strings are not unescaped chrval = None diff --git a/tests/errors/invalid_hex_escape0.pyx b/tests/errors/invalid_hex_escape0.pyx new file mode 100644 index 00000000..ea5fb925 --- /dev/null +++ b/tests/errors/invalid_hex_escape0.pyx @@ -0,0 +1,6 @@ + +'\x' + +_ERRORS = ''' +2:1: Invalid hex escape '\x' +''' diff --git a/tests/errors/invalid_hex_escape1.pyx b/tests/errors/invalid_hex_escape1.pyx new file mode 100644 index 00000000..3e47eff8 --- /dev/null +++ b/tests/errors/invalid_hex_escape1.pyx @@ -0,0 +1,6 @@ + +'\x1' + +_ERRORS = ''' +2:1: Invalid hex escape '\x' +''' diff --git a/tests/errors/invalid_uescape.pyx b/tests/errors/invalid_uescape.pyx new file mode 100644 index 00000000..a2036659 --- /dev/null +++ b/tests/errors/invalid_uescape.pyx @@ -0,0 +1,6 @@ + +u'\uXYZ' + +_ERRORS = ''' +2:2: Invalid unicode escape '\u' +''' diff --git a/tests/errors/invalid_uescape0.pyx b/tests/errors/invalid_uescape0.pyx new file mode 100644 index 00000000..1aa52350 --- /dev/null +++ b/tests/errors/invalid_uescape0.pyx @@ -0,0 +1,6 @@ + +u'\u' + +_ERRORS = ''' +2:1: Invalid unicode escape '\u' +''' diff --git a/tests/errors/invalid_uescape2.pyx b/tests/errors/invalid_uescape2.pyx new file mode 100644 index 00000000..e91561f4 --- /dev/null +++ b/tests/errors/invalid_uescape2.pyx @@ -0,0 +1,6 @@ + +u'\u12' + +_ERRORS = ''' +2:1: Invalid unicode escape '\u' +''' diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx index 395125d1..5194668e 100644 --- a/tests/run/strliterals.pyx +++ b/tests/run/strliterals.pyx @@ -125,6 +125,13 @@ __doc__ = ur""" >>> len(uresc) 9 + >>> bytes_uescape + b'\\u1234\\U12345678\\u\\u1\\u12\\uX' + >>> bytes_uescape == b'\\u1234\\U12345678\\u\\u1\\u12\\uX' + True + >>> len(bytes_uescape) + 28 + >>> newlines == "Aaa\n" True @@ -165,6 +172,8 @@ sresc = r'\12\'\"\\' bresc = br'\12\'\"\\' uresc = ur'\12\'\"\\' +bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX' + newlines = "Aaa\n" # T640, long literals with escapes -- 2.26.2