From 62fc87e0a0968f9c01d25f59243979878c94fada Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Tue, 12 Aug 2008 12:25:22 +0200 Subject: [PATCH] unescape all string content in the parser and escape it on the way out otherwise, different ways of spelling special characters can end up being correctly escaped or not in the C file --- Cython/Compiler/ExprNodes.py | 8 ++++- Cython/Compiler/Parsing.py | 36 +++++++++++--------- Cython/Utils.py | 26 +++++++++++---- tests/run/charescape.pyx | 64 ++++++++++++++++++++++++++++++++++++ tests/run/strescapes.pyx | 14 ++++++-- 5 files changed, 124 insertions(+), 24 deletions(-) create mode 100644 tests/run/charescape.pyx diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index d52f5894..e9dbdd64 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -642,7 +642,13 @@ class CharNode(ConstNode): return ord(self.value) def calculate_result_code(self): - return "'%s'" % self.value + if self.value == "'": + return r"'\''" + char = ord(self.value) + if char < 32: + return "'\\x%02X'" % char + else: + return "'%s'" % self.value class IntNode(ConstNode): diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index 298fc092..18da59b2 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -585,25 +585,28 @@ def p_string_literal(s): sy = s.sy #print "p_string_literal: sy =", sy, repr(s.systring) ### if sy == 'CHARS': - systr = s.systring - if len(systr) == 1 and systr in "'\"\n": - chars.append('\\') - chars.append(systr) + chars.append(s.systring) elif sy == 'ESCAPE': systr = s.systring if is_raw: if systr == '\\\n': - chars.append(r'\\\n') - elif systr == r'\"': - chars.append(r'\\\"') - elif systr == r'\\': - chars.append(r'\\\\') + chars.append('\n') + elif systr == '\\\"': + chars.append('"') + elif systr == '\\\'': + chars.append("'") + elif systr == '\\\\': + chars.append('\\') else: - chars.append('\\' + systr) + chars.append(systr) else: c = systr[1] - if c in "'\"\\abfnrtv01234567": - chars.append(systr) + if c in "01234567": + chars.append(chr(int(systr[1:]))) + elif c in "'\"\\": + chars.append(c) + elif c in "abfnrtv": + chars.append(Utils.char_from_escape_sequence(systr)) elif c == '\n': pass elif c in 'Uux': @@ -616,11 +619,11 @@ def p_string_literal(s): else: # unicode escapes in plain byte strings are not unescaped strval = systr - chars.append(strval.replace('\\', '\\\\')) + chars.append(strval) else: chars.append(r'\\' + systr[1:]) elif sy == 'NEWLINE': - chars.append(r'\n') + chars.append('\n') elif sy == 'END_STRING': break elif sy == 'EOF': @@ -629,8 +632,11 @@ def p_string_literal(s): s.error( "Unexpected token %r:%r in string literal" % (sy, s.systring)) + string = u''.join(chars) + if kind == 'c' and len(string) != 1: + error(pos, u"invalid character literal: %r" % string) s.next() - value = Utils.EncodedString( u''.join(chars) ) + value = Utils.EncodedString(string) if kind != 'u': value.encoding = s.source_encoding #print "p_string_literal: value =", repr(value) ### diff --git a/Cython/Utils.py b/Cython/Utils.py index f17ab613..83ca46b5 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -99,25 +99,39 @@ class EncodedString(unicode): # return unicode.__eq__(self, other) and \ # getattr(other, 'encoding', '') == self.encoding -def _to_oct_sequence(s): +char_from_escape_sequence = { + r'\a' : '\a', + r'\b' : '\b', + r'\f' : '\f', + r'\n' : '\n', + r'\r' : '\r', + r'\t' : '\t', + r'\v' : '\v', + }.get + +def _to_escape_sequence(s): if s in '\n\r\t': return repr(s)[1:-1] + elif s == '"': + return r'\"' else: + # oct passes much better than hex return ''.join(['\\%03o' % ord(c) for c in s]) -_c_special = ('\0', '\n','\r','\t', '??', '<:', ':>', '<%', '%>', '%:', '%:') -_c_special_replacements = zip(_c_special, map(_to_oct_sequence, _c_special)) +_c_special = ('\0', '\n', '\r', '\t', '??', '"') +_c_special_replacements = zip(_c_special, map(_to_escape_sequence, _c_special)) -def _build_special_test(): +def _build_specials_test(): subexps = [] for special in _c_special: regexp = ''.join(['[%s]' % c for c in special]) subexps.append(regexp) - return re.compile('(' + '|'.join(subexps) + ')').search + return re.compile('|'.join(subexps)).search -_has_specials = _build_special_test() +_has_specials = _build_specials_test() def escape_byte_string(s): + s = s.replace('\\', '\\\\') if _has_specials(s): for special, replacement in _c_special_replacements: s = s.replace(special, replacement) diff --git a/tests/run/charescape.pyx b/tests/run/charescape.pyx new file mode 100644 index 00000000..1216d260 --- /dev/null +++ b/tests/run/charescape.pyx @@ -0,0 +1,64 @@ +__doc__ = u""" +>>> s = test() +>>> assert s == ''.join([chr(i) for i in range(1,49)]), s +""" + +def test(): + cdef char s[50] + + s[ 0] = c'\0' + s[ 1] = c'\x01' + s[ 2] = c'\x02' + s[ 3] = c'\x03' + s[ 4] = c'\x04' + s[ 5] = c'\x05' + s[ 6] = c'\x06' + s[ 7] = c'\x07' + s[ 8] = c'\x08' + s[ 9] = c'\x09' + s[10] = c'\x0A' + s[11] = c'\x0B' + s[12] = c'\x0C' + s[13] = c'\x0D' + s[14] = c'\x0E' + s[15] = c'\x0F' + s[16] = c'\x10' + s[17] = c'\x11' + s[18] = c'\x12' + s[19] = c'\x13' + s[20] = c'\x14' + s[21] = c'\x15' + s[22] = c'\x16' + s[23] = c'\x17' + s[24] = c'\x18' + s[25] = c'\x19' + s[26] = c'\x1A' + s[27] = c'\x1B' + s[28] = c'\x1C' + s[29] = c'\x1D' + s[30] = c'\x1E' + s[31] = c'\x1F' + s[32] = c'\x20' + s[33] = c'\x21' + s[34] = c'\x22' + s[35] = c'\x23' + s[36] = c'\x24' + s[37] = c'\x25' + s[38] = c'\x26' + s[39] = c'\x27' + s[40] = c'\x28' + s[41] = c'\x29' + s[42] = c'\x2A' + s[43] = c'\x2B' + s[44] = c'\x2C' + s[45] = c'\x2D' + s[46] = c'\x2E' + s[47] = c'\x2F' + s[48] = c'\x30' + + s[49] = c'\x00' + + assert s[ 0] == c'\x00' + assert s[49] == c'\0' + + return &s[1] diff --git a/tests/run/strescapes.pyx b/tests/run/strescapes.pyx index b5d99eed..01b9d795 100644 --- a/tests/run/strescapes.pyx +++ b/tests/run/strescapes.pyx @@ -4,6 +4,11 @@ __doc__ = u""" ... b'\\x1234', ... b'\\x0A12\\x0C34', ... b'\\x0A57', +... b'\\x0A', +... b'\\'', +... b"\\'", +... b"\\"", +... b'\\"', ... b'abc\\x12def', ... u'\\u1234', ... u'\\U00001234', @@ -28,14 +33,19 @@ __doc__ = u""" import sys if sys.version_info[0] < 3: - __doc__ = __doc__.replace(u" b'", u" '") + __doc__ = __doc__.replace(u" b'", u" '").replace(u' b"', u' "') else: - __doc__ = __doc__.replace(u" u'", u" '") + __doc__ = __doc__.replace(u" u'", u" '").replace(u' u"', u' "') c_strings = [ (b'\x1234', 3), (b'\x0A12\x0C34', 6), (b'\x0A57', 3), +(b'\x0A', 1), +(b'\'', 1), +(b"\'", 1), +(b"\"", 1), +(b'\"', 1), (b'abc\x12def', 7), (u'\u1234', 1), (u'\U00001234', 1), -- 2.26.2