unescape all string content in the parser and escape it on the way out
author Stefan Behnel <scoder@users.berlios.de>
Tue, 12 Aug 2008 10:25:22 +0000 (12:25 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Tue, 12 Aug 2008 10:25:22 +0000 (12:25 +0200)
Otherwise, different ways of spelling special characters may or may not end up correctly escaped in the C file.

Cython/Compiler/ExprNodes.py
Cython/Compiler/Parsing.py
Cython/Utils.py
tests/run/charescape.pyx [new file with mode: 0644]
tests/run/strescapes.pyx

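diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py
index d52f58943d12a6405807461f42d7845669e89309..e9dbdd64f9fa132e63652b75816e69896e0b4295 100644 (file)
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py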
@@ -642,7 +642,13 @@ class CharNode(ConstNode):
         return ord(self.value)
     
     def calculate_result_code(self):
-        return "'%s'" % self.value
+        if self.value == "'":
+            return r"'\''"
+        char = ord(self.value)
+        if char < 32:
+            return "'\\x%02X'" % char
+        else:
+            return "'%s'" % self.value
 
 
 class IntNode(ConstNode):
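diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py
index 298fc092f33e950154682d09920ef31d75b27994..18da59b2aeac97952f774dce70d397503eb90f8a 100644 (file)
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py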
@@ -585,25 +585,28 @@ def p_string_literal(s):
         sy = s.sy
         #print "p_string_literal: sy =", sy, repr(s.systring) ###
         if sy == 'CHARS':
-            systr = s.systring
-            if len(systr) == 1 and systr in "'\"\n":
-                chars.append('\\')
-            chars.append(systr)
+            chars.append(s.systring)
         elif sy == 'ESCAPE':
             systr = s.systring
             if is_raw:
                 if systr == '\\\n':
-                    chars.append(r'\\\n')
-                elif systr == r'\"':
-                    chars.append(r'\\\"')
-                elif systr == r'\\':
-                    chars.append(r'\\\\')
+                    chars.append('\n')
+                elif systr == '\\\"':
+                    chars.append('"')
+                elif systr == '\\\'':
+                    chars.append("'")
+                elif systr == '\\\\':
+                    chars.append('\\')
                 else:
-                    chars.append('\\' + systr)
+                    chars.append(systr)
             else:
                 c = systr[1]
-                if c in "'\"\\abfnrtv01234567":
-                    chars.append(systr)
+                if c in "01234567":
+                    chars.append(chr(int(systr[1:])))
+                elif c in "'\"\\":
+                    chars.append(c)
+                elif c in "abfnrtv":
+                    chars.append(Utils.char_from_escape_sequence(systr))
                 elif c == '\n':
                     pass
                 elif c in 'Uux':
@@ -616,11 +619,11 @@ def p_string_literal(s):
                     else:
                         # unicode escapes in plain byte strings are not unescaped
                         strval = systr
-                    chars.append(strval.replace('\\', '\\\\'))
+                    chars.append(strval)
                 else:
                     chars.append(r'\\' + systr[1:])
         elif sy == 'NEWLINE':
-            chars.append(r'\n')
+            chars.append('\n')
         elif sy == 'END_STRING':
             break
         elif sy == 'EOF':
@@ -629,8 +632,11 @@ def p_string_literal(s):
             s.error(
                 "Unexpected token %r:%r in string literal" %
                     (sy, s.systring))
+    string = u''.join(chars)
+    if kind == 'c' and len(string) != 1:
+        error(pos, u"invalid character literal: %r" % string)
     s.next()
-    value = Utils.EncodedString( u''.join(chars) )
+    value = Utils.EncodedString(string)
     if kind != 'u':
         value.encoding = s.source_encoding
     #print "p_string_literal: value =", repr(value) ###
diff --git a/Cython/Utils.py b/Cython/Utils.py
index f17ab613a2cf029df1d1035cc0d972fe4c655f93..83ca46b501ba9024aa7c183c68d79272caf3951e 100644 (file)
--- a/Cython/Utils.py
+++ b/Cython/Utils.py
@@ -99,25 +99,39 @@ class EncodedString(unicode):
 #        return unicode.__eq__(self, other) and \
 #            getattr(other, 'encoding', '') == self.encoding
 
-def _to_oct_sequence(s):
+char_from_escape_sequence = {
+    r'\a' : '\a',
+    r'\b' : '\b',
+    r'\f' : '\f',
+    r'\n' : '\n',
+    r'\r' : '\r',
+    r'\t' : '\t',
+    r'\v' : '\v',
+    }.get
+
+def _to_escape_sequence(s):
     if s in '\n\r\t':
         return repr(s)[1:-1]
+    elif s == '"':
+        return r'\"'
     else:
+        # oct passes much better than hex
         return ''.join(['\\%03o' % ord(c) for c in s])
 
-_c_special = ('\0', '\n','\r','\t', '??', '<:', ':>', '<%', '%>', '%:', '%:')
-_c_special_replacements = zip(_c_special, map(_to_oct_sequence, _c_special))
+_c_special = ('\0', '\n', '\r', '\t', '??', '"')
+_c_special_replacements = zip(_c_special, map(_to_escape_sequence, _c_special))
 
-def _build_special_test():
+def _build_specials_test():
     subexps = []
     for special in _c_special:
         regexp = ''.join(['[%s]' % c for c in special])
         subexps.append(regexp)
-    return re.compile('(' + '|'.join(subexps) + ')').search
+    return re.compile('|'.join(subexps)).search
 
-_has_specials = _build_special_test()
+_has_specials = _build_specials_test()
 
 def escape_byte_string(s):
+    s = s.replace('\\', '\\\\')
     if _has_specials(s):
         for special, replacement in _c_special_replacements:
             s = s.replace(special, replacement)
diff --git a/tests/run/charescape.pyx b/tests/run/charescape.pyx
new file mode 100644 (file)
index 0000000..1216d26
--- /dev/null
+++ b/tests/run/charescape.pyx
@@ -0,0 +1,64 @@
+__doc__ = u"""
+>>> s = test()
+>>> assert s == ''.join([chr(i) for i in range(1,49)]), s
+"""
+
+def test():
+    cdef char s[50]
+
+    s[ 0] = c'\0'
+    s[ 1] = c'\x01'
+    s[ 2] = c'\x02'
+    s[ 3] = c'\x03'
+    s[ 4] = c'\x04'
+    s[ 5] = c'\x05'
+    s[ 6] = c'\x06'
+    s[ 7] = c'\x07'
+    s[ 8] = c'\x08'
+    s[ 9] = c'\x09'
+    s[10] = c'\x0A'
+    s[11] = c'\x0B'
+    s[12] = c'\x0C'
+    s[13] = c'\x0D'
+    s[14] = c'\x0E'
+    s[15] = c'\x0F'
+    s[16] = c'\x10'
+    s[17] = c'\x11'
+    s[18] = c'\x12'
+    s[19] = c'\x13'
+    s[20] = c'\x14'
+    s[21] = c'\x15'
+    s[22] = c'\x16'
+    s[23] = c'\x17'
+    s[24] = c'\x18'
+    s[25] = c'\x19'
+    s[26] = c'\x1A'
+    s[27] = c'\x1B'
+    s[28] = c'\x1C'
+    s[29] = c'\x1D'
+    s[30] = c'\x1E'
+    s[31] = c'\x1F'
+    s[32] = c'\x20'
+    s[33] = c'\x21'
+    s[34] = c'\x22'
+    s[35] = c'\x23'
+    s[36] = c'\x24'
+    s[37] = c'\x25'
+    s[38] = c'\x26'
+    s[39] = c'\x27'
+    s[40] = c'\x28'
+    s[41] = c'\x29'
+    s[42] = c'\x2A'
+    s[43] = c'\x2B'
+    s[44] = c'\x2C'
+    s[45] = c'\x2D'
+    s[46] = c'\x2E'
+    s[47] = c'\x2F'
+    s[48] = c'\x30'
+
+    s[49] = c'\x00'
+
+    assert s[ 0] == c'\x00'
+    assert s[49] == c'\0'
+
+    return &s[1]
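diff --git a/tests/run/strescapes.pyx b/tests/run/strescapes.pyx
index b5d99eedb24ee88d61a2ad8b5625fbac8d51c926..01b9d7953ac561d9930a93c38e7838bc16c6945b 100644 (file)
--- a/tests/run/strescapes.pyx
+++ b/tests/run/strescapes.pyx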
@@ -4,6 +4,11 @@ __doc__ = u"""
 ... b'\\x1234',
 ... b'\\x0A12\\x0C34',
 ... b'\\x0A57',
+... b'\\x0A',
+... b'\\'',
+... b"\\'",
+... b"\\"",
+... b'\\"',
 ... b'abc\\x12def',
 ... u'\\u1234',
 ... u'\\U00001234',
@@ -28,14 +33,19 @@ __doc__ = u"""
 
 import sys
 if sys.version_info[0] < 3:
-    __doc__ = __doc__.replace(u" b'", u" '")
+    __doc__ = __doc__.replace(u" b'", u" '").replace(u' b"', u' "')
 else:
-    __doc__ = __doc__.replace(u" u'", u" '")
+    __doc__ = __doc__.replace(u" u'", u" '").replace(u' u"', u' "')
 
 c_strings = [
 (b'\x1234', 3),
 (b'\x0A12\x0C34', 6),
 (b'\x0A57', 3),
+(b'\x0A', 1),
+(b'\'', 1),
+(b"\'", 1),
+(b"\"", 1),
+(b'\"', 1),
 (b'abc\x12def', 7),
 (u'\u1234', 1),
 (u'\U00001234', 1),