prevent control characters in unicode literals (ord<32) from sneaking into the C...
author Stefan Behnel <scoder@users.berlios.de>
Mon, 9 Aug 2010 09:39:02 +0000 (11:39 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Mon, 9 Aug 2010 09:39:02 +0000 (11:39 +0200)
Cython/Compiler/StringEncoding.py

index 09ca20794061ca772e2f96915ae779118ca4b144..b5bd45f0670d5399decdf99cb2b5754e24971961 100644 (file)
@@ -135,7 +135,7 @@ def _to_escape_sequence(s):
         # within a character sequence, oct passes much better than hex
         return ''.join(['\\%03o' % ord(c) for c in s])
 
-_c_special = ('\\', '\0', '\n', '\r', '\t', '??', '"')
+_c_special = ('\\', '??', '"') + tuple(map(chr, range(32)))
 _c_special_replacements = [(orig.encode('ASCII'),
                             _to_escape_sequence(orig).encode('ASCII'))
                            for orig in _c_special ]
@@ -171,7 +171,8 @@ def escape_byte_string(s):
     """
     if _has_specials(s):
         for special, replacement in _c_special_replacements:
-            s = s.replace(special, replacement)
+            if special in s:
+                s = s.replace(special, replacement)
     try:
         return s.decode("ASCII") # trial decoding: plain ASCII => done
     except UnicodeDecodeError: