otherwise, different ways of spelling special characters can end up being correctly escaped or not in the C file
return ord(self.value)
def calculate_result_code(self):
- return "'%s'" % self.value
+ if self.value == "'":
+ return r"'\''"
+ char = ord(self.value)
+ if char < 32:
+ return "'\\x%02X'" % char
+ else:
+ return "'%s'" % self.value
class IntNode(ConstNode):
sy = s.sy
#print "p_string_literal: sy =", sy, repr(s.systring) ###
if sy == 'CHARS':
- systr = s.systring
- if len(systr) == 1 and systr in "'\"\n":
- chars.append('\\')
- chars.append(systr)
+ chars.append(s.systring)
elif sy == 'ESCAPE':
systr = s.systring
if is_raw:
if systr == '\\\n':
- chars.append(r'\\\n')
- elif systr == r'\"':
- chars.append(r'\\\"')
- elif systr == r'\\':
- chars.append(r'\\\\')
+ chars.append('\n')
+ elif systr == '\\\"':
+ chars.append('"')
+ elif systr == '\\\'':
+ chars.append("'")
+ elif systr == '\\\\':
+ chars.append('\\')
else:
- chars.append('\\' + systr)
+ chars.append(systr)
else:
c = systr[1]
- if c in "'\"\\abfnrtv01234567":
- chars.append(systr)
+ if c in "01234567":
+ chars.append(chr(int(systr[1:])))
+ elif c in "'\"\\":
+ chars.append(c)
+ elif c in "abfnrtv":
+ chars.append(Utils.char_from_escape_sequence(systr))
elif c == '\n':
pass
elif c in 'Uux':
else:
# unicode escapes in plain byte strings are not unescaped
strval = systr
- chars.append(strval.replace('\\', '\\\\'))
+ chars.append(strval)
else:
chars.append(r'\\' + systr[1:])
elif sy == 'NEWLINE':
- chars.append(r'\n')
+ chars.append('\n')
elif sy == 'END_STRING':
break
elif sy == 'EOF':
s.error(
"Unexpected token %r:%r in string literal" %
(sy, s.systring))
+ string = u''.join(chars)
+ if kind == 'c' and len(string) != 1:
+ error(pos, u"invalid character literal: %r" % string)
s.next()
- value = Utils.EncodedString( u''.join(chars) )
+ value = Utils.EncodedString(string)
if kind != 'u':
value.encoding = s.source_encoding
#print "p_string_literal: value =", repr(value) ###
# return unicode.__eq__(self, other) and \
# getattr(other, 'encoding', '') == self.encoding
-def _to_oct_sequence(s):
+# Lookup from a two-character escape spelling (backslash + letter, as it
+# appears in source text) to the single character it denotes.  This is the
+# dict's bound .get, so a spelling that is not listed yields None.
+char_from_escape_sequence = {
+ r'\a' : '\a',
+ r'\b' : '\b',
+ r'\f' : '\f',
+ r'\n' : '\n',
+ r'\r' : '\r',
+ r'\t' : '\t',
+ r'\v' : '\v',
+ }.get
+
+def _to_escape_sequence(s):
+ # Return the spelling used for the special sequence s inside a C string
+ # literal: the short backslash form for newline, carriage return, tab and
+ # the double quote, and three-digit octal escapes for everything else.
if s in '\n\r\t':
return repr(s)[1:-1]
+ elif s == '"':
+ return r'\"'
else:
+ # oct passes much better than hex
+ # (a C octal escape ends after at most three digits, whereas a hex
+ # escape would also absorb any hex digits that follow it)
return ''.join(['\\%03o' % ord(c) for c in s])
-_c_special = ('\0', '\n','\r','\t', '??', '<:', ':>', '<%', '%>', '%:', '%:')
-_c_special_replacements = zip(_c_special, map(_to_oct_sequence, _c_special))
+_c_special = ('\0', '\n', '\r', '\t', '??', '"')
+_c_special_replacements = zip(_c_special, map(_to_escape_sequence, _c_special))
-def _build_special_test():
+def _build_specials_test():
+ # Compile a single alternation over all entries of _c_special and return
+ # the compiled pattern's bound search method.
subexps = []
for special in _c_special:
+ # each character becomes its own one-character class, so regex
+ # metacharacters such as '?' match literally without backslashes
regexp = ''.join(['[%s]' % c for c in special])
subexps.append(regexp)
- return re.compile('(' + '|'.join(subexps) + ')').search
+ return re.compile('|'.join(subexps)).search
-_has_specials = _build_special_test()
+_has_specials = _build_specials_test()
def escape_byte_string(s):
+ s = s.replace('\\', '\\\\')
if _has_specials(s):
for special, replacement in _c_special_replacements:
s = s.replace(special, replacement)
--- /dev/null
+# test() returns a char*, which arrives as a byte string; the expected value
+# is encoded so that the comparison also holds where str and bytes differ.
+__doc__ = u"""
+>>> s = test()
+>>> assert s == ''.join([chr(i) for i in range(1,49)]).encode('ASCII'), s
+"""
+
+def test():
+ # Fill a 50-element C char buffer through character literals, using both
+ # the c'\0' and the c'\xNN' spellings, then return a pointer to index 1.
+ # Indices 1..48 hold the values 1..48 and index 49 holds 0, which is what
+ # the module doctest compares against.
+ cdef char s[50]
+
+ s[ 0] = c'\0'
+ s[ 1] = c'\x01'
+ s[ 2] = c'\x02'
+ s[ 3] = c'\x03'
+ s[ 4] = c'\x04'
+ s[ 5] = c'\x05'
+ s[ 6] = c'\x06'
+ s[ 7] = c'\x07'
+ s[ 8] = c'\x08'
+ s[ 9] = c'\x09'
+ s[10] = c'\x0A'
+ s[11] = c'\x0B'
+ s[12] = c'\x0C'
+ s[13] = c'\x0D'
+ s[14] = c'\x0E'
+ s[15] = c'\x0F'
+ s[16] = c'\x10'
+ s[17] = c'\x11'
+ s[18] = c'\x12'
+ s[19] = c'\x13'
+ s[20] = c'\x14'
+ s[21] = c'\x15'
+ s[22] = c'\x16'
+ s[23] = c'\x17'
+ s[24] = c'\x18'
+ s[25] = c'\x19'
+ s[26] = c'\x1A'
+ s[27] = c'\x1B'
+ s[28] = c'\x1C'
+ s[29] = c'\x1D'
+ s[30] = c'\x1E'
+ s[31] = c'\x1F'
+ s[32] = c'\x20'
+ s[33] = c'\x21'
+ s[34] = c'\x22'
+ s[35] = c'\x23'
+ s[36] = c'\x24'
+ s[37] = c'\x25'
+ s[38] = c'\x26'
+ s[39] = c'\x27'
+ s[40] = c'\x28'
+ s[41] = c'\x29'
+ s[42] = c'\x2A'
+ s[43] = c'\x2B'
+ s[44] = c'\x2C'
+ s[45] = c'\x2D'
+ s[46] = c'\x2E'
+ s[47] = c'\x2F'
+ s[48] = c'\x30'
+
+ s[49] = c'\x00'
+
+ # both spellings of the NUL character must compare equal
+ assert s[ 0] == c'\x00'
+ assert s[49] == c'\0'
+
+ # start at index 1 so the leading NUL at index 0 is not part of the result
+ return &s[1]
... b'\\x1234',
... b'\\x0A12\\x0C34',
... b'\\x0A57',
+... b'\\x0A',
+... b'\\'',
+... b"\\'",
+... b"\\"",
+... b'\\"',
... b'abc\\x12def',
... u'\\u1234',
... u'\\U00001234',
import sys
if sys.version_info[0] < 3:
- __doc__ = __doc__.replace(u" b'", u" '")
+ __doc__ = __doc__.replace(u" b'", u" '").replace(u' b"', u' "')
else:
- __doc__ = __doc__.replace(u" u'", u" '")
+ __doc__ = __doc__.replace(u" u'", u" '").replace(u' u"', u' "')
c_strings = [
(b'\x1234', 3),
(b'\x0A12\x0C34', 6),
(b'\x0A57', 3),
+(b'\x0A', 1),
+(b'\'', 1),
+(b"\'", 1),
+(b"\"", 1),
+(b'\"', 1),
(b'abc\x12def', 7),
(u'\u1234', 1),
(u'\U00001234', 1),