# return unicode.__eq__(self, other) and \
# getattr(other, 'encoding', '') == self.encoding
+def _to_oct_sequence(s):
+    """Spell every character of *s* as a backslash plus three octal digits.
+
+    The result is plain text such as ``\\077\\077`` for ``'??'``; an empty
+    input gives an empty string.
+    """
+    escaped = []
+    for char in s:
+        escaped.append('\\%03o' % ord(char))
+    return ''.join(escaped)
+
+# Sequences that escape_byte_string() rewrites as octal escapes: the NUL
+# character, the trigraph prefix '??' and the C digraph spellings.  '%:' is
+# listed once only; replacing it also breaks up the '%:%:' digraph.
+_c_special = ('\0', '??', '<:', ':>', '<%', '%>', '%:')
+# Materialised as a tuple: a bare zip() is a one-shot iterator on Python 3,
+# so iterating it a second time would yield no replacements at all.
+_c_special_replacements = tuple(
+    zip(_c_special, map(_to_oct_sequence, _c_special)))
+
+def _build_special_test():
+    """Build the search function that spots sequences needing an escape.
+
+    Each character of each sequence in ``_c_special`` (plus newline,
+    carriage return and tab) is wrapped in its own character class, and the
+    resulting sub-patterns are joined into a single alternation group.
+
+    Returns the bound ``search`` method of the compiled pattern.
+    """
+    sequences = _c_special + ('\n', '\r', '\t')
+    alternatives = [
+        ''.join('[%s]' % char for char in sequence)
+        for sequence in sequences
+    ]
+    pattern = '(%s)' % '|'.join(alternatives)
+    return re.compile(pattern).search
+
+_has_specials = _build_special_test()
+
def escape_byte_string(s):
- s = s.replace('\0', r'\000').replace('\x0A', r'\012').replace('\x0C', r'\014')
+ if _has_specials(s):
+ s = s.replace('\n', r'\n').replace('\r', r'\r').replace('\t', r'\t')
+ for special, replacement in _c_special_replacements:
+ s = s.replace(special, replacement)
try:
s.decode("ASCII")
return s
... b'\\x0A57',
... b'abc\\x12def',
... u'\\u1234',
-... u'\\U00041234',
+... u'\\U00001234',
... b'\\u1234',
-... b'\\U00041234',
+... b'\\U00001234',
+... b'\\n\\r\\t',
+... b':>',
+... b'??>',
+... b'\\0\\0\\0',
... ]
->>> for i, (py_string, c_string) in enumerate(zip(py_strings, c_strings)):
+>>> for i, (py_string, (c_string, length)) in enumerate(zip(py_strings, c_strings)):
... assert py_string == c_string, "%d: %r != %r" % (i, py_string, c_string)
+... assert len(py_string) == length, (
+... "%d: wrong length of %r, got %d, expected %d" % (
+... i, py_string, len(py_string), length))
+... assert len(c_string) == length, (
+... "%d: wrong length of %r, got %d, expected %d" % (
+... i, c_string, len(c_string), length))
"""
+# Rewrite every " u'" in the module docstring as " '", dropping the u prefix.
__doc__ = __doc__.replace(u" u'", u" '")
+# Each added entry pairs a string literal with the length that the doctest
+# loop asserts for it via len(c_string) == length.
c_strings = [
-b'\x1234',
-b'\x0A12\x0C34',
-b'\x0A57',
-b'abc\x12def',
-u'\u1234',
-u'\U00041234',
-b'\u1234',
-b'\U00041234',
+(b'\x1234', 3),
+(b'\x0A12\x0C34', 6),
+(b'\x0A57', 3),
+(b'abc\x12def', 7),
+(u'\u1234', 1),
+(u'\U00001234', 1),
+(b'\u1234', 6),
+(b'\U00001234', 10),
+(b'\n\r\t', 3),
+(b':>', 2),
+(b'??>', 3),
+(b'\0\0\0', 3),
]