if identifier:
intern = True
elif identifier is None:
- if is_unicode:
+ if isinstance(text, unicode):
intern = bool(possible_unicode_identifier(text))
else:
intern = bool(possible_bytes_identifier(text))
self.unicode = is_unicode
self.intern = intern
+ def __lt__(self, other):  # less-than hook: instances are ordered by their cname attribute
+ return self.cname < other.cname
+
class GlobalState(object):
# filename_table {string : int} for finding filename table indexes
return py_string
def new_string_const(self, text, byte_string):
- cname = self.new_string_const_cname(text)
+ cname = self.new_string_const_cname(byte_string)  # derive the C name from byte_string (previously text); the StringConst built below is also stored in string_const_index under byte_string
c = StringConst(cname, text, byte_string)
self.string_const_index[byte_string] = c
return c
self.py_constants.append(c)
return c
- def new_string_const_cname(self, value, intern=None):
+ def new_string_const_cname(self, bytes_value, intern=None):
# Create a new globally-unique nice name for a C string constant.
+ try:
+ value = bytes_value.decode('ASCII')
+ except UnicodeError:
+ return self.new_const_cname()
+
if len(value) < 20 and nice_identifier(value):
return "%s%s" % (Naming.const_prefix, value)
else:
# Make sure to create a new file here so we can
# safely hard link the output files.
os.unlink(path)
- return open(path, "w")
+
+ # we use the ISO-8859-1 encoding here because we only write pure
+ # ASCII strings or (e.g. for file names) byte encoded strings as
+ # Unicode, so we need a direct mapping from the first 256 Unicode
+ # characters to a byte sequence, which ISO-8859-1 provides
+ return codecs.open(path, "w", encoding="ISO-8859-1")
def castrate_file(path, st):
# Remove junk contents from an output file after a