From: Stefan Behnel Date: Mon, 6 Jul 2009 09:47:35 +0000 (+0200) Subject: Py3 fixes X-Git-Tag: 0.12.alpha0~259 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=16ac5bf9f877179e9844f348385a98d35cb50739;p=cython.git Py3 fixes --- diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index 0fe26a50..401f8085 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -317,7 +317,7 @@ class StringConst(object): if identifier: intern = True elif identifier is None: - if is_unicode: + if isinstance(text, unicode): intern = bool(possible_unicode_identifier(text)) else: intern = bool(possible_bytes_identifier(text)) @@ -353,6 +353,9 @@ class PyStringConst(object): self.unicode = is_unicode self.intern = intern + def __lt__(self, other): + return self.cname < other.cname + class GlobalState(object): # filename_table {string : int} for finding filename table indexes @@ -544,7 +547,7 @@ class GlobalState(object): return py_string def new_string_const(self, text, byte_string): - cname = self.new_string_const_cname(text) + cname = self.new_string_const_cname(byte_string) c = StringConst(cname, text, byte_string) self.string_const_index[byte_string] = c return c @@ -561,8 +564,13 @@ class GlobalState(object): self.py_constants.append(c) return c - def new_string_const_cname(self, value, intern=None): + def new_string_const_cname(self, bytes_value, intern=None): # Create a new globally-unique nice name for a C string constant. + try: + value = bytes_value.decode('ASCII') + except UnicodeError: + return self.new_const_cname() + if len(value) < 20 and nice_identifier(value): return "%s%s" % (Naming.const_prefix, value) else: diff --git a/Cython/Utils.py b/Cython/Utils.py index c841cc1d..dfb4cf3a 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -14,7 +14,12 @@ def open_new_file(path): # Make sure to create a new file here so we can # safely hard link the output files. os.unlink(path) - return open(path, "w") + + # we use the ISO-8859-1 encoding here because we only write pure + # ASCII strings or (e.g. for file names) byte encoded strings as + # Unicode, so we need a direct mapping from the first 256 Unicode + # characters to a byte sequence, which ISO-8859-1 provides + return codecs.open(path, "w", encoding="ISO-8859-1") def castrate_file(path, st): # Remove junk contents from an output file after a