From 16ac5bf9f877179e9844f348385a98d35cb50739 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Mon, 6 Jul 2009 11:47:35 +0200
Subject: [PATCH] Py3 fixes

---
 Cython/Compiler/Code.py | 14 +++++++++++---
 Cython/Utils.py         |  7 ++++++-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index 0fe26a50..401f8085 100644
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -317,7 +317,7 @@ class StringConst(object):
             if identifier:
                 intern = True
             elif identifier is None:
-                if is_unicode:
+                if isinstance(text, unicode):
                     intern = bool(possible_unicode_identifier(text))
                 else:
                     intern = bool(possible_bytes_identifier(text))
@@ -353,6 +353,9 @@ class PyStringConst(object):
         self.unicode = is_unicode
         self.intern = intern
 
+    def __lt__(self, other):
+        return self.cname < other.cname
+
 
 class GlobalState(object):
     # filename_table {string : int} for finding filename table indexes
@@ -544,7 +547,7 @@ class GlobalState(object):
         return py_string
 
     def new_string_const(self, text, byte_string):
-        cname = self.new_string_const_cname(text)
+        cname = self.new_string_const_cname(byte_string)
         c = StringConst(cname, text, byte_string)
         self.string_const_index[byte_string] = c
         return c
@@ -561,8 +564,13 @@ class GlobalState(object):
         self.py_constants.append(c)
         return c
 
-    def new_string_const_cname(self, value, intern=None):
+    def new_string_const_cname(self, bytes_value, intern=None):
         # Create a new globally-unique nice name for a C string constant.
+        try:
+            value = bytes_value.decode('ASCII')
+        except UnicodeError:
+            return self.new_const_cname()
+
         if len(value) < 20 and nice_identifier(value):
             return "%s%s" % (Naming.const_prefix, value)
         else:
diff --git a/Cython/Utils.py b/Cython/Utils.py
index c841cc1d..dfb4cf3a 100644
--- a/Cython/Utils.py
+++ b/Cython/Utils.py
@@ -14,7 +14,12 @@ def open_new_file(path):
         # Make sure to create a new file here so we can
         # safely hard link the output files.
         os.unlink(path)
-    return open(path, "w")
+
+    # we use the ISO-8859-1 encoding here because we only write pure
+    # ASCII strings or (e.g. for file names) byte encoded strings as
+    # Unicode, so we need a direct mapping from the first 256 Unicode
+    # characters to a byte sequence, which ISO-8859-1 provides
+    return codecs.open(path, "w", encoding="ISO-8859-1")
 
 def castrate_file(path, st):
     # Remove junk contents from an output file after a
-- 
2.26.2