From: Stefan Behnel Date: Thu, 21 Apr 2011 16:04:55 +0000 (+0200) Subject: when C compiling original Cython/Py2 sources in Py3, interpret unprefixed string... X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=756d0872263f142c273b92e1ce1ebb5ebb1f4d92;p=cython.git when C compiling original Cython/Py2 sources in Py3, interpret unprefixed string literals as CPython's parser would --- diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index 7abbc39f..51aa4256 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -332,7 +332,8 @@ class StringConst(object): self.escaped_value = StringEncoding.escape_byte_string(byte_string) self.py_strings = None - def get_py_string_const(self, encoding, identifier=None, is_str=False): + def get_py_string_const(self, encoding, identifier=None, + is_str=False, py3str_cstring=None): py_strings = self.py_strings text = self.text @@ -351,47 +352,52 @@ class StringConst(object): else: encoding_key = ''.join(find_alphanums(encoding)) - key = (is_str, is_unicode, encoding_key) - if py_strings is not None and key in py_strings: - py_string = py_strings[key] + key = (is_str, is_unicode, encoding_key, py3str_cstring) + if py_strings is not None: + try: + return py_strings[key] + except KeyError: + pass else: - if py_strings is None: - self.py_strings = {} - if identifier: - intern = True - elif identifier is None: - if isinstance(text, unicode): - intern = bool(possible_unicode_identifier(text)) - else: - intern = bool(possible_bytes_identifier(text)) - else: - intern = False - if intern: - prefix = Naming.interned_str_prefix - else: - prefix = Naming.py_const_prefix - pystring_cname = "%s%s_%s" % ( - prefix, - (is_str and 's') or (is_unicode and 'u') or 'b', - self.cname[len(Naming.const_prefix):]) - - py_string = PyStringConst( - pystring_cname, encoding, is_unicode, is_str, intern) - self.py_strings[key] = py_string + self.py_strings = {} + if identifier: + intern = True + elif identifier is None: + if isinstance(text, unicode): + intern = bool(possible_unicode_identifier(text)) + else: + intern = bool(possible_bytes_identifier(text)) + else: + intern = False + if intern: + prefix = Naming.interned_str_prefix + else: + prefix = Naming.py_const_prefix + pystring_cname = "%s%s_%s" % ( + prefix, + (is_str and 's') or (is_unicode and 'u') or 'b', + self.cname[len(Naming.const_prefix):]) + + py_string = PyStringConst( + pystring_cname, encoding, is_unicode, is_str, py3str_cstring, intern) + self.py_strings[key] = py_string return py_string class PyStringConst(object): """Global info about a Python string constant held by GlobalState. """ # cname string + # py3str_cstring string # encoding string # intern boolean # is_unicode boolean # is_str boolean - def __init__(self, cname, encoding, is_unicode, is_str=False, intern=False): + def __init__(self, cname, encoding, is_unicode, is_str=False, + py3str_cstring=None, intern=False): self.cname = cname + self.py3str_cstring = py3str_cstring self.encoding = encoding self.is_str = is_str self.is_unicode = is_unicode @@ -614,10 +620,16 @@ class GlobalState(object): c = self.new_string_const(text, byte_string) return c - def get_py_string_const(self, text, identifier=None, is_str=False): + def get_py_string_const(self, text, identifier=None, + is_str=False, unicode_value=None): # return a Python string constant, creating a new one if necessary c_string = self.get_string_const(text) - py_string = c_string.get_py_string_const(text.encoding, identifier, is_str) + py3str_cstring = None + if is_str and unicode_value is not None \ + and unicode_value.utf8encode() != text.byteencode(): + py3str_cstring = self.get_string_const(unicode_value) + py_string = c_string.get_py_string_const( + text.encoding, identifier, is_str, py3str_cstring) return py_string def get_interned_identifier(self, text): @@ -743,6 +755,17 @@ class GlobalState(object): decls_writer.putln( "static PyObject *%s;" % py_string.cname) + if py_string.py3str_cstring: + w.putln("#if PY_MAJOR_VERSION >= 3") + w.putln( + "{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % ( + py_string.cname, + py_string.py3str_cstring.cname, + py_string.py3str_cstring.cname, + encoding, + 1, 1, 0, + )) + w.putln("#else") w.putln( "{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % ( py_string.cname, @@ -753,6 +776,8 @@ class GlobalState(object): py_string.is_str, py_string.intern )) + if py_string.py3str_cstring: + w.putln("#endif") w.putln("{0, 0, 0, 0, 0, 0, 0}") w.putln("};") @@ -1010,8 +1035,10 @@ class CCodeWriter(object): def get_string_const(self, text): return self.globalstate.get_string_const(text).cname - def get_py_string_const(self, text, identifier=None, is_str=False): - return self.globalstate.get_py_string_const(text, identifier, is_str).cname + def get_py_string_const(self, text, identifier=None, + is_str=False, unicode_value=None): + return self.globalstate.get_py_string_const( + text, identifier, is_str, unicode_value).cname def get_argument_default_const(self, type): return self.globalstate.get_py_const(type).cname diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 54d19884..ba6a8276 100755 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -1115,16 +1115,6 @@ class StringNode(PyConstNode): if not dst_type.is_pyobject: return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env) self.check_for_coercion_error(dst_type, fail=True) - - # this will be a unicode string in Py3, so make sure we can decode it - if self.value.encoding and isinstance(self.value, StringEncoding.BytesLiteral): - try: - self.value.decode(self.value.encoding) - except UnicodeDecodeError: - error(self.pos, ("Decoding unprefixed string literal from '%s' failed. Consider using" - "a byte string or unicode string explicitly, " - "or adjust the source code encoding.") % self.value.encoding) - return self def can_coerce_to_char_literal(self): @@ -1132,7 +1122,8 @@ class StringNode(PyConstNode): def generate_evaluation_code(self, code): self.result_code = code.get_py_string_const( - self.value, identifier=self.is_identifier, is_str=True) + self.value, identifier=self.is_identifier, is_str=True, + unicode_value=self.unicode_value) def get_constant_c_result_code(self): return None diff --git a/tests/run/strliterals.pyx b/tests/run/strliterals.pyx index 5194668e..8d844579 100644 --- a/tests/run/strliterals.pyx +++ b/tests/run/strliterals.pyx @@ -132,6 +132,17 @@ __doc__ = ur""" >>> len(bytes_uescape) 28 + >>> (sys.version_info[0] >= 3 and len(str_uescape) == 3 or + ... sys.version_info[0] < 3 and len(str_uescape) == 17 or + ... len(str_uescape)) + True + >>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or + ... sys.version_info[0] < 3 and str_uescape[0] == '\\' or + ... str_uescape[0]) + True + >>> print(str_uescape[-1]) + B + >>> newlines == "Aaa\n" True @@ -173,6 +184,7 @@ bresc = br'\12\'\"\\' uresc = ur'\12\'\"\\' bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX' +str_uescape = '\u0063\U00012345\x42' newlines = "Aaa\n"