when C compiling original Cython/Py2 sources in Py3, interpret unprefixed string...
authorStefan Behnel <scoder@users.berlios.de>
Thu, 21 Apr 2011 16:04:55 +0000 (18:04 +0200)
committerStefan Behnel <scoder@users.berlios.de>
Thu, 21 Apr 2011 16:04:55 +0000 (18:04 +0200)
Cython/Compiler/Code.py
Cython/Compiler/ExprNodes.py
tests/run/strliterals.pyx

index 7abbc39f41bd03d98328d062722ec93a08ffaf61..51aa4256b5e145fd5c7a0db933a7080da97ffa2f 100644 (file)
@@ -332,7 +332,8 @@ class StringConst(object):
         self.escaped_value = StringEncoding.escape_byte_string(byte_string)
         self.py_strings = None
 
-    def get_py_string_const(self, encoding, identifier=None, is_str=False):
+    def get_py_string_const(self, encoding, identifier=None,
+                            is_str=False, py3str_cstring=None):
         py_strings = self.py_strings
         text = self.text
 
@@ -351,47 +352,52 @@ class StringConst(object):
             else:
                 encoding_key = ''.join(find_alphanums(encoding))
 
-        key = (is_str, is_unicode, encoding_key)
-        if py_strings is not None and key in py_strings:
-            py_string = py_strings[key]
+        key = (is_str, is_unicode, encoding_key, py3str_cstring)
+        if py_strings is not None:
+            try:
+                return py_strings[key]
+            except KeyError:
+                pass
         else:
-            if py_strings is None:
-                self.py_strings = {}
-            if identifier:
-                intern = True
-            elif identifier is None:
-                if isinstance(text, unicode):
-                    intern = bool(possible_unicode_identifier(text))
-                else:
-                    intern = bool(possible_bytes_identifier(text))
-            else:
-                intern = False
-            if intern:
-                prefix = Naming.interned_str_prefix
-            else:
-                prefix = Naming.py_const_prefix
-            pystring_cname = "%s%s_%s" % (
-                prefix,
-                (is_str and 's') or (is_unicode and 'u') or 'b',
-                self.cname[len(Naming.const_prefix):])
-
-            py_string = PyStringConst(
-                pystring_cname, encoding, is_unicode, is_str, intern)
-            self.py_strings[key] = py_string
+            self.py_strings = {}
 
+        if identifier:
+            intern = True
+        elif identifier is None:
+            if isinstance(text, unicode):
+                intern = bool(possible_unicode_identifier(text))
+            else:
+                intern = bool(possible_bytes_identifier(text))
+        else:
+            intern = False
+        if intern:
+            prefix = Naming.interned_str_prefix
+        else:
+            prefix = Naming.py_const_prefix
+        pystring_cname = "%s%s_%s" % (
+            prefix,
+            (is_str and 's') or (is_unicode and 'u') or 'b',
+            self.cname[len(Naming.const_prefix):])
+
+        py_string = PyStringConst(
+            pystring_cname, encoding, is_unicode, is_str, py3str_cstring, intern)
+        self.py_strings[key] = py_string
         return py_string
 
 class PyStringConst(object):
     """Global info about a Python string constant held by GlobalState.
     """
     # cname       string
+    # py3str_cstring string
     # encoding    string
     # intern      boolean
     # is_unicode  boolean
     # is_str      boolean
 
-    def __init__(self, cname, encoding, is_unicode, is_str=False, intern=False):
+    def __init__(self, cname, encoding, is_unicode, is_str=False,
+                 py3str_cstring=None, intern=False):
         self.cname = cname
+        self.py3str_cstring = py3str_cstring
         self.encoding = encoding
         self.is_str = is_str
         self.is_unicode = is_unicode
@@ -614,10 +620,16 @@ class GlobalState(object):
             c = self.new_string_const(text, byte_string)
         return c
 
-    def get_py_string_const(self, text, identifier=None, is_str=False):
+    def get_py_string_const(self, text, identifier=None,
+                            is_str=False, unicode_value=None):
         # return a Python string constant, creating a new one if necessary
         c_string = self.get_string_const(text)
-        py_string = c_string.get_py_string_const(text.encoding, identifier, is_str)
+        py3str_cstring = None
+        if is_str and unicode_value is not None \
+               and unicode_value.utf8encode() != text.byteencode():
+            py3str_cstring = self.get_string_const(unicode_value)
+        py_string = c_string.get_py_string_const(
+            text.encoding, identifier, is_str, py3str_cstring)
         return py_string
 
     def get_interned_identifier(self, text):
@@ -743,6 +755,17 @@ class GlobalState(object):
 
                 decls_writer.putln(
                     "static PyObject *%s;" % py_string.cname)
+                if py_string.py3str_cstring:
+                    w.putln("#if PY_MAJOR_VERSION >= 3")
+                    w.putln(
+                        "{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
+                        py_string.cname,
+                        py_string.py3str_cstring.cname,
+                        py_string.py3str_cstring.cname,
+                        encoding,
+                        1, 1, 0,
+                        ))
+                    w.putln("#else")
                 w.putln(
                     "{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
                     py_string.cname,
@@ -753,6 +776,8 @@ class GlobalState(object):
                     py_string.is_str,
                     py_string.intern
                     ))
+                if py_string.py3str_cstring:
+                    w.putln("#endif")
             w.putln("{0, 0, 0, 0, 0, 0, 0}")
             w.putln("};")
 
@@ -1010,8 +1035,10 @@ class CCodeWriter(object):
     def get_string_const(self, text):
         return self.globalstate.get_string_const(text).cname
 
-    def get_py_string_const(self, text, identifier=None, is_str=False):
-        return self.globalstate.get_py_string_const(text, identifier, is_str).cname
+    def get_py_string_const(self, text, identifier=None,
+                            is_str=False, unicode_value=None):
+        return self.globalstate.get_py_string_const(
+            text, identifier, is_str, unicode_value).cname
 
     def get_argument_default_const(self, type):
         return self.globalstate.get_py_const(type).cname
index 54d19884653d665251cea2d927b8c6e1f72e9f28..ba6a82761f2e33873a13602ebe43d6e3c0d0f5c8 100755 (executable)
@@ -1115,16 +1115,6 @@ class StringNode(PyConstNode):
             if not dst_type.is_pyobject:
                 return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
             self.check_for_coercion_error(dst_type, fail=True)
-
-        # this will be a unicode string in Py3, so make sure we can decode it
-        if self.value.encoding and isinstance(self.value, StringEncoding.BytesLiteral):
-            try:
-                self.value.decode(self.value.encoding)
-            except UnicodeDecodeError:
-                error(self.pos, ("Decoding unprefixed string literal from '%s' failed. Consider using"
-                                 "a byte string or unicode string explicitly, "
-                                 "or adjust the source code encoding.") % self.value.encoding)
-
         return self
 
     def can_coerce_to_char_literal(self):
@@ -1132,7 +1122,8 @@ class StringNode(PyConstNode):
 
     def generate_evaluation_code(self, code):
         self.result_code = code.get_py_string_const(
-            self.value, identifier=self.is_identifier, is_str=True)
+            self.value, identifier=self.is_identifier, is_str=True,
+            unicode_value=self.unicode_value)
 
     def get_constant_c_result_code(self):
         return None
index 5194668e13beb0b34aac9338527d21ce8112aa93..8d8445796034a331ce42ea74bb052e537df9c248 100644 (file)
@@ -132,6 +132,17 @@ __doc__ = ur"""
     >>> len(bytes_uescape)
     28
 
+    >>> (sys.version_info[0] >= 3 and len(str_uescape) == 3 or
+    ...  sys.version_info[0] <  3 and len(str_uescape) == 17 or
+    ...  len(str_uescape))
+    True
+    >>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
+    ...  sys.version_info[0] <  3 and str_uescape[0] == '\\' or
+    ...  str_uescape[0])
+    True
+    >>> print(str_uescape[-1])
+    B
+
     >>> newlines == "Aaa\n"
     True
     
@@ -173,6 +184,7 @@ bresc = br'\12\'\"\\'
 uresc = ur'\12\'\"\\'
 
 bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX'
+str_uescape = '\u0063\U00012345\x42'
 
 newlines = "Aaa\n"