use a dedicated UnicodeType and UnicodeNode to represent unicode literals

author Stefan Behnel <scoder@users.berlios.de>

Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)

committer Stefan Behnel <scoder@users.berlios.de>

Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)
author Stefan Behnel <scoder@users.berlios.de>
Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)
diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py

index b833824c932fef24507bd60c7a0b04265e0bc0b1..ce345460623fa2da6b84565ed15a5e7a3322d8f0 100644 (file)
--- a/Cython/Compiler/ExprNodes.py
+++ b/Cython/Compiler/ExprNodes.py
@@ -738,6 +738,29 @@ class StringNode(ConstNode):
              return self.entry.cname
  
  
+class UnicodeNode(PyConstNode):
+    #  A unicode string literal, typed as PyrexTypes.c_unicode_type.
+    #  entry   Symtab.Entry   set by analyse_types()
+
+    type = PyrexTypes.c_unicode_type
+
+    def analyse_types(self, env):
+        self.entry = env.add_string_const(self.value)
+        env.add_py_string(self.entry)
+
+    def calculate_result_code(self):
+        return self.entry.pystring_cname
+
+    def _coerce_to(self, dst_type, env):
+        # NOTE(review): the leading underscore presumably keeps this from
+        # overriding coerce_to(); Python targets return self, with no type test.
+        if not dst_type.is_pyobject:
+            node = StringNode(self.pos, entry = self.entry, type = py_object_type)
+            return ConstNode.coerce_to(node, dst_type, env)
+        else:
+            return self
+
+
  class IdentifierStringNode(ConstNode):
      # A Python string that behaves like an identifier, e.g. for
      # keyword arguments in a call, or for imported names
diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py

index fafcc8a33fd2bf942a7c61a570de025a03f48138..2e93a73716e79035635927442fa7dfebc1dcdfd5 100644 (file)
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -492,6 +492,8 @@ def p_atom(s):
          kind, value = p_cat_string_literal(s)
          if kind == 'c':
              return ExprNodes.CharNode(pos, value = value)
+        elif kind == 'u':
+            return ExprNodes.UnicodeNode(pos, value = value)
          else:
              return ExprNodes.StringNode(pos, value = value)
      elif sy == 'IDENT':
diff --git a/Cython/Compiler/PyrexTypes.py b/Cython/Compiler/PyrexTypes.py

index e8a7fd10d8f85a3b06ec8537b6d2dbfbe4932358..8417b8c1cbe5eff306e680fbc34a9b7efd205141 100644 (file)
--- a/Cython/Compiler/PyrexTypes.py
+++ b/Cython/Compiler/PyrexTypes.py
@@ -998,20 +998,6 @@ class CStringType:
          return '"%s"' % Utils.escape_byte_string(value)
  
  
-class CUTF8StringType:
-    #  Mixin class for C unicode types.
-
-    is_string = 1
-    is_unicode = 1
-    
-    to_py_function = "PyUnicode_DecodeUTF8"
-    exception_value = "NULL"
-
-    def literal_code(self, value):
-        assert isinstance(value, str)
-        return '"%s"' % Utils.escape_byte_string(value)
-
-
  class CCharArrayType(CStringType, CArrayType):
      #  C 'char []' type.
      
@@ -1020,16 +1006,6 @@ class CCharArrayType(CStringType, CArrayType):
      
      def __init__(self, size):
          CArrayType.__init__(self, c_char_type, size)
-
-
-class CUTF8CharArrayType(CUTF8StringType, CArrayType):
-    #  C 'char []' type.
-    
-    parsetuple_format = "s"
-    pymemberdef_typecode = "T_STRING_INPLACE"
-    
-    def __init__(self, size):
-        CArrayType.__init__(self, c_char_type, size)
      
  
  class CCharPtrType(CStringType, CPtrType):
@@ -1042,6 +1018,29 @@ class CCharPtrType(CStringType, CPtrType):
          CPtrType.__init__(self, c_char_type)
  
  
+class UnicodeType(BuiltinObjectType):
+    #  The Python unicode type.  Its C-level declaration is emitted as
+    #  a char array (see declaration_code).
+    parsetuple_format = "O"
+    is_unicode = 1
+    is_string = 1
+
+    def __init__(self):
+        BuiltinObjectType.__init__(self, "unicode", "PyUnicodeObject")
+
+    def literal_code(self, value):
+        # Render the byte string as an escaped, double-quoted literal.
+        assert isinstance(value, str)
+        return '"%s"' % Utils.escape_byte_string(value)
+
+    def declaration_code(self, entity_code,
+            for_display = 0, dll_linkage = None, pyrex = 0):
+        # C form is 'char name[]'; pyrex/display form uses the type name.
+        if not (pyrex or for_display):
+            return "%s %s[]" % (public_decl("char", dll_linkage), entity_code)
+        return self.base_declaration_code(self.name, entity_code)
+
+
  class ErrorType(PyrexType):
      # Used to prevent propagation of error messages.
      
@@ -1106,7 +1105,7 @@ c_longdouble_type =  CFloatType(8, typestring="g")
  
  c_null_ptr_type =     CNullPtrType(c_void_type)
  c_char_array_type =   CCharArrayType(None)
-c_utf8_char_array_type =   CUTF8CharArrayType(None)
+c_unicode_type =      UnicodeType()
  c_char_ptr_type =     CCharPtrType()
  c_char_ptr_ptr_type = CPtrType(c_char_ptr_type)
  c_py_ssize_t_ptr_type =  CPtrType(c_py_ssize_t_type)
diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py

index 3942faa263984d6eb765ccc1da05dba04b5852cf..d47cd02f0f9187787281364f5494505a7a334a52 100644 (file)
--- a/Cython/Compiler/Symtab.py
+++ b/Cython/Compiler/Symtab.py
@@ -504,7 +504,7 @@ class Scope:
          else:
              cname = self.new_const_cname()
          if value.is_unicode:
-            c_type = PyrexTypes.c_utf8_char_array_type
+            c_type = PyrexTypes.c_unicode_type
              value = value.utf8encode()
          else:
              c_type = PyrexTypes.c_char_array_type
author	Stefan Behnel <scoder@users.berlios.de>
	Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)
committer	Stefan Behnel <scoder@users.berlios.de>
	Tue, 12 Aug 2008 20:10:54 +0000 (22:10 +0200)
Cython/Compiler/ExprNodes.py		patch \| blob \| history
Cython/Compiler/Parsing.py		patch \| blob \| history
Cython/Compiler/PyrexTypes.py		patch \| blob \| history
Cython/Compiler/Symtab.py		patch \| blob \| history