fix order of surrogate pair in wide unicode strings
author: Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
committer: Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
Cython/Compiler/StringEncoding.py
tests/run/unicodeliterals.pyx

index 637bca1caa8f96c2591b4a2babae025d3fc86fa9..09ca20794061ca772e2f96915ae779118ca4b144 100644 (file)
@@ -36,8 +36,8 @@ class UnicodeLiteralBuilder(object):
                 # wide Unicode character on narrow platform => replace
                 # by surrogate pair
                 char_number -= 0x10000
-                self.chars.append( unichr((char_number  % 1024) + 0xDC00) )
                 self.chars.append( unichr((char_number // 1024) + 0xD800) )
+                self.chars.append( unichr((char_number  % 1024) + 0xDC00) )
             else:
                 self.chars.append( unichr(char_number) )
     else:
index 0553f255fc1038375c46db1053746a690eac320f..cdb4c8a845562988ed90cbe58608b5a8d8e00f03 100644 (file)
@@ -1,5 +1,7 @@
 # -*- coding: utf-8 -*-
 
+import sys
+
 __doc__ = br"""
     >>> sa
     'abc'
@@ -38,6 +40,12 @@ __doc__ = br"""
     12
     >>> len(null)
     1
+    >>> sys.maxunicode >= 65535
+    True
+    >>> sys.maxunicode == 65535 and 1 or len(wide_literal) # test for wide build
+    1
+    >>> sys.maxunicode > 65535 and 2 or len(wide_literal)  # test for narrow build
+    2
 """.decode("ASCII") + u"""
     >>> ua == u'abc'
     True
@@ -59,9 +67,12 @@ __doc__ = br"""
     True
     >>> null == u'\\x00' # unescaped by Python (required by doctest)
     True
+    >>> wide_literal == u'\U00101234'    # unescaped by Cython
+    True
+    >>> wide_literal == u'\\U00101234'   # unescaped by Python
+    True
 """
 
-import sys
 if sys.version_info[0] >= 3:
     __doc__ = __doc__.replace(u" u'", u" '")
 else:
@@ -78,3 +89,5 @@ f = u'\xf8'
 
 add = u'Søk ik' + u'üÖä' + u'abc'
 null = u'\x00'
+
+wide_literal = u'\U00101234'