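Fix order of surrogate pair (high surrogate first, then low surrogate) generated for wide Unicode characters in unicode string literals on narrow platforms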

author Stefan Behnel <scoder@users.berlios.de>

Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)

committer Stefan Behnel <scoder@users.berlios.de>

Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
author Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py

index 637bca1caa8f96c2591b4a2babae025d3fc86fa9..09ca20794061ca772e2f96915ae779118ca4b144 100644 (file)
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -36,8 +36,8 @@ class UnicodeLiteralBuilder(object):
                  # wide Unicode character on narrow platform => replace
                  # by surrogate pair
                  char_number -= 0x10000
-                self.chars.append( unichr((char_number  % 1024) + 0xDC00) )
                  self.chars.append( unichr((char_number // 1024) + 0xD800) )
+                self.chars.append( unichr((char_number  % 1024) + 0xDC00) )
              else:
                  self.chars.append( unichr(char_number) )
      else:
diff --git a/tests/run/unicodeliterals.pyx b/tests/run/unicodeliterals.pyx

index 0553f255fc1038375c46db1053746a690eac320f..cdb4c8a845562988ed90cbe58608b5a8d8e00f03 100644 (file)
--- a/tests/run/unicodeliterals.pyx
+++ b/tests/run/unicodeliterals.pyx
@@ -1,5 +1,7 @@
  # -*- coding: utf-8 -*-
  
+import sys
+
  __doc__ = br"""
      >>> sa
      'abc'
@@ -38,6 +40,12 @@ __doc__ = br"""
      12
      >>> len(null)
      1
+    >>> sys.maxunicode >= 65535
+    True
+    >>> sys.maxunicode == 65535 and 1 or len(wide_literal) # test for wide build
+    1
+    >>> sys.maxunicode > 65535 and 2 or len(wide_literal)  # test for narrow build
+    2
  """.decode("ASCII") + u"""
      >>> ua == u'abc'
      True
@@ -59,9 +67,12 @@ __doc__ = br"""
      True
      >>> null == u'\\x00' # unescaped by Python (required by doctest)
      True
+    >>> wide_literal == u'\U00101234'    # unescaped by Cython
+    True
+    >>> wide_literal == u'\\U00101234'   # unescaped by Python
+    True
  """
  
-import sys
  if sys.version_info[0] >= 3:
      __doc__ = __doc__.replace(u" u'", u" '")
  else:
@@ -78,3 +89,5 @@ f = u'\xf8'
  
  add = u'Søk ik' + u'üÖä' + u'abc'
  null = u'\x00'
+
+wide_literal = u'\U00101234'
author	Stefan Behnel <scoder@users.berlios.de>
	Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
committer	Stefan Behnel <scoder@users.berlios.de>
	Sat, 3 Jul 2010 16:22:12 +0000 (18:22 +0200)
Cython/Compiler/StringEncoding.py		patch | blob | history
tests/run/unicodeliterals.pyx		patch | blob | history