From: Stefan Behnel Date: Sat, 3 Jul 2010 16:22:12 +0000 (+0200) Subject: fix order of surrogate pair in wide unicode strings X-Git-Tag: 0.13.beta0~38 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=4603013caeef083cca7aeeff498441105384e79c;p=cython.git fix order of surrogate pair in wide unicode strings --- diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py index 637bca1c..09ca2079 100644 --- a/Cython/Compiler/StringEncoding.py +++ b/Cython/Compiler/StringEncoding.py @@ -36,8 +36,8 @@ class UnicodeLiteralBuilder(object): # wide Unicode character on narrow platform => replace # by surrogate pair char_number -= 0x10000 - self.chars.append( unichr((char_number % 1024) + 0xDC00) ) self.chars.append( unichr((char_number // 1024) + 0xD800) ) + self.chars.append( unichr((char_number % 1024) + 0xDC00) ) else: self.chars.append( unichr(char_number) ) else: diff --git a/tests/run/unicodeliterals.pyx b/tests/run/unicodeliterals.pyx index 0553f255..cdb4c8a8 100644 --- a/tests/run/unicodeliterals.pyx +++ b/tests/run/unicodeliterals.pyx @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- +import sys + __doc__ = br""" >>> sa 'abc' @@ -38,6 +40,12 @@ __doc__ = br""" 12 >>> len(null) 1 + >>> sys.maxunicode >= 65535 + True + >>> sys.maxunicode == 65535 and 1 or len(wide_literal) # test for wide build + 1 + >>> sys.maxunicode > 65535 and 2 or len(wide_literal) # test for narrow build + 2 """.decode("ASCII") + u""" >>> ua == u'abc' True @@ -59,9 +67,12 @@ __doc__ = br""" True >>> null == u'\\x00' # unescaped by Python (required by doctest) True + >>> wide_literal == u'\U00101234' # unescaped by Cython + True + >>> wide_literal == u'\\U00101234' # unescaped by Python + True """ -import sys if sys.version_info[0] >= 3: __doc__ = __doc__.replace(u" u'", u" '") else: @@ -78,3 +89,5 @@ f = u'\xf8' add = u'Søk ik' + u'üÖä' + u'abc' null = u'\x00' + +wide_literal = u'\U00101234'