From 015b5ef0dfd868441f8145f0221f1e8c76496a42 Mon Sep 17 00:00:00 2001
From: Stefan Behnel
Date: Sat, 3 Jul 2010 17:23:36 +0200
Subject: [PATCH] fix parsing of wide unicode escapes on narrow Unicode platforms

---
 Cython/Compiler/StringEncoding.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/Cython/Compiler/StringEncoding.py b/Cython/Compiler/StringEncoding.py
index 059ce369..637bca1c 100644
--- a/Cython/Compiler/StringEncoding.py
+++ b/Cython/Compiler/StringEncoding.py
@@ -30,8 +30,19 @@ class UnicodeLiteralBuilder(object):
         assert isinstance(characters, _unicode), str(type(characters))
         self.chars.append(characters)
 
-    def append_charval(self, char_number):
-        self.chars.append( unichr(char_number) )
+    if sys.maxunicode == 65535:
+        def append_charval(self, char_number):
+            if char_number > 65535:
+                # wide Unicode character on narrow platform => replace by a
+                # UTF-16 surrogate pair: high (lead) first, then low (trail)
+                char_number -= 0x10000
+                self.chars.append( unichr((char_number // 1024) + 0xD800) )
+                self.chars.append( unichr((char_number % 1024) + 0xDC00) )
+            else:
+                self.chars.append( unichr(char_number) )
+    else:
+        def append_charval(self, char_number):
+            self.chars.append( unichr(char_number) )
 
     def getstring(self):
         return EncodedString(u''.join(self.chars))
-- 
2.26.2