fix parsing of wide unicode escapes on narrow Unicode platforms
author Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 15:23:36 +0000 (17:23 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Sat, 3 Jul 2010 15:23:36 +0000 (17:23 +0200)
Cython/Compiler/StringEncoding.py

index 059ce3693c95d97f4e12ab2bfbf74224846225f4..637bca1caa8f96c2591b4a2babae025d3fc86fa9 100644 (file)
@@ -30,8 +30,19 @@ class UnicodeLiteralBuilder(object):
         assert isinstance(characters, _unicode), str(type(characters))
         self.chars.append(characters)
 
-    def append_charval(self, char_number):
-        self.chars.append( unichr(char_number) )
+    if sys.maxunicode == 65535:
+        def append_charval(self, char_number):
+            # wide Unicode character on narrow platform => replace by a
+            # UTF-16 surrogate pair: high (lead) surrogate first, then low
+            if char_number > 65535:
+                char_number -= 0x10000
+                self.chars.append( unichr((char_number // 1024) + 0xD800) )
+                self.chars.append( unichr((char_number  % 1024) + 0xDC00) )
+            else:
+                self.chars.append( unichr(char_number) )
+    else:
+        def append_charval(self, char_number):
+            self.chars.append( unichr(char_number) )
 
     def getstring(self):
         return EncodedString(u''.join(self.chars))