avoid redundant recoding during code comment injection by configuring input codec...
author Stefan Behnel <scoder@users.berlios.de>
Sat, 17 Oct 2009 20:52:16 +0000 (22:52 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Sat, 17 Oct 2009 20:52:16 +0000 (22:52 +0200)
Cython/Compiler/Code.py
Cython/Compiler/Scanning.py

diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index cc3165792487ac768e87e8c60bfe0ee0e39d343e..23f5cf60c03db9992a011f55e84841cbf64fe831 100644 (file)
@@ -747,8 +747,9 @@ class GlobalState(object):
                     u'*/', u'*[inserted by cython to avoid comment closer]/'
                     ).replace(
                     u'/*', u'/[inserted by cython to avoid comment start]*'
-                    ).encode('ASCII', 'replace').decode('ASCII')
-                 for line in source_desc.get_lines()]
+                    )
+                 for line in source_desc.get_lines(encoding='ASCII',
+                                                   error_handling='replace')]
             if len(F) == 0: F.append(u'')
             self.input_file_contents[source_desc] = F
             return F
diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py
index 456b3dce4ef52d415c548bd877a306ae1950a9b5..4e23bafd1773b4129b5720a27925746da0dfcd44 100644 (file)
@@ -9,6 +9,7 @@ import os
 import platform
 import stat
 import sys
+import codecs
 from time import time
 
 import cython
@@ -279,8 +280,12 @@ class FileSourceDescriptor(SourceDescriptor):
         self.filename = filename
         self._cmp_name = filename
     
-    def get_lines(self):
-        return Utils.open_source_file(self.filename)
+    def get_lines(self, encoding=None, error_handling=None):
+        if not encoding:
+            return Utils.open_source_file(self.filename)
+        else:
+            return codecs.open(self.filename, "rU", encoding=encoding,
+                               errors=error_handling)
     
     def get_description(self):
         return self.filename
@@ -307,9 +312,13 @@ class StringSourceDescriptor(SourceDescriptor):
         self.codelines = [x + "\n" for x in code.split("\n")]
         self._cmp_name = name
     
-    def get_lines(self):
-        return self.codelines
-    
+    def get_lines(self, encoding=None, error_handling=None):
+        if not encoding:
+            return self.codelines
+        else:
+            return [ line.encode(encoding, error_handling).decode(encoding)
+                     for line in self.codelines ]
+
     def get_description(self):
         return self.name