From: Stefan Behnel
Date: Sat, 17 Oct 2009 20:52:16 +0000 (+0200)
Subject: avoid redundant recoding during code comment injection by configuring input codec...
X-Git-Tag: 0.13.beta0~2^2~121^2~21
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=67ee1fffed18b2bbe94f3ec1cde243e8dcd28a34;p=cython.git

avoid redundant recoding during code comment injection by configuring input codec directly
---

diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py
index cc316579..23f5cf60 100644
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -747,8 +747,9 @@ class GlobalState(object):
                     u'*/', u'*[inserted by cython to avoid comment closer]/'
                     ).replace(
                     u'/*', u'/[inserted by cython to avoid comment start]*'
-                    ).encode('ASCII', 'replace').decode('ASCII')
-                 for line in source_desc.get_lines()]
+                    )
+                 for line in source_desc.get_lines(encoding='ASCII',
+                                                   error_handling='replace')]
             if len(F) == 0: F.append(u'')
             self.input_file_contents[source_desc] = F
             return F
diff --git a/Cython/Compiler/Scanning.py b/Cython/Compiler/Scanning.py
index 456b3dce..4e23bafd 100644
--- a/Cython/Compiler/Scanning.py
+++ b/Cython/Compiler/Scanning.py
@@ -9,6 +9,7 @@ import os
 import platform
 import stat
 import sys
+import codecs
 from time import time
 
 import cython
@@ -279,8 +280,12 @@ class FileSourceDescriptor(SourceDescriptor):
         self.filename = filename
         self._cmp_name = filename
 
-    def get_lines(self):
-        return Utils.open_source_file(self.filename)
+    def get_lines(self, encoding=None, error_handling=None):
+        if not encoding:
+            return Utils.open_source_file(self.filename)
+        else:
+            return codecs.open(self.filename, "rU", encoding=encoding,
+                               errors=error_handling)
 
     def get_description(self):
         return self.filename
@@ -307,9 +312,13 @@ class StringSourceDescriptor(SourceDescriptor):
         self.codelines = [x + "\n" for x in code.split("\n")]
         self._cmp_name = name
 
-    def get_lines(self):
-        return self.codelines
-
+    def get_lines(self, encoding=None, error_handling=None):
+        if not encoding:
+            return self.codelines
+        else:
+            return [ line.encode(encoding, error_handling).decode(encoding)
+                     for line in self.codelines ]
+
     def get_description(self):
         return self.name
 