From: Stefan Behnel Date: Wed, 30 Apr 2008 21:42:09 +0000 (+0200) Subject: robustness against unicode errors on encoding detection X-Git-Tag: 0.9.6.14~5 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=1a019507522dd0124c5f7d70c7400ada4b79bb24;p=cython.git robustness against unicode errors on encoding detection --- diff --git a/Cython/Compiler/Main.py b/Cython/Compiler/Main.py index 22da73f4..f07f528c 100644 --- a/Cython/Compiler/Main.py +++ b/Cython/Compiler/Main.py @@ -139,25 +139,25 @@ class Context: def parse(self, source_filename, type_names, pxd, full_module_name): # Parse the given source file and return a parse tree. - f = Utils.open_source_file(source_filename, "rU") - - if isinstance(source_filename, unicode): - name = source_filename - else: - filename_encoding = sys.getfilesystemencoding() - if filename_encoding is None: - filename_encoding = sys.getdefaultencoding() - name = source_filename.decode(filename_encoding) - try: + f = Utils.open_source_file(source_filename, "rU") + try: - s = PyrexScanner(f, name, source_encoding = f.encoding, - type_names = type_names, context = self) - tree = Parsing.p_module(s, pxd, full_module_name) - except UnicodeDecodeError, msg: - error((name, 0, 0), "Decoding error, missing or incorrect coding= at top of source (%s)" % msg) - finally: - f.close() + if isinstance(source_filename, unicode): + name = source_filename + else: + filename_encoding = sys.getfilesystemencoding() + if filename_encoding is None: + filename_encoding = sys.getdefaultencoding() + name = source_filename.decode(filename_encoding) + + s = PyrexScanner(f, name, source_encoding = f.encoding, + type_names = type_names, context = self) + tree = Parsing.p_module(s, pxd, full_module_name) + finally: + f.close() + except UnicodeDecodeError, msg: + error((source_filename, 0, 0), "Decoding error, missing or incorrect coding= at top of source (%s)" % msg) if Errors.num_errors > 0: raise CompileError return tree diff --git a/Cython/Utils.py b/Cython/Utils.py index 342f09bf..a6fd0577 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -41,12 +41,15 @@ def detect_file_encoding(source_filename): # PEPs 263 and 3120 f = codecs.open(source_filename, "rU", encoding="UTF-8") try: - for line_no, line in enumerate(f): - encoding = _match_file_encoding(line) + chars = [] + for i in range(2): + c = f.read(1) + while c and c != '\n': + chars.append(c) + c = f.read(1) + encoding = _match_file_encoding(u''.join(chars)) if encoding: return encoding.group(1) - if line_no == 1: - break finally: f.close() return "UTF-8"