From a934c41c80c417aa6dcb9c5dd00c4a315843ba0d Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 25 Apr 2008 14:02:03 +0200 Subject: [PATCH] fixed source filename and Cython code embedding in C files, moved source file reading algorithm to Cython.Utils to make it available everywhere (and then use it everywhere) --- Cython/Compiler/Code.py | 22 ++++++++------- Cython/Compiler/Main.py | 32 +++++++++------------- Cython/Compiler/Nodes.py | 55 ++++++++++++++++++++++---------------- Cython/Compiler/Parsing.py | 6 ++--- Cython/Utils.py | 24 ++++++++++++++++- 5 files changed, 82 insertions(+), 57 deletions(-) diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py index c1f51de1..d107248f 100644 --- a/Cython/Compiler/Code.py +++ b/Cython/Compiler/Code.py @@ -2,9 +2,10 @@ # Pyrex - Code output module # +import codecs import Naming import Options -from Cython.Utils import open_new_file +from Cython.Utils import open_new_file, open_source_file from PyrexTypes import py_object_type, typecast from TypeSlots import method_coexist @@ -85,23 +86,24 @@ class CCodeWriter: def indent(self): self.f.write(" " * self.level) + def get_py_version_hex(self, pyversion): + return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4] + def file_contents(self, file): try: return self.input_file_contents[file] except KeyError: - F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/') - for line in open(file).readlines()] + F = [line.encode('ASCII', 'replace').replace( + '*/', '*[inserted by cython to avoid comment closer]/') + for line in open_source_file(file)] self.input_file_contents[file] = F return F - def get_py_version_hex(self, pyversion): - return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4] - def mark_pos(self, pos): if pos is None: return - file, line, col = pos - contents = self.file_contents(file) + filename, line, col = pos + contents = self.file_contents(filename) context = '' for i in range(max(0,line-3), min(line+2, len(contents))): @@ -109,8 +111,8 @@ class CCodeWriter: if i+1 == line: # line numbers in pyrex start counting up from 1 s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n' context += " * " + s - - marker = '"%s":%s\n%s' % (file, line, context) + + marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context) if self.last_marker != marker: self.marker = marker diff --git a/Cython/Compiler/Main.py b/Cython/Compiler/Main.py index c49d170e..c1ed7a01 100644 --- a/Cython/Compiler/Main.py +++ b/Cython/Compiler/Main.py @@ -137,28 +137,20 @@ class Context: self.modules[name] = scope return scope - match_file_encoding = re.compile("coding[:=]\s*([-\w.]+)").search - - def detect_file_encoding(self, source_filename): - # PEPs 263 and 3120 - f = codecs.open(source_filename, "rU", encoding="UTF-8") - try: - for line_no, line in enumerate(f): - encoding = self.match_file_encoding(line) - if encoding: - return encoding.group(1) - if line_no == 1: - break - finally: - f.close() - return "UTF-8" - def parse(self, source_filename, type_names, pxd, full_module_name): # Parse the given source file and return a parse tree. - encoding = self.detect_file_encoding(source_filename) - f = codecs.open(source_filename, "rU", encoding=encoding) - s = PyrexScanner(f, source_filename, source_encoding = encoding, - type_names = type_names, context = self) + f = Utils.open_source_file(source_filename, "rU") + + if isinstance(source_filename, unicode): + name = source_filename + else: + filename_encoding = sys.getfilesystemencoding() + if filename_encoding is None: + filename_encoding = getdefaultencoding() + name = source_filename.decode(filename_encoding) + + s = PyrexScanner(f, name, source_encoding = f.encoding, + type_names = type_names, context = self) try: tree = Parsing.p_module(s, pxd, full_module_name) finally: diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 7b82d540..edc14e98 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -37,7 +37,31 @@ def relative_position(pos): AUTHOR: William Stein """ return (pos[0][absolute_path_length+1:], pos[1]) - + +def embed_position(pos, docstring): + if not Options.embed_pos_in_docstring: + return docstring + pos_line = u'File: %s (starting at line %s)' % relative_position(self.pos) + if docstring is None: + # unicode string + return ExprNodes.EncodedString(pos_line) + + # make sure we can encode the filename in the docstring encoding + # otherwise make the docstring a unicode string + encoding = docstring.encoding + if encoding is not None: + try: + encoded_bytes = pos_line.encode(encoding) + except UnicodeEncodeError: + encoding = None + + if not docstring: + # reuse the string encoding of the original docstring + doc = ExprNodes.EncodedString(pos_line) + else: + doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring) + doc.encoding = encoding + return doc class AttributeAccessor: """Used as the result of the Node.get_children_accessors() generator""" @@ -1357,20 +1381,12 @@ class DefNode(FuncDefNode): Naming.pyfunc_prefix + prefix + name entry.pymethdef_cname = \ Naming.pymethdef_prefix + prefix + name - if not Options.docstrings: - entry.doc = None - else: - if Options.embed_pos_in_docstring: - doc = u'File: %s (starting at line %s)'%relative_position(self.pos) - if not self.doc is None: - doc = doc + u'\\n' + self.doc - doc = ExprNodes.EncodedString(doc) - doc.encoding = self.doc.encoding - entry.doc = doc - else: - entry.doc = self.doc + if Options.docstrings: + entry.doc = embed_position(self.pos, self.doc) entry.doc_cname = \ Naming.funcdoc_prefix + prefix + name + else: + entry.doc = None def declare_arguments(self, env): for arg in self.args: @@ -1922,10 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode): import ExprNodes self.dict = ExprNodes.DictNode(pos, key_value_pairs = []) if self.doc and Options.docstrings: - if Options.embed_pos_in_docstring: - doc = u'File: %s (starting at line %s)'%relative_position(self.pos) - doc = ExprNodes.EncodedString(doc + 'u\\n' + self.doc) - doc.encoding = self.doc.encoding + doc = embed_position(self.pos, self.doc) doc_node = ExprNodes.StringNode(pos, value = doc) else: doc_node = None @@ -2036,13 +2049,9 @@ class CClassDefNode(StatNode, BlockNode): typedef_flag = self.typedef_flag, api = self.api) scope = self.entry.type.scope - + if self.doc and Options.docstrings: - if Options.embed_pos_in_docstring: - scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos) - scope.doc = scope.doc + '\\n' + self.doc - else: - scope.doc = self.doc + scope.doc = embed_position(self.pos, self.doc) if has_body: self.body.analyse_declarations(scope) diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index d89d381d..e18bac0a 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -10,6 +10,7 @@ import Nodes import ExprNodes from ModuleNode import ModuleNode from Errors import error, InternalError +from Cython import Utils def p_ident(s, message = "Expected an identifier"): if s.sy == 'IDENT': @@ -1178,9 +1179,8 @@ def p_include_statement(s, level): if s.compile_time_eval: include_file_path = s.context.find_include_file(include_file_name, pos) if include_file_path: - encoding = s.context.detect_file_encoding(include_file_path) - f = codecs.open(include_file_path, "rU", encoding=encoding) - s2 = PyrexScanner(f, include_file_path, s, source_encoding=encoding) + f = Utils.open_source_file(include_file_path, mode="rU") + s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding) try: tree = p_statement_list(s2, level) finally: diff --git a/Cython/Utils.py b/Cython/Utils.py index 1b4b07d3..20ea31e6 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -3,7 +3,7 @@ # anywhere else in particular # -import os, sys +import os, sys, re, codecs def replace_suffix(path, newsuf): base, _ = os.path.splitext(path) @@ -32,3 +32,25 @@ def castrate_file(path, st): f.close() if st: os.utime(path, (st.st_atime, st.st_mtime)) + +# support for source file encoding detection and unicode decoding + +_match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search + +def detect_file_encoding(source_filename): + # PEPs 263 and 3120 + f = codecs.open(source_filename, "rU", encoding="UTF-8") + try: + for line_no, line in enumerate(f): + encoding = _match_file_encoding(line) + if encoding: + return encoding.group(1) + if line_no == 1: + break + finally: + f.close() + return "UTF-8" + +def open_source_file(source_filename, mode="rU"): + encoding = detect_file_encoding(source_filename) + return codecs.open(source_filename, mode=mode, encoding=encoding) -- 2.26.2