# Pyrex - Code output module
#
+import codecs
import Naming
import Options
-from Cython.Utils import open_new_file
+from Cython.Utils import open_new_file, open_source_file
from PyrexTypes import py_object_type, typecast
from TypeSlots import method_coexist
def indent(self):
self.f.write(" " * self.level)
+    def get_py_version_hex(self, pyversion):
+        """Format a version tuple as a 32-bit hex literal string.
+
+        Only the first four components of `pyversion` are used; missing
+        components are padded with zeros, e.g. (2, 5) -> "0x02050000".
+        """
+        padded = tuple(pyversion) + (0, 0, 0, 0)
+        major, minor, micro, release = padded[:4]
+        return "0x%02X%02X%02X%02X" % (major, minor, micro, release)
+
def file_contents(self, file):
try:
return self.input_file_contents[file]
except KeyError:
- F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/')
- for line in open(file).readlines()]
+            # Read the source through open_source_file and reduce every line
+            # to ASCII ('replace' substitutes '?' for anything else), and
+            # defuse any '*/' in the text.
+            source_file = open_source_file(file)
+            try:
+                F = [line.encode('ASCII', 'replace').replace(
+                        '*/', '*[inserted by cython to avoid comment closer]/')
+                     for line in source_file]
+            finally:
+                # close the handle explicitly instead of leaking it
+                source_file.close()
self.input_file_contents[file] = F
return F
- def get_py_version_hex(self, pyversion):
- return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
-
def mark_pos(self, pos):
if pos is None:
return
- file, line, col = pos
- contents = self.file_contents(file)
+ filename, line, col = pos
+ contents = self.file_contents(filename)
context = ''
for i in range(max(0,line-3), min(line+2, len(contents))):
if i+1 == line: # line numbers in pyrex start counting up from 1
s = s.rstrip() + ' # <<<<<<<<<<<<<< ' + '\n'
context += " * " + s
-
- marker = '"%s":%s\n%s' % (file, line, context)
+
+ marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context)
if self.last_marker != marker:
self.marker = marker
self.modules[name] = scope
return scope
- match_file_encoding = re.compile("coding[:=]\s*([-\w.]+)").search
-
- def detect_file_encoding(self, source_filename):
- # PEPs 263 and 3120
- f = codecs.open(source_filename, "rU", encoding="UTF-8")
- try:
- for line_no, line in enumerate(f):
- encoding = self.match_file_encoding(line)
- if encoding:
- return encoding.group(1)
- if line_no == 1:
- break
- finally:
- f.close()
- return "UTF-8"
-
def parse(self, source_filename, type_names, pxd, full_module_name):
# Parse the given source file and return a parse tree.
- encoding = self.detect_file_encoding(source_filename)
- f = codecs.open(source_filename, "rU", encoding=encoding)
- s = PyrexScanner(f, source_filename, source_encoding = encoding,
- type_names = type_names, context = self)
+        f = Utils.open_source_file(source_filename, "rU")
+
+        # Hand the scanner a unicode file name: byte-string names are decoded
+        # with the file system encoding, falling back to the interpreter's
+        # default encoding when the file system encoding is unknown (None).
+        if isinstance(source_filename, unicode):
+            name = source_filename
+        else:
+            filename_encoding = sys.getfilesystemencoding()
+            if filename_encoding is None:
+                # must be qualified: a bare getdefaultencoding() is a NameError
+                filename_encoding = sys.getdefaultencoding()
+            name = source_filename.decode(filename_encoding)
+
+        s = PyrexScanner(f, name, source_encoding = f.encoding,
+                         type_names = type_names, context = self)
try:
tree = Parsing.p_module(s, pxd, full_module_name)
finally:
AUTHOR: William Stein
"""
return (pos[0][absolute_path_length+1:], pos[1])
-
+
+def embed_position(pos, docstring):
+    """Prefix a docstring with a 'File: ... (starting at line ...)' line.
+
+    Returns `docstring` unchanged unless Options.embed_pos_in_docstring is
+    set.  Otherwise returns an ExprNodes.EncodedString that starts with the
+    position line built from `pos`, followed by the original docstring when
+    that is non-empty.
+    """
+    if not Options.embed_pos_in_docstring:
+        return docstring
+    # use the `pos` argument: there is no `self` in this module-level function
+    pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
+    if docstring is None:
+        # unicode string
+        return ExprNodes.EncodedString(pos_line)
+
+    # make sure we can encode the filename in the docstring encoding
+    # otherwise make the docstring a unicode string
+    encoding = docstring.encoding
+    if encoding is not None:
+        try:
+            # only the success/failure of the encoding attempt matters
+            pos_line.encode(encoding)
+        except UnicodeEncodeError:
+            encoding = None
+
+    if not docstring:
+        doc = ExprNodes.EncodedString(pos_line)
+    else:
+        doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring)
+    # reuse the string encoding of the original docstring, or None if the
+    # position line cannot be represented in it
+    doc.encoding = encoding
+    return doc
class AttributeAccessor:
"""Used as the result of the Node.get_children_accessors() generator"""
Naming.pyfunc_prefix + prefix + name
entry.pymethdef_cname = \
Naming.pymethdef_prefix + prefix + name
- if not Options.docstrings:
- entry.doc = None
- else:
- if Options.embed_pos_in_docstring:
- doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
- if not self.doc is None:
- doc = doc + u'\\n' + self.doc
- doc = ExprNodes.EncodedString(doc)
- doc.encoding = self.doc.encoding
- entry.doc = doc
- else:
- entry.doc = self.doc
+ if Options.docstrings:
+ entry.doc = embed_position(self.pos, self.doc)
entry.doc_cname = \
Naming.funcdoc_prefix + prefix + name
+ else:
+ entry.doc = None
def declare_arguments(self, env):
for arg in self.args:
import ExprNodes
self.dict = ExprNodes.DictNode(pos, key_value_pairs = [])
if self.doc and Options.docstrings:
- if Options.embed_pos_in_docstring:
- doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
- doc = ExprNodes.EncodedString(doc + 'u\\n' + self.doc)
- doc.encoding = self.doc.encoding
+ doc = embed_position(self.pos, self.doc)
doc_node = ExprNodes.StringNode(pos, value = doc)
else:
doc_node = None
typedef_flag = self.typedef_flag,
api = self.api)
scope = self.entry.type.scope
-
+
if self.doc and Options.docstrings:
- if Options.embed_pos_in_docstring:
- scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
- scope.doc = scope.doc + '\\n' + self.doc
- else:
- scope.doc = self.doc
+ scope.doc = embed_position(self.pos, self.doc)
if has_body:
self.body.analyse_declarations(scope)
import ExprNodes
from ModuleNode import ModuleNode
from Errors import error, InternalError
+from Cython import Utils
def p_ident(s, message = "Expected an identifier"):
if s.sy == 'IDENT':
if s.compile_time_eval:
include_file_path = s.context.find_include_file(include_file_name, pos)
if include_file_path:
- encoding = s.context.detect_file_encoding(include_file_path)
- f = codecs.open(include_file_path, "rU", encoding=encoding)
- s2 = PyrexScanner(f, include_file_path, s, source_encoding=encoding)
+ f = Utils.open_source_file(include_file_path, mode="rU")
+ s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding)
try:
tree = p_statement_list(s2, level)
finally:
# anywhere else in particular
#
-import os, sys
+import os, sys, re, codecs
def replace_suffix(path, newsuf):
base, _ = os.path.splitext(path)
f.close()
if st:
os.utime(path, (st.st_atime, st.st_mtime))
+
+# support for source file encoding detection and unicode decoding
+
+_match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search
+
+def detect_file_encoding(source_filename):
+    """Return the encoding declared in the first two lines of a source file.
+
+    The first two lines are searched with _match_file_encoding; the first
+    match's group 1 is returned.  "UTF-8" is returned when neither line
+    contains a declaration.
+    """
+    # PEPs 263 and 3120
+    # The file is probed as UTF-8 before its real encoding is known, so
+    # bytes that are not valid UTF-8 are ignored rather than raising
+    # UnicodeDecodeError; the declaration itself is still found.
+    f = codecs.open(source_filename, "rU", encoding="UTF-8", errors="ignore")
+    try:
+        for line_no, line in enumerate(f):
+            match = _match_file_encoding(line)
+            if match:
+                return match.group(1)
+            if line_no == 1:
+                # only the first two lines may carry the declaration
+                break
+    finally:
+        f.close()
+    return "UTF-8"
+
+def open_source_file(source_filename, mode="rU"):
+    """Open a source file for decoded reading in its detected encoding.
+
+    The encoding is determined by detect_file_encoding(); the returned
+    object comes from codecs.open() and must be closed by the caller.
+    """
+    return codecs.open(source_filename, mode=mode,
+                       encoding=detect_file_encoding(source_filename))