fixed source filename and Cython code embedding in C files, moved source file reading...

author Stefan Behnel <scoder@users.berlios.de>

Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)

committer Stefan Behnel <scoder@users.berlios.de>

Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)
author Stefan Behnel <scoder@users.berlios.de>
Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)
committer Stefan Behnel <scoder@users.berlios.de>
Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)
diff --git a/Cython/Compiler/Code.py b/Cython/Compiler/Code.py

index c1f51de1036d1bf5f043cde8174bca9085c64fc0..d107248f6631e012dc80ffb4848c4bdc160b8fd2 100644 (file)
--- a/Cython/Compiler/Code.py
+++ b/Cython/Compiler/Code.py
@@ -2,9 +2,10 @@
  #   Pyrex - Code output module
  #
  
+import codecs
  import Naming
  import Options
-from Cython.Utils import open_new_file
+from Cython.Utils import open_new_file, open_source_file
  from PyrexTypes import py_object_type, typecast
  from TypeSlots import method_coexist
  
@@ -85,23 +86,24 @@ class CCodeWriter:
      def indent(self):
          self.f.write("  " * self.level)
  
+    def get_py_version_hex(self, pyversion):
+        return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
+
      def file_contents(self, file):
          try:
              return self.input_file_contents[file]
          except KeyError:
-            F = [line.replace('*/', '*[inserted by cython to avoid comment closer]/')
-                 for line in open(file).readlines()]
+            F = [line.encode('ASCII', 'replace').replace(
+                    '*/', '*[inserted by cython to avoid comment closer]/')
+                 for line in open_source_file(file)]
              self.input_file_contents[file] = F
              return F
  
-    def get_py_version_hex(self, pyversion):
-        return "0x%02X%02X%02X%02X" % (tuple(pyversion) + (0,0,0,0))[:4]
-
      def mark_pos(self, pos):
          if pos is None:
              return
-        file, line, col = pos
-        contents = self.file_contents(file)
+        filename, line, col = pos
+        contents = self.file_contents(filename)
  
          context = ''
          for i in range(max(0,line-3), min(line+2, len(contents))):
@@ -109,8 +111,8 @@ class CCodeWriter:
              if i+1 == line:   # line numbers in pyrex start counting up from 1
                  s = s.rstrip() + '             # <<<<<<<<<<<<<< ' + '\n'
              context += " * " + s
-        
-        marker = '"%s":%s\n%s' % (file, line, context)
+
+        marker = '"%s":%d\n%s' % (filename.encode('ASCII', 'replace'), line, context)
          if self.last_marker != marker:
              self.marker = marker
  
diff --git a/Cython/Compiler/Main.py b/Cython/Compiler/Main.py

index c49d170e4cdccaab0a1ba716a372e38c33773f88..c1ed7a01854fd4c2a0230589175e23165c7f5bbc 100644 (file)
--- a/Cython/Compiler/Main.py
+++ b/Cython/Compiler/Main.py
@@ -137,28 +137,20 @@ class Context:
              self.modules[name] = scope
          return scope
  
-    match_file_encoding = re.compile("coding[:=]\s*([-\w.]+)").search
-
-    def detect_file_encoding(self, source_filename):
-        # PEPs 263 and 3120
-        f = codecs.open(source_filename, "rU", encoding="UTF-8")
-        try:
-            for line_no, line in enumerate(f):
-                encoding = self.match_file_encoding(line)
-                if encoding:
-                    return encoding.group(1)
-                if line_no == 1:
-                    break
-        finally:
-            f.close()
-        return "UTF-8"
-
      def parse(self, source_filename, type_names, pxd, full_module_name):
          # Parse the given source file and return a parse tree.
-        encoding = self.detect_file_encoding(source_filename)
-        f = codecs.open(source_filename, "rU", encoding=encoding)
-        s = PyrexScanner(f, source_filename, source_encoding = encoding,
-            type_names = type_names, context = self)
+        f = Utils.open_source_file(source_filename, "rU")
+
+        # hand the scanner a unicode file name: decode byte-string names
+        if isinstance(source_filename, unicode):
+            name = source_filename
+        else:
+            filename_encoding = sys.getfilesystemencoding()
+            if filename_encoding is None:
+                filename_encoding = sys.getdefaultencoding()
+            name = source_filename.decode(filename_encoding)
+        s = PyrexScanner(f, name, source_encoding = f.encoding,
+                         type_names = type_names, context = self)
          try:
              tree = Parsing.p_module(s, pxd, full_module_name)
          finally:
diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py

index 7b82d540cb2ff56952500cf7015a71af47b77fbd..edc14e98f55b32c9470618a136b977843f1fcddf 100644 (file)
--- a/Cython/Compiler/Nodes.py
+++ b/Cython/Compiler/Nodes.py
@@ -37,7 +37,31 @@ def relative_position(pos):
      AUTHOR: William Stein
      """
      return (pos[0][absolute_path_length+1:], pos[1])
-        
+
+def embed_position(pos, docstring):
+    """Prefix docstring with a 'File: ... (starting at line ...)' line for pos."""
+    if not Options.embed_pos_in_docstring:
+        return docstring
+    pos_line = u'File: %s (starting at line %s)' % relative_position(pos)
+    if docstring is None:
+        # no docstring to merge with: return the position line alone
+        return ExprNodes.EncodedString(pos_line)
+
+    # keep the docstring's encoding only if it can also encode the position
+    # line; otherwise fall back to None (docstring becomes a unicode string)
+    encoding = docstring.encoding
+    if encoding is not None:
+        try:
+            pos_line.encode(encoding)
+        except UnicodeEncodeError:
+            encoding = None
+
+    if not docstring:
+        doc = ExprNodes.EncodedString(pos_line)
+    else:
+        doc = ExprNodes.EncodedString(pos_line + u'\\n' + docstring)
+    doc.encoding = encoding
+    return doc
  
  class AttributeAccessor:
      """Used as the result of the Node.get_children_accessors() generator"""
@@ -1357,20 +1381,12 @@ class DefNode(FuncDefNode):
              Naming.pyfunc_prefix + prefix + name
          entry.pymethdef_cname = \
              Naming.pymethdef_prefix + prefix + name
-        if not Options.docstrings:
-            entry.doc = None
-        else:
-            if Options.embed_pos_in_docstring:
-                doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
-                if not self.doc is None:
-                    doc = doc + u'\\n' + self.doc
-                doc = ExprNodes.EncodedString(doc)
-                doc.encoding = self.doc.encoding
-                entry.doc = doc
-            else:
-                entry.doc = self.doc
+        if Options.docstrings:
+            entry.doc = embed_position(self.pos, self.doc)
              entry.doc_cname = \
                  Naming.funcdoc_prefix + prefix + name
+        else:
+            entry.doc = None
  
      def declare_arguments(self, env):
          for arg in self.args:
@@ -1922,10 +1938,7 @@ class PyClassDefNode(StatNode, BlockNode):
          import ExprNodes
          self.dict = ExprNodes.DictNode(pos, key_value_pairs = [])
          if self.doc and Options.docstrings:
-            if Options.embed_pos_in_docstring:
-                doc = u'File: %s (starting at line %s)'%relative_position(self.pos)
-                doc = ExprNodes.EncodedString(doc + 'u\\n' + self.doc)
-                doc.encoding = self.doc.encoding
+            doc = embed_position(self.pos, self.doc)
              doc_node = ExprNodes.StringNode(pos, value = doc)
          else:
              doc_node = None
@@ -2036,13 +2049,9 @@ class CClassDefNode(StatNode, BlockNode):
              typedef_flag = self.typedef_flag,
              api = self.api)
          scope = self.entry.type.scope
-        
+
          if self.doc and Options.docstrings:
-            if Options.embed_pos_in_docstring:
-                scope.doc = 'File: %s (starting at line %s)'%relative_position(self.pos)
-                scope.doc = scope.doc + '\\n' + self.doc
-            else:
-                scope.doc = self.doc
+            scope.doc = embed_position(self.pos, self.doc)
  
          if has_body:
              self.body.analyse_declarations(scope)
diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py

index d89d381de1a385fa917597286c05c43f619b09dd..e18bac0a1cce66396422d28697c4274f3151fffb 100644 (file)
--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -10,6 +10,7 @@ import Nodes
  import ExprNodes
  from ModuleNode import ModuleNode
  from Errors import error, InternalError
+from Cython import Utils
  
  def p_ident(s, message = "Expected an identifier"):
      if s.sy == 'IDENT':
@@ -1178,9 +1179,8 @@ def p_include_statement(s, level):
      if s.compile_time_eval:
          include_file_path = s.context.find_include_file(include_file_name, pos)
          if include_file_path:
-            encoding = s.context.detect_file_encoding(include_file_path)
-            f = codecs.open(include_file_path, "rU", encoding=encoding)
-            s2 = PyrexScanner(f, include_file_path, s, source_encoding=encoding)
+            f = Utils.open_source_file(include_file_path, mode="rU")
+            s2 = PyrexScanner(f, include_file_path, s, source_encoding=f.encoding)
              try:
                  tree = p_statement_list(s2, level)
              finally:
diff --git a/Cython/Utils.py b/Cython/Utils.py

index 1b4b07d3ae04b8e6e6824a54d4df2b2778be063b..20ea31e64f29385b027fa240e3e862b486c1d3ff 100644 (file)
--- a/Cython/Utils.py
+++ b/Cython/Utils.py
@@ -3,7 +3,7 @@
  #            anywhere else in particular
  #
  
-import os, sys
+import os, sys, re, codecs
  
  def replace_suffix(path, newsuf):
      base, _ = os.path.splitext(path)
@@ -32,3 +32,25 @@ def castrate_file(path, st):
          f.close()
          if st:
              os.utime(path, (st.st_atime, st.st_mtime))
+
+# support for source file encoding detection and unicode decoding
+
+_match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search
+
+def detect_file_encoding(source_filename):
+    """Return the encoding declared per PEPs 263/3120, or "UTF-8" if none."""
+    # errors="ignore": a file in another encoding must not abort the scan
+    f = codecs.open(source_filename, "rU", encoding="UTF-8", errors="ignore")
+    try:
+        for line in (f.readline(), f.readline()):  # first two lines only
+            match = _match_file_encoding(line)
+            if match:
+                return match.group(1)
+    finally:
+        f.close()
+    return "UTF-8"
+
+def open_source_file(source_filename, mode="rU"):
+    """Open source_filename as unicode text using its detected encoding."""
+    encoding = detect_file_encoding(source_filename)
+    return codecs.open(source_filename, mode=mode, encoding=encoding)
author	Stefan Behnel <scoder@users.berlios.de>
	Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)
committer	Stefan Behnel <scoder@users.berlios.de>
	Fri, 25 Apr 2008 12:02:03 +0000 (14:02 +0200)
Cython/Compiler/Code.py		patch \| blob \| history
Cython/Compiler/Main.py		patch \| blob \| history
Cython/Compiler/Nodes.py		patch \| blob \| history
Cython/Compiler/Parsing.py		patch \| blob \| history
Cython/Utils.py		patch \| blob \| history