Special function decorator warning.
[cython.git] / Cython / Utils.py
index 811ad4e95bd0cf211ebfc437213276b8230f90d0..abebe312ebc5926acd854ae2b49e3e988c6a3dbc 100644 (file)
@@ -11,10 +11,15 @@ def replace_suffix(path, newsuf):
 
 def open_new_file(path):
     if os.path.exists(path):
-        # Make sure to create a new file here so we can 
-        # safely hard link the output files. 
+        # Make sure to create a new file here so we can
+        # safely hard link the output files.
         os.unlink(path)
-    return open(path, "w")
+
+    # we use the ISO-8859-1 encoding here because we only write pure
+    # ASCII strings or (e.g. for file names) byte encoded strings as
+    # Unicode, so we need a direct mapping from the first 256 Unicode
+    # characters to a byte sequence, which ISO-8859-1 provides
+    return codecs.open(path, "w", encoding="ISO-8859-1")
 
 def castrate_file(path, st):
     #  Remove junk contents from an output file after a
@@ -40,9 +45,30 @@ def file_newer_than(path, time):
     ftime = modification_time(path)
     return ftime > time
 
-# support for source file encoding detection
+def path_exists(path):
+    # try on the filesystem first
+    if os.path.exists(path):
+        return True
+    # figure out if a PEP 302 loader is around
+    try:
+        loader = __loader__
+        # XXX the code below assumes a 'zipimport.zipimporter' instance
+        # XXX should be easy to generalize, but too lazy right now to write it
+        if path.startswith(loader.archive):
+            nrmpath = os.path.normpath(path)
+            arcname = nrmpath[len(loader.archive)+1:]
+            try:
+                loader.get_data(arcname)
+                return True
+            except IOError:
+                return False
+    except NameError:
+        pass
+    return False
+
+# file name encodings
 
-def encode_filename(filename):
+def decode_filename(filename):
     if isinstance(filename, unicode):
         return filename
     try:
@@ -54,11 +80,13 @@ def encode_filename(filename):
         pass
     return filename
 
+# support for source file encoding detection
+
 _match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search
 
 def detect_file_encoding(source_filename):
     # PEPs 263 and 3120
-    f = codecs.open(source_filename, "rU", encoding="UTF-8")
+    f = open_source_file(source_filename, encoding="UTF-8", error_handling='ignore')
     try:
         chars = []
         for i in range(2):
@@ -73,44 +101,123 @@ def detect_file_encoding(source_filename):
         f.close()
     return "UTF-8"
 
-def open_source_file(source_filename, mode="rU"):
-    encoding = detect_file_encoding(source_filename)
-    return codecs.open(source_filename, mode=mode, encoding=encoding)
+normalise_newlines = re.compile(u'\r\n?|\n').sub
+
+class NormalisedNewlineStream(object):
+  """The codecs module doesn't provide universal newline support.
+  This class is used as a stream wrapper that provides this
+  functionality.  The new 'io' module in Py2.6+/3.x supports this out
+  of the box.
+  """
+  def __init__(self, stream):
+    # let's assume .read() doesn't change
+    self._read = stream.read
+    self.close = stream.close
+    self.encoding = getattr(stream, 'encoding', 'UTF-8')
+
+  def read(self, count=-1):
+    data = self._read(count)
+    if u'\r' not in data:
+      return data
+    if data.endswith(u'\r'):
+      # may be missing a '\n'
+      data += self._read(1)
+    return normalise_newlines(u'\n', data)
+
+  def readlines(self):
+    content = []
+    data = self.read(0x1000)
+    while data:
+        content.append(data)
+        data = self.read(0x1000)
+    return u''.join(content).split(u'\n')
+
+io = None
+if sys.version_info >= (2,6):
+    try:
+        import io
+    except ImportError:
+        pass
 
-def long_literal(value):
-    if isinstance(value, basestring):
-        if len(value) < 2:
-            value = int(value)
-        elif value[0] == 0:
-            value = int(value, 8)
-        elif value[1] in 'xX':
+def open_source_file(source_filename, mode="r",
+                     encoding=None, error_handling=None,
+                     require_normalised_newlines=True):
+    if encoding is None:
+        encoding = detect_file_encoding(source_filename)
+    #
+    try:
+        loader = __loader__
+        if source_filename.startswith(loader.archive):
+            return open_source_from_loader(
+                loader, source_filename,
+                encoding, error_handling,
+                require_normalised_newlines)
+    except (NameError, AttributeError):
+        pass
+    #
+    if io is not None:
+        return io.open(source_filename, mode=mode,
+                       encoding=encoding, errors=error_handling)
+    else:
+        # codecs module doesn't have universal newline support
+        stream = codecs.open(source_filename, mode=mode,
+                             encoding=encoding, errors=error_handling)
+        if require_normalised_newlines:
+            stream = NormalisedNewlineStream(stream)
+        return stream
+
+def open_source_from_loader(loader,
+                            source_filename,
+                            encoding=None, error_handling=None,
+                            require_normalised_newlines=True):
+    nrmpath = os.path.normpath(source_filename)
+    arcname = nrmpath[len(loader.archive)+1:]
+    data = loader.get_data(arcname)
+    if io is not None:
+        return io.TextIOWrapper(io.BytesIO(data),
+                                encoding=encoding,
+                                errors=error_handling)
+    else:
+        try:
+            import cStringIO as StringIO
+        except ImportError:
+            import StringIO
+        reader = codecs.getreader(encoding)
+        stream = reader(StringIO.StringIO(data))
+        if require_normalised_newlines:
+            stream = NormalisedNewlineStream(stream)
+        return stream
+
+def str_to_number(value):
+    # note: this expects a string as input that was accepted by the
+    # parser already
+    if len(value) < 2:
+        value = int(value, 0)
+    elif value[0] == '0':
+        if value[1] in 'xX':
+            # hex notation ('0x1AF')
             value = int(value[2:], 16)
+        elif value[1] in 'oO':
+            # Py3 octal notation ('0o136')
+            value = int(value[2:], 8)
+        elif value[1] in 'bB':
+            # Py3 binary notation ('0b101')
+            value = int(value[2:], 2)
         else:
-            value = int(value)
+            # Py2 octal notation ('0136')
+            value = int(value, 8)
+    else:
+        value = int(value, 0)
+    return value
+
+def long_literal(value):
+    if isinstance(value, basestring):
+        value = str_to_number(value)
     return not -2**31 <= value < 2**31
 
-# a simple class that simplifies the usage of utility code
-
-class UtilityCode(object):
-    def __init__(self, proto=None, impl=None, init=None, cleanup=None, requires=None):
-        self.proto = proto
-        self.impl = impl
-        self.init = init
-        self.cleanup = cleanup
-        self.requires = requires
-
-    def write_init_code(self, writer, pos):
-        if not self.init:
-            return
-        if callable(self.init):
-            self.init(writer, pos)
-        else:
-            writer.put(self.init)
+def none_or_sub(s, data):
+    if s is None:
+        return s
+    else:
+        return s % data
 
-    def write_cleanup_code(self, writer, pos):
-        if not self.cleanup:
-            return
-        if callable(self.cleanup):
-            self.cleanup(writer, pos)
-        else:
-            writer.put(self.cleanup)