Cython/Utils.py

   1 #
   2 #   Cython -- Things that don't belong
   3 #            anywhere else in particular
   4 #
   5
   6 import os, sys, re, codecs
   7
   8 def replace_suffix(path, newsuf):
   9     base, _ = os.path.splitext(path)
  10     return base + newsuf
  11
  12 def open_new_file(path):
  13     if os.path.exists(path):
  14         # Make sure to create a new file here so we can
  15         # safely hard link the output files.
  16         os.unlink(path)
  17
  18     # we use the ISO-8859-1 encoding here because we only write pure
  19     # ASCII strings or (e.g. for file names) byte encoded strings as
  20     # Unicode, so we need a direct mapping from the first 256 Unicode
  21     # characters to a byte sequence, which ISO-8859-1 provides
  22     return codecs.open(path, "w", encoding="ISO-8859-1")
  23
  24 def castrate_file(path, st):
  25     #  Remove junk contents from an output file after a
  26     #  failed compilation.
  27     #  Also sets access and modification times back to
  28     #  those specified by st (a stat struct).
  29     try:
  30         f = open_new_file(path)
  31     except EnvironmentError:
  32         pass
  33     else:
  34         f.write(
  35             "#error Do not use this file, it is the result of a failed Cython compilation.\n")
  36         f.close()
  37         if st:
  38             os.utime(path, (st.st_atime, st.st_mtime-1))
  39
  40 def modification_time(path):
  41     st = os.stat(path)
  42     return st.st_mtime
  43
  44 def file_newer_than(path, time):
  45     ftime = modification_time(path)
  46     return ftime > time
  47
  48 def path_exists(path):
  49     # try on the filesystem first
  50     if os.path.exists(path):
  51         return True
  52     # figure out if a PEP 302 loader is around
  53     try:
  54         loader = __loader__
  55         # XXX the code below assumes as 'zipimport.zipimporter' instance
  56         # XXX should be easy to generalize, but too lazy right now to write it
  57         if path.startswith(loader.archive):
  58             nrmpath = os.path.normpath(path)
  59             arcname = nrmpath[len(loader.archive)+1:]
  60             try:
  61                 loader.get_data(arcname)
  62                 return True
  63             except IOError:
  64                 return False
  65     except NameError:
  66         pass
  67     return False
  68
  69 # file name encodings
  70
  71 def decode_filename(filename):
  72     if isinstance(filename, unicode):
  73         return filename
  74     try:
  75         filename_encoding = sys.getfilesystemencoding()
  76         if filename_encoding is None:
  77             filename_encoding = sys.getdefaultencoding()
  78         filename = filename.decode(filename_encoding)
  79     except UnicodeDecodeError:
  80         pass
  81     return filename
  82
  83 # support for source file encoding detection
  84
  85 _match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search
  86
  87 def detect_file_encoding(source_filename):
  88     # PEPs 263 and 3120
  89     f = open_source_file(source_filename, encoding="UTF-8", error_handling='ignore')
  90     try:
  91         chars = []
  92         for i in range(2):
  93             c = f.read(1)
  94             while c and c != u'\n':
  95                 chars.append(c)
  96                 c = f.read(1)
  97             encoding = _match_file_encoding(u''.join(chars))
  98             if encoding:
  99                 return encoding.group(1)
 100     finally:
 101         f.close()
 102     return "UTF-8"
 103
 104 normalise_newlines = re.compile(u'\r\n?|\n').sub
 105
 106 class NormalisedNewlineStream(object):
 107   """The codecs module doesn't provide universal newline support.
 108   This class is used as a stream wrapper that provides this
 109   functionality.  The new 'io' in Py2.6+/3.x supports this out of the
 110   box.
 111   """
 112   def __init__(self, stream):
 113     # let's assume .read() doesn't change
 114     self._read = stream.read
 115     self.close = stream.close
 116     self.encoding = getattr(stream, 'encoding', 'UTF-8')
 117
 118   def read(self, count=-1):
 119     data = self._read(count)
 120     if u'\r' not in data:
 121       return data
 122     if data.endswith(u'\r'):
 123       # may be missing a '\n'
 124       data += self._read(1)
 125     return normalise_newlines(u'\n', data)
 126
 127   def readlines(self):
 128     content = []
 129     data = self.read(0x1000)
 130     while data:
 131         content.append(data)
 132         data = self.read(0x1000)
 133     return u''.join(content).split(u'\n')
 134
 135 io = None
 136 if sys.version_info >= (2,6):
 137     try:
 138         import io
 139     except ImportError:
 140         pass
 141
 142 def open_source_file(source_filename, mode="r",
 143                      encoding=None, error_handling=None,
 144                      require_normalised_newlines=True):
 145     if encoding is None:
 146         encoding = detect_file_encoding(source_filename)
 147     #
 148     try:
 149         loader = __loader__
 150         if source_filename.startswith(loader.archive):
 151             return open_source_from_loader(
 152                 loader, source_filename,
 153                 encoding, error_handling,
 154                 require_normalised_newlines)
 155     except (NameError, AttributeError):
 156         pass
 157     #
 158     if io is not None:
 159         return io.open(source_filename, mode=mode,
 160                        encoding=encoding, errors=error_handling)
 161     else:
 162         # codecs module doesn't have universal newline support
 163         stream = codecs.open(source_filename, mode=mode,
 164                              encoding=encoding, errors=error_handling)
 165         if require_normalised_newlines:
 166             stream = NormalisedNewlineStream(stream)
 167         return stream
 168
 169 def open_source_from_loader(loader,
 170                             source_filename,
 171                             encoding=None, error_handling=None,
 172                             require_normalised_newlines=True):
 173     nrmpath = os.path.normpath(source_filename)
 174     arcname = nrmpath[len(loader.archive)+1:]
 175     data = loader.get_data(arcname)
 176     if io is not None:
 177         return io.TextIOWrapper(io.BytesIO(data),
 178                                 encoding=encoding,
 179                                 errors=error_handling)
 180     else:
 181         try:
 182             import cStringIO as StringIO
 183         except ImportError:
 184             import StringIO
 185         reader = codecs.getreader(encoding)
 186         stream = reader(StringIO.StringIO(data))
 187         if require_normalised_newlines:
 188             stream = NormalisedNewlineStream(stream)
 189         return stream
 190
 191 def str_to_number(value):
 192     # note: this expects a string as input that was accepted by the
 193     # parser already
 194     if len(value) < 2:
 195         value = int(value, 0)
 196     elif value[0] == '0':
 197         if value[1] in 'xX':
 198             # hex notation ('0x1AF')
 199             value = int(value[2:], 16)
 200         elif value[1] in 'oO':
 201             # Py3 octal notation ('0o136')
 202             value = int(value[2:], 8)
 203         elif value[1] in 'bB':
 204             # Py3 binary notation ('0b101')
 205             value = int(value[2:], 2)
 206         else:
 207             # Py2 octal notation ('0136')
 208             value = int(value, 8)
 209     else:
 210         value = int(value, 0)
 211     return value
 212
 213 def long_literal(value):
 214     if isinstance(value, basestring):
 215         value = str_to_number(value)
 216     return not -2**31 <= value < 2**31
 217
 218 def none_or_sub(s, data):
 219     if s is None:
 220         return s
 221     else:
 222         return s % data
 223