merged in latest cython-devel

author Stefan Behnel <scoder@users.berlios.de>
Sat, 20 Mar 2010 21:01:32 +0000 (22:01 +0100)
committer Stefan Behnel <scoder@users.berlios.de>
Sat, 20 Mar 2010 21:01:32 +0000 (22:01 +0100)
diff --cc Cython/Utils.py

index c3c46aca3224b1ffd9663f9ebfd056d628f75d94,a81d0f502f22cf5e3d7708a06dbd7c7f1a30b8e8..916b907143ff8ba6fba83da6636a5fd80e9663f1
--- 1/Cython/Utils.py
--- 2/Cython/Utils.py
+++ b/Cython/Utils.py
@@@ -78,35 -78,68 +78,83 @@@ def detect_file_encoding(source_filenam
           f.close()
       return "UTF-8"
   
- def open_source_file(source_filename, mode="rU"):
-     encoding = detect_file_encoding(source_filename)
-     return codecs.open(source_filename, mode=mode, encoding=encoding)
+ normalise_newlines = re.compile(u'\r\n?|\n').sub
+ 
+ class NormalisedNewlineStream(object):
+   """The codecs module doesn't provide universal newline support.
+   This class is used as a stream wrapper that provides this
+   functionality.  The new 'io' in Py2.6+/3.1+ supports this out of the
+   box.
+   """
+   def __init__(self, stream):
+     # let's assume .read() doesn't change
+     self._read = stream.read
+     self.close = stream.close
+     self.encoding = getattr(stream, 'encoding', 'UTF-8')
+ 
+   def read(self, count):
+     data = self._read(count)
+     if u'\r' not in data:
+       return data
+     if data.endswith(u'\r'):
+       # may be missing a '\n'
+       data += self._read(1)
+     return normalise_newlines(u'\n', data)
+ 
+   def readlines(self):
+     content = []
+     data = self._read(0x1000)
+     while data:
+         content.append(data)
+         data = self._read(0x1000)
+     return u''.join(content).split(u'\n')
+ 
+ try:
+     from io import open as io_open
+ except ImportError:
+     io_open = None
+ 
+ def open_source_file(source_filename, mode="r",
+                      encoding=None, error_handling=None,
+                      require_normalised_newlines=True):
+     if encoding is None:
+         encoding = detect_file_encoding(source_filename)
+     if io_open is not None:
+         return io_open(source_filename, mode=mode,
+                        encoding=encoding, errors=error_handling)
+     else:
+         # codecs module doesn't have universal newline support
+         stream = codecs.open(source_filename, mode=mode,
+                              encoding=encoding, errors=error_handling)
+         if require_normalised_newlines:
+             stream = NormalisedNewlineStream(stream)
+         return stream
   
- -def long_literal(value):
- -    if isinstance(value, basestring):
- -        if len(value) < 2:
- -            value = int(value)
- -        elif value[0] == 0:
- -            value = int(value, 8)
- -        elif value[1] in 'xX':
+ +def str_to_number(value):
+ +    # note: this expects a string as input that was accepted by the
+ +    # parser already
+ +    if len(value) < 2:
+ +        value = int(value, 0)
+ +    elif value[0] == '0':
+ +        if value[1] in 'xX':
+ +            # hex notation ('0x1AF')
               value = int(value[2:], 16)
+ +        elif value[1] in 'oO':
+ +            # Py3 octal notation ('0o136')
+ +            value = int(value[2:], 8)
+ +        elif value[1] in 'bB':
+ +            # Py3 binary notation ('0b101')
+ +            value = int(value[2:], 2)
           else:
- -            value = int(value)
+ +            # Py2 octal notation ('0136')
+ +            value = int(value, 8)
+ +    else:
+ +        value = int(value, 0)
+ +    return value
+ +
+ +def long_literal(value):
+ +    if isinstance(value, basestring):
+ +        value = str_to_number(value)
       return not -2**31 <= value < 2**31
   
   def none_or_sub(s, data):
author	Stefan Behnel <scoder@users.berlios.de>
	Sat, 20 Mar 2010 21:01:32 +0000 (22:01 +0100)
committer	Stefan Behnel <scoder@users.berlios.de>
	Sat, 20 Mar 2010 21:01:32 +0000 (22:01 +0100)