# From: Stefan Behnel
# Date: Sat, 20 Mar 2010 21:01:32 +0000 (+0100)
# Subject: merged in latest cython-devel
# X-Git-Tag: 0.13.beta0~2^2~92
# X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=4856376ef48712afd492306dba121d77acd612a4;p=cython.git
#
# merged in latest cython-devel
# ---
# 4856376ef48712afd492306dba121d77acd612a4
#
# Origin: combined diff (diff --cc) of Cython/Utils.py,
# index c3c46aca,a81d0f50..916b9071, hunk @@@ -78,35 -78,68 +78,83 @@@.
# The hunk is written out below as the code that results from the merge.
# The lines the merge removed (the two-line open_source_file(mode="rU")
# that only wrapped codecs.open(), and the number parsing that used to be
# inlined in long_literal()) are superseded by the definitions that follow.
#
# Leading hunk context, tail of detect_file_encoding() whose start is not
# part of this hunk:
#         f.close()
#     return "UTF-8"

import codecs
import re

# Callable ``normalise_newlines(replacement, text)``: substitutes every
# '\r\n', lone '\r' or '\n' in *text* with *replacement*.
normalise_newlines = re.compile(u'\r\n?|\n').sub


class NormalisedNewlineStream(object):
    """The codecs module doesn't provide universal newline support.
    This class is used as a stream wrapper that provides this
    functionality.  The new 'io' in Py2.6+/3.1+ supports this out of the
    box.

    Only ``read()``, ``readlines()``, ``close()`` and ``encoding`` are
    provided; ``close`` is the wrapped stream's own bound method.
    """

    def __init__(self, stream):
        # let's assume .read() doesn't change
        self._read = stream.read
        self.close = stream.close
        # Streams without an 'encoding' attribute are reported as UTF-8.
        self.encoding = getattr(stream, 'encoding', 'UTF-8')

    def read(self, count=-1):
        """Read from the wrapped stream and return the text with every
        '\\r\\n' and lone '\\r' replaced by '\\n'.

        *count* is passed through to the wrapped ``read()``.  The result
        may be longer than what the wrapped stream returned for *count*,
        because extra characters are pulled in to complete a line ending
        that was cut at the chunk boundary.
        """
        data = self._read(count)
        if u'\r' not in data:
            return data
        # A trailing '\r' may be the first half of '\r\n'.  Keep reading
        # single characters until the chunk no longer ends in '\r' (or the
        # stream is exhausted), so that e.g. '\r' + '\r\n' is not counted
        # as three line endings.
        while data.endswith(u'\r'):
            extra = self._read(1)
            if not extra:
                break
            data += extra
        return normalise_newlines(u'\n', data)

    def readlines(self):
        """Read the remaining content and return it split on '\\n'.

        The returned strings do not include the line terminator, and text
        that ends with a newline produces a trailing empty string (this is
        ``str.split`` behaviour, kept for compatibility).
        """
        chunks = []
        # Go through self.read(), not the raw stream, so that the newlines
        # are normalised before splitting.
        data = self.read(0x1000)
        while data:
            chunks.append(data)
            data = self.read(0x1000)
        return u''.join(chunks).split(u'\n')


try:
    from io import open as io_open
except ImportError:
    io_open = None


def open_source_file(source_filename, mode="r",
                     encoding=None, error_handling=None,
                     require_normalised_newlines=True):
    """Open a source file as a decoded text stream.

    source_filename -- path of the file to open
    mode -- file mode handed to the underlying open call
    encoding -- text encoding; when None it is obtained from
        detect_file_encoding(source_filename)
    error_handling -- passed as the ``errors`` argument of the open call
    require_normalised_newlines -- only consulted on the codecs fallback
        path: when true, the stream is wrapped in NormalisedNewlineStream

    Returns the stream from ``io.open`` when the io module is available,
    otherwise a ``codecs.open`` stream (optionally wrapped).
    """
    if encoding is None:
        # detect_file_encoding() is defined earlier in Cython/Utils.py,
        # outside the hunk shown here.
        encoding = detect_file_encoding(source_filename)
    if io_open is not None:
        return io_open(source_filename, mode=mode,
                       encoding=encoding, errors=error_handling)
    # codecs module doesn't have universal newline support
    stream = codecs.open(source_filename, mode=mode,
                         encoding=encoding, errors=error_handling)
    if require_normalised_newlines:
        stream = NormalisedNewlineStream(stream)
    return stream


# Second character of a '0'-prefixed literal -> numeric base.
_PREFIX_BASES = {
    'x': 16, 'X': 16,   # hex notation ('0x1AF')
    'o': 8, 'O': 8,     # Py3 octal notation ('0o136')
    'b': 2, 'B': 2,     # Py3 binary notation ('0b101')
}


def str_to_number(value):
    """Convert the source text of an integer literal to an int.

    Handles hex ('0x1AF'), Py3 octal ('0o136'), Py3 binary ('0b101') and
    Py2 octal ('0136') notation as well as plain decimal.
    """
    # note: this expects a string as input that was accepted by the
    # parser already
    if len(value) >= 2 and value[0] == '0':
        base = _PREFIX_BASES.get(value[1])
        if base is not None:
            return int(value[2:], base)
        # Py2 octal notation ('0136')
        return int(value, 8)
    return int(value, 0)


try:
    _string_types = basestring
except NameError:
    # 'basestring' does not exist on Python 3.
    _string_types = str


def long_literal(value):
    """Return True if *value* lies outside the signed 32 bit range
    [-2**31, 2**31).

    *value* may be a number or the source text of an integer literal, in
    which case it is converted with str_to_number() first.
    """
    if isinstance(value, _string_types):
        value = str_to_number(value)
    return not -2**31 <= value < 2**31

# Trailing hunk context, header of the next function whose body is not part
# of this hunk:
# def none_or_sub(s, data):