f.close()
return "UTF-8"
- def open_source_file(source_filename, mode="rU"):
- encoding = detect_file_encoding(source_filename)
- return codecs.open(source_filename, mode=mode, encoding=encoding)
# Substitution function that rewrites every CRLF, lone CR or LF match.
normalise_newlines = re.compile(u'\r\n?|\n').sub


class NormalisedNewlineStream(object):
    """Stream wrapper that adds universal newline support.

    The codecs module doesn't provide universal newline support, so this
    class wraps a text stream and rewrites CRLF and lone CR line endings
    to a single LF on the way out.  The new 'io' module in Py2.6+/3.1+
    supports this out of the box.
    """

    def __init__(self, stream):
        # The bound methods are captured once; this assumes the stream's
        # .read() is not replaced afterwards.
        self._read = stream.read
        self.close = stream.close
        self.encoding = getattr(stream, 'encoding', 'UTF-8')

    def read(self, count=-1):
        """Read up to ``count`` characters and normalise their newlines.

        The result may be longer than ``count`` by the few characters
        needed to complete a line ending that was cut at the chunk
        boundary, or shorter because CRLF pairs collapse to one LF.
        """
        data = self._read(count)
        if u'\r' not in data:
            return data
        # A trailing CR may be the first half of a CRLF pair.  Keep
        # pulling single characters until the chunk no longer ends in CR
        # (or the stream is exhausted), so a pair is never split across
        # two calls and counted as two line endings.
        while data.endswith(u'\r'):
            extra = self._read(1)
            if not extra:
                break
            data += extra
        return normalise_newlines(u'\n', data)

    def readlines(self):
        """Return all remaining lines, each keeping its trailing LF.

        Goes through ``read()`` so that the content is newline-normalised,
        and keeps the line endings like ``file.readlines()`` does.
        """
        chunks = []
        data = self.read(0x1000)
        while data:
            chunks.append(data)
            data = self.read(0x1000)
        pieces = u''.join(chunks).split(u'\n')
        # Everything before the last piece was terminated by a newline;
        # the last piece is an unterminated tail (empty if the content
        # ended with a newline).
        tail = pieces.pop()
        lines = [piece + u'\n' for piece in pieces]
        if tail:
            lines.append(tail)
        return lines
+
+ try:
+ from io import open as io_open
+ except ImportError:
+ io_open = None
+
def open_source_file(source_filename, mode="r",
                     encoding=None, error_handling=None,
                     require_normalised_newlines=True):
    """Open ``source_filename`` as a decoded text stream.

    When ``encoding`` is None it is determined by calling
    ``detect_file_encoding()`` on the file.  ``error_handling`` is passed
    through as the ``errors`` argument of the underlying open call.

    If ``io.open`` is available it is used directly.  Otherwise the file
    is opened through ``codecs.open`` and, when
    ``require_normalised_newlines`` is true, wrapped in a
    ``NormalisedNewlineStream``.
    """
    if encoding is None:
        encoding = detect_file_encoding(source_filename)

    if io_open is None:
        # codecs module doesn't have universal newline support
        source = codecs.open(source_filename, mode=mode,
                             encoding=encoding, errors=error_handling)
        if not require_normalised_newlines:
            return source
        return NormalisedNewlineStream(source)

    return io_open(source_filename, mode=mode,
                   encoding=encoding, errors=error_handling)
-def long_literal(value):
- if isinstance(value, basestring):
- if len(value) < 2:
- value = int(value)
- elif value[0] == 0:
- value = int(value, 8)
- elif value[1] in 'xX':
def str_to_number(value):
    """Convert an integer literal string into an int.

    Note: this expects a string as input that was accepted by the parser
    already, optionally preceded by a '-' or '+' sign.

    Supported notations: decimal ('42'), hex ('0x1AF'), Py3 octal
    ('0o136'), Py3 binary ('0b101') and Py2 octal ('0136').

    Raises ValueError (from int()) if the digits are not valid for the
    detected base.
    """
    # Strip the sign up front so that the prefix checks below see the
    # digits; otherwise a signed Py2-style octal such as '-0136' would be
    # handed to int(value, 0), which rejects leading zeros on Python 3.
    negative = False
    if value[:1] in ('-', '+'):
        negative = value[0] == '-'
        value = value[1:]

    if len(value) < 2:
        number = int(value, 0)
    elif value[0] == '0':
        marker = value[1]
        if marker in 'xX':
            # hex notation ('0x1AF')
            number = int(value[2:], 16)
        elif marker in 'oO':
            # Py3 octal notation ('0o136')
            number = int(value[2:], 8)
        elif marker in 'bB':
            # Py3 binary notation ('0b101')
            number = int(value[2:], 2)
        else:
            # Py2 octal notation ('0136')
            number = int(value, 8)
    else:
        number = int(value, 0)
    return -number if negative else number
+
def long_literal(value):
    """Return True if ``value`` lies outside the signed 32-bit range.

    String input is first converted with ``str_to_number()``; any other
    value is compared as it is.
    """
    number = str_to_number(value) if isinstance(value, basestring) else value
    fits_in_32_bits = -2**31 <= number < 2**31
    return not fits_in_32_bits
def none_or_sub(s, data):