From b4c2c4c205e75ef746de3985f6ae1474bb94f8e4 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 31 Jul 2008 00:55:14 -0700 Subject: [PATCH] Better integer literal parsing. Now accepts U and LL suffixes, and large integer literals are longs rather than being truncated as Python objects. --- Cython/Compiler/ExprNodes.py | 10 ++++++++-- Cython/Compiler/Lexicon.py | 6 +++--- Cython/Compiler/ModuleNode.py | 14 ++++++++++---- Cython/Compiler/Parsing.py | 19 +++++++++++++------ Cython/Compiler/Symtab.py | 4 +++- Cython/Utils.py | 10 ++++++++++ tests/run/int_literals.pyx | 17 +++++++++++++++++ 7 files changed, 64 insertions(+), 16 deletions(-) create mode 100644 tests/run/int_literals.pyx diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 92e1e2a7..3fc61bfc 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -646,13 +646,19 @@ class CharNode(ConstNode): class IntNode(ConstNode): + + # unsigned "" or "U" + # longness "" or "L" or "LL" + + unsigned = "" + longness = "" type = PyrexTypes.c_long_type def coerce_to(self, dst_type, env): # Arrange for a Python version of the string to be pre-allocated # when coercing to a Python type. if dst_type.is_pyobject: - self.entry = env.get_py_num(self.value) + self.entry = env.get_py_num(self.value, self.longness) self.type = PyrexTypes.py_object_type # We still need to perform normal coerce_to processing on the # result, because we might be coercing to an extension type, @@ -663,7 +669,7 @@ class IntNode(ConstNode): if self.type.is_pyobject: return self.entry.cname else: - return str(self.value) + return str(self.value) + self.unsigned + self.longness def compile_time_value(self, denv): return int(self.value, 0) diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index e42bb037..dade469e 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ -27,7 +27,8 @@ def make_lexicon(): name = letter + Rep(letter | digit) intconst = decimal | (Str("0x") + Rep1(hexdigit)) - longconst = intconst + Str("L") + intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) + intliteral = intconst + intsuffix fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) imagconst = (intconst | fltconst) + Any("jJ") @@ -79,8 +80,7 @@ def make_lexicon(): return Lexicon([ (name, 'IDENT'), - (intconst, 'INT'), - (longconst, 'LONG'), + (intliteral, 'INT'), (fltconst, 'FLOAT'), (imagconst, 'IMAG'), (deco, 'DECORATOR'), diff --git a/Cython/Compiler/ModuleNode.py b/Cython/Compiler/ModuleNode.py index 68394529..b5689c93 100644 --- a/Cython/Compiler/ModuleNode.py +++ b/Cython/Compiler/ModuleNode.py @@ -1718,10 +1718,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): def generate_intern_code(self, env, code): for entry in env.pynum_entries: - code.putln("%s = PyInt_FromLong(%s); %s;" % ( - entry.cname, - entry.init, - code.error_goto_if_null(entry.cname, self.pos))) + if entry.init[-1] == "L": + code.putln('%s = PyLong_FromString("%s", 0, 0); %s;' % ( + entry.cname, + entry.init, + code.error_goto_if_null(entry.cname, self.pos))) + else: + code.putln("%s = PyInt_FromLong(%s); %s;" % ( + entry.cname, + entry.init, + code.error_goto_if_null(entry.cname, self.pos))) def generate_string_init_code(self, env, code): if env.all_pystring_entries: diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index ba31864d..b57b2963 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -466,11 +466,18 @@ def p_atom(s): elif sy == 'INT': value = s.systring s.next() - return ExprNodes.IntNode(pos, value = value) - elif sy == 'LONG': - value = s.systring - s.next() - return ExprNodes.LongNode(pos, value = value) + unsigned = "" + longness = "" + while value[-1] in "UuLl": + if value[-1] in "Ll": + longness += "L" + else: + unsigned += "U" + value = value[:-1] + return ExprNodes.IntNode(pos, + value = value, + unsigned = unsigned, + longness = longness) elif sy == 'FLOAT': value = s.systring s.next() @@ -516,7 +523,7 @@ def p_name(s, name): elif isinstance(value, int): return ExprNodes.IntNode(pos, value = rep) elif isinstance(value, long): - return ExprNodes.LongNode(pos, value = rep) + return ExprNodes.IntNode(pos, value = rep, longness = "L") elif isinstance(value, float): return ExprNodes.FloatNode(pos, value = rep) elif isinstance(value, unicode): diff --git a/Cython/Compiler/Symtab.py b/Cython/Compiler/Symtab.py index 05d38cc7..28c67e40 100644 --- a/Cython/Compiler/Symtab.py +++ b/Cython/Compiler/Symtab.py @@ -544,9 +544,11 @@ class Scope: self.interned_nums.append(entry) return entry - def get_py_num(self, value): + def get_py_num(self, value, longness): # Get entry for int constant. Returns an existing # one if possible, otherwise creates a new one. + if longness or Utils.long_literal(value): + value += "L" genv = self.global_scope() entry = genv.num_to_entry.get(value) if not entry: diff --git a/Cython/Utils.py b/Cython/Utils.py index 88550b58..cb59d2f8 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -115,3 +115,13 @@ def escape_byte_string(s): else: append(c) return ''.join(l) + +def long_literal(value): + if isinstance(value, basestring): + if len(value) < 2: + value = int(value) + elif value[0] == 0: + return int(value, 8) + elif value[1] in 'xX': + return int(value[2:], 16) + return not -2**31 <= value < 2**31 diff --git a/tests/run/int_literals.pyx b/tests/run/int_literals.pyx new file mode 100644 index 00000000..44fec858 --- /dev/null +++ b/tests/run/int_literals.pyx @@ -0,0 +1,17 @@ +__doc__ = """ + >>> c_longs() + (1, 1L, -1L, 18446744073709551615L) + >>> py_longs() + (1, 1L, 100000000000000000000000000000000L, -100000000000000000000000000000000L) +""" + +def c_longs(): + cdef long a = 1L + cdef unsigned long ua = 1UL + cdef long long aa = 0xFFFFFFFFFFFFFFFFLL + cdef unsigned long long uaa = 0xFFFFFFFFFFFFFFFFULL + + return a, ua, aa, uaa + +def py_longs(): + return 1, 1L, 100000000000000000000000000000000, -100000000000000000000000000000000 -- 2.26.2