Better integer literal parsing.
author: Robert Bradshaw <robertwb@math.washington.edu>
Thu, 31 Jul 2008 07:55:14 +0000 (00:55 -0700)
committer: Robert Bradshaw <robertwb@math.washington.edu>
Thu, 31 Jul 2008 07:55:14 +0000 (00:55 -0700)
Integer literals now accept U and LL suffixes, and large integer literals that are coerced to Python objects become Python longs instead of being truncated.

Cython/Compiler/ExprNodes.py
Cython/Compiler/Lexicon.py
Cython/Compiler/ModuleNode.py
Cython/Compiler/Parsing.py
Cython/Compiler/Symtab.py
Cython/Utils.py
tests/run/int_literals.pyx [new file with mode: 0644]

index 92e1e2a7e35783a2c8353f506ebf1bb3d4d9cd1d..3fc61bfc90e02bd78039fa809f3eaa92a7898f96 100644 (file)
@@ -646,13 +646,19 @@ class CharNode(ConstNode):
 
 
 class IntNode(ConstNode):
+
+    # unsigned     "" or "U"
+    # longness     "" or "L" or "LL"
+
+    unsigned = ""
+    longness = ""
     type = PyrexTypes.c_long_type
 
     def coerce_to(self, dst_type, env):
         # Arrange for a Python version of the string to be pre-allocated
         # when coercing to a Python type.
         if dst_type.is_pyobject:
-            self.entry = env.get_py_num(self.value)
+            self.entry = env.get_py_num(self.value, self.longness)
             self.type = PyrexTypes.py_object_type
         # We still need to perform normal coerce_to processing on the
         # result, because we might be coercing to an extension type,
@@ -663,7 +669,7 @@ class IntNode(ConstNode):
         if self.type.is_pyobject:
             return self.entry.cname
         else:
-            return str(self.value)
+            return str(self.value) + self.unsigned + self.longness
 
     def compile_time_value(self, denv):
         return int(self.value, 0)
index e42bb0377d11365f9668f767464315935001ee11..dade469e72fb278feca9103a6ba8de861c8401d8 100644 (file)
@@ -27,7 +27,8 @@ def make_lexicon():
     
     name = letter + Rep(letter | digit)
     intconst = decimal | (Str("0x") + Rep1(hexdigit))
-    longconst = intconst + Str("L")
+    intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
+    intliteral = intconst + intsuffix
     fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
     imagconst = (intconst | fltconst) + Any("jJ")
     
@@ -79,8 +80,7 @@ def make_lexicon():
     
     return Lexicon([
         (name, 'IDENT'),
-        (intconst, 'INT'),
-        (longconst, 'LONG'),
+        (intliteral, 'INT'),
         (fltconst, 'FLOAT'),
         (imagconst, 'IMAG'),
         (deco, 'DECORATOR'),
index 68394529e46eb2d20eeff18d98758827e4725808..b5689c93bfbae53bb9295fc36163f5e9bdd88b07 100644 (file)
@@ -1718,10 +1718,16 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
     
     def generate_intern_code(self, env, code):
         for entry in env.pynum_entries:
-            code.putln("%s = PyInt_FromLong(%s); %s;" % (
-                entry.cname,
-                entry.init,
-                code.error_goto_if_null(entry.cname, self.pos)))
+            if entry.init[-1] == "L":
+                code.putln('%s = PyLong_FromString("%s", 0, 0); %s;' % (
+                    entry.cname,
+                    entry.init,
+                    code.error_goto_if_null(entry.cname, self.pos)))
+            else:
+                code.putln("%s = PyInt_FromLong(%s); %s;" % (
+                    entry.cname,
+                    entry.init,
+                    code.error_goto_if_null(entry.cname, self.pos)))
     
     def generate_string_init_code(self, env, code):
         if env.all_pystring_entries:
index ba31864dce40e9200c6db786448d2731f0090dee..b57b296380e3b0a4cf5ec986146eed939e82b42d 100644 (file)
@@ -466,11 +466,18 @@ def p_atom(s):
     elif sy == 'INT':
         value = s.systring
         s.next()
-        return ExprNodes.IntNode(pos, value = value)
-    elif sy == 'LONG':
-        value = s.systring
-        s.next()
-        return ExprNodes.LongNode(pos, value = value)
+        unsigned = ""
+        longness = ""
+        while value[-1] in "UuLl":
+            if value[-1] in "Ll":
+                longness += "L"
+            else:
+                unsigned += "U"
+            value = value[:-1]
+        return ExprNodes.IntNode(pos, 
+                                 value = value,
+                                 unsigned = unsigned,
+                                 longness = longness)
     elif sy == 'FLOAT':
         value = s.systring
         s.next()
@@ -516,7 +523,7 @@ def p_name(s, name):
             elif isinstance(value, int):
                 return ExprNodes.IntNode(pos, value = rep)
             elif isinstance(value, long):
-                return ExprNodes.LongNode(pos, value = rep)
+                return ExprNodes.IntNode(pos, value = rep, longness = "L")
             elif isinstance(value, float):
                 return ExprNodes.FloatNode(pos, value = rep)
             elif isinstance(value, unicode):
index 05d38cc79cf4c2d055039f081c3b6174567857ab..28c67e40211e5d4985796589d8d4368945945133 100644 (file)
@@ -544,9 +544,11 @@ class Scope:
         self.interned_nums.append(entry)
         return entry
         
-    def get_py_num(self, value):
+    def get_py_num(self, value, longness):
         # Get entry for int constant. Returns an existing
         # one if possible, otherwise creates a new one.
+        if longness or Utils.long_literal(value):
+            value += "L"
         genv = self.global_scope()
         entry = genv.num_to_entry.get(value)
         if not entry:
index 88550b587decd9b03fe27866f9c43138abadcaa6..cb59d2f8b72233bf35018f88fd74ba00d539465a 100644 (file)
@@ -115,3 +115,13 @@ def escape_byte_string(s):
         else:
             append(c)
     return ''.join(l)
+
+def long_literal(value):
+    # Return True if value (an int/long, or the text of an integer
+    # literal without U/L suffixes) does not fit in a signed 32-bit
+    # int.  Strings are parsed with base 0, matching
+    # IntNode.compile_time_value, so decimal, 0x-hex and
+    # leading-zero octal spellings are all understood.
+    if isinstance(value, basestring):
+        value = int(value, 0)
+    return not -2**31 <= value < 2**31
diff --git a/tests/run/int_literals.pyx b/tests/run/int_literals.pyx
new file mode 100644 (file)
index 0000000..44fec85
--- /dev/null
@@ -0,0 +1,17 @@
+__doc__ = """
+    >>> c_longs()
+    (1, 1L, -1L, 18446744073709551615L)
+    >>> py_longs()
+    (1, 1L, 100000000000000000000000000000000L, -100000000000000000000000000000000L)
+"""
+
+def c_longs():
+    cdef long a = 1L
+    cdef unsigned long ua = 1UL
+    cdef long long aa = 0xFFFFFFFFFFFFFFFFLL
+    cdef unsigned long long uaa = 0xFFFFFFFFFFFFFFFFULL
+    
+    return a, ua, aa, uaa
+    
+def py_longs():
+    return 1, 1L, 100000000000000000000000000000000, -100000000000000000000000000000000