parse integer literals differently based on the type of source file: .pyx vs. .py...
author Stefan Behnel <scoder@users.berlios.de>
Sun, 14 Nov 2010 16:24:42 +0000 (17:24 +0100)
committer Stefan Behnel <scoder@users.berlios.de>
Sun, 14 Nov 2010 16:24:42 +0000 (17:24 +0100)
Cython/Compiler/ExprNodes.py
Cython/Compiler/Parsing.pxd
Cython/Compiler/Parsing.py
tests/errors/e_int_literals_py2.py [new file with mode: 0644]
tests/errors/e_int_literals_py3.py [new file with mode: 0644]
tests/run/cython3.pyx

index 6cf4855cbf97f68c878c7054c8102b906585e7d0..5f463f9bf11bbdb1d329d8b11fe246f05aa169a9 100755 (executable)
@@ -800,9 +800,11 @@ class IntNode(ConstNode):
 
     # unsigned     "" or "U"
     # longness     "" or "L" or "LL"
+    # is_c_literal   True/False/None   creator considers this a C integer literal
 
     unsigned = ""
     longness = ""
+    is_c_literal = None # unknown
 
     def __init__(self, pos, **kwds):
         ExprNode.__init__(self, pos, **kwds)
@@ -815,7 +817,10 @@ class IntNode(ConstNode):
                 self.calculate_constant_result()
             except ValueError:
                 pass
-        if self.constant_result in (constant_value_not_set, not_a_constant) or \
+        # we ignore 'is_c_literal = True' and instead map signed 32bit
+        # integers as C long values
+        if self.is_c_literal or \
+               self.constant_result in (constant_value_not_set, not_a_constant) or \
                self.unsigned or self.longness == 'LL':
             # clearly a C literal
             rank = (self.longness == 'LL') and 2 or 1
@@ -844,17 +849,18 @@ class IntNode(ConstNode):
             else:
                 return FloatNode(self.pos, value=self.value, type=dst_type,
                                  constant_result=not_a_constant)
-        node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
-                       type = dst_type, unsigned=self.unsigned, longness=self.longness)
         if dst_type.is_numeric and not dst_type.is_complex:
             node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
-                           type = dst_type, unsigned=self.unsigned, longness=self.longness)
+                           type = dst_type, is_c_literal = True,
+                           unsigned=self.unsigned, longness=self.longness)
             return node
         elif dst_type.is_pyobject:
             node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
-                           type = PyrexTypes.py_object_type, unsigned=self.unsigned, longness=self.longness)
+                           type = PyrexTypes.py_object_type, is_c_literal = False,
+                           unsigned=self.unsigned, longness=self.longness)
         else:
-            # not setting the type here!
+            # FIXME: not setting the type here to keep it working with
+            # complex numbers. Should they be special cased?
             node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
                            unsigned=self.unsigned, longness=self.longness)
         # We still need to perform normal coerce_to processing on the
index 5416601fad6cc2ec4ce3cf9360efab787f585a23..76d38c51048f8a4dad4e90a03040ccfe0984b680 100644 (file)
@@ -1,5 +1,6 @@
 # We declare all of these here to type the first argument.
 
+cimport cython
 from Cython.Compiler.Scanning cimport PyrexScanner
 
 
@@ -46,6 +47,8 @@ cpdef expect_ellipsis(PyrexScanner s)
 cpdef make_slice_nodes(pos, subscripts)
 cpdef make_slice_node(pos, start, stop = *, step = *)
 cpdef p_atom(PyrexScanner s)
+@cython.locals(value=unicode)
+cpdef p_int_literal(PyrexScanner s)
 cpdef p_name(PyrexScanner s, name)
 cpdef p_cat_string_literal(PyrexScanner s)
 cpdef p_opt_string_literal(PyrexScanner s, required_type=*)
index 4d7ad1d86ee12a55d8b5cf8601cd6c0c4a094a04..322a2b8245f70a753979b9979335050fdd006468 100644 (file)
@@ -583,20 +583,7 @@ def p_atom(s):
         expect_ellipsis(s)
         return ExprNodes.EllipsisNode(pos)
     elif sy == 'INT':
-        value = s.systring
-        s.next()
-        unsigned = ""
-        longness = ""
-        while value[-1] in "UuLl":
-            if value[-1] in "Ll":
-                longness += "L"
-            else:
-                unsigned += "U"
-            value = value[:-1]
-        return ExprNodes.IntNode(pos, 
-                                 value = value,
-                                 unsigned = unsigned,
-                                 longness = longness)
+        return p_int_literal(s)
     elif sy == 'FLOAT':
         value = s.systring
         s.next()
@@ -631,6 +618,37 @@ def p_atom(s):
     else:
         s.error("Expected an identifier or literal")
 
+def p_int_literal(s):
+    pos = s.position()
+    value = s.systring
+    s.next()
+    unsigned = ""
+    longness = ""
+    while value[-1] in u"UuLl":
+        if value[-1] in u"Ll":
+            longness += "L"
+        else:
+            unsigned += "U"
+        value = value[:-1]
+    # '3L' is ambiguous in Py2 but not in Py3.  '3U' and '3LL' are
+    # illegal in Py2 Python files.  All suffixes are illegal in Py3
+    # Python files.
+    is_c_literal = None
+    if unsigned:
+        is_c_literal = True
+    elif longness:
+        if longness == 'LL' or s.context.language_level >= 3:
+            is_c_literal = True
+    if s.in_python_file:
+        if is_c_literal:
+            error(pos, "illegal integer literal syntax in Python source file")
+        is_c_literal = False
+    return ExprNodes.IntNode(pos,
+                             is_c_literal = is_c_literal,
+                             value = value,
+                             unsigned = unsigned,
+                             longness = longness)
+
 def p_name(s, name):
     pos = s.position()
     if not s.compile_time_expr and name in s.compile_time_env:
diff --git a/tests/errors/e_int_literals_py2.py b/tests/errors/e_int_literals_py2.py
new file mode 100644 (file)
index 0000000..692d232
--- /dev/null
@@ -0,0 +1,15 @@
+# cython: language_level=2
+
+def int_literals():
+    a = 1L  # ok
+    b = 10000000000000L # ok
+    c = 1UL
+    d = 10000000000000UL
+    e = 10000000000000LL
+
+
+_ERRORS = """
+6:8: illegal integer literal syntax in Python source file
+7:8: illegal integer literal syntax in Python source file
+8:8: illegal integer literal syntax in Python source file
+"""
diff --git a/tests/errors/e_int_literals_py3.py b/tests/errors/e_int_literals_py3.py
new file mode 100644 (file)
index 0000000..ead9dfd
--- /dev/null
@@ -0,0 +1,17 @@
+# cython: language_level=3
+
+def int_literals():
+    a = 1L
+    b = 10000000000000L
+    c = 1UL
+    d = 10000000000000UL
+    e = 10000000000000LL
+
+
+_ERRORS = """
+4:8: illegal integer literal syntax in Python source file
+5:8: illegal integer literal syntax in Python source file
+6:8: illegal integer literal syntax in Python source file
+7:8: illegal integer literal syntax in Python source file
+8:8: illegal integer literal syntax in Python source file
+"""
index 7c1c3455814d14afd62ef9ee640b26fb8848fb62..56594bd8d1bca1a47b7d44b25b4c7803bfe942f9 100644 (file)
@@ -106,3 +106,16 @@ def dict_iter(dict d):
     values = [ value for value in d.values() ]
     items = [ item for item in d.items() ]
     return keys, values, items
+
+def int_literals():
+    """
+    >>> int_literals()
+    long
+    long
+    unsigned long
+    unsigned long
+    """
+    print(cython.typeof(1L))
+    print(cython.typeof(10000000000000L))
+    print(cython.typeof(1UL))
+    print(cython.typeof(10000000000000UL))