From be8d379841f4651df32f44860ae9cc683e171565 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Thu, 28 Jan 2010 23:05:39 +0100 Subject: [PATCH] support for some Python 3 (or 2.6+) syntax features (found by test_grammar.py in Py3.1.1): - oct/bin notation: 0o12345, 0b10101 - function annotations (only pure syntax support, not currently used) also: allow decorators on inner functions --- Cython/Compiler/ExprNodes.py | 18 +++++++++++----- Cython/Compiler/Lexicon.py | 7 +++++-- Cython/Compiler/Nodes.py | 12 ++++++++--- Cython/Compiler/Parsing.pxd | 6 +++--- Cython/Compiler/Parsing.py | 40 +++++++++++++++++++++++++++--------- Cython/Utils.py | 31 ++++++++++++++++++++-------- 6 files changed, 83 insertions(+), 31 deletions(-) diff --git a/Cython/Compiler/ExprNodes.py b/Cython/Compiler/ExprNodes.py index 22a4092d..51dd88de 100644 --- a/Cython/Compiler/ExprNodes.py +++ b/Cython/Compiler/ExprNodes.py @@ -19,6 +19,7 @@ import Builtin import Symtab import Options from Annotate import AnnotationItem +from Cython import Utils from Cython.Debugging import print_call_chain from DebugFlags import debug_disposal_code, debug_temp_alloc, \ @@ -771,16 +772,23 @@ class IntNode(ConstNode): self.result_code = self.get_constant_c_result_code() def get_constant_c_result_code(self): - return str(self.value) + self.unsigned + self.longness + value = self.value + if isinstance(value, basestring) and len(value) > 2: + # must convert C-incompatible Py3 oct/bin notations + if value[1] in 'oO': + value = value[0] + value[2:] # '0o123' => '0123' + elif value[1] in 'bB': + value = int(value[2:], 2) + return str(value) + self.unsigned + self.longness def calculate_result_code(self): return self.result_code def calculate_constant_result(self): - self.constant_result = int(self.value, 0) + self.constant_result = Utils.str_to_number(self.value) def compile_time_value(self, denv): - return int(self.value, 0) + return Utils.str_to_number(self.value) class FloatNode(ConstNode): @@ -966,10 +974,10 @@ class LongNode(AtomicExprNode): type = py_object_type def calculate_constant_result(self): - self.constant_result = long(self.value) + self.constant_result = Utils.str_to_number(self.value) def compile_time_value(self, denv): - return long(self.value) + return Utils.str_to_number(self.value) def analyse_types(self, env): self.is_temp = 1 diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py index 53e70c64..2d4fd195 100644 --- a/Cython/Compiler/Lexicon.py +++ b/Cython/Compiler/Lexicon.py @@ -17,6 +17,7 @@ def make_lexicon(): letter = Any("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_") digit = Any("0123456789") + bindigit = Any("01") octdigit = Any("01234567") hexdigit = Any("0123456789ABCDEFabcdef") indentation = Bol + Rep(Any(" \t")) @@ -27,7 +28,9 @@ def make_lexicon(): decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal) name = letter + Rep(letter | digit) - intconst = decimal | (Str("0x") + Rep1(hexdigit)) + intconst = decimal | (Str("0") + ((Any("Xx") + Rep1(hexdigit)) | + (Any("Oo") + Rep1(octdigit)) | + (Any("Bb") + Rep1(bindigit)) )) intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu"))) intliteral = intconst + intsuffix fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent) @@ -75,7 +78,7 @@ def make_lexicon(): punct = Any(":,;+-*/|&<>=.%`~^?") diphthong = Str("==", "<>", "!=", "<=", ">=", "<<", ">>", "**", "//", "+=", "-=", "*=", "/=", "%=", "|=", "^=", "&=", - "<<=", ">>=", "**=", "//=") + "<<=", ">>=", "**=", "//=", "->") spaces = Rep1(Any(" \t\f")) escaped_newline = Str("\\\n") lineterm = Eol + Opt(Str("\n")) diff --git a/Cython/Compiler/Nodes.py b/Cython/Compiler/Nodes.py index 13a1fede..e77ebf90 100644 --- a/Cython/Compiler/Nodes.py +++ b/Cython/Compiler/Nodes.py @@ -597,6 +597,7 @@ class CArgDeclNode(Node): # not_none boolean Tagged with 'not None' # default ExprNode or None # default_value PyObjectConst constant for default value + # annotation ExprNode or None Py3 function arg annotation # is_self_arg boolean Is the "self" arg of an extension type method # is_kw_only boolean Is a keyword-only argument @@ -607,6 +608,7 @@ class CArgDeclNode(Node): type = None name_declarator = None default_value = None + annotation = None def analyse(self, env, nonempty = 0): #print "CArgDeclNode.analyse: is_self_arg =", self.is_self_arg ### @@ -1603,8 +1605,9 @@ class PyArgDeclNode(Node): # Argument which must be a Python object (used # for * and ** arguments). # - # name string - # entry Symtab.Entry + # name string + # entry Symtab.Entry + # annotation ExprNode or None Py3 argument annotation child_attrs = [] @@ -1626,6 +1629,8 @@ class DefNode(FuncDefNode): # starstar_arg PyArgDeclNode or None ** argument # doc EncodedString or None # body StatListNode + # return_type_annotation + # ExprNode or None the Py3 return type annotation # # The following subnode is constructed internally # when the def statement is inside a Python class definition. @@ -1641,6 +1646,7 @@ class DefNode(FuncDefNode): reqd_kw_flags_cname = "0" is_wrapper = 0 decorators = None + return_type_annotation = None entry = None acquire_gil = 0 @@ -1685,7 +1691,7 @@ class DefNode(FuncDefNode): cfunc_type = cfunc.type if len(self.args) != len(cfunc_type.args) or cfunc_type.has_varargs: error(self.pos, "wrong number of arguments") - error(declarator.pos, "previous declaration here") + error(cfunc.pos, "previous declaration here") for formal_arg, type_arg in zip(self.args, cfunc_type.args): name_declarator, type = formal_arg.analyse(cfunc.scope, nonempty=1) if type is None or type is PyrexTypes.py_object_type or formal_arg.is_self: diff --git a/Cython/Compiler/Parsing.pxd b/Cython/Compiler/Parsing.pxd index bca87ddf..a95b97f0 100644 --- a/Cython/Compiler/Parsing.pxd +++ b/Cython/Compiler/Parsing.pxd @@ -126,9 +126,9 @@ cpdef p_nogil(PyrexScanner s) cpdef p_with_gil(PyrexScanner s) cpdef p_exception_value_clause(PyrexScanner s) cpdef p_c_arg_list(PyrexScanner s, ctx = *, bint in_pyfunc = *, bint cmethod_flag = *, - bint nonempty_declarators = *, bint kw_only = *) + bint nonempty_declarators = *, bint kw_only = *, bint annotated = *) cpdef p_optional_ellipsis(PyrexScanner s) -cpdef p_c_arg_decl(PyrexScanner s, ctx, in_pyfunc, bint cmethod_flag = *, bint nonempty = *, bint kw_only = *) +cpdef p_c_arg_decl(PyrexScanner s, ctx, in_pyfunc, bint cmethod_flag = *, bint nonempty = *, bint kw_only = *, bint annotated = *) cpdef p_api(PyrexScanner s) cpdef p_cdef_statement(PyrexScanner s, ctx) cpdef p_cdef_block(PyrexScanner s, ctx) @@ -143,7 +143,7 @@ cpdef p_c_func_or_var_declaration(PyrexScanner s, pos, ctx) cpdef p_ctypedef_statement(PyrexScanner s, ctx) cpdef p_decorators(PyrexScanner s) cpdef p_def_statement(PyrexScanner s, list decorators = *) -cpdef p_varargslist(PyrexScanner s, terminator=*) +cpdef p_varargslist(PyrexScanner s, terminator=*, bint annotated = *) cpdef p_py_arg_decl(PyrexScanner s) cpdef p_class_statement(PyrexScanner s, decorators) cpdef p_c_class_definition(PyrexScanner s, pos, ctx) diff --git a/Cython/Compiler/Parsing.py b/Cython/Compiler/Parsing.py index ddad09ae..f3af3271 100644 --- a/Cython/Compiler/Parsing.py +++ b/Cython/Compiler/Parsing.py @@ -113,7 +113,8 @@ def p_lambdef(s, allow_conditional=True): args = [] star_arg = starstar_arg = None else: - args, star_arg, starstar_arg = p_varargslist(s, terminator=':') + args, star_arg, starstar_arg = p_varargslist( + s, terminator=':', annotated=False) s.expect(':') if allow_conditional: expr = p_simple_expr(s) @@ -1215,6 +1216,8 @@ def p_from_import_statement(s, first_statement = 0): imported_names = [p_imported_name(s, is_cimport)] while s.sy == ',': s.next() + if is_parenthesized and s.sy == ')': + break imported_names.append(p_imported_name(s, is_cimport)) if is_parenthesized: s.expect(')') @@ -1464,7 +1467,7 @@ def p_except_clause(s): exc_value = None if s.sy != ':': exc_type = p_simple_expr(s) - if s.sy == ',': + if s.sy == ',' or (s.sy == 'IDENT' and s.systring == 'as'): s.next() exc_value = p_simple_expr(s) body = p_suite(s) @@ -1625,7 +1628,7 @@ def p_statement(s, ctx, first_statement = 0): elif s.sy == 'IF': return p_IF_statement(s, ctx) elif s.sy == 'DECORATOR': - if ctx.level not in ('module', 'class', 'c_class', 'property', 'module_pxd', 'c_class_pxd'): + if ctx.level not in ('module', 'class', 'c_class', 'function', 'property', 'module_pxd', 'c_class_pxd'): print ctx.level s.error('decorator not allowed here') s.level = ctx.level @@ -2143,14 +2146,15 @@ def p_exception_value_clause(s): c_arg_list_terminators = ('*', '**', '.', ')') def p_c_arg_list(s, ctx = Ctx(), in_pyfunc = 0, cmethod_flag = 0, - nonempty_declarators = 0, kw_only = 0): + nonempty_declarators = 0, kw_only = 0, annotated = 1): # Comma-separated list of C argument declarations, possibly empty. # May have a trailing comma. args = [] is_self_arg = cmethod_flag while s.sy not in c_arg_list_terminators: args.append(p_c_arg_decl(s, ctx, in_pyfunc, is_self_arg, - nonempty = nonempty_declarators, kw_only = kw_only)) + nonempty = nonempty_declarators, kw_only = kw_only, + annotated = annotated)) if s.sy != ',': break s.next() @@ -2164,10 +2168,12 @@ def p_optional_ellipsis(s): else: return 0 -def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, kw_only = 0): +def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, + kw_only = 0, annotated = 1): pos = s.position() not_none = 0 default = None + annotation = None base_type = p_c_base_type(s, cmethod_flag, nonempty = nonempty) declarator = p_c_declarator(s, ctx, nonempty = nonempty) if s.sy == 'not': @@ -2179,6 +2185,9 @@ def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, kw_only = 0) if not in_pyfunc: error(pos, "'not None' only allowed in Python functions") not_none = 1 + if annotated and s.sy == ':': + s.next() + annotation = p_simple_expr(s) if s.sy == '=': s.next() if 'pxd' in s.level: @@ -2193,6 +2202,7 @@ def p_c_arg_decl(s, ctx, in_pyfunc, cmethod_flag = 0, nonempty = 0, kw_only = 0) declarator = declarator, not_none = not_none, default = default, + annotation = annotation, kw_only = kw_only) def p_api(s): @@ -2458,13 +2468,19 @@ def p_def_statement(s, decorators=None): s.expect(')') if p_nogil(s): error(s.pos, "Python function cannot be declared nogil") + return_type_annotation = None + if s.sy == '->': + s.next() + return_type_annotation = p_simple_expr(s) doc, body = p_suite(s, Ctx(level = 'function'), with_doc = 1) return Nodes.DefNode(pos, name = name, args = args, star_arg = star_arg, starstar_arg = starstar_arg, - doc = doc, body = body, decorators = decorators) + doc = doc, body = body, decorators = decorators, + return_type_annotation = return_type_annotation) -def p_varargslist(s, terminator=')'): - args = p_c_arg_list(s, in_pyfunc = 1, nonempty_declarators = 1) +def p_varargslist(s, terminator=')', annotated=1): + args = p_c_arg_list(s, in_pyfunc = 1, nonempty_declarators = 1, + annotated = annotated) star_arg = None starstar_arg = None if s.sy == '*': @@ -2485,7 +2501,11 @@ def p_varargslist(s, terminator=')'): def p_py_arg_decl(s): pos = s.position() name = p_ident(s) - return Nodes.PyArgDeclNode(pos, name = name) + annotation = None + if s.sy == ':': + s.next() + annotation = p_simple_expr(s) + return Nodes.PyArgDeclNode(pos, name = name, annotation = annotation) def p_class_statement(s, decorators): # s.sy == 'class' diff --git a/Cython/Utils.py b/Cython/Utils.py index dfb4cf3a..c3c46aca 100644 --- a/Cython/Utils.py +++ b/Cython/Utils.py @@ -82,16 +82,31 @@ def open_source_file(source_filename, mode="rU"): encoding = detect_file_encoding(source_filename) return codecs.open(source_filename, mode=mode, encoding=encoding) -def long_literal(value): - if isinstance(value, basestring): - if len(value) < 2: - value = int(value) - elif value[0] == 0: - value = int(value, 8) - elif value[1] in 'xX': +def str_to_number(value): + # note: this expects a string as input that was accepted by the + # parser already + if len(value) < 2: + value = int(value, 0) + elif value[0] == '0': + if value[1] in 'xX': + # hex notation ('0x1AF') value = int(value[2:], 16) + elif value[1] in 'oO': + # Py3 octal notation ('0o136') + value = int(value[2:], 8) + elif value[1] in 'bB': + # Py3 binary notation ('0b101') + value = int(value[2:], 2) else: - value = int(value) + # Py2 octal notation ('0136') + value = int(value, 8) + else: + value = int(value, 0) + return value + +def long_literal(value): + if isinstance(value, basestring): + value = str_to_number(value) return not -2**31 <= value < 2**31 def none_or_sub(s, data): -- 2.26.2