fixed setup.py, a type and removed the possibility to use multiple statements per...
[jinja2.git] / jinja2 / parser.py
1 # -*- coding: utf-8 -*-
2 """
3     jinja2.parser
4     ~~~~~~~~~~~~~
5
6     Implements the template parser.
7
8     :copyright: 2008 by Armin Ronacher.
9     :license: BSD, see LICENSE for more details.
10 """
11 from operator import itemgetter
12 from jinja2 import nodes
13 from jinja2.exceptions import TemplateSyntaxError
14
15
# Keywords that open a statement; ``Parser.parse_statement`` dispatches each
# of them to the ``parse_<keyword>`` method of the same name.
_statement_keywords = frozenset((
    'for', 'if', 'block', 'extends', 'print', 'macro', 'include'
))
# Token types ``Parser.parse_compare`` accepts as comparison operators.
_compare_operators = frozenset((
    'eq', 'ne', 'lt', 'lteq', 'gt', 'gteq', 'in'
))
# Token types at which ``parse_print``, ``parse_tuple`` and the variable
# handling of ``subparse`` stop collecting expressions.
statement_end_tokens = set(('variable_end', 'block_end', 'in'))
20
21
22 class Parser(object):
23     """The template parser class.
24
25     Transforms sourcecode into an abstract syntax tree.
26     """
27
28     def __init__(self, environment, source, filename=None):
29         self.environment = environment
30         if isinstance(filename, unicode):
31             filename = filename.encode('utf-8')
32         self.source = unicode(source)
33         self.filename = filename
34         self.closed = False
35         self.stream = environment.lexer.tokenize(source, filename)
36         self.extensions = {}
37         for extension in environment.extensions:
38             for tag in extension.tags:
39                 self.extensions[tag] = extension.parse
40
41     def parse_statement(self):
42         """Parse a single statement."""
43         token_type = self.stream.current.type
44         if token_type in _statement_keywords:
45             return getattr(self, 'parse_' + token_type)()
46         elif token_type is 'call':
47             return self.parse_call_block()
48         elif token_type is 'filter':
49             return self.parse_filter_block()
50         elif token_type is 'name':
51             ext = self.extensions.get(self.stream.current.value)
52             if ext is not None:
53                 return ext(self)
54         lineno = self.stream.current
55         expr = self.parse_tuple()
56         if self.stream.current.type == 'assign':
57             result = self.parse_assign(expr)
58         else:
59             result = nodes.ExprStmt(expr, lineno=lineno)
60         return result
61
62     def parse_assign(self, target):
63         """Parse an assign statement."""
64         lineno = self.stream.expect('assign').lineno
65         if not target.can_assign():
66             raise TemplateSyntaxError("can't assign to '%s'" %
67                                       target, target.lineno,
68                                       self.filename)
69         expr = self.parse_tuple()
70         target.set_ctx('store')
71         return nodes.Assign(target, expr, lineno=lineno)
72
73     def parse_statements(self, end_tokens, drop_needle=False):
74         """Parse multiple statements into a list until one of the end tokens
75         is reached.  This is used to parse the body of statements as it also
76         parses template data if appropriate.
77         """
78         # the first token may be a colon for python compatibility
79         if self.stream.current.type is 'colon':
80             self.stream.next()
81
82         # in the future it would be possible to add whole code sections
83         # by adding some sort of end of statement token and parsing those here.
84         self.stream.expect('block_end')
85         result = self.subparse(end_tokens)
86
87         if drop_needle:
88             self.stream.next()
89         return result
90
91     def parse_for(self):
92         """Parse a for loop."""
93         lineno = self.stream.expect('for').lineno
94         target = self.parse_tuple(simplified=True)
95         if not target.can_assign():
96             raise TemplateSyntaxError("can't assign to '%s'" %
97                                       target, target.lineno,
98                                       self.filename)
99         target.set_ctx('store')
100         self.stream.expect('in')
101         iter = self.parse_tuple(no_condexpr=True)
102         test = None
103         if self.stream.current.type is 'if':
104             self.stream.next()
105             test = self.parse_expression()
106         body = self.parse_statements(('endfor', 'else'))
107         if self.stream.next().type is 'endfor':
108             else_ = []
109         else:
110             else_ = self.parse_statements(('endfor',), drop_needle=True)
111         return nodes.For(target, iter, body, else_, test, lineno=lineno)
112
113     def parse_if(self):
114         """Parse an if construct."""
115         node = result = nodes.If(lineno=self.stream.expect('if').lineno)
116         while 1:
117             # TODO: exclude conditional expressions here
118             node.test = self.parse_tuple()
119             node.body = self.parse_statements(('elif', 'else', 'endif'))
120             token_type = self.stream.next().type
121             if token_type is 'elif':
122                 new_node = nodes.If(lineno=self.stream.current.lineno)
123                 node.else_ = [new_node]
124                 node = new_node
125                 continue
126             elif token_type is 'else':
127                 node.else_ = self.parse_statements(('endif',),
128                                                    drop_needle=True)
129             else:
130                 node.else_ = []
131             break
132         return result
133
134     def parse_block(self):
135         node = nodes.Block(lineno=self.stream.expect('block').lineno)
136         node.name = self.stream.expect('name').value
137         node.body = self.parse_statements(('endblock',), drop_needle=True)
138         return node
139
140     def parse_extends(self):
141         node = nodes.Extends(lineno=self.stream.expect('extends').lineno)
142         node.template = self.parse_expression()
143         return node
144
145     def parse_include(self):
146         node = nodes.Include(lineno=self.stream.expect('include').lineno)
147         expr = self.parse_expression()
148         if self.stream.current.type is 'assign':
149             self.stream.next()
150             if not isinstance(expr, nodes.Name):
151                 raise TemplateSyntaxError('must assign imported template to '
152                                           'variable or current scope',
153                                           expr.lineno, self.filename)
154             if not expr.can_assign():
155                 raise TemplateSyntaxError('can\'t assign imported template '
156                                           'to %r' % expr, expr.lineno,
157                                           self.filename)
158             node.target = expr.name
159             node.template = self.parse_expression()
160         else:
161             node.target = None
162             node.template = expr
163         return node
164
165     def parse_signature(self, node):
166         node.args = args = []
167         node.defaults = defaults = []
168         self.stream.expect('lparen')
169         while self.stream.current.type is not 'rparen':
170             if args:
171                 self.stream.expect('comma')
172             token = self.stream.expect('name')
173             arg = nodes.Name(token.value, 'param', lineno=token.lineno)
174             if not arg.can_assign():
175                 raise TemplateSyntaxError("can't assign to '%s'" %
176                                           arg.name, arg.lineno,
177                                           self.filename)
178             if self.stream.current.type is 'assign':
179                 self.stream.next()
180                 defaults.append(self.parse_expression())
181             args.append(arg)
182         self.stream.expect('rparen')
183
184     def parse_call_block(self):
185         node = nodes.CallBlock(lineno=self.stream.expect('call').lineno)
186         if self.stream.current.type is 'lparen':
187             self.parse_signature(node)
188
189         node.call = self.parse_expression()
190         if not isinstance(node.call, nodes.Call):
191             raise TemplateSyntaxError('expected call', node.lineno,
192                                       self.filename)
193         node.body = self.parse_statements(('endcall',), drop_needle=True)
194         return node
195
196     def parse_filter_block(self):
197         node = nodes.FilterBlock(lineno=self.stream.expect('filter').lineno)
198         node.filter = self.parse_filter(None, start_inline=True)
199         node.body = self.parse_statements(('endfilter',), drop_needle=True)
200         return node
201
202     def parse_macro(self):
203         node = nodes.Macro(lineno=self.stream.expect('macro').lineno)
204         node.name = self.stream.expect('name').value
205         # make sure that assignments to that name are allowed
206         if not nodes.Name(node.name, 'store').can_assign():
207             raise TemplateSyntaxError('can\'t assign macro to %r' %
208                                       node.target, node.lineno,
209                                       self.filename)
210         self.parse_signature(node)
211         node.body = self.parse_statements(('endmacro',), drop_needle=True)
212         return node
213
214     def parse_print(self):
215         node = nodes.Output(lineno=self.stream.expect('print').lineno)
216         node.nodes = []
217         while self.stream.current.type not in statement_end_tokens:
218             if node.nodes:
219                 self.stream.expect('comma')
220             node.nodes.append(self.parse_expression())
221         return node
222
223     def parse_expression(self, no_condexpr=False):
224         """Parse an expression."""
225         if no_condexpr:
226             return self.parse_or()
227         return self.parse_condexpr()
228
229     def parse_condexpr(self):
230         lineno = self.stream.current.lineno
231         expr1 = self.parse_or()
232         while self.stream.current.type is 'if':
233             self.stream.next()
234             expr2 = self.parse_or()
235             self.stream.expect('else')
236             expr3 = self.parse_condexpr()
237             expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno)
238             lineno = self.stream.current.lineno
239         return expr1
240
241     def parse_or(self):
242         lineno = self.stream.current.lineno
243         left = self.parse_and()
244         while self.stream.current.type is 'or':
245             self.stream.next()
246             right = self.parse_and()
247             left = nodes.Or(left, right, lineno=lineno)
248             lineno = self.stream.current.lineno
249         return left
250
251     def parse_and(self):
252         lineno = self.stream.current.lineno
253         left = self.parse_compare()
254         while self.stream.current.type is 'and':
255             self.stream.next()
256             right = self.parse_compare()
257             left = nodes.And(left, right, lineno=lineno)
258             lineno = self.stream.current.lineno
259         return left
260
261     def parse_compare(self):
262         lineno = self.stream.current.lineno
263         expr = self.parse_add()
264         ops = []
265         while 1:
266             token_type = self.stream.current.type
267             if token_type in _compare_operators:
268                 self.stream.next()
269                 ops.append(nodes.Operand(token_type, self.parse_add()))
270             elif token_type is 'not' and self.stream.look().type is 'in':
271                 self.stream.skip(2)
272                 ops.append(nodes.Operand('notin', self.parse_add()))
273             else:
274                 break
275             lineno = self.stream.current.lineno
276         if not ops:
277             return expr
278         return nodes.Compare(expr, ops, lineno=lineno)
279
280     def parse_add(self):
281         lineno = self.stream.current.lineno
282         left = self.parse_sub()
283         while self.stream.current.type is 'add':
284             self.stream.next()
285             right = self.parse_sub()
286             left = nodes.Add(left, right, lineno=lineno)
287             lineno = self.stream.current.lineno
288         return left
289
290     def parse_sub(self):
291         lineno = self.stream.current.lineno
292         left = self.parse_concat()
293         while self.stream.current.type is 'sub':
294             self.stream.next()
295             right = self.parse_concat()
296             left = nodes.Sub(left, right, lineno=lineno)
297             lineno = self.stream.current.lineno
298         return left
299
300     def parse_concat(self):
301         lineno = self.stream.current.lineno
302         args = [self.parse_mul()]
303         while self.stream.current.type is 'tilde':
304             self.stream.next()
305             args.append(self.parse_mul())
306         if len(args) == 1:
307             return args[0]
308         return nodes.Concat(args, lineno=lineno)
309
310     def parse_mul(self):
311         lineno = self.stream.current.lineno
312         left = self.parse_div()
313         while self.stream.current.type is 'mul':
314             self.stream.next()
315             right = self.parse_div()
316             left = nodes.Mul(left, right, lineno=lineno)
317             lineno = self.stream.current.lineno
318         return left
319
320     def parse_div(self):
321         lineno = self.stream.current.lineno
322         left = self.parse_floordiv()
323         while self.stream.current.type is 'div':
324             self.stream.next()
325             right = self.parse_floordiv()
326             left = nodes.Div(left, right, lineno=lineno)
327             lineno = self.stream.current.lineno
328         return left
329
330     def parse_floordiv(self):
331         lineno = self.stream.current.lineno
332         left = self.parse_mod()
333         while self.stream.current.type is 'floordiv':
334             self.stream.next()
335             right = self.parse_mod()
336             left = nodes.FloorDiv(left, right, lineno=lineno)
337             lineno = self.stream.current.lineno
338         return left
339
340     def parse_mod(self):
341         lineno = self.stream.current.lineno
342         left = self.parse_pow()
343         while self.stream.current.type is 'mod':
344             self.stream.next()
345             right = self.parse_pow()
346             left = nodes.Mod(left, right, lineno=lineno)
347             lineno = self.stream.current.lineno
348         return left
349
350     def parse_pow(self):
351         lineno = self.stream.current.lineno
352         left = self.parse_unary()
353         while self.stream.current.type is 'pow':
354             self.stream.next()
355             right = self.parse_unary()
356             left = nodes.Pow(left, right, lineno=lineno)
357             lineno = self.stream.current.lineno
358         return left
359
360     def parse_unary(self):
361         token_type = self.stream.current.type
362         lineno = self.stream.current.lineno
363         if token_type is 'not':
364             self.stream.next()
365             node = self.parse_unary()
366             return nodes.Not(node, lineno=lineno)
367         if token_type is 'sub':
368             self.stream.next()
369             node = self.parse_unary()
370             return nodes.Neg(node, lineno=lineno)
371         if token_type is 'add':
372             self.stream.next()
373             node = self.parse_unary()
374             return nodes.Pos(node, lineno=lineno)
375         return self.parse_primary()
376
377     def parse_primary(self, parse_postfix=True):
378         token = self.stream.current
379         if token.type is 'name':
380             if token.value in ('true', 'false'):
381                 node = nodes.Const(token.value == 'true', lineno=token.lineno)
382             elif token.value == 'none':
383                 node = nodes.Const(None, lineno=token.lineno)
384             else:
385                 node = nodes.Name(token.value, 'load', lineno=token.lineno)
386             self.stream.next()
387         elif token.type in ('integer', 'float', 'string'):
388             self.stream.next()
389             node = nodes.Const(token.value, lineno=token.lineno)
390         elif token.type is 'lparen':
391             self.stream.next()
392             node = self.parse_tuple()
393             self.stream.expect('rparen')
394         elif token.type is 'lbracket':
395             node = self.parse_list()
396         elif token.type is 'lbrace':
397             node = self.parse_dict()
398         else:
399             raise TemplateSyntaxError("unexpected token '%s'" %
400                                       (token,), token.lineno,
401                                       self.filename)
402         if parse_postfix:
403             node = self.parse_postfix(node)
404         return node
405
406     def parse_tuple(self, enforce=False, simplified=False, no_condexpr=False):
407         """
408         Parse multiple expressions into a tuple. This can also return
409         just one expression which is not a tuple. If you want to enforce
410         a tuple, pass it enforce=True (currently unused).
411         """
412         lineno = self.stream.current.lineno
413         if simplified:
414             parse = self.parse_primary
415         elif no_condexpr:
416             parse = lambda: self.parse_expression(no_condexpr=True)
417         else:
418             parse = self.parse_expression
419         args = []
420         is_tuple = False
421         while 1:
422             if args:
423                 self.stream.expect('comma')
424             if self.stream.current.type in statement_end_tokens:
425                 break
426             args.append(parse())
427             if self.stream.current.type is not 'comma':
428                 break
429             is_tuple = True
430             lineno = self.stream.current.lineno
431         if not is_tuple and args:
432             if enforce:
433                 raise TemplateSyntaxError('tuple expected', lineno,
434                                           self.filename)
435             return args[0]
436         return nodes.Tuple(args, 'load', lineno=lineno)
437
438     def parse_list(self):
439         token = self.stream.expect('lbracket')
440         items = []
441         while self.stream.current.type is not 'rbracket':
442             if items:
443                 self.stream.expect('comma')
444             if self.stream.current.type == 'rbracket':
445                 break
446             items.append(self.parse_expression())
447         self.stream.expect('rbracket')
448         return nodes.List(items, lineno=token.lineno)
449
450     def parse_dict(self):
451         token = self.stream.expect('lbrace')
452         items = []
453         while self.stream.current.type is not 'rbrace':
454             if items:
455                 self.stream.expect('comma')
456             if self.stream.current.type == 'rbrace':
457                 break
458             key = self.parse_expression()
459             self.stream.expect('colon')
460             value = self.parse_expression()
461             items.append(nodes.Pair(key, value, lineno=key.lineno))
462         self.stream.expect('rbrace')
463         return nodes.Dict(items, lineno=token.lineno)
464
465     def parse_postfix(self, node):
466         while 1:
467             token_type = self.stream.current.type
468             if token_type is 'dot' or token_type is 'lbracket':
469                 node = self.parse_subscript(node)
470             elif token_type is 'lparen':
471                 node = self.parse_call(node)
472             elif token_type is 'pipe':
473                 node = self.parse_filter(node)
474             elif token_type is 'is':
475                 node = self.parse_test(node)
476             else:
477                 break
478         return node
479
480     def parse_subscript(self, node):
481         token = self.stream.next()
482         if token.type is 'dot':
483             attr_token = self.stream.current
484             if attr_token.type not in ('name', 'integer'):
485                 raise TemplateSyntaxError('expected name or number',
486                                           attr_token.lineno, self.filename)
487             arg = nodes.Const(attr_token.value, lineno=attr_token.lineno)
488             self.stream.next()
489         elif token.type is 'lbracket':
490             args = []
491             while self.stream.current.type is not 'rbracket':
492                 if args:
493                     self.stream.expect('comma')
494                 args.append(self.parse_subscribed())
495             self.stream.expect('rbracket')
496             if len(args) == 1:
497                 arg = args[0]
498             else:
499                 arg = nodes.Tuple(args, lineno, self.filename)
500         else:
501             raise TemplateSyntaxError('expected subscript expression',
502                                       self.lineno, self.filename)
503         return nodes.Subscript(node, arg, 'load', lineno=token.lineno)
504
505     def parse_subscribed(self):
506         lineno = self.stream.current.lineno
507
508         if self.stream.current.type is 'colon':
509             self.stream.next()
510             args = [None]
511         else:
512             node = self.parse_expression()
513             if self.stream.current.type is not 'colon':
514                 return node
515             self.stream.next()
516             args = [node]
517
518         if self.stream.current.type is 'colon':
519             args.append(None)
520         elif self.stream.current.type not in ('rbracket', 'comma'):
521             args.append(self.parse_expression())
522         else:
523             args.append(None)
524
525         if self.stream.current.type is 'colon':
526             self.stream.next()
527             if self.stream.current.type not in ('rbracket', 'comma'):
528                 args.append(self.parse_expression())
529             else:
530                 args.append(None)
531         else:
532             args.append(None)
533
534         return nodes.Slice(lineno=lineno, *args)
535
536     def parse_call(self, node):
537         token = self.stream.expect('lparen')
538         args = []
539         kwargs = []
540         dyn_args = dyn_kwargs = None
541         require_comma = False
542
543         def ensure(expr):
544             if not expr:
545                 raise TemplateSyntaxError('invalid syntax for function '
546                                           'call expression', token.lineno,
547                                           self.filename)
548
549         while self.stream.current.type is not 'rparen':
550             if require_comma:
551                 self.stream.expect('comma')
552                 # support for trailing comma
553                 if self.stream.current.type is 'rparen':
554                     break
555             if self.stream.current.type is 'mul':
556                 ensure(dyn_args is None and dyn_kwargs is None)
557                 self.stream.next()
558                 dyn_args = self.parse_expression()
559             elif self.stream.current.type is 'pow':
560                 ensure(dyn_kwargs is None)
561                 self.stream.next()
562                 dyn_kwargs = self.parse_expression()
563             else:
564                 ensure(dyn_args is None and dyn_kwargs is None)
565                 if self.stream.current.type is 'name' and \
566                     self.stream.look().type is 'assign':
567                     key = self.stream.current.value
568                     self.stream.skip(2)
569                     kwargs.append(nodes.Keyword(key, self.parse_expression(),
570                                                 lineno=key.lineno))
571                 else:
572                     ensure(not kwargs)
573                     args.append(self.parse_expression())
574
575             require_comma = True
576         self.stream.expect('rparen')
577
578         if node is None:
579             return args, kwargs, dyn_args, dyn_kwargs
580         return nodes.Call(node, args, kwargs, dyn_args, dyn_kwargs,
581                           lineno=token.lineno)
582
583     def parse_filter(self, node, start_inline=False):
584         lineno = self.stream.current.type
585         while self.stream.current.type == 'pipe' or start_inline:
586             if not start_inline:
587                 self.stream.next()
588             token = self.stream.expect('name')
589             if self.stream.current.type is 'lparen':
590                 args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None)
591             else:
592                 args = []
593                 kwargs = []
594                 dyn_args = dyn_kwargs = None
595             node = nodes.Filter(node, token.value, args, kwargs, dyn_args,
596                                 dyn_kwargs, lineno=token.lineno)
597             start_inline = False
598         return node
599
600     def parse_test(self, node):
601         token = self.stream.expect('is')
602         if self.stream.current.type is 'not':
603             self.stream.next()
604             negated = True
605         else:
606             negated = False
607         name = self.stream.expect('name').value
608         dyn_args = dyn_kwargs = None
609         kwargs = []
610         if self.stream.current.type is 'lparen':
611             args, kwargs, dyn_args, dyn_kwargs = self.parse_call(None)
612         elif self.stream.current.type in ('name', 'string', 'integer',
613                                           'float', 'lparen', 'lbracket',
614                                           'lbrace', 'regex'):
615             args = [self.parse_expression()]
616         else:
617             args = []
618         node = nodes.Test(node, name, args, kwargs, dyn_args,
619                           dyn_kwargs, lineno=token.lineno)
620         if negated:
621             node = nodes.Not(node, lineno=token.lineno)
622         return node
623
624     def subparse(self, end_tokens=None):
625         body = []
626         data_buffer = []
627         add_data = data_buffer.append
628
629         def flush_data():
630             if data_buffer:
631                 lineno = data_buffer[0].lineno
632                 body.append(nodes.Output(data_buffer[:], lineno=lineno))
633                 del data_buffer[:]
634
635         while self.stream:
636             token = self.stream.current
637             if token.type is 'data':
638                 if token.value:
639                     add_data(nodes.Const(token.value, lineno=token.lineno))
640                 self.stream.next()
641             elif token.type is 'variable_begin':
642                 self.stream.next()
643                 want_comma = False
644                 while not self.stream.current.test_many(statement_end_tokens):
645                     if want_comma:
646                         self.stream.expect('comma')
647                     add_data(self.parse_expression())
648                     want_comma = True
649                 self.stream.expect('variable_end')
650             elif token.type is 'block_begin':
651                 flush_data()
652                 self.stream.next()
653                 if end_tokens is not None and \
654                    self.stream.current.test_many(end_tokens):
655                     return body
656                 while self.stream.current.type is not 'block_end':
657                     body.append(self.parse_statement())
658                 self.stream.expect('block_end')
659             else:
660                 raise AssertionError('internal parsing error')
661
662         flush_data()
663         return body
664
665     def parse(self):
666         """Parse the whole template into a `Template` node."""
667         result = nodes.Template(self.subparse(), lineno=1)
668         result.set_environment(self.environment)
669         return result