jinja2/ext.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.ext
   4     ~~~~~~~~~~
   5
   6     Jinja extensions allow to add custom tags similar to the way django custom
   7     tags work.  By default two example extensions exist: an i18n and a cache
   8     extension.
   9
  10     :copyright: (c) 2010 by the Jinja Team.
  11     :license: BSD.
  12 """
  13 from collections import deque
  14 from jinja2 import nodes
  15 from jinja2.defaults import *
  16 from jinja2.environment import Environment
  17 from jinja2.runtime import Undefined, concat
  18 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
  19 from jinja2.utils import contextfunction, import_string, Markup, next
  20
  21
# The only gettext functions that are really useful in a Jinja template;
# this tuple is the default set of function names looked for when
# extracting translatable strings.  Note that ugettext must be assigned
# to gettext as Jinja doesn't support non-unicode strings.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
  26
  27
  28 class ExtensionRegistry(type):
  29     """Gives the extension an unique identifier."""
  30
  31     def __new__(cls, name, bases, d):
  32         rv = type.__new__(cls, name, bases, d)
  33         rv.identifier = rv.__module__ + '.' + rv.__name__
  34         return rv
  35
  36
  37 class Extension(object):
  38     """Extensions can be used to add extra functionality to the Jinja template
  39     system at the parser level.  Custom extensions are bound to an environment
  40     but may not store environment specific data on `self`.  The reason for
  41     this is that an extension can be bound to another environment (for
  42     overlays) by creating a copy and reassigning the `environment` attribute.
  43
  44     As extensions are created by the environment they cannot accept any
  45     arguments for configuration.  One may want to work around that by using
  46     a factory function, but that is not possible as extensions are identified
  47     by their import name.  The correct way to configure the extension is
  48     storing the configuration values on the environment.  Because this way the
  49     environment ends up acting as central configuration storage the
  50     attributes may clash which is why extensions have to ensure that the names
  51     they choose for configuration are not too generic.  ``prefix`` for example
  52     is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
  53     name as includes the name of the extension (fragment cache).
  54     """
  55     __metaclass__ = ExtensionRegistry
  56
  57     #: if this extension parses this is the list of tags it's listening to.
  58     tags = set()
  59
  60     #: the priority of that extension.  This is especially useful for
  61     #: extensions that preprocess values.  A lower value means higher
  62     #: priority.
  63     #:
  64     #: .. versionadded:: 2.4
  65     priority = 100
  66
  67     def __init__(self, environment):
  68         self.environment = environment
  69
  70     def bind(self, environment):
  71         """Create a copy of this extension bound to another environment."""
  72         rv = object.__new__(self.__class__)
  73         rv.__dict__.update(self.__dict__)
  74         rv.environment = environment
  75         return rv
  76
  77     def preprocess(self, source, name, filename=None):
  78         """This method is called before the actual lexing and can be used to
  79         preprocess the source.  The `filename` is optional.  The return value
  80         must be the preprocessed source.
  81         """
  82         return source
  83
  84     def filter_stream(self, stream):
  85         """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
  86         to filter tokens returned.  This method has to return an iterable of
  87         :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
  88         :class:`~jinja2.lexer.TokenStream`.
  89
  90         In the `ext` folder of the Jinja2 source distribution there is a file
  91         called `inlinegettext.py` which implements a filter that utilizes this
  92         method.
  93         """
  94         return stream
  95
  96     def parse(self, parser):
  97         """If any of the :attr:`tags` matched this method is called with the
  98         parser as first argument.  The token the parser stream is pointing at
  99         is the name token that matched.  This method has to return one or a
 100         list of multiple nodes.
 101         """
 102         raise NotImplementedError()
 103
 104     def attr(self, name, lineno=None):
 105         """Return an attribute node for the current extension.  This is useful
 106         to pass constants on extensions to generated template code.
 107
 108         ::
 109
 110             self.attr('_my_attribute', lineno=lineno)
 111         """
 112         return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
 113
 114     def call_method(self, name, args=None, kwargs=None, dyn_args=None,
 115                     dyn_kwargs=None, lineno=None):
 116         """Call a method of the extension.  This is a shortcut for
 117         :meth:`attr` + :class:`jinja2.nodes.Call`.
 118         """
 119         if args is None:
 120             args = []
 121         if kwargs is None:
 122             kwargs = []
 123         return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
 124                           dyn_args, dyn_kwargs, lineno=lineno)
 125
 126
 127 @contextfunction
 128 def _gettext_alias(__context, *args, **kwargs):
 129     return __context.call(__context.resolve('gettext'), *args, **kwargs)
 130
 131
 132 def _make_new_gettext(func):
 133     @contextfunction
 134     def gettext(__context, __string, **variables):
 135         rv = __context.call(func, __string)
 136         if __context.eval_ctx.autoescape:
 137             rv = Markup(rv)
 138         return rv % variables
 139     return gettext
 140
 141
 142 def _make_new_ngettext(func):
 143     @contextfunction
 144     def ngettext(__context, __singular, __plural, __num, **variables):
 145         variables.setdefault('num', __num)
 146         rv = __context.call(func, __singular, __plural, __num)
 147         if __context.eval_ctx.autoescape:
 148             rv = Markup(rv)
 149         return rv % variables
 150     return ngettext
 151
 152
class InternationalizationExtension(Extension):
    """This extension adds gettext support to Jinja2.

    It handles the ``trans`` tag (with optional ``pluralize`` section,
    closed by ``endtrans``) and, on construction, registers a ``_`` global
    plus a set of helper callables on the environment.
    """
    tags = set(['trans'])

    # TODO: the i18n extension is currently reevaluating values in a few
    # situations.  Take this example:
    #   {% trans count=something() %}{{ count }} foo{% pluralize
    #     %}{{ count }} fooss{% endtrans %}
    # something is called twice here.  One time for the gettext value and
    # the other time for the n-parameter of the ngettext function.

    def __init__(self, environment):
        """Bind the extension and register its hooks on `environment`:
        the ``_`` global and the install/uninstall/extract callables.
        ``newstyle_gettext`` starts out as `False`.
        """
        Extension.__init__(self, environment)
        environment.globals['_'] = _gettext_alias
        environment.extend(
            install_gettext_translations=self._install,
            install_null_translations=self._install_null,
            install_gettext_callables=self._install_callables,
            uninstall_gettext_translations=self._uninstall,
            extract_translations=self._extract,
            newstyle_gettext=False
        )

    def _install(self, translations, newstyle=None):
        """Install the gettext functions of a translations object.

        ``ugettext``/``ungettext`` are preferred when the object has them,
        otherwise ``gettext``/``ngettext`` are used.
        """
        gettext = getattr(translations, 'ugettext', None)
        if gettext is None:
            gettext = translations.gettext
        ngettext = getattr(translations, 'ungettext', None)
        if ngettext is None:
            ngettext = translations.ngettext
        self._install_callables(gettext, ngettext, newstyle)

    def _install_null(self, newstyle=None):
        """Install no-op translation functions: gettext returns its
        argument, ngettext returns the plural unless the count is 1.
        """
        self._install_callables(
            lambda x: x,
            # the one-element tuples keep the and/or idiom correct even
            # if the plural string is empty (a non-empty tuple is truthy).
            lambda s, p, n: (n != 1 and (p,) or (s,))[0],
            newstyle
        )

    def _install_callables(self, gettext, ngettext, newstyle=None):
        """Store `gettext` and `ngettext` as globals of the environment.

        If `newstyle` is not `None` it overrides the environment's
        ``newstyle_gettext`` flag first.  When that flag is true the
        callables are wrapped with the newstyle helpers before they are
        installed.
        """
        if newstyle is not None:
            self.environment.newstyle_gettext = newstyle
        if self.environment.newstyle_gettext:
            gettext = _make_new_gettext(gettext)
            ngettext = _make_new_ngettext(ngettext)
        self.environment.globals.update(
            gettext=gettext,
            ngettext=ngettext
        )

    def _uninstall(self, translations):
        """Remove the ``gettext`` and ``ngettext`` globals if present.
        The `translations` argument is not used.
        """
        for key in 'gettext', 'ngettext':
            self.environment.globals.pop(key, None)

    def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
        """Extract translatable strings from `source`, which is either
        template source (a string, parsed with the environment first) or
        an already parsed node.  Delegates to :func:`extract_from_ast`.
        """
        if isinstance(source, basestring):
            source = self.environment.parse(source)
        return extract_from_ast(source, gettext_functions)

    def parse(self, parser):
        """Parse a translatable tag.

        Reads the variable list of the ``trans`` tag, the singular body
        and an optional ``pluralize`` body, then returns the output node
        built by :meth:`_make_node`.  Fails through ``parser.fail`` for a
        variable defined twice, an unknown variable named on
        ``pluralize`` and a ``pluralize`` section without any variable.
        """
        lineno = next(parser.stream).lineno
        # whether the expression used as plural count is bound to the
        # name ``num``; forwarded to `_make_node`.
        num_called_num = False

        # find all the variables referenced.  Additionally a variable can be
        # defined in the body of the trans block too, but this is checked at
        # a later state.
        plural_expr = None
        variables = {}
        while parser.stream.current.type != 'block_end':
            # every variable after the first one is separated by a comma
            if variables:
                parser.stream.expect('comma')

            # skip colon for python compatibility
            if parser.stream.skip_if('colon'):
                break

            name = parser.stream.expect('name')
            if name.value in variables:
                parser.fail('translatable variable %r defined twice.' %
                            name.value, name.lineno,
                            exc=TemplateAssertionError)

            # expressions
            if parser.stream.current.type == 'assign':
                next(parser.stream)
                variables[name.value] = var = parser.parse_expression()
            else:
                # a bare name is shorthand for loading that name
                variables[name.value] = var = nodes.Name(name.value, 'load')

            # the first variable of the tag is the default plural count
            if plural_expr is None:
                plural_expr = var
                num_called_num = name.value == 'num'

        parser.stream.expect('block_end')

        plural = plural_names = None
        have_plural = False
        referenced = set()

        # now parse until endtrans or pluralize
        singular_names, singular = self._parse_block(parser, True)
        if singular_names:
            referenced.update(singular_names)
            # no variable in the tag itself: fall back to the first name
            # used in the singular body as plural count
            if plural_expr is None:
                plural_expr = nodes.Name(singular_names[0], 'load')
                num_called_num = singular_names[0] == 'num'

        # if we have a pluralize block, we parse that too
        if parser.stream.current.test('name:pluralize'):
            have_plural = True
            next(parser.stream)
            # ``pluralize`` may name the variable to use as count
            if parser.stream.current.type != 'block_end':
                name = parser.stream.expect('name')
                if name.value not in variables:
                    parser.fail('unknown variable %r for pluralization' %
                                name.value, name.lineno,
                                exc=TemplateAssertionError)
                plural_expr = variables[name.value]
                num_called_num = name.value == 'num'
            parser.stream.expect('block_end')
            plural_names, plural = self._parse_block(parser, False)
            # step over the name token `_parse_block` stopped at
            next(parser.stream)
            referenced.update(plural_names)
        else:
            # step over the name token `_parse_block` stopped at
            next(parser.stream)

        # register free names as simple name expressions
        for var in referenced:
            if var not in variables:
                variables[var] = nodes.Name(var, 'load')

        # without a pluralize section the count expression is dropped so
        # that `_make_node` emits a plain gettext call
        if not have_plural:
            plural_expr = None
        elif plural_expr is None:
            parser.fail('pluralize without variables', lineno)

        node = self._make_node(singular, plural, variables, plural_expr,
                               bool(referenced),
                               num_called_num and have_plural)
        node.set_lineno(lineno)
        return node

    def _parse_block(self, parser, allow_pluralize):
        """Parse until the next block tag with a given name.

        Consumes data and ``{{ name }}`` tokens until a block that starts
        with ``endtrans`` (or ``pluralize`` if `allow_pluralize` is true)
        is reached; the stream is left pointing at that name token.
        Returns a tuple of the list of referenced variable names and the
        body as a ``%``-format string.  Any other block tag, a second
        ``pluralize`` or the end of the stream is reported through
        ``parser.fail``.
        """
        referenced = []
        buf = []
        while 1:
            if parser.stream.current.type == 'data':
                # literal percent signs are escaped because the text is
                # used as a %-format string
                buf.append(parser.stream.current.value.replace('%', '%%'))
                next(parser.stream)
            elif parser.stream.current.type == 'variable_begin':
                # only plain names are accepted between the variable
                # delimiters; they become %(name)s placeholders
                next(parser.stream)
                name = parser.stream.expect('name').value
                referenced.append(name)
                buf.append('%%(%s)s' % name)
                parser.stream.expect('variable_end')
            elif parser.stream.current.type == 'block_begin':
                next(parser.stream)
                if parser.stream.current.test('name:endtrans'):
                    break
                elif parser.stream.current.test('name:pluralize'):
                    if allow_pluralize:
                        break
                    parser.fail('a translatable section can have only one '
                                'pluralize section')
                parser.fail('control structures in translatable sections are '
                            'not allowed')
            elif parser.stream.eos:
                parser.fail('unclosed translation block')
            else:
                assert False, 'internal parser error'

        return referenced, concat(buf)

    def _make_node(self, singular, plural, variables, plural_expr,
                   vars_referenced, num_called_num):
        """Generates a useful node from the data provided.

        Builds a call to ``gettext`` (if `plural_expr` is `None`) or
        ``ngettext`` and returns it wrapped in an output node.  With
        newstyle gettext the variables are passed as keyword arguments of
        the call; otherwise the call result is marked safe (if
        autoescaping is on) and, if there are variables, formatted with
        them through a ``%`` operation.
        """
        # old style gettext only: if the body references no variables the
        # %-escaping applied while parsing the block is undone again.
        if not vars_referenced and not self.environment.newstyle_gettext:
            singular = singular.replace('%%', '%')
            if plural:
                plural = plural.replace('%%', '%')

        # singular only:
        if plural_expr is None:
            gettext = nodes.Name('gettext', 'load')
            node = nodes.Call(gettext, [nodes.Const(singular)],
                              [], None, None)

        # singular and plural
        else:
            ngettext = nodes.Name('ngettext', 'load')
            node = nodes.Call(ngettext, [
                nodes.Const(singular),
                nodes.Const(plural),
                plural_expr
            ], [], None, None)

        # in case newstyle gettext is used, the method is powerful
        # enough to handle the variable expansion and autoescape
        # handling itself
        if self.environment.newstyle_gettext:
            for key, value in variables.iteritems():
                # the function adds that later anyways in case num was
                # called num, so just skip it.
                if num_called_num and key == 'num':
                    continue
                node.kwargs.append(nodes.Keyword(key, value))

        # otherwise do that here
        else:
            # mark the return value as safe if we are in an
            # environment with autoescaping turned on
            node = nodes.MarkSafeIfAutoescape(node)
            if variables:
                node = nodes.Mod(node, nodes.Dict([
                    nodes.Pair(nodes.Const(key), value)
                    for key, value in variables.items()
                ]))
        return nodes.Output([node])
 375
 376
 377 class ExprStmtExtension(Extension):
 378     """Adds a `do` tag to Jinja2 that works like the print statement just
 379     that it doesn't print the return value.
 380     """
 381     tags = set(['do'])
 382
 383     def parse(self, parser):
 384         node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
 385         node.node = parser.parse_tuple()
 386         return node
 387
 388
 389 class LoopControlExtension(Extension):
 390     """Adds break and continue to the template engine."""
 391     tags = set(['break', 'continue'])
 392
 393     def parse(self, parser):
 394         token = next(parser.stream)
 395         if token.value == 'break':
 396             return nodes.Break(lineno=token.lineno)
 397         return nodes.Continue(lineno=token.lineno)
 398
 399
 400 class WithExtension(Extension):
 401     """Adds support for a django-like with block."""
 402     tags = set(['with'])
 403
 404     def parse(self, parser):
 405         node = nodes.Scope(lineno=next(parser.stream).lineno)
 406         assignments = []
 407         while parser.stream.current.type != 'block_end':
 408             lineno = parser.stream.current.lineno
 409             if assignments:
 410                 parser.stream.expect('comma')
 411             target = parser.parse_assign_target()
 412             parser.stream.expect('assign')
 413             expr = parser.parse_expression()
 414             assignments.append(nodes.Assign(target, expr, lineno=lineno))
 415         node.body = assignments + \
 416             list(parser.parse_statements(('name:endwith',),
 417                                          drop_needle=True))
 418         return node
 419
 420
 421 class AutoEscapeExtension(Extension):
 422     """Changes auto escape rules for a scope."""
 423     tags = set(['autoescape'])
 424
 425     def parse(self, parser):
 426         node = nodes.ScopedEvalContextModifier(lineno=next(parser.stream).lineno)
 427         node.options = [
 428             nodes.Keyword('autoescape', parser.parse_expression())
 429         ]
 430         node.body = parser.parse_statements(('name:endautoescape',),
 431                                             drop_needle=True)
 432         return nodes.Scope([node])
 433
 434
 435 def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
 436                      babel_style=True):
 437     """Extract localizable strings from the given template node.  Per
 438     default this function returns matches in babel style that means non string
 439     parameters as well as keyword arguments are returned as `None`.  This
 440     allows Babel to figure out what you really meant if you are using
 441     gettext functions that allow keyword arguments for placeholder expansion.
 442     If you don't want that behavior set the `babel_style` parameter to `False`
 443     which causes only strings to be returned and parameters are always stored
 444     in tuples.  As a consequence invalid gettext calls (calls without a single
 445     string parameter or string parameters after non-string parameters) are
 446     skipped.
 447
 448     This example explains the behavior:
 449
 450     >>> from jinja2 import Environment
 451     >>> env = Environment()
 452     >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
 453     >>> list(extract_from_ast(node))
 454     [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
 455     >>> list(extract_from_ast(node, babel_style=False))
 456     [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
 457
 458     For every string found this function yields a ``(lineno, function,
 459     message)`` tuple, where:
 460
 461     * ``lineno`` is the number of the line on which the string was found,
 462     * ``function`` is the name of the ``gettext`` function used (if the
 463       string was extracted from embedded Python code), and
 464     *  ``message`` is the string itself (a ``unicode`` object, or a tuple
 465        of ``unicode`` objects for functions with multiple string arguments).
 466
 467     This extraction function operates on the AST and is because of that unable
 468     to extract any comments.  For comment support you have to use the babel
 469     extraction interface or extract comments yourself.
 470     """
 471     for node in node.find_all(nodes.Call):
 472         if not isinstance(node.node, nodes.Name) or \
 473            node.node.name not in gettext_functions:
 474             continue
 475
 476         strings = []
 477         for arg in node.args:
 478             if isinstance(arg, nodes.Const) and \
 479                isinstance(arg.value, basestring):
 480                 strings.append(arg.value)
 481             else:
 482                 strings.append(None)
 483
 484         for arg in node.kwargs:
 485             strings.append(None)
 486         if node.dyn_args is not None:
 487             strings.append(None)
 488         if node.dyn_kwargs is not None:
 489             strings.append(None)
 490
 491         if not babel_style:
 492             strings = tuple(x for x in strings if x is not None)
 493             if not strings:
 494                 continue
 495         else:
 496             if len(strings) == 1:
 497                 strings = strings[0]
 498             else:
 499                 strings = tuple(strings)
 500         yield node.lineno, node.node.name, strings
 501
 502
 503 class _CommentFinder(object):
 504     """Helper class to find comments in a token stream.  Can only
 505     find comments for gettext calls forwards.  Once the comment
 506     from line 4 is found, a comment for line 1 will not return a
 507     usable value.
 508     """
 509
 510     def __init__(self, tokens, comment_tags):
 511         self.tokens = tokens
 512         self.comment_tags = comment_tags
 513         self.offset = 0
 514         self.last_lineno = 0
 515
 516     def find_backwards(self, offset):
 517         try:
 518             for _, token_type, token_value in \
 519                     reversed(self.tokens[self.offset:offset]):
 520                 if token_type in ('comment', 'linecomment'):
 521                     try:
 522                         prefix, comment = token_value.split(None, 1)
 523                     except ValueError:
 524                         continue
 525                     if prefix in self.comment_tags:
 526                         return [comment.rstrip()]
 527             return []
 528         finally:
 529             self.offset = offset
 530
 531     def find_comments(self, lineno):
 532         if not self.comment_tags or self.last_lineno > lineno:
 533             return []
 534         for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
 535             if token_lineno > lineno:
 536                 return self.find_backwards(self.offset + idx)
 537         return self.find_backwards(len(self.tokens))
 538
 539
 540 def babel_extract(fileobj, keywords, comment_tags, options):
 541     """Babel extraction method for Jinja templates.
 542
 543     .. versionchanged:: 2.3
 544        Basic support for translation comments was added.  If `comment_tags`
 545        is now set to a list of keywords for extraction, the extractor will
 546        try to find the best preceeding comment that begins with one of the
 547        keywords.  For best results, make sure to not have more than one
 548        gettext call in one line of code and the matching comment in the
 549        same line or the line before.
 550
 551     .. versionchanged:: 2.5.1
 552        The `newstyle_gettext` flag can be set to `True` to enable newstyle
 553        gettext calls.
 554
 555     .. versionchanged:: 2.7
 556        A `silent` option can now be provided.  If set to `False` template
 557        syntax errors are propagated instead of being ignored.
 558
 559     :param fileobj: the file-like object the messages should be extracted from
 560     :param keywords: a list of keywords (i.e. function names) that should be
 561                      recognized as translation functions
 562     :param comment_tags: a list of translator tags to search for and include
 563                          in the results.
 564     :param options: a dictionary of additional options (optional)
 565     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
 566              (comments will be empty currently)
 567     """
 568     extensions = set()
 569     for extension in options.get('extensions', '').split(','):
 570         extension = extension.strip()
 571         if not extension:
 572             continue
 573         extensions.add(import_string(extension))
 574     if InternationalizationExtension not in extensions:
 575         extensions.add(InternationalizationExtension)
 576
 577     def getbool(options, key, default=False):
 578         return options.get(key, str(default)).lower() in \
 579             ('1', 'on', 'yes', 'true')
 580
 581     silent = getbool(options, 'silent', True)
 582     environment = Environment(
 583         options.get('block_start_string', BLOCK_START_STRING),
 584         options.get('block_end_string', BLOCK_END_STRING),
 585         options.get('variable_start_string', VARIABLE_START_STRING),
 586         options.get('variable_end_string', VARIABLE_END_STRING),
 587         options.get('comment_start_string', COMMENT_START_STRING),
 588         options.get('comment_end_string', COMMENT_END_STRING),
 589         options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
 590         options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
 591         getbool(options, 'trim_blocks', TRIM_BLOCKS),
 592         NEWLINE_SEQUENCE, frozenset(extensions),
 593         cache_size=0,
 594         auto_reload=False
 595     )
 596
 597     if getbool(options, 'newstyle_gettext'):
 598         environment.newstyle_gettext = True
 599
 600     source = fileobj.read().decode(options.get('encoding', 'utf-8'))
 601     try:
 602         node = environment.parse(source)
 603         tokens = list(environment.lex(environment.preprocess(source)))
 604     except TemplateSyntaxError, e:
 605         if not silent:
 606             raise
 607         # skip templates with syntax errors
 608         return
 609
 610     finder = _CommentFinder(tokens, comment_tags)
 611     for lineno, func, message in extract_from_ast(node, keywords):
 612         yield lineno, func, message, finder.find_comments(lineno)
 613
 614
#: nicer import names: short module-level aliases for the extension
#: classes defined above (``with_`` has a trailing underscore because
#: ``with`` is a Python keyword).
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension