jinja2/ext.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.ext
   4     ~~~~~~~~~~
   5
    Jinja extensions allow adding custom tags, similar to the way Django
    custom tags work.  This module ships the i18n, do, loopcontrols,
    with and autoescape extensions.
   9
  10     :copyright: (c) 2010 by the Jinja Team.
  11     :license: BSD.
  12 """
  13 from collections import deque
  14 from jinja2 import nodes
  15 from jinja2.defaults import *
  16 from jinja2.environment import Environment
  17 from jinja2.runtime import Undefined, concat
  18 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
  19 from jinja2.utils import contextfunction, import_string, Markup, next
  20
  21
# The only really useful gettext functions for a Jinja template.  Note
# that ugettext must be assigned to gettext as Jinja doesn't support
# non-unicode strings.  This tuple is the default set of function names
# the extraction helpers in this module look for.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
  26
  27
  28 class ExtensionRegistry(type):
  29     """Gives the extension an unique identifier."""
  30
  31     def __new__(cls, name, bases, d):
  32         rv = type.__new__(cls, name, bases, d)
  33         rv.identifier = rv.__module__ + '.' + rv.__name__
  34         return rv
  35
  36
  37 class Extension(object):
  38     """Extensions can be used to add extra functionality to the Jinja template
  39     system at the parser level.  Custom extensions are bound to an environment
  40     but may not store environment specific data on `self`.  The reason for
  41     this is that an extension can be bound to another environment (for
  42     overlays) by creating a copy and reassigning the `environment` attribute.
  43
  44     As extensions are created by the environment they cannot accept any
  45     arguments for configuration.  One may want to work around that by using
  46     a factory function, but that is not possible as extensions are identified
  47     by their import name.  The correct way to configure the extension is
  48     storing the configuration values on the environment.  Because this way the
  49     environment ends up acting as central configuration storage the
  50     attributes may clash which is why extensions have to ensure that the names
  51     they choose for configuration are not too generic.  ``prefix`` for example
  52     is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
  53     name as includes the name of the extension (fragment cache).
  54     """
  55     __metaclass__ = ExtensionRegistry
  56
  57     #: if this extension parses this is the list of tags it's listening to.
  58     tags = set()
  59
  60     #: the priority of that extension.  This is especially useful for
  61     #: extensions that preprocess values.  A lower value means higher
  62     #: priority.
  63     #:
  64     #: .. versionadded:: 2.4
  65     priority = 100
  66
  67     def __init__(self, environment):
  68         self.environment = environment
  69
  70     def bind(self, environment):
  71         """Create a copy of this extension bound to another environment."""
  72         rv = object.__new__(self.__class__)
  73         rv.__dict__.update(self.__dict__)
  74         rv.environment = environment
  75         return rv
  76
  77     def preprocess(self, source, name, filename=None):
  78         """This method is called before the actual lexing and can be used to
  79         preprocess the source.  The `filename` is optional.  The return value
  80         must be the preprocessed source.
  81         """
  82         return source
  83
  84     def filter_stream(self, stream):
  85         """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
  86         to filter tokens returned.  This method has to return an iterable of
  87         :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
  88         :class:`~jinja2.lexer.TokenStream`.
  89
  90         In the `ext` folder of the Jinja2 source distribution there is a file
  91         called `inlinegettext.py` which implements a filter that utilizes this
  92         method.
  93         """
  94         return stream
  95
  96     def parse(self, parser):
  97         """If any of the :attr:`tags` matched this method is called with the
  98         parser as first argument.  The token the parser stream is pointing at
  99         is the name token that matched.  This method has to return one or a
 100         list of multiple nodes.
 101         """
 102         raise NotImplementedError()
 103
 104     def attr(self, name, lineno=None):
 105         """Return an attribute node for the current extension.  This is useful
 106         to pass constants on extensions to generated template code::
 107
 108             self.attr('_my_attribute', lineno=lineno)
 109         """
 110         return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
 111
 112     def call_method(self, name, args=None, kwargs=None, dyn_args=None,
 113                     dyn_kwargs=None, lineno=None):
 114         """Call a method of the extension.  This is a shortcut for
 115         :meth:`attr` + :class:`jinja2.nodes.Call`.
 116         """
 117         if args is None:
 118             args = []
 119         if kwargs is None:
 120             kwargs = []
 121         return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
 122                           dyn_args, dyn_kwargs, lineno=lineno)
 123
 124
 125 @contextfunction
 126 def _gettext_alias(__context, *args, **kwargs):
 127     return __context.call(__context.resolve('gettext'), *args, **kwargs)
 128
 129
 130 def _make_new_gettext(func):
 131     @contextfunction
 132     def gettext(__context, __string, **variables):
 133         rv = __context.call(func, __string)
 134         if __context.eval_ctx.autoescape:
 135             rv = Markup(rv)
 136         return rv % variables
 137     return gettext
 138
 139
 140 def _make_new_ngettext(func):
 141     @contextfunction
 142     def ngettext(__context, __singular, __plural, __num, **variables):
 143         variables.setdefault('num', __num)
 144         rv = __context.call(func, __singular, __plural, __num)
 145         if __context.eval_ctx.autoescape:
 146             rv = Markup(rv)
 147         return rv % variables
 148     return ngettext
 149
 150
 151 class InternationalizationExtension(Extension):
 152     """This extension adds gettext support to Jinja2."""
 153     tags = set(['trans'])
 154
 155     # TODO: the i18n extension is currently reevaluating values in a few
 156     # situations.  Take this example:
 157     #   {% trans count=something() %}{{ count }} foo{% pluralize
 158     #     %}{{ count }} fooss{% endtrans %}
 159     # something is called twice here.  One time for the gettext value and
 160     # the other time for the n-parameter of the ngettext function.
 161
 162     def __init__(self, environment):
 163         Extension.__init__(self, environment)
 164         environment.globals['_'] = _gettext_alias
 165         environment.extend(
 166             install_gettext_translations=self._install,
 167             install_null_translations=self._install_null,
 168             install_gettext_callables=self._install_callables,
 169             uninstall_gettext_translations=self._uninstall,
 170             extract_translations=self._extract,
 171             newstyle_gettext=False
 172         )
 173
 174     def _install(self, translations, newstyle=None):
 175         gettext = getattr(translations, 'ugettext', None)
 176         if gettext is None:
 177             gettext = translations.gettext
 178         ngettext = getattr(translations, 'ungettext', None)
 179         if ngettext is None:
 180             ngettext = translations.ngettext
 181         self._install_callables(gettext, ngettext, newstyle)
 182
 183     def _install_null(self, newstyle=None):
 184         self._install_callables(
 185             lambda x: x,
 186             lambda s, p, n: (n != 1 and (p,) or (s,))[0],
 187             newstyle
 188         )
 189
 190     def _install_callables(self, gettext, ngettext, newstyle=None):
 191         if newstyle is not None:
 192             self.environment.newstyle_gettext = newstyle
 193         if self.environment.newstyle_gettext:
 194             gettext = _make_new_gettext(gettext)
 195             ngettext = _make_new_ngettext(ngettext)
 196         self.environment.globals.update(
 197             gettext=gettext,
 198             ngettext=ngettext
 199         )
 200
 201     def _uninstall(self, translations):
 202         for key in 'gettext', 'ngettext':
 203             self.environment.globals.pop(key, None)
 204
 205     def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
 206         if isinstance(source, basestring):
 207             source = self.environment.parse(source)
 208         return extract_from_ast(source, gettext_functions)
 209
 210     def parse(self, parser):
 211         """Parse a translatable tag."""
 212         lineno = next(parser.stream).lineno
 213         num_called_num = False
 214
 215         # find all the variables referenced.  Additionally a variable can be
 216         # defined in the body of the trans block too, but this is checked at
 217         # a later state.
 218         plural_expr = None
 219         variables = {}
 220         while parser.stream.current.type != 'block_end':
 221             if variables:
 222                 parser.stream.expect('comma')
 223
 224             # skip colon for python compatibility
 225             if parser.stream.skip_if('colon'):
 226                 break
 227
 228             name = parser.stream.expect('name')
 229             if name.value in variables:
 230                 parser.fail('translatable variable %r defined twice.' %
 231                             name.value, name.lineno,
 232                             exc=TemplateAssertionError)
 233
 234             # expressions
 235             if parser.stream.current.type == 'assign':
 236                 next(parser.stream)
 237                 variables[name.value] = var = parser.parse_expression()
 238             else:
 239                 variables[name.value] = var = nodes.Name(name.value, 'load')
 240
 241             if plural_expr is None:
 242                 plural_expr = var
 243                 num_called_num = name.value == 'num'
 244
 245         parser.stream.expect('block_end')
 246
 247         plural = plural_names = None
 248         have_plural = False
 249         referenced = set()
 250
 251         # now parse until endtrans or pluralize
 252         singular_names, singular = self._parse_block(parser, True)
 253         if singular_names:
 254             referenced.update(singular_names)
 255             if plural_expr is None:
 256                 plural_expr = nodes.Name(singular_names[0], 'load')
 257                 num_called_num = singular_names[0] == 'num'
 258
 259         # if we have a pluralize block, we parse that too
 260         if parser.stream.current.test('name:pluralize'):
 261             have_plural = True
 262             next(parser.stream)
 263             if parser.stream.current.type != 'block_end':
 264                 name = parser.stream.expect('name')
 265                 if name.value not in variables:
 266                     parser.fail('unknown variable %r for pluralization' %
 267                                 name.value, name.lineno,
 268                                 exc=TemplateAssertionError)
 269                 plural_expr = variables[name.value]
 270                 num_called_num = name.value == 'num'
 271             parser.stream.expect('block_end')
 272             plural_names, plural = self._parse_block(parser, False)
 273             next(parser.stream)
 274             referenced.update(plural_names)
 275         else:
 276             next(parser.stream)
 277
 278         # register free names as simple name expressions
 279         for var in referenced:
 280             if var not in variables:
 281                 variables[var] = nodes.Name(var, 'load')
 282
 283         if not have_plural:
 284             plural_expr = None
 285         elif plural_expr is None:
 286             parser.fail('pluralize without variables', lineno)
 287
 288         node = self._make_node(singular, plural, variables, plural_expr,
 289                                bool(referenced),
 290                                num_called_num and have_plural)
 291         node.set_lineno(lineno)
 292         return node
 293
 294     def _parse_block(self, parser, allow_pluralize):
 295         """Parse until the next block tag with a given name."""
 296         referenced = []
 297         buf = []
 298         while 1:
 299             if parser.stream.current.type == 'data':
 300                 buf.append(parser.stream.current.value.replace('%', '%%'))
 301                 next(parser.stream)
 302             elif parser.stream.current.type == 'variable_begin':
 303                 next(parser.stream)
 304                 name = parser.stream.expect('name').value
 305                 referenced.append(name)
 306                 buf.append('%%(%s)s' % name)
 307                 parser.stream.expect('variable_end')
 308             elif parser.stream.current.type == 'block_begin':
 309                 next(parser.stream)
 310                 if parser.stream.current.test('name:endtrans'):
 311                     break
 312                 elif parser.stream.current.test('name:pluralize'):
 313                     if allow_pluralize:
 314                         break
 315                     parser.fail('a translatable section can have only one '
 316                                 'pluralize section')
 317                 parser.fail('control structures in translatable sections are '
 318                             'not allowed')
 319             elif parser.stream.eos:
 320                 parser.fail('unclosed translation block')
 321             else:
 322                 assert False, 'internal parser error'
 323
 324         return referenced, concat(buf)
 325
 326     def _make_node(self, singular, plural, variables, plural_expr,
 327                    vars_referenced, num_called_num):
 328         """Generates a useful node from the data provided."""
 329         # no variables referenced?  no need to escape for old style
 330         # gettext invocations
 331         if not vars_referenced and not self.environment.newstyle_gettext:
 332             singular = singular.replace('%%', '%')
 333             if plural:
 334                 plural = plural.replace('%%', '%')
 335
 336         # singular only:
 337         if plural_expr is None:
 338             gettext = nodes.Name('gettext', 'load')
 339             node = nodes.Call(gettext, [nodes.Const(singular)],
 340                               [], None, None)
 341
 342         # singular and plural
 343         else:
 344             ngettext = nodes.Name('ngettext', 'load')
 345             node = nodes.Call(ngettext, [
 346                 nodes.Const(singular),
 347                 nodes.Const(plural),
 348                 plural_expr
 349             ], [], None, None)
 350
 351         # in case newstyle gettext is used, the method is powerful
 352         # enough to handle the variable expansion and autoescape
 353         # handling itself
 354         if self.environment.newstyle_gettext:
 355             for key, value in variables.iteritems():
 356                 # the function adds that later anyways in case num was
 357                 # called num, so just skip it.
 358                 if num_called_num and key == 'num':
 359                     continue
 360                 node.kwargs.append(nodes.Keyword(key, value))
 361
 362         # otherwise do that here
 363         else:
 364             # mark the return value as safe if we are in an
 365             # environment with autoescaping turned on
 366             node = nodes.MarkSafeIfAutoescape(node)
 367             if variables:
 368                 node = nodes.Mod(node, nodes.Dict([
 369                     nodes.Pair(nodes.Const(key), value)
 370                     for key, value in variables.items()
 371                 ]))
 372         return nodes.Output([node])
 373
 374
 375 class ExprStmtExtension(Extension):
 376     """Adds a `do` tag to Jinja2 that works like the print statement just
 377     that it doesn't print the return value.
 378     """
 379     tags = set(['do'])
 380
 381     def parse(self, parser):
 382         node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
 383         node.node = parser.parse_tuple()
 384         return node
 385
 386
 387 class LoopControlExtension(Extension):
 388     """Adds break and continue to the template engine."""
 389     tags = set(['break', 'continue'])
 390
 391     def parse(self, parser):
 392         token = next(parser.stream)
 393         if token.value == 'break':
 394             return nodes.Break(lineno=token.lineno)
 395         return nodes.Continue(lineno=token.lineno)
 396
 397
 398 class WithExtension(Extension):
 399     """Adds support for a django-like with block."""
 400     tags = set(['with'])
 401
 402     def parse(self, parser):
 403         node = nodes.Scope(lineno=next(parser.stream).lineno)
 404         assignments = []
 405         while parser.stream.current.type != 'block_end':
 406             lineno = parser.stream.current.lineno
 407             if assignments:
 408                 parser.stream.expect('comma')
 409             target = parser.parse_assign_target()
 410             parser.stream.expect('assign')
 411             expr = parser.parse_expression()
 412             assignments.append(nodes.Assign(target, expr, lineno=lineno))
 413         node.body = assignments + \
 414             list(parser.parse_statements(('name:endwith',),
 415                                          drop_needle=True))
 416         return node
 417
 418
 419 class AutoEscapeExtension(Extension):
 420     """Changes auto escape rules for a scope."""
 421     tags = set(['autoescape'])
 422
 423     def parse(self, parser):
 424         node = nodes.ScopedEvalContextModifier(lineno=next(parser.stream).lineno)
 425         node.options = [
 426             nodes.Keyword('autoescape', parser.parse_expression())
 427         ]
 428         node.body = parser.parse_statements(('name:endautoescape',),
 429                                             drop_needle=True)
 430         return nodes.Scope([node])
 431
 432
 433 def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
 434                      babel_style=True):
 435     """Extract localizable strings from the given template node.  Per
 436     default this function returns matches in babel style that means non string
 437     parameters as well as keyword arguments are returned as `None`.  This
 438     allows Babel to figure out what you really meant if you are using
 439     gettext functions that allow keyword arguments for placeholder expansion.
 440     If you don't want that behavior set the `babel_style` parameter to `False`
 441     which causes only strings to be returned and parameters are always stored
 442     in tuples.  As a consequence invalid gettext calls (calls without a single
 443     string parameter or string parameters after non-string parameters) are
 444     skipped.
 445
 446     This example explains the behavior:
 447
 448     >>> from jinja2 import Environment
 449     >>> env = Environment()
 450     >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
 451     >>> list(extract_from_ast(node))
 452     [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
 453     >>> list(extract_from_ast(node, babel_style=False))
 454     [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
 455
 456     For every string found this function yields a ``(lineno, function,
 457     message)`` tuple, where:
 458
 459     * ``lineno`` is the number of the line on which the string was found,
 460     * ``function`` is the name of the ``gettext`` function used (if the
 461       string was extracted from embedded Python code), and
 462     *  ``message`` is the string itself (a ``unicode`` object, or a tuple
 463        of ``unicode`` objects for functions with multiple string arguments).
 464
 465     This extraction function operates on the AST and is because of that unable
 466     to extract any comments.  For comment support you have to use the babel
 467     extraction interface or extract comments yourself.
 468     """
 469     for node in node.find_all(nodes.Call):
 470         if not isinstance(node.node, nodes.Name) or \
 471            node.node.name not in gettext_functions:
 472             continue
 473
 474         strings = []
 475         for arg in node.args:
 476             if isinstance(arg, nodes.Const) and \
 477                isinstance(arg.value, basestring):
 478                 strings.append(arg.value)
 479             else:
 480                 strings.append(None)
 481
 482         for arg in node.kwargs:
 483             strings.append(None)
 484         if node.dyn_args is not None:
 485             strings.append(None)
 486         if node.dyn_kwargs is not None:
 487             strings.append(None)
 488
 489         if not babel_style:
 490             strings = tuple(x for x in strings if x is not None)
 491             if not strings:
 492                 continue
 493         else:
 494             if len(strings) == 1:
 495                 strings = strings[0]
 496             else:
 497                 strings = tuple(strings)
 498         yield node.lineno, node.node.name, strings
 499
 500
 501 class _CommentFinder(object):
 502     """Helper class to find comments in a token stream.  Can only
 503     find comments for gettext calls forwards.  Once the comment
 504     from line 4 is found, a comment for line 1 will not return a
 505     usable value.
 506     """
 507
 508     def __init__(self, tokens, comment_tags):
 509         self.tokens = tokens
 510         self.comment_tags = comment_tags
 511         self.offset = 0
 512         self.last_lineno = 0
 513
 514     def find_backwards(self, offset):
 515         try:
 516             for _, token_type, token_value in \
 517                     reversed(self.tokens[self.offset:offset]):
 518                 if token_type in ('comment', 'linecomment'):
 519                     try:
 520                         prefix, comment = token_value.split(None, 1)
 521                     except ValueError:
 522                         continue
 523                     if prefix in self.comment_tags:
 524                         return [comment.rstrip()]
 525             return []
 526         finally:
 527             self.offset = offset
 528
 529     def find_comments(self, lineno):
 530         if not self.comment_tags or self.last_lineno > lineno:
 531             return []
 532         for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
 533             if token_lineno > lineno:
 534                 return self.find_backwards(self.offset + idx)
 535         return self.find_backwards(len(self.tokens))
 536
 537
 538 def babel_extract(fileobj, keywords, comment_tags, options):
 539     """Babel extraction method for Jinja templates.
 540
 541     .. versionchanged:: 2.3
 542        Basic support for translation comments was added.  If `comment_tags`
 543        is now set to a list of keywords for extraction, the extractor will
 544        try to find the best preceeding comment that begins with one of the
 545        keywords.  For best results, make sure to not have more than one
 546        gettext call in one line of code and the matching comment in the
 547        same line or the line before.
 548
 549     .. versionchanged:: 2.5.1
 550        The `newstyle_gettext` flag can be set to `True` to enable newstyle
 551        gettext calls.
 552
 553     :param fileobj: the file-like object the messages should be extracted from
 554     :param keywords: a list of keywords (i.e. function names) that should be
 555                      recognized as translation functions
 556     :param comment_tags: a list of translator tags to search for and include
 557                          in the results.
 558     :param options: a dictionary of additional options (optional)
 559     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
 560              (comments will be empty currently)
 561     """
 562     extensions = set()
 563     for extension in options.get('extensions', '').split(','):
 564         extension = extension.strip()
 565         if not extension:
 566             continue
 567         extensions.add(import_string(extension))
 568     if InternationalizationExtension not in extensions:
 569         extensions.add(InternationalizationExtension)
 570
 571     def getbool(options, key, default=False):
 572         options.get(key, str(default)).lower() in ('1', 'on', 'yes', 'true')
 573
 574     environment = Environment(
 575         options.get('block_start_string', BLOCK_START_STRING),
 576         options.get('block_end_string', BLOCK_END_STRING),
 577         options.get('variable_start_string', VARIABLE_START_STRING),
 578         options.get('variable_end_string', VARIABLE_END_STRING),
 579         options.get('comment_start_string', COMMENT_START_STRING),
 580         options.get('comment_end_string', COMMENT_END_STRING),
 581         options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
 582         options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
 583         getbool(options, 'trim_blocks', TRIM_BLOCKS),
 584         NEWLINE_SEQUENCE, frozenset(extensions),
 585         cache_size=0,
 586         auto_reload=False
 587     )
 588
 589     if getbool(options, 'newstyle_gettext'):
 590         environment.newstyle_gettext = True
 591
 592     source = fileobj.read().decode(options.get('encoding', 'utf-8'))
 593     try:
 594         node = environment.parse(source)
 595         tokens = list(environment.lex(environment.preprocess(source)))
 596     except TemplateSyntaxError, e:
 597         # skip templates with syntax errors
 598         return
 599
 600     finder = _CommentFinder(tokens, comment_tags)
 601     for lineno, func, message in extract_from_ast(node, keywords):
 602         yield lineno, func, message, finder.find_comments(lineno)
 603
 604
#: nicer import names: short module level aliases for the extension
#: classes defined above.  ``with_`` carries a trailing underscore
#: because ``with`` is a reserved word.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension