jinja2/ext.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.ext
   4     ~~~~~~~~~~
   5
   6     Jinja extensions allow to add custom tags similar to the way django custom
   7     tags work.  By default two example extensions exist: an i18n and a cache
   8     extension.
   9
  10     :copyright: (c) 2010 by the Jinja Team.
  11     :license: BSD.
  12 """
  13 from collections import deque
  14 from jinja2 import nodes
  15 from jinja2.defaults import *
  16 from jinja2.environment import get_spontaneous_environment
  17 from jinja2.runtime import Undefined, concat
  18 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
  19 from jinja2.utils import contextfunction, import_string, Markup, next
  20
  21
# The only gettext functions that are really useful in a Jinja template.
# Note that ugettext must be assigned to gettext, as Jinja doesn't support
# non-unicode strings.  This tuple is the default set of function names
# looked for when extracting translatable strings from a template AST.
GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
  26
  27
  28 class ExtensionRegistry(type):
  29     """Gives the extension an unique identifier."""
  30
  31     def __new__(cls, name, bases, d):
  32         rv = type.__new__(cls, name, bases, d)
  33         rv.identifier = rv.__module__ + '.' + rv.__name__
  34         return rv
  35
  36
  37 class Extension(object):
  38     """Extensions can be used to add extra functionality to the Jinja template
  39     system at the parser level.  Custom extensions are bound to an environment
  40     but may not store environment specific data on `self`.  The reason for
  41     this is that an extension can be bound to another environment (for
  42     overlays) by creating a copy and reassigning the `environment` attribute.
  43
  44     As extensions are created by the environment they cannot accept any
  45     arguments for configuration.  One may want to work around that by using
  46     a factory function, but that is not possible as extensions are identified
  47     by their import name.  The correct way to configure the extension is
  48     storing the configuration values on the environment.  Because this way the
  49     environment ends up acting as central configuration storage the
  50     attributes may clash which is why extensions have to ensure that the names
  51     they choose for configuration are not too generic.  ``prefix`` for example
  52     is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
  53     name as includes the name of the extension (fragment cache).
  54     """
  55     __metaclass__ = ExtensionRegistry
  56
  57     #: if this extension parses this is the list of tags it's listening to.
  58     tags = set()
  59
  60     #: the priority of that extension.  This is especially useful for
  61     #: extensions that preprocess values.  A lower value means higher
  62     #: priority.
  63     #:
  64     #: .. versionadded:: 2.4
  65     priority = 100
  66
  67     def __init__(self, environment):
  68         self.environment = environment
  69
  70     def bind(self, environment):
  71         """Create a copy of this extension bound to another environment."""
  72         rv = object.__new__(self.__class__)
  73         rv.__dict__.update(self.__dict__)
  74         rv.environment = environment
  75         return rv
  76
  77     def preprocess(self, source, name, filename=None):
  78         """This method is called before the actual lexing and can be used to
  79         preprocess the source.  The `filename` is optional.  The return value
  80         must be the preprocessed source.
  81         """
  82         return source
  83
  84     def filter_stream(self, stream):
  85         """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
  86         to filter tokens returned.  This method has to return an iterable of
  87         :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
  88         :class:`~jinja2.lexer.TokenStream`.
  89
  90         In the `ext` folder of the Jinja2 source distribution there is a file
  91         called `inlinegettext.py` which implements a filter that utilizes this
  92         method.
  93         """
  94         return stream
  95
  96     def parse(self, parser):
  97         """If any of the :attr:`tags` matched this method is called with the
  98         parser as first argument.  The token the parser stream is pointing at
  99         is the name token that matched.  This method has to return one or a
 100         list of multiple nodes.
 101         """
 102         raise NotImplementedError()
 103
 104     def attr(self, name, lineno=None):
 105         """Return an attribute node for the current extension.  This is useful
 106         to pass constants on extensions to generated template code::
 107
 108             self.attr('_my_attribute', lineno=lineno)
 109         """
 110         return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
 111
 112     def call_method(self, name, args=None, kwargs=None, dyn_args=None,
 113                     dyn_kwargs=None, lineno=None):
 114         """Call a method of the extension.  This is a shortcut for
 115         :meth:`attr` + :class:`jinja2.nodes.Call`.
 116         """
 117         if args is None:
 118             args = []
 119         if kwargs is None:
 120             kwargs = []
 121         return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
 122                           dyn_args, dyn_kwargs, lineno=lineno)
 123
 124
 125 @contextfunction
 126 def _gettext_alias(__context, *args, **kwargs):
 127     return __context.resolve('gettext')(*args, **kwargs)
 128
 129
 130 def _make_new_gettext(func):
 131     @contextfunction
 132     def gettext(__context, __string, **variables):
 133         rv  = func(__string)
 134         if __context.eval_ctx.autoescape:
 135             rv = Markup(rv)
 136         return rv % variables
 137     return gettext
 138
 139
 140 def _make_new_ngettext(func):
 141     @contextfunction
 142     def ngettext(__context, __singular, __plural, num, **variables):
 143         variables.setdefault('num', num)
 144         rv = func(__singular, __plural, num)
 145         if __context.eval_ctx.autoescape:
 146             rv = Markup(rv)
 147         return rv % variables
 148     return ngettext
 149
 150
 151 class InternationalizationExtension(Extension):
 152     """This extension adds gettext support to Jinja2."""
 153     tags = set(['trans'])
 154
 155     # TODO: the i18n extension is currently reevaluating values in a few
 156     # situations.  Take this example:
 157     #   {% trans count=something() %}{{ count }} foo{% pluralize
 158     #     %}{{ count }} fooss{% endtrans %}
 159     # something is called twice here.  One time for the gettext value and
 160     # the other time for the n-parameter of the ngettext function.
 161
 162     def __init__(self, environment):
 163         Extension.__init__(self, environment)
 164         environment.globals['_'] = _gettext_alias
 165         environment.extend(
 166             install_gettext_translations=self._install,
 167             install_null_translations=self._install_null,
 168             install_gettext_callables=self._install_callables,
 169             uninstall_gettext_translations=self._uninstall,
 170             extract_translations=self._extract,
 171             newstyle_gettext=False
 172         )
 173
 174     def _install(self, translations, newstyle=None):
 175         gettext = getattr(translations, 'ugettext', None)
 176         if gettext is None:
 177             gettext = translations.gettext
 178         ngettext = getattr(translations, 'ungettext', None)
 179         if ngettext is None:
 180             ngettext = translations.ngettext
 181         self._install_callables(gettext, ngettext, newstyle)
 182
 183     def _install_null(self, newstyle=None):
 184         self._install_callables(
 185             lambda x: x,
 186             lambda s, p, n: (n != 1 and (p,) or (s,))[0],
 187             newstyle
 188         )
 189
 190     def _install_callables(self, gettext, ngettext, newstyle=None):
 191         if newstyle is not None:
 192             self.environment.newstyle_gettext = newstyle
 193         if self.environment.newstyle_gettext:
 194             gettext = _make_new_gettext(gettext)
 195             ngettext = _make_new_ngettext(ngettext)
 196         self.environment.globals.update(
 197             gettext=gettext,
 198             ngettext=ngettext
 199         )
 200
 201     def _uninstall(self, translations):
 202         for key in 'gettext', 'ngettext':
 203             self.environment.globals.pop(key, None)
 204
 205     def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
 206         if isinstance(source, basestring):
 207             source = self.environment.parse(source)
 208         return extract_from_ast(source, gettext_functions)
 209
 210     def parse(self, parser):
 211         """Parse a translatable tag."""
 212         lineno = next(parser.stream).lineno
 213
 214         # find all the variables referenced.  Additionally a variable can be
 215         # defined in the body of the trans block too, but this is checked at
 216         # a later state.
 217         plural_expr = None
 218         variables = {}
 219         while parser.stream.current.type != 'block_end':
 220             if variables:
 221                 parser.stream.expect('comma')
 222
 223             # skip colon for python compatibility
 224             if parser.stream.skip_if('colon'):
 225                 break
 226
 227             name = parser.stream.expect('name')
 228             if name.value in variables:
 229                 parser.fail('translatable variable %r defined twice.' %
 230                             name.value, name.lineno,
 231                             exc=TemplateAssertionError)
 232
 233             # expressions
 234             if parser.stream.current.type == 'assign':
 235                 next(parser.stream)
 236                 variables[name.value] = var = parser.parse_expression()
 237             else:
 238                 variables[name.value] = var = nodes.Name(name.value, 'load')
 239             if plural_expr is None:
 240                 plural_expr = var
 241
 242         parser.stream.expect('block_end')
 243
 244         plural = plural_names = None
 245         have_plural = False
 246         referenced = set()
 247
 248         # now parse until endtrans or pluralize
 249         singular_names, singular = self._parse_block(parser, True)
 250         if singular_names:
 251             referenced.update(singular_names)
 252             if plural_expr is None:
 253                 plural_expr = nodes.Name(singular_names[0], 'load')
 254
 255         # if we have a pluralize block, we parse that too
 256         if parser.stream.current.test('name:pluralize'):
 257             have_plural = True
 258             next(parser.stream)
 259             if parser.stream.current.type != 'block_end':
 260                 name = parser.stream.expect('name')
 261                 if name.value not in variables:
 262                     parser.fail('unknown variable %r for pluralization' %
 263                                 name.value, name.lineno,
 264                                 exc=TemplateAssertionError)
 265                 plural_expr = variables[name.value]
 266             parser.stream.expect('block_end')
 267             plural_names, plural = self._parse_block(parser, False)
 268             next(parser.stream)
 269             referenced.update(plural_names)
 270         else:
 271             next(parser.stream)
 272
 273         # register free names as simple name expressions
 274         for var in referenced:
 275             if var not in variables:
 276                 variables[var] = nodes.Name(var, 'load')
 277
 278         # no variables referenced?  no need to escape
 279         if not referenced:
 280             singular = singular.replace('%%', '%')
 281             if plural:
 282                 plural = plural.replace('%%', '%')
 283
 284         if not have_plural:
 285             plural_expr = None
 286         elif plural_expr is None:
 287             parser.fail('pluralize without variables', lineno)
 288
 289         if variables:
 290             variables = nodes.Dict([nodes.Pair(nodes.Const(x, lineno=lineno), y)
 291                                     for x, y in variables.items()])
 292         else:
 293             variables = None
 294
 295         node = self._make_node(singular, plural, variables, plural_expr)
 296         node.set_lineno(lineno)
 297         return node
 298
 299     def _parse_block(self, parser, allow_pluralize):
 300         """Parse until the next block tag with a given name."""
 301         referenced = []
 302         buf = []
 303         while 1:
 304             if parser.stream.current.type == 'data':
 305                 buf.append(parser.stream.current.value.replace('%', '%%'))
 306                 next(parser.stream)
 307             elif parser.stream.current.type == 'variable_begin':
 308                 next(parser.stream)
 309                 name = parser.stream.expect('name').value
 310                 referenced.append(name)
 311                 buf.append('%%(%s)s' % name)
 312                 parser.stream.expect('variable_end')
 313             elif parser.stream.current.type == 'block_begin':
 314                 next(parser.stream)
 315                 if parser.stream.current.test('name:endtrans'):
 316                     break
 317                 elif parser.stream.current.test('name:pluralize'):
 318                     if allow_pluralize:
 319                         break
 320                     parser.fail('a translatable section can have only one '
 321                                 'pluralize section')
 322                 parser.fail('control structures in translatable sections are '
 323                             'not allowed')
 324             elif parser.stream.eos:
 325                 parser.fail('unclosed translation block')
 326             else:
 327                 assert False, 'internal parser error'
 328
 329         return referenced, concat(buf)
 330
 331     def _make_node(self, singular, plural, variables, plural_expr):
 332         """Generates a useful node from the data provided."""
 333         # singular only:
 334         if plural_expr is None:
 335             gettext = nodes.Name('gettext', 'load')
 336             node = nodes.Call(gettext, [nodes.Const(singular)],
 337                               [], None, None)
 338
 339         # singular and plural
 340         else:
 341             ngettext = nodes.Name('ngettext', 'load')
 342             node = nodes.Call(ngettext, [
 343                 nodes.Const(singular),
 344                 nodes.Const(plural),
 345                 plural_expr
 346             ], [], None, None)
 347
 348         # in case newstyle gettext is used, the method is powerful
 349         # enough to handle the variable expansion and autoescape
 350         # handling itself
 351         if self.environment.newstyle_gettext:
 352             if variables is None:
 353                 variables = nodes.Dict([])
 354             node.kwargs = variables
 355
 356         # otherwise do that here
 357         else:
 358             # mark the return value as safe if we are in an
 359             # environment with autoescaping turned on
 360             node = nodes.MarkSafeIfAutoescape(node)
 361             if variables:
 362                 node = nodes.Mod(node, variables)
 363         return nodes.Output([node])
 364
 365
 366 class ExprStmtExtension(Extension):
 367     """Adds a `do` tag to Jinja2 that works like the print statement just
 368     that it doesn't print the return value.
 369     """
 370     tags = set(['do'])
 371
 372     def parse(self, parser):
 373         node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
 374         node.node = parser.parse_tuple()
 375         return node
 376
 377
 378 class LoopControlExtension(Extension):
 379     """Adds break and continue to the template engine."""
 380     tags = set(['break', 'continue'])
 381
 382     def parse(self, parser):
 383         token = next(parser.stream)
 384         if token.value == 'break':
 385             return nodes.Break(lineno=token.lineno)
 386         return nodes.Continue(lineno=token.lineno)
 387
 388
 389 class WithExtension(Extension):
 390     """Adds support for a django-like with block."""
 391     tags = set(['with'])
 392
 393     def parse(self, parser):
 394         node = nodes.Scope(lineno=next(parser.stream).lineno)
 395         assignments = []
 396         while parser.stream.current.type != 'block_end':
 397             lineno = parser.stream.current.lineno
 398             if assignments:
 399                 parser.stream.expect('comma')
 400             target = parser.parse_assign_target()
 401             parser.stream.expect('assign')
 402             expr = parser.parse_expression()
 403             assignments.append(nodes.Assign(target, expr, lineno=lineno))
 404         node.body = assignments + \
 405             list(parser.parse_statements(('name:endwith',),
 406                                          drop_needle=True))
 407         return node
 408
 409
 410 class AutoEscapeExtension(Extension):
 411     """Changes auto escape rules for a scope."""
 412     tags = set(['autoescape'])
 413
 414     def parse(self, parser):
 415         node = nodes.ScopedEvalContextModifier(lineno=next(parser.stream).lineno)
 416         node.options = [
 417             nodes.Keyword('autoescape', parser.parse_expression())
 418         ]
 419         node.body = parser.parse_statements(('name:endautoescape',),
 420                                             drop_needle=True)
 421         return nodes.Scope([node])
 422
 423
 424 def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
 425                      babel_style=True):
 426     """Extract localizable strings from the given template node.  Per
 427     default this function returns matches in babel style that means non string
 428     parameters as well as keyword arguments are returned as `None`.  This
 429     allows Babel to figure out what you really meant if you are using
 430     gettext functions that allow keyword arguments for placeholder expansion.
 431     If you don't want that behavior set the `babel_style` parameter to `False`
 432     which causes only strings to be returned and parameters are always stored
 433     in tuples.  As a consequence invalid gettext calls (calls without a single
 434     string parameter or string parameters after non-string parameters) are
 435     skipped.
 436
 437     This example explains the behavior:
 438
 439     >>> from jinja2 import Environment
 440     >>> env = Environment()
 441     >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
 442     >>> list(extract_from_ast(node))
 443     [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
 444     >>> list(extract_from_ast(node, babel_style=False))
 445     [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
 446
 447     For every string found this function yields a ``(lineno, function,
 448     message)`` tuple, where:
 449
 450     * ``lineno`` is the number of the line on which the string was found,
 451     * ``function`` is the name of the ``gettext`` function used (if the
 452       string was extracted from embedded Python code), and
 453     *  ``message`` is the string itself (a ``unicode`` object, or a tuple
 454        of ``unicode`` objects for functions with multiple string arguments).
 455
 456     This extraction function operates on the AST and is because of that unable
 457     to extract any comments.  For comment support you have to use the babel
 458     extraction interface or extract comments yourself.
 459     """
 460     for node in node.find_all(nodes.Call):
 461         if not isinstance(node.node, nodes.Name) or \
 462            node.node.name not in gettext_functions:
 463             continue
 464
 465         strings = []
 466         for arg in node.args:
 467             if isinstance(arg, nodes.Const) and \
 468                isinstance(arg.value, basestring):
 469                 strings.append(arg.value)
 470             else:
 471                 strings.append(None)
 472
 473         for arg in node.kwargs:
 474             strings.append(None)
 475         if node.dyn_args is not None:
 476             strings.append(None)
 477         if node.dyn_kwargs is not None:
 478             strings.append(None)
 479
 480         if not babel_style:
 481             strings = tuple(x for x in strings if x is not None)
 482             if not strings:
 483                 continue
 484         else:
 485             if len(strings) == 1:
 486                 strings = strings[0]
 487             else:
 488                 strings = tuple(strings)
 489         yield node.lineno, node.node.name, strings
 490
 491
 492 class _CommentFinder(object):
 493     """Helper class to find comments in a token stream.  Can only
 494     find comments for gettext calls forwards.  Once the comment
 495     from line 4 is found, a comment for line 1 will not return a
 496     usable value.
 497     """
 498
 499     def __init__(self, tokens, comment_tags):
 500         self.tokens = tokens
 501         self.comment_tags = comment_tags
 502         self.offset = 0
 503         self.last_lineno = 0
 504
 505     def find_backwards(self, offset):
 506         try:
 507             for _, token_type, token_value in \
 508                     reversed(self.tokens[self.offset:offset]):
 509                 if token_type in ('comment', 'linecomment'):
 510                     try:
 511                         prefix, comment = token_value.split(None, 1)
 512                     except ValueError:
 513                         continue
 514                     if prefix in self.comment_tags:
 515                         return [comment.rstrip()]
 516             return []
 517         finally:
 518             self.offset = offset
 519
 520     def find_comments(self, lineno):
 521         if not self.comment_tags or self.last_lineno > lineno:
 522             return []
 523         for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
 524             if token_lineno > lineno:
 525                 return self.find_backwards(self.offset + idx)
 526         return self.find_backwards(len(self.tokens))
 527
 528
 529 def babel_extract(fileobj, keywords, comment_tags, options):
 530     """Babel extraction method for Jinja templates.
 531
 532     .. versionchanged:: 2.3
 533        Basic support for translation comments was added.  If `comment_tags`
 534        is now set to a list of keywords for extraction, the extractor will
 535        try to find the best preceeding comment that begins with one of the
 536        keywords.  For best results, make sure to not have more than one
 537        gettext call in one line of code and the matching comment in the
 538        same line or the line before.
 539
 540     :param fileobj: the file-like object the messages should be extracted from
 541     :param keywords: a list of keywords (i.e. function names) that should be
 542                      recognized as translation functions
 543     :param comment_tags: a list of translator tags to search for and include
 544                          in the results.
 545     :param options: a dictionary of additional options (optional)
 546     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
 547              (comments will be empty currently)
 548     """
 549     extensions = set()
 550     for extension in options.get('extensions', '').split(','):
 551         extension = extension.strip()
 552         if not extension:
 553             continue
 554         extensions.add(import_string(extension))
 555     if InternationalizationExtension not in extensions:
 556         extensions.add(InternationalizationExtension)
 557
 558     environment = get_spontaneous_environment(
 559         options.get('block_start_string', BLOCK_START_STRING),
 560         options.get('block_end_string', BLOCK_END_STRING),
 561         options.get('variable_start_string', VARIABLE_START_STRING),
 562         options.get('variable_end_string', VARIABLE_END_STRING),
 563         options.get('comment_start_string', COMMENT_START_STRING),
 564         options.get('comment_end_string', COMMENT_END_STRING),
 565         options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
 566         options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
 567         str(options.get('trim_blocks', TRIM_BLOCKS)).lower() in \
 568             ('1', 'on', 'yes', 'true'),
 569         NEWLINE_SEQUENCE, frozenset(extensions),
 570         # fill with defaults so that environments are shared
 571         # with other spontaneus environments.  The rest of the
 572         # arguments are optimizer, undefined, finalize, autoescape,
 573         # loader, cache size, auto reloading setting and the
 574         # bytecode cache
 575         True, Undefined, None, False, None, 0, False, None
 576     )
 577
 578     source = fileobj.read().decode(options.get('encoding', 'utf-8'))
 579     try:
 580         node = environment.parse(source)
 581         tokens = list(environment.lex(environment.preprocess(source)))
 582     except TemplateSyntaxError, e:
 583         # skip templates with syntax errors
 584         return
 585
 586     finder = _CommentFinder(tokens, comment_tags)
 587     for lineno, func, message in extract_from_ast(node, keywords):
 588         yield lineno, func, message, finder.find_comments(lineno)
 589
 590
#: nicer import names: short module-level aliases for the extension
#: classes defined above.  ``with_`` carries a trailing underscore because
#: ``with`` is a reserved word in Python.
i18n = InternationalizationExtension
do = ExprStmtExtension
loopcontrols = LoopControlExtension
with_ = WithExtension
autoescape = AutoEscapeExtension