"""
import sys
from jinja2.defaults import *
-from jinja2.lexer import Lexer
+from jinja2.lexer import Lexer, TokenStream
from jinja2.parser import Parser
from jinja2.optimizer import optimize
from jinja2.compiler import generate
tokens as tuples in the form ``(lineno, token_type, value)``.
This can be useful for :ref:`extension development <writing-extensions>`
and debugging templates.
+
+ This does not perform preprocessing. If you want the preprocessing
+ of the extensions to be applied, you have to filter the source through
+ the :meth:`preprocess` method.
+ """
+ return self.lexer.tokeniter(unicode(source), name, filename)
+
+ def preprocess(self, source, name=None, filename=None):
+ """Preprocesses the source with all extensions. This is automatically
+ called for all parsing and compiling methods but *not* for :meth:`lex`
+ because there you usually only want the actual source tokenized.
+ """
+ return reduce(lambda s, e: e.preprocess(s, name, filename),
+ self.extensions.itervalues(), unicode(source))
+
+ def _tokenize(self, source, name, filename=None):
+ """Called by the parser to do the preprocessing and filtering
+ for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`.
"""
- return self.lexer.tokeniter(source, name, filename)
+ def _stream(iterable):
+ if not isinstance(iterable, TokenStream):
+ iterable = TokenStream(iterable, name, filename)
+ return iterable
+ source = self.preprocess(source, name, filename)
+ tokeniter = self.lexer.tokeniter(source, name, filename)
+ stream = _stream(self.lexer.wrap(tokeniter, name, filename))
+ for ext in self.extensions.itervalues():
+ stream = _stream(ext.filter_stream(stream))
+ return stream
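
# Example (not part of the patch): a sketch of how these environment
# methods relate.  `lex` yields raw ``(lineno, token_type, value)`` tuples
# and skips extension preprocessing; `preprocess` applies it explicitly.
# The template string below is made up for illustration.
from jinja2 import Environment

env = Environment()
source = u'Hello {{ name }}!'
for lineno, token_type, value in env.lex(source):
    print lineno, token_type, value
# to see what the parser actually lexes, run the source through the
# extension preprocessing first:
for lineno, token_type, value in env.lex(env.preprocess(source)):
    pass
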
def compile(self, source, name=None, filename=None, raw=False):
"""Compile a node or template source code. The `name` parameter is
from jinja2.environment import get_spontaneous_environment
from jinja2.runtime import Undefined, concat
from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
+from jinja2.lexer import Token
from jinja2.utils import contextfunction, import_string, Markup
rv.environment = environment
return rv
+ def preprocess(self, source, name, filename=None):
+ """This method is called before the actual lexing and can be used to
+ preprocess the source. The `filename` is optional. The return value
+ must be the preprocessed source.
+ """
+ return source
+
+ def filter_stream(self, stream):
+ """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
+ to filter tokens returned. This method has to return an iterable of
+ :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a
+ :class:`~jinja2.lexer.TokenStream`.
+ """
+ return stream
+
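
# Example (not part of the patch): a hypothetical extension using the two
# new hooks.  The class name and the '#!shout' marker are invented; only
# the hook signatures come from the code above, and the import paths
# assume the standard Jinja2 layout (the base class lives in jinja2.ext).
from jinja2.ext import Extension
from jinja2.lexer import Token

class ShoutExtension(Extension):

    def preprocess(self, source, name, filename=None):
        # drop the made-up '#!shout' marker from the first line, if present
        if source.startswith(u'#!shout\n'):
            source = source[len(u'#!shout\n'):]
        return source

    def filter_stream(self, stream):
        # uppercase literal template data; all other tokens pass through.
        # A plain generator is fine; it does not have to be a TokenStream.
        for token in stream:
            if token.type == 'data':
                yield Token(token.lineno, 'data', token.value.upper())
            else:
                yield token
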
def parse(self, parser):
"""If any of the :attr:`tags` matched this method is called with the
parser as first argument. The token the parser stream is pointing at
"""
def __init__(self, stream):
- self._stream = stream
+ self.stream = stream
def __iter__(self):
return self
def next(self):
- token = self._stream.current
+ token = self.stream.current
if token.type == 'eof':
- self._stream.close()
+ self.stream.close()
raise StopIteration()
- self._stream.next(False)
+ self.stream.next()
return token
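
# Example (not part of the patch): iterating a stream goes through this
# iterator, so the loop stops at (and never yields) the 'eof' token and
# closes the stream.  Assumes an existing environment `env`; `tokenize`
# is the test-only helper shown further down.
stream = env.lexer.tokenize(u'{{ foo }}')
for token in stream:
    print token.lineno, token.type, token.value
assert stream.closed
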
"""
def __init__(self, generator, name, filename):
- self._next = generator.next
+ self._next = iter(generator).next
self._pushed = deque()
- self.current = Token(1, 'initial', '')
self.name = name
self.filename = filename
+ self.closed = False
+ self.current = Token(1, 'initial', '')
self.next()
def __iter__(self):
"""Close the stream."""
self.current = Token(self.current.lineno, 'eof', '')
self._next = None
+ self.closed = True
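
# Example (not part of the patch): the new `closed` flag.  After close()
# the stream reports an 'eof' token as `current`, so late consumers can
# still inspect it safely.  Sketch assuming an environment `env` as above.
stream = env.lexer.tokenize(u'{{ foo }}')
stream.close()
assert stream.closed
assert stream.current.type == 'eof'
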
def expect(self, expr):
"""Expect a given token type and return it. This accepts the same
return newline_re.sub(self.newline_sequence, value)
def tokenize(self, source, name=None, filename=None):
- """Works like `tokeniter` but returns a tokenstream of tokens and not
- a generator or token tuples. Additionally all token values are already
- converted into types and postprocessed. For example comments are removed,
- integers and floats converted, strings unescaped etc.
+ """Calls tokeniter + tokenize and wraps it in a token stream.
+ This is currently only used for unittests.
"""
- def generate():
- for lineno, token, value in self.tokeniter(source, name, filename):
- if token in ('comment_begin', 'comment', 'comment_end',
- 'whitespace'):
- continue
- elif token == 'linestatement_begin':
- token = 'block_begin'
- elif token == 'linestatement_end':
- token = 'block_end'
- # we are not interested in those tokens in the parser
- elif token in ('raw_begin', 'raw_end'):
- continue
- elif token == 'data':
- value = self._normalize_newlines(value)
- elif token == 'keyword':
- token = value
- elif token == 'name':
+ stream = self.tokeniter(source, name, filename)
+ return TokenStream(self.wrap(stream, name, filename), name, filename)
+
+ def wrap(self, stream, name=None, filename=None):
+ """This is called with the stream as returned by `tokenize` and wraps
+ every token in a :class:`Token` and converts the value.
+ """
+ for lineno, token, value in stream:
+ if token in ('comment_begin', 'comment', 'comment_end',
+ 'whitespace'):
+ continue
+ elif token == 'linestatement_begin':
+ token = 'block_begin'
+ elif token == 'linestatement_end':
+ token = 'block_end'
+ # we are not interested in those tokens in the parser
+ elif token in ('raw_begin', 'raw_end'):
+ continue
+ elif token == 'data':
+ value = self._normalize_newlines(value)
+ elif token == 'keyword':
+ token = value
+ elif token == 'name':
+ value = str(value)
+ elif token == 'string':
+ # try to unescape string
+ try:
+ value = self._normalize_newlines(value[1:-1]) \
+ .encode('ascii', 'backslashreplace') \
+ .decode('unicode-escape')
+ except Exception, e:
+ msg = str(e).split(':')[-1].strip()
+ raise TemplateSyntaxError(msg, lineno, name, filename)
+ # if we can express it as bytestring (ascii only)
+ # we do that for support of semi broken APIs
+ # as datetime.datetime.strftime
+ try:
+ value = str(value)
- elif token == 'string':
- # try to unescape string
- try:
- value = self._normalize_newlines(value[1:-1]) \
- .encode('ascii', 'backslashreplace') \
- .decode('unicode-escape')
- except Exception, e:
- msg = str(e).split(':')[-1].strip()
- raise TemplateSyntaxError(msg, lineno, name, filename)
- # if we can express it as bytestring (ascii only)
- # we do that for support of semi broken APIs
- # as datetime.datetime.strftime
- try:
- value = str(value)
- except UnicodeError:
- pass
- elif token == 'integer':
- value = int(value)
- elif token == 'float':
- value = float(value)
- elif token == 'operator':
- token = operators[value]
- yield Token(lineno, token, value)
- return TokenStream(generate(), name, filename)
+ except UnicodeError:
+ pass
+ elif token == 'integer':
+ value = int(value)
+ elif token == 'float':
+ value = float(value)
+ elif token == 'operator':
+ token = operators[value]
+ yield Token(lineno, token, value)
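
# Example (not part of the patch): the lexing pipeline after this split.
# `tokeniter` yields raw (lineno, token, value) tuples and `wrap` turns
# them into :class:`Token` instances with converted values.  Sketch
# assuming an environment `env` as above.
lexer = env.lexer
raw = lexer.tokeniter(u'{{ 42 }}', name=None)
for token in lexer.wrap(raw, name=None):
    print token.lineno, token.type, repr(token.value)
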
def tokeniter(self, source, name, filename=None):
"""This method tokenizes the text and returns the tokens in a
generator. Use this method if you just want to tokenize a template.
- The output you get is not compatible with the input the jinja parser
- wants. The parser uses the `tokenize` function with returns a
- `TokenStream` and postprocessed tokens.
"""
source = '\n'.join(unicode(source).splitlines())
pos = 0