From: Armin Ronacher Date: Fri, 13 Jun 2008 20:44:01 +0000 (+0200) Subject: added support for token stream filtering and preprocessing. X-Git-Tag: 2.0~20 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=9ad96e7af659846abcf45e0c64bf07dacf7881c4;p=jinja2.git added support for token stream filtering and preprocessing. --HG-- branch : trunk --- diff --git a/CHANGES b/CHANGES index 393ba0c..4277283 100644 --- a/CHANGES +++ b/CHANGES @@ -13,6 +13,10 @@ Version 2.0 For more information see :ref:`the implementation details <notes-on-subscriptions>`. +- added support for preprocessing and token stream filtering for extensions. + This allows extensions to implement, for example, simplified gettext calls + in template data. + Version 2.0rc1 -------------- (no codename, released on July 9th 2008) diff --git a/docs/api.rst b/docs/api.rst index e4c7a50..e16107a 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -590,6 +590,8 @@ don't recommend using any of those. .. automethod:: Environment.parse +.. automethod:: Environment.preprocess + .. automethod:: Template.new_context .. method:: Template.root_render_func(context) diff --git a/docs/extensions.rst b/docs/extensions.rst index 8ea554a..215a684 100644 --- a/docs/extensions.rst +++ b/docs/extensions.rst @@ -168,7 +168,7 @@ Extension API Extensions always have to extend the :class:`jinja2.ext.Extension` class: .. autoclass:: Extension - :members: parse, attr, call_method + :members: preprocess, filter_stream, parse, attr, call_method .. 
attribute:: identifier diff --git a/jinja2/environment.py b/jinja2/environment.py index 689bc92..e24e78e 100644 --- a/jinja2/environment.py +++ b/jinja2/environment.py @@ -10,7 +10,7 @@ """ import sys from jinja2.defaults import * -from jinja2.lexer import Lexer +from jinja2.lexer import Lexer, TokenStream from jinja2.parser import Parser from jinja2.optimizer import optimize from jinja2.compiler import generate @@ -339,8 +339,35 @@ class Environment(object): tokens as tuples in the form ``(lineno, token_type, value)``. This can be useful for :ref:`extension development <writing-extensions>` and debugging templates. + + This does not perform preprocessing. If you want the preprocessing + of the extensions to be applied you have to filter source through + the :meth:`preprocess` method. + """ + return self.lexer.tokeniter(unicode(source), name, filename) + + def preprocess(self, source, name=None, filename=None): + """Preprocesses the source with all extensions. This is automatically + called for all parsing and compiling methods but *not* for :meth:`lex` + because there you usually only want the actual source tokenized. + """ + return reduce(lambda s, e: e.preprocess(s, name, filename), + self.extensions.itervalues(), unicode(source)) + + def _tokenize(self, source, name, filename=None): + """Called by the parser to do the preprocessing and filtering + for all the extensions. Returns a :class:`~jinja2.lexer.TokenStream`. 
""" - return self.lexer.tokeniter(source, name, filename) + def _stream(iterable): + if not isinstance(iterable, TokenStream): + iterable = TokenStream(iterable, name, filename) + return iterable + source = self.preprocess(source, name, filename) + tokeniter = self.lexer.tokeniter(source, name, filename) + stream = _stream(self.lexer.wrap(tokeniter, name, filename)) + for ext in self.extensions.itervalues(): + stream = _stream(ext.filter_stream(stream)) + return stream def compile(self, source, name=None, filename=None, raw=False): """Compile a node or template source code. The `name` parameter is diff --git a/jinja2/ext.py b/jinja2/ext.py index 93cde83..9dfa87c 100644 --- a/jinja2/ext.py +++ b/jinja2/ext.py @@ -16,6 +16,7 @@ from jinja2.defaults import * from jinja2.environment import get_spontaneous_environment from jinja2.runtime import Undefined, concat from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError +from jinja2.lexer import Token from jinja2.utils import contextfunction, import_string, Markup @@ -67,6 +68,21 @@ class Extension(object): rv.environment = environment return rv + def preprocess(self, source, name, filename=None): + """This method is called before the actual lexing and can be used to + preprocess the source. The `filename` is optional. The return value + must be the preprocessed source. + """ + return source + + def filter_stream(self, stream): + """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used + to filter tokens returned. This method has to return an iterable of + :class:`~jinja2.lexer.Token`\s, but it doesn't have to return a + :class:`~jinja2.lexer.TokenStream`. + """ + return stream + def parse(self, parser): """If any of the :attr:`tags` matched this method is called with the parser as first argument. 
The token the parser stream is pointing at diff --git a/jinja2/lexer.py b/jinja2/lexer.py index 64621fd..1f22ed7 100644 --- a/jinja2/lexer.py +++ b/jinja2/lexer.py @@ -133,17 +133,17 @@ class TokenStreamIterator(object): """ def __init__(self, stream): - self._stream = stream + self.stream = stream def __iter__(self): return self def next(self): - token = self._stream.current + token = self.stream.current if token.type == 'eof': - self._stream.close() + self.stream.close() raise StopIteration() - self._stream.next(False) + self.stream.next() return token @@ -154,11 +154,12 @@ class TokenStream(object): """ def __init__(self, generator, name, filename): - self._next = generator.next + self._next = iter(generator).next self._pushed = deque() - self.current = Token(1, 'initial', '') self.name = name self.filename = filename + self.closed = False + self.current = Token(1, 'initial', '') self.next() def __iter__(self): @@ -214,6 +215,7 @@ class TokenStream(object): """Close the stream.""" self.current = Token(self.current.lineno, 'eof', '') self._next = None + self.closed = True def expect(self, expr): """Expect a given token type and return it. This accepts the same @@ -374,60 +376,60 @@ class Lexer(object): return newline_re.sub(self.newline_sequence, value) def tokenize(self, source, name=None, filename=None): - """Works like `tokeniter` but returns a tokenstream of tokens and not - a generator or token tuples. Additionally all token values are already - converted into types and postprocessed. For example comments are removed, - integers and floats converted, strings unescaped etc. + """Calls `tokeniter` + `wrap` and wraps the result in a token stream. + This is currently only used for unittests. 
""" - def generate(): - for lineno, token, value in self.tokeniter(source, name, filename): - if token in ('comment_begin', 'comment', 'comment_end', - 'whitespace'): - continue - elif token == 'linestatement_begin': - token = 'block_begin' - elif token == 'linestatement_end': - token = 'block_end' - # we are not interested in those tokens in the parser - elif token in ('raw_begin', 'raw_end'): - continue - elif token == 'data': - value = self._normalize_newlines(value) - elif token == 'keyword': - token = value - elif token == 'name': + stream = self.tokeniter(source, name, filename) + return TokenStream(self.wrap(stream, name, filename), name, filename) + + def wrap(self, stream, name=None, filename=None): + """This is called with the stream as returned by `tokeniter` and wraps + every token in a :class:`Token` and converts the value. + """ + for lineno, token, value in stream: + if token in ('comment_begin', 'comment', 'comment_end', + 'whitespace'): + continue + elif token == 'linestatement_begin': + token = 'block_begin' + elif token == 'linestatement_end': + token = 'block_end' + # we are not interested in those tokens in the parser + elif token in ('raw_begin', 'raw_end'): + continue + elif token == 'data': + value = self._normalize_newlines(value) + elif token == 'keyword': + token = value + elif token == 'name': + value = str(value) + elif token == 'string': + # try to unescape string + try: + value = self._normalize_newlines(value[1:-1]) \ + .encode('ascii', 'backslashreplace') \ + .decode('unicode-escape') + except Exception, e: + msg = str(e).split(':')[-1].strip() + raise TemplateSyntaxError(msg, lineno, name, filename) + # if we can express it as bytestring (ascii only) + # we do that for support of semi broken APIs + # as datetime.datetime.strftime + try: value = str(value) - elif token == 'string': - # try to unescape string - try: - value = self._normalize_newlines(value[1:-1]) \ - .encode('ascii', 'backslashreplace') \ - .decode('unicode-escape') - 
except Exception, e: - msg = str(e).split(':')[-1].strip() - raise TemplateSyntaxError(msg, lineno, name, filename) - # if we can express it as bytestring (ascii only) - # we do that for support of semi broken APIs - # as datetime.datetime.strftime - try: - value = str(value) - except UnicodeError: - pass - elif token == 'integer': - value = int(value) - elif token == 'float': - value = float(value) - elif token == 'operator': - token = operators[value] - yield Token(lineno, token, value) - return TokenStream(generate(), name, filename) + except UnicodeError: + pass + elif token == 'integer': + value = int(value) + elif token == 'float': + value = float(value) + elif token == 'operator': + token = operators[value] + yield Token(lineno, token, value) def tokeniter(self, source, name, filename=None): """This method tokenizes the text and returns the tokens in a generator. Use this method if you just want to tokenize a template. - The output you get is not compatible with the input the jinja parser - wants. The parser uses the `tokenize` function with returns a - `TokenStream` and postprocessed tokens. """ source = '\n'.join(unicode(source).splitlines()) pos = 0 diff --git a/jinja2/parser.py b/jinja2/parser.py index 810e381..ea7391f 100644 --- a/jinja2/parser.py +++ b/jinja2/parser.py @@ -25,11 +25,10 @@ class Parser(object): def __init__(self, environment, source, name=None, filename=None): self.environment = environment - self.source = unicode(source) + self.stream = environment._tokenize(source, name, filename) self.name = name self.filename = filename self.closed = False - self.stream = environment.lexer.tokenize(self.source, name, filename) self.extensions = {} for extension in environment.extensions.itervalues(): for tag in extension.tags: