From d1ff858a7afd41a5cc166820f70c96f08b6d2136 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 11 May 2008 00:30:43 +0200 Subject: [PATCH] Backed out changeset 6afb554797b6, which added unicode identifier support. This doesn't work in reality under Python 2.6 properly. --HG-- branch : trunk --- docs/api.rst | 29 ++++++++++++++++----- jinja2/compiler.py | 65 ++++++++++++---------------------------------- jinja2/debug.py | 9 +------ jinja2/lexer.py | 23 ++++++++-------- 4 files changed, 52 insertions(+), 74 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 7d07af7..5a131ed 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -68,17 +68,15 @@ High Level API A dict of filters for this environment. As long as no template was loaded it's safe to add new filters or remove old. For custom filters - see :ref:`writing-filters`. Unlike regular identifiers filters and - tests may contain dots to group functions with similar functionality. - For example `to.unicode` is a valid name for a filter. + see :ref:`writing-filters`. For valid filter names have a look at + :ref:`identifier-naming`. .. attribute:: tests A dict of test functions for this environment. As long as no template was loaded it's safe to modify this dict. For custom tests - see :ref:`writing-tests`. Unlike regular identifiers filters and - tests may contain dots to group functions with similar functionality. - For example `check.positive` is a valid name for a test. + see :ref:`writing-tests`. For valid test names have a look at + :ref:`identifier-naming`. .. attribute:: globals @@ -86,6 +84,7 @@ High Level API in a template and (if the optimizer is enabled) may not be overridden by templates. As long as no template was loaded it's safe to modify this dict. For more details see :ref:`global-namespace`. + For valid object names have a look at :ref:`identifier-naming`. .. automethod:: overlay([options]) @@ -115,6 +114,24 @@ High Level API :members: disable_buffering, enable_buffering +.. 
_identifier-naming: + +Notes on Identifiers +~~~~~~~~~~~~~~~~~~~~ + +Jinja2 uses the regular Python 2.x naming rules. Valid identifiers have to +match ``[a-zA-Z_][a-zA-Z0-9_]*``. As a matter of fact non ASCII characters +are currently not allowed. This limitation will probably go away as soon as +unicode identifiers are fully specified for Python 3. + +Filters and tests are looked up in separate namespaces and have slightly +modified identifier syntax. Filters and tests may contain dots to group +filters and tests by topic. For example it's perfectly valid to add a +function into the filter dict and call it `to.unicode`. The regular +expression for filter and test identifiers is +``[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*``. + + Undefined Types --------------- diff --git a/jinja2/compiler.py b/jinja2/compiler.py index b45aeb5..6518427 100644 --- a/jinja2/compiler.py +++ b/jinja2/compiler.py @@ -8,13 +8,12 @@ :copyright: Copyright 2008 by Armin Ronacher. :license: GNU GPL. """ -import string from time import time from copy import copy from random import randrange from keyword import iskeyword from cStringIO import StringIO -from itertools import chain, takewhile +from itertools import chain from jinja2 import nodes from jinja2.visitor import NodeVisitor, NodeTransformer from jinja2.exceptions import TemplateAssertionError @@ -39,8 +38,6 @@ except SyntaxError: else: have_condexpr = True -_safe_ident_chars = set(string.letters + '0123456789') - def generate(node, environment, name, filename, stream=None): """Generate the python source for a node tree.""" @@ -52,30 +49,6 @@ def generate(node, environment, name, filename, stream=None): return generator.stream.getvalue() -def mask_identifier(ident): - """Mask an identifier properly for python source code.""" - rv = ['l_'] - for char in ident: - if char in _safe_ident_chars: - rv.append(char) - else: - rv.append('_%x_' % ord(char)) - return str(''.join(rv)) - - -def unmask_identifier(ident): - """Unmask an 
identifier.""" - if not ident.startswith('l_'): - return ident - rv = [] - i = iter(ident[2:]) - for c in i: - if c == '_': - c = unichr(int(concat(takewhile(lambda c: c != '_', i)), 16)) - rv.append(c) - return ''.join(rv) - - def has_safe_repr(value): """Does the node have a safe representation?""" if value is None or value is NotImplemented or value is Ellipsis: @@ -497,8 +470,7 @@ class CodeGenerator(NodeVisitor): def pull_locals(self, frame): """Pull all the references identifiers into the local scope.""" for name in frame.identifiers.undeclared: - self.writeline('%s = context.resolve(%r)' % (mask_identifier(name), - name)) + self.writeline('l_%s = context.resolve(%r)' % (name, name)) def pull_dependencies(self, nodes): """Pull all the dependencies.""" @@ -524,7 +496,7 @@ class CodeGenerator(NodeVisitor): aliases = {} for name in frame.identifiers.find_shadowed(): aliases[name] = ident = self.temporary_identifier() - self.writeline('%s = %s' % (ident, mask_identifier(name))) + self.writeline('%s = l_%s' % (ident, name)) return aliases def function_scoping(self, node, frame, children=None): @@ -573,8 +545,7 @@ class CodeGenerator(NodeVisitor): func_frame.accesses_kwargs = False func_frame.accesses_varargs = False func_frame.accesses_caller = False - func_frame.arguments = args = [mask_identifier(x.name) - for x in node.args] + func_frame.arguments = args = ['l_' + x.name for x in node.args] undeclared = find_undeclared(children, ('caller', 'kwargs', 'varargs')) @@ -775,7 +746,7 @@ class CodeGenerator(NodeVisitor): def visit_Import(self, node, frame): """Visit regular imports.""" - self.writeline(mask_identifier(node.target) + ' = ', node) + self.writeline('l_%s = ' % node.target, node) if frame.toplevel: self.write('context.vars[%r] = ' % node.target) self.write('environment.get_template(') @@ -803,19 +774,18 @@ class CodeGenerator(NodeVisitor): name, alias = name else: alias = name - self.writeline('%s = getattr(included_template, ' - '%r, missing)' % 
(mask_identifier(alias), name)) - self.writeline('if %s is missing:' % mask_identifier(alias)) + self.writeline('l_%s = getattr(included_template, ' + '%r, missing)' % (alias, name)) + self.writeline('if l_%s is missing:' % alias) self.indent() - self.writeline('%s = environment.undefined(%r %% ' + self.writeline('l_%s = environment.undefined(%r %% ' 'included_template.name, ' 'name=included_template.name)' % - (mask_identifier(alias), 'the template %r does ' - 'not export the requested name ' + repr(name))) + (alias, 'the template %r does not export ' + 'the requested name ' + repr(name))) self.outdent() if frame.toplevel: - self.writeline('context.vars[%r] = %s' % - (alias, mask_identifier(alias))) + self.writeline('context.vars[%r] = l_%s' % (alias, alias)) if not alias.startswith('__'): self.writeline('context.exported_vars.discard(%r)' % alias) @@ -889,7 +859,7 @@ class CodeGenerator(NodeVisitor): # reset the aliases if there are any. for name, alias in aliases.iteritems(): - self.writeline('%s = %s' % (mask_identifier(name), alias)) + self.writeline('l_%s = %s' % (name, alias)) def visit_If(self, node, frame): if_frame = frame.soft() @@ -927,8 +897,8 @@ class CodeGenerator(NodeVisitor): arg_tuple = ', '.join(repr(x.name) for x in node.args) if len(node.args) == 1: arg_tuple += ',' - self.write('%s = Macro(environment, macro, %r, (%s), (' % - (mask_identifier(node.name), node.name, arg_tuple)) + self.write('l_%s = Macro(environment, macro, %r, (%s), (' % + (node.name, node.name, arg_tuple)) for arg in node.defaults: self.visit(arg, macro_frame) self.write(', ') @@ -1112,15 +1082,14 @@ class CodeGenerator(NodeVisitor): # make sure toplevel assignments are added to the context. 
if frame.toplevel: for name in assignment_frame.assigned_names: - self.writeline('context.vars[%r] = %s' % - (name, mask_identifier(name))) + self.writeline('context.vars[%r] = l_%s' % (name, name)) if not name.startswith('__'): self.writeline('context.exported_vars.add(%r)' % name) def visit_Name(self, node, frame): if node.ctx == 'store' and frame.toplevel: frame.assigned_names.add(node.name) - self.write(mask_identifier(node.name)) + self.write('l_' + node.name) def visit_MarkSafe(self, node, frame): self.write('Markup(') diff --git a/jinja2/debug.py b/jinja2/debug.py index cfc2bc8..9209054 100644 --- a/jinja2/debug.py +++ b/jinja2/debug.py @@ -12,7 +12,6 @@ """ import sys from types import CodeType -from jinja2.compiler import unmask_identifier def translate_exception(exc_info): @@ -67,13 +66,7 @@ def fake_exc_info(exc_info, filename, lineno, tb_back=None): locals = {} for name, value in real_locals.iteritems(): if name.startswith('l_'): - try: - locals[str(unmask_identifier(name))] = value - except UnicodeError: - # bummer. someone actually used an unicode identifier. - # there is no way this can be added back into the python - # layer with python < 3. we have to ignore it... - pass + locals[name[2:]] = value # if there is a local called __jinja_exception__, we get # rid of it to not break the debug functionality. diff --git a/jinja2/lexer.py b/jinja2/lexer.py index f472d4b..d950025 100644 --- a/jinja2/lexer.py +++ b/jinja2/lexer.py @@ -31,7 +31,7 @@ whitespace_re = re.compile(r'\s+(?um)') string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)') integer_re = re.compile(r'\d+') -name_re = re.compile(r'\b[^\W\d]\w*\b(?u)') +name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b') float_re = re.compile(r'\d+\.\d+') # bind operators to token types @@ -88,13 +88,6 @@ unicode_escapes = { } -def _trystr(s): - try: - return str(s) - except UnicodeError: - return s - - def unescape_string(lineno, filename, s): r"""Unescape a string. 
Supported escapes: \a, \n, \r\, \f, \v, \\, \", \', \0 @@ -102,8 +95,7 @@ def unescape_string(lineno, filename, s): \x00, \u0000, \U00000000, \N{...} """ try: - return _trystr(s.encode('ascii', 'backslashreplace') - .decode('unicode-escape')) + return s.encode('ascii', 'backslashreplace').decode('unicode-escape') except UnicodeError, e: msg = str(e).split(':')[-1].strip() raise TemplateSyntaxError(msg, lineno, filename) @@ -417,13 +409,20 @@ class Lexer(object): elif token in ('raw_begin', 'raw_end'): continue elif token == 'data': - value = _trystr(value) + try: + value = str(value) + except UnicodeError: + pass elif token == 'keyword': token = value elif token == 'name': - value = _trystr(value) + value = str(value) elif token == 'string': value = unescape_string(lineno, filename, value[1:-1]) + try: + value = str(value) + except UnicodeError: + pass elif token == 'integer': value = int(value) elif token == 'float': -- 2.26.2