From: Armin Ronacher Date: Sat, 10 May 2008 22:18:35 +0000 (+0200) Subject: added support for unicode identifiers X-Git-Tag: 2.0rc1~76 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=b5365480c7ac04821e16a549c16e581a73f121b6;p=jinja2.git added support for unicode identifiers --HG-- branch : trunk --- diff --git a/docs/api.rst b/docs/api.rst index 5a131ed..7d07af7 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -68,15 +68,17 @@ High Level API A dict of filters for this environment. As long as no template was loaded it's safe to add new filters or remove old. For custom filters - see :ref:`writing-filters`. For valid filter names have a look at - :ref:`identifier-naming`. + see :ref:`writing-filters`. Unlike regular identifiers filters and + tests may contain dots to group functions with similar functionality. + For example `to.unicode` is a valid name for a filter. .. attribute:: tests A dict of test functions for this environment. As long as no template was loaded it's safe to modify this dict. For custom tests - see :ref:`writing-tests`. For valid test names have a look at - :ref:`identifier-naming`. + see :ref:`writing-tests`. Unlike regular identifiers filters and + tests may contain dots to group functions with similar functionality. + For example `check.positive` is a valid name for a test. .. attribute:: globals @@ -84,7 +86,6 @@ High Level API in a template and (if the optimizer is enabled) may not be overridden by templates. As long as no template was loaded it's safe to modify this dict. For more details see :ref:`global-namespace`. - For valid object names have a look at :ref:`identifier-naming`. .. automethod:: overlay([options]) @@ -114,24 +115,6 @@ High Level API :members: disable_buffering, enable_buffering -.. _identifier-naming: - -Notes on Identifiers -~~~~~~~~~~~~~~~~~~~~ - -Jinja2 uses the regular Python 2.x naming rules. Valid identifiers have to -match ``[a-zA-Z_][a-zA-Z0-9_]*``. As a matter of fact non ASCII characters -are currently not allowed. This limitation will probably go away as soon as -unicode identifiers are fully specified for Python 3. - -Filters and tests are looked up in separate namespaces and have slightly -modified identifier syntax. Filters and tests may contain dots to group -filters and tests by topic. For example it's perfectly valid to add a -function into the filter dict and call it `to.unicode`. The regular -expression for filter and test identifiers is -``[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*```. - - Undefined Types --------------- diff --git a/jinja2/compiler.py b/jinja2/compiler.py index 6518427..b45aeb5 100644 --- a/jinja2/compiler.py +++ b/jinja2/compiler.py @@ -8,12 +8,13 @@ :copyright: Copyright 2008 by Armin Ronacher. :license: GNU GPL. """ +import string from time import time from copy import copy from random import randrange from keyword import iskeyword from cStringIO import StringIO -from itertools import chain +from itertools import chain, takewhile from jinja2 import nodes from jinja2.visitor import NodeVisitor, NodeTransformer from jinja2.exceptions import TemplateAssertionError @@ -38,6 +39,8 @@ except SyntaxError: else: have_condexpr = True +_safe_ident_chars = set(string.letters + '0123456789') + def generate(node, environment, name, filename, stream=None): """Generate the python source for a node tree.""" @@ -49,6 +52,30 @@ def generate(node, environment, name, filename, stream=None): return generator.stream.getvalue() +def mask_identifier(ident): + """Mask an identifier properly for python source code.""" + rv = ['l_'] + for char in ident: + if char in _safe_ident_chars: + rv.append(char) + else: + rv.append('_%x_' % ord(char)) + return str(''.join(rv)) + + +def unmask_identifier(ident): + """Unmask an identifier.""" + if not ident.startswith('l_'): + return ident + rv = [] + i = iter(ident[2:]) + for c in i: + if c == '_': + c = unichr(int(concat(takewhile(lambda c: c != '_', i)), 16)) + rv.append(c) + return ''.join(rv) + + def has_safe_repr(value): """Does the node have a safe representation?""" if value is None or value is NotImplemented or value is Ellipsis: @@ -470,7 +497,8 @@ class CodeGenerator(NodeVisitor): def pull_locals(self, frame): """Pull all the references identifiers into the local scope.""" for name in frame.identifiers.undeclared: - self.writeline('l_%s = context.resolve(%r)' % (name, name)) + self.writeline('%s = context.resolve(%r)' % (mask_identifier(name), + name)) def pull_dependencies(self, nodes): """Pull all the dependencies.""" @@ -496,7 +524,7 @@ class CodeGenerator(NodeVisitor): aliases = {} for name in frame.identifiers.find_shadowed(): aliases[name] = ident = self.temporary_identifier() - self.writeline('%s = l_%s' % (ident, name)) + self.writeline('%s = %s' % (ident, mask_identifier(name))) return aliases def function_scoping(self, node, frame, children=None): @@ -545,7 +573,8 @@ class CodeGenerator(NodeVisitor): func_frame.accesses_kwargs = False func_frame.accesses_varargs = False func_frame.accesses_caller = False - func_frame.arguments = args = ['l_' + x.name for x in node.args] + func_frame.arguments = args = [mask_identifier(x.name) + for x in node.args] undeclared = find_undeclared(children, ('caller', 'kwargs', 'varargs')) @@ -746,7 +775,7 @@ class CodeGenerator(NodeVisitor): def visit_Import(self, node, frame): """Visit regular imports.""" - self.writeline('l_%s = ' % node.target, node) + self.writeline(mask_identifier(node.target) + ' = ', node) if frame.toplevel: self.write('context.vars[%r] = ' % node.target) self.write('environment.get_template(') @@ -774,18 +803,19 @@ class CodeGenerator(NodeVisitor): name, alias = name else: alias = name - self.writeline('l_%s = getattr(included_template, ' - '%r, missing)' % (alias, name)) - self.writeline('if l_%s is missing:' % alias) + self.writeline('%s = getattr(included_template, ' + '%r, missing)' % (mask_identifier(alias), name)) + self.writeline('if %s is missing:' % mask_identifier(alias)) self.indent() - self.writeline('l_%s = environment.undefined(%r %% ' + self.writeline('%s = environment.undefined(%r %% ' 'included_template.name, ' 'name=included_template.name)' % - (alias, 'the template %r does not export ' - 'the requested name ' + repr(name))) + (mask_identifier(alias), 'the template %r does ' + 'not export the requested name ' + repr(name))) self.outdent() if frame.toplevel: - self.writeline('context.vars[%r] = l_%s' % (alias, alias)) + self.writeline('context.vars[%r] = %s' % + (alias, mask_identifier(alias))) if not alias.startswith('__'): self.writeline('context.exported_vars.discard(%r)' % alias) @@ -859,7 +889,7 @@ class CodeGenerator(NodeVisitor): # reset the aliases if there are any. for name, alias in aliases.iteritems(): - self.writeline('l_%s = %s' % (name, alias)) + self.writeline('%s = %s' % (mask_identifier(name), alias)) def visit_If(self, node, frame): if_frame = frame.soft() @@ -897,8 +927,8 @@ class CodeGenerator(NodeVisitor): arg_tuple = ', '.join(repr(x.name) for x in node.args) if len(node.args) == 1: arg_tuple += ',' - self.write('l_%s = Macro(environment, macro, %r, (%s), (' % - (node.name, node.name, arg_tuple)) + self.write('%s = Macro(environment, macro, %r, (%s), (' % + (mask_identifier(node.name), node.name, arg_tuple)) for arg in node.defaults: self.visit(arg, macro_frame) self.write(', ') @@ -1082,14 +1112,15 @@ class CodeGenerator(NodeVisitor): # make sure toplevel assignments are added to the context. if frame.toplevel: for name in assignment_frame.assigned_names: - self.writeline('context.vars[%r] = l_%s' % (name, name)) + self.writeline('context.vars[%r] = %s' % + (name, mask_identifier(name))) if not name.startswith('__'): self.writeline('context.exported_vars.add(%r)' % name) def visit_Name(self, node, frame): if node.ctx == 'store' and frame.toplevel: frame.assigned_names.add(node.name) - self.write('l_' + node.name) + self.write(mask_identifier(node.name)) def visit_MarkSafe(self, node, frame): self.write('Markup(') diff --git a/jinja2/debug.py b/jinja2/debug.py index 9209054..cfc2bc8 100644 --- a/jinja2/debug.py +++ b/jinja2/debug.py @@ -12,6 +12,7 @@ """ import sys from types import CodeType +from jinja2.compiler import unmask_identifier def translate_exception(exc_info): @@ -66,7 +67,13 @@ def fake_exc_info(exc_info, filename, lineno, tb_back=None): locals = {} for name, value in real_locals.iteritems(): if name.startswith('l_'): - locals[name[2:]] = value + try: + locals[str(unmask_identifier(name))] = value + except UnicodeError: + # bummer. someone actually used an unicode identifier. + # there is no way this can be added back into the python + # layer with python < 3. we have to ignore it... + pass # if there is a local called __jinja_exception__, we get # rid of it to not break the debug functionality. diff --git a/jinja2/lexer.py b/jinja2/lexer.py index d950025..f472d4b 100644 --- a/jinja2/lexer.py +++ b/jinja2/lexer.py @@ -31,7 +31,7 @@ whitespace_re = re.compile(r'\s+(?um)') string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)') integer_re = re.compile(r'\d+') -name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b') +name_re = re.compile(r'\b[^\W\d]\w*\b(?u)') float_re = re.compile(r'\d+\.\d+') # bind operators to token types @@ -88,6 +88,13 @@ unicode_escapes = { } +def _trystr(s): + try: + return str(s) + except UnicodeError: + return s + + def unescape_string(lineno, filename, s): r"""Unescape a string. Supported escapes: \a, \n, \r\, \f, \v, \\, \", \', \0 @@ -95,7 +102,8 @@ def unescape_string(lineno, filename, s): \x00, \u0000, \U00000000, \N{...} """ try: - return s.encode('ascii', 'backslashreplace').decode('unicode-escape') + return _trystr(s.encode('ascii', 'backslashreplace') + .decode('unicode-escape')) except UnicodeError, e: msg = str(e).split(':')[-1].strip() raise TemplateSyntaxError(msg, lineno, filename) @@ -409,20 +417,13 @@ class Lexer(object): elif token in ('raw_begin', 'raw_end'): continue elif token == 'data': - try: - value = str(value) - except UnicodeError: - pass + value = _trystr(value) elif token == 'keyword': token = value elif token == 'name': - value = str(value) + value = _trystr(value) elif token == 'string': value = unescape_string(lineno, filename, value[1:-1]) - try: - value = str(value) - except UnicodeError: - pass elif token == 'integer': value = int(value) elif token == 'float':