From: Armin Ronacher <armin.ronacher@active-4.com>
Date: Sat, 10 May 2008 22:30:43 +0000 (+0200)
Subject: Backed out changeset 6afb554797b6, which added unicode identifier support.  This... 
X-Git-Tag: 2.0rc1~75
X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=d1ff858a7afd41a5cc166820f70c96f08b6d2136;p=jinja2.git

Backed out changeset 6afb554797b6, which added unicode identifier support.  This doesn't work properly in practice under Python 2.6.

--HG--
branch : trunk
---

diff --git a/docs/api.rst b/docs/api.rst
index 7d07af7..5a131ed 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -68,17 +68,15 @@ High Level API
 
         A dict of filters for this environment.  As long as no template was
         loaded it's safe to add new filters or remove old.  For custom filters
-        see :ref:`writing-filters`.  Unlike regular identifiers filters and
-        tests may contain dots to group functions with similar functionality.
-        For example `to.unicode` is a valid name for a filter.
+        see :ref:`writing-filters`.  For valid filter names have a look at
+        :ref:`identifier-naming`.
 
     .. attribute:: tests
 
         A dict of test functions for this environment.  As long as no
         template was loaded it's safe to modify this dict.  For custom tests
-        see :ref:`writing-tests`. Unlike regular identifiers filters and
-        tests may contain dots to group functions with similar functionality.
-        For example `check.positive` is a valid name for a test.
+        see :ref:`writing-tests`.  For valid test names have a look at
+        :ref:`identifier-naming`.
 
     .. attribute:: globals
 
@@ -86,6 +84,7 @@ High Level API
         in a template and (if the optimizer is enabled) may not be
         overridden by templates.  As long as no template was loaded it's safe
         to modify this dict.  For more details see :ref:`global-namespace`.
+        For valid object names have a look at :ref:`identifier-naming`.
 
     .. automethod:: overlay([options])
 
@@ -115,6 +114,24 @@ High Level API
     :members: disable_buffering, enable_buffering
 
 
+.. _identifier-naming:
+
+Notes on Identifiers
+~~~~~~~~~~~~~~~~~~~~
+
+Jinja2 uses the regular Python 2.x naming rules.  Valid identifiers have to
+match ``[a-zA-Z_][a-zA-Z0-9_]*``.  As a consequence, non-ASCII characters
+are currently not allowed.  This limitation will probably go away as soon as
+unicode identifiers are fully specified for Python 3.
+
+Filters and tests are looked up in separate namespaces and have a slightly
+modified identifier syntax.  Filters and tests may contain dots to group
+filters and tests by topic.  For example it's perfectly valid to add a
+function into the filter dict and call it `to.unicode`.  The regular
+expression for filter and test identifiers is
+``[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*``.
+
+
 Undefined Types
 ---------------
 
diff --git a/jinja2/compiler.py b/jinja2/compiler.py
index b45aeb5..6518427 100644
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -8,13 +8,12 @@
     :copyright: Copyright 2008 by Armin Ronacher.
     :license: GNU GPL.
 """
-import string
 from time import time
 from copy import copy
 from random import randrange
 from keyword import iskeyword
 from cStringIO import StringIO
-from itertools import chain, takewhile
+from itertools import chain
 from jinja2 import nodes
 from jinja2.visitor import NodeVisitor, NodeTransformer
 from jinja2.exceptions import TemplateAssertionError
@@ -39,8 +38,6 @@ except SyntaxError:
 else:
     have_condexpr = True
 
-_safe_ident_chars = set(string.letters + '0123456789')
-
 
 def generate(node, environment, name, filename, stream=None):
     """Generate the python source for a node tree."""
@@ -52,30 +49,6 @@ def generate(node, environment, name, filename, stream=None):
         return generator.stream.getvalue()
 
 
-def mask_identifier(ident):
-    """Mask an identifier properly for python source code."""
-    rv = ['l_']
-    for char in ident:
-        if char in _safe_ident_chars:
-            rv.append(char)
-        else:
-            rv.append('_%x_' % ord(char))
-    return str(''.join(rv))
-
-
-def unmask_identifier(ident):
-    """Unmask an identifier."""
-    if not ident.startswith('l_'):
-        return ident
-    rv = []
-    i = iter(ident[2:])
-    for c in i:
-        if c == '_':
-            c = unichr(int(concat(takewhile(lambda c: c != '_', i)), 16))
-        rv.append(c)
-    return ''.join(rv)
-
-
 def has_safe_repr(value):
     """Does the node have a safe representation?"""
     if value is None or value is NotImplemented or value is Ellipsis:
@@ -497,8 +470,7 @@ class CodeGenerator(NodeVisitor):
     def pull_locals(self, frame):
         """Pull all the references identifiers into the local scope."""
         for name in frame.identifiers.undeclared:
-            self.writeline('%s = context.resolve(%r)' % (mask_identifier(name),
-                                                         name))
+            self.writeline('l_%s = context.resolve(%r)' % (name, name))
 
     def pull_dependencies(self, nodes):
         """Pull all the dependencies."""
@@ -524,7 +496,7 @@ class CodeGenerator(NodeVisitor):
         aliases = {}
         for name in frame.identifiers.find_shadowed():
             aliases[name] = ident = self.temporary_identifier()
-            self.writeline('%s = %s' % (ident, mask_identifier(name)))
+            self.writeline('%s = l_%s' % (ident, name))
         return aliases
 
     def function_scoping(self, node, frame, children=None):
@@ -573,8 +545,7 @@ class CodeGenerator(NodeVisitor):
         func_frame.accesses_kwargs = False
         func_frame.accesses_varargs = False
         func_frame.accesses_caller = False
-        func_frame.arguments = args = [mask_identifier(x.name)
-                                       for x in node.args]
+        func_frame.arguments = args = ['l_' + x.name for x in node.args]
 
         undeclared = find_undeclared(children, ('caller', 'kwargs', 'varargs'))
 
@@ -775,7 +746,7 @@ class CodeGenerator(NodeVisitor):
 
     def visit_Import(self, node, frame):
         """Visit regular imports."""
-        self.writeline(mask_identifier(node.target) + ' = ', node)
+        self.writeline('l_%s = ' % node.target, node)
         if frame.toplevel:
             self.write('context.vars[%r] = ' % node.target)
         self.write('environment.get_template(')
@@ -803,19 +774,18 @@ class CodeGenerator(NodeVisitor):
                 name, alias = name
             else:
                 alias = name
-            self.writeline('%s = getattr(included_template, '
-                           '%r, missing)' % (mask_identifier(alias), name))
-            self.writeline('if %s is missing:' % mask_identifier(alias))
+            self.writeline('l_%s = getattr(included_template, '
+                           '%r, missing)' % (alias, name))
+            self.writeline('if l_%s is missing:' % alias)
             self.indent()
-            self.writeline('%s = environment.undefined(%r %% '
+            self.writeline('l_%s = environment.undefined(%r %% '
                            'included_template.name, '
                            'name=included_template.name)' %
-                           (mask_identifier(alias), 'the template %r does '
-                            'not export the requested name ' + repr(name)))
+                           (alias, 'the template %r does not export '
+                            'the requested name ' + repr(name)))
             self.outdent()
             if frame.toplevel:
-                self.writeline('context.vars[%r] = %s' %
-                               (alias, mask_identifier(alias)))
+                self.writeline('context.vars[%r] = l_%s' % (alias, alias))
                 if not alias.startswith('__'):
                     self.writeline('context.exported_vars.discard(%r)' % alias)
 
@@ -889,7 +859,7 @@ class CodeGenerator(NodeVisitor):
 
         # reset the aliases if there are any.
         for name, alias in aliases.iteritems():
-            self.writeline('%s = %s' % (mask_identifier(name), alias))
+            self.writeline('l_%s = %s' % (name, alias))
 
     def visit_If(self, node, frame):
         if_frame = frame.soft()
@@ -927,8 +897,8 @@ class CodeGenerator(NodeVisitor):
         arg_tuple = ', '.join(repr(x.name) for x in node.args)
         if len(node.args) == 1:
             arg_tuple += ','
-        self.write('%s = Macro(environment, macro, %r, (%s), (' %
-                   (mask_identifier(node.name), node.name, arg_tuple))
+        self.write('l_%s = Macro(environment, macro, %r, (%s), (' %
+                   (node.name, node.name, arg_tuple))
         for arg in node.defaults:
             self.visit(arg, macro_frame)
             self.write(', ')
@@ -1112,15 +1082,14 @@ class CodeGenerator(NodeVisitor):
         # make sure toplevel assignments are added to the context.
         if frame.toplevel:
             for name in assignment_frame.assigned_names:
-                self.writeline('context.vars[%r] = %s' %
-                               (name, mask_identifier(name)))
+                self.writeline('context.vars[%r] = l_%s' % (name, name))
                 if not name.startswith('__'):
                     self.writeline('context.exported_vars.add(%r)' % name)
 
     def visit_Name(self, node, frame):
         if node.ctx == 'store' and frame.toplevel:
             frame.assigned_names.add(node.name)
-        self.write(mask_identifier(node.name))
+        self.write('l_' + node.name)
 
     def visit_MarkSafe(self, node, frame):
         self.write('Markup(')
diff --git a/jinja2/debug.py b/jinja2/debug.py
index cfc2bc8..9209054 100644
--- a/jinja2/debug.py
+++ b/jinja2/debug.py
@@ -12,7 +12,6 @@
 """
 import sys
 from types import CodeType
-from jinja2.compiler import unmask_identifier
 
 
 def translate_exception(exc_info):
@@ -67,13 +66,7 @@ def fake_exc_info(exc_info, filename, lineno, tb_back=None):
         locals = {}
     for name, value in real_locals.iteritems():
         if name.startswith('l_'):
-            try:
-                locals[str(unmask_identifier(name))] = value
-            except UnicodeError:
-                # bummer.  someone actually used an unicode identifier.
-                # there is no way this can be added back into the python
-                # layer with python < 3.  we have to ignore it...
-                pass
+            locals[name[2:]] = value
 
     # if there is a local called __jinja_exception__, we get
     # rid of it to not break the debug functionality.
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index f472d4b..d950025 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -31,7 +31,7 @@ whitespace_re = re.compile(r'\s+(?um)')
 string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                        r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
 integer_re = re.compile(r'\d+')
-name_re = re.compile(r'\b[^\W\d]\w*\b(?u)')
+name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
 float_re = re.compile(r'\d+\.\d+')
 
 # bind operators to token types
@@ -88,13 +88,6 @@ unicode_escapes = {
 }
 
 
-def _trystr(s):
-    try:
-        return str(s)
-    except UnicodeError:
-        return s
-
-
 def unescape_string(lineno, filename, s):
     r"""Unescape a string. Supported escapes:
         \a, \n, \r\, \f, \v, \\, \", \', \0
@@ -102,8 +95,7 @@ def unescape_string(lineno, filename, s):
         \x00, \u0000, \U00000000, \N{...}
     """
     try:
-        return _trystr(s.encode('ascii', 'backslashreplace')
-                        .decode('unicode-escape'))
+        return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
     except UnicodeError, e:
         msg = str(e).split(':')[-1].strip()
         raise TemplateSyntaxError(msg, lineno, filename)
@@ -417,13 +409,20 @@ class Lexer(object):
                 elif token in ('raw_begin', 'raw_end'):
                     continue
                 elif token == 'data':
-                    value = _trystr(value)
+                    try:
+                        value = str(value)
+                    except UnicodeError:
+                        pass
                 elif token == 'keyword':
                     token = value
                 elif token == 'name':
-                    value = _trystr(value)
+                    value = str(value)
                 elif token == 'string':
                     value = unescape_string(lineno, filename, value[1:-1])
+                    try:
+                        value = str(value)
+                    except UnicodeError:
+                        pass
                 elif token == 'integer':
                     value = int(value)
                 elif token == 'float':