added support for unicode identifiers

author Armin Ronacher <armin.ronacher@active-4.com>

Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)

committer Armin Ronacher <armin.ronacher@active-4.com>

Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)
author Armin Ronacher <armin.ronacher@active-4.com>
Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)
committer Armin Ronacher <armin.ronacher@active-4.com>
Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)
diff --git a/docs/api.rst b/docs/api.rst

index 5a131ed3b62aa69ef6ea898d4efd03efead04b19..7d07af7d3bfac517c7f2f1747336835e3eea98a1 100644 (file)
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -68,15 +68,17 @@ High Level API
  
          A dict of filters for this environment.  As long as no template was
          loaded it's safe to add new filters or remove old.  For custom filters
-        see :ref:`writing-filters`.  For valid filter names have a look at
-        :ref:`identifier-naming`.
+        see :ref:`writing-filters`.  Unlike regular identifiers, filters and
+        tests may contain dots to group functions with similar functionality.
+        For example `to.unicode` is a valid name for a filter.
  
      .. attribute:: tests
  
          A dict of test functions for this environment.  As long as no
          template was loaded it's safe to modify this dict.  For custom tests
-        see :ref:`writing-tests`.  For valid test names have a look at
-        :ref:`identifier-naming`.
+        see :ref:`writing-tests`.  Unlike regular identifiers, filters and
+        tests may contain dots to group functions with similar functionality.
+        For example `check.positive` is a valid name for a test.
  
      .. attribute:: globals
  
@@ -84,7 +86,6 @@ High Level API
          in a template and (if the optimizer is enabled) may not be
          overridden by templates.  As long as no template was loaded it's safe
          to modify this dict.  For more details see :ref:`global-namespace`.
-        For valid object names have a look at :ref:`identifier-naming`.
  
      .. automethod:: overlay([options])
  
@@ -114,24 +115,6 @@ High Level API
      :members: disable_buffering, enable_buffering
  
  
-.. _identifier-naming:
-
-Notes on Identifiers
-~~~~~~~~~~~~~~~~~~~~
-
-Jinja2 uses the regular Python 2.x naming rules.  Valid identifiers have to
-match ``[a-zA-Z_][a-zA-Z0-9_]*``.  As a matter of fact non ASCII characters
-are currently not allowed.  This limitation will probably go away as soon as
-unicode identifiers are fully specified for Python 3.
-
-Filters and tests are looked up in separate namespaces and have slightly
-modified identifier syntax.  Filters and tests may contain dots to group
-filters and tests by topic.  For example it's perfectly valid to add a
-function into the filter dict and call it `to.unicode`.  The regular
-expression for filter and test identifiers is
-``[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*```.
-
-
  Undefined Types
  ---------------
  
diff --git a/jinja2/compiler.py b/jinja2/compiler.py

index 6518427d2fddcdf1142d0cba7e92a21720f3a4d6..b45aeb5241c86ba64a6d4d6733d9031c899165ec 100644 (file)
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -8,12 +8,13 @@
      :copyright: Copyright 2008 by Armin Ronacher.
      :license: GNU GPL.
  """
+import string
  from time import time
  from copy import copy
  from random import randrange
  from keyword import iskeyword
  from cStringIO import StringIO
-from itertools import chain
+from itertools import chain, takewhile
  from jinja2 import nodes
  from jinja2.visitor import NodeVisitor, NodeTransformer
  from jinja2.exceptions import TemplateAssertionError
@@ -38,6 +39,8 @@ except SyntaxError:
  else:
      have_condexpr = True
  
+_safe_ident_chars = set(string.letters + '0123456789')
+
  
  def generate(node, environment, name, filename, stream=None):
      """Generate the python source for a node tree."""
@@ -49,6 +52,30 @@ def generate(node, environment, name, filename, stream=None):
          return generator.stream.getvalue()
  
  
+def mask_identifier(ident):
+    """Mask `ident` as 'l_' plus its chars, unsafe ones become '_<hex>_'."""
+    rv = ['l_']
+    for char in ident:
+        if char in _safe_ident_chars:
+            rv.append(char)
+        else:  # '_' is not a safe char, so it is hex-escaped as well
+            rv.append('_%x_' % ord(char))
+    return str(''.join(rv))
+
+
+def unmask_identifier(ident):
+    """Undo `mask_identifier`; names without the 'l_' prefix pass through."""
+    if not ident.startswith('l_'):
+        return ident
+    rv = []
+    i = iter(ident[2:])  # shared iterator: takewhile below advances it too
+    for c in i:
+        if c == '_':  # escape start: hex digits up to the closing '_'
+            c = unichr(int(concat(takewhile(lambda c: c != '_', i)), 16))
+        rv.append(c)
+    return ''.join(rv)
+
+
  def has_safe_repr(value):
      """Does the node have a safe representation?"""
      if value is None or value is NotImplemented or value is Ellipsis:
@@ -470,7 +497,8 @@ class CodeGenerator(NodeVisitor):
      def pull_locals(self, frame):
          """Pull all the references identifiers into the local scope."""
          for name in frame.identifiers.undeclared:
-            self.writeline('l_%s = context.resolve(%r)' % (name, name))
+            self.writeline('%s = context.resolve(%r)' % (mask_identifier(name),
+                                                         name))
  
      def pull_dependencies(self, nodes):
          """Pull all the dependencies."""
@@ -496,7 +524,7 @@ class CodeGenerator(NodeVisitor):
          aliases = {}
          for name in frame.identifiers.find_shadowed():
              aliases[name] = ident = self.temporary_identifier()
-            self.writeline('%s = l_%s' % (ident, name))
+            self.writeline('%s = %s' % (ident, mask_identifier(name)))
          return aliases
  
      def function_scoping(self, node, frame, children=None):
@@ -545,7 +573,8 @@ class CodeGenerator(NodeVisitor):
          func_frame.accesses_kwargs = False
          func_frame.accesses_varargs = False
          func_frame.accesses_caller = False
-        func_frame.arguments = args = ['l_' + x.name for x in node.args]
+        func_frame.arguments = args = [mask_identifier(x.name)
+                                       for x in node.args]
  
          undeclared = find_undeclared(children, ('caller', 'kwargs', 'varargs'))
  
@@ -746,7 +775,7 @@ class CodeGenerator(NodeVisitor):
  
      def visit_Import(self, node, frame):
          """Visit regular imports."""
-        self.writeline('l_%s = ' % node.target, node)
+        self.writeline(mask_identifier(node.target) + ' = ', node)
          if frame.toplevel:
              self.write('context.vars[%r] = ' % node.target)
          self.write('environment.get_template(')
@@ -774,18 +803,19 @@ class CodeGenerator(NodeVisitor):
                  name, alias = name
              else:
                  alias = name
-            self.writeline('l_%s = getattr(included_template, '
-                           '%r, missing)' % (alias, name))
-            self.writeline('if l_%s is missing:' % alias)
+            self.writeline('%s = getattr(included_template, '
+                           '%r, missing)' % (mask_identifier(alias), name))
+            self.writeline('if %s is missing:' % mask_identifier(alias))
              self.indent()
-            self.writeline('l_%s = environment.undefined(%r %% '
+            self.writeline('%s = environment.undefined(%r %% '
                             'included_template.name, '
                             'name=included_template.name)' %
-                           (alias, 'the template %r does not export '
-                            'the requested name ' + repr(name)))
+                           (mask_identifier(alias), 'the template %r does '
+                            'not export the requested name ' + repr(name)))
              self.outdent()
              if frame.toplevel:
-                self.writeline('context.vars[%r] = l_%s' % (alias, alias))
+                self.writeline('context.vars[%r] = %s' %
+                               (alias, mask_identifier(alias)))
                  if not alias.startswith('__'):
                      self.writeline('context.exported_vars.discard(%r)' % alias)
  
@@ -859,7 +889,7 @@ class CodeGenerator(NodeVisitor):
  
          # reset the aliases if there are any.
          for name, alias in aliases.iteritems():
-            self.writeline('l_%s = %s' % (name, alias))
+            self.writeline('%s = %s' % (mask_identifier(name), alias))
  
      def visit_If(self, node, frame):
          if_frame = frame.soft()
@@ -897,8 +927,8 @@ class CodeGenerator(NodeVisitor):
          arg_tuple = ', '.join(repr(x.name) for x in node.args)
          if len(node.args) == 1:
              arg_tuple += ','
-        self.write('l_%s = Macro(environment, macro, %r, (%s), (' %
-                   (node.name, node.name, arg_tuple))
+        self.write('%s = Macro(environment, macro, %r, (%s), (' %
+                   (mask_identifier(node.name), node.name, arg_tuple))
          for arg in node.defaults:
              self.visit(arg, macro_frame)
              self.write(', ')
@@ -1082,14 +1112,15 @@ class CodeGenerator(NodeVisitor):
          # make sure toplevel assignments are added to the context.
          if frame.toplevel:
              for name in assignment_frame.assigned_names:
-                self.writeline('context.vars[%r] = l_%s' % (name, name))
+                self.writeline('context.vars[%r] = %s' %
+                               (name, mask_identifier(name)))
                  if not name.startswith('__'):
                      self.writeline('context.exported_vars.add(%r)' % name)
  
      def visit_Name(self, node, frame):
          if node.ctx == 'store' and frame.toplevel:
              frame.assigned_names.add(node.name)
-        self.write('l_' + node.name)
+        self.write(mask_identifier(node.name))
  
      def visit_MarkSafe(self, node, frame):
          self.write('Markup(')
diff --git a/jinja2/debug.py b/jinja2/debug.py

index 9209054848d2f1cba4673b825f5adb75e3717777..cfc2bc846a4c526bd474517680dfc55fd049941e 100644 (file)
--- a/jinja2/debug.py
+++ b/jinja2/debug.py
@@ -12,6 +12,7 @@
  """
  import sys
  from types import CodeType
+from jinja2.compiler import unmask_identifier
  
  
  def translate_exception(exc_info):
@@ -66,7 +67,13 @@ def fake_exc_info(exc_info, filename, lineno, tb_back=None):
          locals = {}
      for name, value in real_locals.iteritems():
          if name.startswith('l_'):
-            locals[name[2:]] = value
+            try:
+                locals[str(unmask_identifier(name))] = value
+            except UnicodeError:
+                # bummer.  someone actually used a unicode identifier.
+                # there is no way this can be added back into the python
+                # layer with python < 3.  we have to ignore it...
+                pass
  
      # if there is a local called __jinja_exception__, we get
      # rid of it to not break the debug functionality.
diff --git a/jinja2/lexer.py b/jinja2/lexer.py

index d950025b70cc5306b72253f62ecc9b5b71d5fa2a..f472d4ba56631e93318b886b14b96f36fae48d49 100644 (file)
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -31,7 +31,7 @@ whitespace_re = re.compile(r'\s+(?um)')
  string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                         r'|"([^"\\]*(?:\\.[^"\\]*)*)")(?ms)')
  integer_re = re.compile(r'\d+')
-name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
+name_re = re.compile(r'\b[^\W\d]\w*\b(?u)')
  float_re = re.compile(r'\d+\.\d+')
  
  # bind operators to token types
@@ -88,6 +88,13 @@ unicode_escapes = {
  }
  
  
+def _trystr(s):  # best-effort str() conversion of `s`
+    try:
+        return str(s)
+    except UnicodeError:  # str() could not convert it: hand back s as is
+        return s
+
+
  def unescape_string(lineno, filename, s):
      r"""Unescape a string. Supported escapes:
          \a, \n, \r\, \f, \v, \\, \", \', \0
@@ -95,7 +102,8 @@ def unescape_string(lineno, filename, s):
          \x00, \u0000, \U00000000, \N{...}
      """
      try:
-        return s.encode('ascii', 'backslashreplace').decode('unicode-escape')
+        return _trystr(s.encode('ascii', 'backslashreplace')
+                        .decode('unicode-escape'))
      except UnicodeError, e:
          msg = str(e).split(':')[-1].strip()
          raise TemplateSyntaxError(msg, lineno, filename)
@@ -409,20 +417,13 @@ class Lexer(object):
                  elif token in ('raw_begin', 'raw_end'):
                      continue
                  elif token == 'data':
-                    try:
-                        value = str(value)
-                    except UnicodeError:
-                        pass
+                    value = _trystr(value)
                  elif token == 'keyword':
                      token = value
                  elif token == 'name':
-                    value = str(value)
+                    value = _trystr(value)
                  elif token == 'string':
                      value = unescape_string(lineno, filename, value[1:-1])
-                    try:
-                        value = str(value)
-                    except UnicodeError:
-                        pass
                  elif token == 'integer':
                      value = int(value)
                  elif token == 'float':
author	Armin Ronacher <armin.ronacher@active-4.com>
	Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)
committer	Armin Ronacher <armin.ronacher@active-4.com>
	Sat, 10 May 2008 22:18:35 +0000 (00:18 +0200)
docs/api.rst		patch \| blob \| history
jinja2/compiler.py		patch \| blob \| history
jinja2/debug.py		patch \| blob \| history
jinja2/lexer.py		patch \| blob \| history