From f3c35c42451ccdf8cd12fe542df483fda7cd1b70 Mon Sep 17 00:00:00 2001
From: Armin Ronacher <armin.ronacher@active-4.com>
Date: Fri, 23 May 2008 23:18:14 +0200
Subject: [PATCH] end of line sequence is now configurable

--HG--
branch : trunk
---
 docs/api.rst          | 30 ++++++++++++++++++++++++++++++
 jinja2/compiler.py    |  1 -
 jinja2/environment.py | 20 ++++++++++++++++----
 jinja2/exceptions.py  | 38 ++++++++++++++++++++++++--------------
 jinja2/ext.py         |  2 +-
 jinja2/lexer.py       | 19 ++++++++++++-------
 jinja2/nodes.py       | 23 -----------------------
 7 files changed, 83 insertions(+), 50 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index d520b40..95064b5 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -44,6 +44,15 @@ To render it with some variables, just call the :meth:`render` method::
     print template.render(the='variables', go='here')
 
 
+Unicode
+-------
+
+Jinja2 is using unicode internally which means that you have to pass unicode
+objects to the render function or bytestrings that only consist of ASCII
+characters.  Additionally newlines are normalized to one end of line
+sequence which is per default UNIX style (``\n``).
+
+
 High Level API
 --------------
 
@@ -261,6 +270,27 @@ Exceptions
 
 .. autoexception:: jinja2.exceptions.TemplateSyntaxError
 
+    .. attribute:: message
+
+        The error message as utf-8 bytestring.
+
+    .. attribute:: lineno
+
+        The line number where the error occurred.
+
+    .. attribute:: name
+
+        The load name for the template as unicode string.
+
+    .. attribute:: filename
+
+        The filename that loaded the template as bytestring in the encoding
+        of the file system (most likely utf-8 or mbcs on Windows systems).
+
+    The reason why the filename and error message are bytestrings and not
+    unicode strings is that Python 2.x is not using unicode for exceptions
+    and tracebacks as well as the compiler.  This will change with Python 3.
+
 .. autoexception:: jinja2.exceptions.TemplateAssertionError
 
 
diff --git a/jinja2/compiler.py b/jinja2/compiler.py
index 6dcaf08..24cae81 100644
--- a/jinja2/compiler.py
+++ b/jinja2/compiler.py
@@ -41,7 +41,6 @@ def generate(node, environment, name, filename, stream=None):
     """Generate the python source for a node tree."""
     if not isinstance(node, nodes.Template):
         raise TypeError('Can\'t compile non template nodes')
-    node.freeze()
     generator = CodeGenerator(environment, name, filename, stream)
     generator.visit(node)
     if stream is None:
diff --git a/jinja2/environment.py b/jinja2/environment.py
index f8826e6..9a00bb7 100644
--- a/jinja2/environment.py
+++ b/jinja2/environment.py
@@ -68,11 +68,13 @@ def _environment_sanity_check(environment):
            environment.variable_start_string != \
            environment.comment_start_string, 'block, variable and comment ' \
            'start strings must be different'
+    assert environment.newline_sequence in ('\r', '\r\n', '\n'), \
+           'newline_sequence set to unknown line ending string.'
     return environment
 
 
 class Environment(object):
-    """The core component of Jinja is the `Environment`.  It contains
+    r"""The core component of Jinja is the `Environment`.  It contains
     important shared variables like configuration, filters, tests,
     globals and others.  Instances of this class may be modified if
     they are not shared and if no template was loaded so far.
@@ -109,6 +111,12 @@ class Environment(object):
             If this is set to ``True`` the first newline after a block is
             removed (block, not variable tag!).  Defaults to `False`.
 
+        `newline_sequence`
+            The sequence that starts a newline.  Must be one of ``'\r'``,
+            ``'\n'`` or ``'\r\n'``.  The default is ``'\n'`` which is a
+            useful default for Linux and OS X systems as well as web
+            applications.
+
         `extensions`
             List of Jinja extensions to use.  This can either be import paths
             as strings or extension classes.  For more information have a
@@ -171,6 +179,7 @@ class Environment(object):
                  comment_end_string=COMMENT_END_STRING,
                  line_statement_prefix=LINE_STATEMENT_PREFIX,
                  trim_blocks=False,
+                 newline_sequence='\n',
                  extensions=(),
                  optimized=True,
                  undefined=Undefined,
@@ -199,6 +208,7 @@ class Environment(object):
         self.comment_end_string = comment_end_string
         self.line_statement_prefix = line_statement_prefix
         self.trim_blocks = trim_blocks
+        self.newline_sequence = newline_sequence
 
         # runtime information
         self.undefined = undefined
@@ -440,6 +450,7 @@ class Template(object):
                 comment_end_string='#}',
                 line_statement_prefix=None,
                 trim_blocks=False,
+                newline_sequence='\n',
                 extensions=(),
                 optimized=True,
                 undefined=Undefined,
@@ -448,8 +459,9 @@ class Template(object):
         env = get_spontaneous_environment(
             block_start_string, block_end_string, variable_start_string,
             variable_end_string, comment_start_string, comment_end_string,
-            line_statement_prefix, trim_blocks, tuple(extensions), optimized,
-            undefined, finalize, autoescape, None, 0, False)
+            line_statement_prefix, trim_blocks, newline_sequence,
+            frozenset(extensions), optimized, undefined, finalize,
+            autoescape, None, 0, False)
         return env.from_string(source, template_class=cls)
 
     @classmethod
@@ -541,7 +553,7 @@ class Template(object):
         without arguments but it will evaluate the template every call
         rather then caching the template.  It's also possible to provide
         a dict which is then used as context.  The arguments are the same
-        as fo the :meth:`new_context` method.
+        as for the :meth:`new_context` method.
         """
         return TemplateModule(self, self.new_context(vars, shared))
 
diff --git a/jinja2/exceptions.py b/jinja2/exceptions.py
index 147ddb9..154cf44 100644
--- a/jinja2/exceptions.py
+++ b/jinja2/exceptions.py
@@ -14,16 +14,6 @@ class TemplateError(Exception):
     """Baseclass for all template errors."""
 
 
-class UndefinedError(TemplateError):
-    """Raised if a template tries to operate on :class:`Undefined`."""
-
-
-class SecurityError(TemplateError):
-    """Raised if a template tries to do something insecure if the
-    sandbox is enabled.
-    """
-
-
 class TemplateNotFound(IOError, LookupError, TemplateError):
     """Raised if a template does not exist."""
 
@@ -37,9 +27,16 @@ class TemplateSyntaxError(TemplateError):
 
     def __init__(self, message, lineno, name=None, filename=None):
         if name is not None:
-            extra = '%s, line %d' % (name, lineno)
+            extra = '%s, line %d' % (name.encode('utf-8'), lineno)
         else:
             extra = 'line %d' % lineno
+        # if the message was provided as unicode we have to encode it
+        # to utf-8 explicitly
+        if isinstance(message, unicode):
+            message = message.encode('utf-8')
+        # otherwise make sure it's in fact valid utf-8
+        else:
+            message = message.decode('utf-8', 'ignore').encode('utf-8')
         TemplateError.__init__(self, '%s (%s)' % (message, extra))
         self.message = message
         self.lineno = lineno
@@ -50,15 +47,28 @@ class TemplateSyntaxError(TemplateError):
 class TemplateAssertionError(TemplateSyntaxError):
     """Like a template syntax error, but covers cases where something in the
     template caused an error at compile time that wasn't necessarily caused
-    by a syntax error.
+    by a syntax error.  However it's a direct subclass of
+    :exc:`TemplateSyntaxError` and has the same attributes.
     """
 
 
 class TemplateRuntimeError(TemplateError):
-    """A runtime error."""
+    """A generic runtime error in the template engine.  Under some situations
+    Jinja may raise this exception.
+    """
+
+
+class UndefinedError(TemplateRuntimeError):
+    """Raised if a template tries to operate on :class:`Undefined`."""
+
+
+class SecurityError(TemplateRuntimeError):
+    """Raised if a template tries to do something insecure if the
+    sandbox is enabled.
+    """
 
 
-class FilterArgumentError(Exception):
+class FilterArgumentError(TemplateRuntimeError):
     """This error is raised if a filter was called with inappropriate
     arguments
     """
diff --git a/jinja2/ext.py b/jinja2/ext.py
index 701f54d..7445705 100644
--- a/jinja2/ext.py
+++ b/jinja2/ext.py
@@ -367,7 +367,7 @@ def babel_extract(fileobj, keywords, comment_tags, options):
         options.get('comment_end_string', '#}'),
         options.get('line_statement_prefix') or None,
         options.get('trim_blocks', '').lower() in ('1', 'on', 'yes', 'true'),
-        tuple(extensions),
+        '\n', frozenset(extensions),
         # fill with defaults so that environments are shared
         # with other spontaneus environments.  The rest of the
         # arguments are optimizer, undefined, finalize, autoescape,
diff --git a/jinja2/lexer.py b/jinja2/lexer.py
index 92ff12e..350023d 100644
--- a/jinja2/lexer.py
+++ b/jinja2/lexer.py
@@ -33,6 +33,7 @@ string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
 integer_re = re.compile(r'\d+')
 name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
 float_re = re.compile(r'\d+\.\d+')
+newline_re = re.compile(r'(\r\n|\r|\n)')
 
 # bind operators to token types
 operators = {
@@ -249,7 +250,8 @@ class LexerMeta(type):
                environment.comment_start_string,
                environment.comment_end_string,
                environment.line_statement_prefix,
-               environment.trim_blocks)
+               environment.trim_blocks,
+               environment.newline_sequence)
         lexer = _lexer_cache.get(key)
         if lexer is None:
             lexer = type.__call__(cls, environment)
@@ -308,6 +310,8 @@ class Lexer(object):
         # block suffix if trimming is enabled
         block_suffix_re = environment.trim_blocks and '\\n?' or ''
 
+        self.newline_sequence = environment.newline_sequence
+
         # global lexing rules
         self.rules = {
             'root': [
@@ -365,6 +369,10 @@ class Lexer(object):
             ] + tag_rules
         }
 
+    def _normalize_newlines(self, value):
+        """Called for strings and template data to normalize newlines."""
+        return newline_re.sub(self.newline_sequence, value)
+
     def tokenize(self, source, name=None, filename=None):
         """Works like `tokeniter` but returns a tokenstream of tokens and not
         a generator or token tuples.  Additionally all token values are already
@@ -384,10 +392,7 @@ class Lexer(object):
                 elif token in ('raw_begin', 'raw_end'):
                     continue
                 elif token == 'data':
-                    try:
-                        value = str(value)
-                    except UnicodeError:
-                        pass
+                    value = self._normalize_newlines(value)
                 elif token == 'keyword':
                     token = value
                 elif token == 'name':
@@ -395,7 +400,7 @@ class Lexer(object):
                 elif token == 'string':
                     # try to unescape string
                     try:
-                        value = value[1:-1] \
+                        value = self._normalize_newlines(value[1:-1]) \
                             .encode('ascii', 'backslashreplace') \
                             .decode('unicode-escape')
                     except Exception, e:
@@ -424,7 +429,7 @@ class Lexer(object):
         wants.  The parser uses the `tokenize` function with returns a
         `TokenStream` and postprocessed tokens.
         """
-        source = u'\n'.join(unicode(source).splitlines())
+        source = '\n'.join(unicode(source).splitlines())
         pos = 0
         lineno = 1
         stack = ['root']
diff --git a/jinja2/nodes.py b/jinja2/nodes.py
index 12fdc34..9d32737 100644
--- a/jinja2/nodes.py
+++ b/jinja2/nodes.py
@@ -84,16 +84,11 @@ class Node(object):
     two attributes: `lineno` (the line number of the node) and `environment`.
     The `environment` attribute is set at the end of the parsing process for
     all nodes automatically.
-
-    Nodes can be frozen which makes them hashable.  The compiler freezes the
-    nodes automatically.  Modifications on frozen nodes are possible but not
-    allowed.
     """
     __metaclass__ = NodeType
     fields = ()
     attributes = ('lineno', 'environment')
     abstract = True
-    frozen = False
 
     def __init__(self, *fields, **attributes):
         if self.abstract:
@@ -218,19 +213,6 @@ class Node(object):
             todo.extend(node.iter_child_nodes())
         return self
 
-    def freeze(self):
-        """Freeze the complete node tree which makes them hashable.
-        This happens automatically on compilation.  Frozen nodes must not be
-        modified any further.  Extensions may not freeze nodes that appear
-        in the final node tree (ie: nodes that are returned from the extension
-        parse method).
-        """
-        todo = deque([self])
-        while todo:
-            node = todo.popleft()
-            node.frozen = True
-            todo.extend(node.iter_child_nodes())
-
     def __eq__(self, other):
         return type(self) is type(other) and \
                tuple(self.iter_fields()) == tuple(other.iter_fields())
@@ -238,11 +220,6 @@ class Node(object):
     def __ne__(self, other):
         return not self.__eq__(other)
 
-    def __hash__(self):
-        if not self.frozen:
-            raise TypeError('unfrozen nodes are unhashable')
-        return hash(tuple(self.iter_fields()))
-
     def __repr__(self):
         return '%s(%s)' % (
             self.__class__.__name__,
-- 
2.26.2