added a semi realworld benchmark (jinja2 and mako)

[jinja2.git] / jinja2 / filters.py
diff --git a/jinja2/filters.py b/jinja2/filters.py

index c042a5bbea5d0cd81e79a650d90580c09bb3f6ac..94086a63cd42df4a212c40ba6cafbb0fbc762aa1 100644 (file)
--- a/jinja2/filters.py
+++ b/jinja2/filters.py
@@ -1,7 +1,7 @@
  # -*- coding: utf-8 -*-
  """
-    jinja.filters
-    ~~~~~~~~~~~~~
+    jinja2.filters
+    ~~~~~~~~~~~~~~
  
      Bundled jinja filters.
  
@@ -10,19 +10,16 @@
  """
  import re
  import math
+import textwrap
  from random import choice
-try:
-    from operator import itemgetter
-except ImportError:
-    itemgetter = lambda a: lambda b: b[a]
-from urllib import urlencode, quote
-from itertools import imap
+from operator import itemgetter
+from itertools import imap, groupby
  from jinja2.utils import Markup, escape, pformat, urlize, soft_unicode
  from jinja2.runtime import Undefined
+from jinja2.exceptions import FilterArgumentError
  
  
-
-_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
+_word_re = re.compile(r'\w+')
  
  
  def contextfilter(f):
@@ -45,7 +42,15 @@ def environmentfilter(f):
      return f
  
  
-def do_replace(s, old, new, count=None):
+def do_forceescape(value):
+    """Enforce HTML escaping.  This will probably double escape variables."""
+    if hasattr(value, '__html__'):
+        value = value.__html__()
+    return escape(unicode(value))
+
+
+@environmentfilter
+def do_replace(environment, s, old, new, count=None):
      """Return a copy of the value with all occurrences of a substring
      replaced with a new one. The first argument is the substring
      that should be replaced, the second is the replacement string.
@@ -62,6 +67,8 @@ def do_replace(s, old, new, count=None):
      """
      if count is None:
          count = -1
+    if not environment.autoescape:
+        return unicode(s).replace(unicode(old), unicode(new), count)
      if hasattr(old, '__html__') or hasattr(new, '__html__') and \
         not hasattr(s, '__html__'):
          s = escape(s)
@@ -80,22 +87,15 @@ def do_lower(s):
      return soft_unicode(s).lower()
  
  
-def do_escape(s):
-    """XML escape ``&``, ``<``, ``>``, and ``"`` in a string of data.
-
-    This method will have no effect it the value is already escaped.
-    """
-    return escape(s)
-
-
-def do_xmlattr(d, autospace=False):
+@environmentfilter
+def do_xmlattr(_environment, d, autospace=True):
      """Create an SGML/XML attribute string based on the items in a dict.
      All values that are neither `none` nor `undefined` are automatically
      escaped:
  
      .. sourcecode:: html+jinja
  
-        <ul{{ {'class': 'my_list', 'missing': None,
+        <ul{{ {'class': 'my_list', 'missing': none,
                  'id': 'list-%d'|format(variable)}|xmlattr }}>
          ...
          </ul>
@@ -109,26 +109,18 @@ def do_xmlattr(d, autospace=False):
          </ul>
  
      As you can see it automatically prepends a space in front of the item
-    if the filter returned something. You can disable this by passing
-    `false` as only argument to the filter.
+    if the filter returned something unless the second parameter is false.
      """
-    if not hasattr(d, 'iteritems'):
-        raise TypeError('a dict is required')
-    result = []
-    for key, value in d.iteritems():
-        if value is not None and not isinstance(value, Undefined):
-            result.append(u'%s="%s"' % (
-                escape(env.to_unicode(key)),
-                escape(env.to_unicode(value), True)
-            ))
      rv = u' '.join(
          u'%s="%s"' % (escape(key), escape(value))
          for key, value in d.iteritems()
          if value is not None and not isinstance(value, Undefined)
      )
-    if autospace:
-        rv = ' ' + rv
-    return Markup(rv)
+    if autospace and rv:
+        rv = u' ' + rv
+    if _environment.autoescape:
+        rv = Markup(rv)
+    return rv
  
  
  def do_capitalize(s):
@@ -202,10 +194,11 @@ def do_default(value, default_value=u'', boolean=False):
      return value
  
  
-def do_join(value, d=u''):
+@environmentfilter
+def do_join(environment, value, d=u''):
      """Return a string which is the concatenation of the strings in the
      sequence. The separator between elements is an empty string per
-    default, you can define ith with the optional parameter:
+    default, you can define it with the optional parameter:
  
      .. sourcecode:: jinja
  
@@ -215,9 +208,13 @@ def do_join(value, d=u''):
          {{ [1, 2, 3]|join }}
              -> 123
      """
+    # no automatic escaping?  joining is a lot easier then
+    if not environment.autoescape:
+        return unicode(d).join(imap(unicode, value))
+
      # if the delimiter doesn't have an html representation we check
      # if any of the items has.  If yes we do a coercion to Markup
-    if not hasttr(d, '__html__'):
+    if not hasattr(d, '__html__'):
          value = list(value)
          do_escape = False
          for idx, item in enumerate(value):
@@ -242,12 +239,11 @@ def do_center(value, width=80):
  
  @environmentfilter
  def do_first(environment, seq):
-    """Return the frist item of a sequence."""
+    """Return the first item of a sequence."""
      try:
          return iter(seq).next()
      except StopIteration:
-        return environment.undefined('seq|first',
-            extra='the sequence was empty')
+        return environment.undefined('No first item, sequence was empty.')
  
  
  @environmentfilter
@@ -256,8 +252,7 @@ def do_last(environment, seq):
      try:
          return iter(reversed(seq)).next()
      except StopIteration:
-        return environment.undefined('seq|last',
-            extra='the sequence was empty')
+        return environment.undefined('No last item, sequence was empty.')
  
  
  @environmentfilter
@@ -266,8 +261,7 @@ def do_random(environment, seq):
      try:
          return choice(seq)
      except IndexError:
-        return environment.undefined('seq|random',
-            extra='the sequence was empty')
+        return environment.undefined('No random item, sequence was empty.')
  
  
  def do_filesizeformat(value):
@@ -298,7 +292,8 @@ def do_pprint(value, verbose=False):
      return pformat(value, verbose=verbose)
  
  
-def do_urlize(value, trim_url_limit=None, nofollow=False):
+@environmentfilter
+def do_urlize(environment, value, trim_url_limit=None, nofollow=False):
      """Converts URLs in plain text into clickable links.
  
      If you pass the filter an additional integer it will shorten the urls
@@ -307,24 +302,24 @@ def do_urlize(value, trim_url_limit=None, nofollow=False):
  
      .. sourcecode:: jinja
  
-        {{ mytext|urlize(40, True) }}
+        {{ mytext|urlize(40, true) }}
              links are shortened to 40 chars and defined with rel="nofollow"
      """
-    return urlize(soft_unicode(value), trim_url_limit, nofollow)
+    rv = urlize(soft_unicode(value), trim_url_limit, nofollow)
+    if environment.autoescape:
+        rv = Markup(rv)
+    return rv
  
  
  def do_indent(s, width=4, indentfirst=False):
-    """
-    {{ s|indent[ width[ indentfirst[ usetab]]] }}
-
-    Return a copy of the passed string, each line indented by
+    """Return a copy of the passed string, each line indented by
      4 spaces. The first line is not indented. If you want to
      change the number of spaces or indent the first line too
      you can pass additional parameters to the filter:
  
      .. sourcecode:: jinja
  
-        {{ mytext|indent(2, True) }}
+        {{ mytext|indent(2, true) }}
              indent by two spaces and indent the first line too.
      """
      indention = ' ' * width
@@ -334,8 +329,7 @@ def do_indent(s, width=4, indentfirst=False):
  
  
  def do_truncate(s, length=255, killwords=False, end='...'):
-    """
-    Return a truncated copy of the string. The length is specified
+    """Return a truncated copy of the string. The length is specified
      with the first parameter which defaults to ``255``. If the second
      parameter is ``true`` the filter will cut the text at length. Otherwise
      it will try to save the last word. If the text was in fact
@@ -365,31 +359,21 @@ def do_truncate(s, length=255, killwords=False, end='...'):
      return u' '.join(result)
  
  
-def do_wordwrap(s, pos=79, hard=False):
+def do_wordwrap(s, width=79, break_long_words=True):
      """
      Return a copy of the string passed to the filter wrapped after
-    ``79`` characters. You can override this default using the first
-    parameter. If you set the second parameter to `true` Jinja will
-    also split words apart (usually a bad idea because it makes
-    reading hard).
+    ``79`` characters.  You can override this default using the first
+    parameter.  If you set the second parameter to `false` Jinja will not
+    split words apart if they are longer than `width`.
      """
-    if len(s) < pos:
-        return s
-    if hard:
-        return u'\n'.join(s[idx:idx + pos] for idx in
-                          xrange(0, len(s), pos))
-
-    # TODO: switch to wordwrap.wrap
-    # code from http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/148061
-    return reduce(lambda line, word, pos=pos: u'%s%s%s' %
-                  (line, u' \n'[(len(line)-line.rfind('\n') - 1 +
-                                len(word.split('\n', 1)[0]) >= pos)],
-                   word), s.split(' '))
+    return textwrap.wrap(s, width=width, expand_tabs=False,
+                         replace_whitespace=False,
+                         break_long_words=break_long_words)
  
  
  def do_wordcount(s):
      """Count the words in that string."""
-    return len(x for x in s.split() if x)
+    return len(_word_re.findall(s))
  
  
  def do_int(value, default=0):
@@ -400,6 +384,7 @@ def do_int(value, default=0):
      try:
          return int(value)
      except (TypeError, ValueError):
+        # this quirk is necessary so that "42.23"|int gives 42.
          try:
              return int(float(value))
          except (TypeError, ValueError):
@@ -417,11 +402,6 @@ def do_float(value, default=0.0):
          return default
  
  
-def do_string(value):
-    """Convert the value into an string."""
-    return soft_unicode(value)
-
-
  def do_format(value, *args, **kwargs):
      """
      Apply python string formatting on an object:
@@ -431,10 +411,10 @@ def do_format(value, *args, **kwargs):
          {{ "%s - %s"|format("Hello?", "Foo!") }}
              -> Hello? - Foo!
      """
-    if kwargs:
-        kwargs.update(idx, arg in enumerate(args))
-        args = kwargs
-    return soft_unicode(value) % args
+    if args and kwargs:
+        raise FilterArgumentError('can\'t handle positional and keyword '
+                                  'arguments at the same time')
+    return soft_unicode(value) % (kwargs or args)
  
  
  def do_trim(value):
@@ -445,7 +425,9 @@ def do_trim(value):
  def do_striptags(value):
      """Strip SGML/XML tags and replace adjacent whitespace by one space.
      """
-    return ' '.join(_striptags_re.sub('', value).split())
+    if hasattr(value, '__html__'):
+        value = value.__html__()
+    return Markup(unicode(value)).striptags()
  
  
  def do_slice(value, slices, fill_with=None):
@@ -468,7 +450,6 @@ def do_slice(value, slices, fill_with=None):
      If you pass it a second argument it's used to fill missing
      values on the last iteration.
      """
-    result = []
      seq = list(value)
      length = len(seq)
      items_per_slice = length // slices
@@ -482,8 +463,7 @@ def do_slice(value, slices, fill_with=None):
          tmp = seq[start:end]
          if fill_with is not None and slice_number >= slices_with_extra:
              tmp.append(fill_with)
-        result.append(tmp)
-    return result
+        yield tmp
  
  
  def do_batch(value, linecount, fill_with=None):
@@ -509,14 +489,13 @@ def do_batch(value, linecount, fill_with=None):
      tmp = []
      for item in value:
          if len(tmp) == linecount:
-            result.append(tmp)
+            yield tmp
              tmp = []
          tmp.append(item)
      if tmp:
          if fill_with is not None and len(tmp) < linecount:
              tmp += [fill_with] * (linecount - len(tmp))
-        result.append(tmp)
-    return result
+        yield tmp
  
  
  def do_round(value, precision=0, method='common'):
@@ -553,7 +532,7 @@ def do_round(value, precision=0, method='common'):
  
  def do_sort(value, reverse=False):
      """Sort a sequence. Per default it sorts ascending, if you pass it
-    `True` as first argument it will reverse the sorting.
+    true as first argument it will reverse the sorting.
      """
      return sorted(value, reverse=reverse)
  
@@ -578,34 +557,83 @@ def do_groupby(environment, value, attribute):
          {% endfor %}
          </ul>
  
+    Additionally it's possible to use tuple unpacking for the grouper and
+    list:
+
+    .. sourcecode:: html+jinja
+
+        <ul>
+        {% for grouper, list in persons|groupby('gender') %}
+            ...
+        {% endfor %}
+        </ul>
+
      As you can see the item we're grouping by is stored in the `grouper`
      attribute and the `list` contains all the objects that have this grouper
      in common.
      """
      expr = lambda x: environment.subscribe(x, attribute)
-    return sorted([{
-        'grouper':  a,
-        'list':     b
-    } for a, b in groupby(sorted(value, key=expr), expr)],
-        key=itemgetter('grouper'))
+    return sorted(map(_GroupTuple, groupby(sorted(value, key=expr), expr)))
+
+
+class _GroupTuple(tuple):
+    __slots__ = ()
+    grouper = property(itemgetter(0))
+    list = property(itemgetter(1))
+
+    def __new__(cls, (key, value)):
+        return tuple.__new__(cls, (key, list(value)))
+
+
+def do_list(value):
+    """Convert the value into a list.  If it was a string the returned list
+    will be a list of characters.
+    """
+    return list(value)
+
+
+def do_mark_safe(value):
+    """Mark the value as safe which means that in an environment with automatic
+    escaping enabled this variable will not be escaped.
+    """
+    return Markup(value)
+
+
+def do_reverse(value):
+    """Reverse the object or return an iterator that iterates over it the
+    other way round.
+    """
+    if isinstance(value, basestring):
+        return value[::-1]
+    try:
+        return reversed(value)
+    except TypeError:
+        try:
+            rv = list(value)
+            rv.reverse()
+            return rv
+        except TypeError:
+            raise FilterArgumentError('argument must be iterable')
  
  
  FILTERS = {
      'replace':              do_replace,
      'upper':                do_upper,
      'lower':                do_lower,
-    'escape':               do_escape,
-    'e':                    do_escape,
-    'xmlattr':              do_xmlattr,
+    'escape':               escape,
+    'e':                    escape,
+    'forceescape':          do_forceescape,
      'capitalize':           do_capitalize,
      'title':                do_title,
      'default':              do_default,
+    'd':                    do_default,
      'join':                 do_join,
      'count':                len,
      'dictsort':             do_dictsort,
      'length':               len,
-    'reverse':              reversed,
+    'reverse':              do_reverse,
      'center':               do_center,
+    'indent':               do_indent,
      'title':                do_title,
      'capitalize':           do_capitalize,
      'first':                do_first,
@@ -613,13 +641,13 @@ FILTERS = {
      'random':               do_random,
      'filesizeformat':       do_filesizeformat,
      'pprint':               do_pprint,
-    'indent':               do_indent,
      'truncate':             do_truncate,
      'wordwrap':             do_wordwrap,
      'wordcount':            do_wordcount,
      'int':                  do_int,
      'float':                do_float,
-    'string':               do_string,
+    'string':               soft_unicode,
+    'list':                 do_list,
      'urlize':               do_urlize,
      'format':               do_format,
      'trim':                 do_trim,
@@ -631,5 +659,6 @@ FILTERS = {
      'round':                do_round,
      'sort':                 do_sort,
      'groupby':              do_groupby,
-    'safe':                 Markup
+    'safe':                 do_mark_safe,
+    'xmlattr':              do_xmlattr
  }