jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import sys
  13 import string
  14 try:
  15     from thread import allocate_lock
  16 except ImportError:
  17     from dummy_thread import allocate_lock
  18 from htmlentitydefs import name2codepoint
  19 from collections import deque
  20 from copy import deepcopy
  21 from itertools import imap
  22
  23
# splits a text on runs of whitespace; the capturing group keeps the
# whitespace runs in the result so that joining the pieces again gives
# back the original text.
_word_split_re = re.compile(r'(\s+)')
# splits a single word into optional leading punctuation (`lead`), the
# actual content (`middle`) and optional trailing punctuation (`trail`).
# the lead/trail alternatives are regex-escaped literals, including the
# already HTML-escaped forms ``&lt;`` and ``&gt;``.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)
# deliberately simple check for "something@host.tld" without whitespace.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# matches HTML comments and tags.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# matches an entity reference; group 1 is the text between ``&`` and ``;``.
_entity_re = re.compile(r'&([^;]+);')
# entity name -> codepoint table; a copy of the stdlib table extended
# with ``apos`` (codepoint 39, the apostrophe).
_entities = name2codepoint.copy()
_entities['apos'] = 39

# special singleton representing missing values for the runtime.  it is
# the only instance of an anonymous class whose repr is ``missing``.
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
  39
  40
# concatenate a list of strings and convert them to unicode.
# unfortunately there is a bug in python 2.4 and lower that causes
# unicode.join to trash the traceback (and replace the exception) when
# the iterable it consumes raises.
_concat = u''.join
try:
    # probe for the bug: join a generator that raises a TypeError which
    # carries the generator function itself as a recognizable argument.
    def _test_gen_bug():
        raise TypeError(_test_gen_bug)
        yield None
    _concat(_test_gen_bug())
except TypeError, _error:
    # if the exception that arrives here is not the one raised by the
    # probe, join replaced it and the interpreter is affected.
    if not _error.args or _error.args[0] is not _test_gen_bug:
        def concat(gen):
            try:
                # exhaust the generator before joining so that errors
                # are raised by the generator and not inside join.
                return _concat(list(gen))
            except:
                # this hack is needed so that the current frame
                # does not show up in the traceback.
                exc_type, exc_value, tb = sys.exc_info()
                raise exc_type, exc_value, tb.tb_next
    else:
        # join passes exceptions through untouched; use it directly.
        concat = _concat
    # the probe helpers are not part of the module's namespace.
    del _test_gen_bug, _error
  63
  64
  65 def contextfunction(f):
  66     """This decorator can be used to mark a function or method context callable.
  67     A context callable is passed the active :class:`Context` as first argument when
  68     called from the template.  This is useful if a function wants to get access
  69     to the context or functions provided on the context object.  For example
  70     a function that returns a sorted list of template variables the current
  71     template exports could look like this::
  72
  73         @contextfunction
  74         def get_exported_names(context):
  75             return sorted(context.exported_vars)
  76     """
  77     f.contextfunction = True
  78     return f
  79
  80
  81 def environmentfunction(f):
  82     """This decorator can be used to mark a function or method as environment
  83     callable.  This decorator works exactly like the :func:`contextfunction`
  84     decorator just that the first argument is the active :class:`Environment`
  85     and not context.
  86     """
  87     f.environmentfunction = True
  88     return f
  89
  90
  91 def is_undefined(obj):
  92     """Check if the object passed is undefined.  This does nothing more than
  93     performing an instance check against :class:`Undefined` but looks nicer.
  94     This can be used for custom filters or tests that want to react to
  95     undefined variables.  For example a custom default filter can look like
  96     this::
  97
  98         def default(var, default=''):
  99             if is_undefined(var):
 100                 return default
 101             return var
 102     """
 103     from jinja2.runtime import Undefined
 104     return isinstance(obj, Undefined)
 105
 106
 107 def clear_caches():
 108     """Jinja2 keeps internal caches for environments and lexers.  These are
 109     used so that Jinja2 doesn't have to recreate environments and lexers all
 110     the time.  Normally you don't have to care about that but if you are
 111     messuring memory consumption you may want to clean the caches.
 112     """
 113     from jinja2.environment import _spontaneous_environments
 114     from jinja2.lexer import _lexer_cache
 115     _spontaneous_environments.clear()
 116     _lexer_cache.clear()
 117
 118
 119 def import_string(import_name, silent=False):
 120     """Imports an object based on a string.  This use useful if you want to
 121     use import paths as endpoints or something similar.  An import path can
 122     be specified either in dotted notation (``xml.sax.saxutils.escape``)
 123     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
 124
 125     If the `silent` is True the return value will be `None` if the import
 126     fails.
 127
 128     :return: imported object
 129     """
 130     try:
 131         if ':' in import_name:
 132             module, obj = import_name.split(':', 1)
 133         elif '.' in import_name:
 134             items = import_name.split('.')
 135             module = '.'.join(items[:-1])
 136             obj = items[-1]
 137         else:
 138             return __import__(import_name)
 139         return getattr(__import__(module, None, None, [obj]), obj)
 140     except (ImportError, AttributeError):
 141         if not silent:
 142             raise
 143
 144
 145 def pformat(obj, verbose=False):
 146     """Prettyprint an object.  Either use the `pretty` library or the
 147     builtin `pprint`.
 148     """
 149     try:
 150         from pretty import pretty
 151         return pretty(obj, verbose=verbose)
 152     except ImportError:
 153         from pprint import pformat
 154         return pformat(obj)
 155
 156
 157 def urlize(text, trim_url_limit=None, nofollow=False):
 158     """Converts any URLs in text into clickable links. Works on http://,
 159     https:// and www. links. Links can have trailing punctuation (periods,
 160     commas, close-parens) and leading punctuation (opening parens) and
 161     it'll still do the right thing.
 162
 163     If trim_url_limit is not None, the URLs in link text will be limited
 164     to trim_url_limit characters.
 165
 166     If nofollow is True, the URLs in link text will get a rel="nofollow"
 167     attribute.
 168     """
 169     trim_url = lambda x, limit=trim_url_limit: limit is not None \
 170                          and (x[:limit] + (len(x) >=limit and '...'
 171                          or '')) or x
 172     words = _word_split_re.split(text)
 173     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 174     for i, word in enumerate(words):
 175         match = _punctuation_re.match(word)
 176         if match:
 177             lead, middle, trail = match.groups()
 178             if middle.startswith('www.') or (
 179                 '@' not in middle and
 180                 not middle.startswith('http://') and
 181                 len(middle) > 0 and
 182                 middle[0] in string.letters + string.digits and (
 183                     middle.endswith('.org') or
 184                     middle.endswith('.net') or
 185                     middle.endswith('.com')
 186                 )):
 187                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 188                     nofollow_attr, trim_url(middle))
 189             if middle.startswith('http://') or \
 190                middle.startswith('https://'):
 191                 middle = '<a href="%s"%s>%s</a>' % (middle,
 192                     nofollow_attr, trim_url(middle))
 193             if '@' in middle and not middle.startswith('www.') and \
 194                not ':' in middle and _simple_email_re.match(middle):
 195                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 196             if lead + middle + trail != word:
 197                 words[i] = lead + middle + trail
 198     return u''.join(words)
 199
 200
 201 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 202     """Generate some lorem impsum for the template."""
 203     from jinja2.constants import LOREM_IPSUM_WORDS
 204     from random import choice, random, randrange
 205     words = LOREM_IPSUM_WORDS.split()
 206     result = []
 207
 208     for _ in xrange(n):
 209         next_capitalized = True
 210         last_comma = last_fullstop = 0
 211         word = None
 212         last = None
 213         p = []
 214
 215         # each paragraph contains out of 20 to 100 words.
 216         for idx, _ in enumerate(xrange(randrange(min, max))):
 217             while True:
 218                 word = choice(words)
 219                 if word != last:
 220                     last = word
 221                     break
 222             if next_capitalized:
 223                 word = word.capitalize()
 224                 next_capitalized = False
 225             # add commas
 226             if idx - randrange(3, 8) > last_comma:
 227                 last_comma = idx
 228                 last_fullstop += 2
 229                 word += ','
 230             # add end of sentences
 231             if idx - randrange(10, 20) > last_fullstop:
 232                 last_comma = last_fullstop = idx
 233                 word += '.'
 234                 next_capitalized = True
 235             p.append(word)
 236
 237         # ensure that the paragraph ends with a dot.
 238         p = u' '.join(p)
 239         if p.endswith(','):
 240             p = p[:-1] + '.'
 241         elif not p.endswith('.'):
 242             p += '.'
 243         result.append(p)
 244
 245     if not html:
 246         return u'\n\n'.join(result)
 247     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 248
 249
 250 class Markup(unicode):
 251     r"""Marks a string as being safe for inclusion in HTML/XML output without
 252     needing to be escaped.  This implements the `__html__` interface a couple
 253     of frameworks and web applications use.  :class:`Markup` is a direct
 254     subclass of `unicode` and provides all the methods of `unicode` just that
 255     it escapes arguments passed and always returns `Markup`.
 256
 257     The `escape` function returns markup objects so that double escaping can't
 258     happen.  If you want to use autoescaping in Jinja just set the finalizer
 259     of the environment to `escape`.
 260
 261     The constructor of the :class:`Markup` class can be used for three
 262     different things:  When passed an unicode object it's assumed to be safe,
 263     when passed an object with an HTML representation (has an `__html__`
 264     method) that representation is used, otherwise the object passed is
 265     converted into a unicode string and then assumed to be safe:
 266
 267     >>> Markup("Hello <em>World</em>!")
 268     Markup(u'Hello <em>World</em>!')
 269     >>> class Foo(object):
 270     ...  def __html__(self):
 271     ...   return '<a href="#">foo</a>'
 272     ...
 273     >>> Markup(Foo())
 274     Markup(u'<a href="#">foo</a>')
 275
 276     If you want object passed being always treated as unsafe you can use the
 277     :meth:`escape` classmethod to create a :class:`Markup` object:
 278
 279     >>> Markup.escape("Hello <em>World</em>!")
 280     Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
 281
 282     Operations on a markup string are markup aware which means that all
 283     arguments are passed through the :func:`escape` function:
 284
 285     >>> em = Markup("<em>%s</em>")
 286     >>> em % "foo & bar"
 287     Markup(u'<em>foo &amp; bar</em>')
 288     >>> strong = Markup("<strong>%(text)s</strong>")
 289     >>> strong % {'text': '<blink>hacker here</blink>'}
 290     Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
 291     >>> Markup("<em>Hello</em> ") + "<foo>"
 292     Markup(u'<em>Hello</em> &lt;foo&gt;')
 293     """
 294     __slots__ = ()
 295
 296     def __new__(cls, base=u'', encoding=None, errors='strict'):
 297         if hasattr(base, '__html__'):
 298             base = base.__html__()
 299         if encoding is None:
 300             return unicode.__new__(cls, base)
 301         return unicode.__new__(cls, base, encoding, errors)
 302
 303     def __html__(self):
 304         return self
 305
 306     def __add__(self, other):
 307         if hasattr(other, '__html__') or isinstance(other, basestring):
 308             return self.__class__(unicode(self) + unicode(escape(other)))
 309         return NotImplemented
 310
 311     def __radd__(self, other):
 312         if hasattr(other, '__html__') or isinstance(other, basestring):
 313             return self.__class__(unicode(escape(other)) + unicode(self))
 314         return NotImplemented
 315
 316     def __mul__(self, num):
 317         if isinstance(num, (int, long)):
 318             return self.__class__(unicode.__mul__(self, num))
 319         return NotImplemented
 320     __rmul__ = __mul__
 321
 322     def __mod__(self, arg):
 323         if isinstance(arg, tuple):
 324             arg = tuple(imap(_MarkupEscapeHelper, arg))
 325         else:
 326             arg = _MarkupEscapeHelper(arg)
 327         return self.__class__(unicode.__mod__(self, arg))
 328
 329     def __repr__(self):
 330         return '%s(%s)' % (
 331             self.__class__.__name__,
 332             unicode.__repr__(self)
 333         )
 334
 335     def join(self, seq):
 336         return self.__class__(unicode.join(self, imap(escape, seq)))
 337     join.__doc__ = unicode.join.__doc__
 338
 339     def split(self, *args, **kwargs):
 340         return map(self.__class__, unicode.split(self, *args, **kwargs))
 341     split.__doc__ = unicode.split.__doc__
 342
 343     def rsplit(self, *args, **kwargs):
 344         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 345     rsplit.__doc__ = unicode.rsplit.__doc__
 346
 347     def splitlines(self, *args, **kwargs):
 348         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 349     splitlines.__doc__ = unicode.splitlines.__doc__
 350
 351     def unescape(self):
 352         r"""Unescape markup again into an unicode string.  This also resolves
 353         known HTML4 and XHTML entities:
 354
 355         >>> Markup("Main &raquo; <em>About</em>").unescape()
 356         u'Main \xbb <em>About</em>'
 357         """
 358         def handle_match(m):
 359             name = m.group(1)
 360             if name in _entities:
 361                 return unichr(_entities[name])
 362             try:
 363                 if name[:2] in ('#x', '#X'):
 364                     return unichr(int(name[2:], 16))
 365                 elif name.startswith('#'):
 366                     return unichr(int(name[1:]))
 367             except ValueError:
 368                 pass
 369             return u''
 370         return _entity_re.sub(handle_match, unicode(self))
 371
 372     def striptags(self):
 373         r"""Unescape markup into an unicode string and strip all tags.  This
 374         also resolves known HTML4 and XHTML entities.  Whitespace is
 375         normalized to one:
 376
 377         >>> Markup("Main &raquo;  <em>About</em>").striptags()
 378         u'Main \xbb About'
 379         """
 380         stripped = u' '.join(_striptags_re.sub('', self).split())
 381         return Markup(stripped).unescape()
 382
 383     @classmethod
 384     def escape(cls, s):
 385         """Escape the string.  Works like :func:`escape` with the difference
 386         that for subclasses of :class:`Markup` this function would return the
 387         correct subclass.
 388         """
 389         rv = escape(s)
 390         if rv.__class__ is not cls:
 391             return cls(rv)
 392         return rv
 393
 394     def make_wrapper(name):
 395         orig = getattr(unicode, name)
 396         def func(self, *args, **kwargs):
 397             args = _escape_argspec(list(args), enumerate(args))
 398             _escape_argspec(kwargs, kwargs.iteritems())
 399             return self.__class__(orig(self, *args, **kwargs))
 400         func.__name__ = orig.__name__
 401         func.__doc__ = orig.__doc__
 402         return func
 403
 404     for method in '__getitem__', '__getslice__', 'capitalize', \
 405                   'title', 'lower', 'upper', 'replace', 'ljust', \
 406                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 407                   'translate', 'expandtabs', 'swapcase', 'zfill':
 408         locals()[method] = make_wrapper(method)
 409
 410     # new in python 2.5
 411     if hasattr(unicode, 'partition'):
 412         partition = make_wrapper('partition'),
 413         rpartition = make_wrapper('rpartition')
 414
 415     # new in python 2.6
 416     if hasattr(unicode, 'format'):
 417         format = make_wrapper('format')
 418
 419     del method, make_wrapper
 420
 421
 422 def _escape_argspec(obj, iterable):
 423     """Helper for various string-wrapped functions."""
 424     for key, value in iterable:
 425         if hasattr(value, '__html__') or isinstance(value, basestring):
 426             obj[key] = escape(value)
 427     return obj
 428
 429
 430 class _MarkupEscapeHelper(object):
 431     """Helper for Markup.__mod__"""
 432
 433     def __init__(self, obj):
 434         self.obj = obj
 435
 436     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 437     __unicode__ = lambda s: unicode(escape(s.obj))
 438     __str__ = lambda s: str(escape(s.obj))
 439     __repr__ = lambda s: str(escape(repr(s.obj)))
 440     __int__ = lambda s: int(s.obj)
 441     __float__ = lambda s: float(s.obj)
 442
 443
 444 class LRUCache(object):
 445     """A simple LRU Cache implementation."""
 446
 447     # this is fast for small capacities (something below 1000) but doesn't
 448     # scale.  But as long as it's only used as storage for templates this
 449     # won't do any harm.
 450
 451     def __init__(self, capacity):
 452         self.capacity = capacity
 453         self._mapping = {}
 454         self._queue = deque()
 455         self._postinit()
 456
 457     def _postinit(self):
 458         # alias all queue methods for faster lookup
 459         self._popleft = self._queue.popleft
 460         self._pop = self._queue.pop
 461         if hasattr(self._queue, 'remove'):
 462             self._remove = self._queue.remove
 463         self._wlock = allocate_lock()
 464         self._append = self._queue.append
 465
 466     def _remove(self, obj):
 467         """Python 2.4 compatibility."""
 468         for idx, item in enumerate(self._queue):
 469             if item == obj:
 470                 del self._queue[idx]
 471                 break
 472
 473     def __getstate__(self):
 474         return {
 475             'capacity':     self.capacity,
 476             '_mapping':     self._mapping,
 477             '_queue':       self._queue
 478         }
 479
 480     def __setstate__(self, d):
 481         self.__dict__.update(d)
 482         self._postinit()
 483
 484     def __getnewargs__(self):
 485         return (self.capacity,)
 486
 487     def copy(self):
 488         """Return an shallow copy of the instance."""
 489         rv = self.__class__(self.capacity)
 490         rv._mapping.update(self._mapping)
 491         rv._queue = deque(self._queue)
 492         return rv
 493
 494     def get(self, key, default=None):
 495         """Return an item from the cache dict or `default`"""
 496         try:
 497             return self[key]
 498         except KeyError:
 499             return default
 500
 501     def setdefault(self, key, default=None):
 502         """Set `default` if the key is not in the cache otherwise
 503         leave unchanged. Return the value of this key.
 504         """
 505         try:
 506             return self[key]
 507         except KeyError:
 508             self[key] = default
 509             return default
 510
 511     def clear(self):
 512         """Clear the cache."""
 513         self._wlock.acquire()
 514         try:
 515             self._mapping.clear()
 516             self._queue.clear()
 517         finally:
 518             self._wlock.release()
 519
 520     def __contains__(self, key):
 521         """Check if a key exists in this cache."""
 522         return key in self._mapping
 523
 524     def __len__(self):
 525         """Return the current size of the cache."""
 526         return len(self._mapping)
 527
 528     def __repr__(self):
 529         return '<%s %r>' % (
 530             self.__class__.__name__,
 531             self._mapping
 532         )
 533
 534     def __getitem__(self, key):
 535         """Get an item from the cache. Moves the item up so that it has the
 536         highest priority then.
 537
 538         Raise an `KeyError` if it does not exist.
 539         """
 540         rv = self._mapping[key]
 541         if self._queue[-1] != key:
 542             self._remove(key)
 543             self._append(key)
 544         return rv
 545
 546     def __setitem__(self, key, value):
 547         """Sets the value for an item. Moves the item up so that it
 548         has the highest priority then.
 549         """
 550         self._wlock.acquire()
 551         try:
 552             if key in self._mapping:
 553                 self._remove(key)
 554             elif len(self._mapping) == self.capacity:
 555                 del self._mapping[self._popleft()]
 556             self._append(key)
 557             self._mapping[key] = value
 558         finally:
 559             self._wlock.release()
 560
 561     def __delitem__(self, key):
 562         """Remove an item from the cache dict.
 563         Raise an `KeyError` if it does not exist.
 564         """
 565         self._wlock.acquire()
 566         try:
 567             del self._mapping[key]
 568             self._remove(key)
 569         finally:
 570             self._wlock.release()
 571
 572     def items(self):
 573         """Return a list of items."""
 574         result = [(key, self._mapping[key]) for key in list(self._queue)]
 575         result.reverse()
 576         return result
 577
 578     def iteritems(self):
 579         """Iterate over all items."""
 580         return iter(self.items())
 581
 582     def values(self):
 583         """Return a list of all values."""
 584         return [x[1] for x in self.items()]
 585
 586     def itervalue(self):
 587         """Iterate over all values."""
 588         return iter(self.values())
 589
 590     def keys(self):
 591         """Return a list of all keys ordered by most recent usage."""
 592         return list(self)
 593
 594     def iterkeys(self):
 595         """Iterate over all keys in the cache dict, ordered by
 596         the most recent usage.
 597         """
 598         return reversed(tuple(self._queue))
 599
 600     __iter__ = iterkeys
 601
 602     def __reversed__(self):
 603         """Iterate over the values in the cache dict, oldest items
 604         coming first.
 605         """
 606         return iter(tuple(self._queue))
 607
 608     __copy__ = copy
 609
 610
# register the LRU cache as mutable mapping if possible.  the abstract
# base class cannot be imported from `collections` on every python
# version; in that case the registration is skipped.
try:
    from collections import MutableMapping
    MutableMapping.register(LRUCache)
except ImportError:
    pass
 617
 618
 619 # we have to import it down here as the speedups module imports the
 620 # markup type which is define above.
 621 try:
 622     from jinja2._speedups import escape, soft_unicode
 623 except ImportError:
 624     def escape(s):
 625         """Convert the characters &, <, >, ' and " in string s to HTML-safe
 626         sequences.  Use this if you need to display text that might contain
 627         such characters in HTML.  Marks return value as markup string.
 628         """
 629         if hasattr(s, '__html__'):
 630             return s.__html__()
 631         return Markup(unicode(s)
 632             .replace('&', '&amp;')
 633             .replace('>', '&gt;')
 634             .replace('<', '&lt;')
 635             .replace("'", '&#39;')
 636             .replace('"', '&#34;')
 637         )
 638
 639     def soft_unicode(s):
 640         """Make a string unicode if it isn't already.  That way a markup
 641         string is not converted back to unicode.
 642         """
 643         if not isinstance(s, unicode):
 644             s = unicode(s)
 645         return s
 646
 647
 648 # partials
 649 try:
 650     from functools import partial
 651 except ImportError:
 652     class partial(object):
 653         def __init__(self, _func, *args, **kwargs):
 654             self._func = _func
 655             self._args = args
 656             self._kwargs = kwargs
 657         def __call__(self, *args, **kwargs):
 658             kwargs.update(self._kwargs)
 659             return self._func(*(self._args + args), **kwargs)