jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import sys
  13 import string
  14 try:
  15     from thread import allocate_lock
  16 except ImportError:
  17     from dummy_thread import allocate_lock
  18 from htmlentitydefs import name2codepoint
  19 from collections import deque
  20 from copy import deepcopy
  21 from itertools import imap
  22
  23
  24 _word_split_re = re.compile(r'(\s+)')
  25 _punctuation_re = re.compile(
  26     '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
  27         '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
  28         '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
  29     )
  30 )
  31 _simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
  32 _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
  33 _entity_re = re.compile(r'&([^;]+);')
  34 _entities = name2codepoint.copy()
  35 _entities['apos'] = 39
  36
  37 # special singleton representing missing values for the runtime
  38 missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
  39
  40
  41 # concatenate a list of strings and convert them to unicode.
  42 # unfortunately there is a bug in python 2.4 and lower that causes
  43 # unicode.join trash the traceback.
  44 _concat = u''.join
  45 try:
  46     def _test_gen_bug():
  47         raise TypeError(_test_gen_bug)
  48         yield None
  49     _concat(_test_gen_bug())
  50 except TypeError, _error:
  51     if not _error.args or _error.args[0] is not _test_gen_bug:
  52         def concat(gen):
  53             try:
  54                 return _concat(list(gen))
  55             except:
  56                 # this hack is needed so that the current frame
  57                 # does not show up in the traceback.
  58                 exc_type, exc_value, tb = sys.exc_info()
  59                 raise exc_type, exc_value, tb.tb_next
  60     else:
  61         concat = _concat
  62     del _test_gen_bug, _error
  63
  64
  65 def contextfunction(f):
  66     """This decorator can be used to mark a function or method context callable.
  67     A context callable is passed the active :class:`Context` as first argument when
  68     called from the template.  This is useful if a function wants to get access
  69     to the context or functions provided on the context object.  For example
  70     a function that returns a sorted list of template variables the current
  71     template exports could look like this::
  72
  73         @contextcallable
  74         def get_exported_names(context):
  75             return sorted(context.exported_vars)
  76     """
  77     f.contextfunction = True
  78     return f
  79
  80
  81 def environmentfunction(f):
  82     """This decorator can be used to mark a function or method as environment
  83     callable.  This decorator works exactly like the :func:`contextfunction`
  84     decorator just that the first argument is the active :class:`Environment`
  85     and not context.
  86     """
  87     f.environmentfunction = True
  88     return f
  89
  90
  91 def is_undefined(obj):
  92     """Check if the object passed is undefined.  This does nothing more than
  93     performing an instance check against :class:`Undefined` but looks nicer.
  94     This can be used for custom filters or tests that want to react to
  95     undefined variables.  For example a custom default filter can look like
  96     this::
  97
  98         def default(var, default=''):
  99             if is_undefined(var):
 100                 return default
 101             return var
 102     """
 103     from jinja2.runtime import Undefined
 104     return isinstance(obj, Undefined)
 105
 106
 107 def clear_caches():
 108     """Jinja2 keeps internal caches for environments and lexers.  These are
 109     used so that Jinja2 doesn't have to recreate environments and lexers all
 110     the time.  Normally you don't have to care about that but if you are
 111     messuring memory consumption you may want to clean the caches.
 112     """
 113     from jinja2.environment import _spontaneous_environments
 114     from jinja2.lexer import _lexer_cache
 115     _spontaneous_environments.clear()
 116     _lexer_cache.clear()
 117
 118
 119 def import_string(import_name, silent=False):
 120     """Imports an object based on a string.  This use useful if you want to
 121     use import paths as endpoints or something similar.  An import path can
 122     be specified either in dotted notation (``xml.sax.saxutils.escape``)
 123     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
 124
 125     If the `silent` is True the return value will be `None` if the import
 126     fails.
 127
 128     :return: imported object
 129     """
 130     try:
 131         if ':' in import_name:
 132             module, obj = import_name.split(':', 1)
 133         elif '.' in import_name:
 134             items = import_name.split('.')
 135             module = '.'.join(items[:-1])
 136             obj = items[-1]
 137         else:
 138             return __import__(import_name)
 139         return getattr(__import__(module, None, None, [obj]), obj)
 140     except (ImportError, AttributeError):
 141         if not silent:
 142             raise
 143
 144
 145 def pformat(obj, verbose=False):
 146     """Prettyprint an object.  Either use the `pretty` library or the
 147     builtin `pprint`.
 148     """
 149     try:
 150         from pretty import pretty
 151         return pretty(obj, verbose=verbose)
 152     except ImportError:
 153         from pprint import pformat
 154         return pformat(obj)
 155
 156
 157 def urlize(text, trim_url_limit=None, nofollow=False):
 158     """Converts any URLs in text into clickable links. Works on http://,
 159     https:// and www. links. Links can have trailing punctuation (periods,
 160     commas, close-parens) and leading punctuation (opening parens) and
 161     it'll still do the right thing.
 162
 163     If trim_url_limit is not None, the URLs in link text will be limited
 164     to trim_url_limit characters.
 165
 166     If nofollow is True, the URLs in link text will get a rel="nofollow"
 167     attribute.
 168     """
 169     trim_url = lambda x, limit=trim_url_limit: limit is not None \
 170                          and (x[:limit] + (len(x) >=limit and '...'
 171                          or '')) or x
 172     words = _word_split_re.split(text)
 173     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 174     for i, word in enumerate(words):
 175         match = _punctuation_re.match(word)
 176         if match:
 177             lead, middle, trail = match.groups()
 178             if middle.startswith('www.') or (
 179                 '@' not in middle and
 180                 not middle.startswith('http://') and
 181                 len(middle) > 0 and
 182                 middle[0] in string.letters + string.digits and (
 183                     middle.endswith('.org') or
 184                     middle.endswith('.net') or
 185                     middle.endswith('.com')
 186                 )):
 187                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 188                     nofollow_attr, trim_url(middle))
 189             if middle.startswith('http://') or \
 190                middle.startswith('https://'):
 191                 middle = '<a href="%s"%s>%s</a>' % (middle,
 192                     nofollow_attr, trim_url(middle))
 193             if '@' in middle and not middle.startswith('www.') and \
 194                not ':' in middle and _simple_email_re.match(middle):
 195                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 196             if lead + middle + trail != word:
 197                 words[i] = lead + middle + trail
 198     return u''.join(words)
 199
 200
 201 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 202     """Generate some lorem impsum for the template."""
 203     from jinja2.constants import LOREM_IPSUM_WORDS
 204     from random import choice, random, randrange
 205     words = LOREM_IPSUM_WORDS.split()
 206     result = []
 207
 208     for _ in xrange(n):
 209         next_capitalized = True
 210         last_comma = last_fullstop = 0
 211         word = None
 212         last = None
 213         p = []
 214
 215         # each paragraph contains out of 20 to 100 words.
 216         for idx, _ in enumerate(xrange(randrange(min, max))):
 217             while True:
 218                 word = choice(words)
 219                 if word != last:
 220                     last = word
 221                     break
 222             if next_capitalized:
 223                 word = word.capitalize()
 224                 next_capitalized = False
 225             # add commas
 226             if idx - randrange(3, 8) > last_comma:
 227                 last_comma = idx
 228                 last_fullstop += 2
 229                 word += ','
 230             # add end of sentences
 231             if idx - randrange(10, 20) > last_fullstop:
 232                 last_comma = last_fullstop = idx
 233                 word += '.'
 234                 next_capitalized = True
 235             p.append(word)
 236
 237         # ensure that the paragraph ends with a dot.
 238         p = u' '.join(p)
 239         if p.endswith(','):
 240             p = p[:-1] + '.'
 241         elif not p.endswith('.'):
 242             p += '.'
 243         result.append(p)
 244
 245     if not html:
 246         return u'\n\n'.join(result)
 247     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 248
 249
 250 class Markup(unicode):
 251     """Marks a string as being safe for inclusion in HTML/XML output without
 252     needing to be escaped.  This implements the `__html__` interface a couple
 253     of frameworks and web applications use.
 254
 255     The `escape` function returns markup objects so that double escaping can't
 256     happen.  If you want to use autoescaping in Jinja just set the finalizer
 257     of the environment to `escape`.
 258     """
 259     __slots__ = ()
 260
 261     def __new__(cls, base=u''):
 262         if hasattr(base, '__html__'):
 263             base = base.__html__()
 264         return unicode.__new__(cls, base)
 265
 266     def __html__(self):
 267         return self
 268
 269     def __add__(self, other):
 270         if hasattr(other, '__html__') or isinstance(other, basestring):
 271             return self.__class__(unicode(self) + unicode(escape(other)))
 272         return NotImplemented
 273
 274     def __radd__(self, other):
 275         if hasattr(other, '__html__') or isinstance(other, basestring):
 276             return self.__class__(unicode(escape(other)) + unicode(self))
 277         return NotImplemented
 278
 279     def __mul__(self, num):
 280         if isinstance(num, (int, long)):
 281             return self.__class__(unicode.__mul__(self, num))
 282         return NotImplemented
 283     __rmul__ = __mul__
 284
 285     def __mod__(self, arg):
 286         if isinstance(arg, tuple):
 287             arg = tuple(imap(_MarkupEscapeHelper, arg))
 288         else:
 289             arg = _MarkupEscapeHelper(arg)
 290         return self.__class__(unicode.__mod__(self, arg))
 291
 292     def __repr__(self):
 293         return '%s(%s)' % (
 294             self.__class__.__name__,
 295             unicode.__repr__(self)
 296         )
 297
 298     def join(self, seq):
 299         return self.__class__(unicode.join(self, imap(escape, seq)))
 300     join.__doc__ = unicode.join.__doc__
 301
 302     def split(self, *args, **kwargs):
 303         return map(self.__class__, unicode.split(self, *args, **kwargs))
 304     split.__doc__ = unicode.split.__doc__
 305
 306     def rsplit(self, *args, **kwargs):
 307         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 308     rsplit.__doc__ = unicode.rsplit.__doc__
 309
 310     def splitlines(self, *args, **kwargs):
 311         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 312     splitlines.__doc__ = unicode.splitlines.__doc__
 313
 314     def unescape(self):
 315         """Unescape markup."""
 316         def handle_match(m):
 317             name = m.group(1)
 318             if name in _entities:
 319                 return unichr(_entities[name])
 320             try:
 321                 if name[:2] in ('#x', '#X'):
 322                     return unichr(int(name[2:], 16))
 323                 elif name.startswith('#'):
 324                     return unichr(int(name[1:]))
 325             except ValueError:
 326                 pass
 327             return u''
 328         return _entity_re.sub(handle_match, unicode(self))
 329
 330     def striptags(self):
 331         """Strip tags and resolve enities."""
 332         stripped = u' '.join(_striptags_re.sub('', self).split())
 333         return Markup(stripped).unescape()
 334
 335     @classmethod
 336     def escape(cls, s):
 337         """Escape the string.  Works like :func:`escape`."""
 338         rv = escape(s)
 339         if rv.__class__ is not cls:
 340             return cls(rv)
 341         return rv
 342
 343     def make_wrapper(name):
 344         orig = getattr(unicode, name)
 345         def func(self, *args, **kwargs):
 346             args = _escape_argspec(list(args), enumerate(args))
 347             _escape_argspec(kwargs, kwargs.iteritems())
 348             return self.__class__(orig(self, *args, **kwargs))
 349         func.__name__ = orig.__name__
 350         func.__doc__ = orig.__doc__
 351         return func
 352
 353     for method in '__getitem__', '__getslice__', 'capitalize', \
 354                   'title', 'lower', 'upper', 'replace', 'ljust', \
 355                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 356                   'translate', 'expandtabs', 'swapcase', 'zfill':
 357         locals()[method] = make_wrapper(method)
 358
 359     # new in python 2.5
 360     if hasattr(unicode, 'partition'):
 361         partition = make_wrapper('partition'),
 362         rpartition = make_wrapper('rpartition')
 363
 364     # new in python 2.6
 365     if hasattr(unicode, 'format'):
 366         format = make_wrapper('format')
 367
 368     del method, make_wrapper
 369
 370
 371 def _escape_argspec(obj, iterable):
 372     """Helper for various string-wrapped functions."""
 373     for key, value in iterable:
 374         if hasattr(value, '__html__') or isinstance(value, basestring):
 375             obj[key] = escape(value)
 376     return obj
 377
 378
 379 class _MarkupEscapeHelper(object):
 380     """Helper for Markup.__mod__"""
 381
 382     def __init__(self, obj):
 383         self.obj = obj
 384
 385     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 386     __unicode__ = lambda s: unicode(escape(s.obj))
 387     __str__ = lambda s: str(escape(s.obj))
 388     __repr__ = lambda s: str(repr(escape(s.obj)))
 389     __int__ = lambda s: int(s.obj)
 390     __float__ = lambda s: float(s.obj)
 391
 392
 393 class LRUCache(object):
 394     """A simple LRU Cache implementation."""
 395     # this is fast for small capacities (something around 200) but doesn't
 396     # scale.  But as long as it's only used for the database connections in
 397     # a non request fallback it's fine.
 398
 399     def __init__(self, capacity):
 400         self.capacity = capacity
 401         self._mapping = {}
 402         self._queue = deque()
 403         self._postinit()
 404
 405     def _postinit(self):
 406         # alias all queue methods for faster lookup
 407         self._popleft = self._queue.popleft
 408         self._pop = self._queue.pop
 409         if hasattr(self._queue, 'remove'):
 410             self._remove = self._queue.remove
 411         self._wlock = allocate_lock()
 412         self._append = self._queue.append
 413
 414     def _remove(self, obj):
 415         """Python 2.4 compatibility."""
 416         for idx, item in enumerate(self._queue):
 417             if item == obj:
 418                 del self._queue[idx]
 419                 break
 420
 421     def __getstate__(self):
 422         return {
 423             'capacity':     self.capacity,
 424             '_mapping':     self._mapping,
 425             '_queue':       self._queue
 426         }
 427
 428     def __setstate__(self, d):
 429         self.__dict__.update(d)
 430         self._postinit()
 431
 432     def __getnewargs__(self):
 433         return (self.capacity,)
 434
 435     def copy(self):
 436         """Return an shallow copy of the instance."""
 437         rv = self.__class__(self.capacity)
 438         rv._mapping.update(self._mapping)
 439         rv._queue = deque(self._queue)
 440         return rv
 441
 442     def get(self, key, default=None):
 443         """Return an item from the cache dict or `default`"""
 444         try:
 445             return self[key]
 446         except KeyError:
 447             return default
 448
 449     def setdefault(self, key, default=None):
 450         """Set `default` if the key is not in the cache otherwise
 451         leave unchanged. Return the value of this key.
 452         """
 453         try:
 454             return self[key]
 455         except KeyError:
 456             self[key] = default
 457             return default
 458
 459     def clear(self):
 460         """Clear the cache."""
 461         self._wlock.acquire()
 462         try:
 463             self._mapping.clear()
 464             self._queue.clear()
 465         finally:
 466             self._wlock.release()
 467
 468     def __contains__(self, key):
 469         """Check if a key exists in this cache."""
 470         return key in self._mapping
 471
 472     def __len__(self):
 473         """Return the current size of the cache."""
 474         return len(self._mapping)
 475
 476     def __repr__(self):
 477         return '<%s %r>' % (
 478             self.__class__.__name__,
 479             self._mapping
 480         )
 481
 482     def __getitem__(self, key):
 483         """Get an item from the cache. Moves the item up so that it has the
 484         highest priority then.
 485
 486         Raise an `KeyError` if it does not exist.
 487         """
 488         rv = self._mapping[key]
 489         if self._queue[-1] != key:
 490             self._remove(key)
 491             self._append(key)
 492         return rv
 493
 494     def __setitem__(self, key, value):
 495         """Sets the value for an item. Moves the item up so that it
 496         has the highest priority then.
 497         """
 498         self._wlock.acquire()
 499         try:
 500             if key in self._mapping:
 501                 self._remove(key)
 502             elif len(self._mapping) == self.capacity:
 503                 del self._mapping[self._popleft()]
 504             self._append(key)
 505             self._mapping[key] = value
 506         finally:
 507             self._wlock.release()
 508
 509     def __delitem__(self, key):
 510         """Remove an item from the cache dict.
 511         Raise an `KeyError` if it does not exist.
 512         """
 513         self._wlock.acquire()
 514         try:
 515             del self._mapping[key]
 516             self._remove(key)
 517         finally:
 518             self._wlock.release()
 519
 520     def items(self):
 521         """Return a list of items."""
 522         result = [(key, self._mapping[key]) for key in list(self._queue)]
 523         result.reverse()
 524         return result
 525
 526     def iteritems(self):
 527         """Iterate over all items."""
 528         return iter(self.items())
 529
 530     def values(self):
 531         """Return a list of all values."""
 532         return [x[1] for x in self.items()]
 533
 534     def itervalue(self):
 535         """Iterate over all values."""
 536         return iter(self.values())
 537
 538     def keys(self):
 539         """Return a list of all keys ordered by most recent usage."""
 540         return list(self)
 541
 542     def iterkeys(self):
 543         """Iterate over all keys in the cache dict, ordered by
 544         the most recent usage.
 545         """
 546         return reversed(tuple(self._queue))
 547
 548     __iter__ = iterkeys
 549
 550     def __reversed__(self):
 551         """Iterate over the values in the cache dict, oldest items
 552         coming first.
 553         """
 554         return iter(tuple(self._queue))
 555
 556     __copy__ = copy
 557
 558
 559 # register the LRU cache as mutable mapping if possible
 560 try:
 561     from collections import MutableMapping
 562     MutableMapping.register(LRUCache)
 563 except ImportError:
 564     pass
 565
 566
 567 # we have to import it down here as the speedups module imports the
 568 # markup type which is define above.
 569 try:
 570     from jinja2._speedups import escape, soft_unicode
 571 except ImportError:
 572     def escape(s):
 573         """Convert the characters &, <, >, ' and " in string s to HTML-safe
 574         sequences.  Use this if you need to display text that might contain
 575         such characters in HTML.  Marks return value as markup string.
 576         """
 577         if hasattr(s, '__html__'):
 578             return s.__html__()
 579         return Markup(unicode(s)
 580             .replace('&', '&amp;')
 581             .replace('>', '&gt;')
 582             .replace('<', '&lt;')
 583             .replace("'", '&#39;')
 584             .replace('"', '&#34;')
 585         )
 586
 587     def soft_unicode(s):
 588         """Make a string unicode if it isn't already.  That way a markup
 589         string is not converted back to unicode.
 590         """
 591         if not isinstance(s, unicode):
 592             s = unicode(s)
 593         return s
 594
 595
 596 # partials
 597 try:
 598     from functools import partial
 599 except ImportError:
 600     class partial(object):
 601         def __init__(self, _func, *args, **kwargs):
 602             self._func = _func
 603             self._args = args
 604             self._kwargs = kwargs
 605         def __call__(self, *args, **kwargs):
 606             kwargs.update(self._kwargs)
 607             return self._func(*(self._args + args), **kwargs)