jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import sys
  13 import string
  14 try:
  15     from thread import allocate_lock
  16 except ImportError:
  17     from dummy_thread import allocate_lock
  18 from htmlentitydefs import name2codepoint
  19 from collections import deque
  20 from copy import deepcopy
  21 from itertools import imap
  22
  23
# splits on runs of whitespace; the capture group makes ``split`` keep the
# whitespace runs in the result so the text can be re-joined unchanged.
_word_split_re = re.compile(r'(\s+)')
# splits one word into optional leading punctuation (``lead``), the shortest
# possible ``middle`` and optional trailing punctuation (``trail``).  The
# angle brackets are accepted both raw and as ``&lt;`` / ``&gt;`` entities.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)
# deliberately simple check for something shaped like ``local@host.tld``.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# matches HTML comments and anything between ``<`` and ``>``.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# matches ``&...;`` and captures the text between ``&`` and ``;``.
_entity_re = re.compile(r'&([^;]+);')
# entity name -> code point.  Works on a copy so that adding ``apos`` does
# not modify the table owned by ``htmlentitydefs``.
_entities = name2codepoint.copy()
_entities['apos'] = 39

# special singleton representing missing values for the runtime
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
  39
  40
# concatenate a list of strings and convert them to unicode.
# unfortunately there is a bug in python 2.4 and lower that causes
# unicode.join to trash the traceback.
_concat = u''.join
try:
    # probe for the bug: a generator (made one by the unreachable ``yield``)
    # that raises a TypeError carrying the generator function itself as its
    # only argument, so the handler below can recognise it.
    def _test_gen_bug():
        raise TypeError(_test_gen_bug)
        yield None
    _concat(_test_gen_bug())
except TypeError, _error:
    # ``concat`` is only bound inside this handler; the probe above is
    # expected to always end in a TypeError.
    # If the TypeError that arrived is not the one raised by the probe,
    # wrap the join so the generator is consumed before joining.
    if not _error.args or _error.args[0] is not _test_gen_bug:
        def concat(gen):
            try:
                return _concat(list(gen))
            except:
                # this hack is needed so that the current frame
                # does not show up in the traceback.
                exc_type, exc_value, tb = sys.exc_info()
                raise exc_type, exc_value, tb.tb_next
    else:
        # the original exception came through: plain join can be used.
        concat = _concat
    # keep the probe helpers out of the module namespace.
    del _test_gen_bug, _error
  63
  64
  65 def contextfunction(f):
  66     """This decorator can be used to mark a callable as context callable.  A
  67     context callable is passed the active context as first argument if it
  68     was directly stored in the context.
  69     """
  70     f.contextfunction = True
  71     return f
  72
  73
  74 def environmentfunction(f):
  75     """This decorator can be used to mark a callable as environment callable.
  76     A environment callable is passed the current environment as first argument
  77     if it was directly stored in the context.
  78     """
  79     f.environmentfunction = True
  80     return f
  81
  82
  83 def clear_caches():
  84     """Jinja2 keeps internal caches for environments and lexers.  These are
  85     used so that Jinja2 doesn't have to recreate environments and lexers all
  86     the time.  Normally you don't have to care about that but if you are
  87     messuring memory consumption you may want to clean the caches.
  88     """
  89     from jinja2.environment import _spontaneous_environments
  90     from jinja2.lexer import _lexer_cache
  91     _spontaneous_environments.clear()
  92     _lexer_cache.clear()
  93
  94
  95 def import_string(import_name, silent=False):
  96     """Imports an object based on a string.  This use useful if you want to
  97     use import paths as endpoints or something similar.  An import path can
  98     be specified either in dotted notation (``xml.sax.saxutils.escape``)
  99     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
 100
 101     If the `silent` is True the return value will be `None` if the import
 102     fails.
 103
 104     :return: imported object
 105     """
 106     try:
 107         if ':' in import_name:
 108             module, obj = import_name.split(':', 1)
 109         elif '.' in import_name:
 110             items = import_name.split('.')
 111             module = '.'.join(items[:-1])
 112             obj = items[-1]
 113         else:
 114             return __import__(import_name)
 115         return getattr(__import__(module, None, None, [obj]), obj)
 116     except (ImportError, AttributeError):
 117         if not silent:
 118             raise
 119
 120
 121 def pformat(obj, verbose=False):
 122     """Prettyprint an object.  Either use the `pretty` library or the
 123     builtin `pprint`.
 124     """
 125     try:
 126         from pretty import pretty
 127         return pretty(obj, verbose=verbose)
 128     except ImportError:
 129         from pprint import pformat
 130         return pformat(obj)
 131
 132
 133 def urlize(text, trim_url_limit=None, nofollow=False):
 134     """Converts any URLs in text into clickable links. Works on http://,
 135     https:// and www. links. Links can have trailing punctuation (periods,
 136     commas, close-parens) and leading punctuation (opening parens) and
 137     it'll still do the right thing.
 138
 139     If trim_url_limit is not None, the URLs in link text will be limited
 140     to trim_url_limit characters.
 141
 142     If nofollow is True, the URLs in link text will get a rel="nofollow"
 143     attribute.
 144     """
 145     trim_url = lambda x, limit=trim_url_limit: limit is not None \
 146                          and (x[:limit] + (len(x) >=limit and '...'
 147                          or '')) or x
 148     words = _word_split_re.split(text)
 149     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 150     for i, word in enumerate(words):
 151         match = _punctuation_re.match(word)
 152         if match:
 153             lead, middle, trail = match.groups()
 154             if middle.startswith('www.') or (
 155                 '@' not in middle and
 156                 not middle.startswith('http://') and
 157                 len(middle) > 0 and
 158                 middle[0] in string.letters + string.digits and (
 159                     middle.endswith('.org') or
 160                     middle.endswith('.net') or
 161                     middle.endswith('.com')
 162                 )):
 163                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 164                     nofollow_attr, trim_url(middle))
 165             if middle.startswith('http://') or \
 166                middle.startswith('https://'):
 167                 middle = '<a href="%s"%s>%s</a>' % (middle,
 168                     nofollow_attr, trim_url(middle))
 169             if '@' in middle and not middle.startswith('www.') and \
 170                not ':' in middle and _simple_email_re.match(middle):
 171                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 172             if lead + middle + trail != word:
 173                 words[i] = lead + middle + trail
 174     return u''.join(words)
 175
 176
 177 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 178     """Generate some lorem impsum for the template."""
 179     from jinja2.constants import LOREM_IPSUM_WORDS
 180     from random import choice, random, randrange
 181     words = LOREM_IPSUM_WORDS.split()
 182     result = []
 183
 184     for _ in xrange(n):
 185         next_capitalized = True
 186         last_comma = last_fullstop = 0
 187         word = None
 188         last = None
 189         p = []
 190
 191         # each paragraph contains out of 20 to 100 words.
 192         for idx, _ in enumerate(xrange(randrange(min, max))):
 193             while True:
 194                 word = choice(words)
 195                 if word != last:
 196                     last = word
 197                     break
 198             if next_capitalized:
 199                 word = word.capitalize()
 200                 next_capitalized = False
 201             # add commas
 202             if idx - randrange(3, 8) > last_comma:
 203                 last_comma = idx
 204                 last_fullstop += 2
 205                 word += ','
 206             # add end of sentences
 207             if idx - randrange(10, 20) > last_fullstop:
 208                 last_comma = last_fullstop = idx
 209                 word += '.'
 210                 next_capitalized = True
 211             p.append(word)
 212
 213         # ensure that the paragraph ends with a dot.
 214         p = u' '.join(p)
 215         if p.endswith(','):
 216             p = p[:-1] + '.'
 217         elif not p.endswith('.'):
 218             p += '.'
 219         result.append(p)
 220
 221     if not html:
 222         return u'\n\n'.join(result)
 223     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 224
 225
 226 class Markup(unicode):
 227     """Marks a string as being safe for inclusion in HTML/XML output without
 228     needing to be escaped.  This implements the `__html__` interface a couple
 229     of frameworks and web applications use.
 230
 231     The `escape` function returns markup objects so that double escaping can't
 232     happen.  If you want to use autoescaping in Jinja just set the finalizer
 233     of the environment to `escape`.
 234     """
 235     __slots__ = ()
 236
 237     def __html__(self):
 238         return self
 239
 240     def __add__(self, other):
 241         if hasattr(other, '__html__') or isinstance(other, basestring):
 242             return self.__class__(unicode(self) + unicode(escape(other)))
 243         return NotImplemented
 244
 245     def __radd__(self, other):
 246         if hasattr(other, '__html__') or isinstance(other, basestring):
 247             return self.__class__(unicode(escape(other)) + unicode(self))
 248         return NotImplemented
 249
 250     def __mul__(self, num):
 251         if not isinstance(num, (int, long)):
 252             return NotImplemented
 253         return self.__class__(unicode.__mul__(self, num))
 254     __rmul__ = __mul__
 255
 256     def __mod__(self, arg):
 257         if isinstance(arg, tuple):
 258             arg = tuple(imap(_MarkupEscapeHelper, arg))
 259         else:
 260             arg = _MarkupEscapeHelper(arg)
 261         return self.__class__(unicode.__mod__(self, arg))
 262
 263     def __repr__(self):
 264         return '%s(%s)' % (
 265             self.__class__.__name__,
 266             unicode.__repr__(self)
 267         )
 268
 269     def join(self, seq):
 270         return self.__class__(unicode.join(self, imap(escape, seq)))
 271     join.__doc__ = unicode.join.__doc__
 272
 273     def split(self, *args, **kwargs):
 274         return map(self.__class__, unicode.split(self, *args, **kwargs))
 275     split.__doc__ = unicode.split.__doc__
 276
 277     def rsplit(self, *args, **kwargs):
 278         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 279     rsplit.__doc__ = unicode.rsplit.__doc__
 280
 281     def splitlines(self, *args, **kwargs):
 282         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 283     splitlines.__doc__ = unicode.splitlines.__doc__
 284
 285     def unescape(self):
 286         """Unescape markup."""
 287         def handle_match(m):
 288             name = m.group(1)
 289             if name in _entities:
 290                 return unichr(_entities[name])
 291             try:
 292                 if name[:2] in ('#x', '#X'):
 293                     return unichr(int(name[2:], 16))
 294                 elif name.startswith('#'):
 295                     return unichr(int(name[1:]))
 296             except ValueError:
 297                 pass
 298             return u''
 299         return _entity_re.sub(handle_match, unicode(self))
 300
 301     def striptags(self):
 302         """Strip tags and resolve enities."""
 303         stripped = u' '.join(_striptags_re.sub('', self).split())
 304         return Markup(stripped).unescape()
 305
 306     @classmethod
 307     def escape(cls, s):
 308         """Escape the string.  Works like :func:`escape`."""
 309         rv = escape(s)
 310         if rv.__class__ is not cls:
 311             return cls(rv)
 312         return rv
 313
 314     def make_wrapper(name):
 315         orig = getattr(unicode, name)
 316         def func(self, *args, **kwargs):
 317             args = list(args)
 318             for idx, arg in enumerate(args):
 319                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 320                     args[idx] = escape(arg)
 321             for name, arg in kwargs.iteritems():
 322                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 323                     kwargs[name] = escape(arg)
 324             return self.__class__(orig(self, *args, **kwargs))
 325         func.__name__ = orig.__name__
 326         func.__doc__ = orig.__doc__
 327         return func
 328     for method in '__getitem__', '__getslice__', 'capitalize', \
 329                   'title', 'lower', 'upper', 'replace', 'ljust', \
 330                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 331                   'translate', 'expandtabs', 'swapcase', 'zfill':
 332         locals()[method] = make_wrapper(method)
 333
 334     # new in python 2.5
 335     if hasattr(unicode, 'partition'):
 336         locals().update(
 337             partition=make_wrapper('partition'),
 338             rpartition=make_wrapper('rpartition')
 339         )
 340     del method, make_wrapper
 341
 342
 343 class _MarkupEscapeHelper(object):
 344     """Helper for Markup.__mod__"""
 345
 346     def __init__(self, obj):
 347         self.obj = obj
 348
 349     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 350     __unicode__ = lambda s: unicode(escape(s.obj))
 351     __str__ = lambda s: str(escape(s.obj))
 352     __repr__ = lambda s: str(repr(escape(s.obj)))
 353     __int__ = lambda s: int(s.obj)
 354     __float__ = lambda s: float(s.obj)
 355
 356
 357 class LRUCache(object):
 358     """A simple LRU Cache implementation."""
 359     # this is fast for small capacities (something around 200) but doesn't
 360     # scale.  But as long as it's only used for the database connections in
 361     # a non request fallback it's fine.
 362
 363     def __init__(self, capacity):
 364         self.capacity = capacity
 365         self._mapping = {}
 366         self._queue = deque()
 367
 368         # alias all queue methods for faster lookup
 369         self._popleft = self._queue.popleft
 370         self._pop = self._queue.pop
 371         if hasattr(self._queue, 'remove'):
 372             self._remove = self._queue.remove
 373         self._wlock = allocate_lock()
 374         self._append = self._queue.append
 375
 376     def _remove(self, obj):
 377         """Python 2.4 compatibility."""
 378         for idx, item in enumerate(self._queue):
 379             if item == obj:
 380                 del self._queue[idx]
 381                 break
 382
 383     def copy(self):
 384         """Return an shallow copy of the instance."""
 385         rv = self.__class__(self.capacity)
 386         rv._mapping.update(self._mapping)
 387         rv._queue = deque(self._queue)
 388         return rv
 389
 390     def get(self, key, default=None):
 391         """Return an item from the cache dict or `default`"""
 392         try:
 393             return self[key]
 394         except KeyError:
 395             return default
 396
 397     def setdefault(self, key, default=None):
 398         """Set `default` if the key is not in the cache otherwise
 399         leave unchanged. Return the value of this key.
 400         """
 401         try:
 402             return self[key]
 403         except KeyError:
 404             self[key] = default
 405             return default
 406
 407     def clear(self):
 408         """Clear the cache."""
 409         self._wlock.acquire()
 410         try:
 411             self._mapping.clear()
 412             self._queue.clear()
 413         finally:
 414             self._wlock.release()
 415
 416     def __contains__(self, key):
 417         """Check if a key exists in this cache."""
 418         return key in self._mapping
 419
 420     def __len__(self):
 421         """Return the current size of the cache."""
 422         return len(self._mapping)
 423
 424     def __repr__(self):
 425         return '<%s %r>' % (
 426             self.__class__.__name__,
 427             self._mapping
 428         )
 429
 430     def __getitem__(self, key):
 431         """Get an item from the cache. Moves the item up so that it has the
 432         highest priority then.
 433
 434         Raise an `KeyError` if it does not exist.
 435         """
 436         rv = self._mapping[key]
 437         if self._queue[-1] != key:
 438             self._remove(key)
 439             self._append(key)
 440         return rv
 441
 442     def __setitem__(self, key, value):
 443         """Sets the value for an item. Moves the item up so that it
 444         has the highest priority then.
 445         """
 446         self._wlock.acquire()
 447         try:
 448             if key in self._mapping:
 449                 self._remove(key)
 450             elif len(self._mapping) == self.capacity:
 451                 del self._mapping[self._popleft()]
 452             self._append(key)
 453             self._mapping[key] = value
 454         finally:
 455             self._wlock.release()
 456
 457     def __delitem__(self, key):
 458         """Remove an item from the cache dict.
 459         Raise an `KeyError` if it does not exist.
 460         """
 461         self._wlock.acquire()
 462         try:
 463             del self._mapping[key]
 464             self._remove(key)
 465         finally:
 466             self._wlock.release()
 467
 468     def __iter__(self):
 469         """Iterate over all values in the cache dict, ordered by
 470         the most recent usage.
 471         """
 472         return reversed(self._queue)
 473
 474     def __reversed__(self):
 475         """Iterate over the values in the cache dict, oldest items
 476         coming first.
 477         """
 478         return iter(self._queue)
 479
 480     __copy__ = copy
 481
 482
 483 # we have to import it down here as the speedups module imports the
 484 # markup type which is define above.
 485 try:
 486     from jinja2._speedups import escape, soft_unicode
 487 except ImportError:
 488     def escape(s):
 489         """Convert the characters &, <, >, ' and " in string s to HTML-safe
 490         sequences.  Use this if you need to display text that might contain
 491         such characters in HTML.  Marks return value as markup string.
 492         """
 493         if hasattr(s, '__html__'):
 494             return s.__html__()
 495         return Markup(unicode(s)
 496             .replace('&', '&amp;')
 497             .replace('>', '&gt;')
 498             .replace('<', '&lt;')
 499             .replace("'", '&#39;')
 500             .replace('"', '&#34;')
 501         )
 502
 503     def soft_unicode(s):
 504         """Make a string unicode if it isn't already.  That way a markup
 505         string is not converted back to unicode.
 506         """
 507         if not isinstance(s, unicode):
 508             s = unicode(s)
 509         return s
 510
 511
 512 # partials
 513 try:
 514     from functools import partial
 515 except ImportError:
 516     class partial(object):
 517         def __init__(self, _func, *args, **kwargs):
 518             self._func = _func
 519             self._args = args
 520             self._kwargs = kwargs
 521         def __call__(self, *args, **kwargs):
 522             kwargs.update(self._kwargs)
 523             return self._func(*(self._args + args), **kwargs)