jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import string
  13 from collections import deque
  14 from copy import deepcopy
  15 from itertools import imap
  16
  17
  18 _word_split_re = re.compile(r'(\s+)')
  19 _punctuation_re = re.compile(
  20     '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
  21         '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
  22         '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
  23     )
  24 )
  25 _simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
  26
  27
  28 def contextfunction(f):
  29     """This decorator can be used to mark a callable as context callable.  A
  30     context callable is passed the active context as first argument if it
  31     was directly stored in the context.
  32     """
  33     f.contextfunction = True
  34     return f
  35
  36
  37 def environmentfunction(f):
  38     """This decorator can be used to mark a callable as environment callable.
  39     A environment callable is passed the current environment as first argument
  40     if it was directly stored in the context.
  41     """
  42     f.environmentfunction = True
  43     return f
  44
  45
  46 def import_string(import_name, silent=False):
  47     """Imports an object based on a string.  This use useful if you want to
  48     use import paths as endpoints or something similar.  An import path can
  49     be specified either in dotted notation (``xml.sax.saxutils.escape``)
  50     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
  51
  52     If the `silent` is True the return value will be `None` if the import
  53     fails.
  54
  55     :return: imported object
  56     """
  57     try:
  58         if ':' in import_name:
  59             module, obj = import_name.split(':', 1)
  60         elif '.' in import_name:
  61             items = import_name.split('.')
  62             module = '.'.join(items[:-1])
  63             obj = items[-1]
  64         else:
  65             return __import__(import_name)
  66         return getattr(__import__(module, None, None, [obj]), obj)
  67     except (ImportError, AttributeError):
  68         if not silent:
  69             raise
  70
  71
  72 def pformat(obj, verbose=False):
  73     """Prettyprint an object.  Either use the `pretty` library or the
  74     builtin `pprint`.
  75     """
  76     try:
  77         from pretty import pretty
  78         return pretty(obj, verbose=verbose)
  79     except ImportError:
  80         from pprint import pformat
  81         return pformat(obj)
  82
  83
  84 def urlize(text, trim_url_limit=None, nofollow=False):
  85     """Converts any URLs in text into clickable links. Works on http://,
  86     https:// and www. links. Links can have trailing punctuation (periods,
  87     commas, close-parens) and leading punctuation (opening parens) and
  88     it'll still do the right thing.
  89
  90     If trim_url_limit is not None, the URLs in link text will be limited
  91     to trim_url_limit characters.
  92
  93     If nofollow is True, the URLs in link text will get a rel="nofollow"
  94     attribute.
  95     """
  96     trim_url = lambda x, limit=trim_url_limit: limit is not None \
  97                          and (x[:limit] + (len(x) >=limit and '...'
  98                          or '')) or x
  99     words = _word_split_re.split(text)
 100     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 101     for i, word in enumerate(words):
 102         match = _punctuation_re.match(word)
 103         if match:
 104             lead, middle, trail = match.groups()
 105             if middle.startswith('www.') or (
 106                 '@' not in middle and
 107                 not middle.startswith('http://') and
 108                 len(middle) > 0 and
 109                 middle[0] in string.letters + string.digits and (
 110                     middle.endswith('.org') or
 111                     middle.endswith('.net') or
 112                     middle.endswith('.com')
 113                 )):
 114                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 115                     nofollow_attr, trim_url(middle))
 116             if middle.startswith('http://') or \
 117                middle.startswith('https://'):
 118                 middle = '<a href="%s"%s>%s</a>' % (middle,
 119                     nofollow_attr, trim_url(middle))
 120             if '@' in middle and not middle.startswith('www.') and \
 121                not ':' in middle and _simple_email_re.match(middle):
 122                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 123             if lead + middle + trail != word:
 124                 words[i] = lead + middle + trail
 125     return u''.join(words)
 126
 127
 128 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 129     """Generate some lorem impsum for the template."""
 130     from jinja2.constants import LOREM_IPSUM_WORDS
 131     from random import choice, random, randrange
 132     words = LOREM_IPSUM_WORDS.split()
 133     result = []
 134
 135     for _ in xrange(n):
 136         next_capitalized = True
 137         last_comma = last_fullstop = 0
 138         word = None
 139         last = None
 140         p = []
 141
 142         # each paragraph contains out of 20 to 100 words.
 143         for idx, _ in enumerate(xrange(randrange(min, max))):
 144             while True:
 145                 word = choice(words)
 146                 if word != last:
 147                     last = word
 148                     break
 149             if next_capitalized:
 150                 word = word.capitalize()
 151                 next_capitalized = False
 152             # add commas
 153             if idx - randrange(3, 8) > last_comma:
 154                 last_comma = idx
 155                 last_fullstop += 2
 156                 word += ','
 157             # add end of sentences
 158             if idx - randrange(10, 20) > last_fullstop:
 159                 last_comma = last_fullstop = idx
 160                 word += '.'
 161                 next_capitalized = True
 162             p.append(word)
 163
 164         # ensure that the paragraph ends with a dot.
 165         p = u' '.join(p)
 166         if p.endswith(','):
 167             p = p[:-1] + '.'
 168         elif not p.endswith('.'):
 169             p += '.'
 170         result.append(p)
 171
 172     if not html:
 173         return u'\n\n'.join(result)
 174     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 175
 176
 177 class Markup(unicode):
 178     """Marks a string as being safe for inclusion in HTML/XML output without
 179     needing to be escaped.  This implements the `__html__` interface a couple
 180     of frameworks and web applications use.
 181
 182     The `escape` function returns markup objects so that double escaping can't
 183     happen.  If you want to use autoescaping in Jinja just set the finalizer
 184     of the environment to `escape`.
 185     """
 186     __slots__ = ()
 187
 188     def __html__(self):
 189         return self
 190
 191     def __add__(self, other):
 192         if hasattr(other, '__html__') or isinstance(other, basestring):
 193             return self.__class__(unicode(self) + unicode(escape(other)))
 194         return NotImplemented
 195
 196     def __radd__(self, other):
 197         if hasattr(other, '__html__') or isinstance(other, basestring):
 198             return self.__class__(unicode(escape(other)) + unicode(self))
 199         return NotImplemented
 200
 201     def __mul__(self, num):
 202         if not isinstance(num, (int, long)):
 203             return NotImplemented
 204         return self.__class__(unicode.__mul__(self, num))
 205     __rmul__ = __mul__
 206
 207     def __mod__(self, arg):
 208         if isinstance(arg, tuple):
 209             arg = tuple(imap(_MarkupEscapeHelper, arg))
 210         else:
 211             arg = _MarkupEscapeHelper(arg)
 212         return self.__class__(unicode.__mod__(self, arg))
 213
 214     def __repr__(self):
 215         return '%s(%s)' % (
 216             self.__class__.__name__,
 217             unicode.__repr__(self)
 218         )
 219
 220     def join(self, seq):
 221         return self.__class__(unicode.join(self, imap(escape, seq)))
 222     join.__doc__ = unicode.join.__doc__
 223
 224     def split(self, *args, **kwargs):
 225         return map(self.__class__, unicode.split(self, *args, **kwargs))
 226     split.__doc__ = unicode.split.__doc__
 227
 228     def rsplit(self, *args, **kwargs):
 229         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 230     rsplit.__doc__ = unicode.rsplit.__doc__
 231
 232     def splitlines(self, *args, **kwargs):
 233         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 234     splitlines.__doc__ = unicode.splitlines.__doc__
 235
 236     def make_wrapper(name):
 237         orig = getattr(unicode, name)
 238         def func(self, *args, **kwargs):
 239             args = list(args)
 240             for idx, arg in enumerate(args):
 241                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 242                     args[idx] = escape(arg)
 243             for name, arg in kwargs.iteritems():
 244                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 245                     kwargs[name] = escape(arg)
 246             return self.__class__(orig(self, *args, **kwargs))
 247         func.__name__ = orig.__name__
 248         func.__doc__ = orig.__doc__
 249         return func
 250     for method in '__getitem__', '__getslice__', 'capitalize', \
 251                   'title', 'lower', 'upper', 'replace', 'ljust', \
 252                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 253                   'translate', 'expandtabs', 'swapcase', 'zfill':
 254         locals()[method] = make_wrapper(method)
 255
 256     # new in python 2.5
 257     if hasattr(unicode, 'partition'):
 258         locals().update(
 259             partition=make_wrapper('partition'),
 260             rpartition=make_wrapper('rpartition')
 261         )
 262     del method, make_wrapper
 263
 264
 265 class _MarkupEscapeHelper(object):
 266     """Helper for Markup.__mod__"""
 267
 268     def __init__(self, obj):
 269         self.obj = obj
 270
 271     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 272     __unicode__ = lambda s: unicode(escape(s.obj))
 273     __str__ = lambda s: str(escape(s.obj))
 274     __repr__ = lambda s: str(repr(escape(s.obj)))
 275     __int__ = lambda s: int(s.obj)
 276     __float__ = lambda s: float(s.obj)
 277
 278
 279 class LRUCache(object):
 280     """A simple LRU Cache implementation."""
 281     # this is fast for small capacities (something around 200) but doesn't
 282     # scale.  But as long as it's only used for the database connections in
 283     # a non request fallback it's fine.
 284
 285     def __init__(self, capacity):
 286         self.capacity = capacity
 287         self._mapping = {}
 288         self._queue = deque()
 289
 290         # alias all queue methods for faster lookup
 291         self._popleft = self._queue.popleft
 292         self._pop = self._queue.pop
 293         if hasattr(self._queue, 'remove'):
 294             self._remove = self._queue.remove
 295         self._append = self._queue.append
 296
 297     def _remove(self, obj):
 298         """Python 2.4 compatibility."""
 299         for idx, item in enumerate(self._queue):
 300             if item == obj:
 301                 del self._queue[idx]
 302                 break
 303
 304     def copy(self):
 305         """Return an shallow copy of the instance."""
 306         rv = self.__class__(self.capacity)
 307         rv._mapping.update(self._mapping)
 308         rv._queue = deque(self._queue)
 309         return rv
 310
 311     def get(self, key, default=None):
 312         """Return an item from the cache dict or `default`"""
 313         if key in self:
 314             return self[key]
 315         return default
 316
 317     def setdefault(self, key, default=None):
 318         """Set `default` if the key is not in the cache otherwise
 319         leave unchanged. Return the value of this key.
 320         """
 321         if key in self:
 322             return self[key]
 323         self[key] = default
 324         return default
 325
 326     def clear(self):
 327         """Clear the cache."""
 328         self._mapping.clear()
 329         self._queue.clear()
 330
 331     def __contains__(self, key):
 332         """Check if a key exists in this cache."""
 333         return key in self._mapping
 334
 335     def __len__(self):
 336         """Return the current size of the cache."""
 337         return len(self._mapping)
 338
 339     def __repr__(self):
 340         return '<%s %r>' % (
 341             self.__class__.__name__,
 342             self._mapping
 343         )
 344
 345     def __getitem__(self, key):
 346         """Get an item from the cache. Moves the item up so that it has the
 347         highest priority then.
 348
 349         Raise an `KeyError` if it does not exist.
 350         """
 351         rv = self._mapping[key]
 352         if self._queue[-1] != key:
 353             self._remove(key)
 354             self._append(key)
 355         return rv
 356
 357     def __setitem__(self, key, value):
 358         """Sets the value for an item. Moves the item up so that it
 359         has the highest priority then.
 360         """
 361         if key in self._mapping:
 362             self._remove(key)
 363         elif len(self._mapping) == self.capacity:
 364             del self._mapping[self._popleft()]
 365         self._append(key)
 366         self._mapping[key] = value
 367
 368     def __delitem__(self, key):
 369         """Remove an item from the cache dict.
 370         Raise an `KeyError` if it does not exist.
 371         """
 372         del self._mapping[key]
 373         self._remove(key)
 374
 375     def __iter__(self):
 376         """Iterate over all values in the cache dict, ordered by
 377         the most recent usage.
 378         """
 379         return reversed(self._queue)
 380
 381     def __reversed__(self):
 382         """Iterate over the values in the cache dict, oldest items
 383         coming first.
 384         """
 385         return iter(self._queue)
 386
 387     __copy__ = copy
 388
 389
 390 # we have to import it down here as the speedups module imports the
 391 # markup type which is define above.
 392 try:
 393     from jinja2._speedups import escape, soft_unicode
 394 except ImportError:
 395     def escape(obj):
 396         """Convert the characters &, <, >, and " in string s to HTML-safe
 397         sequences. Use this if you need to display text that might contain
 398         such characters in HTML.
 399         """
 400         if hasattr(obj, '__html__'):
 401             return obj.__html__()
 402         return Markup(unicode(obj)
 403             .replace('&', '&amp;')
 404             .replace('>', '&gt;')
 405             .replace('<', '&lt;')
 406             .replace('"', '&quot;')
 407         )
 408
 409     def soft_unicode(s):
 410         """Make a string unicode if it isn't already.  That way a markup
 411         string is not converted back to unicode.
 412         """
 413         if not isinstance(s, unicode):
 414             s = unicode(s)
 415         return s
 416
 417
 418 # partials
 419 try:
 420     from functools import partial
 421 except ImportError:
 422     class partial(object):
 423         def __init__(self, _func, *args, **kwargs):
 424             self._func = _func
 425             self._args = args
 426             self._kwargs = kwargs
 427         def __call__(self, *args, **kwargs):
 428             kwargs.update(self._kwargs)
 429             return self._func(*(self._args + args), **kwargs)