jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import string
  13 from collections import deque
  14 from copy import deepcopy
  15 from itertools import imap
  16
  17
# splits text on runs of whitespace; the capturing group makes ``split``
# return the whitespace runs as well, so joining the pieces again
# reproduces the original text.
_word_split_re = re.compile(r'(\s+)')

# breaks a single word into optional leading punctuation (``lead``), the
# interesting part (``middle``, matched non-greedily) and optional trailing
# punctuation (``trail``).  The literal and the HTML-entity forms of the
# angle brackets are both accepted as punctuation.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)

# loose check for something shaped like ``local@domain.tld``; this is not
# a full e-mail address validation.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')


# special singleton representing missing values for the runtime.  It is the
# only instance of an anonymous class whose repr is ``missing``.
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()
  30
  31
  32 def contextfunction(f):
  33     """This decorator can be used to mark a callable as context callable.  A
  34     context callable is passed the active context as first argument if it
  35     was directly stored in the context.
  36     """
  37     f.contextfunction = True
  38     return f
  39
  40
  41 def environmentfunction(f):
  42     """This decorator can be used to mark a callable as environment callable.
  43     A environment callable is passed the current environment as first argument
  44     if it was directly stored in the context.
  45     """
  46     f.environmentfunction = True
  47     return f
  48
  49
  50 def import_string(import_name, silent=False):
  51     """Imports an object based on a string.  This use useful if you want to
  52     use import paths as endpoints or something similar.  An import path can
  53     be specified either in dotted notation (``xml.sax.saxutils.escape``)
  54     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
  55
  56     If the `silent` is True the return value will be `None` if the import
  57     fails.
  58
  59     :return: imported object
  60     """
  61     try:
  62         if ':' in import_name:
  63             module, obj = import_name.split(':', 1)
  64         elif '.' in import_name:
  65             items = import_name.split('.')
  66             module = '.'.join(items[:-1])
  67             obj = items[-1]
  68         else:
  69             return __import__(import_name)
  70         return getattr(__import__(module, None, None, [obj]), obj)
  71     except (ImportError, AttributeError):
  72         if not silent:
  73             raise
  74
  75
  76 def pformat(obj, verbose=False):
  77     """Prettyprint an object.  Either use the `pretty` library or the
  78     builtin `pprint`.
  79     """
  80     try:
  81         from pretty import pretty
  82         return pretty(obj, verbose=verbose)
  83     except ImportError:
  84         from pprint import pformat
  85         return pformat(obj)
  86
  87
  88 def urlize(text, trim_url_limit=None, nofollow=False):
  89     """Converts any URLs in text into clickable links. Works on http://,
  90     https:// and www. links. Links can have trailing punctuation (periods,
  91     commas, close-parens) and leading punctuation (opening parens) and
  92     it'll still do the right thing.
  93
  94     If trim_url_limit is not None, the URLs in link text will be limited
  95     to trim_url_limit characters.
  96
  97     If nofollow is True, the URLs in link text will get a rel="nofollow"
  98     attribute.
  99     """
 100     trim_url = lambda x, limit=trim_url_limit: limit is not None \
 101                          and (x[:limit] + (len(x) >=limit and '...'
 102                          or '')) or x
 103     words = _word_split_re.split(text)
 104     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 105     for i, word in enumerate(words):
 106         match = _punctuation_re.match(word)
 107         if match:
 108             lead, middle, trail = match.groups()
 109             if middle.startswith('www.') or (
 110                 '@' not in middle and
 111                 not middle.startswith('http://') and
 112                 len(middle) > 0 and
 113                 middle[0] in string.letters + string.digits and (
 114                     middle.endswith('.org') or
 115                     middle.endswith('.net') or
 116                     middle.endswith('.com')
 117                 )):
 118                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 119                     nofollow_attr, trim_url(middle))
 120             if middle.startswith('http://') or \
 121                middle.startswith('https://'):
 122                 middle = '<a href="%s"%s>%s</a>' % (middle,
 123                     nofollow_attr, trim_url(middle))
 124             if '@' in middle and not middle.startswith('www.') and \
 125                not ':' in middle and _simple_email_re.match(middle):
 126                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 127             if lead + middle + trail != word:
 128                 words[i] = lead + middle + trail
 129     return u''.join(words)
 130
 131
 132 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 133     """Generate some lorem impsum for the template."""
 134     from jinja2.constants import LOREM_IPSUM_WORDS
 135     from random import choice, random, randrange
 136     words = LOREM_IPSUM_WORDS.split()
 137     result = []
 138
 139     for _ in xrange(n):
 140         next_capitalized = True
 141         last_comma = last_fullstop = 0
 142         word = None
 143         last = None
 144         p = []
 145
 146         # each paragraph contains out of 20 to 100 words.
 147         for idx, _ in enumerate(xrange(randrange(min, max))):
 148             while True:
 149                 word = choice(words)
 150                 if word != last:
 151                     last = word
 152                     break
 153             if next_capitalized:
 154                 word = word.capitalize()
 155                 next_capitalized = False
 156             # add commas
 157             if idx - randrange(3, 8) > last_comma:
 158                 last_comma = idx
 159                 last_fullstop += 2
 160                 word += ','
 161             # add end of sentences
 162             if idx - randrange(10, 20) > last_fullstop:
 163                 last_comma = last_fullstop = idx
 164                 word += '.'
 165                 next_capitalized = True
 166             p.append(word)
 167
 168         # ensure that the paragraph ends with a dot.
 169         p = u' '.join(p)
 170         if p.endswith(','):
 171             p = p[:-1] + '.'
 172         elif not p.endswith('.'):
 173             p += '.'
 174         result.append(p)
 175
 176     if not html:
 177         return u'\n\n'.join(result)
 178     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 179
 180
 181 class Markup(unicode):
 182     """Marks a string as being safe for inclusion in HTML/XML output without
 183     needing to be escaped.  This implements the `__html__` interface a couple
 184     of frameworks and web applications use.
 185
 186     The `escape` function returns markup objects so that double escaping can't
 187     happen.  If you want to use autoescaping in Jinja just set the finalizer
 188     of the environment to `escape`.
 189     """
 190     __slots__ = ()
 191
 192     def __html__(self):
 193         return self
 194
 195     def __add__(self, other):
 196         if hasattr(other, '__html__') or isinstance(other, basestring):
 197             return self.__class__(unicode(self) + unicode(escape(other)))
 198         return NotImplemented
 199
 200     def __radd__(self, other):
 201         if hasattr(other, '__html__') or isinstance(other, basestring):
 202             return self.__class__(unicode(escape(other)) + unicode(self))
 203         return NotImplemented
 204
 205     def __mul__(self, num):
 206         if not isinstance(num, (int, long)):
 207             return NotImplemented
 208         return self.__class__(unicode.__mul__(self, num))
 209     __rmul__ = __mul__
 210
 211     def __mod__(self, arg):
 212         if isinstance(arg, tuple):
 213             arg = tuple(imap(_MarkupEscapeHelper, arg))
 214         else:
 215             arg = _MarkupEscapeHelper(arg)
 216         return self.__class__(unicode.__mod__(self, arg))
 217
 218     def __repr__(self):
 219         return '%s(%s)' % (
 220             self.__class__.__name__,
 221             unicode.__repr__(self)
 222         )
 223
 224     def join(self, seq):
 225         return self.__class__(unicode.join(self, imap(escape, seq)))
 226     join.__doc__ = unicode.join.__doc__
 227
 228     def split(self, *args, **kwargs):
 229         return map(self.__class__, unicode.split(self, *args, **kwargs))
 230     split.__doc__ = unicode.split.__doc__
 231
 232     def rsplit(self, *args, **kwargs):
 233         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 234     rsplit.__doc__ = unicode.rsplit.__doc__
 235
 236     def splitlines(self, *args, **kwargs):
 237         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 238     splitlines.__doc__ = unicode.splitlines.__doc__
 239
 240     def make_wrapper(name):
 241         orig = getattr(unicode, name)
 242         def func(self, *args, **kwargs):
 243             args = list(args)
 244             for idx, arg in enumerate(args):
 245                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 246                     args[idx] = escape(arg)
 247             for name, arg in kwargs.iteritems():
 248                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 249                     kwargs[name] = escape(arg)
 250             return self.__class__(orig(self, *args, **kwargs))
 251         func.__name__ = orig.__name__
 252         func.__doc__ = orig.__doc__
 253         return func
 254     for method in '__getitem__', '__getslice__', 'capitalize', \
 255                   'title', 'lower', 'upper', 'replace', 'ljust', \
 256                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 257                   'translate', 'expandtabs', 'swapcase', 'zfill':
 258         locals()[method] = make_wrapper(method)
 259
 260     # new in python 2.5
 261     if hasattr(unicode, 'partition'):
 262         locals().update(
 263             partition=make_wrapper('partition'),
 264             rpartition=make_wrapper('rpartition')
 265         )
 266     del method, make_wrapper
 267
 268
 269 class _MarkupEscapeHelper(object):
 270     """Helper for Markup.__mod__"""
 271
 272     def __init__(self, obj):
 273         self.obj = obj
 274
 275     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 276     __unicode__ = lambda s: unicode(escape(s.obj))
 277     __str__ = lambda s: str(escape(s.obj))
 278     __repr__ = lambda s: str(repr(escape(s.obj)))
 279     __int__ = lambda s: int(s.obj)
 280     __float__ = lambda s: float(s.obj)
 281
 282
 283 class LRUCache(object):
 284     """A simple LRU Cache implementation."""
 285     # this is fast for small capacities (something around 200) but doesn't
 286     # scale.  But as long as it's only used for the database connections in
 287     # a non request fallback it's fine.
 288
 289     def __init__(self, capacity):
 290         self.capacity = capacity
 291         self._mapping = {}
 292         self._queue = deque()
 293
 294         # alias all queue methods for faster lookup
 295         self._popleft = self._queue.popleft
 296         self._pop = self._queue.pop
 297         if hasattr(self._queue, 'remove'):
 298             self._remove = self._queue.remove
 299         self._append = self._queue.append
 300
 301     def _remove(self, obj):
 302         """Python 2.4 compatibility."""
 303         for idx, item in enumerate(self._queue):
 304             if item == obj:
 305                 del self._queue[idx]
 306                 break
 307
 308     def copy(self):
 309         """Return an shallow copy of the instance."""
 310         rv = self.__class__(self.capacity)
 311         rv._mapping.update(self._mapping)
 312         rv._queue = deque(self._queue)
 313         return rv
 314
 315     def get(self, key, default=None):
 316         """Return an item from the cache dict or `default`"""
 317         if key in self:
 318             return self[key]
 319         return default
 320
 321     def setdefault(self, key, default=None):
 322         """Set `default` if the key is not in the cache otherwise
 323         leave unchanged. Return the value of this key.
 324         """
 325         if key in self:
 326             return self[key]
 327         self[key] = default
 328         return default
 329
 330     def clear(self):
 331         """Clear the cache."""
 332         self._mapping.clear()
 333         self._queue.clear()
 334
 335     def __contains__(self, key):
 336         """Check if a key exists in this cache."""
 337         return key in self._mapping
 338
 339     def __len__(self):
 340         """Return the current size of the cache."""
 341         return len(self._mapping)
 342
 343     def __repr__(self):
 344         return '<%s %r>' % (
 345             self.__class__.__name__,
 346             self._mapping
 347         )
 348
 349     def __getitem__(self, key):
 350         """Get an item from the cache. Moves the item up so that it has the
 351         highest priority then.
 352
 353         Raise an `KeyError` if it does not exist.
 354         """
 355         rv = self._mapping[key]
 356         if self._queue[-1] != key:
 357             self._remove(key)
 358             self._append(key)
 359         return rv
 360
 361     def __setitem__(self, key, value):
 362         """Sets the value for an item. Moves the item up so that it
 363         has the highest priority then.
 364         """
 365         if key in self._mapping:
 366             self._remove(key)
 367         elif len(self._mapping) == self.capacity:
 368             del self._mapping[self._popleft()]
 369         self._append(key)
 370         self._mapping[key] = value
 371
 372     def __delitem__(self, key):
 373         """Remove an item from the cache dict.
 374         Raise an `KeyError` if it does not exist.
 375         """
 376         del self._mapping[key]
 377         self._remove(key)
 378
 379     def __iter__(self):
 380         """Iterate over all values in the cache dict, ordered by
 381         the most recent usage.
 382         """
 383         return reversed(self._queue)
 384
 385     def __reversed__(self):
 386         """Iterate over the values in the cache dict, oldest items
 387         coming first.
 388         """
 389         return iter(self._queue)
 390
 391     __copy__ = copy
 392
 393
 394 # we have to import it down here as the speedups module imports the
 395 # markup type which is define above.
 396 try:
 397     from jinja2._speedups import escape, soft_unicode
 398 except ImportError:
 399     def escape(obj):
 400         """Convert the characters &, <, >, and " in string s to HTML-safe
 401         sequences. Use this if you need to display text that might contain
 402         such characters in HTML.
 403         """
 404         if hasattr(obj, '__html__'):
 405             return obj.__html__()
 406         return Markup(unicode(obj)
 407             .replace('&', '&amp;')
 408             .replace('>', '&gt;')
 409             .replace('<', '&lt;')
 410             .replace('"', '&quot;')
 411         )
 412
 413     def soft_unicode(s):
 414         """Make a string unicode if it isn't already.  That way a markup
 415         string is not converted back to unicode.
 416         """
 417         if not isinstance(s, unicode):
 418             s = unicode(s)
 419         return s
 420
 421
 422 # partials
 423 try:
 424     from functools import partial
 425 except ImportError:
 426     class partial(object):
 427         def __init__(self, _func, *args, **kwargs):
 428             self._func = _func
 429             self._args = args
 430             self._kwargs = kwargs
 431         def __call__(self, *args, **kwargs):
 432             kwargs.update(self._kwargs)
 433             return self._func(*(self._args + args), **kwargs)