jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: 2008 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import string
  13 from collections import deque
  14 from copy import deepcopy
  15 from itertools import imap
  16
  17
  18 _word_split_re = re.compile(r'(\s+)')
  19 _punctuation_re = re.compile(
  20     '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
  21         '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
  22         '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
  23     )
  24 )
  25 _simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
  26
  27
  28 def contextfunction(f):
  29     """Mark a callable as context callable.  A context callable is passed
  30     the active context as first argument.
  31     """
  32     f.contextfunction = True
  33     return f
  34
  35
  36 def environmentfunction(f):
  37     """Mark a callable as environment callable.  An environment callable is
  38     passed the current environment as first argument.
  39     """
  40     f.environmentfunction = True
  41     return f
  42
  43
  44 def import_string(import_name, silent=False):
  45     """Imports an object based on a string.  This use useful if you want to
  46     use import paths as endpoints or something similar.  An import path can
  47     be specified either in dotted notation (``xml.sax.saxutils.escape``)
  48     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
  49
  50     If the `silent` is True the return value will be `None` if the import
  51     fails.
  52
  53     :return: imported object
  54     """
  55     try:
  56         if ':' in import_name:
  57             module, obj = import_name.split(':', 1)
  58         elif '.' in import_name:
  59             items = import_name.split('.')
  60             module = '.'.join(items[:-1])
  61             obj = items[-1]
  62         else:
  63             return __import__(import_name)
  64         return getattr(__import__(module, None, None, [obj]), obj)
  65     except (ImportError, AttributeError):
  66         if not silent:
  67             raise
  68
  69
  70 def pformat(obj, verbose=False):
  71     """Prettyprint an object.  Either use the `pretty` library or the
  72     builtin `pprint`.
  73     """
  74     try:
  75         from pretty import pretty
  76         return pretty(obj, verbose=verbose)
  77     except ImportError:
  78         from pprint import pformat
  79         return pformat(obj)
  80
  81
  82 def urlize(text, trim_url_limit=None, nofollow=False):
  83     """Converts any URLs in text into clickable links. Works on http://,
  84     https:// and www. links. Links can have trailing punctuation (periods,
  85     commas, close-parens) and leading punctuation (opening parens) and
  86     it'll still do the right thing.
  87
  88     If trim_url_limit is not None, the URLs in link text will be limited
  89     to trim_url_limit characters.
  90
  91     If nofollow is True, the URLs in link text will get a rel="nofollow"
  92     attribute.
  93     """
  94     trim_url = lambda x, limit=trim_url_limit: limit is not None \
  95                          and (x[:limit] + (len(x) >=limit and '...'
  96                          or '')) or x
  97     words = _word_split_re.split(text)
  98     nofollow_attr = nofollow and ' rel="nofollow"' or ''
  99     for i, word in enumerate(words):
 100         match = _punctuation_re.match(word)
 101         if match:
 102             lead, middle, trail = match.groups()
 103             if middle.startswith('www.') or (
 104                 '@' not in middle and
 105                 not middle.startswith('http://') and
 106                 len(middle) > 0 and
 107                 middle[0] in string.letters + string.digits and (
 108                     middle.endswith('.org') or
 109                     middle.endswith('.net') or
 110                     middle.endswith('.com')
 111                 )):
 112                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 113                     nofollow_attr, trim_url(middle))
 114             if middle.startswith('http://') or \
 115                middle.startswith('https://'):
 116                 middle = '<a href="%s"%s>%s</a>' % (middle,
 117                     nofollow_attr, trim_url(middle))
 118             if '@' in middle and not middle.startswith('www.') and \
 119                not ':' in middle and _simple_email_re.match(middle):
 120                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 121             if lead + middle + trail != word:
 122                 words[i] = lead + middle + trail
 123     return u''.join(words)
 124
 125
 126 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 127     """Generate some lorem impsum for the template."""
 128     from jinja2.constants import LOREM_IPSUM_WORDS
 129     from random import choice, random, randrange
 130     words = LOREM_IPSUM_WORDS.split()
 131     result = []
 132
 133     for _ in xrange(n):
 134         next_capitalized = True
 135         last_comma = last_fullstop = 0
 136         word = None
 137         last = None
 138         p = []
 139
 140         # each paragraph contains out of 20 to 100 words.
 141         for idx, _ in enumerate(xrange(randrange(min, max))):
 142             while True:
 143                 word = choice(words)
 144                 if word != last:
 145                     last = word
 146                     break
 147             if next_capitalized:
 148                 word = word.capitalize()
 149                 next_capitalized = False
 150             # add commas
 151             if idx - randrange(3, 8) > last_comma:
 152                 last_comma = idx
 153                 last_fullstop += 2
 154                 word += ','
 155             # add end of sentences
 156             if idx - randrange(10, 20) > last_fullstop:
 157                 last_comma = last_fullstop = idx
 158                 word += '.'
 159                 next_capitalized = True
 160             p.append(word)
 161
 162         # ensure that the paragraph ends with a dot.
 163         p = u' '.join(p)
 164         if p.endswith(','):
 165             p = p[:-1] + '.'
 166         elif not p.endswith('.'):
 167             p += '.'
 168         result.append(p)
 169
 170     if not html:
 171         return u'\n\n'.join(result)
 172     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 173
 174
 175 class Markup(unicode):
 176     """Marks a string as being safe for inclusion in HTML/XML output without
 177     needing to be escaped.  This implements the `__html__` interface a couple
 178     of frameworks and web applications use.
 179
 180     The `escape` function returns markup objects so that double escaping can't
 181     happen.  If you want to use autoescaping in Jinja just set the finalizer
 182     of the environment to `escape`.
 183     """
 184     __slots__ = ()
 185
 186     def __html__(self):
 187         return self
 188
 189     def __add__(self, other):
 190         if hasattr(other, '__html__') or isinstance(other, basestring):
 191             return self.__class__(unicode(self) + unicode(escape(other)))
 192         return NotImplemented
 193
 194     def __radd__(self, other):
 195         if hasattr(other, '__html__') or isinstance(other, basestring):
 196             return self.__class__(unicode(escape(other)) + unicode(self))
 197         return NotImplemented
 198
 199     def __mul__(self, num):
 200         if not isinstance(num, (int, long)):
 201             return NotImplemented
 202         return self.__class__(unicode.__mul__(self, num))
 203     __rmul__ = __mul__
 204
 205     def __mod__(self, arg):
 206         if isinstance(arg, tuple):
 207             arg = tuple(imap(_MarkupEscapeHelper, arg))
 208         else:
 209             arg = _MarkupEscapeHelper(arg)
 210         return self.__class__(unicode.__mod__(self, arg))
 211
 212     def __repr__(self):
 213         return '%s(%s)' % (
 214             self.__class__.__name__,
 215             unicode.__repr__(self)
 216         )
 217
 218     def join(self, seq):
 219         return self.__class__(unicode.join(self, imap(escape, seq)))
 220     join.__doc__ = unicode.join.__doc__
 221
 222     def split(self, *args, **kwargs):
 223         return map(self.__class__, unicode.split(self, *args, **kwargs))
 224     split.__doc__ = unicode.split.__doc__
 225
 226     def rsplit(self, *args, **kwargs):
 227         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 228     rsplit.__doc__ = unicode.rsplit.__doc__
 229
 230     def splitlines(self, *args, **kwargs):
 231         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 232     splitlines.__doc__ = unicode.splitlines.__doc__
 233
 234     def make_wrapper(name):
 235         orig = getattr(unicode, name)
 236         def func(self, *args, **kwargs):
 237             args = list(args)
 238             for idx, arg in enumerate(args):
 239                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 240                     args[idx] = escape(arg)
 241             for name, arg in kwargs.iteritems():
 242                 if hasattr(arg, '__html__') or isinstance(arg, basestring):
 243                     kwargs[name] = escape(arg)
 244             return self.__class__(orig(self, *args, **kwargs))
 245         func.__name__ = orig.__name__
 246         func.__doc__ = orig.__doc__
 247         return func
 248     for method in '__getitem__', '__getslice__', 'capitalize', \
 249                   'title', 'lower', 'upper', 'replace', 'ljust', \
 250                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 251                   'translate', 'expandtabs', 'rpartition', 'swapcase', \
 252                   'zfill':
 253         locals()[method] = make_wrapper(method)
 254
 255     # new in python 2.5
 256     if hasattr(unicode, 'partition'):
 257         locals().update(partition=make_wrapper('partition'),
 258                         rpartition=make_wrapper('rpartition'))
 259     del method, make_wrapper
 260
 261
 262 class _MarkupEscapeHelper(object):
 263     """Helper for Markup.__mod__"""
 264
 265     def __init__(self, obj):
 266         self.obj = obj
 267
 268     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 269     __unicode__ = lambda s: unicode(escape(s.obj))
 270     __str__ = lambda s: str(escape(s.obj))
 271     __repr__ = lambda s: str(repr(escape(s.obj)))
 272     __int__ = lambda s: int(s.obj)
 273     __float__ = lambda s: float(s.obj)
 274
 275
 276 class LRUCache(object):
 277     """A simple LRU Cache implementation."""
 278     # this is fast for small capacities (something around 200) but doesn't
 279     # scale.  But as long as it's only used for the database connections in
 280     # a non request fallback it's fine.
 281
 282     def __init__(self, capacity):
 283         self.capacity = capacity
 284         self._mapping = {}
 285         self._queue = deque()
 286
 287         # alias all queue methods for faster lookup
 288         self._popleft = self._queue.popleft
 289         self._pop = self._queue.pop
 290         if hasattr(self._queue, 'remove'):
 291             self._remove = self._queue.remove
 292         self._append = self._queue.append
 293
 294     def _remove(self, obj):
 295         """Python 2.4 compatibility."""
 296         for idx, item in enumerate(self._queue):
 297             if item == obj:
 298                 del self._queue[idx]
 299                 break
 300
 301     def copy(self):
 302         """Return an shallow copy of the instance."""
 303         rv = self.__class__(self.capacity)
 304         rv._mapping.update(self._mapping)
 305         rv._queue = deque(self._queue)
 306         return rv
 307
 308     def get(self, key, default=None):
 309         """Return an item from the cache dict or `default`"""
 310         if key in self:
 311             return self[key]
 312         return default
 313
 314     def setdefault(self, key, default=None):
 315         """Set `default` if the key is not in the cache otherwise
 316         leave unchanged. Return the value of this key.
 317         """
 318         if key in self:
 319             return self[key]
 320         self[key] = default
 321         return default
 322
 323     def clear(self):
 324         """Clear the cache."""
 325         self._mapping.clear()
 326         self._queue.clear()
 327
 328     def __contains__(self, key):
 329         """Check if a key exists in this cache."""
 330         return key in self._mapping
 331
 332     def __len__(self):
 333         """Return the current size of the cache."""
 334         return len(self._mapping)
 335
 336     def __repr__(self):
 337         return '<%s %r>' % (
 338             self.__class__.__name__,
 339             self._mapping
 340         )
 341
 342     def __getitem__(self, key):
 343         """Get an item from the cache. Moves the item up so that it has the
 344         highest priority then.
 345
 346         Raise an `KeyError` if it does not exist.
 347         """
 348         rv = self._mapping[key]
 349         if self._queue[-1] != key:
 350             self._remove(key)
 351             self._append(key)
 352         return rv
 353
 354     def __setitem__(self, key, value):
 355         """Sets the value for an item. Moves the item up so that it
 356         has the highest priority then.
 357         """
 358         if key in self._mapping:
 359             self._remove(key)
 360         elif len(self._mapping) == self.capacity:
 361             del self._mapping[self._popleft()]
 362         self._append(key)
 363         self._mapping[key] = value
 364
 365     def __delitem__(self, key):
 366         """Remove an item from the cache dict.
 367         Raise an `KeyError` if it does not exist.
 368         """
 369         del self._mapping[key]
 370         self._remove(key)
 371
 372     def __iter__(self):
 373         """Iterate over all values in the cache dict, ordered by
 374         the most recent usage.
 375         """
 376         return reversed(self._queue)
 377
 378     def __reversed__(self):
 379         """Iterate over the values in the cache dict, oldest items
 380         coming first.
 381         """
 382         return iter(self._queue)
 383
 384     __copy__ = copy
 385
 386
 387 # we have to import it down here as the speedups module imports the
 388 # markup type which is define above.
 389 try:
 390     from jinja2._speedups import escape, soft_unicode
 391 except ImportError:
 392     def escape(obj):
 393         """Convert the characters &, <, >, and " in string s to HTML-safe
 394         sequences. Use this if you need to display text that might contain
 395         such characters in HTML.
 396         """
 397         if hasattr(obj, '__html__'):
 398             return obj.__html__()
 399         return Markup(unicode(obj)
 400             .replace('&', '&amp;')
 401             .replace('>', '&gt;')
 402             .replace('<', '&lt;')
 403             .replace('"', '&quot;')
 404         )
 405
 406     def soft_unicode(s):
 407         """Make a string unicode if it isn't already.  That way a markup
 408         string is not converted back to unicode.
 409         """
 410         if not isinstance(s, unicode):
 411             s = unicode(s)
 412         return s
 413
 414
 415 # partials
 416 try:
 417     from functools import partial
 418 except ImportError:
 419     class partial(object):
 420         def __init__(self, _func, *args, **kwargs):
 421             self._func = func
 422             self._args = args
 423             self._kwargs = kwargs
 424         def __call__(self, *args, **kwargs):
 425             kwargs.update(self._kwargs)
 426             return self._func(*(self._args + args), **kwargs)