jinja2/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.utils
   4     ~~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     :copyright: (c) 2010 by the Jinja Team.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 import sys
  13 import errno
  14 try:
  15     from urllib.parse import quote_from_bytes as url_quote
  16 except ImportError:
  17     from urllib import quote as url_quote
  18 try:
  19     from thread import allocate_lock
  20 except ImportError:
  21     from dummy_thread import allocate_lock
  22 from collections import deque
  23 from itertools import imap
  24
  25
# splits text on runs of whitespace.  The capturing group makes re.split
# keep the whitespace runs in the result, so joining the pieces again
# reproduces the original text.
_word_split_re = re.compile(r'(\s+)')
# breaks a single word into optional leading punctuation (``lead``), the
# word itself (``middle``) and optional trailing punctuation (``trail``).
# The escaped forms ``&lt;`` / ``&gt;`` are listed as well as the raw
# angle brackets.
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % (
        '|'.join(imap(re.escape, ('(', '<', '&lt;'))),
        '|'.join(imap(re.escape, ('.', ',', ')', '>', '\n', '&gt;')))
    )
)
# deliberately loose e-mail check: no whitespace, one ``@`` followed by a
# host part that contains at least one dot.
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
# matches an HTML comment or any ``<...>`` tag.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# matches an entity reference such as ``&amp;`` and captures the part
# between ``&`` and ``;``.
_entity_re = re.compile(r'&([^;]+);')
# ASCII letters and digits; urlize() uses them to test the first
# character of a candidate domain name.
_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
_digits = '0123456789'

# special singleton representing missing values for the runtime.  It is
# the only instance of an anonymous class whose repr is ``missing``.
missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})()

# internal code: the set that internalcode() adds code objects to.
internal_code = set()
  44
  45
# concatenate a list of strings and convert them to unicode.
# unfortunately there is a bug in python 2.4 and lower that causes
# unicode.join to trash the traceback.
#
# The probe below feeds ``u''.join`` a generator that raises a TypeError
# carrying the generator function itself as its only argument.  If the
# TypeError that comes out of join still carries that argument, join
# passes exceptions through untouched and can be used directly.
# Otherwise ``concat`` becomes a wrapper that materializes the generator
# into a list first.
_concat = u''.join
try:
    def _test_gen_bug():
        raise TypeError(_test_gen_bug)
        # unreachable, but the ``yield`` turns this function into a
        # generator function.
        yield None
    _concat(_test_gen_bug())
except TypeError, _error:
    if not _error.args or _error.args[0] is not _test_gen_bug:
        # join replaced our exception: work around it.
        def concat(gen):
            try:
                return _concat(list(gen))
            except Exception:
                # this hack is needed so that the current frame
                # does not show up in the traceback.
                exc_type, exc_value, tb = sys.exc_info()
                raise exc_type, exc_value, tb.tb_next
    else:
        # join behaves, no wrapper needed.
        concat = _concat
    # the probe helpers are not part of the module's namespace.
    del _test_gen_bug, _error
  68
  69
# for python 2.x we create ourselves a next() function that does the
# basics without exception catching.
#
# When the builtin exists, ``next = next`` rebinds it as a module level
# name so that it can be imported from here either way.  When it does
# not exist the lookup raises NameError and the fallback is defined,
# which simply calls the iterator's ``next`` method (no default value
# support).
try:
    next = next
except NameError:
    def next(x):
        return x.next()
  77
  78
  79 # if this python version is unable to deal with unicode filenames
  80 # when passed to encode we let this function encode it properly.
  81 # This is used in a couple of places.  As far as Jinja is concerned
  82 # filenames are unicode *or* bytestrings in 2.x and unicode only in
  83 # 3.x because compile cannot handle bytes
  84 if sys.version_info < (3, 0):
  85     def _encode_filename(filename):
  86         if isinstance(filename, unicode):
  87             return filename.encode('utf-8')
  88         return filename
  89 else:
  90     def _encode_filename(filename):
  91         assert filename is None or isinstance(filename, str), \
  92             'filenames must be strings'
  93         return filename
  94
  95 from keyword import iskeyword as is_python_keyword
  96
  97
# common types.  These do exist in the special types module too which however
# does not exist in IronPython out of the box.  Also that way we don't have
# to deal with implementation specific stuff here
#
# Each type is obtained by creating a throwaway object of that kind and
# asking for its type; the throwaway objects are deleted again at the end.
class _C(object):
    def method(self): pass
def _func():
    yield None
# ``_func`` is a generator function, so its type is the function type and
# the type of its call result is the generator type.
FunctionType = type(_func)
GeneratorType = type(_func())
MethodType = type(_C.method)
CodeType = type(_C.method.func_code)
# a traceback (and through it a frame) is only available while an
# exception is being handled, so raise one on purpose.
try:
    raise TypeError()
except TypeError:
    _tb = sys.exc_info()[2]
    TracebackType = type(_tb)
    FrameType = type(_tb.tb_frame)
del _C, _tb, _func
 116
 117
 118 def contextfunction(f):
 119     """This decorator can be used to mark a function or method context callable.
 120     A context callable is passed the active :class:`Context` as first argument when
 121     called from the template.  This is useful if a function wants to get access
 122     to the context or functions provided on the context object.  For example
 123     a function that returns a sorted list of template variables the current
 124     template exports could look like this::
 125
 126         @contextfunction
 127         def get_exported_names(context):
 128             return sorted(context.exported_vars)
 129     """
 130     f.contextfunction = True
 131     return f
 132
 133
 134 def evalcontextfunction(f):
 135     """This decorator can be used to mark a function or method as an eval
 136     context callable.  This is similar to the :func:`contextfunction`
 137     but instead of passing the context, an evaluation context object is
 138     passed.  For more information about the eval context, see
 139     :ref:`eval-context`.
 140
 141     .. versionadded:: 2.4
 142     """
 143     f.evalcontextfunction = True
 144     return f
 145
 146
 147 def environmentfunction(f):
 148     """This decorator can be used to mark a function or method as environment
 149     callable.  This decorator works exactly like the :func:`contextfunction`
 150     decorator just that the first argument is the active :class:`Environment`
 151     and not context.
 152     """
 153     f.environmentfunction = True
 154     return f
 155
 156
 157 def internalcode(f):
 158     """Marks the function as internally used"""
 159     internal_code.add(f.func_code)
 160     return f
 161
 162
 163 def is_undefined(obj):
 164     """Check if the object passed is undefined.  This does nothing more than
 165     performing an instance check against :class:`Undefined` but looks nicer.
 166     This can be used for custom filters or tests that want to react to
 167     undefined variables.  For example a custom default filter can look like
 168     this::
 169
 170         def default(var, default=''):
 171             if is_undefined(var):
 172                 return default
 173             return var
 174     """
 175     from jinja2.runtime import Undefined
 176     return isinstance(obj, Undefined)
 177
 178
 179 def consume(iterable):
 180     """Consumes an iterable without doing anything with it."""
 181     for event in iterable:
 182         pass
 183
 184
 185 def clear_caches():
 186     """Jinja2 keeps internal caches for environments and lexers.  These are
 187     used so that Jinja2 doesn't have to recreate environments and lexers all
 188     the time.  Normally you don't have to care about that but if you are
 189     messuring memory consumption you may want to clean the caches.
 190     """
 191     from jinja2.environment import _spontaneous_environments
 192     from jinja2.lexer import _lexer_cache
 193     _spontaneous_environments.clear()
 194     _lexer_cache.clear()
 195
 196
 197 def import_string(import_name, silent=False):
 198     """Imports an object based on a string.  This is useful if you want to
 199     use import paths as endpoints or something similar.  An import path can
 200     be specified either in dotted notation (``xml.sax.saxutils.escape``)
 201     or with a colon as object delimiter (``xml.sax.saxutils:escape``).
 202
 203     If the `silent` is True the return value will be `None` if the import
 204     fails.
 205
 206     :return: imported object
 207     """
 208     try:
 209         if ':' in import_name:
 210             module, obj = import_name.split(':', 1)
 211         elif '.' in import_name:
 212             items = import_name.split('.')
 213             module = '.'.join(items[:-1])
 214             obj = items[-1]
 215         else:
 216             return __import__(import_name)
 217         return getattr(__import__(module, None, None, [obj]), obj)
 218     except (ImportError, AttributeError):
 219         if not silent:
 220             raise
 221
 222
 223 def open_if_exists(filename, mode='rb'):
 224     """Returns a file descriptor for the filename if that file exists,
 225     otherwise `None`.
 226     """
 227     try:
 228         return open(filename, mode)
 229     except IOError, e:
 230         if e.errno not in (errno.ENOENT, errno.EISDIR):
 231             raise
 232
 233
 234 def object_type_repr(obj):
 235     """Returns the name of the object's type.  For some recognized
 236     singletons the name of the object is returned instead. (For
 237     example for `None` and `Ellipsis`).
 238     """
 239     if obj is None:
 240         return 'None'
 241     elif obj is Ellipsis:
 242         return 'Ellipsis'
 243     # __builtin__ in 2.x, builtins in 3.x
 244     if obj.__class__.__module__ in ('__builtin__', 'builtins'):
 245         name = obj.__class__.__name__
 246     else:
 247         name = obj.__class__.__module__ + '.' + obj.__class__.__name__
 248     return '%s object' % name
 249
 250
 251 def pformat(obj, verbose=False):
 252     """Prettyprint an object.  Either use the `pretty` library or the
 253     builtin `pprint`.
 254     """
 255     try:
 256         from pretty import pretty
 257         return pretty(obj, verbose=verbose)
 258     except ImportError:
 259         from pprint import pformat
 260         return pformat(obj)
 261
 262
 263 def urlize(text, trim_url_limit=None, nofollow=False):
 264     """Converts any URLs in text into clickable links. Works on http://,
 265     https:// and www. links. Links can have trailing punctuation (periods,
 266     commas, close-parens) and leading punctuation (opening parens) and
 267     it'll still do the right thing.
 268
 269     If trim_url_limit is not None, the URLs in link text will be limited
 270     to trim_url_limit characters.
 271
 272     If nofollow is True, the URLs in link text will get a rel="nofollow"
 273     attribute.
 274     """
 275     trim_url = lambda x, limit=trim_url_limit: limit is not None \
 276                          and (x[:limit] + (len(x) >=limit and '...'
 277                          or '')) or x
 278     words = _word_split_re.split(unicode(escape(text)))
 279     nofollow_attr = nofollow and ' rel="nofollow"' or ''
 280     for i, word in enumerate(words):
 281         match = _punctuation_re.match(word)
 282         if match:
 283             lead, middle, trail = match.groups()
 284             if middle.startswith('www.') or (
 285                 '@' not in middle and
 286                 not middle.startswith('http://') and
 287                 len(middle) > 0 and
 288                 middle[0] in _letters + _digits and (
 289                     middle.endswith('.org') or
 290                     middle.endswith('.net') or
 291                     middle.endswith('.com')
 292                 )):
 293                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
 294                     nofollow_attr, trim_url(middle))
 295             if middle.startswith('http://') or \
 296                middle.startswith('https://'):
 297                 middle = '<a href="%s"%s>%s</a>' % (middle,
 298                     nofollow_attr, trim_url(middle))
 299             if '@' in middle and not middle.startswith('www.') and \
 300                not ':' in middle and _simple_email_re.match(middle):
 301                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
 302             if lead + middle + trail != word:
 303                 words[i] = lead + middle + trail
 304     return u''.join(words)
 305
 306
 307 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 308     """Generate some lorem impsum for the template."""
 309     from jinja2.constants import LOREM_IPSUM_WORDS
 310     from random import choice, randrange
 311     words = LOREM_IPSUM_WORDS.split()
 312     result = []
 313
 314     for _ in xrange(n):
 315         next_capitalized = True
 316         last_comma = last_fullstop = 0
 317         word = None
 318         last = None
 319         p = []
 320
 321         # each paragraph contains out of 20 to 100 words.
 322         for idx, _ in enumerate(xrange(randrange(min, max))):
 323             while True:
 324                 word = choice(words)
 325                 if word != last:
 326                     last = word
 327                     break
 328             if next_capitalized:
 329                 word = word.capitalize()
 330                 next_capitalized = False
 331             # add commas
 332             if idx - randrange(3, 8) > last_comma:
 333                 last_comma = idx
 334                 last_fullstop += 2
 335                 word += ','
 336             # add end of sentences
 337             if idx - randrange(10, 20) > last_fullstop:
 338                 last_comma = last_fullstop = idx
 339                 word += '.'
 340                 next_capitalized = True
 341             p.append(word)
 342
 343         # ensure that the paragraph ends with a dot.
 344         p = u' '.join(p)
 345         if p.endswith(','):
 346             p = p[:-1] + '.'
 347         elif not p.endswith('.'):
 348             p += '.'
 349         result.append(p)
 350
 351     if not html:
 352         return u'\n\n'.join(result)
 353     return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
 354
 355
 356 def unicode_urlencode(obj, charset='utf-8'):
 357     """URL escapes a single bytestring or unicode string with the
 358     given charset if applicable to URL safe quoting under all rules
 359     that need to be considered under all supported Python versions.
 360
 361     If non strings are provided they are converted to their unicode
 362     representation first.
 363     """
 364     if not isinstance(obj, basestring):
 365         obj = unicode(obj)
 366     if isinstance(obj, unicode):
 367         obj = obj.encode(charset)
 368     return unicode(url_quote(obj))
 369
 370
 371 class LRUCache(object):
 372     """A simple LRU Cache implementation."""
 373
 374     # this is fast for small capacities (something below 1000) but doesn't
 375     # scale.  But as long as it's only used as storage for templates this
 376     # won't do any harm.
 377
 378     def __init__(self, capacity):
 379         self.capacity = capacity
 380         self._mapping = {}
 381         self._queue = deque()
 382         self._postinit()
 383
 384     def _postinit(self):
 385         # alias all queue methods for faster lookup
 386         self._popleft = self._queue.popleft
 387         self._pop = self._queue.pop
 388         if hasattr(self._queue, 'remove'):
 389             self._remove = self._queue.remove
 390         self._wlock = allocate_lock()
 391         self._append = self._queue.append
 392
 393     def _remove(self, obj):
 394         """Python 2.4 compatibility."""
 395         for idx, item in enumerate(self._queue):
 396             if item == obj:
 397                 del self._queue[idx]
 398                 break
 399
 400     def __getstate__(self):
 401         return {
 402             'capacity':     self.capacity,
 403             '_mapping':     self._mapping,
 404             '_queue':       self._queue
 405         }
 406
 407     def __setstate__(self, d):
 408         self.__dict__.update(d)
 409         self._postinit()
 410
 411     def __getnewargs__(self):
 412         return (self.capacity,)
 413
 414     def copy(self):
 415         """Return a shallow copy of the instance."""
 416         rv = self.__class__(self.capacity)
 417         rv._mapping.update(self._mapping)
 418         rv._queue = deque(self._queue)
 419         return rv
 420
 421     def get(self, key, default=None):
 422         """Return an item from the cache dict or `default`"""
 423         try:
 424             return self[key]
 425         except KeyError:
 426             return default
 427
 428     def setdefault(self, key, default=None):
 429         """Set `default` if the key is not in the cache otherwise
 430         leave unchanged. Return the value of this key.
 431         """
 432         try:
 433             return self[key]
 434         except KeyError:
 435             self[key] = default
 436             return default
 437
 438     def clear(self):
 439         """Clear the cache."""
 440         self._wlock.acquire()
 441         try:
 442             self._mapping.clear()
 443             self._queue.clear()
 444         finally:
 445             self._wlock.release()
 446
 447     def __contains__(self, key):
 448         """Check if a key exists in this cache."""
 449         return key in self._mapping
 450
 451     def __len__(self):
 452         """Return the current size of the cache."""
 453         return len(self._mapping)
 454
 455     def __repr__(self):
 456         return '<%s %r>' % (
 457             self.__class__.__name__,
 458             self._mapping
 459         )
 460
 461     def __getitem__(self, key):
 462         """Get an item from the cache. Moves the item up so that it has the
 463         highest priority then.
 464
 465         Raise a `KeyError` if it does not exist.
 466         """
 467         rv = self._mapping[key]
 468         if self._queue[-1] != key:
 469             try:
 470                 self._remove(key)
 471             except ValueError:
 472                 # if something removed the key from the container
 473                 # when we read, ignore the ValueError that we would
 474                 # get otherwise.
 475                 pass
 476             self._append(key)
 477         return rv
 478
 479     def __setitem__(self, key, value):
 480         """Sets the value for an item. Moves the item up so that it
 481         has the highest priority then.
 482         """
 483         self._wlock.acquire()
 484         try:
 485             if key in self._mapping:
 486                 try:
 487                     self._remove(key)
 488                 except ValueError:
 489                     # __getitem__ is not locked, it might happen
 490                     pass
 491             elif len(self._mapping) == self.capacity:
 492                 del self._mapping[self._popleft()]
 493             self._append(key)
 494             self._mapping[key] = value
 495         finally:
 496             self._wlock.release()
 497
 498     def __delitem__(self, key):
 499         """Remove an item from the cache dict.
 500         Raise a `KeyError` if it does not exist.
 501         """
 502         self._wlock.acquire()
 503         try:
 504             del self._mapping[key]
 505             try:
 506                 self._remove(key)
 507             except ValueError:
 508                 # __getitem__ is not locked, it might happen
 509                 pass
 510         finally:
 511             self._wlock.release()
 512
 513     def items(self):
 514         """Return a list of items."""
 515         result = [(key, self._mapping[key]) for key in list(self._queue)]
 516         result.reverse()
 517         return result
 518
 519     def iteritems(self):
 520         """Iterate over all items."""
 521         return iter(self.items())
 522
 523     def values(self):
 524         """Return a list of all values."""
 525         return [x[1] for x in self.items()]
 526
 527     def itervalue(self):
 528         """Iterate over all values."""
 529         return iter(self.values())
 530
 531     def keys(self):
 532         """Return a list of all keys ordered by most recent usage."""
 533         return list(self)
 534
 535     def iterkeys(self):
 536         """Iterate over all keys in the cache dict, ordered by
 537         the most recent usage.
 538         """
 539         return reversed(tuple(self._queue))
 540
 541     __iter__ = iterkeys
 542
 543     def __reversed__(self):
 544         """Iterate over the values in the cache dict, oldest items
 545         coming first.
 546         """
 547         return iter(tuple(self._queue))
 548
 549     __copy__ = copy
 550
 551
 552 # register the LRU cache as mutable mapping if possible
 553 try:
 554     from collections import MutableMapping
 555     MutableMapping.register(LRUCache)
 556 except ImportError:
 557     pass
 558
 559
 560 class Cycler(object):
 561     """A cycle helper for templates."""
 562
 563     def __init__(self, *items):
 564         if not items:
 565             raise RuntimeError('at least one item has to be provided')
 566         self.items = items
 567         self.reset()
 568
 569     def reset(self):
 570         """Resets the cycle."""
 571         self.pos = 0
 572
 573     @property
 574     def current(self):
 575         """Returns the current item."""
 576         return self.items[self.pos]
 577
 578     def next(self):
 579         """Goes one item ahead and returns it."""
 580         rv = self.current
 581         self.pos = (self.pos + 1) % len(self.items)
 582         return rv
 583
 584
 585 class Joiner(object):
 586     """A joining helper for templates."""
 587
 588     def __init__(self, sep=u', '):
 589         self.sep = sep
 590         self.used = False
 591
 592     def __call__(self):
 593         if not self.used:
 594             self.used = True
 595             return u''
 596         return self.sep
 597
 598
 599 # try markupsafe first, if that fails go with Jinja2's bundled version
 600 # of markupsafe.  Markupsafe was previously Jinja2's implementation of
 601 # the Markup object but was moved into a separate package in a patchlevel
 602 # release
 603 try:
 604     from markupsafe import Markup, escape, soft_unicode
 605 except ImportError:
 606     from jinja2._markupsafe import Markup, escape, soft_unicode
 607
 608
 609 # partials
 610 try:
 611     from functools import partial
 612 except ImportError:
 613     class partial(object):
 614         def __init__(self, _func, *args, **kwargs):
 615             self._func = _func
 616             self._args = args
 617             self._kwargs = kwargs
 618         def __call__(self, *args, **kwargs):
 619             kwargs.update(self._kwargs)
 620             return self._func(*(self._args + args), **kwargs)