jinja/utils.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja.utils
   4     ~~~~~~~~~~~
   5
   6     Utility functions.
   7
   8     **license information**: some of the regular expressions and
   9     the ``urlize`` function were taken from the django framework.
  10
  11     :copyright: 2007 by Armin Ronacher, Lawrence Journal-World.
  12     :license: BSD, see LICENSE for more details.
  13 """
  14 import re
  15 import sys
  16 import string
  17 import cgi
  18 from types import MethodType, FunctionType
  19 from compiler.ast import CallFunc, Name, Const
  20 from jinja.nodes import Trans
  21 from jinja.datastructure import Context, TemplateData
  22 from jinja.exceptions import SecurityException, TemplateNotFound
  23
#: the python2.4 version of deque is missing the remove method.
#: because a python-level loop that searches for the value to remove
#: is slower, deque is only used on python2.5 or higher. on older
#: versions `deque` is None and `CacheDict` falls back to a plain list.
if sys.version_info >= (2, 5):
    from collections import deque
else:
    deque = None

#: upper bound `safe_range` clamps its `start` and `stop` arguments to
MAX_RANGE = 1000000

#: splits text on whitespace runs; the capturing group keeps the
#: whitespace in the result so the text can be joined back unchanged
_word_split_re = re.compile(r'(\s+)')

#: splits one word into leading punctuation (``lead``), the actual
#: content (``middle``) and trailing punctuation (``trail``)
_punctuation_re = re.compile(
    '^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' %  (
        '|'.join([re.escape(p) for p in ('(', '<', '&lt;')]),
        '|'.join([re.escape(p) for p in ('.', ',', ')', '>', '\n', '&gt;')])
    )
)

#: deliberately simple check for something that looks like an email
#: address (``something@host.tld``); used by `urlize`
_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')

#: environment lazily created by `from_string` and reused afterwards
_from_string_env = None

#: html escape helper, used by `generate_lorem_ipsum`
escape = cgi.escape
  50
  51
  52 def urlize(text, trim_url_limit=None, nofollow=False):
  53     """
  54     Converts any URLs in text into clickable links. Works on http://,
  55     https:// and www. links. Links can have trailing punctuation (periods,
  56     commas, close-parens) and leading punctuation (opening parens) and
  57     it'll still do the right thing.
  58
  59     If trim_url_limit is not None, the URLs in link text will be limited
  60     to trim_url_limit characters.
  61
  62     If nofollow is True, the URLs in link text will get a rel="nofollow"
  63     attribute.
  64     """
  65     trim_url = lambda x, limit=trim_url_limit: limit is not None \
  66                          and (x[:limit] + (len(x) >=limit and '...'
  67                          or '')) or x
  68     words = _word_split_re.split(text)
  69     nofollow_attr = nofollow and ' rel="nofollow"' or ''
  70     for i, word in enumerate(words):
  71         match = _punctuation_re.match(word)
  72         if match:
  73             lead, middle, trail = match.groups()
  74             if middle.startswith('www.') or (
  75                 '@' not in middle and
  76                 not middle.startswith('http://') and
  77                 len(middle) > 0 and
  78                 middle[0] in string.letters + string.digits and (
  79                     middle.endswith('.org') or
  80                     middle.endswith('.net') or
  81                     middle.endswith('.com')
  82                 )):
  83                 middle = '<a href="http://%s"%s>%s</a>' % (middle,
  84                     nofollow_attr, trim_url(middle))
  85             if middle.startswith('http://') or \
  86                middle.startswith('https://'):
  87                 middle = '<a href="%s"%s>%s</a>' % (middle,
  88                     nofollow_attr, trim_url(middle))
  89             if '@' in middle and not middle.startswith('www.') and \
  90                not ':' in middle and _simple_email_re.match(middle):
  91                 middle = '<a href="mailto:%s">%s</a>' % (middle, middle)
  92             if lead + middle + trail != word:
  93                 words[i] = lead + middle + trail
  94     return u''.join(words)
  95
  96
  97 def from_string(source):
  98     """
  99     Create a template from the template source.
 100     """
 101     global _from_string_env
 102     if _from_string_env is None:
 103         from jinja.environment import Environment
 104         _from_string_env = Environment()
 105     return _from_string_env.from_string(source)
 106
 107
 108 def get_attribute(obj, name):
 109     """
 110     Return the attribute from name. Raise either `AttributeError`
 111     or `SecurityException` if something goes wrong.
 112     """
 113     if not isinstance(name, basestring):
 114         raise AttributeError(name)
 115     if name[:2] == name[-2:] == '__' or name[:2] == '::':
 116         raise SecurityException('not allowed to access internal attributes')
 117     if (obj.__class__ is FunctionType and name.startswith('func_') or
 118         obj.__class__ is MethodType and name.startswith('im_')):
 119         raise SecurityException('not allowed to access function attributes')
 120     r = getattr(obj, 'jinja_allowed_attributes', None)
 121     if r is not None and name not in r:
 122         raise SecurityException('not allowed attribute accessed')
 123     return getattr(obj, name)
 124
 125
 126 def debug_context(env, context):
 127     """
 128     Use this function in templates to get a printed context.
 129     """
 130     from pprint import pformat
 131     return pformat(context.to_dict())
 132 debug_context.jinja_context_callable = True
 133
 134
 135 def safe_range(start, stop=None, step=None):
 136     """
 137     "Safe" form of range that does not generate too large lists.
 138     """
 139     # this also works with None since None is always smaller than
 140     # any other value.
 141     if start > MAX_RANGE:
 142         start = MAX_RANGE
 143     if stop > MAX_RANGE:
 144         stop = MAX_RANGE
 145     if step is None:
 146         step = 1
 147     if stop is None:
 148         return range(0, start, step)
 149     return range(start, stop, step)
 150
 151
 152 def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
 153     """
 154     Generate some lorem impsum for the template.
 155     """
 156     from jinja.constants import LOREM_IPSUM_WORDS
 157     from random import choice, random, randrange
 158     words = LOREM_IPSUM_WORDS.split()
 159     result = []
 160
 161     for _ in xrange(n):
 162         next_capitalized = True
 163         last_comma = last_fullstop = 0
 164         word = None
 165         last = None
 166         p = []
 167
 168         # each paragraph contains out of 20 to 100 words.
 169         for idx, _ in enumerate(xrange(randrange(min, max))):
 170             while True:
 171                 word = choice(words)
 172                 if word != last:
 173                     last = word
 174                     break
 175             if next_capitalized:
 176                 word = word.capitalize()
 177                 next_capitalized = False
 178             # add commas
 179             if idx - randrange(3, 8) > last_comma:
 180                 last_comma = idx
 181                 last_fullstop += 2
 182                 word += ','
 183             # add end of sentences
 184             if idx - randrange(10, 20) > last_fullstop:
 185                 last_comma = last_fullstop = idx
 186                 word += '.'
 187                 next_capitalized = True
 188             p.append(word)
 189
 190         # ensure that the paragraph ends with a dot.
 191         p = u' '.join(p)
 192         if p.endswith(','):
 193             p = p[:-1] + '.'
 194         elif not p.endswith('.'):
 195             p += '.'
 196         result.append(p)
 197
 198     if not html:
 199         return u'\n\n'.join(result)
 200     return u'\n'.join([u'<p>%s</p>' % escape(x) for x in result])
 201
 202
 203 def watch_changes(env, context, iterable, *attributes):
 204     """
 205     Wise replacement for ``{% ifchanged %}``.
 206     """
 207     # find the attributes to watch
 208     if attributes:
 209         tests = []
 210         tmp = []
 211         for attribute in attributes:
 212             if isinstance(attribute, (str, unicode, int, long, bool)):
 213                 tmp.append(attribute)
 214             else:
 215                 tests.append(tuple(attribute))
 216         if tmp:
 217             tests.append(tuple(attribute))
 218         last = tuple([object() for x in tests])
 219     # or no attributes if we watch the object itself
 220     else:
 221         tests = None
 222         last = object()
 223
 224     # iterate trough it and keep check the attributes or values
 225     for item in iterable:
 226         if tests is None:
 227             cur = item
 228         else:
 229             cur = tuple([env.get_attributes(item, x) for x in tests])
 230         if cur != last:
 231             changed = True
 232             last = cur
 233         else:
 234             changed = False
 235         yield changed, item
 236 watch_changes.jinja_context_callable = True
 237
 238
 239 # python2.4 and lower has a bug regarding joining of broken generators.
 240 # because of the runtime debugging system we have to keep track of the
 241 # number of frames to skip. that's what RUNTIME_EXCEPTION_OFFSET is for.
 242 if sys.version_info < (2, 5):
 243     capture_generator = lambda gen: u''.join(tuple(gen))
 244     RUNTIME_EXCEPTION_OFFSET = 2
 245
 246 # this should be faster and used in python2.5 and higher
 247 else:
 248     capture_generator = u''.join
 249     RUNTIME_EXCEPTION_OFFSET = 1
 250
 251
 252 def buffereater(f):
 253     """
 254     Used by the python translator to capture output of substreams.
 255     (macros, filter sections etc)
 256     """
 257     def wrapped(*args, **kwargs):
 258         return TemplateData(capture_generator(f(*args, **kwargs)))
 259     return wrapped
 260
 261
def fake_template_exception(exception, filename, lineno, source,
                            context_or_env):
    """
    Raise an exception "in a template" and return the resulting
    ``sys.exc_info()`` tuple ``(type, value, traceback)``. This is used
    for runtime debugging, not compile time.

    `exception` is the exception to raise, `filename` and `lineno` the
    template location the traceback should point to (`filename` may be
    `None`), `source` the template source or `None`. `context_or_env`
    is either a `Context`, in which case its variables are used as
    locals of the fake frame, or an environment.
    """
    # some traceback systems allow to skip frames
    __traceback_hide__ = True
    if isinstance(context_or_env, Context):
        env = context_or_env.environment
        namespace = context_or_env.to_dict()
    else:
        env = context_or_env
        namespace = {}

    # generate an jinja unique filename used so that linecache
    # gets data that doesn't interferes with other modules
    if filename is None:
        from random import randrange
        vfilename = 'jinja://~%d' % randrange(0, 10000)
        filename = '<string>'
    else:
        vfilename = 'jinja://%s' % filename

    # pad the code with newlines so that the raise statement ends up
    # on line `lineno` of the compiled code
    offset = '\n' * (lineno - 1)
    code = compile(offset + 'raise __exception_to_raise__',
                   vfilename or '<template>', 'exec')

    # register the template source for the virtual filename
    loader = TracebackLoader(env, source, filename)
    loader.update_linecache(vfilename)
    globals = {
        '__name__':                 vfilename,
        '__file__':                 vfilename,
        '__loader__':               loader,
        '__exception_to_raise__':   exception
    }
    # run the raise statement and catch whatever it raises, just to
    # get hold of the exception info including the new traceback
    try:
        exec code in globals, namespace
    except:
        return sys.exc_info()
 302
 303
 304 def translate_exception(template, exc_type, exc_value, traceback, context):
 305     """
 306     Translate an exception and return the new traceback.
 307     """
 308     error_line = traceback.tb_lineno
 309     for code_line, tmpl_filename, tmpl_line in template._debug_info[::-1]:
 310         if code_line <= error_line:
 311             break
 312     else:
 313         # no debug symbol found. give up
 314         return traceback
 315
 316     return fake_template_exception(exc_value, tmpl_filename, tmpl_line,
 317                                    template._source, context)[2]
 318
 319
 320 def raise_syntax_error(exception, env, source=None):
 321     """
 322     This method raises an exception that includes more debugging
 323     informations so that debugging works better. Unlike
 324     `translate_exception` this method raises the exception with
 325     the traceback.
 326     """
 327     exc_info = fake_template_exception(exception, exception.filename,
 328                                        exception.lineno, source, env)
 329     raise exc_info[0], exc_info[1], exc_info[2]
 330
 331
 332 def collect_translations(ast):
 333     """
 334     Collect all translatable strings for the given ast. The
 335     return value is a list of tuples in the form ``(lineno, singular,
 336     plural)``. If a translation doesn't require a plural form the
 337     third item is `None`.
 338     """
 339     todo = [ast]
 340     result = []
 341     while todo:
 342         node = todo.pop()
 343         if node.__class__ is Trans:
 344             result.append((node.lineno, node.singular, node.plural))
 345         elif node.__class__ is CallFunc and \
 346              node.node.__class__ is Name and \
 347              node.node.name == '_':
 348             if len(node.args) in (1, 3):
 349                 args = []
 350                 for arg in node.args:
 351                     if not arg.__class__ is Const:
 352                         break
 353                     args.append(arg.value)
 354                 else:
 355                     if len(args) == 1:
 356                         singular = args[0]
 357                         plural = None
 358                     else:
 359                         singular, plural, _ = args
 360                     result.append((node.lineno, singular, plural))
 361         todo.extend(node.getChildNodes())
 362     result.sort(lambda a, b: cmp(a[0], b[0]))
 363     return result
 364
 365
 366 class TracebackLoader(object):
 367     """
 368     Fake importer that just returns the source of a template.
 369     """
 370
 371     def __init__(self, environment, source, filename):
 372         self.loader = environment.loader
 373         self.source = source
 374         self.filename = filename
 375
 376     def update_linecache(self, virtual_filename):
 377         """
 378         Hacky way to let traceback systems know about the
 379         Jinja template sourcecode. Very hackish indeed.
 380         """
 381         # check for linecache, not every implementation of python
 382         # might have such an module.
 383         try:
 384             from linecache import cache
 385         except ImportError:
 386             return
 387         data = self.get_source(None)
 388         cache[virtual_filename] = (
 389             len(data),
 390             None,
 391             data.splitlines(True),
 392             virtual_filename
 393         )
 394
 395     def get_source(self, impname):
 396         source = ''
 397         if self.source is not None:
 398             source = self.source
 399         elif self.loader is not None:
 400             try:
 401                 source = self.loader.get_source(self.filename)
 402             except TemplateNotFound:
 403                 pass
 404         if isinstance(source, unicode):
 405             source = source.encode('utf-8')
 406         return source
 407
 408
 409 class CacheDict(object):
 410     """
 411     A dict like object that stores a limited number of items and forgets
 412     about the least recently used items::
 413
 414         >>> cache = CacheDict(3)
 415         >>> cache['A'] = 0
 416         >>> cache['B'] = 1
 417         >>> cache['C'] = 2
 418         >>> len(cache)
 419         3
 420
 421     If we now access 'A' again it has a higher priority than B::
 422
 423         >>> cache['A']
 424         0
 425
 426     If we add a new item 'D' now 'B' will disappear::
 427
 428         >>> cache['D'] = 3
 429         >>> len(cache)
 430         3
 431         >>> 'B' in cache
 432         False
 433
 434     If you iterate over the object the most recently used item will be
 435     yielded First::
 436
 437         >>> for item in cache:
 438         ...     print item
 439         D
 440         A
 441         C
 442
 443     If you want to iterate the other way round use ``reverse(cache)``.
 444
 445     Implementation note: This is not a nice way to solve that problem but
 446     for smaller capacities it's faster than a linked list.
 447     Perfect for template environments where you don't expect too many
 448     different keys.
 449     """
 450
 451     def __init__(self, capacity):
 452         self.capacity = capacity
 453         self._mapping = {}
 454
 455         # use a deque here if possible
 456         if deque is not None:
 457             self._queue = deque()
 458             self._popleft = self._queue.popleft
 459         # python2.3/2.4, just use a list
 460         else:
 461             self._queue = []
 462             pop = self._queue.pop
 463             self._popleft = lambda: pop(0)
 464
 465         # alias all queue methods for faster lookup
 466         self._pop = self._queue.pop
 467         self._remove = self._queue.remove
 468         self._append = self._queue.append
 469
 470     def copy(self):
 471         """
 472         Return an shallow copy of the instance.
 473         """
 474         rv = CacheDict(self.capacity)
 475         rv._mapping.update(self._mapping)
 476         rv._queue = self._queue[:]
 477         return rv
 478
 479     def get(self, key, default=None):
 480         """
 481         Return an item from the cache dict or `default`
 482         """
 483         if key in self:
 484             return self[key]
 485         return default
 486
 487     def setdefault(self, key, default=None):
 488         """
 489         Set `default` if the key is not in the cache otherwise
 490         leave unchanged. Return the value of this key.
 491         """
 492         if key in self:
 493             return self[key]
 494         self[key] = default
 495         return default
 496
 497     def clear(self):
 498         """
 499         Clear the cache dict.
 500         """
 501         self._mapping.clear()
 502         try:
 503             self._queue.clear()
 504         except AttributeError:
 505             del self._queue[:]
 506
 507     def __contains__(self, key):
 508         """
 509         Check if a key exists in this cache dict.
 510         """
 511         return key in self._mapping
 512
 513     def __len__(self):
 514         """
 515         Return the current size of the cache dict.
 516         """
 517         return len(self._mapping)
 518
 519     def __repr__(self):
 520         return '<%s %r>' % (
 521             self.__class__.__name__,
 522             self._mapping
 523         )
 524
 525     def __getitem__(self, key):
 526         """
 527         Get an item from the cache dict. Moves the item up so that
 528         it has the highest priority then.
 529
 530         Raise an `KeyError` if it does not exist.
 531         """
 532         rv = self._mapping[key]
 533         if self._queue[-1] != key:
 534             self._remove(key)
 535             self._append(key)
 536         return rv
 537
 538     def __setitem__(self, key, value):
 539         """
 540         Sets the value for an item. Moves the item up so that it
 541         has the highest priority then.
 542         """
 543         if key in self._mapping:
 544             self._remove(key)
 545         elif len(self._mapping) == self.capacity:
 546             del self._mapping[self._popleft()]
 547         self._append(key)
 548         self._mapping[key] = value
 549
 550     def __delitem__(self, key):
 551         """
 552         Remove an item from the cache dict.
 553         Raise an `KeyError` if it does not exist.
 554         """
 555         del self._mapping[key]
 556         self._remove(key)
 557
 558     def __iter__(self):
 559         """
 560         Iterate over all values in the cache dict, ordered by
 561         the most recent usage.
 562         """
 563         try:
 564             return reversed(self._queue)
 565         except NameError:
 566             return iter(self._queue[::-1])
 567
 568     def __reversed__(self):
 569         """
 570         Iterate over the values in the cache dict, oldest items
 571         coming first.
 572         """
 573         return iter(self._queue)
 574
 575     __copy__ = copy
 576
 577     def __deepcopy__(self):
 578         """
 579         Return a deep copy of the cache dict.
 580         """
 581         from copy import deepcopy
 582         rv = CacheDict(self.capacity)
 583         rv._mapping = deepcopy(self._mapping)
 584         rv._queue = deepcopy(self._queue)
 585         return rv