jinja2/bccache.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     jinja2.bccache
   4     ~~~~~~~~~~~~~~
   5
   6     This module implements the bytecode cache system Jinja is optionally
   7     using.  This is useful if you have very complex template situations and
   8     the compilation of all those templates slows down your application too
   9     much.
  10
  11     Situations where this is useful are often forking web applications that
  12     are initialized on the first request.
  13
  14     :copyright: (c) 2010 by the Jinja Team.
  15     :license: BSD.
  16 """
  17 from os import path, listdir
  18 import sys
  19 import marshal
  20 import tempfile
  21 import cPickle as pickle
  22 import fnmatch
  23 try:
  24     from hashlib import sha1
  25 except ImportError:
  26     from sha import new as sha1
  27 from jinja2.utils import open_if_exists
  28
  29
  30 # marshal works better on 3.x, one hack less required
  31 if sys.version_info > (3, 0):
  32     from io import BytesIO
  33     marshal_dump = marshal.dump
  34     marshal_load = marshal.load
  35 else:
  36     from cStringIO import StringIO as BytesIO
  37
  38     def marshal_dump(code, f):
  39         if isinstance(f, file):
  40             marshal.dump(code, f)
  41         else:
  42             f.write(marshal.dumps(code))
  43
  44     def marshal_load(f):
  45         if isinstance(f, file):
  46             return marshal.load(f)
  47         return marshal.loads(f.read())
  48
  49
  50 bc_version = 2
  51
  52 # magic version used to only change with new jinja versions.  With 2.6
  53 # we change this to also take Python version changes into account.  The
  54 # reason for this is that Python tends to segfault if fed earlier bytecode
  55 # versions because someone thought it would be a good idea to reuse opcodes
  56 # or make Python incompatible with earlier versions.
  57 bc_magic = 'j2'.encode('ascii') + \
  58     pickle.dumps(bc_version, 2) + \
  59     pickle.dumps((sys.version_info[0] << 24) | sys.version_info[1])
  60
  61
  62 class Bucket(object):
  63     """Buckets are used to store the bytecode for one template.  It's created
  64     and initialized by the bytecode cache and passed to the loading functions.
  65
  66     The buckets get an internal checksum from the cache assigned and use this
  67     to automatically reject outdated cache material.  Individual bytecode
  68     cache subclasses don't have to care about cache invalidation.
  69     """
  70
  71     def __init__(self, environment, key, checksum):
  72         self.environment = environment
  73         self.key = key
  74         self.checksum = checksum
  75         self.reset()
  76
  77     def reset(self):
  78         """Resets the bucket (unloads the bytecode)."""
  79         self.code = None
  80
  81     def load_bytecode(self, f):
  82         """Loads bytecode from a file or file like object."""
  83         # make sure the magic header is correct
  84         magic = f.read(len(bc_magic))
  85         if magic != bc_magic:
  86             self.reset()
  87             return
  88         # the source code of the file changed, we need to reload
  89         checksum = pickle.load(f)
  90         if self.checksum != checksum:
  91             self.reset()
  92             return
  93         self.code = marshal_load(f)
  94
  95     def write_bytecode(self, f):
  96         """Dump the bytecode into the file or file like object passed."""
  97         if self.code is None:
  98             raise TypeError('can\'t write empty bucket')
  99         f.write(bc_magic)
 100         pickle.dump(self.checksum, f, 2)
 101         marshal_dump(self.code, f)
 102
 103     def bytecode_from_string(self, string):
 104         """Load bytecode from a string."""
 105         self.load_bytecode(BytesIO(string))
 106
 107     def bytecode_to_string(self):
 108         """Return the bytecode as string."""
 109         out = BytesIO()
 110         self.write_bytecode(out)
 111         return out.getvalue()
 112
 113
 114 class BytecodeCache(object):
 115     """To implement your own bytecode cache you have to subclass this class
 116     and override :meth:`load_bytecode` and :meth:`dump_bytecode`.  Both of
 117     these methods are passed a :class:`~jinja2.bccache.Bucket`.
 118
 119     A very basic bytecode cache that saves the bytecode on the file system::
 120
 121         from os import path
 122
 123         class MyCache(BytecodeCache):
 124
 125             def __init__(self, directory):
 126                 self.directory = directory
 127
 128             def load_bytecode(self, bucket):
 129                 filename = path.join(self.directory, bucket.key)
 130                 if path.exists(filename):
 131                     with open(filename, 'rb') as f:
 132                         bucket.load_bytecode(f)
 133
 134             def dump_bytecode(self, bucket):
 135                 filename = path.join(self.directory, bucket.key)
 136                 with open(filename, 'wb') as f:
 137                     bucket.write_bytecode(f)
 138
 139     A more advanced version of a filesystem based bytecode cache is part of
 140     Jinja2.
 141     """
 142
 143     def load_bytecode(self, bucket):
 144         """Subclasses have to override this method to load bytecode into a
 145         bucket.  If they are not able to find code in the cache for the
 146         bucket, it must not do anything.
 147         """
 148         raise NotImplementedError()
 149
 150     def dump_bytecode(self, bucket):
 151         """Subclasses have to override this method to write the bytecode
 152         from a bucket back to the cache.  If it unable to do so it must not
 153         fail silently but raise an exception.
 154         """
 155         raise NotImplementedError()
 156
 157     def clear(self):
 158         """Clears the cache.  This method is not used by Jinja2 but should be
 159         implemented to allow applications to clear the bytecode cache used
 160         by a particular environment.
 161         """
 162
 163     def get_cache_key(self, name, filename=None):
 164         """Returns the unique hash key for this template name."""
 165         hash = sha1(name.encode('utf-8'))
 166         if filename is not None:
 167             filename = '|' + filename
 168             if isinstance(filename, unicode):
 169                 filename = filename.encode('utf-8')
 170             hash.update(filename)
 171         return hash.hexdigest()
 172
 173     def get_source_checksum(self, source):
 174         """Returns a checksum for the source."""
 175         return sha1(source.encode('utf-8')).hexdigest()
 176
 177     def get_bucket(self, environment, name, filename, source):
 178         """Return a cache bucket for the given template.  All arguments are
 179         mandatory but filename may be `None`.
 180         """
 181         key = self.get_cache_key(name, filename)
 182         checksum = self.get_source_checksum(source)
 183         bucket = Bucket(environment, key, checksum)
 184         self.load_bytecode(bucket)
 185         return bucket
 186
 187     def set_bucket(self, bucket):
 188         """Put the bucket into the cache."""
 189         self.dump_bytecode(bucket)
 190
 191
 192 class FileSystemBytecodeCache(BytecodeCache):
 193     """A bytecode cache that stores bytecode on the filesystem.  It accepts
 194     two arguments: The directory where the cache items are stored and a
 195     pattern string that is used to build the filename.
 196
 197     If no directory is specified the system temporary items folder is used.
 198
 199     The pattern can be used to have multiple separate caches operate on the
 200     same directory.  The default pattern is ``'__jinja2_%s.cache'``.  ``%s``
 201     is replaced with the cache key.
 202
 203     >>> bcc = FileSystemBytecodeCache('/tmp/jinja_cache', '%s.cache')
 204
 205     This bytecode cache supports clearing of the cache using the clear method.
 206     """
 207
 208     def __init__(self, directory=None, pattern='__jinja2_%s.cache'):
 209         if directory is None:
 210             directory = tempfile.gettempdir()
 211         self.directory = directory
 212         self.pattern = pattern
 213
 214     def _get_cache_filename(self, bucket):
 215         return path.join(self.directory, self.pattern % bucket.key)
 216
 217     def load_bytecode(self, bucket):
 218         f = open_if_exists(self._get_cache_filename(bucket), 'rb')
 219         if f is not None:
 220             try:
 221                 bucket.load_bytecode(f)
 222             finally:
 223                 f.close()
 224
 225     def dump_bytecode(self, bucket):
 226         f = open(self._get_cache_filename(bucket), 'wb')
 227         try:
 228             bucket.write_bytecode(f)
 229         finally:
 230             f.close()
 231
 232     def clear(self):
 233         # imported lazily here because google app-engine doesn't support
 234         # write access on the file system and the function does not exist
 235         # normally.
 236         from os import remove
 237         files = fnmatch.filter(listdir(self.directory), self.pattern % '*')
 238         for filename in files:
 239             try:
 240                 remove(path.join(self.directory, filename))
 241             except OSError:
 242                 pass
 243
 244
 245 class MemcachedBytecodeCache(BytecodeCache):
 246     """This class implements a bytecode cache that uses a memcache cache for
 247     storing the information.  It does not enforce a specific memcache library
 248     (tummy's memcache or cmemcache) but will accept any class that provides
 249     the minimal interface required.
 250
 251     Libraries compatible with this class:
 252
 253     -   `werkzeug <http://werkzeug.pocoo.org/>`_.contrib.cache
 254     -   `python-memcached <http://www.tummy.com/Community/software/python-memcached/>`_
 255     -   `cmemcache <http://gijsbert.org/cmemcache/>`_
 256
 257     (Unfortunately the django cache interface is not compatible because it
 258     does not support storing binary data, only unicode.  You can however pass
 259     the underlying cache client to the bytecode cache which is available
 260     as `django.core.cache.cache._client`.)
 261
 262     The minimal interface for the client passed to the constructor is this:
 263
 264     .. class:: MinimalClientInterface
 265
 266         .. method:: set(key, value[, timeout])
 267
 268             Stores the bytecode in the cache.  `value` is a string and
 269             `timeout` the timeout of the key.  If timeout is not provided
 270             a default timeout or no timeout should be assumed, if it's
 271             provided it's an integer with the number of seconds the cache
 272             item should exist.
 273
 274         .. method:: get(key)
 275
 276             Returns the value for the cache key.  If the item does not
 277             exist in the cache the return value must be `None`.
 278
 279     The other arguments to the constructor are the prefix for all keys that
 280     is added before the actual cache key and the timeout for the bytecode in
 281     the cache system.  We recommend a high (or no) timeout.
 282
 283     This bytecode cache does not support clearing of used items in the cache.
 284     The clear method is a no-operation function.
 285     """
 286
 287     def __init__(self, client, prefix='jinja2/bytecode/', timeout=None):
 288         self.client = client
 289         self.prefix = prefix
 290         self.timeout = timeout
 291
 292     def load_bytecode(self, bucket):
 293         code = self.client.get(self.prefix + bucket.key)
 294         if code is not None:
 295             bucket.bytecode_from_string(code)
 296
 297     def dump_bytecode(self, bucket):
 298         args = (self.prefix + bucket.key, bucket.bytecode_to_string())
 299         if self.timeout is not None:
 300             args += (self.timeout,)
 301         self.client.set(*args)