MarkupSafe is now used for the Jinja2 speedups

author Armin Ronacher <armin.ronacher@active-4.com>

Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)

committer Armin Ronacher <armin.ronacher@active-4.com>

Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)
author Armin Ronacher <armin.ronacher@active-4.com>
Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)
committer Armin Ronacher <armin.ronacher@active-4.com>
Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)
diff --git a/jinja2/_markupsafe/_bundle.py b/jinja2/_markupsafe/_bundle.py

new file mode 100644 (file)

index 0000000..e694faf
--- /dev/null
+++ b/jinja2/_markupsafe/_bundle.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""
+    jinja2._markupsafe._bundle
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    This script pulls in markupsafe from a source folder and
+    bundles it with Jinja2.  It does not pull in the speedups
+    module though.
+
+    :copyright: Copyright 2010 by the Jinja team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+import sys
+import os
+import re
+
+
+def rewrite_imports(lines):
+    for idx, line in enumerate(lines):
+        new_line = re.sub(r'(import|from)\s+markupsafe\b',
+                          r'\1 jinja2._markupsafe', line)
+        if new_line != line:
+            lines[idx] = new_line
+
+
+def main():
+    if len(sys.argv) != 2:
+        print 'error: only argument is path to markupsafe'
+        sys.exit(1)
+    basedir = os.path.dirname(__file__)
+    markupdir = sys.argv[1]
+    for filename in os.listdir(markupdir):
+        if filename.endswith('.py'):
+            f = open(os.path.join(markupdir, filename))
+            try:
+                lines = list(f)
+            finally:
+                f.close()
+            rewrite_imports(lines)
+            f = open(os.path.join(basedir, filename), 'w')
+            try:
+                for line in lines:
+                    f.write(line)
+            finally:
+                f.close()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/jinja2/constants.py b/jinja2/constants.py

index d83e44bbf9ea2746e4985216a4a541c1d4d905dc..cab203cc772f7175193a3781dff8b875ac9f1a9e 100644 (file)
--- a/jinja2/constants.py
+++ b/jinja2/constants.py
@@ -30,261 +30,3 @@ sociis sociosqu sodales sollicitudin suscipit suspendisse taciti tellus tempor
  tempus tincidunt torquent tortor tristique turpis ullamcorper ultrices
  ultricies urna ut varius vehicula vel velit venenatis vestibulum vitae vivamus
  viverra volutpat vulputate'''
-
-
-#: a dict of all html entities + apos
-HTML_ENTITIES = {
-    'AElig': 198,
-    'Aacute': 193,
-    'Acirc': 194,
-    'Agrave': 192,
-    'Alpha': 913,
-    'Aring': 197,
-    'Atilde': 195,
-    'Auml': 196,
-    'Beta': 914,
-    'Ccedil': 199,
-    'Chi': 935,
-    'Dagger': 8225,
-    'Delta': 916,
-    'ETH': 208,
-    'Eacute': 201,
-    'Ecirc': 202,
-    'Egrave': 200,
-    'Epsilon': 917,
-    'Eta': 919,
-    'Euml': 203,
-    'Gamma': 915,
-    'Iacute': 205,
-    'Icirc': 206,
-    'Igrave': 204,
-    'Iota': 921,
-    'Iuml': 207,
-    'Kappa': 922,
-    'Lambda': 923,
-    'Mu': 924,
-    'Ntilde': 209,
-    'Nu': 925,
-    'OElig': 338,
-    'Oacute': 211,
-    'Ocirc': 212,
-    'Ograve': 210,
-    'Omega': 937,
-    'Omicron': 927,
-    'Oslash': 216,
-    'Otilde': 213,
-    'Ouml': 214,
-    'Phi': 934,
-    'Pi': 928,
-    'Prime': 8243,
-    'Psi': 936,
-    'Rho': 929,
-    'Scaron': 352,
-    'Sigma': 931,
-    'THORN': 222,
-    'Tau': 932,
-    'Theta': 920,
-    'Uacute': 218,
-    'Ucirc': 219,
-    'Ugrave': 217,
-    'Upsilon': 933,
-    'Uuml': 220,
-    'Xi': 926,
-    'Yacute': 221,
-    'Yuml': 376,
-    'Zeta': 918,
-    'aacute': 225,
-    'acirc': 226,
-    'acute': 180,
-    'aelig': 230,
-    'agrave': 224,
-    'alefsym': 8501,
-    'alpha': 945,
-    'amp': 38,
-    'and': 8743,
-    'ang': 8736,
-    'apos': 39,
-    'aring': 229,
-    'asymp': 8776,
-    'atilde': 227,
-    'auml': 228,
-    'bdquo': 8222,
-    'beta': 946,
-    'brvbar': 166,
-    'bull': 8226,
-    'cap': 8745,
-    'ccedil': 231,
-    'cedil': 184,
-    'cent': 162,
-    'chi': 967,
-    'circ': 710,
-    'clubs': 9827,
-    'cong': 8773,
-    'copy': 169,
-    'crarr': 8629,
-    'cup': 8746,
-    'curren': 164,
-    'dArr': 8659,
-    'dagger': 8224,
-    'darr': 8595,
-    'deg': 176,
-    'delta': 948,
-    'diams': 9830,
-    'divide': 247,
-    'eacute': 233,
-    'ecirc': 234,
-    'egrave': 232,
-    'empty': 8709,
-    'emsp': 8195,
-    'ensp': 8194,
-    'epsilon': 949,
-    'equiv': 8801,
-    'eta': 951,
-    'eth': 240,
-    'euml': 235,
-    'euro': 8364,
-    'exist': 8707,
-    'fnof': 402,
-    'forall': 8704,
-    'frac12': 189,
-    'frac14': 188,
-    'frac34': 190,
-    'frasl': 8260,
-    'gamma': 947,
-    'ge': 8805,
-    'gt': 62,
-    'hArr': 8660,
-    'harr': 8596,
-    'hearts': 9829,
-    'hellip': 8230,
-    'iacute': 237,
-    'icirc': 238,
-    'iexcl': 161,
-    'igrave': 236,
-    'image': 8465,
-    'infin': 8734,
-    'int': 8747,
-    'iota': 953,
-    'iquest': 191,
-    'isin': 8712,
-    'iuml': 239,
-    'kappa': 954,
-    'lArr': 8656,
-    'lambda': 955,
-    'lang': 9001,
-    'laquo': 171,
-    'larr': 8592,
-    'lceil': 8968,
-    'ldquo': 8220,
-    'le': 8804,
-    'lfloor': 8970,
-    'lowast': 8727,
-    'loz': 9674,
-    'lrm': 8206,
-    'lsaquo': 8249,
-    'lsquo': 8216,
-    'lt': 60,
-    'macr': 175,
-    'mdash': 8212,
-    'micro': 181,
-    'middot': 183,
-    'minus': 8722,
-    'mu': 956,
-    'nabla': 8711,
-    'nbsp': 160,
-    'ndash': 8211,
-    'ne': 8800,
-    'ni': 8715,
-    'not': 172,
-    'notin': 8713,
-    'nsub': 8836,
-    'ntilde': 241,
-    'nu': 957,
-    'oacute': 243,
-    'ocirc': 244,
-    'oelig': 339,
-    'ograve': 242,
-    'oline': 8254,
-    'omega': 969,
-    'omicron': 959,
-    'oplus': 8853,
-    'or': 8744,
-    'ordf': 170,
-    'ordm': 186,
-    'oslash': 248,
-    'otilde': 245,
-    'otimes': 8855,
-    'ouml': 246,
-    'para': 182,
-    'part': 8706,
-    'permil': 8240,
-    'perp': 8869,
-    'phi': 966,
-    'pi': 960,
-    'piv': 982,
-    'plusmn': 177,
-    'pound': 163,
-    'prime': 8242,
-    'prod': 8719,
-    'prop': 8733,
-    'psi': 968,
-    'quot': 34,
-    'rArr': 8658,
-    'radic': 8730,
-    'rang': 9002,
-    'raquo': 187,
-    'rarr': 8594,
-    'rceil': 8969,
-    'rdquo': 8221,
-    'real': 8476,
-    'reg': 174,
-    'rfloor': 8971,
-    'rho': 961,
-    'rlm': 8207,
-    'rsaquo': 8250,
-    'rsquo': 8217,
-    'sbquo': 8218,
-    'scaron': 353,
-    'sdot': 8901,
-    'sect': 167,
-    'shy': 173,
-    'sigma': 963,
-    'sigmaf': 962,
-    'sim': 8764,
-    'spades': 9824,
-    'sub': 8834,
-    'sube': 8838,
-    'sum': 8721,
-    'sup': 8835,
-    'sup1': 185,
-    'sup2': 178,
-    'sup3': 179,
-    'supe': 8839,
-    'szlig': 223,
-    'tau': 964,
-    'there4': 8756,
-    'theta': 952,
-    'thetasym': 977,
-    'thinsp': 8201,
-    'thorn': 254,
-    'tilde': 732,
-    'times': 215,
-    'trade': 8482,
-    'uArr': 8657,
-    'uacute': 250,
-    'uarr': 8593,
-    'ucirc': 251,
-    'ugrave': 249,
-    'uml': 168,
-    'upsih': 978,
-    'upsilon': 965,
-    'uuml': 252,
-    'weierp': 8472,
-    'xi': 958,
-    'yacute': 253,
-    'yen': 165,
-    'yuml': 255,
-    'zeta': 950,
-    'zwj': 8205,
-    'zwnj': 8204
-}
diff --git a/jinja2/utils.py b/jinja2/utils.py

index 0db4653e9f8208136b819ef14548b97b7557c42d..7b77b8eb7d139a28c6d08891d6acb845c319180e 100644 (file)
--- a/jinja2/utils.py
+++ b/jinja2/utils.py
@@ -349,205 +349,6 @@ def generate_lorem_ipsum(n=5, html=True, min=20, max=100):
      return Markup(u'\n'.join(u'<p>%s</p>' % escape(x) for x in result))
  
  
-class Markup(unicode):
-    r"""Marks a string as being safe for inclusion in HTML/XML output without
-    needing to be escaped.  This implements the `__html__` interface a couple
-    of frameworks and web applications use.  :class:`Markup` is a direct
-    subclass of `unicode` and provides all the methods of `unicode` just that
-    it escapes arguments passed and always returns `Markup`.
-
-    The `escape` function returns markup objects so that double escaping can't
-    happen.  If you want to use autoescaping in Jinja just enable the
-    autoescaping feature in the environment.
-
-    The constructor of the :class:`Markup` class can be used for three
-    different things:  When passed an unicode object it's assumed to be safe,
-    when passed an object with an HTML representation (has an `__html__`
-    method) that representation is used, otherwise the object passed is
-    converted into a unicode string and then assumed to be safe:
-
-    >>> Markup("Hello <em>World</em>!")
-    Markup(u'Hello <em>World</em>!')
-    >>> class Foo(object):
-    ...  def __html__(self):
-    ...   return '<a href="#">foo</a>'
-    ... 
-    >>> Markup(Foo())
-    Markup(u'<a href="#">foo</a>')
-
-    If you want object passed being always treated as unsafe you can use the
-    :meth:`escape` classmethod to create a :class:`Markup` object:
-
-    >>> Markup.escape("Hello <em>World</em>!")
-    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
-
-    Operations on a markup string are markup aware which means that all
-    arguments are passed through the :func:`escape` function:
-
-    >>> em = Markup("<em>%s</em>")
-    >>> em % "foo & bar"
-    Markup(u'<em>foo &amp; bar</em>')
-    >>> strong = Markup("<strong>%(text)s</strong>")
-    >>> strong % {'text': '<blink>hacker here</blink>'}
-    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
-    >>> Markup("<em>Hello</em> ") + "<foo>"
-    Markup(u'<em>Hello</em> &lt;foo&gt;')
-    """
-    __slots__ = ()
-
-    def __new__(cls, base=u'', encoding=None, errors='strict'):
-        if hasattr(base, '__html__'):
-            base = base.__html__()
-        if encoding is None:
-            return unicode.__new__(cls, base)
-        return unicode.__new__(cls, base, encoding, errors)
-
-    def __html__(self):
-        return self
-
-    def __add__(self, other):
-        if hasattr(other, '__html__') or isinstance(other, basestring):
-            return self.__class__(unicode(self) + unicode(escape(other)))
-        return NotImplemented
-
-    def __radd__(self, other):
-        if hasattr(other, '__html__') or isinstance(other, basestring):
-            return self.__class__(unicode(escape(other)) + unicode(self))
-        return NotImplemented
-
-    def __mul__(self, num):
-        if isinstance(num, (int, long)):
-            return self.__class__(unicode.__mul__(self, num))
-        return NotImplemented
-    __rmul__ = __mul__
-
-    def __mod__(self, arg):
-        if isinstance(arg, tuple):
-            arg = tuple(imap(_MarkupEscapeHelper, arg))
-        else:
-            arg = _MarkupEscapeHelper(arg)
-        return self.__class__(unicode.__mod__(self, arg))
-
-    def __repr__(self):
-        return '%s(%s)' % (
-            self.__class__.__name__,
-            unicode.__repr__(self)
-        )
-
-    def join(self, seq):
-        return self.__class__(unicode.join(self, imap(escape, seq)))
-    join.__doc__ = unicode.join.__doc__
-
-    def split(self, *args, **kwargs):
-        return map(self.__class__, unicode.split(self, *args, **kwargs))
-    split.__doc__ = unicode.split.__doc__
-
-    def rsplit(self, *args, **kwargs):
-        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
-    rsplit.__doc__ = unicode.rsplit.__doc__
-
-    def splitlines(self, *args, **kwargs):
-        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
-    splitlines.__doc__ = unicode.splitlines.__doc__
-
-    def unescape(self):
-        r"""Unescape markup again into an unicode string.  This also resolves
-        known HTML4 and XHTML entities:
-
-        >>> Markup("Main &raquo; <em>About</em>").unescape()
-        u'Main \xbb <em>About</em>'
-        """
-        from jinja2.constants import HTML_ENTITIES
-        def handle_match(m):
-            name = m.group(1)
-            if name in HTML_ENTITIES:
-                return unichr(HTML_ENTITIES[name])
-            try:
-                if name[:2] in ('#x', '#X'):
-                    return unichr(int(name[2:], 16))
-                elif name.startswith('#'):
-                    return unichr(int(name[1:]))
-            except ValueError:
-                pass
-            return u''
-        return _entity_re.sub(handle_match, unicode(self))
-
-    def striptags(self):
-        r"""Unescape markup into an unicode string and strip all tags.  This
-        also resolves known HTML4 and XHTML entities.  Whitespace is
-        normalized to one:
-
-        >>> Markup("Main &raquo;  <em>About</em>").striptags()
-        u'Main \xbb About'
-        """
-        stripped = u' '.join(_striptags_re.sub('', self).split())
-        return Markup(stripped).unescape()
-
-    @classmethod
-    def escape(cls, s):
-        """Escape the string.  Works like :func:`escape` with the difference
-        that for subclasses of :class:`Markup` this function would return the
-        correct subclass.
-        """
-        rv = escape(s)
-        if rv.__class__ is not cls:
-            return cls(rv)
-        return rv
-
-    def make_wrapper(name):
-        orig = getattr(unicode, name)
-        def func(self, *args, **kwargs):
-            args = _escape_argspec(list(args), enumerate(args))
-            _escape_argspec(kwargs, kwargs.iteritems())
-            return self.__class__(orig(self, *args, **kwargs))
-        func.__name__ = orig.__name__
-        func.__doc__ = orig.__doc__
-        return func
-
-    for method in '__getitem__', 'capitalize', \
-                  'title', 'lower', 'upper', 'replace', 'ljust', \
-                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
-                  'translate', 'expandtabs', 'swapcase', 'zfill':
-        locals()[method] = make_wrapper(method)
-
-    # new in python 2.5
-    if hasattr(unicode, 'partition'):
-        partition = make_wrapper('partition'),
-        rpartition = make_wrapper('rpartition')
-
-    # new in python 2.6
-    if hasattr(unicode, 'format'):
-        format = make_wrapper('format')
-
-    # not in python 3
-    if hasattr(unicode, '__getslice__'):
-        __getslice__ = make_wrapper('__getslice__')
-
-    del method, make_wrapper
-
-
-def _escape_argspec(obj, iterable):
-    """Helper for various string-wrapped functions."""
-    for key, value in iterable:
-        if hasattr(value, '__html__') or isinstance(value, basestring):
-            obj[key] = escape(value)
-    return obj
-
-
-class _MarkupEscapeHelper(object):
-    """Helper for Markup.__mod__"""
-
-    def __init__(self, obj):
-        self.obj = obj
-
-    __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
-    __str__ = lambda s: str(escape(s.obj))
-    __unicode__ = lambda s: unicode(escape(s.obj))
-    __repr__ = lambda s: str(escape(repr(s.obj)))
-    __int__ = lambda s: int(s.obj)
-    __float__ = lambda s: float(s.obj)
-
-
  class LRUCache(object):
      """A simple LRU Cache implementation."""
  
@@ -776,33 +577,14 @@ class Joiner(object):
          return self.sep
  
  
-# we have to import it down here as the speedups module imports the
-# markup type which is define above.
+# try markupsafe first, if that fails go with Jinja2's bundled version
+# of markupsafe.  Markupsafe was previously Jinja2's implementation of
+# the Markup object but was moved into a separate package in a patchleve
+# release
  try:
-    from jinja2._speedups import escape, soft_unicode
+    from markupsafe import Markup, escape, soft_unicode
  except ImportError:
-    def escape(s):
-        """Convert the characters &, <, >, ' and " in string s to HTML-safe
-        sequences.  Use this if you need to display text that might contain
-        such characters in HTML.  Marks return value as markup string.
-        """
-        if hasattr(s, '__html__'):
-            return s.__html__()
-        return Markup(unicode(s)
-            .replace('&', '&amp;')
-            .replace('>', '&gt;')
-            .replace('<', '&lt;')
-            .replace("'", '&#39;')
-            .replace('"', '&#34;')
-        )
-
-    def soft_unicode(s):
-        """Make a string unicode if it isn't already.  That way a markup
-        string is not converted back to unicode.
-        """
-        if not isinstance(s, unicode):
-            s = unicode(s)
-        return s
+    from jinja2._markupsafe import Markup, escape, soft_unicode
  
  
  # partials
diff --git a/setup.py b/setup.py

index e409e5587dbdad88a13ce8be9ae9fe0d1ba94b2f..cee83cc1817adab14b54f2b79f9c7383da346e56 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@ Jinja2==dev``.
  import os
  import sys
  
-from setuptools import setup, Extension, Feature
+from setuptools import setup
  
  # tell distribute to use 2to3 with our own fixers.
  extra = {}
@@ -77,15 +77,8 @@ setup(
          'Topic :: Software Development :: Libraries :: Python Modules',
          'Topic :: Text Processing :: Markup :: HTML'
      ],
-    packages=['jinja2', 'jinja2.testsuite', 'jinja2.testsuite.res'],
-    features={
-        'speedups': Feature("optional C speed-enhancements",
-            standard=False,
-            ext_modules=[
-                Extension('jinja2._speedups', ['jinja2/_speedups.c'])
-            ]
-        )
-    },
+    packages=['jinja2', 'jinja2.testsuite', 'jinja2.testsuite.res',
+              'jinja2._markupsafe'],
      extras_require={'i18n': ['Babel>=0.8']},
      test_suite='jinja2.testsuite.suite',
      include_package_data=True,
author	Armin Ronacher <armin.ronacher@active-4.com>
	Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)
committer	Armin Ronacher <armin.ronacher@active-4.com>
	Tue, 17 Aug 2010 09:57:07 +0000 (11:57 +0200)
jinja2/_markupsafe/_bundle.py	[new file with mode: 0644]	patch \| blob
jinja2/constants.py		patch \| blob \| history
jinja2/utils.py		patch \| blob \| history
setup.py		patch \| blob \| history