From: Armin Ronacher Date: Tue, 17 Aug 2010 09:57:18 +0000 (+0200) Subject: Pulled in code from markupsafe X-Git-Tag: 2.5.1~3 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=53675ce87c1462d19cd7960cd80602b5457263d9;p=jinja2.git Pulled in code from markupsafe --HG-- branch : trunk --- diff --git a/jinja2/_markupsafe/__init__.py b/jinja2/_markupsafe/__init__.py new file mode 100644 index 0000000..ec7bd57 --- /dev/null +++ b/jinja2/_markupsafe/__init__.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- +""" + markupsafe + ~~~~~~~~~~ + + Implements a Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +import re +from itertools import imap + + +__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent'] + + +_striptags_re = re.compile(r'(|<[^>]*>)') +_entity_re = re.compile(r'&([^;]+);') + + +class Markup(unicode): + r"""Marks a string as being safe for inclusion in HTML/XML output without + needing to be escaped. This implements the `__html__` interface a couple + of frameworks and web applications use. :class:`Markup` is a direct + subclass of `unicode` and provides all the methods of `unicode` just that + it escapes arguments passed and always returns `Markup`. + + The `escape` function returns markup objects so that double escaping can't + happen. + + The constructor of the :class:`Markup` class can be used for three + different things: When passed an unicode object it's assumed to be safe, + when passed an object with an HTML representation (has an `__html__` + method) that representation is used, otherwise the object passed is + converted into a unicode string and then assumed to be safe: + + >>> Markup("Hello World!") + Markup(u'Hello World!') + >>> class Foo(object): + ... def __html__(self): + ... return 'foo' + ... + >>> Markup(Foo()) + Markup(u'foo') + + If you want object passed being always treated as unsafe you can use the + :meth:`escape` classmethod to create a :class:`Markup` object: + + >>> Markup.escape("Hello World!") + Markup(u'Hello <em>World</em>!') + + Operations on a markup string are markup aware which means that all + arguments are passed through the :func:`escape` function: + + >>> em = Markup("%s") + >>> em % "foo & bar" + Markup(u'foo & bar') + >>> strong = Markup("%(text)s") + >>> strong % {'text': 'hacker here'} + Markup(u'<blink>hacker here</blink>') + >>> Markup("Hello ") + "" + Markup(u'Hello <foo>') + """ + __slots__ = () + + def __new__(cls, base=u'', encoding=None, errors='strict'): + if hasattr(base, '__html__'): + base = base.__html__() + if encoding is None: + return unicode.__new__(cls, base) + return unicode.__new__(cls, base, encoding, errors) + + def __html__(self): + return self + + def __add__(self, other): + if hasattr(other, '__html__') or isinstance(other, basestring): + return self.__class__(unicode(self) + unicode(escape(other))) + return NotImplemented + + def __radd__(self, other): + if hasattr(other, '__html__') or isinstance(other, basestring): + return self.__class__(unicode(escape(other)) + unicode(self)) + return NotImplemented + + def __mul__(self, num): + if isinstance(num, (int, long)): + return self.__class__(unicode.__mul__(self, num)) + return NotImplemented + __rmul__ = __mul__ + + def __mod__(self, arg): + if isinstance(arg, tuple): + arg = tuple(imap(_MarkupEscapeHelper, arg)) + else: + arg = _MarkupEscapeHelper(arg) + return self.__class__(unicode.__mod__(self, arg)) + + def __repr__(self): + return '%s(%s)' % ( + self.__class__.__name__, + unicode.__repr__(self) + ) + + def join(self, seq): + return self.__class__(unicode.join(self, imap(escape, seq))) + join.__doc__ = unicode.join.__doc__ + + def split(self, *args, **kwargs): + return map(self.__class__, unicode.split(self, *args, **kwargs)) + split.__doc__ = unicode.split.__doc__ + + def rsplit(self, *args, **kwargs): + return map(self.__class__, unicode.rsplit(self, *args, **kwargs)) + rsplit.__doc__ = unicode.rsplit.__doc__ + + def splitlines(self, *args, **kwargs): + return map(self.__class__, unicode.splitlines(self, *args, **kwargs)) + splitlines.__doc__ = unicode.splitlines.__doc__ + + def unescape(self): + r"""Unescape markup again into an unicode string. This also resolves + known HTML4 and XHTML entities: + + >>> Markup("Main » About").unescape() + u'Main \xbb About' + """ + from jinja2._markupsafe._constants import HTML_ENTITIES + def handle_match(m): + name = m.group(1) + if name in HTML_ENTITIES: + return unichr(HTML_ENTITIES[name]) + try: + if name[:2] in ('#x', '#X'): + return unichr(int(name[2:], 16)) + elif name.startswith('#'): + return unichr(int(name[1:])) + except ValueError: + pass + return u'' + return _entity_re.sub(handle_match, unicode(self)) + + def striptags(self): + r"""Unescape markup into an unicode string and strip all tags. This + also resolves known HTML4 and XHTML entities. Whitespace is + normalized to one: + + >>> Markup("Main » About").striptags() + u'Main \xbb About' + """ + stripped = u' '.join(_striptags_re.sub('', self).split()) + return Markup(stripped).unescape() + + @classmethod + def escape(cls, s): + """Escape the string. Works like :func:`escape` with the difference + that for subclasses of :class:`Markup` this function would return the + correct subclass. + """ + rv = escape(s) + if rv.__class__ is not cls: + return cls(rv) + return rv + + def make_wrapper(name): + orig = getattr(unicode, name) + def func(self, *args, **kwargs): + args = _escape_argspec(list(args), enumerate(args)) + _escape_argspec(kwargs, kwargs.iteritems()) + return self.__class__(orig(self, *args, **kwargs)) + func.__name__ = orig.__name__ + func.__doc__ = orig.__doc__ + return func + + for method in '__getitem__', 'capitalize', \ + 'title', 'lower', 'upper', 'replace', 'ljust', \ + 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \ + 'translate', 'expandtabs', 'swapcase', 'zfill': + locals()[method] = make_wrapper(method) + + # new in python 2.5 + if hasattr(unicode, 'partition'): + partition = make_wrapper('partition'), + rpartition = make_wrapper('rpartition') + + # new in python 2.6 + if hasattr(unicode, 'format'): + format = make_wrapper('format') + + # not in python 3 + if hasattr(unicode, '__getslice__'): + __getslice__ = make_wrapper('__getslice__') + + del method, make_wrapper + + +def _escape_argspec(obj, iterable): + """Helper for various string-wrapped functions.""" + for key, value in iterable: + if hasattr(value, '__html__') or isinstance(value, basestring): + obj[key] = escape(value) + return obj + + +class _MarkupEscapeHelper(object): + """Helper for Markup.__mod__""" + + def __init__(self, obj): + self.obj = obj + + __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x]) + __str__ = lambda s: str(escape(s.obj)) + __unicode__ = lambda s: unicode(escape(s.obj)) + __repr__ = lambda s: str(escape(repr(s.obj))) + __int__ = lambda s: int(s.obj) + __float__ = lambda s: float(s.obj) + + +# we have to import it down here as the speedups and native +# modules imports the markup type which is define above. +try: + from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode +except ImportError: + from jinja2._markupsafe._native import escape, escape_silent, soft_unicode diff --git a/jinja2/_markupsafe/_constants.py b/jinja2/_markupsafe/_constants.py new file mode 100644 index 0000000..919bf03 --- /dev/null +++ b/jinja2/_markupsafe/_constants.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._constants + ~~~~~~~~~~~~~~~~~~~~~ + + Highlevel implementation of the Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" + + +HTML_ENTITIES = { + 'AElig': 198, + 'Aacute': 193, + 'Acirc': 194, + 'Agrave': 192, + 'Alpha': 913, + 'Aring': 197, + 'Atilde': 195, + 'Auml': 196, + 'Beta': 914, + 'Ccedil': 199, + 'Chi': 935, + 'Dagger': 8225, + 'Delta': 916, + 'ETH': 208, + 'Eacute': 201, + 'Ecirc': 202, + 'Egrave': 200, + 'Epsilon': 917, + 'Eta': 919, + 'Euml': 203, + 'Gamma': 915, + 'Iacute': 205, + 'Icirc': 206, + 'Igrave': 204, + 'Iota': 921, + 'Iuml': 207, + 'Kappa': 922, + 'Lambda': 923, + 'Mu': 924, + 'Ntilde': 209, + 'Nu': 925, + 'OElig': 338, + 'Oacute': 211, + 'Ocirc': 212, + 'Ograve': 210, + 'Omega': 937, + 'Omicron': 927, + 'Oslash': 216, + 'Otilde': 213, + 'Ouml': 214, + 'Phi': 934, + 'Pi': 928, + 'Prime': 8243, + 'Psi': 936, + 'Rho': 929, + 'Scaron': 352, + 'Sigma': 931, + 'THORN': 222, + 'Tau': 932, + 'Theta': 920, + 'Uacute': 218, + 'Ucirc': 219, + 'Ugrave': 217, + 'Upsilon': 933, + 'Uuml': 220, + 'Xi': 926, + 'Yacute': 221, + 'Yuml': 376, + 'Zeta': 918, + 'aacute': 225, + 'acirc': 226, + 'acute': 180, + 'aelig': 230, + 'agrave': 224, + 'alefsym': 8501, + 'alpha': 945, + 'amp': 38, + 'and': 8743, + 'ang': 8736, + 'apos': 39, + 'aring': 229, + 'asymp': 8776, + 'atilde': 227, + 'auml': 228, + 'bdquo': 8222, + 'beta': 946, + 'brvbar': 166, + 'bull': 8226, + 'cap': 8745, + 'ccedil': 231, + 'cedil': 184, + 'cent': 162, + 'chi': 967, + 'circ': 710, + 'clubs': 9827, + 'cong': 8773, + 'copy': 169, + 'crarr': 8629, + 'cup': 8746, + 'curren': 164, + 'dArr': 8659, + 'dagger': 8224, + 'darr': 8595, + 'deg': 176, + 'delta': 948, + 'diams': 9830, + 'divide': 247, + 'eacute': 233, + 'ecirc': 234, + 'egrave': 232, + 'empty': 8709, + 'emsp': 8195, + 'ensp': 8194, + 'epsilon': 949, + 'equiv': 8801, + 'eta': 951, + 'eth': 240, + 'euml': 235, + 'euro': 8364, + 'exist': 8707, + 'fnof': 402, + 'forall': 8704, + 'frac12': 189, + 'frac14': 188, + 'frac34': 190, + 'frasl': 8260, + 'gamma': 947, + 'ge': 8805, + 'gt': 62, + 'hArr': 8660, + 'harr': 8596, + 'hearts': 9829, + 'hellip': 8230, + 'iacute': 237, + 'icirc': 238, + 'iexcl': 161, + 'igrave': 236, + 'image': 8465, + 'infin': 8734, + 'int': 8747, + 'iota': 953, + 'iquest': 191, + 'isin': 8712, + 'iuml': 239, + 'kappa': 954, + 'lArr': 8656, + 'lambda': 955, + 'lang': 9001, + 'laquo': 171, + 'larr': 8592, + 'lceil': 8968, + 'ldquo': 8220, + 'le': 8804, + 'lfloor': 8970, + 'lowast': 8727, + 'loz': 9674, + 'lrm': 8206, + 'lsaquo': 8249, + 'lsquo': 8216, + 'lt': 60, + 'macr': 175, + 'mdash': 8212, + 'micro': 181, + 'middot': 183, + 'minus': 8722, + 'mu': 956, + 'nabla': 8711, + 'nbsp': 160, + 'ndash': 8211, + 'ne': 8800, + 'ni': 8715, + 'not': 172, + 'notin': 8713, + 'nsub': 8836, + 'ntilde': 241, + 'nu': 957, + 'oacute': 243, + 'ocirc': 244, + 'oelig': 339, + 'ograve': 242, + 'oline': 8254, + 'omega': 969, + 'omicron': 959, + 'oplus': 8853, + 'or': 8744, + 'ordf': 170, + 'ordm': 186, + 'oslash': 248, + 'otilde': 245, + 'otimes': 8855, + 'ouml': 246, + 'para': 182, + 'part': 8706, + 'permil': 8240, + 'perp': 8869, + 'phi': 966, + 'pi': 960, + 'piv': 982, + 'plusmn': 177, + 'pound': 163, + 'prime': 8242, + 'prod': 8719, + 'prop': 8733, + 'psi': 968, + 'quot': 34, + 'rArr': 8658, + 'radic': 8730, + 'rang': 9002, + 'raquo': 187, + 'rarr': 8594, + 'rceil': 8969, + 'rdquo': 8221, + 'real': 8476, + 'reg': 174, + 'rfloor': 8971, + 'rho': 961, + 'rlm': 8207, + 'rsaquo': 8250, + 'rsquo': 8217, + 'sbquo': 8218, + 'scaron': 353, + 'sdot': 8901, + 'sect': 167, + 'shy': 173, + 'sigma': 963, + 'sigmaf': 962, + 'sim': 8764, + 'spades': 9824, + 'sub': 8834, + 'sube': 8838, + 'sum': 8721, + 'sup': 8835, + 'sup1': 185, + 'sup2': 178, + 'sup3': 179, + 'supe': 8839, + 'szlig': 223, + 'tau': 964, + 'there4': 8756, + 'theta': 952, + 'thetasym': 977, + 'thinsp': 8201, + 'thorn': 254, + 'tilde': 732, + 'times': 215, + 'trade': 8482, + 'uArr': 8657, + 'uacute': 250, + 'uarr': 8593, + 'ucirc': 251, + 'ugrave': 249, + 'uml': 168, + 'upsih': 978, + 'upsilon': 965, + 'uuml': 252, + 'weierp': 8472, + 'xi': 958, + 'yacute': 253, + 'yen': 165, + 'yuml': 255, + 'zeta': 950, + 'zwj': 8205, + 'zwnj': 8204 +} diff --git a/jinja2/_markupsafe/_native.py b/jinja2/_markupsafe/_native.py new file mode 100644 index 0000000..7b95828 --- /dev/null +++ b/jinja2/_markupsafe/_native.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._native + ~~~~~~~~~~~~~~~~~~ + + Native Python implementation the C module is not compiled. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +from jinja2._markupsafe import Markup + + +def escape(s): + """Convert the characters &, <, >, ' and " in string s to HTML-safe + sequences. Use this if you need to display text that might contain + such characters in HTML. Marks return value as markup string. + """ + if hasattr(s, '__html__'): + return s.__html__() + return Markup(unicode(s) + .replace('&', '&') + .replace('>', '>') + .replace('<', '<') + .replace("'", ''') + .replace('"', '"') + ) + + +def escape_silent(s): + """Like :func:`escape` but converts `None` into an empty + markup string. + """ + if s is None: + return Markup() + return escape(s) + + +def soft_unicode(s): + """Make a string unicode if it isn't already. That way a markup + string is not converted back to unicode. + """ + if not isinstance(s, unicode): + s = unicode(s) + return s diff --git a/jinja2/_markupsafe/tests.py b/jinja2/_markupsafe/tests.py new file mode 100644 index 0000000..c1ce394 --- /dev/null +++ b/jinja2/_markupsafe/tests.py @@ -0,0 +1,80 @@ +import gc +import unittest +from jinja2._markupsafe import Markup, escape, escape_silent + + +class MarkupTestCase(unittest.TestCase): + + def test_markup_operations(self): + # adding two strings should escape the unsafe one + unsafe = '' + safe = Markup('username') + assert unsafe + safe == unicode(escape(unsafe)) + unicode(safe) + + # string interpolations are safe to use too + assert Markup('%s') % '' == \ + '<bad user>' + assert Markup('%(username)s') % { + 'username': '' + } == '<bad user>' + + # an escaped object is markup too + assert type(Markup('foo') + 'bar') is Markup + + # and it implements __html__ by returning itself + x = Markup("foo") + assert x.__html__() is x + + # it also knows how to treat __html__ objects + class Foo(object): + def __html__(self): + return 'awesome' + def __unicode__(self): + return 'awesome' + assert Markup(Foo()) == 'awesome' + assert Markup('%s') % Foo() == \ + 'awesome' + + # escaping and unescaping + assert escape('"<>&\'') == '"<>&'' + assert Markup("Foo & Bar").striptags() == "Foo & Bar" + assert Markup("<test>").unescape() == "" + + def test_all_set(self): + import jinja2._markupsafe as markup + for item in markup.__all__: + getattr(markup, item) + + def test_escape_silent(self): + assert escape_silent(None) == Markup() + assert escape(None) == Markup(None) + assert escape_silent('') == Markup(u'<foo>') + + +class MarkupLeakTestCase(unittest.TestCase): + + def test_markup_leaks(self): + counts = set() + for count in xrange(20): + for item in xrange(1000): + escape("foo") + escape("") + escape(u"foo") + escape(u"") + counts.add(len(gc.get_objects())) + assert len(counts) == 1, 'ouch, c extension seems to leak objects' + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(MarkupTestCase)) + + # this test only tests the c extension + if not hasattr(escape, 'func_code'): + suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) + + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest='suite')