summaryrefslogtreecommitdiffstats
path: root/lib/jinja2/_markupsafe
diff options
context:
space:
mode:
Diffstat (limited to 'lib/jinja2/_markupsafe')
-rw-r--r--lib/jinja2/_markupsafe/__init__.py225
-rw-r--r--lib/jinja2/_markupsafe/_bundle.py49
-rw-r--r--lib/jinja2/_markupsafe/_constants.py267
-rw-r--r--lib/jinja2/_markupsafe/_native.py45
-rw-r--r--lib/jinja2/_markupsafe/tests.py80
5 files changed, 666 insertions, 0 deletions
diff --git a/lib/jinja2/_markupsafe/__init__.py b/lib/jinja2/_markupsafe/__init__.py
new file mode 100644
index 000000000..ec7bd572d
--- /dev/null
+++ b/lib/jinja2/_markupsafe/__init__.py
@@ -0,0 +1,225 @@
+# -*- coding: utf-8 -*-
+"""
+ markupsafe
+ ~~~~~~~~~~
+
+ Implements a Markup string.
+
+ :copyright: (c) 2010 by Armin Ronacher.
+ :license: BSD, see LICENSE for more details.
+"""
+import re
+from itertools import imap
+
+
+__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
+
+
+_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
+_entity_re = re.compile(r'&([^;]+);')
+
+
+class Markup(unicode):
+ r"""Marks a string as being safe for inclusion in HTML/XML output without
+ needing to be escaped. This implements the `__html__` interface a couple
+ of frameworks and web applications use. :class:`Markup` is a direct
+ subclass of `unicode` and provides all the methods of `unicode` just that
+ it escapes arguments passed and always returns `Markup`.
+
+ The `escape` function returns markup objects so that double escaping can't
+ happen.
+
+ The constructor of the :class:`Markup` class can be used for three
+ different things: When passed an unicode object it's assumed to be safe,
+ when passed an object with an HTML representation (has an `__html__`
+ method) that representation is used, otherwise the object passed is
+ converted into a unicode string and then assumed to be safe:
+
+ >>> Markup("Hello <em>World</em>!")
+ Markup(u'Hello <em>World</em>!')
+ >>> class Foo(object):
+ ... def __html__(self):
+ ... return '<a href="#">foo</a>'
+ ...
+ >>> Markup(Foo())
+ Markup(u'<a href="#">foo</a>')
+
+ If you want object passed being always treated as unsafe you can use the
+ :meth:`escape` classmethod to create a :class:`Markup` object:
+
+ >>> Markup.escape("Hello <em>World</em>!")
+ Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
+
+ Operations on a markup string are markup aware which means that all
+ arguments are passed through the :func:`escape` function:
+
+ >>> em = Markup("<em>%s</em>")
+ >>> em % "foo & bar"
+ Markup(u'<em>foo &amp; bar</em>')
+ >>> strong = Markup("<strong>%(text)s</strong>")
+ >>> strong % {'text': '<blink>hacker here</blink>'}
+ Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
+ >>> Markup("<em>Hello</em> ") + "<foo>"
+ Markup(u'<em>Hello</em> &lt;foo&gt;')
+ """
+ __slots__ = ()
+
+ def __new__(cls, base=u'', encoding=None, errors='strict'):
+ if hasattr(base, '__html__'):
+ base = base.__html__()
+ if encoding is None:
+ return unicode.__new__(cls, base)
+ return unicode.__new__(cls, base, encoding, errors)
+
+ def __html__(self):
+ return self
+
+ def __add__(self, other):
+ if hasattr(other, '__html__') or isinstance(other, basestring):
+ return self.__class__(unicode(self) + unicode(escape(other)))
+ return NotImplemented
+
+ def __radd__(self, other):
+ if hasattr(other, '__html__') or isinstance(other, basestring):
+ return self.__class__(unicode(escape(other)) + unicode(self))
+ return NotImplemented
+
+ def __mul__(self, num):
+ if isinstance(num, (int, long)):
+ return self.__class__(unicode.__mul__(self, num))
+ return NotImplemented
+ __rmul__ = __mul__
+
+ def __mod__(self, arg):
+ if isinstance(arg, tuple):
+ arg = tuple(imap(_MarkupEscapeHelper, arg))
+ else:
+ arg = _MarkupEscapeHelper(arg)
+ return self.__class__(unicode.__mod__(self, arg))
+
+ def __repr__(self):
+ return '%s(%s)' % (
+ self.__class__.__name__,
+ unicode.__repr__(self)
+ )
+
+ def join(self, seq):
+ return self.__class__(unicode.join(self, imap(escape, seq)))
+ join.__doc__ = unicode.join.__doc__
+
+ def split(self, *args, **kwargs):
+ return map(self.__class__, unicode.split(self, *args, **kwargs))
+ split.__doc__ = unicode.split.__doc__
+
+ def rsplit(self, *args, **kwargs):
+ return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
+ rsplit.__doc__ = unicode.rsplit.__doc__
+
+ def splitlines(self, *args, **kwargs):
+ return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
+ splitlines.__doc__ = unicode.splitlines.__doc__
+
+ def unescape(self):
+ r"""Unescape markup again into an unicode string. This also resolves
+ known HTML4 and XHTML entities:
+
+ >>> Markup("Main &raquo; <em>About</em>").unescape()
+ u'Main \xbb <em>About</em>'
+ """
+ from jinja2._markupsafe._constants import HTML_ENTITIES
+ def handle_match(m):
+ name = m.group(1)
+ if name in HTML_ENTITIES:
+ return unichr(HTML_ENTITIES[name])
+ try:
+ if name[:2] in ('#x', '#X'):
+ return unichr(int(name[2:], 16))
+ elif name.startswith('#'):
+ return unichr(int(name[1:]))
+ except ValueError:
+ pass
+ return u''
+ return _entity_re.sub(handle_match, unicode(self))
+
+ def striptags(self):
+ r"""Unescape markup into an unicode string and strip all tags. This
+ also resolves known HTML4 and XHTML entities. Whitespace is
+ normalized to one:
+
+ >>> Markup("Main &raquo; <em>About</em>").striptags()
+ u'Main \xbb About'
+ """
+ stripped = u' '.join(_striptags_re.sub('', self).split())
+ return Markup(stripped).unescape()
+
+ @classmethod
+ def escape(cls, s):
+ """Escape the string. Works like :func:`escape` with the difference
+ that for subclasses of :class:`Markup` this function would return the
+ correct subclass.
+ """
+ rv = escape(s)
+ if rv.__class__ is not cls:
+ return cls(rv)
+ return rv
+
+ def make_wrapper(name):
+ orig = getattr(unicode, name)
+ def func(self, *args, **kwargs):
+ args = _escape_argspec(list(args), enumerate(args))
+ _escape_argspec(kwargs, kwargs.iteritems())
+ return self.__class__(orig(self, *args, **kwargs))
+ func.__name__ = orig.__name__
+ func.__doc__ = orig.__doc__
+ return func
+
+ for method in '__getitem__', 'capitalize', \
+ 'title', 'lower', 'upper', 'replace', 'ljust', \
+ 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
+ 'translate', 'expandtabs', 'swapcase', 'zfill':
+ locals()[method] = make_wrapper(method)
+
+ # new in python 2.5
+ if hasattr(unicode, 'partition'):
+ partition = make_wrapper('partition'),
+ rpartition = make_wrapper('rpartition')
+
+ # new in python 2.6
+ if hasattr(unicode, 'format'):
+ format = make_wrapper('format')
+
+ # not in python 3
+ if hasattr(unicode, '__getslice__'):
+ __getslice__ = make_wrapper('__getslice__')
+
+ del method, make_wrapper
+
+
+def _escape_argspec(obj, iterable):
+ """Helper for various string-wrapped functions."""
+ for key, value in iterable:
+ if hasattr(value, '__html__') or isinstance(value, basestring):
+ obj[key] = escape(value)
+ return obj
+
+
+class _MarkupEscapeHelper(object):
+ """Helper for Markup.__mod__"""
+
+ def __init__(self, obj):
+ self.obj = obj
+
+ __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
+ __str__ = lambda s: str(escape(s.obj))
+ __unicode__ = lambda s: unicode(escape(s.obj))
+ __repr__ = lambda s: str(escape(repr(s.obj)))
+ __int__ = lambda s: int(s.obj)
+ __float__ = lambda s: float(s.obj)
+
+
+# we have to import it down here as the speedups and native
+# modules imports the markup type which is define above.
+try:
+ from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode
+except ImportError:
+ from jinja2._markupsafe._native import escape, escape_silent, soft_unicode
diff --git a/lib/jinja2/_markupsafe/_bundle.py b/lib/jinja2/_markupsafe/_bundle.py
new file mode 100644
index 000000000..e694faf23
--- /dev/null
+++ b/lib/jinja2/_markupsafe/_bundle.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""
+ jinja2._markupsafe._bundle
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ This script pulls in markupsafe from a source folder and
+ bundles it with Jinja2. It does not pull in the speedups
+ module though.
+
+ :copyright: Copyright 2010 by the Jinja team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+import sys
+import os
+import re
+
+
+def rewrite_imports(lines):
+ for idx, line in enumerate(lines):
+ new_line = re.sub(r'(import|from)\s+markupsafe\b',
+ r'\1 jinja2._markupsafe', line)
+ if new_line != line:
+ lines[idx] = new_line
+
+
+def main():
+ if len(sys.argv) != 2:
+ print 'error: only argument is path to markupsafe'
+ sys.exit(1)
+ basedir = os.path.dirname(__file__)
+ markupdir = sys.argv[1]
+ for filename in os.listdir(markupdir):
+ if filename.endswith('.py'):
+ f = open(os.path.join(markupdir, filename))
+ try:
+ lines = list(f)
+ finally:
+ f.close()
+ rewrite_imports(lines)
+ f = open(os.path.join(basedir, filename), 'w')
+ try:
+ for line in lines:
+ f.write(line)
+ finally:
+ f.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/lib/jinja2/_markupsafe/_constants.py b/lib/jinja2/_markupsafe/_constants.py
new file mode 100644
index 000000000..919bf03c5
--- /dev/null
+++ b/lib/jinja2/_markupsafe/_constants.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+"""
+ markupsafe._constants
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ Highlevel implementation of the Markup string.
+
+ :copyright: (c) 2010 by Armin Ronacher.
+ :license: BSD, see LICENSE for more details.
+"""
+
+
+HTML_ENTITIES = {
+ 'AElig': 198,
+ 'Aacute': 193,
+ 'Acirc': 194,
+ 'Agrave': 192,
+ 'Alpha': 913,
+ 'Aring': 197,
+ 'Atilde': 195,
+ 'Auml': 196,
+ 'Beta': 914,
+ 'Ccedil': 199,
+ 'Chi': 935,
+ 'Dagger': 8225,
+ 'Delta': 916,
+ 'ETH': 208,
+ 'Eacute': 201,
+ 'Ecirc': 202,
+ 'Egrave': 200,
+ 'Epsilon': 917,
+ 'Eta': 919,
+ 'Euml': 203,
+ 'Gamma': 915,
+ 'Iacute': 205,
+ 'Icirc': 206,
+ 'Igrave': 204,
+ 'Iota': 921,
+ 'Iuml': 207,
+ 'Kappa': 922,
+ 'Lambda': 923,
+ 'Mu': 924,
+ 'Ntilde': 209,
+ 'Nu': 925,
+ 'OElig': 338,
+ 'Oacute': 211,
+ 'Ocirc': 212,
+ 'Ograve': 210,
+ 'Omega': 937,
+ 'Omicron': 927,
+ 'Oslash': 216,
+ 'Otilde': 213,
+ 'Ouml': 214,
+ 'Phi': 934,
+ 'Pi': 928,
+ 'Prime': 8243,
+ 'Psi': 936,
+ 'Rho': 929,
+ 'Scaron': 352,
+ 'Sigma': 931,
+ 'THORN': 222,
+ 'Tau': 932,
+ 'Theta': 920,
+ 'Uacute': 218,
+ 'Ucirc': 219,
+ 'Ugrave': 217,
+ 'Upsilon': 933,
+ 'Uuml': 220,
+ 'Xi': 926,
+ 'Yacute': 221,
+ 'Yuml': 376,
+ 'Zeta': 918,
+ 'aacute': 225,
+ 'acirc': 226,
+ 'acute': 180,
+ 'aelig': 230,
+ 'agrave': 224,
+ 'alefsym': 8501,
+ 'alpha': 945,
+ 'amp': 38,
+ 'and': 8743,
+ 'ang': 8736,
+ 'apos': 39,
+ 'aring': 229,
+ 'asymp': 8776,
+ 'atilde': 227,
+ 'auml': 228,
+ 'bdquo': 8222,
+ 'beta': 946,
+ 'brvbar': 166,
+ 'bull': 8226,
+ 'cap': 8745,
+ 'ccedil': 231,
+ 'cedil': 184,
+ 'cent': 162,
+ 'chi': 967,
+ 'circ': 710,
+ 'clubs': 9827,
+ 'cong': 8773,
+ 'copy': 169,
+ 'crarr': 8629,
+ 'cup': 8746,
+ 'curren': 164,
+ 'dArr': 8659,
+ 'dagger': 8224,
+ 'darr': 8595,
+ 'deg': 176,
+ 'delta': 948,
+ 'diams': 9830,
+ 'divide': 247,
+ 'eacute': 233,
+ 'ecirc': 234,
+ 'egrave': 232,
+ 'empty': 8709,
+ 'emsp': 8195,
+ 'ensp': 8194,
+ 'epsilon': 949,
+ 'equiv': 8801,
+ 'eta': 951,
+ 'eth': 240,
+ 'euml': 235,
+ 'euro': 8364,
+ 'exist': 8707,
+ 'fnof': 402,
+ 'forall': 8704,
+ 'frac12': 189,
+ 'frac14': 188,
+ 'frac34': 190,
+ 'frasl': 8260,
+ 'gamma': 947,
+ 'ge': 8805,
+ 'gt': 62,
+ 'hArr': 8660,
+ 'harr': 8596,
+ 'hearts': 9829,
+ 'hellip': 8230,
+ 'iacute': 237,
+ 'icirc': 238,
+ 'iexcl': 161,
+ 'igrave': 236,
+ 'image': 8465,
+ 'infin': 8734,
+ 'int': 8747,
+ 'iota': 953,
+ 'iquest': 191,
+ 'isin': 8712,
+ 'iuml': 239,
+ 'kappa': 954,
+ 'lArr': 8656,
+ 'lambda': 955,
+ 'lang': 9001,
+ 'laquo': 171,
+ 'larr': 8592,
+ 'lceil': 8968,
+ 'ldquo': 8220,
+ 'le': 8804,
+ 'lfloor': 8970,
+ 'lowast': 8727,
+ 'loz': 9674,
+ 'lrm': 8206,
+ 'lsaquo': 8249,
+ 'lsquo': 8216,
+ 'lt': 60,
+ 'macr': 175,
+ 'mdash': 8212,
+ 'micro': 181,
+ 'middot': 183,
+ 'minus': 8722,
+ 'mu': 956,
+ 'nabla': 8711,
+ 'nbsp': 160,
+ 'ndash': 8211,
+ 'ne': 8800,
+ 'ni': 8715,
+ 'not': 172,
+ 'notin': 8713,
+ 'nsub': 8836,
+ 'ntilde': 241,
+ 'nu': 957,
+ 'oacute': 243,
+ 'ocirc': 244,
+ 'oelig': 339,
+ 'ograve': 242,
+ 'oline': 8254,
+ 'omega': 969,
+ 'omicron': 959,
+ 'oplus': 8853,
+ 'or': 8744,
+ 'ordf': 170,
+ 'ordm': 186,
+ 'oslash': 248,
+ 'otilde': 245,
+ 'otimes': 8855,
+ 'ouml': 246,
+ 'para': 182,
+ 'part': 8706,
+ 'permil': 8240,
+ 'perp': 8869,
+ 'phi': 966,
+ 'pi': 960,
+ 'piv': 982,
+ 'plusmn': 177,
+ 'pound': 163,
+ 'prime': 8242,
+ 'prod': 8719,
+ 'prop': 8733,
+ 'psi': 968,
+ 'quot': 34,
+ 'rArr': 8658,
+ 'radic': 8730,
+ 'rang': 9002,
+ 'raquo': 187,
+ 'rarr': 8594,
+ 'rceil': 8969,
+ 'rdquo': 8221,
+ 'real': 8476,
+ 'reg': 174,
+ 'rfloor': 8971,
+ 'rho': 961,
+ 'rlm': 8207,
+ 'rsaquo': 8250,
+ 'rsquo': 8217,
+ 'sbquo': 8218,
+ 'scaron': 353,
+ 'sdot': 8901,
+ 'sect': 167,
+ 'shy': 173,
+ 'sigma': 963,
+ 'sigmaf': 962,
+ 'sim': 8764,
+ 'spades': 9824,
+ 'sub': 8834,
+ 'sube': 8838,
+ 'sum': 8721,
+ 'sup': 8835,
+ 'sup1': 185,
+ 'sup2': 178,
+ 'sup3': 179,
+ 'supe': 8839,
+ 'szlig': 223,
+ 'tau': 964,
+ 'there4': 8756,
+ 'theta': 952,
+ 'thetasym': 977,
+ 'thinsp': 8201,
+ 'thorn': 254,
+ 'tilde': 732,
+ 'times': 215,
+ 'trade': 8482,
+ 'uArr': 8657,
+ 'uacute': 250,
+ 'uarr': 8593,
+ 'ucirc': 251,
+ 'ugrave': 249,
+ 'uml': 168,
+ 'upsih': 978,
+ 'upsilon': 965,
+ 'uuml': 252,
+ 'weierp': 8472,
+ 'xi': 958,
+ 'yacute': 253,
+ 'yen': 165,
+ 'yuml': 255,
+ 'zeta': 950,
+ 'zwj': 8205,
+ 'zwnj': 8204
+}
diff --git a/lib/jinja2/_markupsafe/_native.py b/lib/jinja2/_markupsafe/_native.py
new file mode 100644
index 000000000..7b95828ec
--- /dev/null
+++ b/lib/jinja2/_markupsafe/_native.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+"""
+ markupsafe._native
+ ~~~~~~~~~~~~~~~~~~
+
+ Native Python implementation the C module is not compiled.
+
+ :copyright: (c) 2010 by Armin Ronacher.
+ :license: BSD, see LICENSE for more details.
+"""
+from jinja2._markupsafe import Markup
+
+
+def escape(s):
+ """Convert the characters &, <, >, ' and " in string s to HTML-safe
+ sequences. Use this if you need to display text that might contain
+ such characters in HTML. Marks return value as markup string.
+ """
+ if hasattr(s, '__html__'):
+ return s.__html__()
+ return Markup(unicode(s)
+ .replace('&', '&amp;')
+ .replace('>', '&gt;')
+ .replace('<', '&lt;')
+ .replace("'", '&#39;')
+ .replace('"', '&#34;')
+ )
+
+
+def escape_silent(s):
+ """Like :func:`escape` but converts `None` into an empty
+ markup string.
+ """
+ if s is None:
+ return Markup()
+ return escape(s)
+
+
+def soft_unicode(s):
+ """Make a string unicode if it isn't already. That way a markup
+ string is not converted back to unicode.
+ """
+ if not isinstance(s, unicode):
+ s = unicode(s)
+ return s
diff --git a/lib/jinja2/_markupsafe/tests.py b/lib/jinja2/_markupsafe/tests.py
new file mode 100644
index 000000000..c1ce3943a
--- /dev/null
+++ b/lib/jinja2/_markupsafe/tests.py
@@ -0,0 +1,80 @@
+import gc
+import unittest
+from jinja2._markupsafe import Markup, escape, escape_silent
+
+
+class MarkupTestCase(unittest.TestCase):
+
+ def test_markup_operations(self):
+ # adding two strings should escape the unsafe one
+ unsafe = '<script type="application/x-some-script">alert("foo");</script>'
+ safe = Markup('<em>username</em>')
+ assert unsafe + safe == unicode(escape(unsafe)) + unicode(safe)
+
+ # string interpolations are safe to use too
+ assert Markup('<em>%s</em>') % '<bad user>' == \
+ '<em>&lt;bad user&gt;</em>'
+ assert Markup('<em>%(username)s</em>') % {
+ 'username': '<bad user>'
+ } == '<em>&lt;bad user&gt;</em>'
+
+ # an escaped object is markup too
+ assert type(Markup('foo') + 'bar') is Markup
+
+ # and it implements __html__ by returning itself
+ x = Markup("foo")
+ assert x.__html__() is x
+
+ # it also knows how to treat __html__ objects
+ class Foo(object):
+ def __html__(self):
+ return '<em>awesome</em>'
+ def __unicode__(self):
+ return 'awesome'
+ assert Markup(Foo()) == '<em>awesome</em>'
+ assert Markup('<strong>%s</strong>') % Foo() == \
+ '<strong><em>awesome</em></strong>'
+
+ # escaping and unescaping
+ assert escape('"<>&\'') == '&#34;&lt;&gt;&amp;&#39;'
+ assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar"
+ assert Markup("&lt;test&gt;").unescape() == "<test>"
+
+ def test_all_set(self):
+ import jinja2._markupsafe as markup
+ for item in markup.__all__:
+ getattr(markup, item)
+
+ def test_escape_silent(self):
+ assert escape_silent(None) == Markup()
+ assert escape(None) == Markup(None)
+ assert escape_silent('<foo>') == Markup(u'&lt;foo&gt;')
+
+
+class MarkupLeakTestCase(unittest.TestCase):
+
+ def test_markup_leaks(self):
+ counts = set()
+ for count in xrange(20):
+ for item in xrange(1000):
+ escape("foo")
+ escape("<foo>")
+ escape(u"foo")
+ escape(u"<foo>")
+ counts.add(len(gc.get_objects()))
+ assert len(counts) == 1, 'ouch, c extension seems to leak objects'
+
+
+def suite():
+ suite = unittest.TestSuite()
+ suite.addTest(unittest.makeSuite(MarkupTestCase))
+
+ # this test only tests the c extension
+ if not hasattr(escape, 'func_code'):
+ suite.addTest(unittest.makeSuite(MarkupLeakTestCase))
+
+ return suite
+
+
+if __name__ == '__main__':
+ unittest.main(defaultTest='suite')