From 68d662e689cd42687341c550fb6ebb74e6968d21 Mon Sep 17 00:00:00 2001
From: Walter Purcaro
Date: Mon, 8 Sep 2014 00:29:57 +0200
Subject: module -> pyload

---
 pyload/lib/BeautifulSoup.py | 2017 ++++
 pyload/lib/Getch.py | 76 +
 pyload/lib/MultipartPostHandler.py | 139 +
 pyload/lib/SafeEval.py | 47 +
 pyload/lib/__init__.py | 0
 pyload/lib/beaker/__init__.py | 1 +
 pyload/lib/beaker/cache.py | 589 +++
 pyload/lib/beaker/container.py | 750 ++++
 pyload/lib/beaker/converters.py | 29 +
 pyload/lib/beaker/crypto/__init__.py | 44 +
 pyload/lib/beaker/crypto/jcecrypto.py | 32 +
 pyload/lib/beaker/crypto/nsscrypto.py | 45 +
 pyload/lib/beaker/crypto/pbkdf2.py | 347 ++
 pyload/lib/beaker/crypto/pycrypto.py | 34 +
 pyload/lib/beaker/crypto/util.py | 30 +
 pyload/lib/beaker/exceptions.py | 29 +
 pyload/lib/beaker/ext/__init__.py | 0
 pyload/lib/beaker/ext/database.py | 174 +
 pyload/lib/beaker/ext/google.py | 121 +
 pyload/lib/beaker/ext/memcached.py | 203 +
 pyload/lib/beaker/ext/sqla.py | 136 +
 pyload/lib/beaker/middleware.py | 168 +
 pyload/lib/beaker/session.py | 726 ++++
 pyload/lib/beaker/synchronization.py | 386 ++
 pyload/lib/beaker/util.py | 462 +++
 pyload/lib/bottle.py | 3732 ++++++
 pyload/lib/feedparser.py | 4013 ++++++++
 pyload/lib/jinja2/__init__.py | 69 +
 pyload/lib/jinja2/_compat.py | 150 +
 pyload/lib/jinja2/_stringdefs.py | 132 +
 pyload/lib/jinja2/bccache.py | 344 ++
 pyload/lib/jinja2/compiler.py | 1640 ++++
 pyload/lib/jinja2/constants.py | 32 +
 pyload/lib/jinja2/debug.py | 337 ++
 pyload/lib/jinja2/defaults.py | 43 +
 pyload/lib/jinja2/environment.py | 1191 ++++
 pyload/lib/jinja2/exceptions.py | 146 +
 pyload/lib/jinja2/ext.py | 636 ++++
 pyload/lib/jinja2/filters.py | 987 +++++
 pyload/lib/jinja2/lexer.py | 733 ++++
 pyload/lib/jinja2/loaders.py | 471 +++
 pyload/lib/jinja2/meta.py | 103 +
 pyload/lib/jinja2/nodes.py | 914 +++++
 pyload/lib/jinja2/optimizer.py | 68 +
 pyload/lib/jinja2/parser.py | 895 +++++
 pyload/lib/jinja2/runtime.py | 581 +++
 pyload/lib/jinja2/sandbox.py | 368 ++
 pyload/lib/jinja2/tests.py | 149 +
 pyload/lib/jinja2/testsuite/__init__.py | 156 +
 pyload/lib/jinja2/testsuite/api.py | 261 ++
 pyload/lib/jinja2/testsuite/bytecode_cache.py | 37 +
 pyload/lib/jinja2/testsuite/core_tags.py | 305 ++
 pyload/lib/jinja2/testsuite/debug.py | 58 +
 pyload/lib/jinja2/testsuite/doctests.py | 29 +
 pyload/lib/jinja2/testsuite/ext.py | 459 +++
 pyload/lib/jinja2/testsuite/filters.py | 515 +++
 pyload/lib/jinja2/testsuite/imports.py | 141 +
 pyload/lib/jinja2/testsuite/inheritance.py | 250 ++
 pyload/lib/jinja2/testsuite/lexnparse.py | 593 +++
 pyload/lib/jinja2/testsuite/loader.py | 226 ++
 pyload/lib/jinja2/testsuite/regression.py | 279 ++
 pyload/lib/jinja2/testsuite/res/__init__.py | 0
 .../lib/jinja2/testsuite/res/templates/broken.html | 3 +
 .../jinja2/testsuite/res/templates/foo/test.html | 1 +
 .../testsuite/res/templates/syntaxerror.html | 4 +
 .../lib/jinja2/testsuite/res/templates/test.html | 1 +
 pyload/lib/jinja2/testsuite/security.py | 166 +
 pyload/lib/jinja2/testsuite/tests.py | 93 +
 pyload/lib/jinja2/testsuite/utils.py | 82 +
 pyload/lib/jinja2/utils.py | 520 +++
 pyload/lib/jinja2/visitor.py | 87 +
 pyload/lib/markupsafe/__init__.py | 298 ++
 pyload/lib/markupsafe/_compat.py | 26 +
 pyload/lib/markupsafe/_constants.py | 267 ++
 pyload/lib/markupsafe/_native.py | 46 +
 pyload/lib/markupsafe/_speedups.c | 239 ++
 pyload/lib/markupsafe/tests.py | 179 +
 pyload/lib/rename_process.py | 14 +
 pyload/lib/simplejson/__init__.py | 560 +++
 pyload/lib/simplejson/compat.py | 46 +
 pyload/lib/simplejson/decoder.py | 393 ++
 pyload/lib/simplejson/encoder.py | 648 ++++
 pyload/lib/simplejson/ordered_dict.py | 119 +
 pyload/lib/simplejson/scanner.py | 133 +
 pyload/lib/simplejson/tool.py | 42 +
 pyload/lib/thrift/TSCons.py | 35 +
 pyload/lib/thrift/TSerialization.py | 38 +
 pyload/lib/thrift/TTornado.py | 153 +
 pyload/lib/thrift/Thrift.py | 170 +
 pyload/lib/thrift/__init__.py | 20 +
 pyload/lib/thrift/protocol/TBase.py | 81 +
 pyload/lib/thrift/protocol/TBinaryProtocol.py | 260 ++
 pyload/lib/thrift/protocol/TCompactProtocol.py | 403 ++
 pyload/lib/thrift/protocol/TJSONProtocol.py | 550 +++
 pyload/lib/thrift/protocol/TProtocol.py | 406 ++
 pyload/lib/thrift/protocol/__init__.py | 20 +
 pyload/lib/thrift/protocol/fastbinary.c | 1219 ++++++
 pyload/lib/thrift/server/THttpServer.py | 87 +
 pyload/lib/thrift/server/TNonblockingServer.py | 346 ++
 pyload/lib/thrift/server/TProcessPoolServer.py | 118 +
 pyload/lib/thrift/server/TServer.py | 269 ++
 pyload/lib/thrift/server/__init__.py | 20 +
 pyload/lib/thrift/transport/THttpClient.py | 149 +
 pyload/lib/thrift/transport/TSSLSocket.py | 214 ++
 pyload/lib/thrift/transport/TSocket.py | 176 +
 pyload/lib/thrift/transport/TTransport.py | 330 ++
 pyload/lib/thrift/transport/TTwisted.py | 221 ++
 pyload/lib/thrift/transport/TZlibTransport.py | 248 ++
 pyload/lib/thrift/transport/__init__.py | 20 +
 pyload/lib/wsgiserver/LICENSE.txt | 25 +
 pyload/lib/wsgiserver/__init__.py | 2299 +++++
 111 files changed, 39472 insertions(+)
 create mode 100644 pyload/lib/BeautifulSoup.py
 create mode 100644 pyload/lib/Getch.py
 create mode 100644 pyload/lib/MultipartPostHandler.py
 create mode 100644 pyload/lib/SafeEval.py
 create mode 100644 pyload/lib/__init__.py
 create mode 100644 pyload/lib/beaker/__init__.py
 create mode 100644 pyload/lib/beaker/cache.py
 create mode 100644 pyload/lib/beaker/container.py
 create mode 100644 pyload/lib/beaker/converters.py
 create mode 100644 pyload/lib/beaker/crypto/__init__.py
 create mode 100644 pyload/lib/beaker/crypto/jcecrypto.py
 create mode 100644 pyload/lib/beaker/crypto/nsscrypto.py
 create mode 100644 pyload/lib/beaker/crypto/pbkdf2.py
 create mode 100644 pyload/lib/beaker/crypto/pycrypto.py
 create mode 100644 pyload/lib/beaker/crypto/util.py
 create mode 100644 pyload/lib/beaker/exceptions.py
 create mode 100644 pyload/lib/beaker/ext/__init__.py
 create mode 100644 pyload/lib/beaker/ext/database.py
 create mode 100644 pyload/lib/beaker/ext/google.py
 create mode 100644 pyload/lib/beaker/ext/memcached.py
 create mode 100644 pyload/lib/beaker/ext/sqla.py
 create mode 100644 pyload/lib/beaker/middleware.py
 create mode 100644 pyload/lib/beaker/session.py
 create mode 100644 pyload/lib/beaker/synchronization.py
 create mode 100644 pyload/lib/beaker/util.py
 create mode 100644 pyload/lib/bottle.py
 create mode 100644 pyload/lib/feedparser.py
 create mode 100644 pyload/lib/jinja2/__init__.py
 create mode 100644 pyload/lib/jinja2/_compat.py
 create mode 100644 pyload/lib/jinja2/_stringdefs.py
 create mode 100644 pyload/lib/jinja2/bccache.py
 create mode 100644 pyload/lib/jinja2/compiler.py
 create mode 100644 pyload/lib/jinja2/constants.py
 create mode 100644 pyload/lib/jinja2/debug.py
 create mode 100644 pyload/lib/jinja2/defaults.py
 create mode 100644 pyload/lib/jinja2/environment.py
 create mode 100644 pyload/lib/jinja2/exceptions.py
 create mode 100644 pyload/lib/jinja2/ext.py
 create mode 100644 pyload/lib/jinja2/filters.py
 create mode 100644 pyload/lib/jinja2/lexer.py
 create mode 100644 pyload/lib/jinja2/loaders.py
 create mode 100644 pyload/lib/jinja2/meta.py
 create mode 100644 pyload/lib/jinja2/nodes.py
 create mode 100644 pyload/lib/jinja2/optimizer.py
 create mode 100644 pyload/lib/jinja2/parser.py
 create mode 100644 pyload/lib/jinja2/runtime.py
 create mode 100644 pyload/lib/jinja2/sandbox.py
 create mode 100644 pyload/lib/jinja2/tests.py
 create mode 100644 pyload/lib/jinja2/testsuite/__init__.py
 create mode 100644 pyload/lib/jinja2/testsuite/api.py
 create mode 100644 pyload/lib/jinja2/testsuite/bytecode_cache.py
 create mode 100644 pyload/lib/jinja2/testsuite/core_tags.py
 create mode 100644 pyload/lib/jinja2/testsuite/debug.py
 create mode 100644 pyload/lib/jinja2/testsuite/doctests.py
 create mode 100644 pyload/lib/jinja2/testsuite/ext.py
 create mode 100644 pyload/lib/jinja2/testsuite/filters.py
 create mode 100644 pyload/lib/jinja2/testsuite/imports.py
 create mode 100644 pyload/lib/jinja2/testsuite/inheritance.py
 create mode 100644 pyload/lib/jinja2/testsuite/lexnparse.py
 create mode 100644 pyload/lib/jinja2/testsuite/loader.py
 create mode 100644 pyload/lib/jinja2/testsuite/regression.py
 create mode 100644 pyload/lib/jinja2/testsuite/res/__init__.py
 create mode 100644 pyload/lib/jinja2/testsuite/res/templates/broken.html
 create mode 100644 pyload/lib/jinja2/testsuite/res/templates/foo/test.html
 create mode 100644 pyload/lib/jinja2/testsuite/res/templates/syntaxerror.html
 create mode 100644 pyload/lib/jinja2/testsuite/res/templates/test.html
 create mode 100644 pyload/lib/jinja2/testsuite/security.py
 create mode 100644 pyload/lib/jinja2/testsuite/tests.py
 create mode 100644 pyload/lib/jinja2/testsuite/utils.py
 create mode 100644 pyload/lib/jinja2/utils.py
 create mode 100644 pyload/lib/jinja2/visitor.py
 create mode 100644 pyload/lib/markupsafe/__init__.py
 create mode 100644 pyload/lib/markupsafe/_compat.py
 create mode 100644 pyload/lib/markupsafe/_constants.py
 create mode 100644 pyload/lib/markupsafe/_native.py
 create mode 100644 pyload/lib/markupsafe/_speedups.c
 create mode 100644 pyload/lib/markupsafe/tests.py
 create mode 100644 pyload/lib/rename_process.py
 create mode 100644 pyload/lib/simplejson/__init__.py
 create mode 100644 pyload/lib/simplejson/compat.py
 create mode 100644 pyload/lib/simplejson/decoder.py
 create mode 100644 pyload/lib/simplejson/encoder.py
 create mode 100644 pyload/lib/simplejson/ordered_dict.py
 create mode 100644 pyload/lib/simplejson/scanner.py
 create mode 100644 pyload/lib/simplejson/tool.py
 create mode 100644 pyload/lib/thrift/TSCons.py
 create mode 100644 pyload/lib/thrift/TSerialization.py
 create mode 100644 pyload/lib/thrift/TTornado.py
 create mode 100644 pyload/lib/thrift/Thrift.py
 create mode 100644 pyload/lib/thrift/__init__.py
 create mode 100644 pyload/lib/thrift/protocol/TBase.py
 create mode 100644 pyload/lib/thrift/protocol/TBinaryProtocol.py
 create mode 100644 pyload/lib/thrift/protocol/TCompactProtocol.py
 create mode 100644 pyload/lib/thrift/protocol/TJSONProtocol.py
 create mode 100644 pyload/lib/thrift/protocol/TProtocol.py
 create mode 100644 pyload/lib/thrift/protocol/__init__.py
 create mode 100644 pyload/lib/thrift/protocol/fastbinary.c
 create mode 100644 pyload/lib/thrift/server/THttpServer.py
 create mode 100644 pyload/lib/thrift/server/TNonblockingServer.py
 create mode 100644 pyload/lib/thrift/server/TProcessPoolServer.py
 create mode 100644 pyload/lib/thrift/server/TServer.py
 create mode 100644 pyload/lib/thrift/server/__init__.py
 create mode 100644 pyload/lib/thrift/transport/THttpClient.py
 create mode 100644 pyload/lib/thrift/transport/TSSLSocket.py
 create mode 100644 pyload/lib/thrift/transport/TSocket.py
 create mode 100644 pyload/lib/thrift/transport/TTransport.py
 create mode 100644 pyload/lib/thrift/transport/TTwisted.py
 create mode 100644 pyload/lib/thrift/transport/TZlibTransport.py
 create mode 100644 pyload/lib/thrift/transport/__init__.py
 create mode 100644 pyload/lib/wsgiserver/LICENSE.txt
 create mode 100644 pyload/lib/wsgiserver/__init__.py

diff --git a/pyload/lib/BeautifulSoup.py b/pyload/lib/BeautifulSoup.py
new file mode 100644
index 000000000..7278215ca
--- /dev/null
+++ b/pyload/lib/BeautifulSoup.py
@@ -0,0 +1,2017 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup parses a (possibly invalid) XML or HTML document into a
+tree representation. It provides methods and Pythonic idioms that make
+it easy to navigate, search, and modify the tree.
+
+A well-formed XML/HTML document yields a well-formed data
+structure. An ill-formed XML/HTML document yields a correspondingly
+ill-formed data structure. If your document is only locally
+well-formed, you can use this library to find and process the
+well-formed part of it.
+
+Beautiful Soup works with Python 2.2 and up. It has no external
+dependencies, but you'll have more success at converting data to UTF-8
+if you also install these three packages:
+
+* chardet, for auto-detecting character encodings
+  http://chardet.feedparser.org/
+* cjkcodecs and iconv_codec, which add more encodings to the ones supported
+  by stock Python.
+  http://cjkpython.i18n.org/
+
+Beautiful Soup defines classes for two main parsing strategies:
+
+ * BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
+   language that kind of looks like XML.
+
+ * BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
+   or invalid. This class has web browser-like heuristics for
+   obtaining a sensible parse tree in the face of common HTML errors.
+
+Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
+the encoding of an HTML or XML document, and converting it to
+Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/documentation.html
+
+Here, have some legalese:
+
+Copyright (c) 2004-2010, Leonard Richardson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided
+   with the distribution.
+
+ * Neither the name of the the Beautiful Soup Consortium and All
+   Night Kosher Bakery nor the names of its contributors may be
+   used to endorse or promote products derived from this software
+   without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT. + +""" +from __future__ import generators + +__author__ = "Leonard Richardson (leonardr@segfault.org)" +__version__ = "3.2.1" +__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" +__license__ = "New-style BSD" + +from sgmllib import SGMLParser, SGMLParseError +import codecs +import markupbase +import types +import re +import sgmllib +try: + from htmlentitydefs import name2codepoint +except ImportError: + name2codepoint = {} +try: + set +except NameError: + from sets import Set as set + +#These hacks make Beautiful Soup able to parse XML with namespaces +sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*') +markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match + +DEFAULT_OUTPUT_ENCODING = "utf-8" + +def _match_css_class(str): + """Build a RE to match the given CSS class.""" + return re.compile(r"(^|.*\s)%s($|\s)" % str) + +# First, the classes that represent markup elements. + +class PageElement(object): + """Contains the navigational information for some part of the page + (either a tag or a piece of text)""" + + def _invert(h): + "Cheap function to invert a hash." + i = {} + for k,v in h.items(): + i[v] = k + return i + + XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'", + "quot" : '"', + "amp" : "&", + "lt" : "<", + "gt" : ">" } + + XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS) + + def setup(self, parent=None, previous=None): + """Sets up the initial relations between this element and + other elements.""" + self.parent = parent + self.previous = previous + self.next = None + self.previousSibling = None + self.nextSibling = None + if self.parent and self.parent.contents: + self.previousSibling = self.parent.contents[-1] + self.previousSibling.nextSibling = self + + def replaceWith(self, replaceWith): + oldParent = self.parent + myIndex = self.parent.index(self) + if hasattr(replaceWith, "parent")\ + and replaceWith.parent is self.parent: + # We're replacing this element with one of its siblings. + index = replaceWith.parent.index(replaceWith) + if index and index < myIndex: + # Furthermore, it comes before this element. That + # means that when we extract it, the index of this + # element will change. + myIndex = myIndex - 1 + self.extract() + oldParent.insert(myIndex, replaceWith) + + def replaceWithChildren(self): + myParent = self.parent + myIndex = self.parent.index(self) + self.extract() + reversedChildren = list(self.contents) + reversedChildren.reverse() + for child in reversedChildren: + myParent.insert(myIndex, child) + + def extract(self): + """Destructively rips this element out of the tree.""" + if self.parent: + try: + del self.parent.contents[self.parent.index(self)] + except ValueError: + pass + + #Find the two elements that would be next to each other if + #this element (and any children) hadn't been parsed. Connect + #the two. 
+ lastChild = self._lastRecursiveChild() + nextElement = lastChild.next + + if self.previous: + self.previous.next = nextElement + if nextElement: + nextElement.previous = self.previous + self.previous = None + lastChild.next = None + + self.parent = None + if self.previousSibling: + self.previousSibling.nextSibling = self.nextSibling + if self.nextSibling: + self.nextSibling.previousSibling = self.previousSibling + self.previousSibling = self.nextSibling = None + return self + + def _lastRecursiveChild(self): + "Finds the last element beneath this object to be parsed." + lastChild = self + while hasattr(lastChild, 'contents') and lastChild.contents: + lastChild = lastChild.contents[-1] + return lastChild + + def insert(self, position, newChild): + if isinstance(newChild, basestring) \ + and not isinstance(newChild, NavigableString): + newChild = NavigableString(newChild) + + position = min(position, len(self.contents)) + if hasattr(newChild, 'parent') and newChild.parent is not None: + # We're 'inserting' an element that's already one + # of this object's children. + if newChild.parent is self: + index = self.index(newChild) + if index > position: + # Furthermore we're moving it further down the + # list of this object's children. That means that + # when we extract this element, our target index + # will jump down one. + position = position - 1 + newChild.extract() + + newChild.parent = self + previousChild = None + if position == 0: + newChild.previousSibling = None + newChild.previous = self + else: + previousChild = self.contents[position-1] + newChild.previousSibling = previousChild + newChild.previousSibling.nextSibling = newChild + newChild.previous = previousChild._lastRecursiveChild() + if newChild.previous: + newChild.previous.next = newChild + + newChildsLastElement = newChild._lastRecursiveChild() + + if position >= len(self.contents): + newChild.nextSibling = None + + parent = self + parentsNextSibling = None + while not parentsNextSibling: + parentsNextSibling = parent.nextSibling + parent = parent.parent + if not parent: # This is the last element in the document. 
+ break + if parentsNextSibling: + newChildsLastElement.next = parentsNextSibling + else: + newChildsLastElement.next = None + else: + nextChild = self.contents[position] + newChild.nextSibling = nextChild + if newChild.nextSibling: + newChild.nextSibling.previousSibling = newChild + newChildsLastElement.next = nextChild + + if newChildsLastElement.next: + newChildsLastElement.next.previous = newChildsLastElement + self.contents.insert(position, newChild) + + def append(self, tag): + """Appends the given tag to the contents of this tag.""" + self.insert(len(self.contents), tag) + + def findNext(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears after this Tag in the document.""" + return self._findOne(self.findAllNext, name, attrs, text, **kwargs) + + def findAllNext(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.nextGenerator, + **kwargs) + + def findNextSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears after this Tag in the document.""" + return self._findOne(self.findNextSiblings, name, attrs, text, + **kwargs) + + def findNextSiblings(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear after this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.nextSiblingGenerator, **kwargs) + fetchNextSiblings = findNextSiblings # Compatibility with pre-3.x + + def findPrevious(self, name=None, attrs={}, text=None, **kwargs): + """Returns the first item that matches the given criteria and + appears before this Tag in the document.""" + return self._findOne(self.findAllPrevious, name, attrs, text, **kwargs) + + def findAllPrevious(self, name=None, attrs={}, text=None, limit=None, + **kwargs): + """Returns all items that match the given criteria and appear + before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, self.previousGenerator, + **kwargs) + fetchPrevious = findAllPrevious # Compatibility with pre-3.x + + def findPreviousSibling(self, name=None, attrs={}, text=None, **kwargs): + """Returns the closest sibling to this Tag that matches the + given criteria and appears before this Tag in the document.""" + return self._findOne(self.findPreviousSiblings, name, attrs, text, + **kwargs) + + def findPreviousSiblings(self, name=None, attrs={}, text=None, + limit=None, **kwargs): + """Returns the siblings of this Tag that match the given + criteria and appear before this Tag in the document.""" + return self._findAll(name, attrs, text, limit, + self.previousSiblingGenerator, **kwargs) + fetchPreviousSiblings = findPreviousSiblings # Compatibility with pre-3.x + + def findParent(self, name=None, attrs={}, **kwargs): + """Returns the closest parent of this Tag that matches the given + criteria.""" + # NOTE: We can't use _findOne because findParents takes a different + # set of arguments. 
+ r = None + l = self.findParents(name, attrs, 1) + if l: + r = l[0] + return r + + def findParents(self, name=None, attrs={}, limit=None, **kwargs): + """Returns the parents of this Tag that match the given + criteria.""" + + return self._findAll(name, attrs, None, limit, self.parentGenerator, + **kwargs) + fetchParents = findParents # Compatibility with pre-3.x + + #These methods do the real heavy lifting. + + def _findOne(self, method, name, attrs, text, **kwargs): + r = None + l = method(name, attrs, text, 1, **kwargs) + if l: + r = l[0] + return r + + def _findAll(self, name, attrs, text, limit, generator, **kwargs): + "Iterates over a generator looking for things that match." + + if isinstance(name, SoupStrainer): + strainer = name + # (Possibly) special case some findAll*(...) searches + elif text is None and not limit and not attrs and not kwargs: + # findAll*(True) + if name is True: + return [element for element in generator() + if isinstance(element, Tag)] + # findAll*('tag-name') + elif isinstance(name, basestring): + return [element for element in generator() + if isinstance(element, Tag) and + element.name == name] + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + # Build a SoupStrainer + else: + strainer = SoupStrainer(name, attrs, text, **kwargs) + results = ResultSet(strainer) + g = generator() + while True: + try: + i = g.next() + except StopIteration: + break + if i: + found = strainer.search(i) + if found: + results.append(found) + if limit and len(results) >= limit: + break + return results + + #These Generators can be used to navigate starting from both + #NavigableStrings and Tags. + def nextGenerator(self): + i = self + while i is not None: + i = i.next + yield i + + def nextSiblingGenerator(self): + i = self + while i is not None: + i = i.nextSibling + yield i + + def previousGenerator(self): + i = self + while i is not None: + i = i.previous + yield i + + def previousSiblingGenerator(self): + i = self + while i is not None: + i = i.previousSibling + yield i + + def parentGenerator(self): + i = self + while i is not None: + i = i.parent + yield i + + # Utility methods + def substituteEncoding(self, str, encoding=None): + encoding = encoding or "utf-8" + return str.replace("%SOUP-ENCODING%", encoding) + + def toEncoding(self, s, encoding=None): + """Encodes an object to a string in some encoding, or to Unicode. + .""" + if isinstance(s, unicode): + if encoding: + s = s.encode(encoding) + elif isinstance(s, str): + if encoding: + s = s.encode(encoding) + else: + s = unicode(s) + else: + if encoding: + s = self.toEncoding(str(s), encoding) + else: + s = unicode(s) + return s + + BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|" + + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)" + + ")") + + def _sub_entity(self, x): + """Used with a regular expression to substitute the + appropriate XML entity for an XML special character.""" + return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";" + + +class NavigableString(unicode, PageElement): + + def __new__(cls, value): + """Create a new NavigableString. + + When unpickling a NavigableString, this method is called with + the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be + passed in to the superclass's __new__ or the superclass won't know + how to handle non-ASCII characters. 
+ """ + if isinstance(value, unicode): + return unicode.__new__(cls, value) + return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING) + + def __getnewargs__(self): + return (NavigableString.__str__(self),) + + def __getattr__(self, attr): + """text.string gives you text. This is for backwards + compatibility for Navigable*String, but for CData* it lets you + get the string without the CData wrapper.""" + if attr == 'string': + return self + else: + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr) + + def __unicode__(self): + return str(self).decode(DEFAULT_OUTPUT_ENCODING) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + # Substitute outgoing XML entities. + data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self) + if encoding: + return data.encode(encoding) + else: + return data + +class CData(NavigableString): + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class ProcessingInstruction(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + output = self + if "%SOUP-ENCODING%" in output: + output = self.substituteEncoding(output, encoding) + return "" % self.toEncoding(output, encoding) + +class Comment(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Declaration(NavigableString): + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING): + return "" % NavigableString.__str__(self, encoding) + +class Tag(PageElement): + + """Represents a found HTML tag with its attributes and contents.""" + + def _convertEntities(self, match): + """Used in a call to re.sub to replace HTML, XML, and numeric + entities with the appropriate Unicode characters. If HTML + entities are being converted, any unrecognized entities are + escaped.""" + x = match.group(1) + if self.convertHTMLEntities and x in name2codepoint: + return unichr(name2codepoint[x]) + elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS: + if self.convertXMLEntities: + return self.XML_ENTITIES_TO_SPECIAL_CHARS[x] + else: + return u'&%s;' % x + elif len(x) > 0 and x[0] == '#': + # Handle numeric entities + if len(x) > 1 and x[1] == 'x': + return unichr(int(x[2:], 16)) + else: + return unichr(int(x[1:])) + + elif self.escapeUnrecognizedEntities: + return u'&%s;' % x + else: + return u'&%s;' % x + + def __init__(self, parser, name, attrs=None, parent=None, + previous=None): + "Basic constructor." + + # We don't actually store the parser object: that lets extracted + # chunks be garbage-collected + self.parserClass = parser.__class__ + self.isSelfClosing = parser.isSelfClosingTag(name) + self.name = name + if attrs is None: + attrs = [] + elif isinstance(attrs, dict): + attrs = attrs.items() + self.attrs = attrs + self.contents = [] + self.setup(parent, previous) + self.hidden = False + self.containsSubstitutions = False + self.convertHTMLEntities = parser.convertHTMLEntities + self.convertXMLEntities = parser.convertXMLEntities + self.escapeUnrecognizedEntities = parser.escapeUnrecognizedEntities + + # Convert any HTML, XML, or numeric entities in the attribute values. 
+ convert = lambda(k, val): (k, + re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);", + self._convertEntities, + val)) + self.attrs = map(convert, self.attrs) + + def getString(self): + if (len(self.contents) == 1 + and isinstance(self.contents[0], NavigableString)): + return self.contents[0] + + def setString(self, string): + """Replace the contents of the tag with a string""" + self.clear() + self.append(string) + + string = property(getString, setString) + + def getText(self, separator=u""): + if not len(self.contents): + return u"" + stopNode = self._lastRecursiveChild().next + strings = [] + current = self.contents[0] + while current is not stopNode: + if isinstance(current, NavigableString): + strings.append(current.strip()) + current = current.next + return separator.join(strings) + + text = property(getText) + + def get(self, key, default=None): + """Returns the value of the 'key' attribute for the tag, or + the value given for 'default' if it doesn't have that + attribute.""" + return self._getAttrMap().get(key, default) + + def clear(self): + """Extract all children.""" + for child in self.contents[:]: + child.extract() + + def index(self, element): + for i, child in enumerate(self.contents): + if child is element: + return i + raise ValueError("Tag.index: element not in tag") + + def has_key(self, key): + return self._getAttrMap().has_key(key) + + def __getitem__(self, key): + """tag[key] returns the value of the 'key' attribute for the tag, + and throws an exception if it's not there.""" + return self._getAttrMap()[key] + + def __iter__(self): + "Iterating over a tag iterates over its contents." + return iter(self.contents) + + def __len__(self): + "The length of a tag is the length of its list of contents." + return len(self.contents) + + def __contains__(self, x): + return x in self.contents + + def __nonzero__(self): + "A tag is non-None even if it has no contents." + return True + + def __setitem__(self, key, value): + """Setting tag[key] sets the value of the 'key' attribute for the + tag.""" + self._getAttrMap() + self.attrMap[key] = value + found = False + for i in range(0, len(self.attrs)): + if self.attrs[i][0] == key: + self.attrs[i] = (key, value) + found = True + if not found: + self.attrs.append((key, value)) + self._getAttrMap()[key] = value + + def __delitem__(self, key): + "Deleting tag[key] deletes all 'key' attributes for the tag." + for item in self.attrs: + if item[0] == key: + self.attrs.remove(item) + #We don't break because bad HTML can define the same + #attribute multiple times. + self._getAttrMap() + if self.attrMap.has_key(key): + del self.attrMap[key] + + def __call__(self, *args, **kwargs): + """Calling a tag like a function is the same as calling its + findAll() method. Eg. tag('a') returns a list of all the A tags + found within this tag.""" + return apply(self.findAll, args, kwargs) + + def __getattr__(self, tag): + #print "Getattr %s.%s" % (self.__class__, tag) + if len(tag) > 3 and tag.rfind('Tag') == len(tag)-3: + return self.find(tag[:-3]) + elif tag.find('__') != 0: + return self.find(tag) + raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag) + + def __eq__(self, other): + """Returns true iff this tag has the same name, the same attributes, + and the same contents (recursively) as the given tag. + + NOTE: right now this will return false if two tags have the + same attributes in a different order. 
Should this be fixed?""" + if other is self: + return True + if not hasattr(other, 'name') or not hasattr(other, 'attrs') or not hasattr(other, 'contents') or self.name != other.name or self.attrs != other.attrs or len(self) != len(other): + return False + for i in range(0, len(self.contents)): + if self.contents[i] != other.contents[i]: + return False + return True + + def __ne__(self, other): + """Returns true iff this tag is not identical to the other tag, + as defined in __eq__.""" + return not self == other + + def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING): + """Renders this tag as a string.""" + return self.__str__(encoding) + + def __unicode__(self): + return self.__str__(None) + + def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING, + prettyPrint=False, indentLevel=0): + """Returns a string or Unicode representation of this tag and + its contents. To get Unicode, pass None for encoding. + + NOTE: since Python's HTML parser consumes whitespace, this + method is not certain to reproduce the whitespace present in + the original string.""" + + encodedName = self.toEncoding(self.name, encoding) + + attrs = [] + if self.attrs: + for key, val in self.attrs: + fmt = '%s="%s"' + if isinstance(val, basestring): + if self.containsSubstitutions and '%SOUP-ENCODING%' in val: + val = self.substituteEncoding(val, encoding) + + # The attribute value either: + # + # * Contains no embedded double quotes or single quotes. + # No problem: we enclose it in double quotes. + # * Contains embedded single quotes. No problem: + # double quotes work here too. + # * Contains embedded double quotes. No problem: + # we enclose it in single quotes. + # * Embeds both single _and_ double quotes. This + # can't happen naturally, but it can happen if + # you modify an attribute value after parsing + # the document. Now we have a bit of a + # problem. We solve it by enclosing the + # attribute in single quotes, and escaping any + # embedded single quotes to XML entities. + if '"' in val: + fmt = "%s='%s'" + if "'" in val: + # TODO: replace with apos when + # appropriate. + val = val.replace("'", "&squot;") + + # Now we're okay w/r/t quotes. But the attribute + # value might also contain angle brackets, or + # ampersands that aren't part of entities. We need + # to escape those to XML entities too. 
+                    val = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, val)
+
+                attrs.append(fmt % (self.toEncoding(key, encoding),
+                                    self.toEncoding(val, encoding)))
+        close = ''
+        closeTag = ''
+        if self.isSelfClosing:
+            close = ' /'
+        else:
+            closeTag = '</%s>' % encodedName
+
+        indentTag, indentContents = 0, 0
+        if prettyPrint:
+            indentTag = indentLevel
+            space = (' ' * (indentTag-1))
+            indentContents = indentTag + 1
+        contents = self.renderContents(encoding, prettyPrint, indentContents)
+        if self.hidden:
+            s = contents
+        else:
+            s = []
+            attributeString = ''
+            if attrs:
+                attributeString = ' ' + ' '.join(attrs)
+            if prettyPrint:
+                s.append(space)
+            s.append('<%s%s%s>' % (encodedName, attributeString, close))
+            if prettyPrint:
+                s.append("\n")
+            s.append(contents)
+            if prettyPrint and contents and contents[-1] != "\n":
+                s.append("\n")
+            if prettyPrint and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if prettyPrint and closeTag and self.nextSibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        if len(self.contents) == 0:
+            return
+        current = self.contents[0]
+        while current is not None:
+            next = current.next
+            if isinstance(current, Tag):
+                del current.contents[:]
+            current.parent = None
+            current.previous = None
+            current.previousSibling = None
+            current.next = None
+            current.nextSibling = None
+            current = next
+
+    def prettify(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        return self.__str__(encoding, True)
+
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        """Renders the contents of this tag as a string in the given
+        encoding. If encoding is None, returns a Unicode string.."""
+        s=[]
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.__str__(encoding)
+            elif isinstance(c, Tag):
+                s.append(c.__str__(encoding, prettyPrint, indentLevel))
+            if text and prettyPrint:
+                text = text.strip()
+            if text:
+                if prettyPrint:
+                    s.append(" " * (indentLevel-1))
+                s.append(text)
+                if prettyPrint:
+                    s.append("\n")
+        return ''.join(s)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria."""
+        r = None
+        l = self.findAll(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def findAll(self, name=None, attrs={}, recursive=True, text=None,
+                limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'.
The + same is true of the tag name.""" + generator = self.recursiveChildGenerator + if not recursive: + generator = self.childGenerator + return self._findAll(name, attrs, text, limit, generator, **kwargs) + findChildren = findAll + + # Pre-3.x compatibility methods + first = find + fetch = findAll + + def fetchText(self, text=None, recursive=True, limit=None): + return self.findAll(text=text, recursive=recursive, limit=limit) + + def firstText(self, text=None, recursive=True): + return self.find(text=text, recursive=recursive) + + #Private methods + + def _getAttrMap(self): + """Initializes a map representation of this tag's attributes, + if not already initialized.""" + if not getattr(self, 'attrMap'): + self.attrMap = {} + for (key, value) in self.attrs: + self.attrMap[key] = value + return self.attrMap + + #Generator methods + def childGenerator(self): + # Just use the iterator from the contents + return iter(self.contents) + + def recursiveChildGenerator(self): + if not len(self.contents): + raise StopIteration + stopNode = self._lastRecursiveChild().next + current = self.contents[0] + while current is not stopNode: + yield current + current = current.next + + +# Next, a couple classes to represent queries and their results. +class SoupStrainer: + """Encapsulates a number of ways of matching a markup element (tag or + text).""" + + def __init__(self, name=None, attrs={}, text=None, **kwargs): + self.name = name + if isinstance(attrs, basestring): + kwargs['class'] = _match_css_class(attrs) + attrs = None + if kwargs: + if attrs: + attrs = attrs.copy() + attrs.update(kwargs) + else: + attrs = kwargs + self.attrs = attrs + self.text = text + + def __str__(self): + if self.text: + return self.text + else: + return "%s|%s" % (self.name, self.attrs) + + def searchTag(self, markupName=None, markupAttrs={}): + found = None + markup = None + if isinstance(markupName, Tag): + markup = markupName + markupAttrs = markup + callFunctionWithTagData = callable(self.name) \ + and not isinstance(markupName, Tag) + + if (not self.name) \ + or callFunctionWithTagData \ + or (markup and self._matches(markup, self.name)) \ + or (not markup and self._matches(markupName, self.name)): + if callFunctionWithTagData: + match = self.name(markupName, markupAttrs) + else: + match = True + markupAttrMap = None + for attr, matchAgainst in self.attrs.items(): + if not markupAttrMap: + if hasattr(markupAttrs, 'get'): + markupAttrMap = markupAttrs + else: + markupAttrMap = {} + for k,v in markupAttrs: + markupAttrMap[k] = v + attrValue = markupAttrMap.get(attr) + if not self._matches(attrValue, matchAgainst): + match = False + break + if match: + if markup: + found = markup + else: + found = markupName + return found + + def search(self, markup): + #print 'looking for %s in %s' % (self, markup) + found = None + # If given a list of items, scan it for a text element that + # matches. + if hasattr(markup, "__iter__") \ + and not isinstance(markup, Tag): + for element in markup: + if isinstance(element, NavigableString) \ + and self.search(element): + found = element + break + # If it's a Tag, make sure its name or attributes match. + # Don't bother with Tags if we're searching for text. + elif isinstance(markup, Tag): + if not self.text: + found = self.searchTag(markup) + # If it's text, make sure the text matches. 
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception, "I don't know how to match against a %s" \
+                  % markup.__class__
+        return found
+
+    def _matches(self, markup, matchAgainst):
+        #print "Matching %s against %s" % (markup, matchAgainst)
+        result = False
+        if matchAgainst is True:
+            result = markup is not None
+        elif callable(matchAgainst):
+            result = matchAgainst(markup)
+        else:
+            #Custom match methods take the tag as an argument, but all
+            #other ways of matching match the tag name as a string.
+            if isinstance(markup, Tag):
+                markup = markup.name
+            if markup and not isinstance(markup, basestring):
+                markup = unicode(markup)
+            #Now we know that chunk is either a string, or None.
+            if hasattr(matchAgainst, 'match'):
+                # It's a regexp object.
+                result = markup and matchAgainst.search(markup)
+            elif hasattr(matchAgainst, '__iter__'): # list-like
+                result = markup in matchAgainst
+            elif hasattr(matchAgainst, 'items'):
+                result = markup.has_key(matchAgainst)
+            elif matchAgainst and isinstance(markup, basestring):
+                if isinstance(markup, unicode):
+                    matchAgainst = unicode(matchAgainst)
+                else:
+                    matchAgainst = str(matchAgainst)
+
+            if not result:
+                result = matchAgainst == markup
+        return result
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it."""
+    def __init__(self, source):
+        list.__init__([])
+        self.source = source
+
+# Now, some helper functions.
+
+def buildTagMap(default, *args):
+    """Turns a list of maps, lists, or scalars into a single map.
+    Used to build the SELF_CLOSING_TAGS, NESTABLE_TAGS, and
+    NESTING_RESET_TAGS maps out of lists and partial maps."""
+    built = {}
+    for portion in args:
+        if hasattr(portion, 'items'):
+            #It's a map. Merge it.
+            for k,v in portion.items():
+                built[k] = v
+        elif hasattr(portion, '__iter__'): # is a list
+            #It's a list. Map each item to the default.
+            for k in portion:
+                built[k] = default
+        else:
+            #It's a scalar. Map it to the default.
+            built[portion] = default
+    return built
+
+# Now, the parser classes.
+
+class BeautifulStoneSoup(Tag, SGMLParser):
+
+    """This class contains the basic parser and search code. It defines
+    a parser that knows nothing about tag behavior except for the
+    following:
+
+      You can't close a tag without closing all the tags it encloses.
+      That is, "<foo><bar></foo>" actually means
+      "<foo><bar></bar></foo>".
+
+    [Another possible explanation is "<foo><bar /></foo>", but since
+    this class defines no SELF_CLOSING_TAGS, it will never use that
+    explanation.]
+
+    This class is useful for parsing XML or made-up markup languages,
+    or when BeautifulSoup makes an assumption counter to what you were
+    expecting."""
+
+    SELF_CLOSING_TAGS = {}
+    NESTABLE_TAGS = {}
+    RESET_NESTING_TAGS = {}
+    QUOTE_TAGS = {}
+    PRESERVE_WHITESPACE_TAGS = []
+
+    MARKUP_MASSAGE = [(re.compile('(<[^<>]*)/>'),
+                       lambda x: x.group(1) + ' />'),
+                      (re.compile('<!\s+([^<>]*)>'),
+                       lambda x: '<!' + x.group(1) + '>')
+                      ]
+
+    ROOT_TAG_NAME = u'[document]'
+
+    HTML_ENTITIES = "html"
+    XML_ENTITIES = "xml"
+    XHTML_ENTITIES = "xhtml"
+    # TODO: This only exists for backwards-compatibility
+    ALL_ENTITIES = XHTML_ENTITIES
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
+    STRIP_ASCII_SPACES = { 9: None, 10: None, 12: None, 13: None, 32: None, }
+
+    def __init__(self, markup="", parseOnlyThese=None, fromEncoding=None,
+                 markupMassage=True, smartQuotesTo=XML_ENTITIES,
+                 convertEntities=None, selfClosingTags=None, isHTML=False):
+        """The Soup object is initialized as the 'root tag', and the
+        provided markup (which can be a string or a file-like object)
+        is fed into the underlying parser.
+
+        sgmllib will process most bad HTML, and the BeautifulSoup
+        class has some tricks for dealing with some HTML that kills
+        sgmllib, but Beautiful Soup can nonetheless choke or lose data
+        if your data uses self-closing tags or declarations
+        incorrectly.
+
+        By default, Beautiful Soup uses regexes to sanitize input,
+        avoiding the vast majority of these problems. If the problems
+        don't apply to you, pass in False for markupMassage, and
+        you'll get better performance.
+
+        The default parser massage techniques fix the two most common
+        instances of invalid HTML that choke sgmllib:
+
+         <br/> (No space between name of closing tag and tag close)
+         <! --Comment--> (Extraneous whitespace in declaration)
+
+        You can pass in a custom list of (RE object, replace method)
+        tuples to get Beautiful Soup to scrub your input the way you
+        want."""
+
+        self.parseOnlyThese = parseOnlyThese
+        self.fromEncoding = fromEncoding
+        self.smartQuotesTo = smartQuotesTo
+        self.convertEntities = convertEntities
+        # Set the rules for how we'll deal with the entities we
+        # encounter
+        if self.convertEntities:
+            # It doesn't make sense to convert encoded characters to
+            # entities even while you're converting entities to Unicode.
+            # Just convert it all to Unicode.
+            self.smartQuotesTo = None
+            if convertEntities == self.HTML_ENTITIES:
+                self.convertXMLEntities = False
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = True
+            elif convertEntities == self.XHTML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = True
+                self.escapeUnrecognizedEntities = False
+            elif convertEntities == self.XML_ENTITIES:
+                self.convertXMLEntities = True
+                self.convertHTMLEntities = False
+                self.escapeUnrecognizedEntities = False
+        else:
+            self.convertXMLEntities = False
+            self.convertHTMLEntities = False
+            self.escapeUnrecognizedEntities = False
+
+        self.instanceSelfClosingTags = buildTagMap(None, selfClosingTags)
+        SGMLParser.__init__(self)
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        self.markup = markup
+        self.markupMassage = markupMassage
+        try:
+            self._feed(isHTML=isHTML)
+        except StopParsing:
+            pass
+        self.markup = None                 # The markup can now be GCed
+
+    def convert_charref(self, name):
+        """This method fixes a bug in Python's SGMLParser."""
+        try:
+            n = int(name)
+        except ValueError:
+            return
+        if not 0 <= n <= 127 : # ASCII ends at 127, not 255
+            return
+        return self.convert_codepoint(n)
+
+    def _feed(self, inDocumentEncoding=None, isHTML=False):
+        # Convert the document to Unicode.
+        markup = self.markup
+        if isinstance(markup, unicode):
+            if not hasattr(self, 'originalEncoding'):
+                self.originalEncoding = None
+        else:
+            dammit = UnicodeDammit\
+                     (markup, [self.fromEncoding, inDocumentEncoding],
+                      smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
+            markup = dammit.unicode
+            self.originalEncoding = dammit.originalEncoding
+            self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
+        if markup:
+            if self.markupMassage:
+                if not hasattr(self.markupMassage, "__iter__"):
+                    self.markupMassage = self.MARKUP_MASSAGE
+                for fix, m in self.markupMassage:
+                    markup = fix.sub(m, markup)
+                # TODO: We get rid of markupMassage so that the
+                # soup object can be deepcopied later on. Some
+                # Python installations can't copy regexes. If anyone
+                # was relying on the existence of markupMassage, this
+                # might cause problems.
+                del(self.markupMassage)
+        self.reset()
+
+        SGMLParser.feed(self, markup)
+        # Close out any unfinished strings and close all the open tags.
+ self.endData() + while self.currentTag.name != self.ROOT_TAG_NAME: + self.popTag() + + def __getattr__(self, methodName): + """This method routes method call requests to either the SGMLParser + superclass or the Tag superclass, depending on the method name.""" + #print "__getattr__ called on %s.%s" % (self.__class__, methodName) + + if methodName.startswith('start_') or methodName.startswith('end_') \ + or methodName.startswith('do_'): + return SGMLParser.__getattr__(self, methodName) + elif not methodName.startswith('__'): + return Tag.__getattr__(self, methodName) + else: + raise AttributeError + + def isSelfClosingTag(self, name): + """Returns true iff the given string is the name of a + self-closing tag according to this parser.""" + return self.SELF_CLOSING_TAGS.has_key(name) \ + or self.instanceSelfClosingTags.has_key(name) + + def reset(self): + Tag.__init__(self, self, self.ROOT_TAG_NAME) + self.hidden = 1 + SGMLParser.reset(self) + self.currentData = [] + self.currentTag = None + self.tagStack = [] + self.quoteStack = [] + self.pushTag(self) + + def popTag(self): + tag = self.tagStack.pop() + + #print "Pop", tag.name + if self.tagStack: + self.currentTag = self.tagStack[-1] + return self.currentTag + + def pushTag(self, tag): + #print "Push", tag.name + if self.currentTag: + self.currentTag.contents.append(tag) + self.tagStack.append(tag) + self.currentTag = self.tagStack[-1] + + def endData(self, containerClass=NavigableString): + if self.currentData: + currentData = u''.join(self.currentData) + if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and + not set([tag.name for tag in self.tagStack]).intersection( + self.PRESERVE_WHITESPACE_TAGS)): + if '\n' in currentData: + currentData = '\n' + else: + currentData = ' ' + self.currentData = [] + if self.parseOnlyThese and len(self.tagStack) <= 1 and \ + (not self.parseOnlyThese.text or \ + not self.parseOnlyThese.search(currentData)): + return + o = containerClass(currentData) + o.setup(self.currentTag, self.previous) + if self.previous: + self.previous.next = o + self.previous = o + self.currentTag.contents.append(o) + + + def _popToTag(self, name, inclusivePop=True): + """Pops the tag stack up to and including the most recent + instance of the given tag. If inclusivePop is false, pops the tag + stack up to but *not* including the most recent instqance of + the given tag.""" + #print "Popping to %s" % name + if name == self.ROOT_TAG_NAME: + return + + numPops = 0 + mostRecentTag = None + for i in range(len(self.tagStack)-1, 0, -1): + if name == self.tagStack[i].name: + numPops = len(self.tagStack)-i + break + if not inclusivePop: + numPops = numPops - 1 + + for i in range(0, numPops): + mostRecentTag = self.popTag() + return mostRecentTag + + def _smartPop(self, name): + + """We need to pop up to the previous tag of this type, unless + one of this tag's nesting reset triggers comes between this + tag and the previous tag of this type, OR unless this tag is a + generic nesting trigger and another generic nesting trigger + comes between this tag and the previous tag of this type. + + Examples: +
+         <p>Foo<b>Bar *<p>* should pop to 'p', not 'b'.
+         <p>Foo<table>Bar *<p>* should pop to 'table', not 'p'.
+         <p>Foo<table><tr>Bar *<p>* should pop to 'tr', not 'p'.
+
+         <li><ul><li> *<li>* should pop to 'ul', not the first 'li'.
+         <tr><table><tr> *<tr>* should pop to 'table', not the first 'tr'
+         <td><tr><td> *<td>* should pop to 'tr', not the first 'td'
+        """
+
+        nestingResetTriggers = self.NESTABLE_TAGS.get(name)
+        isNestable = nestingResetTriggers != None
+        isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
+        popTo = None
+        inclusive = True
+        for i in range(len(self.tagStack)-1, 0, -1):
+            p = self.tagStack[i]
+            if (not p or p.name == name) and not isNestable:
+                #Non-nestable tags get popped to the top or to their
+                #last occurance.
+                popTo = name
+                break
+            if (nestingResetTriggers is not None
+                and p.name in nestingResetTriggers) \
+                or (nestingResetTriggers is None and isResetNesting
+                    and self.RESET_NESTING_TAGS.has_key(p.name)):
+
+                #If we encounter one of the nesting reset triggers
+                #peculiar to this tag, or we encounter another tag
+                #that causes nesting to reset, pop up to but not
+                #including that tag.
+                popTo = p.name
+                inclusive = False
+                break
+            p = p.parent
+        if popTo:
+            self._popToTag(popTo, inclusive)
+
+    def unknown_starttag(self, name, attrs, selfClosing=0):
+        #print "Start tag %s: %s" % (name, attrs)
+        if self.quoteStack:
+            #This is not a real tag.
+            #print "<%s> is not real!" % name
+            attrs = ''.join([' %s="%s"' % (x, y) for x, y in attrs])
+            self.handle_data('<%s%s>' % (name, attrs))
+            return
+        self.endData()
+
+        if not self.isSelfClosingTag(name) and not selfClosing:
+            self._smartPop(name)
+
+        if self.parseOnlyThese and len(self.tagStack) <= 1 \
+               and (self.parseOnlyThese.text or not self.parseOnlyThese.searchTag(name, attrs)):
+            return
+
+        tag = Tag(self, name, attrs, self.currentTag, self.previous)
+        if self.previous:
+            self.previous.next = tag
+        self.previous = tag
+        self.pushTag(tag)
+        if selfClosing or self.isSelfClosingTag(name):
+            self.popTag()
+        if name in self.QUOTE_TAGS:
+            #print "Beginning quote (%s)" % name
+            self.quoteStack.append(name)
+            self.literal = 1
+        return tag
+
+    def unknown_endtag(self, name):
+        #print "End tag %s" % name
+        if self.quoteStack and self.quoteStack[-1] != name:
+            #This is not a real end tag.
+            #print "</%s> is not real!" % name
+            self.handle_data('</%s>' % name)
+            return
+        self.endData()
+        self._popToTag(name)
+        if self.quoteStack and self.quoteStack[-1] == name:
+            self.quoteStack.pop()
+            self.literal = (len(self.quoteStack) > 0)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def _toStringSubclass(self, text, subclass):
+        """Adds a certain piece of text to the tree as a NavigableString
+        subclass."""
+        self.endData()
+        self.handle_data(text)
+        self.endData(subclass)
+
+    def handle_pi(self, text):
+        """Handle a processing instruction as a ProcessingInstruction
+        object, possibly one with a %SOUP-ENCODING% slot into which an
+        encoding will be plugged later."""
+        if text[:3] == "xml":
+            text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
+        self._toStringSubclass(text, ProcessingInstruction)
+
+    def handle_comment(self, text):
+        "Handle comments as Comment objects."
+        self._toStringSubclass(text, Comment)
+
+    def handle_charref(self, ref):
+        "Handle character references as data."
+ if self.convertEntities: + data = unichr(int(ref)) + else: + data = '&#%s;' % ref + self.handle_data(data) + + def handle_entityref(self, ref): + """Handle entity references as data, possibly converting known + HTML and/or XML entity references to the corresponding Unicode + characters.""" + data = None + if self.convertHTMLEntities: + try: + data = unichr(name2codepoint[ref]) + except KeyError: + pass + + if not data and self.convertXMLEntities: + data = self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref) + + if not data and self.convertHTMLEntities and \ + not self.XML_ENTITIES_TO_SPECIAL_CHARS.get(ref): + # TODO: We've got a problem here. We're told this is + # an entity reference, but it's not an XML entity + # reference or an HTML entity reference. Nonetheless, + # the logical thing to do is to pass it through as an + # unrecognized entity reference. + # + # Except: when the input is "&carol;" this function + # will be called with input "carol". When the input is + # "AT&T", this function will be called with input + # "T". We have no way of knowing whether a semicolon + # was present originally, so we don't know whether + # this is an unknown entity or just a misplaced + # ampersand. + # + # The more common case is a misplaced ampersand, so I + # escape the ampersand and omit the trailing semicolon. + data = "&%s" % ref + if not data: + # This case is different from the one above, because we + # haven't already gone through a supposedly comprehensive + # mapping of entities to Unicode characters. We might not + # have gone through any mapping at all. So the chances are + # very high that this is a real entity, and not a + # misplaced ampersand. + data = "&%s;" % ref + self.handle_data(data) + + def handle_decl(self, data): + "Handle DOCTYPEs and the like as Declaration objects." + self._toStringSubclass(data, Declaration) + + def parse_declaration(self, i): + """Treat a bogus SGML declaration as raw data. Treat a CDATA + declaration as a CData object.""" + j = None + if self.rawdata[i:i+9] == '', i) + if k == -1: + k = len(self.rawdata) + data = self.rawdata[i+9:k] + j = k+3 + self._toStringSubclass(data, CData) + else: + try: + j = SGMLParser.parse_declaration(self, i) + except SGMLParseError: + toHandle = self.rawdata[i:] + self.handle_data(toHandle) + j = i + len(toHandle) + return j + +class BeautifulSoup(BeautifulStoneSoup): + + """This parser knows the following facts about HTML: + + * Some tags have no closing tag and should be interpreted as being + closed as soon as they are encountered. + + * The text inside some tags (ie. 'script') may contain tags which + are not really part of the document and which should be parsed + as text, not tags. If you want to parse the text as tags, you can + always fetch it and parse it explicitly. + + * Tag nesting rules: + + Most tags can't be nested at all. For instance, the occurance of + a
<p> tag should implicitly close the previous <p> tag. + + <p>Para1<p>Para2 + should be transformed into: + <p>Para1</p><p>Para2 + + Some tags can be nested arbitrarily. For instance, the occurance + of a <blockquote> tag should _not_ implicitly close the previous + <blockquote> tag. + + Alice said: <blockquote>Bob said: <blockquote>Blah + should NOT be transformed into: + Alice said: <blockquote>Bob said: </blockquote><blockquote>Blah + + Some tags can be nested, but the nesting is reset by the + interposition of other tags. For instance, a <tr> tag should + implicitly close the previous <tr> tag within the same <table>, + but not close a <tr> tag in another table. + + <table><tr>Blah<tr>Blah + should be transformed into: + <table><tr>Blah</tr><tr>Blah + but, + <tr>Blah<table><tr>Blah + should NOT be transformed into + <tr>Blah<table><tr></tr>
    Blah + + Differing assumptions about tag nesting rules are a major source + of problems with the BeautifulSoup class. If BeautifulSoup is not + treating as nestable a tag your page author treats as nestable, + try ICantBelieveItsBeautifulSoup, MinimalSoup, or + BeautifulStoneSoup before writing your own subclass.""" + + def __init__(self, *args, **kwargs): + if not kwargs.has_key('smartQuotesTo'): + kwargs['smartQuotesTo'] = self.HTML_ENTITIES + kwargs['isHTML'] = True + BeautifulStoneSoup.__init__(self, *args, **kwargs) + + SELF_CLOSING_TAGS = buildTagMap(None, + ('br' , 'hr', 'input', 'img', 'meta', + 'spacer', 'link', 'frame', 'base', 'col')) + + PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea']) + + QUOTE_TAGS = {'script' : None, 'textarea' : None} + + #According to the HTML standard, each of these inline tags can + #contain another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_INLINE_TAGS = ('span', 'font', 'q', 'object', 'bdo', 'sub', 'sup', + 'center') + + #According to the HTML standard, these block tags can contain + #another tag of the same type. Furthermore, it's common + #to actually use these tags this way. + NESTABLE_BLOCK_TAGS = ('blockquote', 'div', 'fieldset', 'ins', 'del') + + #Lists can contain other lists, but there are restrictions. + NESTABLE_LIST_TAGS = { 'ol' : [], + 'ul' : [], + 'li' : ['ul', 'ol'], + 'dl' : [], + 'dd' : ['dl'], + 'dt' : ['dl'] } + + #Tables can contain other tables, but there are restrictions. + NESTABLE_TABLE_TAGS = {'table' : [], + 'tr' : ['table', 'tbody', 'tfoot', 'thead'], + 'td' : ['tr'], + 'th' : ['tr'], + 'thead' : ['table'], + 'tbody' : ['table'], + 'tfoot' : ['table'], + } + + NON_NESTABLE_BLOCK_TAGS = ('address', 'form', 'p', 'pre') + + #If one of these tags is encountered, all tags up to the next tag of + #this type are popped. + RESET_NESTING_TAGS = buildTagMap(None, NESTABLE_BLOCK_TAGS, 'noscript', + NON_NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, + NESTABLE_TABLE_TAGS) + + NESTABLE_TAGS = buildTagMap([], NESTABLE_INLINE_TAGS, NESTABLE_BLOCK_TAGS, + NESTABLE_LIST_TAGS, NESTABLE_TABLE_TAGS) + + # Used to detect the charset in a META tag; see start_meta + CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M) + + def start_meta(self, attrs): + """Beautiful Soup can detect a charset included in a META tag, + try to convert the document to that charset, and re-parse the + document from the beginning.""" + httpEquiv = None + contentType = None + contentTypeIndex = None + tagNeedsEncodingSubstitution = False + + for i in range(0, len(attrs)): + key, value = attrs[i] + key = key.lower() + if key == 'http-equiv': + httpEquiv = value + elif key == 'content': + contentType = value + contentTypeIndex = i + + if httpEquiv and contentType: # It's an interesting meta tag. + match = self.CHARSET_RE.search(contentType) + if match: + if (self.declaredHTMLEncoding is not None or + self.originalEncoding == self.fromEncoding): + # An HTML encoding was sniffed while converting + # the document to Unicode, or an HTML encoding was + # sniffed during a previous pass through the + # document, or an encoding was specified + # explicitly and it worked. Rewrite the meta tag. + def rewrite(match): + return match.group(1) + "%SOUP-ENCODING%" + newAttr = self.CHARSET_RE.sub(rewrite, contentType) + attrs[contentTypeIndex] = (attrs[contentTypeIndex][0], + newAttr) + tagNeedsEncodingSubstitution = True + else: + # This is our first pass through the document. 
+ # Go through it again with the encoding information. + newCharset = match.group(3) + if newCharset and newCharset != self.originalEncoding: + self.declaredHTMLEncoding = newCharset + self._feed(self.declaredHTMLEncoding) + raise StopParsing + pass + tag = self.unknown_starttag("meta", attrs) + if tag and tagNeedsEncodingSubstitution: + tag.containsSubstitutions = True + +class StopParsing(Exception): + pass + +class ICantBelieveItsBeautifulSoup(BeautifulSoup): + + """The BeautifulSoup class is oriented towards skipping over + common HTML errors like unclosed tags. However, sometimes it makes + errors of its own. For instance, consider this fragment: + + FooBar + + This is perfectly valid (if bizarre) HTML. However, the + BeautifulSoup class will implicitly close the first b tag when it + encounters the second 'b'. It will think the author wrote + "FooBar", and didn't close the first 'b' tag, because + there's no real-world reason to bold something that's already + bold. When it encounters '' it will close two more 'b' + tags, for a grand total of three tags closed instead of two. This + can throw off the rest of your document structure. The same is + true of a number of other tags, listed below. + + It's much more common for someone to forget to close a 'b' tag + than to actually use nested 'b' tags, and the BeautifulSoup class + handles the common case. This class handles the not-co-common + case: where you can't believe someone wrote what they did, but + it's valid HTML and BeautifulSoup screwed up by assuming it + wouldn't be.""" + + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS = \ + ('em', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'strong', + 'cite', 'code', 'dfn', 'kbd', 'samp', 'strong', 'var', 'b', + 'big') + + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS = ('noscript',) + + NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_BLOCK_TAGS, + I_CANT_BELIEVE_THEYRE_NESTABLE_INLINE_TAGS) + +class MinimalSoup(BeautifulSoup): + """The MinimalSoup class is for parsing HTML that contains + pathologically bad markup. It makes no assumptions about tag + nesting, but it does know which tags are self-closing, that + ' + safe = Markup('username') + assert unsafe + safe == text_type(escape(unsafe)) + text_type(safe) + + # string interpolations are safe to use too + assert Markup('%s') % '' == \ + '<bad user>' + assert Markup('%(username)s') % { + 'username': '' + } == '<bad user>' + + # an escaped object is markup too + assert type(Markup('foo') + 'bar') is Markup + + # and it implements __html__ by returning itself + x = Markup("foo") + assert x.__html__() is x + + # it also knows how to treat __html__ objects + class Foo(object): + def __html__(self): + return 'awesome' + def __unicode__(self): + return 'awesome' + assert Markup(Foo()) == 'awesome' + assert Markup('%s') % Foo() == \ + 'awesome' + + # escaping and unescaping + assert escape('"<>&\'') == '"<>&'' + assert Markup("Foo & Bar").striptags() == "Foo & Bar" + assert Markup("<test>").unescape() == "" + + def test_template_data(self): + env = Environment(autoescape=True) + t = env.from_string('{% macro say_hello(name) %}' + '
<p>Hello {{ name }}!</p>{% endmacro %}' + '{{ say_hello("<blink>foo</blink>") }}') + escaped_out = '<p>Hello &lt;blink&gt;foo&lt;/blink&gt;!</p>
' + assert t.render() == escaped_out + assert text_type(t.module) == escaped_out + assert escape(t.module) == escaped_out + assert t.module.say_hello('<blink>foo</blink>') == escaped_out + assert escape(t.module.say_hello('<blink>foo</blink>')) == escaped_out + + def test_attr_filter(self): + env = SandboxedEnvironment() + tmpl = env.from_string('{{ cls|attr("__subclasses__")() }}') + self.assert_raises(SecurityError, tmpl.render, cls=int) + + def test_binary_operator_intercepting(self): + def disable_op(left, right): + raise TemplateRuntimeError('that operator so does not work') + for expr, ctx, rv in ('1 + 2', {}, '3'), ('a + 2', {'a': 2}, '4'): + env = SandboxedEnvironment() + env.binop_table['+'] = disable_op + t = env.from_string('{{ %s }}' % expr) + assert t.render(ctx) == rv + env.intercepted_binops = frozenset(['+']) + t = env.from_string('{{ %s }}' % expr) + try: + t.render(ctx) + except TemplateRuntimeError as e: + pass + else: + self.fail('expected runtime error') + + def test_unary_operator_intercepting(self): + def disable_op(arg): + raise TemplateRuntimeError('that operator so does not work') + for expr, ctx, rv in ('-1', {}, '-1'), ('-a', {'a': 2}, '-2'): + env = SandboxedEnvironment() + env.unop_table['-'] = disable_op + t = env.from_string('{{ %s }}' % expr) + assert t.render(ctx) == rv + env.intercepted_unops = frozenset(['-']) + t = env.from_string('{{ %s }}' % expr) + try: + t.render(ctx) + except TemplateRuntimeError as e: + pass + else: + self.fail('expected runtime error') + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(SandboxTestCase)) + return suite diff --git a/pyload/lib/jinja2/testsuite/tests.py b/pyload/lib/jinja2/testsuite/tests.py new file mode 100644 index 000000000..3ece7a8ff --- /dev/null +++ b/pyload/lib/jinja2/testsuite/tests.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +""" + jinja2.testsuite.tests + ~~~~~~~~~~~~~~~~~~~~~~ + + Who tests the tests? + + :copyright: (c) 2010 by the Jinja Team. + :license: BSD, see LICENSE for more details.
+""" +import unittest +from jinja2.testsuite import JinjaTestCase + +from jinja2 import Markup, Environment + +env = Environment() + + +class TestsTestCase(JinjaTestCase): + + def test_defined(self): + tmpl = env.from_string('{{ missing is defined }}|{{ true is defined }}') + assert tmpl.render() == 'False|True' + + def test_even(self): + tmpl = env.from_string('''{{ 1 is even }}|{{ 2 is even }}''') + assert tmpl.render() == 'False|True' + + def test_odd(self): + tmpl = env.from_string('''{{ 1 is odd }}|{{ 2 is odd }}''') + assert tmpl.render() == 'True|False' + + def test_lower(self): + tmpl = env.from_string('''{{ "foo" is lower }}|{{ "FOO" is lower }}''') + assert tmpl.render() == 'True|False' + + def test_typechecks(self): + tmpl = env.from_string(''' + {{ 42 is undefined }} + {{ 42 is defined }} + {{ 42 is none }} + {{ none is none }} + {{ 42 is number }} + {{ 42 is string }} + {{ "foo" is string }} + {{ "foo" is sequence }} + {{ [1] is sequence }} + {{ range is callable }} + {{ 42 is callable }} + {{ range(5) is iterable }} + {{ {} is mapping }} + {{ mydict is mapping }} + {{ [] is mapping }} + ''') + class MyDict(dict): + pass + assert tmpl.render(mydict=MyDict()).split() == [ + 'False', 'True', 'False', 'True', 'True', 'False', + 'True', 'True', 'True', 'True', 'False', 'True', + 'True', 'True', 'False' + ] + + def test_sequence(self): + tmpl = env.from_string( + '{{ [1, 2, 3] is sequence }}|' + '{{ "foo" is sequence }}|' + '{{ 42 is sequence }}' + ) + assert tmpl.render() == 'True|True|False' + + def test_upper(self): + tmpl = env.from_string('{{ "FOO" is upper }}|{{ "foo" is upper }}') + assert tmpl.render() == 'True|False' + + def test_sameas(self): + tmpl = env.from_string('{{ foo is sameas false }}|' + '{{ 0 is sameas false }}') + assert tmpl.render(foo=False) == 'True|False' + + def test_no_paren_for_arg1(self): + tmpl = env.from_string('{{ foo is sameas none }}') + assert tmpl.render(foo=None) == 'True' + + def test_escaped(self): + env = Environment(autoescape=True) + tmpl = env.from_string('{{ x is escaped }}|{{ y is escaped }}') + assert tmpl.render(x='foo', y=Markup('foo')) == 'False|True' + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(TestsTestCase)) + return suite diff --git a/pyload/lib/jinja2/testsuite/utils.py b/pyload/lib/jinja2/testsuite/utils.py new file mode 100644 index 000000000..cab9b09a9 --- /dev/null +++ b/pyload/lib/jinja2/testsuite/utils.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- +""" + jinja2.testsuite.utils + ~~~~~~~~~~~~~~~~~~~~~~ + + Tests utilities jinja uses. + + :copyright: (c) 2010 by the Jinja Team. + :license: BSD, see LICENSE for more details. 
+""" +import gc +import unittest + +import pickle + +from jinja2.testsuite import JinjaTestCase + +from jinja2.utils import LRUCache, escape, object_type_repr + + +class LRUCacheTestCase(JinjaTestCase): + + def test_simple(self): + d = LRUCache(3) + d["a"] = 1 + d["b"] = 2 + d["c"] = 3 + d["a"] + d["d"] = 4 + assert len(d) == 3 + assert 'a' in d and 'c' in d and 'd' in d and 'b' not in d + + def test_pickleable(self): + cache = LRUCache(2) + cache["foo"] = 42 + cache["bar"] = 23 + cache["foo"] + + for protocol in range(3): + copy = pickle.loads(pickle.dumps(cache, protocol)) + assert copy.capacity == cache.capacity + assert copy._mapping == cache._mapping + assert copy._queue == cache._queue + + +class HelpersTestCase(JinjaTestCase): + + def test_object_type_repr(self): + class X(object): + pass + self.assert_equal(object_type_repr(42), 'int object') + self.assert_equal(object_type_repr([]), 'list object') + self.assert_equal(object_type_repr(X()), + 'jinja2.testsuite.utils.X object') + self.assert_equal(object_type_repr(None), 'None') + self.assert_equal(object_type_repr(Ellipsis), 'Ellipsis') + + +class MarkupLeakTestCase(JinjaTestCase): + + def test_markup_leaks(self): + counts = set() + for count in range(20): + for item in range(1000): + escape("foo") + escape("") + escape(u"foo") + escape(u"") + counts.add(len(gc.get_objects())) + assert len(counts) == 1, 'ouch, c extension seems to leak objects' + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(LRUCacheTestCase)) + suite.addTest(unittest.makeSuite(HelpersTestCase)) + + # this test only tests the c extension + if not hasattr(escape, 'func_code'): + suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) + + return suite diff --git a/pyload/lib/jinja2/utils.py b/pyload/lib/jinja2/utils.py new file mode 100644 index 000000000..ddc47da0a --- /dev/null +++ b/pyload/lib/jinja2/utils.py @@ -0,0 +1,520 @@ +# -*- coding: utf-8 -*- +""" + jinja2.utils + ~~~~~~~~~~~~ + + Utility functions. + + :copyright: (c) 2010 by the Jinja Team. + :license: BSD, see LICENSE for more details. +""" +import re +import errno +from collections import deque +from jinja2._compat import text_type, string_types, implements_iterator, \ + allocate_lock, url_quote + + +_word_split_re = re.compile(r'(\s+)') +_punctuation_re = re.compile( + '^(?P(?:%s)*)(?P.*?)(?P(?:%s)*)$' % ( + '|'.join(map(re.escape, ('(', '<', '<'))), + '|'.join(map(re.escape, ('.', ',', ')', '>', '\n', '>'))) + ) +) +_simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') +_striptags_re = re.compile(r'(|<[^>]*>)') +_entity_re = re.compile(r'&([^;]+);') +_letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +_digits = '0123456789' + +# special singleton representing missing values for the runtime +missing = type('MissingType', (), {'__repr__': lambda x: 'missing'})() + +# internal code +internal_code = set() + +concat = u''.join + + +def contextfunction(f): + """This decorator can be used to mark a function or method context callable. + A context callable is passed the active :class:`Context` as first argument when + called from the template. This is useful if a function wants to get access + to the context or functions provided on the context object. 
For example + a function that returns a sorted list of template variables the current + template exports could look like this:: + + @contextfunction + def get_exported_names(context): + return sorted(context.exported_vars) + """ + f.contextfunction = True + return f + + +def evalcontextfunction(f): + """This decorator can be used to mark a function or method as an eval + context callable. This is similar to the :func:`contextfunction` + but instead of passing the context, an evaluation context object is + passed. For more information about the eval context, see + :ref:`eval-context`. + + .. versionadded:: 2.4 + """ + f.evalcontextfunction = True + return f + + +def environmentfunction(f): + """This decorator can be used to mark a function or method as environment + callable. This decorator works exactly like the :func:`contextfunction` + decorator just that the first argument is the active :class:`Environment` + and not context. + """ + f.environmentfunction = True + return f + + +def internalcode(f): + """Marks the function as internally used""" + internal_code.add(f.__code__) + return f + + +def is_undefined(obj): + """Check if the object passed is undefined. This does nothing more than + performing an instance check against :class:`Undefined` but looks nicer. + This can be used for custom filters or tests that want to react to + undefined variables. For example a custom default filter can look like + this:: + + def default(var, default=''): + if is_undefined(var): + return default + return var + """ + from jinja2.runtime import Undefined + return isinstance(obj, Undefined) + + +def consume(iterable): + """Consumes an iterable without doing anything with it.""" + for event in iterable: + pass + + +def clear_caches(): + """Jinja2 keeps internal caches for environments and lexers. These are + used so that Jinja2 doesn't have to recreate environments and lexers all + the time. Normally you don't have to care about that but if you are + messuring memory consumption you may want to clean the caches. + """ + from jinja2.environment import _spontaneous_environments + from jinja2.lexer import _lexer_cache + _spontaneous_environments.clear() + _lexer_cache.clear() + + +def import_string(import_name, silent=False): + """Imports an object based on a string. This is useful if you want to + use import paths as endpoints or something similar. An import path can + be specified either in dotted notation (``xml.sax.saxutils.escape``) + or with a colon as object delimiter (``xml.sax.saxutils:escape``). + + If the `silent` is True the return value will be `None` if the import + fails. + + :return: imported object + """ + try: + if ':' in import_name: + module, obj = import_name.split(':', 1) + elif '.' in import_name: + items = import_name.split('.') + module = '.'.join(items[:-1]) + obj = items[-1] + else: + return __import__(import_name) + return getattr(__import__(module, None, None, [obj]), obj) + except (ImportError, AttributeError): + if not silent: + raise + + +def open_if_exists(filename, mode='rb'): + """Returns a file descriptor for the filename if that file exists, + otherwise `None`. + """ + try: + return open(filename, mode) + except IOError as e: + if e.errno not in (errno.ENOENT, errno.EISDIR): + raise + + +def object_type_repr(obj): + """Returns the name of the object's type. For some recognized + singletons the name of the object is returned instead. (For + example for `None` and `Ellipsis`). 
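+ + Illustrative usage, matching the assertions in the testsuite above:: + + >>> object_type_repr([]) + 'list object' + >>> object_type_repr(None) + 'None'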
+ """ + if obj is None: + return 'None' + elif obj is Ellipsis: + return 'Ellipsis' + # __builtin__ in 2.x, builtins in 3.x + if obj.__class__.__module__ in ('__builtin__', 'builtins'): + name = obj.__class__.__name__ + else: + name = obj.__class__.__module__ + '.' + obj.__class__.__name__ + return '%s object' % name + + +def pformat(obj, verbose=False): + """Prettyprint an object. Either use the `pretty` library or the + builtin `pprint`. + """ + try: + from pretty import pretty + return pretty(obj, verbose=verbose) + except ImportError: + from pprint import pformat + return pformat(obj) + + +def urlize(text, trim_url_limit=None, nofollow=False): + """Converts any URLs in text into clickable links. Works on http://, + https:// and www. links. Links can have trailing punctuation (periods, + commas, close-parens) and leading punctuation (opening parens) and + it'll still do the right thing. + + If trim_url_limit is not None, the URLs in link text will be limited + to trim_url_limit characters. + + If nofollow is True, the URLs in link text will get a rel="nofollow" + attribute. + """ + trim_url = lambda x, limit=trim_url_limit: limit is not None \ + and (x[:limit] + (len(x) >=limit and '...' + or '')) or x + words = _word_split_re.split(text_type(escape(text))) + nofollow_attr = nofollow and ' rel="nofollow"' or '' + for i, word in enumerate(words): + match = _punctuation_re.match(word) + if match: + lead, middle, trail = match.groups() + if middle.startswith('www.') or ( + '@' not in middle and + not middle.startswith('http://') and + not middle.startswith('https://') and + len(middle) > 0 and + middle[0] in _letters + _digits and ( + middle.endswith('.org') or + middle.endswith('.net') or + middle.endswith('.com') + )): + middle = '%s' % (middle, + nofollow_attr, trim_url(middle)) + if middle.startswith('http://') or \ + middle.startswith('https://'): + middle = '%s' % (middle, + nofollow_attr, trim_url(middle)) + if '@' in middle and not middle.startswith('www.') and \ + not ':' in middle and _simple_email_re.match(middle): + middle = '%s' % (middle, middle) + if lead + middle + trail != word: + words[i] = lead + middle + trail + return u''.join(words) + + +def generate_lorem_ipsum(n=5, html=True, min=20, max=100): + """Generate some lorem impsum for the template.""" + from jinja2.constants import LOREM_IPSUM_WORDS + from random import choice, randrange + words = LOREM_IPSUM_WORDS.split() + result = [] + + for _ in range(n): + next_capitalized = True + last_comma = last_fullstop = 0 + word = None + last = None + p = [] + + # each paragraph contains out of 20 to 100 words. + for idx, _ in enumerate(range(randrange(min, max))): + while True: + word = choice(words) + if word != last: + last = word + break + if next_capitalized: + word = word.capitalize() + next_capitalized = False + # add commas + if idx - randrange(3, 8) > last_comma: + last_comma = idx + last_fullstop += 2 + word += ',' + # add end of sentences + if idx - randrange(10, 20) > last_fullstop: + last_comma = last_fullstop = idx + word += '.' + next_capitalized = True + p.append(word) + + # ensure that the paragraph ends with a dot. + p = u' '.join(p) + if p.endswith(','): + p = p[:-1] + '.' + elif not p.endswith('.'): + p += '.' + result.append(p) + + if not html: + return u'\n\n'.join(result) + return Markup(u'\n'.join(u'
<p>%s</p>
    ' % escape(x) for x in result)) + + +def unicode_urlencode(obj, charset='utf-8'): + """URL escapes a single bytestring or unicode string with the + given charset if applicable to URL safe quoting under all rules + that need to be considered under all supported Python versions. + + If non strings are provided they are converted to their unicode + representation first. + """ + if not isinstance(obj, string_types): + obj = text_type(obj) + if isinstance(obj, text_type): + obj = obj.encode(charset) + return text_type(url_quote(obj)) + + +class LRUCache(object): + """A simple LRU Cache implementation.""" + + # this is fast for small capacities (something below 1000) but doesn't + # scale. But as long as it's only used as storage for templates this + # won't do any harm. + + def __init__(self, capacity): + self.capacity = capacity + self._mapping = {} + self._queue = deque() + self._postinit() + + def _postinit(self): + # alias all queue methods for faster lookup + self._popleft = self._queue.popleft + self._pop = self._queue.pop + self._remove = self._queue.remove + self._wlock = allocate_lock() + self._append = self._queue.append + + def __getstate__(self): + return { + 'capacity': self.capacity, + '_mapping': self._mapping, + '_queue': self._queue + } + + def __setstate__(self, d): + self.__dict__.update(d) + self._postinit() + + def __getnewargs__(self): + return (self.capacity,) + + def copy(self): + """Return a shallow copy of the instance.""" + rv = self.__class__(self.capacity) + rv._mapping.update(self._mapping) + rv._queue = deque(self._queue) + return rv + + def get(self, key, default=None): + """Return an item from the cache dict or `default`""" + try: + return self[key] + except KeyError: + return default + + def setdefault(self, key, default=None): + """Set `default` if the key is not in the cache otherwise + leave unchanged. Return the value of this key. + """ + self._wlock.acquire() + try: + try: + return self[key] + except KeyError: + self[key] = default + return default + finally: + self._wlock.release() + + def clear(self): + """Clear the cache.""" + self._wlock.acquire() + try: + self._mapping.clear() + self._queue.clear() + finally: + self._wlock.release() + + def __contains__(self, key): + """Check if a key exists in this cache.""" + return key in self._mapping + + def __len__(self): + """Return the current size of the cache.""" + return len(self._mapping) + + def __repr__(self): + return '<%s %r>' % ( + self.__class__.__name__, + self._mapping + ) + + def __getitem__(self, key): + """Get an item from the cache. Moves the item up so that it has the + highest priority then. + + Raise a `KeyError` if it does not exist. + """ + self._wlock.acquire() + try: + rv = self._mapping[key] + if self._queue[-1] != key: + try: + self._remove(key) + except ValueError: + # if something removed the key from the container + # when we read, ignore the ValueError that we would + # get otherwise. + pass + self._append(key) + return rv + finally: + self._wlock.release() + + def __setitem__(self, key, value): + """Sets the value for an item. Moves the item up so that it + has the highest priority then. + """ + self._wlock.acquire() + try: + if key in self._mapping: + self._remove(key) + elif len(self._mapping) == self.capacity: + del self._mapping[self._popleft()] + self._append(key) + self._mapping[key] = value + finally: + self._wlock.release() + + def __delitem__(self, key): + """Remove an item from the cache dict. + Raise a `KeyError` if it does not exist. 
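+ + A short sketch of the intended behavior:: + + >>> d = LRUCache(2) + >>> d['a'] = 1 + >>> del d['a'] + >>> 'a' in d + False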
+ """ + self._wlock.acquire() + try: + del self._mapping[key] + try: + self._remove(key) + except ValueError: + # __getitem__ is not locked, it might happen + pass + finally: + self._wlock.release() + + def items(self): + """Return a list of items.""" + result = [(key, self._mapping[key]) for key in list(self._queue)] + result.reverse() + return result + + def iteritems(self): + """Iterate over all items.""" + return iter(self.items()) + + def values(self): + """Return a list of all values.""" + return [x[1] for x in self.items()] + + def itervalue(self): + """Iterate over all values.""" + return iter(self.values()) + + def keys(self): + """Return a list of all keys ordered by most recent usage.""" + return list(self) + + def iterkeys(self): + """Iterate over all keys in the cache dict, ordered by + the most recent usage. + """ + return reversed(tuple(self._queue)) + + __iter__ = iterkeys + + def __reversed__(self): + """Iterate over the values in the cache dict, oldest items + coming first. + """ + return iter(tuple(self._queue)) + + __copy__ = copy + + +# register the LRU cache as mutable mapping if possible +try: + from collections import MutableMapping + MutableMapping.register(LRUCache) +except ImportError: + pass + + +@implements_iterator +class Cycler(object): + """A cycle helper for templates.""" + + def __init__(self, *items): + if not items: + raise RuntimeError('at least one item has to be provided') + self.items = items + self.reset() + + def reset(self): + """Resets the cycle.""" + self.pos = 0 + + @property + def current(self): + """Returns the current item.""" + return self.items[self.pos] + + def __next__(self): + """Goes one item ahead and returns it.""" + rv = self.current + self.pos = (self.pos + 1) % len(self.items) + return rv + + +class Joiner(object): + """A joining helper for templates.""" + + def __init__(self, sep=u', '): + self.sep = sep + self.used = False + + def __call__(self): + if not self.used: + self.used = True + return u'' + return self.sep + + +# Imported here because that's where it was in the past +from markupsafe import Markup, escape, soft_unicode diff --git a/pyload/lib/jinja2/visitor.py b/pyload/lib/jinja2/visitor.py new file mode 100644 index 000000000..413e7c309 --- /dev/null +++ b/pyload/lib/jinja2/visitor.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- +""" + jinja2.visitor + ~~~~~~~~~~~~~~ + + This module implements a visitor for the nodes. + + :copyright: (c) 2010 by the Jinja Team. + :license: BSD. +""" +from jinja2.nodes import Node + + +class NodeVisitor(object): + """Walks the abstract syntax tree and call visitor functions for every + node found. The visitor functions may return values which will be + forwarded by the `visit` method. + + Per default the visitor functions for the nodes are ``'visit_'`` + + class name of the node. So a `TryFinally` node visit function would + be `visit_TryFinally`. This behavior can be changed by overriding + the `get_visitor` function. If no visitor function exists for a node + (return value `None`) the `generic_visit` visitor is used instead. + """ + + def get_visitor(self, node): + """Return the visitor function for this node or `None` if no visitor + exists for this node. In that case the generic visit function is + used instead. 
+ """ + method = 'visit_' + node.__class__.__name__ + return getattr(self, method, None) + + def visit(self, node, *args, **kwargs): + """Visit a node.""" + f = self.get_visitor(node) + if f is not None: + return f(node, *args, **kwargs) + return self.generic_visit(node, *args, **kwargs) + + def generic_visit(self, node, *args, **kwargs): + """Called if no explicit visitor function exists for a node.""" + for node in node.iter_child_nodes(): + self.visit(node, *args, **kwargs) + + +class NodeTransformer(NodeVisitor): + """Walks the abstract syntax tree and allows modifications of nodes. + + The `NodeTransformer` will walk the AST and use the return value of the + visitor functions to replace or remove the old node. If the return + value of the visitor function is `None` the node will be removed + from the previous location otherwise it's replaced with the return + value. The return value may be the original node in which case no + replacement takes place. + """ + + def generic_visit(self, node, *args, **kwargs): + for field, old_value in node.iter_fields(): + if isinstance(old_value, list): + new_values = [] + for value in old_value: + if isinstance(value, Node): + value = self.visit(value, *args, **kwargs) + if value is None: + continue + elif not isinstance(value, Node): + new_values.extend(value) + continue + new_values.append(value) + old_value[:] = new_values + elif isinstance(old_value, Node): + new_node = self.visit(old_value, *args, **kwargs) + if new_node is None: + delattr(node, field) + else: + setattr(node, field, new_node) + return node + + def visit_list(self, node, *args, **kwargs): + """As transformers may return lists in some places this method + can be used to enforce a list as return value. + """ + rv = self.visit(node, *args, **kwargs) + if not isinstance(rv, list): + rv = [rv] + return rv diff --git a/pyload/lib/markupsafe/__init__.py b/pyload/lib/markupsafe/__init__.py new file mode 100644 index 000000000..275540154 --- /dev/null +++ b/pyload/lib/markupsafe/__init__.py @@ -0,0 +1,298 @@ +# -*- coding: utf-8 -*- +""" + markupsafe + ~~~~~~~~~~ + + Implements a Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +import re +import string +from collections import Mapping +from markupsafe._compat import text_type, string_types, int_types, \ + unichr, iteritems, PY2 + + +__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent'] + + +_striptags_re = re.compile(r'(|<[^>]*>)') +_entity_re = re.compile(r'&([^;]+);') + + +class Markup(text_type): + r"""Marks a string as being safe for inclusion in HTML/XML output without + needing to be escaped. This implements the `__html__` interface a couple + of frameworks and web applications use. :class:`Markup` is a direct + subclass of `unicode` and provides all the methods of `unicode` just that + it escapes arguments passed and always returns `Markup`. + + The `escape` function returns markup objects so that double escaping can't + happen. + + The constructor of the :class:`Markup` class can be used for three + different things: When passed an unicode object it's assumed to be safe, + when passed an object with an HTML representation (has an `__html__` + method) that representation is used, otherwise the object passed is + converted into a unicode string and then assumed to be safe: + + >>> Markup("Hello World!") + Markup(u'Hello World!') + >>> class Foo(object): + ... def __html__(self): + ... return 'foo' + ... 
+ >>> Markup(Foo()) + Markup(u'foo') + + If you want object passed being always treated as unsafe you can use the + :meth:`escape` classmethod to create a :class:`Markup` object: + + >>> Markup.escape("Hello World!") + Markup(u'Hello <em>World</em>!') + + Operations on a markup string are markup aware which means that all + arguments are passed through the :func:`escape` function: + + >>> em = Markup("%s") + >>> em % "foo & bar" + Markup(u'foo & bar') + >>> strong = Markup("%(text)s") + >>> strong % {'text': 'hacker here'} + Markup(u'<blink>hacker here</blink>') + >>> Markup("Hello ") + "" + Markup(u'Hello <foo>') + """ + __slots__ = () + + def __new__(cls, base=u'', encoding=None, errors='strict'): + if hasattr(base, '__html__'): + base = base.__html__() + if encoding is None: + return text_type.__new__(cls, base) + return text_type.__new__(cls, base, encoding, errors) + + def __html__(self): + return self + + def __add__(self, other): + if isinstance(other, string_types) or hasattr(other, '__html__'): + return self.__class__(super(Markup, self).__add__(self.escape(other))) + return NotImplemented + + def __radd__(self, other): + if hasattr(other, '__html__') or isinstance(other, string_types): + return self.escape(other).__add__(self) + return NotImplemented + + def __mul__(self, num): + if isinstance(num, int_types): + return self.__class__(text_type.__mul__(self, num)) + return NotImplemented + __rmul__ = __mul__ + + def __mod__(self, arg): + if isinstance(arg, tuple): + arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg) + else: + arg = _MarkupEscapeHelper(arg, self.escape) + return self.__class__(text_type.__mod__(self, arg)) + + def __repr__(self): + return '%s(%s)' % ( + self.__class__.__name__, + text_type.__repr__(self) + ) + + def join(self, seq): + return self.__class__(text_type.join(self, map(self.escape, seq))) + join.__doc__ = text_type.join.__doc__ + + def split(self, *args, **kwargs): + return list(map(self.__class__, text_type.split(self, *args, **kwargs))) + split.__doc__ = text_type.split.__doc__ + + def rsplit(self, *args, **kwargs): + return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs))) + rsplit.__doc__ = text_type.rsplit.__doc__ + + def splitlines(self, *args, **kwargs): + return list(map(self.__class__, text_type.splitlines( + self, *args, **kwargs))) + splitlines.__doc__ = text_type.splitlines.__doc__ + + def unescape(self): + r"""Unescape markup again into an text_type string. This also resolves + known HTML4 and XHTML entities: + + >>> Markup("Main » About").unescape() + u'Main \xbb About' + """ + from markupsafe._constants import HTML_ENTITIES + def handle_match(m): + name = m.group(1) + if name in HTML_ENTITIES: + return unichr(HTML_ENTITIES[name]) + try: + if name[:2] in ('#x', '#X'): + return unichr(int(name[2:], 16)) + elif name.startswith('#'): + return unichr(int(name[1:])) + except ValueError: + pass + return u'' + return _entity_re.sub(handle_match, text_type(self)) + + def striptags(self): + r"""Unescape markup into an text_type string and strip all tags. This + also resolves known HTML4 and XHTML entities. Whitespace is + normalized to one: + + >>> Markup("Main » About").striptags() + u'Main \xbb About' + """ + stripped = u' '.join(_striptags_re.sub('', self).split()) + return Markup(stripped).unescape() + + @classmethod + def escape(cls, s): + """Escape the string. Works like :func:`escape` with the difference + that for subclasses of :class:`Markup` this function would return the + correct subclass. 
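+ + For example:: + + >>> Markup.escape('<em>foo</em>') + Markup(u'&lt;em&gt;foo&lt;/em&gt;')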
+ """ + rv = escape(s) + if rv.__class__ is not cls: + return cls(rv) + return rv + + def make_simple_escaping_wrapper(name): + orig = getattr(text_type, name) + def func(self, *args, **kwargs): + args = _escape_argspec(list(args), enumerate(args), self.escape) + _escape_argspec(kwargs, iteritems(kwargs), self.escape) + return self.__class__(orig(self, *args, **kwargs)) + func.__name__ = orig.__name__ + func.__doc__ = orig.__doc__ + return func + + for method in '__getitem__', 'capitalize', \ + 'title', 'lower', 'upper', 'replace', 'ljust', \ + 'rjust', 'lstrip', 'rstrip', 'center', 'strip', \ + 'translate', 'expandtabs', 'swapcase', 'zfill': + locals()[method] = make_simple_escaping_wrapper(method) + + # new in python 2.5 + if hasattr(text_type, 'partition'): + def partition(self, sep): + return tuple(map(self.__class__, + text_type.partition(self, self.escape(sep)))) + def rpartition(self, sep): + return tuple(map(self.__class__, + text_type.rpartition(self, self.escape(sep)))) + + # new in python 2.6 + if hasattr(text_type, 'format'): + def format(*args, **kwargs): + self, args = args[0], args[1:] + formatter = EscapeFormatter(self.escape) + kwargs = _MagicFormatMapping(args, kwargs) + return self.__class__(formatter.vformat(self, args, kwargs)) + + def __html_format__(self, format_spec): + if format_spec: + raise ValueError('Unsupported format specification ' + 'for Markup.') + return self + + # not in python 3 + if hasattr(text_type, '__getslice__'): + __getslice__ = make_simple_escaping_wrapper('__getslice__') + + del method, make_simple_escaping_wrapper + + +class _MagicFormatMapping(Mapping): + """This class implements a dummy wrapper to fix a bug in the Python + standard library for string formatting. + + See http://bugs.python.org/issue13598 for information about why + this is necessary. 
+ """ + + def __init__(self, args, kwargs): + self._args = args + self._kwargs = kwargs + self._last_index = 0 + + def __getitem__(self, key): + if key == '': + idx = self._last_index + self._last_index += 1 + try: + return self._args[idx] + except LookupError: + pass + key = str(idx) + return self._kwargs[key] + + def __iter__(self): + return iter(self._kwargs) + + def __len__(self): + return len(self._kwargs) + + +if hasattr(text_type, 'format'): + class EscapeFormatter(string.Formatter): + + def __init__(self, escape): + self.escape = escape + + def format_field(self, value, format_spec): + if hasattr(value, '__html_format__'): + rv = value.__html_format__(format_spec) + elif hasattr(value, '__html__'): + if format_spec: + raise ValueError('No format specification allowed ' + 'when formatting an object with ' + 'its __html__ method.') + rv = value.__html__() + else: + rv = string.Formatter.format_field(self, value, format_spec) + return text_type(self.escape(rv)) + + +def _escape_argspec(obj, iterable, escape): + """Helper for various string-wrapped functions.""" + for key, value in iterable: + if hasattr(value, '__html__') or isinstance(value, string_types): + obj[key] = escape(value) + return obj + + +class _MarkupEscapeHelper(object): + """Helper for Markup.__mod__""" + + def __init__(self, obj, escape): + self.obj = obj + self.escape = escape + + __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape) + __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj)) + __repr__ = lambda s: str(s.escape(repr(s.obj))) + __int__ = lambda s: int(s.obj) + __float__ = lambda s: float(s.obj) + + +# we have to import it down here as the speedups and native +# modules imports the markup type which is define above. +try: + from markupsafe._speedups import escape, escape_silent, soft_unicode +except ImportError: + from markupsafe._native import escape, escape_silent, soft_unicode + +if not PY2: + soft_str = soft_unicode + __all__.append('soft_str') diff --git a/pyload/lib/markupsafe/_compat.py b/pyload/lib/markupsafe/_compat.py new file mode 100644 index 000000000..62e5632ad --- /dev/null +++ b/pyload/lib/markupsafe/_compat.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._compat + ~~~~~~~~~~~~~~~~~~ + + Compatibility module for different Python versions. + + :copyright: (c) 2013 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +import sys + +PY2 = sys.version_info[0] == 2 + +if not PY2: + text_type = str + string_types = (str,) + unichr = chr + int_types = (int,) + iteritems = lambda x: iter(x.items()) +else: + text_type = unicode + string_types = (str, unicode) + unichr = unichr + int_types = (int, long) + iteritems = lambda x: x.iteritems() diff --git a/pyload/lib/markupsafe/_constants.py b/pyload/lib/markupsafe/_constants.py new file mode 100644 index 000000000..919bf03c5 --- /dev/null +++ b/pyload/lib/markupsafe/_constants.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._constants + ~~~~~~~~~~~~~~~~~~~~~ + + Highlevel implementation of the Markup string. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. 
+""" + + +HTML_ENTITIES = { + 'AElig': 198, + 'Aacute': 193, + 'Acirc': 194, + 'Agrave': 192, + 'Alpha': 913, + 'Aring': 197, + 'Atilde': 195, + 'Auml': 196, + 'Beta': 914, + 'Ccedil': 199, + 'Chi': 935, + 'Dagger': 8225, + 'Delta': 916, + 'ETH': 208, + 'Eacute': 201, + 'Ecirc': 202, + 'Egrave': 200, + 'Epsilon': 917, + 'Eta': 919, + 'Euml': 203, + 'Gamma': 915, + 'Iacute': 205, + 'Icirc': 206, + 'Igrave': 204, + 'Iota': 921, + 'Iuml': 207, + 'Kappa': 922, + 'Lambda': 923, + 'Mu': 924, + 'Ntilde': 209, + 'Nu': 925, + 'OElig': 338, + 'Oacute': 211, + 'Ocirc': 212, + 'Ograve': 210, + 'Omega': 937, + 'Omicron': 927, + 'Oslash': 216, + 'Otilde': 213, + 'Ouml': 214, + 'Phi': 934, + 'Pi': 928, + 'Prime': 8243, + 'Psi': 936, + 'Rho': 929, + 'Scaron': 352, + 'Sigma': 931, + 'THORN': 222, + 'Tau': 932, + 'Theta': 920, + 'Uacute': 218, + 'Ucirc': 219, + 'Ugrave': 217, + 'Upsilon': 933, + 'Uuml': 220, + 'Xi': 926, + 'Yacute': 221, + 'Yuml': 376, + 'Zeta': 918, + 'aacute': 225, + 'acirc': 226, + 'acute': 180, + 'aelig': 230, + 'agrave': 224, + 'alefsym': 8501, + 'alpha': 945, + 'amp': 38, + 'and': 8743, + 'ang': 8736, + 'apos': 39, + 'aring': 229, + 'asymp': 8776, + 'atilde': 227, + 'auml': 228, + 'bdquo': 8222, + 'beta': 946, + 'brvbar': 166, + 'bull': 8226, + 'cap': 8745, + 'ccedil': 231, + 'cedil': 184, + 'cent': 162, + 'chi': 967, + 'circ': 710, + 'clubs': 9827, + 'cong': 8773, + 'copy': 169, + 'crarr': 8629, + 'cup': 8746, + 'curren': 164, + 'dArr': 8659, + 'dagger': 8224, + 'darr': 8595, + 'deg': 176, + 'delta': 948, + 'diams': 9830, + 'divide': 247, + 'eacute': 233, + 'ecirc': 234, + 'egrave': 232, + 'empty': 8709, + 'emsp': 8195, + 'ensp': 8194, + 'epsilon': 949, + 'equiv': 8801, + 'eta': 951, + 'eth': 240, + 'euml': 235, + 'euro': 8364, + 'exist': 8707, + 'fnof': 402, + 'forall': 8704, + 'frac12': 189, + 'frac14': 188, + 'frac34': 190, + 'frasl': 8260, + 'gamma': 947, + 'ge': 8805, + 'gt': 62, + 'hArr': 8660, + 'harr': 8596, + 'hearts': 9829, + 'hellip': 8230, + 'iacute': 237, + 'icirc': 238, + 'iexcl': 161, + 'igrave': 236, + 'image': 8465, + 'infin': 8734, + 'int': 8747, + 'iota': 953, + 'iquest': 191, + 'isin': 8712, + 'iuml': 239, + 'kappa': 954, + 'lArr': 8656, + 'lambda': 955, + 'lang': 9001, + 'laquo': 171, + 'larr': 8592, + 'lceil': 8968, + 'ldquo': 8220, + 'le': 8804, + 'lfloor': 8970, + 'lowast': 8727, + 'loz': 9674, + 'lrm': 8206, + 'lsaquo': 8249, + 'lsquo': 8216, + 'lt': 60, + 'macr': 175, + 'mdash': 8212, + 'micro': 181, + 'middot': 183, + 'minus': 8722, + 'mu': 956, + 'nabla': 8711, + 'nbsp': 160, + 'ndash': 8211, + 'ne': 8800, + 'ni': 8715, + 'not': 172, + 'notin': 8713, + 'nsub': 8836, + 'ntilde': 241, + 'nu': 957, + 'oacute': 243, + 'ocirc': 244, + 'oelig': 339, + 'ograve': 242, + 'oline': 8254, + 'omega': 969, + 'omicron': 959, + 'oplus': 8853, + 'or': 8744, + 'ordf': 170, + 'ordm': 186, + 'oslash': 248, + 'otilde': 245, + 'otimes': 8855, + 'ouml': 246, + 'para': 182, + 'part': 8706, + 'permil': 8240, + 'perp': 8869, + 'phi': 966, + 'pi': 960, + 'piv': 982, + 'plusmn': 177, + 'pound': 163, + 'prime': 8242, + 'prod': 8719, + 'prop': 8733, + 'psi': 968, + 'quot': 34, + 'rArr': 8658, + 'radic': 8730, + 'rang': 9002, + 'raquo': 187, + 'rarr': 8594, + 'rceil': 8969, + 'rdquo': 8221, + 'real': 8476, + 'reg': 174, + 'rfloor': 8971, + 'rho': 961, + 'rlm': 8207, + 'rsaquo': 8250, + 'rsquo': 8217, + 'sbquo': 8218, + 'scaron': 353, + 'sdot': 8901, + 'sect': 167, + 'shy': 173, + 'sigma': 963, + 'sigmaf': 962, + 'sim': 8764, + 'spades': 9824, + 'sub': 8834, + 'sube': 8838, + 'sum': 
8721, + 'sup': 8835, + 'sup1': 185, + 'sup2': 178, + 'sup3': 179, + 'supe': 8839, + 'szlig': 223, + 'tau': 964, + 'there4': 8756, + 'theta': 952, + 'thetasym': 977, + 'thinsp': 8201, + 'thorn': 254, + 'tilde': 732, + 'times': 215, + 'trade': 8482, + 'uArr': 8657, + 'uacute': 250, + 'uarr': 8593, + 'ucirc': 251, + 'ugrave': 249, + 'uml': 168, + 'upsih': 978, + 'upsilon': 965, + 'uuml': 252, + 'weierp': 8472, + 'xi': 958, + 'yacute': 253, + 'yen': 165, + 'yuml': 255, + 'zeta': 950, + 'zwj': 8205, + 'zwnj': 8204 +} diff --git a/pyload/lib/markupsafe/_native.py b/pyload/lib/markupsafe/_native.py new file mode 100644 index 000000000..5e83f10a1 --- /dev/null +++ b/pyload/lib/markupsafe/_native.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +""" + markupsafe._native + ~~~~~~~~~~~~~~~~~~ + + Native Python implementation the C module is not compiled. + + :copyright: (c) 2010 by Armin Ronacher. + :license: BSD, see LICENSE for more details. +""" +from markupsafe import Markup +from markupsafe._compat import text_type + + +def escape(s): + """Convert the characters &, <, >, ' and " in string s to HTML-safe + sequences. Use this if you need to display text that might contain + such characters in HTML. Marks return value as markup string. + """ + if hasattr(s, '__html__'): + return s.__html__() + return Markup(text_type(s) + .replace('&', '&') + .replace('>', '>') + .replace('<', '<') + .replace("'", ''') + .replace('"', '"') + ) + + +def escape_silent(s): + """Like :func:`escape` but converts `None` into an empty + markup string. + """ + if s is None: + return Markup() + return escape(s) + + +def soft_unicode(s): + """Make a string unicode if it isn't already. That way a markup + string is not converted back to unicode. + """ + if not isinstance(s, text_type): + s = text_type(s) + return s diff --git a/pyload/lib/markupsafe/_speedups.c b/pyload/lib/markupsafe/_speedups.c new file mode 100644 index 000000000..f349febf2 --- /dev/null +++ b/pyload/lib/markupsafe/_speedups.c @@ -0,0 +1,239 @@ +/** + * markupsafe._speedups + * ~~~~~~~~~~~~~~~~~~~~ + * + * This module implements functions for automatic escaping in C for better + * performance. + * + * :copyright: (c) 2010 by Armin Ronacher. + * :license: BSD. 
+ */ + +#include <Python.h> + +#define ESCAPED_CHARS_TABLE_SIZE 63 +#define UNICHR(x) (PyUnicode_AS_UNICODE((PyUnicodeObject*)PyUnicode_DecodeASCII(x, strlen(x), NULL))); + +#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) +typedef int Py_ssize_t; +#define PY_SSIZE_T_MAX INT_MAX +#define PY_SSIZE_T_MIN INT_MIN +#endif + + +static PyObject* markup; +static Py_ssize_t escaped_chars_delta_len[ESCAPED_CHARS_TABLE_SIZE]; +static Py_UNICODE *escaped_chars_repl[ESCAPED_CHARS_TABLE_SIZE]; + +static int +init_constants(void) +{ + PyObject *module; + /* mapping of characters to replace */ + escaped_chars_repl['"'] = UNICHR("&#34;"); + escaped_chars_repl['\''] = UNICHR("&#39;"); + escaped_chars_repl['&'] = UNICHR("&amp;"); + escaped_chars_repl['<'] = UNICHR("&lt;"); + escaped_chars_repl['>'] = UNICHR("&gt;"); + + /* lengths of those characters when replaced - 1 */ + memset(escaped_chars_delta_len, 0, sizeof (escaped_chars_delta_len)); + escaped_chars_delta_len['"'] = escaped_chars_delta_len['\''] = \ + escaped_chars_delta_len['&'] = 4; + escaped_chars_delta_len['<'] = escaped_chars_delta_len['>'] = 3; + + /* import markup type so that we can mark the return value */ + module = PyImport_ImportModule("markupsafe"); + if (!module) + return 0; + markup = PyObject_GetAttrString(module, "Markup"); + Py_DECREF(module); + + return 1; +} + +static PyObject* +escape_unicode(PyUnicodeObject *in) +{ + PyUnicodeObject *out; + Py_UNICODE *inp = PyUnicode_AS_UNICODE(in); + const Py_UNICODE *inp_end = PyUnicode_AS_UNICODE(in) + PyUnicode_GET_SIZE(in); + Py_UNICODE *next_escp; + Py_UNICODE *outp; + Py_ssize_t delta=0, erepl=0, delta_len=0; + + /* First we need to figure out how long the escaped string will be */ + while (*(inp) || inp < inp_end) { + if (*inp < ESCAPED_CHARS_TABLE_SIZE) { + delta += escaped_chars_delta_len[*inp]; + erepl += !!escaped_chars_delta_len[*inp]; + } + ++inp; + } + + /* Do we need to escape anything at all?
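* If nothing needs replacing, the original string object is handed + * back with a fresh reference instead of building an escaped copy.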
*/ + if (!erepl) { + Py_INCREF(in); + return (PyObject*)in; + } + + out = (PyUnicodeObject*)PyUnicode_FromUnicode(NULL, PyUnicode_GET_SIZE(in) + delta); + if (!out) + return NULL; + + outp = PyUnicode_AS_UNICODE(out); + inp = PyUnicode_AS_UNICODE(in); + while (erepl-- > 0) { + /* look for the next substitution */ + next_escp = inp; + while (next_escp < inp_end) { + if (*next_escp < ESCAPED_CHARS_TABLE_SIZE && + (delta_len = escaped_chars_delta_len[*next_escp])) { + ++delta_len; + break; + } + ++next_escp; + } + + if (next_escp > inp) { + /* copy unescaped chars between inp and next_escp */ + Py_UNICODE_COPY(outp, inp, next_escp-inp); + outp += next_escp - inp; + } + + /* escape 'next_escp' */ + Py_UNICODE_COPY(outp, escaped_chars_repl[*next_escp], delta_len); + outp += delta_len; + + inp = next_escp + 1; + } + if (inp < inp_end) + Py_UNICODE_COPY(outp, inp, PyUnicode_GET_SIZE(in) - (inp - PyUnicode_AS_UNICODE(in))); + + return (PyObject*)out; +} + + +static PyObject* +escape(PyObject *self, PyObject *text) +{ + PyObject *s = NULL, *rv = NULL, *html; + + /* we don't have to escape integers, bools or floats */ + if (PyLong_CheckExact(text) || +#if PY_MAJOR_VERSION < 3 + PyInt_CheckExact(text) || +#endif + PyFloat_CheckExact(text) || PyBool_Check(text) || + text == Py_None) + return PyObject_CallFunctionObjArgs(markup, text, NULL); + + /* if the object has an __html__ method that performs the escaping */ + html = PyObject_GetAttrString(text, "__html__"); + if (html) { + rv = PyObject_CallObject(html, NULL); + Py_DECREF(html); + return rv; + } + + /* otherwise make the object unicode if it isn't, then escape */ + PyErr_Clear(); + if (!PyUnicode_Check(text)) { +#if PY_MAJOR_VERSION < 3 + PyObject *unicode = PyObject_Unicode(text); +#else + PyObject *unicode = PyObject_Str(text); +#endif + if (!unicode) + return NULL; + s = escape_unicode((PyUnicodeObject*)unicode); + Py_DECREF(unicode); + } + else + s = escape_unicode((PyUnicodeObject*)text); + + /* convert the unicode string into a markup object. */ + rv = PyObject_CallFunctionObjArgs(markup, (PyObject*)s, NULL); + Py_DECREF(s); + return rv; +} + + +static PyObject* +escape_silent(PyObject *self, PyObject *text) +{ + if (text != Py_None) + return escape(self, text); + return PyObject_CallFunctionObjArgs(markup, NULL); +} + + +static PyObject* +soft_unicode(PyObject *self, PyObject *s) +{ + if (!PyUnicode_Check(s)) +#if PY_MAJOR_VERSION < 3 + return PyObject_Unicode(s); +#else + return PyObject_Str(s); +#endif + Py_INCREF(s); + return s; +} + + +static PyMethodDef module_methods[] = { + {"escape", (PyCFunction)escape, METH_O, + "escape(s) -> markup\n\n" + "Convert the characters &, <, >, ', and \" in string s to HTML-safe\n" + "sequences. Use this if you need to display text that might contain\n" + "such characters in HTML. Marks return value as markup string."}, + {"escape_silent", (PyCFunction)escape_silent, METH_O, + "escape_silent(s) -> markup\n\n" + "Like escape but converts None to an empty string."}, + {"soft_unicode", (PyCFunction)soft_unicode, METH_O, + "soft_unicode(object) -> string\n\n" + "Make a string unicode if it isn't already. 
That way a markup\n" + "string is not converted back to unicode."}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + +#if PY_MAJOR_VERSION < 3 + +#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */ +#define PyMODINIT_FUNC void +#endif +PyMODINIT_FUNC +init_speedups(void) +{ + if (!init_constants()) + return; + + Py_InitModule3("markupsafe._speedups", module_methods, ""); +} + +#else /* Python 3.x module initialization */ + +static struct PyModuleDef module_definition = { + PyModuleDef_HEAD_INIT, + "markupsafe._speedups", + NULL, + -1, + module_methods, + NULL, + NULL, + NULL, + NULL +}; + +PyMODINIT_FUNC +PyInit__speedups(void) +{ + if (!init_constants()) + return NULL; + + return PyModule_Create(&module_definition); +} + +#endif diff --git a/pyload/lib/markupsafe/tests.py b/pyload/lib/markupsafe/tests.py new file mode 100644 index 000000000..636993629 --- /dev/null +++ b/pyload/lib/markupsafe/tests.py @@ -0,0 +1,179 @@ +# -*- coding: utf-8 -*- +import gc +import sys +import unittest +from markupsafe import Markup, escape, escape_silent +from markupsafe._compat import text_type + + +class MarkupTestCase(unittest.TestCase): + + def test_adding(self): + # adding two strings should escape the unsafe one + unsafe = '' + safe = Markup('username') + assert unsafe + safe == text_type(escape(unsafe)) + text_type(safe) + + def test_string_interpolation(self): + # string interpolations are safe to use too + assert Markup('%s') % '' == \ + '<bad user>' + assert Markup('%(username)s') % { + 'username': '' + } == '<bad user>' + + assert Markup('%i') % 3.14 == '3' + assert Markup('%.2f') % 3.14 == '3.14' + + def test_type_behavior(self): + # an escaped object is markup too + assert type(Markup('foo') + 'bar') is Markup + + # and it implements __html__ by returning itself + x = Markup("foo") + assert x.__html__() is x + + def test_html_interop(self): + # it also knows how to treat __html__ objects + class Foo(object): + def __html__(self): + return 'awesome' + def __unicode__(self): + return 'awesome' + __str__ = __unicode__ + assert Markup(Foo()) == 'awesome' + assert Markup('%s') % Foo() == \ + 'awesome' + + def test_tuple_interpol(self): + self.assertEqual(Markup('%s:%s') % ( + '', + '', + ), Markup(u'<foo>:<bar>')) + + def test_dict_interpol(self): + self.assertEqual(Markup('%(foo)s') % { + 'foo': '', + }, Markup(u'<foo>')) + self.assertEqual(Markup('%(foo)s:%(bar)s') % { + 'foo': '', + 'bar': '', + }, Markup(u'<foo>:<bar>')) + + def test_escaping(self): + # escaping and unescaping + assert escape('"<>&\'') == '"<>&'' + assert Markup("Foo & Bar").striptags() == "Foo & Bar" + assert Markup("<test>").unescape() == "" + + def test_formatting(self): + for actual, expected in ( + (Markup('%i') % 3.14, '3'), + (Markup('%.2f') % 3.14159, '3.14'), + (Markup('%s %s %s') % ('<', 123, '>'), '< 123 >'), + (Markup('{awesome}').format(awesome=''), + '<awesome>'), + (Markup('{0[1][bar]}').format([0, {'bar': ''}]), + '<bar/>'), + (Markup('{0[1][bar]}').format([0, {'bar': Markup('')}]), + '')): + assert actual == expected, "%r should be %r!" 
% (actual, expected) + + # This is new in 2.7 + if sys.version_info >= (2, 7): + def test_formatting_empty(self): + formatted = Markup('{}').format(0) + assert formatted == Markup('0') + + def test_custom_formatting(self): + class HasHTMLOnly(object): + def __html__(self): + return Markup('<foo>') + + class HasHTMLAndFormat(object): + def __html__(self): + return Markup('<foo>') + def __html_format__(self, spec): + return Markup('<FORMAT>') + + assert Markup('{0}').format(HasHTMLOnly()) == Markup('<foo>') + assert Markup('{0}').format(HasHTMLAndFormat()) == Markup('<FORMAT>') + + def test_complex_custom_formatting(self): + class User(object): + def __init__(self, id, username): + self.id = id + self.username = username + def __html_format__(self, format_spec): + if format_spec == 'link': + return Markup('<a href="/user/{0}">{1}</a>').format( + self.id, + self.__html__(), + ) + elif format_spec: + raise ValueError('Invalid format spec') + return self.__html__() + def __html__(self): + return Markup('<span class=user>{0}</span>').format(self.username) + + user = User(1, 'foo') + assert Markup('
<p>User: {0:link}').format(user) == \ + Markup('<p>
User: <a href="/user/1"><span class=user>foo</span></a>') + + def test_all_set(self): + import markupsafe as markup + for item in markup.__all__: + getattr(markup, item) + + def test_escape_silent(self): + assert escape_silent(None) == Markup() + assert escape(None) == Markup(None) + assert escape_silent('<foo>') == Markup(u'&lt;foo&gt;') + + def test_splitting(self): + self.assertEqual(Markup('a b').split(), [ + Markup('a'), + Markup('b') + ]) + self.assertEqual(Markup('a b').rsplit(), [ + Markup('a'), + Markup('b') + ]) + self.assertEqual(Markup('a\nb').splitlines(), [ + Markup('a'), + Markup('b') + ]) + + def test_mul(self): + self.assertEqual(Markup('a') * 3, Markup('aaa')) + + +class MarkupLeakTestCase(unittest.TestCase): + + def test_markup_leaks(self): + counts = set() + for count in range(20): + for item in range(1000): + escape("foo") + escape("<foo>") + escape(u"foo") + escape(u"<foo>") + counts.add(len(gc.get_objects())) + assert len(counts) == 1, 'ouch, c extension seems to leak objects' + + +def suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(MarkupTestCase)) + + # this test only tests the c extension + if not hasattr(escape, 'func_code'): + suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) + + return suite + + +if __name__ == '__main__': + unittest.main(defaultTest='suite') + +# vim:sts=4:sw=4:et: diff --git a/pyload/lib/rename_process.py b/pyload/lib/rename_process.py new file mode 100644 index 000000000..2527cef39 --- /dev/null +++ b/pyload/lib/rename_process.py @@ -0,0 +1,14 @@ +import sys + +def renameProcess(new_name): + """ Renames the process calling the function to the given name. """ + if sys.platform != 'linux2': + return False + try: + from ctypes import CDLL + libc = CDLL('libc.so.6') + libc.prctl(15, new_name, 0, 0, 0) + return True + except Exception, e: + #print "Rename process failed", e + return False diff --git a/pyload/lib/simplejson/__init__.py b/pyload/lib/simplejson/__init__.py new file mode 100644 index 000000000..a5c01379a --- /dev/null +++ b/pyload/lib/simplejson/__init__.py @@ -0,0 +1,560 @@ +r"""JSON (JavaScript Object Notation) is a subset of +JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data +interchange format. + +:mod:`simplejson` exposes an API familiar to users of the standard library +:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained +version of the :mod:`json` library contained in Python 2.6, but maintains +compatibility with Python 2.4 and Python 2.5 and (currently) has +significant performance advantages, even without using the optional C +extension for speedups.
+
+Encoding basic Python object hierarchies::
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+    '["foo", {"bar": ["baz", null, 1.0, 2]}]'
+    >>> print(json.dumps("\"foo\bar"))
+    "\"foo\bar"
+    >>> print(json.dumps(u'\u1234'))
+    "\u1234"
+    >>> print(json.dumps('\\'))
+    "\\"
+    >>> print(json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True))
+    {"a": 0, "b": 0, "c": 0}
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO()
+    >>> json.dump(['streaming API'], io)
+    >>> io.getvalue()
+    '["streaming API"]'
+
+Compact encoding::
+
+    >>> import simplejson as json
+    >>> obj = [1,2,3,{'4': 5, '6': 7}]
+    >>> json.dumps(obj, separators=(',',':'), sort_keys=True)
+    '[1,2,3,{"4":5,"6":7}]'
+
+Pretty printing::
+
+    >>> import simplejson as json
+    >>> print(json.dumps({'4': 5, '6': 7}, sort_keys=True, indent='    '))
+    {
+        "4": 5,
+        "6": 7
+    }
+
+Decoding JSON::
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
+    >>> from simplejson.compat import StringIO
+    >>> io = StringIO('["streaming API"]')
+    >>> json.load(io)[0] == 'streaming API'
+    True
+
+Specializing JSON object decoding::
+
+    >>> import simplejson as json
+    >>> def as_complex(dct):
+    ...     if '__complex__' in dct:
+    ...         return complex(dct['real'], dct['imag'])
+    ...     return dct
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...     object_hook=as_complex)
+    (1+2j)
+    >>> from decimal import Decimal
+    >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError(repr(obj) + " is not JSON serializable")
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
+    '[2.0, 1.0]'
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
+    '[2.0, 1.0]'
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
+
+    $ echo '{"json":"obj"}' | python -m simplejson.tool
+    {
+        "json": "obj"
+    }
+    $ echo '{ 1.2:3.4}' | python -m simplejson.tool
+    Expecting property name enclosed in double quotes: line 1 column 3 (char 2)
+"""
+from __future__ import absolute_import
+__version__ = '3.5.3'
+__all__ = [
+    'dump', 'dumps', 'load', 'loads',
+    'JSONDecoder', 'JSONDecodeError', 'JSONEncoder',
+    'OrderedDict', 'simple_first',
+]
+
+__author__ = 'Bob Ippolito <bob@redivi.com>'
+
+from decimal import Decimal
+
+from .scanner import JSONDecodeError
+from .decoder import JSONDecoder
+from .encoder import JSONEncoder, JSONEncoderForHTML
+def _import_OrderedDict():
+    import collections
+    try:
+        return collections.OrderedDict
+    except AttributeError:
+        from . import ordered_dict
+        return ordered_dict.OrderedDict
+OrderedDict = _import_OrderedDict()
+
+def _import_c_make_encoder():
+    try:
+        from ._speedups import make_encoder
+        return make_encoder
+    except ImportError:
+        return None
+
+_default_encoder = JSONEncoder(
+    skipkeys=False,
+    ensure_ascii=True,
+    check_circular=True,
+    allow_nan=True,
+    indent=None,
+    separators=None,
+    encoding='utf-8',
+    default=None,
+    use_decimal=True,
+    namedtuple_as_object=True,
+    tuple_as_array=True,
+    bigint_as_string=False,
+    item_sort_key=None,
+    for_json=False,
+    ignore_nan=False,
+    int_as_string_bitcount=None,
+)
+
+def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, use_decimal=True,
+        namedtuple_as_object=True, tuple_as_array=True,
+        bigint_as_string=False, sort_keys=False, item_sort_key=None,
+        for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
+    """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+    ``.write()``-supporting file-like object).
+
+    If *skipkeys* is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If *ensure_ascii* is false, then some chunks written to ``fp``
+    may be ``unicode`` instances, subject to normal Python ``str`` to
+    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+    to cause an error.
+
+    If *check_circular* is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
+
+    If *allow_nan* is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+    in strict compliance with the original JSON specification, instead of
+    using the JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+    See *ignore_nan* for ECMA-262 compliant behavior.
+
+    If *indent* is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, *separators* should be an
+    ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    *encoding* is the character encoding for str instances; the default is
+    UTF-8.
+
+    *default(obj)* is a function that should return a serializable version
+    of obj or raise ``TypeError``. The default simply raises ``TypeError``.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (default: ``False``), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings.
+    This is to avoid the rounding that happens in Javascript otherwise. Note
+    that this is still a lossy operation that will not round-trip correctly
+    and should be used sparingly.
+
+    If *int_as_string_bitcount* is a positive number (n), then ints of size
+    greater than or equal to 2**n or lower than or equal to -2**n will be
+    encoded as strings.
+
+    If specified, *item_sort_key* is a callable used to sort the items in
+    each dictionary. This is useful if you want to sort items other than
+    in alphabetical order by key. This option takes precedence over
+    *sort_keys*.
+
+    If *sort_keys* is true (default: ``False``), the output of dictionaries
+    will be sorted by key.
+
+    If *for_json* is true (default: ``False``), objects with a ``for_json()``
+    method will use the return value of that method for encoding as JSON
+    instead of the object.
+
+    If *ignore_nan* is true (default: ``False``), then out of range
+    :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+    ``null`` in compliance with the ECMA-262 specification. If true, this will
+    override *allow_nan*.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg. NOTE: You should use *default* or *for_json* instead
+    of subclassing whenever possible.
+
+    """
+    # cached encoder
+    if (not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and use_decimal
+        and namedtuple_as_object and tuple_as_array
+        and not bigint_as_string and int_as_string_bitcount is None
+        and not item_sort_key and not for_json and not ignore_nan and not kw):
+        iterable = _default_encoder.iterencode(obj)
+    else:
+        if cls is None:
+            cls = JSONEncoder
+        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+            check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+            separators=separators, encoding=encoding,
+            default=default, use_decimal=use_decimal,
+            namedtuple_as_object=namedtuple_as_object,
+            tuple_as_array=tuple_as_array,
+            bigint_as_string=bigint_as_string,
+            sort_keys=sort_keys,
+            item_sort_key=item_sort_key,
+            for_json=for_json,
+            ignore_nan=ignore_nan,
+            int_as_string_bitcount=int_as_string_bitcount,
+            **kw).iterencode(obj)
+    # could accelerate with writelines in some versions of Python, at
+    # a debuggability cost
+    for chunk in iterable:
+        fp.write(chunk)
+
+
+def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+        allow_nan=True, cls=None, indent=None, separators=None,
+        encoding='utf-8', default=None, use_decimal=True,
+        namedtuple_as_object=True, tuple_as_array=True,
+        bigint_as_string=False, sort_keys=False, item_sort_key=None,
+        for_json=False, ignore_nan=False, int_as_string_bitcount=None, **kw):
+    """Serialize ``obj`` to a JSON formatted ``str``.
+
+    If ``skipkeys`` is true then ``dict`` keys that are not basic types
+    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+    will be skipped instead of raising a ``TypeError``.
+
+    If ``ensure_ascii`` is false, then the return value will be a
+    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+    coercion rules instead of being escaped to an ASCII ``str``.
+
+    If ``check_circular`` is false, then the circular reference check
+    for container types will be skipped and a circular reference will
+    result in an ``OverflowError`` (or worse).
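+
+    For illustration (editor's sketch, not upstream text): with the check
+    left enabled, a self-referential container is rejected rather than
+    recursed into::
+
+        >>> import simplejson as json
+        >>> lst = []
+        >>> lst.append(lst)
+        >>> json.dumps(lst)
+        Traceback (most recent call last):
+            ...
+        ValueError: Circular reference detected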
+
+    If ``allow_nan`` is false, then it will be a ``ValueError`` to
+    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+    strict compliance with the JSON specification, instead of using the
+    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+    If ``indent`` is a string, then JSON array elements and object members
+    will be pretty-printed with a newline followed by that string repeated
+    for each level of nesting. ``None`` (the default) selects the most compact
+    representation without any newlines. For backwards compatibility with
+    versions of simplejson earlier than 2.1.0, an integer is also accepted
+    and is converted to a string with that many spaces.
+
+    If specified, ``separators`` should be an
+    ``(item_separator, key_separator)`` tuple. The default is ``(', ', ': ')``
+    if *indent* is ``None`` and ``(',', ': ')`` otherwise. To get the most
+    compact JSON representation, you should specify ``(',', ':')`` to eliminate
+    whitespace.
+
+    ``encoding`` is the character encoding for str instances; the default is
+    UTF-8.
+
+    ``default(obj)`` is a function that should return a serializable version
+    of obj or raise TypeError. The default simply raises TypeError.
+
+    If *use_decimal* is true (default: ``True``) then decimal.Decimal
+    will be natively serialized to JSON with full precision.
+
+    If *namedtuple_as_object* is true (default: ``True``),
+    :class:`tuple` subclasses with ``_asdict()`` methods will be encoded
+    as JSON objects.
+
+    If *tuple_as_array* is true (default: ``True``),
+    :class:`tuple` (and subclasses) will be encoded as JSON arrays.
+
+    If *bigint_as_string* is true (not the default), ints 2**53 and higher
+    or lower than -2**53 will be encoded as strings. This is to avoid the
+    rounding that happens in Javascript otherwise.
+
+    If *int_as_string_bitcount* is a positive number (n), then ints of size
+    greater than or equal to 2**n or lower than or equal to -2**n will be
+    encoded as strings.
+
+    If specified, *item_sort_key* is a callable used to sort the items in
+    each dictionary. This is useful if you want to sort items other than
+    in alphabetical order by key. This option takes precedence over
+    *sort_keys*.
+
+    If *sort_keys* is true (default: ``False``), the output of dictionaries
+    will be sorted by key.
+
+    If *for_json* is true (default: ``False``), objects with a ``for_json()``
+    method will use the return value of that method for encoding as JSON
+    instead of the object.
+
+    If *ignore_nan* is true (default: ``False``), then out of range
+    :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized as
+    ``null`` in compliance with the ECMA-262 specification. If true, this will
+    override *allow_nan*.
+
+    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+    ``.default()`` method to serialize additional types), specify it with
+    the ``cls`` kwarg. NOTE: You should use *default* instead of subclassing
+    whenever possible.
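+
+    A short example of the *bigint_as_string* behavior described above
+    (editor's sketch; the quoting follows the 2**53 threshold)::
+
+        >>> import simplejson as json
+        >>> json.dumps([1, 2 ** 53], bigint_as_string=True)
+        '[1, "9007199254740992"]'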
+
+    """
+    # cached encoder
+    if (
+        not skipkeys and ensure_ascii and
+        check_circular and allow_nan and
+        cls is None and indent is None and separators is None and
+        encoding == 'utf-8' and default is None and use_decimal
+        and namedtuple_as_object and tuple_as_array
+        and not bigint_as_string and int_as_string_bitcount is None
+        and not sort_keys and not item_sort_key and not for_json
+        and not ignore_nan and not kw
+    ):
+        return _default_encoder.encode(obj)
+    if cls is None:
+        cls = JSONEncoder
+    return cls(
+        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+        separators=separators, encoding=encoding, default=default,
+        use_decimal=use_decimal,
+        namedtuple_as_object=namedtuple_as_object,
+        tuple_as_array=tuple_as_array,
+        bigint_as_string=bigint_as_string,
+        sort_keys=sort_keys,
+        item_sort_key=item_sort_key,
+        for_json=for_json,
+        ignore_nan=ignore_nan,
+        int_as_string_bitcount=int_as_string_bitcount,
+        **kw).encode(obj)
+
+
+_default_decoder = JSONDecoder(encoding=None, object_hook=None,
+                               object_pairs_hook=None)
+
+
+def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, object_pairs_hook=None,
+        use_decimal=False, namedtuple_as_object=True, tuple_as_array=True,
+        **kw):
+    """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+    a JSON document) to a Python object.
+
+    *encoding* determines the encoding used to interpret any
+    :class:`str` objects decoded by this instance (``'utf-8'`` by
+    default). It has no effect when decoding :class:`unicode` objects.
+
+    Note that currently only encodings that are a superset of ASCII work;
+    strings of other encodings should be passed in as :class:`unicode`.
+
+    *object_hook*, if specified, will be called with the result of every
+    JSON object decoded and its return value will be used in place of the
+    given :class:`dict`. This can be used to provide custom
+    deserializations (e.g. to support JSON-RPC class hinting).
+
+    *object_pairs_hook* is an optional function that will be called with
+    the result of any object literal decode with an ordered list of pairs.
+    The return value of *object_pairs_hook* will be used instead of the
+    :class:`dict`. This feature can be used to implement custom decoders
+    that rely on the order that the key and value pairs are decoded (for
+    example, :func:`collections.OrderedDict` will remember the order of
+    insertion). If *object_hook* is also defined, the *object_pairs_hook*
+    takes priority.
+
+    *parse_float*, if specified, will be called with the string of every
+    JSON float to be decoded. By default, this is equivalent to
+    ``float(num_str)``. This can be used to use another datatype or parser
+    for JSON floats (e.g. :class:`decimal.Decimal`).
+
+    *parse_int*, if specified, will be called with the string of every
+    JSON int to be decoded. By default, this is equivalent to
+    ``int(num_str)``. This can be used to use another datatype or parser
+    for JSON integers (e.g. :class:`float`).
+
+    *parse_constant*, if specified, will be called with one of the
+    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+    can be used to raise an exception if invalid JSON numbers are
+    encountered.
+
+    If *use_decimal* is true (default: ``False``) then it implies
+    parse_float=decimal.Decimal for parity with ``dump``.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg.
+    NOTE: You should use *object_hook* or *object_pairs_hook* instead
+    of subclassing whenever possible.
+
+    """
+    return loads(fp.read(),
+        encoding=encoding, cls=cls, object_hook=object_hook,
+        parse_float=parse_float, parse_int=parse_int,
+        parse_constant=parse_constant, object_pairs_hook=object_pairs_hook,
+        use_decimal=use_decimal, **kw)
+
+
+def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+        parse_int=None, parse_constant=None, object_pairs_hook=None,
+        use_decimal=False, **kw):
+    """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+    document) to a Python object.
+
+    *encoding* determines the encoding used to interpret any
+    :class:`str` objects decoded by this instance (``'utf-8'`` by
+    default). It has no effect when decoding :class:`unicode` objects.
+
+    Note that currently only encodings that are a superset of ASCII work;
+    strings of other encodings should be passed in as :class:`unicode`.
+
+    *object_hook*, if specified, will be called with the result of every
+    JSON object decoded and its return value will be used in place of the
+    given :class:`dict`. This can be used to provide custom
+    deserializations (e.g. to support JSON-RPC class hinting).
+
+    *object_pairs_hook* is an optional function that will be called with
+    the result of any object literal decode with an ordered list of pairs.
+    The return value of *object_pairs_hook* will be used instead of the
+    :class:`dict`. This feature can be used to implement custom decoders
+    that rely on the order that the key and value pairs are decoded (for
+    example, :func:`collections.OrderedDict` will remember the order of
+    insertion). If *object_hook* is also defined, the *object_pairs_hook*
+    takes priority.
+
+    *parse_float*, if specified, will be called with the string of every
+    JSON float to be decoded. By default, this is equivalent to
+    ``float(num_str)``. This can be used to use another datatype or parser
+    for JSON floats (e.g. :class:`decimal.Decimal`).
+
+    *parse_int*, if specified, will be called with the string of every
+    JSON int to be decoded. By default, this is equivalent to
+    ``int(num_str)``. This can be used to use another datatype or parser
+    for JSON integers (e.g. :class:`float`).
+
+    *parse_constant*, if specified, will be called with one of the
+    following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+    can be used to raise an exception if invalid JSON numbers are
+    encountered.
+
+    If *use_decimal* is true (default: ``False``) then it implies
+    parse_float=decimal.Decimal for parity with ``dump``.
+
+    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+    kwarg. NOTE: You should use *object_hook* or *object_pairs_hook* instead
+    of subclassing whenever possible.
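+
+    For example (editor's sketch, assuming the package imports as
+    ``simplejson``), *object_pairs_hook* can preserve document order::
+
+        >>> import simplejson as json
+        >>> from simplejson import OrderedDict
+        >>> json.loads('{"b": 1, "a": 2}', object_pairs_hook=OrderedDict)
+        OrderedDict([('b', 1), ('a', 2)])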
+ + """ + if (cls is None and encoding is None and object_hook is None and + parse_int is None and parse_float is None and + parse_constant is None and object_pairs_hook is None + and not use_decimal and not kw): + return _default_decoder.decode(s) + if cls is None: + cls = JSONDecoder + if object_hook is not None: + kw['object_hook'] = object_hook + if object_pairs_hook is not None: + kw['object_pairs_hook'] = object_pairs_hook + if parse_float is not None: + kw['parse_float'] = parse_float + if parse_int is not None: + kw['parse_int'] = parse_int + if parse_constant is not None: + kw['parse_constant'] = parse_constant + if use_decimal: + if parse_float is not None: + raise TypeError("use_decimal=True implies parse_float=Decimal") + kw['parse_float'] = Decimal + return cls(encoding=encoding, **kw).decode(s) + + +def _toggle_speedups(enabled): + from . import decoder as dec + from . import encoder as enc + from . import scanner as scan + c_make_encoder = _import_c_make_encoder() + if enabled: + dec.scanstring = dec.c_scanstring or dec.py_scanstring + enc.c_make_encoder = c_make_encoder + enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or + enc.py_encode_basestring_ascii) + scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner + else: + dec.scanstring = dec.py_scanstring + enc.c_make_encoder = None + enc.encode_basestring_ascii = enc.py_encode_basestring_ascii + scan.make_scanner = scan.py_make_scanner + dec.make_scanner = scan.make_scanner + global _default_decoder + _default_decoder = JSONDecoder( + encoding=None, + object_hook=None, + object_pairs_hook=None, + ) + global _default_encoder + _default_encoder = JSONEncoder( + skipkeys=False, + ensure_ascii=True, + check_circular=True, + allow_nan=True, + indent=None, + separators=None, + encoding='utf-8', + default=None, + ) + +def simple_first(kv): + """Helper function to pass to item_sort_key to sort simple + elements to the top, then container elements. 
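+
+    Example (editor's sketch, not upstream documentation)::
+
+        >>> import simplejson as json
+        >>> json.dumps({'b': [1], 'a': 1}, item_sort_key=json.simple_first)
+        '{"a": 1, "b": [1]}'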
+ """ + return (isinstance(kv[1], (list, dict, tuple)), kv[0]) diff --git a/pyload/lib/simplejson/compat.py b/pyload/lib/simplejson/compat.py new file mode 100644 index 000000000..a0af4a1cb --- /dev/null +++ b/pyload/lib/simplejson/compat.py @@ -0,0 +1,46 @@ +"""Python 3 compatibility shims +""" +import sys +if sys.version_info[0] < 3: + PY3 = False + def b(s): + return s + def u(s): + return unicode(s, 'unicode_escape') + import cStringIO as StringIO + StringIO = BytesIO = StringIO.StringIO + text_type = unicode + binary_type = str + string_types = (basestring,) + integer_types = (int, long) + unichr = unichr + reload_module = reload + def fromhex(s): + return s.decode('hex') + +else: + PY3 = True + if sys.version_info[:2] >= (3, 4): + from importlib import reload as reload_module + else: + from imp import reload as reload_module + import codecs + def b(s): + return codecs.latin_1_encode(s)[0] + def u(s): + return s + import io + StringIO = io.StringIO + BytesIO = io.BytesIO + text_type = str + binary_type = bytes + string_types = (str,) + integer_types = (int,) + + def unichr(s): + return u(chr(s)) + + def fromhex(s): + return bytes.fromhex(s) + +long_type = integer_types[-1] diff --git a/pyload/lib/simplejson/decoder.py b/pyload/lib/simplejson/decoder.py new file mode 100644 index 000000000..1a6c5d938 --- /dev/null +++ b/pyload/lib/simplejson/decoder.py @@ -0,0 +1,393 @@ +"""Implementation of JSONDecoder +""" +from __future__ import absolute_import +import re +import sys +import struct +from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr +from .scanner import make_scanner, JSONDecodeError + +def _import_c_scanstring(): + try: + from ._speedups import scanstring + return scanstring + except ImportError: + return None +c_scanstring = _import_c_scanstring() + +# NOTE (3.1.0): JSONDecodeError may still be imported from this module for +# compatibility, but it was never in the __all__ +__all__ = ['JSONDecoder'] + +FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL + +def _floatconstants(): + _BYTES = fromhex('7FF80000000000007FF0000000000000') + # The struct module in Python 2.4 would get frexp() out of range here + # when an endian is specified in the format string. Fixed in Python 2.5+ + if sys.byteorder != 'big': + _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] + nan, inf = struct.unpack('dd', _BYTES) + return nan, inf, -inf + +NaN, PosInf, NegInf = _floatconstants() + +_CONSTANTS = { + '-Infinity': NegInf, + 'Infinity': PosInf, + 'NaN': NaN, +} + +STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) +BACKSLASH = { + '"': u('"'), '\\': u('\u005c'), '/': u('/'), + 'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'), +} + +DEFAULT_ENCODING = "utf-8" + +def py_scanstring(s, end, encoding=None, strict=True, + _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join, + _PY3=PY3, _maxunicode=sys.maxunicode): + """Scan the string s for a JSON string. End is the index of the + character in s after the quote that started the JSON string. + Unescapes all valid JSON string escape sequences and raises ValueError + on attempt to decode an invalid string. If strict is False then literal + control characters are allowed in the string. 
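+
+    For instance (editor's illustrative sketch; the ``u''`` repr shown is the
+    Python 2 form)::
+
+        >>> py_scanstring('"abc"', 1)
+        (u'abc', 5)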
+
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
+    if encoding is None:
+        encoding = DEFAULT_ENCODING
+    chunks = []
+    _append = chunks.append
+    begin = end - 1
+    while 1:
+        chunk = _m(s, end)
+        if chunk is None:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        end = chunk.end()
+        content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
+        if content:
+            if not _PY3 and not isinstance(content, text_type):
+                content = text_type(content, encoding)
+            _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
+        if terminator == '"':
+            break
+        elif terminator != '\\':
+            if strict:
+                msg = "Invalid control character %r at"
+                raise JSONDecodeError(msg, s, end)
+            else:
+                _append(terminator)
+                continue
+        try:
+            esc = s[end]
+        except IndexError:
+            raise JSONDecodeError(
+                "Unterminated string starting at", s, begin)
+        # If not a unicode escape sequence, must be in the lookup table
+        if esc != 'u':
+            try:
+                char = _b[esc]
+            except KeyError:
+                msg = "Invalid \\X escape sequence %r"
+                raise JSONDecodeError(msg, s, end)
+            end += 1
+        else:
+            # Unicode escape sequence
+            msg = "Invalid \\uXXXX escape sequence"
+            esc = s[end + 1:end + 5]
+            escX = esc[1:2]
+            if len(esc) != 4 or escX == 'x' or escX == 'X':
+                raise JSONDecodeError(msg, s, end - 1)
+            try:
+                uni = int(esc, 16)
+            except ValueError:
+                raise JSONDecodeError(msg, s, end - 1)
+            end += 5
+            # Check for surrogate pair on UCS-4 systems
+            # Note that this will join high/low surrogate pairs
+            # but will also pass unpaired surrogates through
+            if (_maxunicode > 65535 and
+                uni & 0xfc00 == 0xd800 and
+                s[end:end + 2] == '\\u'):
+                esc2 = s[end + 2:end + 6]
+                escX = esc2[1:2]
+                if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
+                    try:
+                        uni2 = int(esc2, 16)
+                    except ValueError:
+                        raise JSONDecodeError(msg, s, end)
+                    if uni2 & 0xfc00 == 0xdc00:
+                        uni = 0x10000 + (((uni - 0xd800) << 10) |
+                                         (uni2 - 0xdc00))
+                        end += 6
+            char = unichr(uni)
+        # Append the unescaped character
+        _append(char)
+    return _join(chunks), end
+
+
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
+
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
+
+def JSONObject(state, encoding, strict, scan_once, object_hook,
+        object_pairs_hook, memo=None,
+        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    (s, end) = state
+    # Backwards compatibility
+    if memo is None:
+        memo = {}
+    memo_get = memo.setdefault
+    pairs = []
+    # Use a slice to prevent IndexError from being raised; the following
+    # check will raise a more specific ValueError if the string is empty
+    nextchar = s[end:end + 1]
+    # Normally we expect nextchar == '"'
+    if nextchar != '"':
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            if object_pairs_hook is not None:
+                result = object_pairs_hook(pairs)
+                return result, end + 1
+            pairs = {}
+            if object_hook is not None:
+                pairs = object_hook(pairs)
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise JSONDecodeError(
+                "Expecting property name enclosed in double quotes",
+                s, end)
+    end += 1
+    while True:
+        key, end = scanstring(s, end, encoding, strict)
+        key = memo_get(key, key)
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
+        if s[end:end + 1] != ':':
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise JSONDecodeError("Expecting ':' delimiter", s, end)
+
+        end += 1
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        value, end = scan_once(s, end)
+        pairs.append((key, value))
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+        end += 1
+
+        if nextchar == '}':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
+        end += 1
+        if nextchar != '"':
+            raise JSONDecodeError(
+                "Expecting property name enclosed in double quotes",
+                s, end - 1)
+
+    if object_pairs_hook is not None:
+        result = object_pairs_hook(pairs)
+        return result, end
+    pairs = dict(pairs)
+    if object_hook is not None:
+        pairs = object_hook(pairs)
+    return pairs, end
+
+def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
+    (s, end) = state
+    values = []
+    nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
+    if nextchar == ']':
+        return values, end + 1
+    elif nextchar == '':
+        raise JSONDecodeError("Expecting value or ']'", s, end)
+    _append = values.append
+    while True:
+        value, end = scan_once(s, end)
+        _append(value)
+        nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
+        end += 1
+        if nextchar == ']':
+            break
+        elif nextchar != ',':
+            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
+
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+    return values, end
+
+class JSONDecoder(object):
+    """Simple JSON decoder
+
+    Performs the following translations in decoding by default:
+
+    +---------------+-------------------+
+    | JSON          | Python            |
+    +===============+===================+
+    | object        | dict              |
+    +---------------+-------------------+
+    | array         | list              |
+    +---------------+-------------------+
+    | string        | str, unicode      |
+    +---------------+-------------------+
+    | number (int)  | int, long         |
+    +---------------+-------------------+
+    | number (real) | float             |
+    +---------------+-------------------+
+    | true          | True              |
+    +---------------+-------------------+
+    | false         | False             |
+    +---------------+-------------------+
+    | null          | None              |
+    +---------------+-------------------+
+
+    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
+    their corresponding ``float`` values, which is outside the JSON spec.
+
+    """
+
+    def __init__(self, encoding=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, strict=True,
+            object_pairs_hook=None):
+        """
+        *encoding* determines the encoding used to interpret any
+        :class:`str` objects decoded by this instance (``'utf-8'`` by
+        default). It has no effect when decoding :class:`unicode` objects.
+
+        Note that currently only encodings that are a superset of ASCII work;
+        strings of other encodings should be passed in as :class:`unicode`.
+
+        *object_hook*, if specified, will be called with the result of every
+        JSON object decoded and its return value will be used in place of the
+        given :class:`dict`.
+        This can be used to provide custom
+        deserializations (e.g. to support JSON-RPC class hinting).
+
+        *object_pairs_hook* is an optional function that will be called with
+        the result of any object literal decode with an ordered list of pairs.
+        The return value of *object_pairs_hook* will be used instead of the
+        :class:`dict`. This feature can be used to implement custom decoders
+        that rely on the order that the key and value pairs are decoded (for
+        example, :func:`collections.OrderedDict` will remember the order of
+        insertion). If *object_hook* is also defined, the *object_pairs_hook*
+        takes priority.
+
+        *parse_float*, if specified, will be called with the string of every
+        JSON float to be decoded. By default, this is equivalent to
+        ``float(num_str)``. This can be used to use another datatype or parser
+        for JSON floats (e.g. :class:`decimal.Decimal`).
+
+        *parse_int*, if specified, will be called with the string of every
+        JSON int to be decoded. By default, this is equivalent to
+        ``int(num_str)``. This can be used to use another datatype or parser
+        for JSON integers (e.g. :class:`float`).
+
+        *parse_constant*, if specified, will be called with one of the
+        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
+        can be used to raise an exception if invalid JSON numbers are
+        encountered.
+
+        *strict* controls the parser's behavior when it encounters an
+        invalid control character in a string. The default setting of
+        ``True`` means that unescaped control characters are parse errors;
+        if ``False``, control characters will be allowed in strings.
+
+        """
+        if encoding is None:
+            encoding = DEFAULT_ENCODING
+        self.encoding = encoding
+        self.object_hook = object_hook
+        self.object_pairs_hook = object_pairs_hook
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
+        self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.memo = {}
+        self.scan_once = make_scanner(self)
+
+    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        instance containing a JSON document)
+
+        """
+        if _PY3 and isinstance(s, binary_type):
+            s = s.decode(self.encoding)
+        obj, end = self.raw_decode(s)
+        end = _w(s, end).end()
+        if end != len(s):
+            raise JSONDecodeError("Extra data", s, end, len(s))
+        return obj
+
+    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
+        beginning with a JSON document) and return a 2-tuple of the Python
+        representation and the index in ``s`` where the document ended.
+        Optionally, ``idx`` can be used to specify an offset in ``s`` where
+        the JSON document begins.
+
+        This can be used to decode a JSON document from a string that may
+        have extraneous data at the end.
+
+        """
+        if idx < 0:
+            # Ensure that raw_decode bails on negative indexes; the regex
+            # would otherwise mask this behavior.
#98 + raise JSONDecodeError('Expecting value', s, idx) + if _PY3 and not isinstance(s, text_type): + raise TypeError("Input string must be text, not bytes") + return self.scan_once(s, idx=_w(s, idx).end()) diff --git a/pyload/lib/simplejson/encoder.py b/pyload/lib/simplejson/encoder.py new file mode 100644 index 000000000..db18244ec --- /dev/null +++ b/pyload/lib/simplejson/encoder.py @@ -0,0 +1,648 @@ +"""Implementation of JSONEncoder +""" +from __future__ import absolute_import +import re +from operator import itemgetter +from decimal import Decimal +from .compat import u, unichr, binary_type, string_types, integer_types, PY3 +def _import_speedups(): + try: + from . import _speedups + return _speedups.encode_basestring_ascii, _speedups.make_encoder + except ImportError: + return None, None +c_encode_basestring_ascii, c_make_encoder = _import_speedups() + +from simplejson.decoder import PosInf + +#ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]') +# This is required because u() will mangle the string and ur'' isn't valid +# python3 syntax +ESCAPE = re.compile(u'[\\x00-\\x1f\\\\"\\b\\f\\n\\r\\t\u2028\u2029]') +ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') +HAS_UTF8 = re.compile(r'[\x80-\xff]') +ESCAPE_DCT = { + '\\': '\\\\', + '"': '\\"', + '\b': '\\b', + '\f': '\\f', + '\n': '\\n', + '\r': '\\r', + '\t': '\\t', +} +for i in range(0x20): + #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) +for i in [0x2028, 0x2029]: + ESCAPE_DCT.setdefault(unichr(i), '\\u%04x' % (i,)) + +FLOAT_REPR = repr + +def encode_basestring(s, _PY3=PY3, _q=u('"')): + """Return a JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + return ESCAPE_DCT[match.group(0)] + return _q + ESCAPE.sub(replace, s) + _q + + +def py_encode_basestring_ascii(s, _PY3=PY3): + """Return an ASCII-only JSON representation of a Python string + + """ + if _PY3: + if isinstance(s, binary_type): + s = s.decode('utf-8') + else: + if isinstance(s, str) and HAS_UTF8.search(s) is not None: + s = s.decode('utf-8') + def replace(match): + s = match.group(0) + try: + return ESCAPE_DCT[s] + except KeyError: + n = ord(s) + if n < 0x10000: + #return '\\u{0:04x}'.format(n) + return '\\u%04x' % (n,) + else: + # surrogate pair + n -= 0x10000 + s1 = 0xd800 | ((n >> 10) & 0x3ff) + s2 = 0xdc00 | (n & 0x3ff) + #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) + return '\\u%04x\\u%04x' % (s1, s2) + return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' + + +encode_basestring_ascii = ( + c_encode_basestring_ascii or py_encode_basestring_ascii) + +class JSONEncoder(object): + """Extensible JSON encoder for Python data structures. 
+
+    Supports the following objects and types by default:
+
+    +-------------------+---------------+
+    | Python            | JSON          |
+    +===================+===============+
+    | dict, namedtuple  | object        |
+    +-------------------+---------------+
+    | list, tuple       | array         |
+    +-------------------+---------------+
+    | str, unicode      | string        |
+    +-------------------+---------------+
+    | int, long, float  | number        |
+    +-------------------+---------------+
+    | True              | true          |
+    +-------------------+---------------+
+    | False             | false         |
+    +-------------------+---------------+
+    | None              | null          |
+    +-------------------+---------------+
+
+    To extend this to recognize other objects, subclass and implement a
+    ``.default()`` method that returns a serializable object for ``o`` if
+    possible; otherwise it should call the superclass implementation (to
+    raise ``TypeError``).
+
+    """
+    item_separator = ', '
+    key_separator = ': '
+
+    def __init__(self, skipkeys=False, ensure_ascii=True,
+            check_circular=True, allow_nan=True, sort_keys=False,
+            indent=None, separators=None, encoding='utf-8', default=None,
+            use_decimal=True, namedtuple_as_object=True,
+            tuple_as_array=True, bigint_as_string=False,
+            item_sort_key=None, for_json=False, ignore_nan=False,
+            int_as_string_bitcount=None):
+        """Constructor for JSONEncoder, with sensible defaults.
+
+        If skipkeys is false, then it is a TypeError to attempt
+        encoding of keys that are not str, int, long, float or None. If
+        skipkeys is True, such items are simply skipped.
+
+        If ensure_ascii is true, the output is guaranteed to be str
+        objects with all incoming unicode characters escaped. If
+        ensure_ascii is false, the output will be a unicode object.
+
+        If check_circular is true, then lists, dicts, and custom encoded
+        objects will be checked for circular references during encoding to
+        prevent an infinite recursion (which would cause an OverflowError).
+        Otherwise, no such check takes place.
+
+        If allow_nan is true, then NaN, Infinity, and -Infinity will be
+        encoded as such. This behavior is not JSON specification compliant,
+        but is consistent with most JavaScript based encoders and decoders.
+        Otherwise, it will be a ValueError to encode such floats.
+
+        If sort_keys is true, then the output of dictionaries will be
+        sorted by key; this is useful for regression tests to ensure
+        that JSON serializations can be compared on a day-to-day basis.
+
+        If indent is a string, then JSON array elements and object members
+        will be pretty-printed with a newline followed by that string repeated
+        for each level of nesting. ``None`` (the default) selects the most
+        compact representation without any newlines. For backwards
+        compatibility with versions of simplejson earlier than 2.1.0, an
+        integer is also accepted and is converted to a string with that many
+        spaces.
+
+        If specified, separators should be an (item_separator, key_separator)
+        tuple. The default is (', ', ': ') if *indent* is ``None`` and
+        (',', ': ') otherwise. To get the most compact JSON representation,
+        you should specify (',', ':') to eliminate whitespace.
+
+        If specified, default is a function that gets called for objects
+        that can't otherwise be serialized. It should return a JSON encodable
+        version of the object or raise a ``TypeError``.
+
+        If encoding is not None, then all input strings will be
+        transformed into unicode using that encoding prior to JSON-encoding.
+        The default is UTF-8.
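+
+        A compact, key-sorted encoder, for instance (editor's sketch, not
+        upstream documentation)::
+
+            >>> from simplejson import JSONEncoder
+            >>> JSONEncoder(sort_keys=True, separators=(',', ':')).encode(
+            ...     {'b': 1, 'a': 2})
+            '{"a":2,"b":1}'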
+
+        If use_decimal is true (the default), ``decimal.Decimal`` will
+        be supported directly by the encoder. For the inverse, decode JSON
+        with ``parse_float=decimal.Decimal``.
+
+        If namedtuple_as_object is true (the default), objects with
+        ``_asdict()`` methods will be encoded as JSON objects.
+
+        If tuple_as_array is true (the default), tuple (and subclasses) will
+        be encoded as JSON arrays.
+
+        If bigint_as_string is true (not the default), ints 2**53 and higher
+        or lower than -2**53 will be encoded as strings. This is to avoid the
+        rounding that happens in Javascript otherwise.
+
+        If int_as_string_bitcount is a positive number (n), then ints of size
+        greater than or equal to 2**n or lower than or equal to -2**n will be
+        encoded as strings.
+
+        If specified, item_sort_key is a callable used to sort the items in
+        each dictionary. This is useful if you want to sort items other than
+        in alphabetical order by key.
+
+        If for_json is true (not the default), objects with a ``for_json()``
+        method will use the return value of that method for encoding as JSON
+        instead of the object.
+
+        If *ignore_nan* is true (default: ``False``), then out of range
+        :class:`float` values (``nan``, ``inf``, ``-inf``) will be serialized
+        as ``null`` in compliance with the ECMA-262 specification. If true,
+        this will override *allow_nan*.
+
+        """
+
+        self.skipkeys = skipkeys
+        self.ensure_ascii = ensure_ascii
+        self.check_circular = check_circular
+        self.allow_nan = allow_nan
+        self.sort_keys = sort_keys
+        self.use_decimal = use_decimal
+        self.namedtuple_as_object = namedtuple_as_object
+        self.tuple_as_array = tuple_as_array
+        self.bigint_as_string = bigint_as_string
+        self.item_sort_key = item_sort_key
+        self.for_json = for_json
+        self.ignore_nan = ignore_nan
+        self.int_as_string_bitcount = int_as_string_bitcount
+        if indent is not None and not isinstance(indent, string_types):
+            indent = indent * ' '
+        self.indent = indent
+        if separators is not None:
+            self.item_separator, self.key_separator = separators
+        elif indent is not None:
+            self.item_separator = ','
+        if default is not None:
+            self.default = default
+        self.encoding = encoding
+
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+
+        """
+        raise TypeError(repr(o) + " is not JSON serializable")
+
+    def encode(self, o):
+        """Return a JSON string representation of a Python data structure.
+
+        >>> from simplejson import JSONEncoder
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+
+        """
+        # This is for extremely simple cases and benchmarks.
+        if isinstance(o, binary_type):
+            _encoding = self.encoding
+            if _encoding is not None and _encoding != 'utf-8':
+                o = o.decode(_encoding)
+        if isinstance(o, string_types):
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            else:
+                return encode_basestring(o)
+        # This doesn't pass the iterator directly to ''.join() because the
+        # exceptions aren't as detailed. The list call should be roughly
+        # equivalent to the PySequence_Fast that ''.join() would do.
+        chunks = self.iterencode(o, _one_shot=True)
+        if not isinstance(chunks, (list, tuple)):
+            chunks = list(chunks)
+        if self.ensure_ascii:
+            return ''.join(chunks)
+        else:
+            return u''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        """Encode the given object and yield each string
+        representation as available.
+
+        For example::
+
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        if self.ensure_ascii:
+            _encoder = encode_basestring_ascii
+        else:
+            _encoder = encode_basestring
+        if self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, binary_type):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
+
+        def floatstr(o, allow_nan=self.allow_nan, ignore_nan=self.ignore_nan,
+                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
+            # Check for specials. Note that this type of test is processor
+            # and/or platform-specific, so do tests which don't depend on
+            # the internals.
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            if ignore_nan:
+                text = 'null'
+            elif not allow_nan:
+                raise ValueError(
+                    "Out of range float values are not JSON compliant: " +
+                    repr(o))
+
+            return text
+
+        key_memo = {}
+        int_as_string_bitcount = (
+            53 if self.bigint_as_string else self.int_as_string_bitcount)
+        if (_one_shot and c_make_encoder is not None
+                and self.indent is None):
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                int_as_string_bitcount,
+                self.item_sort_key, self.encoding, self.for_json,
+                self.ignore_nan, Decimal)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot, self.use_decimal,
+                self.namedtuple_as_object, self.tuple_as_array,
+                int_as_string_bitcount,
+                self.item_sort_key, self.encoding, self.for_json,
+                Decimal=Decimal)
+        try:
+            return _iterencode(o, 0)
+        finally:
+            key_memo.clear()
+
+
+class JSONEncoderForHTML(JSONEncoder):
+    """An encoder that produces JSON safe to embed in HTML.
+
+    To embed JSON content in, say, a script tag on a web page, the
+    characters &, < and > should be escaped. They cannot be escaped
+    with the usual entities (e.g. &amp;) because they are not expanded
+    within