summary refs log tree commit diff stats
path: root/module/lib
diff options
context:
space:
mode:
Diffstat (limited to 'module/lib')
-rw-r--r-- module/lib/BeautifulSoup.py 63
1 files changed, 34 insertions, 29 deletions
diff --git a/module/lib/BeautifulSoup.py b/module/lib/BeautifulSoup.py
index 55567f588..7278215ca 100644
--- a/module/lib/BeautifulSoup.py
+++ b/module/lib/BeautifulSoup.py
@@ -79,8 +79,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "3.0.8.1"
-__copyright__ = "Copyright (c) 2004-2010 Leonard Richardson"
+__version__ = "3.2.1"
+__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
__license__ = "New-style BSD"
from sgmllib import SGMLParser, SGMLParseError
@@ -114,6 +114,21 @@ class PageElement(object):
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
+ def _invert(h):
+ "Cheap function to invert a hash."
+ i = {}
+ for k,v in h.items():
+ i[v] = k
+ return i
+
+ XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
+ "quot" : '"',
+ "amp" : "&",
+ "lt" : "<",
+ "gt" : ">" }
+
+ XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
+
def setup(self, parent=None, previous=None):
"""Sets up the initial relations between this element and
other elements."""
@@ -421,6 +436,16 @@ class PageElement(object):
s = unicode(s)
return s
+ BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+ + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+ + ")")
+
+ def _sub_entity(self, x):
+ """Used with a regular expression to substitute the
+ appropriate XML entity for an XML special character."""
+ return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
+
+
class NavigableString(unicode, PageElement):
def __new__(cls, value):
@@ -451,10 +476,12 @@ class NavigableString(unicode, PageElement):
return str(self).decode(DEFAULT_OUTPUT_ENCODING)
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+ # Substitute outgoing XML entities.
+ data = self.BARE_AMPERSAND_OR_BRACKET.sub(self._sub_entity, self)
if encoding:
- return self.encode(encoding)
+ return data.encode(encoding)
else:
- return self
+ return data
class CData(NavigableString):
@@ -480,21 +507,6 @@ class Tag(PageElement):
"""Represents a found HTML tag with its attributes and contents."""
- def _invert(h):
- "Cheap function to invert a hash."
- i = {}
- for k,v in h.items():
- i[v] = k
- return i
-
- XML_ENTITIES_TO_SPECIAL_CHARS = { "apos" : "'",
- "quot" : '"',
- "amp" : "&",
- "lt" : "<",
- "gt" : ">" }
-
- XML_SPECIAL_CHARS_TO_ENTITIES = _invert(XML_ENTITIES_TO_SPECIAL_CHARS)
-
def _convertEntities(self, match):
"""Used in a call to re.sub to replace HTML, XML, and numeric
entities with the appropriate Unicode characters. If HTML
@@ -531,6 +543,8 @@ class Tag(PageElement):
self.name = name
if attrs is None:
attrs = []
+ elif isinstance(attrs, dict):
+ attrs = attrs.items()
self.attrs = attrs
self.contents = []
self.setup(parent, previous)
@@ -679,15 +693,6 @@ class Tag(PageElement):
def __unicode__(self):
return self.__str__(None)
- BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
- + "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
- + ")")
-
- def _sub_entity(self, x):
- """Used with a regular expression to substitute the
- appropriate XML entity for an XML special character."""
- return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
-
def __str__(self, encoding=DEFAULT_OUTPUT_ENCODING,
prettyPrint=False, indentLevel=0):
"""Returns a string or Unicode representation of this tag and
@@ -1295,7 +1300,7 @@ class BeautifulStoneSoup(Tag, SGMLParser):
"""
nestingResetTriggers = self.NESTABLE_TAGS.get(name)
- isNestable = nestingResetTriggers is not None
+ isNestable = nestingResetTriggers != None
isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
popTo = None
inclusive = True