diff options
author | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-10-23 03:01:59 +0200 |
---|---|---|
committer | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-10-23 03:01:59 +0200 |
commit | b4be99c4ba9492b8c1e7fdb84eaeca76d90ff381 (patch) | |
tree | 010651577084433d2685941f06fbce715c247ce7 /module/plugins/internal/utils.py | |
parent | [MultiHoster][XFSHoster] Set DIRECT_LINK (diff) | |
download | pyload-b4be99c4ba9492b8c1e7fdb84eaeca76d90ff381.tar.xz |
[utils] Improve method html_unescape
Diffstat (limited to 'module/plugins/internal/utils.py')
-rw-r--r-- | module/plugins/internal/utils.py | 29 |
1 files changed, 5 insertions, 24 deletions
```diff
diff --git a/module/plugins/internal/utils.py b/module/plugins/internal/utils.py
index 4bb5d21b2..e01baf30f 100644
--- a/module/plugins/internal/utils.py
+++ b/module/plugins/internal/utils.py
@@ -14,6 +14,8 @@ import traceback
 import urllib
 import urlparse
 
+import HTMLParser
+
 try:
     import simplejson as json
 
@@ -24,7 +26,7 @@ except ImportError:
 class utils(object):
     __name__ = "utils"
     __type__ = "plugin"
-    __version__ = "0.06"
+    __version__ = "0.07"
     __status__ = "stable"
 
     __pattern__ = r'^unmatchable$'
@@ -94,28 +96,6 @@ def uniqify(seq):
     return [x for x in seq if x not in seen and not seen_add(x)]
 
 
-def fixup(m):
-    text = m.group(0)
-    if text[:2] == "&#":
-        # character reference
-        try:
-            if text[:3] == "&#x":
-                return unichr(int(text[3:-1], 16))
-            else:
-                return unichr(int(text[2:-1]))
-        except ValueError:
-            pass
-    else:
-        # named entity
-        try:
-            name = text[1:-1]
-            text = unichr(htmlentitydefs.name2codepoint[name])
-        except KeyError:
-            pass
-
-    return text  #: leave as is
-
-
 def has_method(obj, name):
     """
     Check if name was defined in obj (return false if inhereted)
@@ -127,7 +107,8 @@ def html_unescape(text):
     """
     Removes HTML or XML character references and entities from a text string
     """
-    return re.sub("&#?\w+;", fixup, text)
+    h = HTMLParser.HTMLParser()
+    return h.unescape(text)
 
 
 def isiterable(obj):
```