summary refs log tree commit diff stats
path: root/module/plugins/internal
diff options
context:
space:
mode:
authorGravatar Walter Purcaro <vuolter@users.noreply.github.com> 2015-10-23 03:01:59 +0200
committerGravatar Walter Purcaro <vuolter@users.noreply.github.com> 2015-10-23 03:01:59 +0200
commit b4be99c4ba9492b8c1e7fdb84eaeca76d90ff381 (patch)
tree 010651577084433d2685941f06fbce715c247ce7 /module/plugins/internal
parent [MultiHoster][XFSHoster] Set DIRECT_LINK (diff)
download pyload-b4be99c4ba9492b8c1e7fdb84eaeca76d90ff381.tar.xz
[utils] Improve method html_unescape
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- module/plugins/internal/utils.py 29
1 files changed, 5 insertions, 24 deletions
diff --git a/module/plugins/internal/utils.py b/module/plugins/internal/utils.py
index 4bb5d21b2..e01baf30f 100644
--- a/module/plugins/internal/utils.py
+++ b/module/plugins/internal/utils.py
@@ -14,6 +14,8 @@ import traceback
import urllib
import urlparse
+import HTMLParser
+
try:
import simplejson as json
@@ -24,7 +26,7 @@ except ImportError:
class utils(object):
__name__ = "utils"
__type__ = "plugin"
- __version__ = "0.06"
+ __version__ = "0.07"
__status__ = "stable"
__pattern__ = r'^unmatchable$'
@@ -94,28 +96,6 @@ def uniqify(seq):
return [x for x in seq if x not in seen and not seen_add(x)]
-def fixup(m):
- text = m.group(0)
- if text[:2] == "&#":
- # character reference
- try:
- if text[:3] == "&#x":
- return unichr(int(text[3:-1], 16))
- else:
- return unichr(int(text[2:-1]))
- except ValueError:
- pass
- else:
- # named entity
- try:
- name = text[1:-1]
- text = unichr(htmlentitydefs.name2codepoint[name])
- except KeyError:
- pass
-
- return text #: leave as is
-
-
def has_method(obj, name):
"""
Check if name was defined in obj (return false if inhereted)
@@ -127,7 +107,8 @@ def html_unescape(text):
"""
Removes HTML or XML character references and entities from a text string
"""
- return re.sub("&#?\w+;", fixup, text)
+ h = HTMLParser.HTMLParser()
+ return h.unescape(text)
def isiterable(obj):