summary | refs | log | tree | commit | diff | stats
path: root/module/Utils.py
diff options
context:
space:
mode:
authorGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2011-06-15 17:35:48 +0200
committerGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2011-06-15 17:35:48 +0200
commit a0805f27015748638a5fb05fd55b746852c53362 (patch)
tree 1f56b476ae3ce998f62abe1617303319b04f1c3f /module/Utils.py
parent hagg's rapidshare patch (diff)
download pyload-a0805f27015748638a5fb05fd55b746852c53362.tar.xz
html_unescape function, little plugin improvements
Diffstat (limited to 'module/Utils.py')
-rw-r--r-- module/Utils.py 27
1 files changed, 27 insertions, 0 deletions
diff --git a/module/Utils.py b/module/Utils.py
index e6e40c956..cdf76c144 100644
--- a/module/Utils.py
+++ b/module/Utils.py
@@ -8,6 +8,7 @@ import time
import re
from os.path import join
from string import maketrans
+from htmlentitydefs import name2codepoint
def chmod(*args):
try:
@@ -129,6 +130,32 @@ def lock(func):
return new
+
def fixup(m):
    """Convert one matched HTML/XML entity into its unicode character.

    Meant to be used as the replacement callback of ``re.sub``.

    :param m: match object whose full match looks like ``&name;``,
              ``&#123;`` or ``&#x7b;`` / ``&#X7B;``
    :return: the decoded unicode character, or the matched text unchanged
             when the reference cannot be resolved
    """
    text = m.group(0)

    if text.startswith("&#"):
        # numeric character reference: drop the "&#" / "&#x" lead-in and ";"
        if text[2:3] in ("x", "X"):  # the hex marker may be either case
            digits, base = text[3:-1], 16
        else:
            digits, base = text[2:-1], 10
        try:
            return unichr(int(digits, base))
        except (ValueError, OverflowError):
            # not a valid number, or a code point unichr() cannot represent
            return text  # leave as is

    try:
        # named entity such as "&amp;"
        return unichr(name2codepoint[text[1:-1]])
    except KeyError:
        return text  # unknown entity name: leave as is
+
def html_unescape(text):
    """Replace HTML or XML character references and entities in a text string.

    Every substring of the form ``&name;``, ``&#123;`` or ``&#x7b;`` is
    handed to ``fixup`` and replaced by whatever ``fixup`` returns.

    :param text: string that may contain entities / character references
    :return: the string with each match substituted
    """
    # raw literal so the "\w" class reaches the regex engine untouched
    return re.sub(r"&#?\w+;", fixup, text)
+
if __name__ == "__main__":
print freeSpace(".")