diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-06-15 17:35:48 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-06-15 17:35:48 +0200 |
commit | a0805f27015748638a5fb05fd55b746852c53362 (patch) | |
tree | 1f56b476ae3ce998f62abe1617303319b04f1c3f /module/Utils.py | |
parent | hagg's rapidshare patch (diff) | |
download | pyload-a0805f27015748638a5fb05fd55b746852c53362.tar.xz |
html_unescape function, little plugin improvements
Diffstat (limited to 'module/Utils.py')
-rw-r--r-- | module/Utils.py | 27 |
1 files changed, 27 insertions, 0 deletions
# Helpers introduced in module/Utils.py (fixup / html_unescape), reconstructed
# from the collapsed one-line diff.  The diff's surrounding context fragments
# (chmod, lock, the __main__ guard) are truncated in this view and are not
# reproduced here.
import re

try:
    # Python 2 location of the entity table (what the original file imports).
    from htmlentitydefs import name2codepoint
except ImportError:
    # Python 3 moved the same table to html.entities.
    from html.entities import name2codepoint

try:
    _unichr = unichr  # Python 2: build a unicode character from a code point
except NameError:
    _unichr = chr  # Python 3: chr() already returns a unicode character

# "&name;", "&#123;" or "&#x7B;" -- compiled once instead of on every call.
_ENTITY_RE = re.compile(r"&#?\w+;")


def fixup(m):
    """Return the replacement text for one matched character reference.

    ``m`` is a regex match whose ``group(0)`` is the whole reference,
    including the leading ``&`` and the trailing ``;``.

    * ``&#NNN;``            -> character with decimal code point NNN
    * ``&#xHH;`` / ``&#XHH;`` -> character with hexadecimal code point HH
    * ``&name;``            -> character looked up in ``name2codepoint``

    Anything that cannot be decoded (unknown name, non-numeric digits,
    code point out of range) is returned unchanged.
    """
    text = m.group(0)
    try:
        if text.startswith("&#"):
            # Numeric character reference; HTML accepts both "x" and "X"
            # as the hexadecimal marker.
            if text[2:3] in ("x", "X"):
                return _unichr(int(text[3:-1], 16))
            return _unichr(int(text[2:-1]))
        # Named entity: strip the "&" and ";" and look the name up.
        return _unichr(name2codepoint[text[1:-1]])
    except (KeyError, ValueError, OverflowError):
        # KeyError: unknown entity name.
        # ValueError: malformed digits or code point outside the valid range.
        # OverflowError: number too large for unichr()/chr() to even accept.
        return text  # leave as is


def html_unescape(text):
    """Replace HTML or XML character references and entities in ``text``
    with the characters they stand for.

    References that cannot be decoded are left in the string untouched.
    """
    return _ENTITY_RE.sub(fixup, text)