diff options
author | mkaay <mkaay@mkaay.de> | 2010-08-25 16:48:55 +0200 |
---|---|---|
committer | mkaay <mkaay@mkaay.de> | 2010-08-25 16:48:55 +0200 |
commit | 3c9f55270a83b0e88ec0dc516f9d9921e4d7b6ea (patch) | |
tree | c5b2b1bfeb7eb8df2b97be118f6cbcec4e29cb3b /core/module/unescape.py | |
parent | ul.to fetching, so.biz expire (diff) | |
download | pyload-3c9f55270a83b0e88ec0dc516f9d9921e4d7b6ea.tar.xz |
merged gui
Diffstat (limited to 'core/module/unescape.py')
-rw-r--r-- | core/module/unescape.py | 54 |
1 files changed, 54 insertions, 0 deletions
"""Decode URL percent-escapes and HTML character references in text.

``unescape`` first URL-unquotes its argument and then replaces every
``&name;``, ``&#NNN;`` and ``&#xHHH;`` reference it can decode with the
corresponding character. References that cannot be decoded are left
untouched.
"""

import re

try:  # Python 2 module names
    from htmlentitydefs import name2codepoint as n2cp
    from urllib import unquote
except ImportError:  # Python 3 module names
    from html.entities import name2codepoint as n2cp
    from urllib.parse import unquote

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already returns text

# group(1) is "#" for numeric references and "" for named ones;
# group(2) is the reference body (digits, "x" + hex digits, or a name).
_ENTITY_RE = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")


def substitute_entity(match):
    """Return the replacement text for one entity match.

    ``match`` is a match object produced by the entity pattern used in
    ``unescape``. Decimal (``&#65;``) and hexadecimal (``&#x41;``) numeric
    references are converted to the character with that code point; named
    references are looked up in the standard name-to-code-point table.
    Anything that cannot be decoded (unknown name, non-numeric body after
    ``#``, code point out of range) is returned exactly as matched.
    """
    ent = match.group(2)

    if match.group(1) == "#":
        if ent[:1] in ("x", "X"):
            digits, base = ent[1:], 16
        else:
            digits, base = ent, 10
        # ``\w`` admits "_" and newer int() accepts it as a digit separator;
        # that is not a valid character reference, so refuse it explicitly.
        if "_" in digits:
            return match.group()
        try:
            return _unichr(int(digits, base))
        except (ValueError, OverflowError):
            # Not a number, or not a representable code point: keep as is.
            return match.group()

    cp = n2cp.get(ent)
    if cp is not None:
        return _unichr(cp)
    return match.group()


def unescape(string):
    """Return ``string`` with URL escapes and HTML entities decoded.

    Percent-escapes are decoded first, so an entity that was itself
    URL-encoded (for example ``%26amp%3B``) is decoded as well.
    """
    return _ENTITY_RE.sub(substitute_entity, unquote(string))