summary | refs | log | tree | commit | diff | stats
path: root/module/unescape.py
diff options
context:
space:
mode:
authorGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2010-08-25 18:22:27 +0200
committerGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2010-08-25 18:22:27 +0200
commit 29f9dc8fb3396b03d732ebcbeb1cc8f00fe13897 (patch)
tree f2a910cbea747a7b0c0a50d6c66691e54f5ef47f /module/unescape.py
parent merged gui (diff)
download pyload-29f9dc8fb3396b03d732ebcbeb1cc8f00fe13897.tar.xz
new dirs
Diffstat (limited to 'module/unescape.py')
-rw-r--r-- module/unescape.py 54
1 files changed, 54 insertions, 0 deletions
diff --git a/module/unescape.py b/module/unescape.py
new file mode 100644
index 000000000..41a23be5b
--- /dev/null
+++ b/module/unescape.py
@@ -0,0 +1,54 @@
+from htmlentitydefs import name2codepoint as n2cp
+from urllib import unquote
+import re
+
+def substitute_entity(match):
+    """Return the replacement text for one matched HTML entity reference.
+
+    ``match`` is a regex match whose group 1 is ``"#"`` for a numeric
+    character reference (empty otherwise) and whose group 2 is the body of
+    the reference: a decimal number, an ``x``-prefixed hexadecimal number,
+    or an entity name looked up in ``htmlentitydefs.name2codepoint``.
+
+    Returns the referenced character as a unicode string.  A reference that
+    cannot be resolved (unknown name, malformed number, or a code point that
+    ``unichr`` rejects) is returned unchanged instead of raising, so one bad
+    reference does not abort the whole substitution.
+    """
+    ent = match.group(2)
+    if match.group(1) == "#":
+        try:
+            if ent[:1] in ("x", "X"):
+                # Hexadecimal form, e.g. "&#x41;".
+                return unichr(int(ent[1:], 16))
+            return unichr(int(ent))
+        except (ValueError, OverflowError):
+            # Not a number, or outside the range unichr accepts: keep as is.
+            return match.group()
+    cp = n2cp.get(ent)
+    if cp:
+        return unichr(cp)
+    return match.group()
+
+# Matches entity references such as "&name;" or "&#NNN;".  Group 1 is "#"
+# (or empty) and group 2 is the reference body passed to substitute_entity.
+# Compiled once at import time instead of on every unescape() call.
+_ENTITY_RE = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
+
+
+def unescape(string):
+    """Decode a URL-quoted, HTML-escaped string.
+
+    The input is first percent-decoded with ``urllib.unquote``; every entity
+    reference found in the result is then replaced by whatever
+    ``substitute_entity`` returns for it.
+
+    Returns the decoded string.
+    """
+    return _ENTITY_RE.sub(substitute_entity, unquote(string))
+
+
+# NOTE: the triple-quoted block below is a bare module-level string literal,
+# not a docstring and not live code: nothing inside it is executed.  It holds
+# an alternative unescape() implementation that is currently disabled.
+# NOTE(review): presumably kept only for reference -- confirm before removing.
+"""
+import re
+
+def unescape(text):
+ def fixup(m):
+ text = m.group(0)
+ if text[:2] == "&#":
+ # character reference
+ try:
+ if text[:3] == "&#x":
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ print "erreur de valeur"
+ pass
+ else:
+ # named entity
+ try:
+ if text[1:-1] == "amp":
+ text = "&amp;amp;"
+ elif text[1:-1] == "gt":
+ text = "&amp;gt;"
+ elif text[1:-1] == "lt":
+ text = "&amp;lt;"
+ else:
+ print text[1:-1]
+ text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+ except KeyError:
+ print "keyerror"
+ pass
+ return text # leave as is
+ return re.sub("&#?\w+;", fixup, text)
+"""