summary refs log tree commit diff stats
path: root/module/unescape.py
diff options
context:
space:
mode:
Diffstat (limited to 'module/unescape.py')
-rw-r--r-- module/unescape.py 57
1 files changed, 3 insertions, 54 deletions
diff --git a/module/unescape.py b/module/unescape.py
index 41a23be5b..d8999e077 100644
--- a/module/unescape.py
+++ b/module/unescape.py
@@ -1,54 +1,3 @@
-from htmlentitydefs import name2codepoint as n2cp
-from urllib import unquote
-import re
-
-def substitute_entity(match):
- ent = match.group(2)
- if match.group(1) == "#":
- return unichr(int(ent))
- else:
- cp = n2cp.get(ent)
- if cp:
- return unichr(cp)
- else:
- return match.group()
-
-def unescape(string):
- entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
- return entity_re.subn(substitute_entity, unquote(string))[0]
-
-
-"""
-import re
-
-def unescape(text):
- def fixup(m):
- text = m.group(0)
- if text[:2] == "&#":
- # character reference
- try:
- if text[:3] == "&#x":
- return unichr(int(text[3:-1], 16))
- else:
- return unichr(int(text[2:-1]))
- except ValueError:
- print "erreur de valeur"
- pass
- else:
- # named entity
- try:
- if text[1:-1] == "amp":
- text = "&"
- elif text[1:-1] == "gt":
- text = ">"
- elif text[1:-1] == "lt":
- text = "<"
- else:
- print text[1:-1]
- text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
- except KeyError:
- print "keyerror"
- pass
- return text # leave as is
- return re.sub("&#?\w+;", fixup, text)
-"""
+from module.utils import html_unescape
+#deprecated
+unescape = html_unescape
\ No newline at end of file