diff options
Diffstat (limited to 'module/unescape.py')
-rw-r--r-- | module/unescape.py | 57 |
1 files changed, 3 insertions, 54 deletions
diff --git a/module/unescape.py b/module/unescape.py
index 41a23be5b..d8999e077 100644
--- a/module/unescape.py
+++ b/module/unescape.py
@@ -1,54 +1,3 @@
-from htmlentitydefs import name2codepoint as n2cp
-from urllib import unquote
-import re
-
-def substitute_entity(match):
-    ent = match.group(2)
-    if match.group(1) == "#":
-        return unichr(int(ent))
-    else:
-        cp = n2cp.get(ent)
-        if cp:
-            return unichr(cp)
-        else:
-            return match.group()
-
-def unescape(string):
-    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
-    return entity_re.subn(substitute_entity, unquote(string))[0]
-
-
-"""
-import re
-
-def unescape(text):
-    def fixup(m):
-        text = m.group(0)
-        if text[:2] == "&#":
-            # character reference
-            try:
-                if text[:3] == "&#x":
-                    return unichr(int(text[3:-1], 16))
-                else:
-                    return unichr(int(text[2:-1]))
-            except ValueError:
-                print "erreur de valeur"
-                pass
-        else:
-            # named entity
-            try:
-                if text[1:-1] == "amp":
-                    text = "&"
-                elif text[1:-1] == "gt":
-                    text = ">"
-                elif text[1:-1] == "lt":
-                    text = "<"
-                else:
-                    print text[1:-1]
-                    text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
-            except KeyError:
-                print "keyerror"
-                pass
-        return text # leave as is
-    return re.sub("&#?\w+;", fixup, text)
-"""
+from module.utils import html_unescape
+#deprecated
+unescape = html_unescape
\ No newline at end of file