From 6cad171c269c07d5b365ffba7b676f25e98e449f Mon Sep 17 00:00:00 2001
From: spoob <spoob@gmx.de>
Date: Sun, 14 Jun 2009 02:32:26 +0200
Subject: added 1kh.de container plugin

---
 module/unescape.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 module/unescape.py

(limited to 'module/unescape.py')

diff --git a/module/unescape.py b/module/unescape.py
new file mode 100644
index 000000000..462423b03
--- /dev/null
+++ b/module/unescape.py
@@ -0,0 +1,38 @@
+import re
+
+def unescape(text):
+   """Decode HTML/XML character references and named entities in text.
+
+   Numeric references (&#65;, &#x41;) and known entity names are replaced
+   by the character they denote. amp, gt and lt are not decoded; they are
+   rewritten to the literal strings in `kept` below. Unknown names and
+   malformed numbers are left exactly as they appear in the input.
+
+   Returns a str when the result converts cleanly, otherwise the unicode
+   object produced by the substitution.
+   Based on Fredrik Lundh, http://effbot.org/zone/re-sub.htm#unescape-html
+   """
+   # Imported here because the module header only imports re; without it
+   # the name2codepoint lookup raised NameError instead of decoding.
+   import htmlentitydefs
+   kept = {"amp": "&amp;amp;", "gt": "&amp;gt;", "lt": "&amp;lt;"}
+
+   def fixup(m):
+      ref = m.group(0)
+      try:
+         if ref[:3] == "&#x":
+            return unichr(int(ref[3:-1], 16))
+         if ref[:2] == "&#":
+            return unichr(int(ref[2:-1]))
+         name = ref[1:-1]
+         if name in kept:
+            return kept[name]
+         return unichr(htmlentitydefs.name2codepoint[name])
+      except (ValueError, KeyError, OverflowError):
+         return ref # leave as is
+
+   result = re.sub(r"&#?\w+;", fixup, text)
+   try:
+      return str(result)
+   except UnicodeError:
+      return result # non-ASCII characters cannot be forced into str
-- 
cgit v1.2.3