From ce1c2b6b05c08b669357947e61ae40efce7fc50f Mon Sep 17 00:00:00 2001
From: Walter Purcaro <vuolter@gmail.com>
Date: Mon, 16 Feb 2015 10:46:28 +0100
Subject: module temp

---
 module/plugins/ocr/GigasizeCom.py    |  24 ++++++
 module/plugins/ocr/LinksaveIn.py     | 158 +++++++++++++++++++++++++++++++++++
 module/plugins/ocr/NetloadIn.py      |  29 +++++++
 module/plugins/ocr/ShareonlineBiz.py |  39 +++++++++
 module/plugins/ocr/__init__.py       |   1 +
 5 files changed, 251 insertions(+)
 create mode 100644 module/plugins/ocr/GigasizeCom.py
 create mode 100644 module/plugins/ocr/LinksaveIn.py
 create mode 100644 module/plugins/ocr/NetloadIn.py
 create mode 100644 module/plugins/ocr/ShareonlineBiz.py
 create mode 100644 module/plugins/ocr/__init__.py

(limited to 'module/plugins/ocr')

diff --git a/module/plugins/ocr/GigasizeCom.py b/module/plugins/ocr/GigasizeCom.py
new file mode 100644
index 000000000..6982e6ca9
--- /dev/null
+++ b/module/plugins/ocr/GigasizeCom.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class GigasizeCom(OCR):  # OCR plugin for Gigasize.com captchas; every image helper used below is inherited from OCR
+    __name__    = "GigasizeCom"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Gigasize.com ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):  # no plugin-specific state; only the base initialiser runs
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):  # recognise the captcha in `image` and return the text left in self.result_captcha
+        self.load_image(image)
+        self.threshold(2.8)  # threshold value passed straight to the inherited helper
+        self.run_tesser(True, False, False, True)  # flag meanings are defined by OCR.run_tesser (not visible here)
+        return self.result_captcha
diff --git a/module/plugins/ocr/LinksaveIn.py b/module/plugins/ocr/LinksaveIn.py
new file mode 100644
index 000000000..7ced74f4b
--- /dev/null
+++ b/module/plugins/ocr/LinksaveIn.py
@@ -0,0 +1,158 @@
+# -*- coding: utf-8 -*-
+
+try:
+    from PIL import Image
+except ImportError:
+    import Image
+
+from glob import glob
+from os import sep
+from os.path import abspath, dirname
+
+from pyload.plugin.OCR import OCR
+
+
+class LinksaveIn(OCR):  # OCR plugin for Linksave.in captchas: merges image frames, removes a known background, then OCRs each letter
+    __name__    = "LinksaveIn"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Linksave.in ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):
+        OCR.__init__(self)
+        self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep  # "<this module's dir>/LinksaveIn/": holds bg/*.gif, receives the intermediate PNGs
+
+
+    def load_image(self, image):  # merge the non-black pixels of every frame of `image` into one RGB image kept in self.image
+        im = Image.open(image)
+        frame_nr = 0
+
+        lut = im.resize((256, 1))  # 256x1 copy: pixel i is set to value i, so converting to RGB yields the colour of index i
+        lut.putdata(range(256))
+        lut = list(lut.convert("RGB").getdata())  # lut[i] -> (r, g, b); assumes a palette-mode image, built once before any seek
+
+        new = Image.new("RGB", im.size)
+        npix = new.load()
+        while True:
+            try:
+                im.seek(frame_nr)
+            except EOFError:  # raised once frame_nr is past the last frame
+                break
+            frame = im.copy()
+            pix = frame.load()
+            for x in xrange(frame.size[0]):
+                for y in xrange(frame.size[1]):
+                    if lut[pix[x, y]] != (0,0,0):  # black pixels are skipped so earlier frames stay visible
+                        npix[x, y] = lut[pix[x, y]]
+            frame_nr += 1
+        new.save(self.data_dir+"unblacked.png")  # dump of the merged image
+        self.image = new.copy()
+        self.pixels = self.image.load()
+        self.result_captcha = ''
+
+
+    def get_bg(self):  # return the path of the bg/*.gif that shares the most same-position, same-colour pixels with self.image
+        stat = {}  # background path -> number of matching pixels
+        cstat = {}  # colour histogram of the captcha; filled below but never read
+        img = self.image.convert("P")
+        for bgpath in glob(self.data_dir+"bg/*.gif"):
+            stat[bgpath] = 0
+            bg = Image.open(bgpath)
+
+            bglut = bg.resize((256, 1))  # same index -> RGB lookup construction as in load_image
+            bglut.putdata(range(256))
+            bglut = list(bglut.convert("RGB").getdata())
+
+            lut = img.resize((256, 1))
+            lut.putdata(range(256))
+            lut = list(lut.convert("RGB").getdata())
+
+            bgpix = bg.load()
+            pix = img.load()
+            for x in xrange(bg.size[0]):  # iterates over the background's size; assumes the captcha is at least that large
+                for y in xrange(bg.size[1]):
+                    rgb_bg = bglut[bgpix[x, y]]
+                    rgb_c = lut[pix[x, y]]
+                    try:
+                        cstat[rgb_c] += 1
+                    except KeyError:  # first occurrence of this colour
+                        cstat[rgb_c] = 1
+                    if rgb_bg == rgb_c:
+                        stat[bgpath] += 1
+        max_p = 0
+        bg = ""
+        for bgpath, value in stat.iteritems():
+            if max_p < value:
+                bg = bgpath
+                max_p = value
+        return bg  # "" when no background file had any matching pixel
+
+
+    def substract_bg(self, bgpath):  # paint white, in place, every pixel of self.image whose colour equals the background pixel at that position
+        bg = Image.open(bgpath)
+        img = self.image.convert("P")
+
+        bglut = bg.resize((256, 1))  # index -> RGB lookup tables, built as in load_image
+        bglut.putdata(range(256))
+        bglut = list(bglut.convert("RGB").getdata())
+
+        lut = img.resize((256, 1))
+        lut.putdata(range(256))
+        lut = list(lut.convert("RGB").getdata())
+
+        bgpix = bg.load()
+        pix = img.load()
+        orgpix = self.image.load()  # writes through this accessor modify self.image itself
+        for x in xrange(bg.size[0]):
+            for y in xrange(bg.size[1]):
+                rgb_bg = bglut[bgpix[x, y]]
+                rgb_c = lut[pix[x, y]]
+                if rgb_c == rgb_bg:
+                    orgpix[x, y] = (255,255,255)
+
+
+    def eval_black_white(self):  # replace self.image by a 140x75 black/white image derived from the per-pixel channel balance
+        new = Image.new("RGB", (140, 75))  # fixed output size; self.image is read at the same coordinates
+        pix = new.load()
+        orgpix = self.image.load()
+        thresh = 4  # margin by which one channel must exceed the other two
+        for x in xrange(new.size[0]):
+            for y in xrange(new.size[1]):
+                rgb = orgpix[x, y]
+                r, g, b = rgb
+                pix[x, y] = (255,255,255)  # white unless one of the tests below fires
+                if r > max(b, g)+thresh:  # red dominates
+                    pix[x, y] = (0,0,0)
+                if g < min(r, b):  # green is the weakest channel (no margin applied)
+                    pix[x, y] = (0,0,0)
+                if g > max(r, b)+thresh:  # green dominates
+                    pix[x, y] = (0,0,0)
+                if b > max(r, g)+thresh:  # blue dominates
+                    pix[x, y] = (0,0,0)
+        self.image = new
+        self.pixels = self.image.load()
+
+
+    def get_captcha(self, image):  # full pipeline for `image`; returns the concatenated OCR result of all split letters
+        self.load_image(image)
+        bg = self.get_bg()
+        self.substract_bg(bg)
+        self.eval_black_white()
+        self.to_greyscale()
+        self.image.save(self.data_dir+"cleaned_pass1.png")
+        self.clean(4)  # inherited clean step, applied twice
+        self.clean(4)
+        self.image.save(self.data_dir+"cleaned_pass2.png")
+        letters = self.split_captcha_letters()
+        final = ""
+        for n, letter in enumerate(letters):
+            self.image = letter
+            self.image.save(self.data_dir+"letter%d.png" % n)  # per-letter dump written next to the other intermediate images
+            self.run_tesser(True, True, False, False)  # flag meanings are defined by OCR.run_tesser (not visible here)
+            final += self.result_captcha
+
+        return final
diff --git a/module/plugins/ocr/NetloadIn.py b/module/plugins/ocr/NetloadIn.py
new file mode 100644
index 000000000..8939b5318
--- /dev/null
+++ b/module/plugins/ocr/NetloadIn.py
@@ -0,0 +1,29 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class NetloadIn(OCR):  # OCR plugin for Netload.in captchas; every image helper used below is inherited from OCR
+    __name__    = "NetloadIn"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Netload.in ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):  # no plugin-specific state; only the base initialiser runs
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):  # recognise the captcha in `image`; returns at most 4 characters with spaces removed
+        self.load_image(image)
+        self.to_greyscale()
+        self.clean(3)  # inherited clean step, applied twice
+        self.clean(3)
+        self.run_tesser(True, True, False, False)  # flag meanings are defined by OCR.run_tesser (not visible here)
+
+        self.result_captcha = self.result_captcha.replace(" ", "")[:4] # drop spaces, then keep at most the first 4 characters
+
+        return self.result_captcha
diff --git a/module/plugins/ocr/ShareonlineBiz.py b/module/plugins/ocr/ShareonlineBiz.py
new file mode 100644
index 000000000..bbc3d1762
--- /dev/null
+++ b/module/plugins/ocr/ShareonlineBiz.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+from pyload.plugin.OCR import OCR
+
+
+class ShareonlineBiz(OCR):  # OCR plugin for Shareonline.biz captchas; every image helper used below is inherited from OCR
+    __name__    = "ShareonlineBiz"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Shareonline.biz ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("RaNaN", "RaNaN@pyload.org")]
+
+
+    def __init__(self):  # no plugin-specific state; only the base initialiser runs
+        OCR.__init__(self)
+
+
+    def get_captcha(self, image):  # recognise the captcha in `image` letter by letter and return the concatenated text
+        self.load_image(image)
+        self.to_greyscale()
+        self.image = self.image.resize((160, 50))  # work on a fixed 160x50 image
+        self.pixels = self.image.load()  # refresh the pixel accessor for the resized image
+        self.threshold(1.85)
+        #self.eval_black_white(240)
+        #self.derotate_by_average()
+
+        letters = self.split_captcha_letters()
+
+        final = ""
+        for letter in letters:
+            self.image = letter  # run_tesser is called with self.image set to the current letter
+            self.run_tesser(True, True, False, False)  # flag meanings are defined by OCR.run_tesser (not visible here)
+            final += self.result_captcha
+
+        return final
+
+        #tesseract at 60%
diff --git a/module/plugins/ocr/__init__.py b/module/plugins/ocr/__init__.py
new file mode 100644
index 000000000..40a96afc6
--- /dev/null
+++ b/module/plugins/ocr/__init__.py
@@ -0,0 +1 @@
+# -*- coding: utf-8 -*-
-- 
cgit v1.2.3