From de32c12bfc05b3d04df2686373e84494eefed44f Mon Sep 17 00:00:00 2001
From: Walter Purcaro <vuolter@gmail.com>
Date: Tue, 24 Feb 2015 22:22:15 +0100
Subject: captcha -> OCR

---
 module/plugins/captcha/GigasizeCom.py    |   4 +-
 module/plugins/captcha/LinksaveIn.py     |   4 +-
 module/plugins/captcha/NetloadIn.py      |   4 +-
 module/plugins/captcha/OCR.py            | 319 +++++++++++++++++++++++++++++++
 module/plugins/captcha/ShareonlineBiz.py |   4 +-
 module/plugins/captcha/captcha.py        | 319 -------------------------------
 6 files changed, 327 insertions(+), 327 deletions(-)
 create mode 100644 module/plugins/captcha/OCR.py
 delete mode 100644 module/plugins/captcha/captcha.py

(limited to 'module/plugins/captcha')

diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py
index 244cf6a2a..52c41729b 100644
--- a/module/plugins/captcha/GigasizeCom.py
+++ b/module/plugins/captcha/GigasizeCom.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-from module.plugins.captcha.captcha import OCR
+from module.plugins.captcha.OCR import OCR
 
 
 class GigasizeCom(OCR):
     __name__    = "GigasizeCom"
     __type__    = "ocr"
-    __version__ = "0.10"
+    __version__ = "0.11"
 
     __description__ = """Gigasize.com ocr plugin"""
     __license__     = "GPLv3"
diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py
index de6b0e7ff..b5cb0f608 100644
--- a/module/plugins/captcha/LinksaveIn.py
+++ b/module/plugins/captcha/LinksaveIn.py
@@ -9,13 +9,13 @@ from glob import glob
 from os import sep
 from os.path import abspath, dirname
 
-from module.plugins.captcha.captcha import OCR
+from module.plugins.captcha.OCR import OCR
 
 
 class LinksaveIn(OCR):
     __name__    = "LinksaveIn"
     __type__    = "ocr"
-    __version__ = "0.10"
+    __version__ = "0.11"
 
     __description__ = """Linksave.in ocr plugin"""
     __license__     = "GPLv3"
diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py
index 28eb18fb5..1fb258c47 100644
--- a/module/plugins/captcha/NetloadIn.py
+++ b/module/plugins/captcha/NetloadIn.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-from module.plugins.captcha.captcha import OCR
+from module.plugins.captcha.OCR import OCR
 
 
 class NetloadIn(OCR):
     __name__    = "NetloadIn"
     __type__    = "ocr"
-    __version__ = "0.10"
+    __version__ = "0.11"
 
     __description__ = """Netload.in ocr plugin"""
     __license__     = "GPLv3"
diff --git a/module/plugins/captcha/OCR.py b/module/plugins/captcha/OCR.py
new file mode 100644
index 000000000..1874ba07d
--- /dev/null
+++ b/module/plugins/captcha/OCR.py
@@ -0,0 +1,319 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+try:
+    from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+
+except ImportError:
+    import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+
+import logging
+import os
+import subprocess
+#import tempfile
+
+from module.utils import save_join
+
+
+class OCR(object):
+    __name__    = "OCR"
+    __type__    = "ocr"
+    __version__ = "0.11"
+
+    __description__ = """OCR base plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    def __init__(self):
+        self.logger = logging.getLogger("log")  # all logging done by this class goes through the logger named "log"
+
+
+    def load_image(self, image):  # `image` is handed unchanged to PIL's Image.open()
+        self.image = Image.open(image)
+        self.pixels = self.image.load()  # pixel access object; the other methods index it as pixels[x, y]
+        self.result_captcha = ''  # start with an empty recognition result for the new image
+
+
+    def unload(self):
+        """Hook meant for deleting temporary images; this base implementation does nothing"""
+        pass
+
+
+    def threshold(self, value):  # replaces self.image; self.pixels is not refreshed here
+        self.image = self.image.point(lambda a: a * value + 10)  # maps every pixel value a to a * value + 10
+
+
+    def run(self, command):
+        """Run `command` (an argv list), wait for it to finish and log its return code and output at debug level"""
+
+        popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        # communicate() drains and closes both pipes while waiting; wait() followed
+        # by read() can deadlock once the child has filled an OS pipe buffer
+        stdout, stderr = popen.communicate()
+        output = stdout + " | " + stderr
+        self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
+
+
+    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
+        """Save self.image as a TIFF, run tesseract on it and store the text it produced (newlines removed) in self.result_captcha"""
+        try:
+            tmpTif = open(save_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
+            tmpTif.close()
+
+            # both files are only created (empty) here; afterwards just their .name is used
+            tmpTxt = open(save_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
+            tmpTxt.close()
+
+        except IOError, e:
+            self.logger.error(e)  # this class has no logError(); the logger set up in __init__ is its log channel
+            return
+
+        self.logger.debug("save tiff")
+        self.image.save(tmpTif.name, 'TIFF')
+
+        if os.name == "nt":
+            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")]  # pypath is not defined in this module -- presumably a global provided by pyLoad; verify
+        else:
+            tessparams = ["tesseract"]
+        # the output path is passed without its extension (tesseract appends ".txt" itself); splitext() drops only that final suffix
+        tessparams.extend( [os.path.abspath(tmpTif.name), os.path.splitext(os.path.abspath(tmpTxt.name))[0]] )
+
+        if subset and (digits or lowercase or uppercase):
+            # config file whose tessedit_char_whitelist entry lists the selected character classes
+            with open(save_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub:
+                tmpSub.write("tessedit_char_whitelist ")
+
+                if digits:
+                    tmpSub.write("0123456789")
+                if lowercase:
+                    tmpSub.write("abcdefghijklmnopqrstuvwxyz")
+                if uppercase:
+                    tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+                tmpSub.write("\n")
+                tessparams.append("nobatch")
+                tessparams.append(os.path.abspath(tmpSub.name))
+
+        self.logger.debug("run tesseract")
+        self.run(tessparams)
+        self.logger.debug("read txt")
+
+        try:
+            with open(tmpTxt.name, 'r') as f:
+                self.result_captcha = f.read().replace("\n", "")
+        except Exception:
+            self.result_captcha = ""
+
+        self.logger.debug(self.result_captcha)
+        try:
+            os.remove(tmpTif.name)
+            os.remove(tmpTxt.name)
+            if subset and (digits or lowercase or uppercase):
+                os.remove(tmpSub.name)
+        except Exception:  # best-effort cleanup; note that one failing remove() skips the ones after it
+            pass
+
+
+    def get_captcha(self, name):  # not implemented in this class: raises unless a subclass overrides it
+        raise NotImplementedError
+
+
+    def to_greyscale(self):
+        if self.image.mode != 'L':  # 'L' is PIL's 8-bit greyscale mode
+            self.image = self.image.convert('L')
+        # the pixel accessor is re-read in either case; after convert() self.image is a new object
+        self.pixels = self.image.load()
+
+
+    def eval_black_white(self, limit):
+        """Set every pixel above `limit` to 255 and every other pixel to 0, in place"""
+        px = self.pixels = self.image.load()
+        width, height = self.image.size
+        for col in xrange(width):
+            for row in xrange(height):
+                # strictly greater: a pixel equal to `limit` becomes 0
+                above = px[col, row] > limit
+                px[col, row] = 255 if above else 0
+
+
+    def clean(self, allowed):
+        """Whiten every non-white pixel that has fewer than `allowed` non-white pixels among its 8 neighbours"""
+        pixels = self.pixels
+        w, h = self.image.size
+
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 255:
+                    continue
+                # white pixels are skipped above: only non-white pixels can be removed
+                count = 0
+                # the first lookup that raises (e.g. a neighbour outside the image) ends the count early
+                try:
+                    if pixels[x-1, y-1] != 255:
+                        count += 1
+                    if pixels[x-1, y] != 255:
+                        count += 1
+                    if pixels[x-1, y + 1] != 255:
+                        count += 1
+                    if pixels[x, y + 1] != 255:
+                        count += 1
+                    if pixels[x + 1, y + 1] != 255:
+                        count += 1
+                    if pixels[x + 1, y] != 255:
+                        count += 1
+                    if pixels[x + 1, y-1] != 255:
+                        count += 1
+                    if pixels[x, y-1] != 255:
+                        count += 1
+                except Exception:
+                    pass
+
+                # not enough neighbours are dark, so mark this pixel (value 1) to be whitened
+                # in the second pass; until then it still counts as non-white for its neighbours
+                if count < allowed:
+                    pixels[x, y] = 1
+
+        # second pass: this time set all 1's to 255 (white)
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 1:
+                    pixels[x, y] = 255
+
+        self.pixels = pixels
+
+
+    def derotate_by_average(self):
+        """Rotate self.image by the angle in [-45, 44] whose tallest column of dark pixels exceeds the average column the most"""
+
+        w, h = self.image.size
+        pixels = self.pixels
+        # mark the 0-valued pixels as 155 so they stay distinguishable from the 0 areas handled after each rotate()
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 0:
+                    pixels[x, y] = 155
+
+        # angle -> (tallest column of marked pixels) - (mean height of the non-empty columns)
+        highest = {}
+
+        for angle in xrange(-45, 45):
+
+            tmpimage = self.image.rotate(angle)
+
+            pixels = tmpimage.load()
+
+            # self.image is not reassigned inside this loop, so w and h from above still apply
+
+            for x in xrange(w):
+                for y in xrange(h):
+                    if pixels[x, y] == 0:
+                        pixels[x, y] = 255
+
+
+            count = {}
+
+            for x in xrange(w):
+                count[x] = 0
+                for y in xrange(h):
+                    if pixels[x, y] == 155:
+                        count[x] += 1
+
+            total = 0
+            cnt = 0
+
+            for x in count.values():
+                if x != 0:
+                    total += x
+                    cnt += 1
+
+            # cnt == 0 means no marked pixel was found at all; do not divide by zero
+            avg = total / cnt if cnt else 0
+            highest[angle] = 0
+            for x in count.values():
+                if x > highest[angle]:
+                    highest[angle] = x
+
+            highest[angle] = highest[angle] - avg
+
+        hkey = 0
+        hvalue = 0
+
+        for key, value in highest.iteritems():
+            if value > hvalue:
+                hkey = key
+                hvalue = value
+
+        self.image = self.image.rotate(hkey)
+        pixels = self.image.load()
+
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 0:
+                    pixels[x, y] = 255
+
+                if pixels[x, y] == 155:
+                    pixels[x, y] = 0
+
+        self.pixels = pixels
+
+
+    def split_captcha_letters(self):
+        """Cut self.image into one crop per run of columns containing non-white pixels; crops whose width or height is 5 or less are dropped"""
+        captcha = self.image
+        started = False
+        letters = []
+        width, height = captcha.size
+        bottomY, topY = 0, height
+        pixels = captcha.load()
+        for x in xrange(width):
+            black_pixel_in_col = False
+            for y in xrange(height):
+                if pixels[x, y] != 255:
+                    if not started:
+                        started = True
+                        firstX = x
+                        lastX = x
+
+                    if y > bottomY:
+                        bottomY = y
+                    if y < topY:
+                        topY = y
+                    if x > lastX:
+                        lastX = x
+
+                    black_pixel_in_col = True
+            # a completely white column after a started run ends the current letter
+            if black_pixel_in_col is False and started is True:
+                rect = (firstX, topY, lastX, bottomY)
+                new_captcha = captcha.crop(rect)
+                # NOTE(review): PIL crop boxes exclude the right/lower edge, so the crop above leaves out column lastX and row bottomY -- confirm intended
+                w, h = new_captcha.size
+                if w > 5 and h > 5:
+                    letters.append(new_captcha)
+
+                started = False
+                bottomY, topY = 0, height
+        # NOTE(review): a run that reaches the last column is never closed by a white column, so it is not returned -- confirm intended
+        return letters
+
+
+    def correct(self, values, var=None):
+        """Apply the replacements in `values` to `var` and return the result; when
+        `var` is None, rewrite self.result_captcha in place instead (returns None).
+        Each key of `values` is one string or an iterable of strings to replace."""
+        use_own = var is None  # an empty string is a valid input, so test identity instead of truthiness
+        result = self.result_captcha if use_own else var
+
+        for key, item in values.iteritems():
+            # basestring also covers unicode keys, which must not be split into single characters
+            if isinstance(key, basestring):
+                result = result.replace(key, item)
+            else:
+                for expr in key:
+                    result = result.replace(expr, item)
+
+        if not use_own:
+            return result
+        self.result_captcha = result
diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py
index 8210e8859..6fad66600 100644
--- a/module/plugins/captcha/ShareonlineBiz.py
+++ b/module/plugins/captcha/ShareonlineBiz.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
 
-from module.plugins.captcha.captcha import OCR
+from module.plugins.captcha.OCR import OCR
 
 
 class ShareonlineBiz(OCR):
     __name__    = "ShareonlineBiz"
     __type__    = "ocr"
-    __version__ = "0.10"
+    __version__ = "0.11"
 
     __description__ = """Shareonline.biz ocr plugin"""
     __license__     = "GPLv3"
diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py
deleted file mode 100644
index 1874ba07d..000000000
--- a/module/plugins/captcha/captcha.py
+++ /dev/null
@@ -1,319 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from __future__ import with_statement
-
-try:
-    from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
-
-except ImportError:
-    import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
-
-import logging
-import os
-import subprocess
-#import tempfile
-
-from module.utils import save_join
-
-
-class OCR(object):
-    __name__    = "OCR"
-    __type__    = "ocr"
-    __version__ = "0.11"
-
-    __description__ = """OCR base plugin"""
-    __license__     = "GPLv3"
-    __authors__     = [("pyLoad Team", "admin@pyload.org")]
-
-
-    def __init__(self):
-        self.logger = logging.getLogger("log")
-
-
-    def load_image(self, image):
-        self.image = Image.open(image)
-        self.pixels = self.image.load()
-        self.result_captcha = ''
-
-
-    def unload(self):
-        """delete all tmp images"""
-        pass
-
-
-    def threshold(self, value):
-        self.image = self.image.point(lambda a: a * value + 10)
-
-
-    def run(self, command):
-        """Run a command"""
-
-        popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        popen.wait()
-        output = popen.stdout.read() +" | "+ popen.stderr.read()
-        popen.stdout.close()
-        popen.stderr.close()
-        self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
-
-
-    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
-        #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
-        try:
-            tmpTif = open(save_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
-            tmpTif.close()
-
-            #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
-            tmpTxt = open(save_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
-            tmpTxt.close()
-
-        except IOError, e:
-            self.logError(e)
-            return
-
-        self.logger.debug("save tiff")
-        self.image.save(tmpTif.name, 'TIFF')
-
-        if os.name == "nt":
-            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")]
-        else:
-            tessparams = ["tesseract"]
-
-        tessparams.extend( [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] )
-
-        if subset and (digits or lowercase or uppercase):
-            #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
-            with open(save_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub:
-                tmpSub.write("tessedit_char_whitelist ")
-
-                if digits:
-                    tmpSub.write("0123456789")
-                if lowercase:
-                    tmpSub.write("abcdefghijklmnopqrstuvwxyz")
-                if uppercase:
-                    tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
-
-                tmpSub.write("\n")
-                tessparams.append("nobatch")
-                tessparams.append(os.path.abspath(tmpSub.name))
-
-        self.logger.debug("run tesseract")
-        self.run(tessparams)
-        self.logger.debug("read txt")
-
-        try:
-            with open(tmpTxt.name, 'r') as f:
-                self.result_captcha = f.read().replace("\n", "")
-        except Exception:
-            self.result_captcha = ""
-
-        self.logger.debug(self.result_captcha)
-        try:
-            os.remove(tmpTif.name)
-            os.remove(tmpTxt.name)
-            if subset and (digits or lowercase or uppercase):
-                os.remove(tmpSub.name)
-        except Exception:
-            pass
-
-
-    def get_captcha(self, name):
-        raise NotImplementedError
-
-
-    def to_greyscale(self):
-        if self.image.mode != 'L':
-            self.image = self.image.convert('L')
-
-        self.pixels = self.image.load()
-
-
-    def eval_black_white(self, limit):
-        self.pixels = self.image.load()
-        w, h = self.image.size
-        for x in xrange(w):
-            for y in xrange(h):
-                if self.pixels[x, y] > limit:
-                    self.pixels[x, y] = 255
-                else:
-                    self.pixels[x, y] = 0
-
-
-    def clean(self, allowed):
-        pixels = self.pixels
-
-        w, h = self.image.size
-
-        for x in xrange(w):
-            for y in xrange(h):
-                if pixels[x, y] == 255:
-                    continue
-                # No point in processing white pixels since we only want to remove black pixel
-                count = 0
-
-                try:
-                    if pixels[x-1, y-1] != 255:
-                        count += 1
-                    if pixels[x-1, y] != 255:
-                        count += 1
-                    if pixels[x-1, y + 1] != 255:
-                        count += 1
-                    if pixels[x, y + 1] != 255:
-                        count += 1
-                    if pixels[x + 1, y + 1] != 255:
-                        count += 1
-                    if pixels[x + 1, y] != 255:
-                        count += 1
-                    if pixels[x + 1, y-1] != 255:
-                        count += 1
-                    if pixels[x, y-1] != 255:
-                        count += 1
-                except Exception:
-                    pass
-
-        # not enough neighbors are dark pixels so mark this pixel
-            # to be changed to white
-                if count < allowed:
-                    pixels[x, y] = 1
-
-            # second pass: this time set all 1's to 255 (white)
-        for x in xrange(w):
-            for y in xrange(h):
-                if pixels[x, y] == 1:
-                    pixels[x, y] = 255
-
-        self.pixels = pixels
-
-
-    def derotate_by_average(self):
-        """rotate by checking each angle and guess most suitable"""
-
-        w, h = self.image.size
-        pixels = self.pixels
-
-        for x in xrange(w):
-            for y in xrange(h):
-                if pixels[x, y] == 0:
-                    pixels[x, y] = 155
-
-        highest = {}
-        counts = {}
-
-        for angle in xrange(-45, 45):
-
-            tmpimage = self.image.rotate(angle)
-
-            pixels = tmpimage.load()
-
-            w, h = self.image.size
-
-            for x in xrange(w):
-                for y in xrange(h):
-                    if pixels[x, y] == 0:
-                        pixels[x, y] = 255
-
-
-            count = {}
-
-            for x in xrange(w):
-                count[x] = 0
-                for y in xrange(h):
-                    if pixels[x, y] == 155:
-                        count[x] += 1
-
-            sum = 0
-            cnt = 0
-
-            for x in count.values():
-                if x != 0:
-                    sum += x
-                    cnt += 1
-
-            avg = sum / cnt
-            counts[angle] = cnt
-            highest[angle] = 0
-            for x in count.values():
-                if x > highest[angle]:
-                    highest[angle] = x
-
-            highest[angle] = highest[angle] - avg
-
-        hkey = 0
-        hvalue = 0
-
-        for key, value in highest.iteritems():
-            if value > hvalue:
-                hkey = key
-                hvalue = value
-
-        self.image = self.image.rotate(hkey)
-        pixels = self.image.load()
-
-        for x in xrange(w):
-            for y in xrange(h):
-                if pixels[x, y] == 0:
-                    pixels[x, y] = 255
-
-                if pixels[x, y] == 155:
-                    pixels[x, y] = 0
-
-        self.pixels = pixels
-
-
-    def split_captcha_letters(self):
-        captcha = self.image
-        started = False
-        letters = []
-        width, height = captcha.size
-        bottomY, topY = 0, height
-        pixels = captcha.load()
-
-        for x in xrange(width):
-            black_pixel_in_col = False
-            for y in xrange(height):
-                if pixels[x, y] != 255:
-                    if not started:
-                        started = True
-                        firstX = x
-                        lastX = x
-
-                    if y > bottomY:
-                        bottomY = y
-                    if y < topY:
-                        topY = y
-                    if x > lastX:
-                        lastX = x
-
-                    black_pixel_in_col = True
-
-            if black_pixel_in_col is False and started is True:
-                rect = (firstX, topY, lastX, bottomY)
-                new_captcha = captcha.crop(rect)
-
-                w, h = new_captcha.size
-                if w > 5 and h > 5:
-                    letters.append(new_captcha)
-
-                started = False
-                bottomY, topY = 0, height
-
-        return letters
-
-
-    def correct(self, values, var=None):
-        if var:
-            result = var
-        else:
-            result = self.result_captcha
-
-        for key, item in values.iteritems():
-
-            if key.__class__ == str:
-                result = result.replace(key, item)
-            else:
-                for expr in key:
-                    result = result.replace(expr, item)
-
-        if var:
-            return result
-        else:
-            self.result_captcha = result
-- 
cgit v1.2.3