5 files changed, 144 insertions, 137 deletions
diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py
index 2d0837257..244cf6a2a 100644
--- a/module/plugins/captcha/GigasizeCom.py
+++ b/module/plugins/captcha/GigasizeCom.py
@@ -1,20 +1,24 @@
 # -*- coding: utf-8 -*-
 
-from captcha import OCR
+from module.plugins.captcha.captcha import OCR
+
 
 class GigasizeCom(OCR):
+    __name__    = "GigasizeCom"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Gigasize.com ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
     def __init__(self):
         OCR.__init__(self)
 
+
     def get_captcha(self, image):
         self.load_image(image)
         self.threshold(2.8)
         self.run_tesser(True, False, False, True)
         return self.result_captcha
-
-if __name__ == '__main__':
-    ocr = GigasizeCom()
-    import urllib
-    urllib.urlretrieve('http://www.gigasize.com/randomImage.php', "gigasize_tmp.jpg")
-
-    print ocr.get_captcha('gigasize_tmp.jpg')
diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py
index 8ce26fbac..56cbd58a0 100644
--- a/module/plugins/captcha/LinksaveIn.py
+++ b/module/plugins/captcha/LinksaveIn.py
@@ -1,19 +1,32 @@
 # -*- coding: utf-8 -*-
 
-from captcha import OCR
-import Image
-from os import sep
-from os.path import dirname
-from os.path import abspath
+try:
+    from PIL import Image
+except ImportError:
+    import Image
+
 from glob import glob
+from os import sep
+from os.path import abspath, dirname
+
+from module.plugins.captcha.captcha import OCR
 
 
 class LinksaveIn(OCR):
-    __name__ = "LinksaveIn"
+    __name__    = "LinksaveIn"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Linksave.in ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
     def __init__(self):
         OCR.__init__(self)
         self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep
 
+
     def load_image(self, image):
         im = Image.open(image)
         frame_nr = 0
@@ -41,6 +54,7 @@ class LinksaveIn(OCR):
         self.pixels = self.image.load()
         self.result_captcha = ''
 
+
     def get_bg(self):
         stat = {}
         cstat = {}
@@ -71,12 +85,13 @@ class LinksaveIn(OCR):
                         stat[bgpath] += 1
         max_p = 0
         bg = ""
-        for bgpath, value in stat.items():
+        for bgpath, value in stat.iteritems():
             if max_p < value:
                 bg = bgpath
                 max_p = value
         return bg
 
+
     def substract_bg(self, bgpath):
         bg = Image.open(bgpath)
         img = self.image.convert("P")
@@ -99,6 +114,7 @@ class LinksaveIn(OCR):
                 if rgb_c == rgb_bg:
                     orgpix[x, y] = (255,255,255)
 
+
     def eval_black_white(self):
         new = Image.new("RGB", (140, 75))
         pix = new.load()
@@ -120,6 +136,7 @@ class LinksaveIn(OCR):
         self.image = new
         self.pixels = self.image.load()
 
+
     def get_captcha(self, image):
         self.load_image(image)
         bg = self.get_bg()
@@ -139,11 +156,3 @@ class LinksaveIn(OCR):
             final += self.result_captcha
 
         return final
-
-if __name__ == '__main__':
-    import urllib
-    ocr = LinksaveIn()
-    testurl = "http://linksave.in/captcha/cap.php?hsh=2229185&code=ZzHdhl3UffV3lXTH5U4b7nShXj%2Bwma1vyoNBcbc6lcc%3D"
-    urllib.urlretrieve(testurl, ocr.data_dir+"captcha.gif")
-
-    print ocr.get_captcha(ocr.data_dir+'captcha.gif')
diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py
index 733fe99db..28eb18fb5 100644
--- a/module/plugins/captcha/NetloadIn.py
+++ b/module/plugins/captcha/NetloadIn.py
@@ -1,12 +1,22 @@
 # -*- coding: utf-8 -*-
 
-from captcha import OCR
+from module.plugins.captcha.captcha import OCR
+
 
 class NetloadIn(OCR):
-    __name__ = "NetloadIn"
+    __name__    = "NetloadIn"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Netload.in ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
     def __init__(self):
         OCR.__init__(self)
 
+
     def get_captcha(self, image):
         self.load_image(image)
         self.to_greyscale()
@@ -17,10 +27,3 @@ class NetloadIn(OCR):
         self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers
 
         return self.result_captcha
-
-if __name__ == '__main__':
-    import urllib
-    ocr = NetloadIn()
-    urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png")
-
-    print  ocr.get_captcha('captcha.png')
diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py
index 0c87b636d..8210e8859 100644
--- a/module/plugins/captcha/ShareonlineBiz.py
+++ b/module/plugins/captcha/ShareonlineBiz.py
@@ -1,31 +1,23 @@
 # -*- coding: utf-8 -*-
 
-#
-#Copyright (C) 2009 kingzero, RaNaN
-#
-#This program is free software; you can redistribute it and/or modify
-#it under the terms of the GNU General Public License as published by
-#the Free Software Foundation; either version 3 of the License,
-#or (at your option) any later version.
-#
-#This program is distributed in the hope that it will be useful,
-#but WITHOUT ANY WARRANTY; without even the implied warranty of
-#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#See the GNU General Public License for more details.
-#
-#You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-###
-from captcha import OCR
+from module.plugins.captcha.captcha import OCR
+
 
 class ShareonlineBiz(OCR):
-    __name__ = "ShareonlineBiz"
+    __name__    = "ShareonlineBiz"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """Shareonline.biz ocr plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("RaNaN", "RaNaN@pyload.org")]
+
 
     def __init__(self):
         OCR.__init__(self)
 
-    def get_captcha(self, image): 
+
+    def get_captcha(self, image):
         self.load_image(image)
         self.to_greyscale()
         self.image = self.image.resize((160, 50))
@@ -45,9 +37,3 @@ class ShareonlineBiz(OCR):
         return final
 
         #tesseract at 60%
-
-if __name__ == '__main__':
-    import urllib
-    ocr = ShareonlineBiz()
-    urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
-    print  ocr.get_captcha('captcha.jpeg')
diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py
index 7e4dec697..0f233ec00 100644
--- a/module/plugins/captcha/captcha.py
+++ b/module/plugins/captcha/captcha.py
@@ -1,56 +1,49 @@
 # -*- coding: utf-8 -*-
 
-#
-#Copyright (C) 2009 kingzero, RaNaN
-#
-#This program is free software; you can redistribute it and/or modify
-#it under the terms of the GNU General Public License as published by
-#the Free Software Foundation; either version 3 of the License,
-#or (at your option) any later version.
-#
-#This program is distributed in the hope that it will be useful,
-#but WITHOUT ANY WARRANTY; without even the implied warranty of
-#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#See the GNU General Public License for more details.
-#
-#You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-###
 from __future__ import with_statement
-import os
-from os.path import join
-from os.path import abspath
+
+try:
+    from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+except ImportError:
+    import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+
 import logging
+import os
 import subprocess
 #import tempfile
 
-import Image
-import TiffImagePlugin
-import PngImagePlugin
-import GifImagePlugin
-import JpegImagePlugin
+from os.path import abspath, join
 
 
 class OCR(object):
+    __name__    = "OCR"
+    __type__    = "ocr"
+    __version__ = "0.10"
+
+    __description__ = """OCR base plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
 
-    __name__ = "OCR"
 
     def __init__(self):
         self.logger = logging.getLogger("log")
 
+
     def load_image(self, image):
         self.image = Image.open(image)
         self.pixels = self.image.load()
         self.result_captcha = ''
 
+
     def unload(self):
         """delete all tmp images"""
         pass
 
+
     def threshold(self, value):
         self.image = self.image.point(lambda a: a * value + 10)
 
+
     def run(self, command):
         """Run a command"""
 
@@ -61,42 +54,46 @@ class OCR(object):
         popen.stderr.close()
         self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
 
+
     def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
-        #self.logger.debug("create tmp tif")
+        #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
+        try:
+            tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
+            tmpTif.close()
+
+            #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+            tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
+            tmpTxt.close()
 
-        #tmp = tempfile.NamedTemporaryFile(suffix=".tif")
-        tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
-        tmp.close()
-        #self.logger.debug("create tmp txt")
-        #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
-        tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
-        tmpTxt.close()
+        except IOError, e:
+            self.logger.error(e)  # OCR defines no logError(); use the logger created in __init__
+            return
 
         self.logger.debug("save tiff")
-        self.image.save(tmp.name, 'TIFF')
+        self.image.save(tmpTif.name, 'TIFF')
 
         if os.name == "nt":
-            tessparams = [join(pypath,"tesseract","tesseract.exe")]
+            tessparams = [join(pypath, "tesseract", "tesseract.exe")]
         else:
-            tessparams = ['tesseract']
+            tessparams = ["tesseract"]
 
-        tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] )
+        tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] )
 
         if subset and (digits or lowercase or uppercase):
-            #self.logger.debug("create temp subset config")
             #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
-            tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb")
-            tmpSub.write("tessedit_char_whitelist ")
-            if digits:
-                tmpSub.write("0123456789")
-            if lowercase:
-                tmpSub.write("abcdefghijklmnopqrstuvwxyz")
-            if uppercase:
-                tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
-            tmpSub.write("\n")
-            tessparams.append("nobatch")
-            tessparams.append(abspath(tmpSub.name))
-            tmpSub.close()
+            with open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub:
+                tmpSub.write("tessedit_char_whitelist ")
+
+                if digits:
+                    tmpSub.write("0123456789")
+                if lowercase:
+                    tmpSub.write("abcdefghijklmnopqrstuvwxyz")
+                if uppercase:
+                    tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+                tmpSub.write("\n")
+                tessparams.append("nobatch")
+                tessparams.append(abspath(tmpSub.name))
 
         self.logger.debug("run tesseract")
         self.run(tessparams)
@@ -110,22 +107,25 @@ class OCR(object):
 
         self.logger.debug(self.result_captcha)
         try:
-            os.remove(tmp.name)
+            os.remove(tmpTif.name)
             os.remove(tmpTxt.name)
             if subset and (digits or lowercase or uppercase):
                 os.remove(tmpSub.name)
         except:
             pass
 
+
     def get_captcha(self, name):
         raise NotImplementedError
 
+
     def to_greyscale(self):
         if self.image.mode != 'L':
             self.image = self.image.convert('L')
 
         self.pixels = self.image.load()
 
+
     def eval_black_white(self, limit):
         self.pixels = self.image.load()
         w, h = self.image.size
@@ -136,6 +136,7 @@ class OCR(object):
                 else:
                     self.pixels[x, y] = 0
 
+
     def clean(self, allowed):
         pixels = self.pixels
 
@@ -143,19 +144,28 @@ class OCR(object):
 
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 255: continue
-                # no point in processing white pixels since we only want to remove black pixel
+                if pixels[x, y] == 255:
+                    continue
+                # No point in processing white pixels since we only want to remove black pixels
                 count = 0
 
                 try:
-                    if pixels[x-1, y-1] != 255: count += 1
-                    if pixels[x-1, y] != 255: count += 1
-                    if pixels[x-1, y + 1] != 255: count += 1
-                    if pixels[x, y + 1] != 255: count += 1
-                    if pixels[x + 1, y + 1] != 255: count += 1
-                    if pixels[x + 1, y] != 255: count += 1
-                    if pixels[x + 1, y-1] != 255: count += 1
-                    if pixels[x, y-1] != 255: count += 1
+                    if pixels[x-1, y-1] != 255:
+                        count += 1
+                    if pixels[x-1, y] != 255:
+                        count += 1
+                    if pixels[x-1, y + 1] != 255:
+                        count += 1
+                    if pixels[x, y + 1] != 255:
+                        count += 1
+                    if pixels[x + 1, y + 1] != 255:
+                        count += 1
+                    if pixels[x + 1, y] != 255:
+                        count += 1
+                    if pixels[x + 1, y-1] != 255:
+                        count += 1
+                    if pixels[x, y-1] != 255:
+                        count += 1
                 except:
                     pass
 
@@ -167,10 +177,12 @@ class OCR(object):
             # second pass: this time set all 1's to 255 (white)
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 1: pixels[x, y] = 255
+                if pixels[x, y] == 1:
+                    pixels[x, y] = 255
 
         self.pixels = pixels
 
+
     def derotate_by_average(self):
         """rotate by checking each angle and guess most suitable"""
 
@@ -245,6 +257,7 @@ class OCR(object):
 
         self.pixels = pixels
 
+
     def split_captcha_letters(self):
         captcha = self.image
         started = False
@@ -262,13 +275,16 @@ class OCR(object):
                         firstX = x
                         lastX = x
 
-                    if y > bottomY: bottomY = y
-                    if y < topY: topY = y
-                    if x > lastX: lastX = x
+                    if y > bottomY:
+                        bottomY = y
+                    if y < topY:
+                        topY = y
+                    if x > lastX:
+                        lastX = x
 
                     black_pixel_in_col = True
 
-            if black_pixel_in_col == False and started == True:
+            if black_pixel_in_col is False and started is True:
                 rect = (firstX, topY, lastX, bottomY)
                 new_captcha = captcha.crop(rect)
 
@@ -281,8 +297,8 @@ class OCR(object):
 
         return letters
 
-    def correct(self, values, var=None):
 
+    def correct(self, values, var=None):
         if var:
             result = var
         else:
@@ -300,14 +316,3 @@ class OCR(object):
             return result
         else:
             self.result_captcha = result
-
-
-if __name__ == '__main__':
-    ocr = OCR()
-    ocr.load_image("B.jpg")
-    ocr.to_greyscale()
-    ocr.eval_black_white(140)
-    ocr.derotate_by_average()
-    ocr.run_tesser()
-    print "Tesseract", ocr.result_captcha
-    ocr.image.save("derotated.jpg")