summary | refs | log | tree | commit | diff | stats
path: root/module/plugins/captcha
diff options
context:
space:
mode:
Diffstat (limited to 'module/plugins/captcha')
-rw-r--r-- module/plugins/captcha/GigasizeCom.py 11
-rw-r--r-- module/plugins/captcha/LinksaveIn.py 23
-rw-r--r-- module/plugins/captcha/NetloadIn.py 11
-rw-r--r-- module/plugins/captcha/ShareonlineBiz.py 11
-rw-r--r-- module/plugins/captcha/captcha.py 56
5 files changed, 69 insertions, 43 deletions
diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py
index add3ffc57..99f432d12 100644
--- a/module/plugins/captcha/GigasizeCom.py
+++ b/module/plugins/captcha/GigasizeCom.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
-from module.plugins.captcha import OCR
+from module.plugins.captcha.captcha import OCR
class GigasizeCom(OCR):
- __name__ = "GigasizeCom"
- __type__ = "ocr"
+ __name__ = "GigasizeCom"
+ __type__ = "ocr"
__version__ = "0.1"
__description__ = """Gigasize.com ocr plugin"""
- __author_name__ = "pyLoad Team"
- __author_mail__ = "admin@pyload.org"
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
def __init__(self):
OCR.__init__(self)
+
def get_captcha(self, image):
self.load_image(image)
self.threshold(2.8)
diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py
index dd5ac7b98..41673d8a6 100644
--- a/module/plugins/captcha/LinksaveIn.py
+++ b/module/plugins/captcha/LinksaveIn.py
@@ -1,27 +1,32 @@
# -*- coding: utf-8 -*-
-from PIL import Image
+try:
+ from PIL import Image
+except ImportError:
+ import Image
+
from glob import glob
from os import sep
from os.path import abspath, dirname
-from module.plugins.captcha import OCR
+from module.plugins.captcha.captcha import OCR
class LinksaveIn(OCR):
- __name__ = "LinksaveIn"
- __type__ = "ocr"
+ __name__ = "LinksaveIn"
+ __type__ = "ocr"
__version__ = "0.1"
__description__ = """Linksave.in ocr plugin"""
- __author_name__ = "pyLoad Team"
- __author_mail__ = "admin@pyload.org"
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
def __init__(self):
OCR.__init__(self)
self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep
+
def load_image(self, image):
im = Image.open(image)
frame_nr = 0
@@ -49,6 +54,7 @@ class LinksaveIn(OCR):
self.pixels = self.image.load()
self.result_captcha = ''
+
def get_bg(self):
stat = {}
cstat = {}
@@ -79,12 +85,13 @@ class LinksaveIn(OCR):
stat[bgpath] += 1
max_p = 0
bg = ""
- for bgpath, value in stat.items():
+ for bgpath, value in stat.iteritems():
if max_p < value:
bg = bgpath
max_p = value
return bg
+
def substract_bg(self, bgpath):
bg = Image.open(bgpath)
img = self.image.convert("P")
@@ -107,6 +114,7 @@ class LinksaveIn(OCR):
if rgb_c == rgb_bg:
orgpix[x, y] = (255,255,255)
+
def eval_black_white(self):
new = Image.new("RGB", (140, 75))
pix = new.load()
@@ -128,6 +136,7 @@ class LinksaveIn(OCR):
self.image = new
self.pixels = self.image.load()
+
def get_captcha(self, image):
self.load_image(image)
bg = self.get_bg()
diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py
index cb6cb9264..fc8eecf59 100644
--- a/module/plugins/captcha/NetloadIn.py
+++ b/module/plugins/captcha/NetloadIn.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
-from module.plugins.captcha import OCR
+from module.plugins.captcha.captcha import OCR
class NetloadIn(OCR):
- __name__ = "NetloadIn"
- __type__ = "ocr"
+ __name__ = "NetloadIn"
+ __type__ = "ocr"
__version__ = "0.1"
__description__ = """Netload.in ocr plugin"""
- __author_name__ = "pyLoad Team"
- __author_mail__ = "admin@pyload.org"
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
def __init__(self):
OCR.__init__(self)
+
def get_captcha(self, image):
self.load_image(image)
self.to_greyscale()
diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py
index aab4e9da0..6e513941d 100644
--- a/module/plugins/captcha/ShareonlineBiz.py
+++ b/module/plugins/captcha/ShareonlineBiz.py
@@ -1,21 +1,22 @@
# -*- coding: utf-8 -*-
-from module.plugins.captcha import OCR
+from module.plugins.captcha.captcha import OCR
class ShareonlineBiz(OCR):
- __name__ = "ShareonlineBiz"
- __type__ = "ocr"
+ __name__ = "ShareonlineBiz"
+ __type__ = "ocr"
__version__ = "0.1"
__description__ = """Shareonline.biz ocr plugin"""
- __author_name__ = "RaNaN"
- __author_mail__ = "RaNaN@pyload.org"
+ __license__ = "GPLv3"
+ __authors__ = [("RaNaN", "RaNaN@pyload.org")]
def __init__(self):
OCR.__init__(self)
+
def get_captcha(self, image):
self.load_image(image)
self.to_greyscale()
diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py
index cc07f50cf..b67ce9b9e 100644
--- a/module/plugins/captcha/captcha.py
+++ b/module/plugins/captcha/captcha.py
@@ -2,11 +2,11 @@
from __future__ import with_statement
-import GifImagePlugin
-import Image
-import JpegImagePlugin
-import PngImagePlugin
-import TiffImagePlugin
+try:
+ from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+except ImportError:
+ import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+
import logging
import os
import subprocess
@@ -16,30 +16,34 @@ from os.path import abspath, join
class OCR(object):
- __name__ = "OCR"
- __type__ = "ocr"
+ __name__ = "OCR"
+ __type__ = "ocr"
__version__ = "0.1"
__description__ = """OCR base plugin"""
- __author_name__ = "pyLoad Team"
- __author_mail__ = "admin@pyload.org"
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
def __init__(self):
self.logger = logging.getLogger("log")
+
def load_image(self, image):
self.image = Image.open(image)
self.pixels = self.image.load()
self.result_captcha = ''
+
def unload(self):
"""delete all tmp images"""
pass
+
def threshold(self, value):
self.image = self.image.point(lambda a: a * value + 10)
+
def run(self, command):
"""Run a command"""
@@ -50,29 +54,32 @@ class OCR(object):
popen.stderr.close()
self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
+
def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
- #self.logger.debug("create tmp tif")
+ #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
+ try:
+ tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
+ tmpTif.close()
+
+ #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+ tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
+ tmpTxt.close()
- #tmp = tempfile.NamedTemporaryFile(suffix=".tif")
- tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
- tmp.close()
- #self.logger.debug("create tmp txt")
- #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
- tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
- tmpTxt.close()
+ except IOError, e:
+ self.logError(e)
+ return
self.logger.debug("save tiff")
- self.image.save(tmp.name, 'TIFF')
+ self.image.save(tmpTif.name, 'TIFF')
if os.name == "nt":
tessparams = [join(pypath, "tesseract", "tesseract.exe")]
else:
tessparams = ["tesseract"]
- tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] )
+ tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] )
if subset and (digits or lowercase or uppercase):
- #self.logger.debug("create temp subset config")
#tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb")
tmpSub.write("tessedit_char_whitelist ")
@@ -99,22 +106,25 @@ class OCR(object):
self.logger.debug(self.result_captcha)
try:
- os.remove(tmp.name)
+ os.remove(tmpTif.name)
os.remove(tmpTxt.name)
if subset and (digits or lowercase or uppercase):
os.remove(tmpSub.name)
except:
pass
+
def get_captcha(self, name):
raise NotImplementedError
+
def to_greyscale(self):
if self.image.mode != 'L':
self.image = self.image.convert('L')
self.pixels = self.image.load()
+
def eval_black_white(self, limit):
self.pixels = self.image.load()
w, h = self.image.size
@@ -125,6 +135,7 @@ class OCR(object):
else:
self.pixels[x, y] = 0
+
def clean(self, allowed):
pixels = self.pixels
@@ -170,6 +181,7 @@ class OCR(object):
self.pixels = pixels
+
def derotate_by_average(self):
"""rotate by checking each angle and guess most suitable"""
@@ -244,6 +256,7 @@ class OCR(object):
self.pixels = pixels
+
def split_captcha_letters(self):
captcha = self.image
started = False
@@ -283,6 +296,7 @@ class OCR(object):
return letters
+
def correct(self, values, var=None):
if var:
result = var