diff options
Diffstat (limited to 'module/plugins/captcha')
-rw-r--r-- | module/plugins/captcha/GigasizeCom.py | 11 | ||||
-rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 23 | ||||
-rw-r--r-- | module/plugins/captcha/NetloadIn.py | 11 | ||||
-rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 11 | ||||
-rw-r--r-- | module/plugins/captcha/captcha.py | 56 |
5 files changed, 69 insertions, 43 deletions
diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py index add3ffc57..99f432d12 100644 --- a/module/plugins/captcha/GigasizeCom.py +++ b/module/plugins/captcha/GigasizeCom.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR class GigasizeCom(OCR): - __name__ = "GigasizeCom" - __type__ = "ocr" + __name__ = "GigasizeCom" + __type__ = "ocr" __version__ = "0.1" __description__ = """Gigasize.com ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): OCR.__init__(self) + def get_captcha(self, image): self.load_image(image) self.threshold(2.8) diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index dd5ac7b98..41673d8a6 100644 --- a/module/plugins/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py @@ -1,27 +1,32 @@ # -*- coding: utf-8 -*- -from PIL import Image +try: + from PIL import Image +except ImportError: + import Image + from glob import glob from os import sep from os.path import abspath, dirname -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR class LinksaveIn(OCR): - __name__ = "LinksaveIn" - __type__ = "ocr" + __name__ = "LinksaveIn" + __type__ = "ocr" __version__ = "0.1" __description__ = """Linksave.in ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): OCR.__init__(self) self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep + def load_image(self, image): im = Image.open(image) frame_nr = 0 @@ -49,6 +54,7 @@ class LinksaveIn(OCR): self.pixels = self.image.load() self.result_captcha = '' + def get_bg(self): stat = {} cstat = {} @@ -79,12 +85,13 @@ class LinksaveIn(OCR): stat[bgpath] += 1 max_p = 0 bg = "" - for bgpath, value in stat.items(): + for bgpath, value in stat.iteritems(): if max_p < value: bg = bgpath max_p = value return bg + def substract_bg(self, bgpath): bg = Image.open(bgpath) img = self.image.convert("P") @@ -107,6 +114,7 @@ class LinksaveIn(OCR): if rgb_c == rgb_bg: orgpix[x, y] = (255,255,255) + def eval_black_white(self): new = Image.new("RGB", (140, 75)) pix = new.load() @@ -128,6 +136,7 @@ class LinksaveIn(OCR): self.image = new self.pixels = self.image.load() + def get_captcha(self, image): self.load_image(image) bg = self.get_bg() diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index cb6cb9264..fc8eecf59 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR class NetloadIn(OCR): - __name__ = "NetloadIn" - __type__ = "ocr" + __name__ = "NetloadIn" + __type__ = "ocr" __version__ = "0.1" __description__ = """Netload.in ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): OCR.__init__(self) + def get_captcha(self, image): self.load_image(image) self.to_greyscale() diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index aab4e9da0..6e513941d 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py @@ -1,21 +1,22 @@ # -*- coding: utf-8 -*- -from module.plugins.captcha import OCR +from module.plugins.captcha.captcha import OCR class ShareonlineBiz(OCR): - __name__ = "ShareonlineBiz" - __type__ = "ocr" + __name__ = "ShareonlineBiz" + __type__ = "ocr" __version__ = "0.1" __description__ = """Shareonline.biz ocr plugin""" - __author_name__ = "RaNaN" - __author_mail__ = "RaNaN@pyload.org" + __license__ = "GPLv3" + __authors__ = [("RaNaN", "RaNaN@pyload.org")] def __init__(self): OCR.__init__(self) + def get_captcha(self, image): self.load_image(image) self.to_greyscale() diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py index cc07f50cf..b67ce9b9e 100644 --- a/module/plugins/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py @@ -2,11 +2,11 @@ from __future__ import with_statement -import GifImagePlugin -import Image -import JpegImagePlugin -import PngImagePlugin -import TiffImagePlugin +try: + from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin +except ImportError: + import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin + import logging import os import subprocess @@ -16,30 +16,34 @@ from os.path import abspath, join class OCR(object): - __name__ = "OCR" - __type__ = "ocr" + __name__ = "OCR" + __type__ = "ocr" __version__ = "0.1" __description__ = """OCR base plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] def __init__(self): self.logger = logging.getLogger("log") + def load_image(self, image): self.image = Image.open(image) self.pixels = self.image.load() self.result_captcha = '' + def unload(self): """delete all tmp images""" pass + def threshold(self, value): self.image = self.image.point(lambda a: a * value + 10) + def run(self, command): """Run a command""" @@ -50,29 +54,32 @@ class OCR(object): popen.stderr.close() self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): - #self.logger.debug("create tmp tif") + #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + try: + tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") + tmpTif.close() + + #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") + tmpTxt.close() - #tmp = tempfile.NamedTemporaryFile(suffix=".tif") - tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") - tmp.close() - #self.logger.debug("create tmp txt") - #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") - tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") - tmpTxt.close() + except IOError, e: + self.logError(e) + return self.logger.debug("save tiff") - self.image.save(tmp.name, 'TIFF') + self.image.save(tmpTif.name, 'TIFF') if os.name == "nt": tessparams = [join(pypath, "tesseract", "tesseract.exe")] else: tessparams = ["tesseract"] - tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] ) + tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] ) if subset and (digits or lowercase or uppercase): - #self.logger.debug("create temp subset config") #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") tmpSub.write("tessedit_char_whitelist ") @@ -99,22 +106,25 @@ class OCR(object): self.logger.debug(self.result_captcha) try: - os.remove(tmp.name) + os.remove(tmpTif.name) os.remove(tmpTxt.name) if subset and (digits or lowercase or uppercase): os.remove(tmpSub.name) except: pass + def get_captcha(self, name): raise NotImplementedError + def to_greyscale(self): if self.image.mode != 'L': self.image = self.image.convert('L') self.pixels = self.image.load() + def eval_black_white(self, limit): self.pixels = self.image.load() w, h = self.image.size @@ -125,6 +135,7 @@ class OCR(object): else: self.pixels[x, y] = 0 + def clean(self, allowed): pixels = self.pixels @@ -170,6 +181,7 @@ class OCR(object): self.pixels = pixels + def derotate_by_average(self): """rotate by checking each angle and guess most suitable""" @@ -244,6 +256,7 @@ class OCR(object): self.pixels = pixels + def split_captcha_letters(self): captcha = self.image started = False @@ -283,6 +296,7 @@ class OCR(object): return letters + def correct(self, values, var=None): if var: result = var |