diff options
Diffstat (limited to 'module/plugins/ocr')
-rw-r--r-- | module/plugins/ocr/GigasizeCom.py | 23 | ||||
-rw-r--r-- | module/plugins/ocr/LinksaveIn.py | 148 | ||||
-rw-r--r-- | module/plugins/ocr/NetloadIn.py | 27 | ||||
-rw-r--r-- | module/plugins/ocr/ShareonlineBiz.py | 38 | ||||
-rw-r--r-- | module/plugins/ocr/__init__.py | 0 |
5 files changed, 0 insertions, 236 deletions
diff --git a/module/plugins/ocr/GigasizeCom.py b/module/plugins/ocr/GigasizeCom.py deleted file mode 100644 index ba0b805e6..000000000 --- a/module/plugins/ocr/GigasizeCom.py +++ /dev/null @@ -1,23 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR - - -class GigasizeCom(OCR): - __name__ = "GigasizeCom" - __type__ = "ocr" - __version__ = "0.1" - - __description__ = """Gigasize.com ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" - - - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.threshold(2.8) - self.run_tesser(True, False, False, True) - return self.result_captcha diff --git a/module/plugins/ocr/LinksaveIn.py b/module/plugins/ocr/LinksaveIn.py deleted file mode 100644 index 3ae139a4e..000000000 --- a/module/plugins/ocr/LinksaveIn.py +++ /dev/null @@ -1,148 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR -from PIL import Image -from os import sep -from os.path import abspath, dirname -from glob import glob - - -class LinksaveIn(OCR): - __name__ = "LinksaveIn" - __type__ = "ocr" - __version__ = "0.1" - - __description__ = """Linksave.in ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" - - - def __init__(self): - OCR.__init__(self) - self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep - - def load_image(self, image): - im = Image.open(image) - frame_nr = 0 - - lut = im.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - new = Image.new("RGB", im.size) - npix = new.load() - while True: - try: - im.seek(frame_nr) - except EOFError: - break - frame = im.copy() - pix = frame.load() - for x in xrange(frame.size[0]): - for y in xrange(frame.size[1]): - if lut[pix[x, y]] != (0,0,0): - npix[x, y] = lut[pix[x, y]] - frame_nr += 1 - new.save(self.data_dir+"unblacked.png") - self.image = new.copy() - self.pixels = self.image.load() - self.result_captcha = '' - - def get_bg(self): - stat = {} - cstat = {} - img = self.image.convert("P") - for bgpath in glob(self.data_dir+"bg/*.gif"): - stat[bgpath] = 0 - bg = Image.open(bgpath) - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - for x in xrange(bg.size[0]): - for y in xrange(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - try: - cstat[rgb_c] += 1 - except: - cstat[rgb_c] = 1 - if rgb_bg == rgb_c: - stat[bgpath] += 1 - max_p = 0 - bg = "" - for bgpath, value in stat.items(): - if max_p < value: - bg = bgpath - max_p = value - return bg - - def substract_bg(self, bgpath): - bg = Image.open(bgpath) - img = self.image.convert("P") - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - orgpix = self.image.load() - for x in xrange(bg.size[0]): - for y in xrange(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - if rgb_c == rgb_bg: - orgpix[x, y] = (255,255,255) - - def eval_black_white(self): - new = Image.new("RGB", (140, 75)) - pix = new.load() - orgpix = self.image.load() - thresh = 4 - for x in xrange(new.size[0]): - for y in xrange(new.size[1]): - rgb = orgpix[x, y] - r, g, b = rgb - pix[x, y] = (255,255,255) - if r > max(b, g)+thresh: - pix[x, y] = (0,0,0) - if g < min(r, b): - pix[x, y] = (0,0,0) - if g > max(r, b)+thresh: - pix[x, y] = (0,0,0) - if b > max(r, g)+thresh: - pix[x, y] = (0,0,0) - self.image = new - self.pixels = self.image.load() - - def get_captcha(self, image): - self.load_image(image) - bg = self.get_bg() - self.substract_bg(bg) - self.eval_black_white() - self.to_greyscale() - self.image.save(self.data_dir+"cleaned_pass1.png") - self.clean(4) - self.clean(4) - self.image.save(self.data_dir+"cleaned_pass2.png") - letters = self.split_captcha_letters() - final = "" - for n, letter in enumerate(letters): - self.image = letter - self.image.save(ocr.data_dir+"letter%d.png" % n) - self.run_tesser(True, True, False, False) - final += self.result_captcha - - return final diff --git a/module/plugins/ocr/NetloadIn.py b/module/plugins/ocr/NetloadIn.py deleted file mode 100644 index 0de88302e..000000000 --- a/module/plugins/ocr/NetloadIn.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR - -class NetloadIn(OCR): - __name__ = "NetloadIn" - __type__ = "ocr" - __version__ = "0.1" - - __description__ = """Netload.in ocr plugin""" - __author_name__ = "pyLoad Team" - __author_mail__ = "admin@pyload.org" - - - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.to_greyscale() - self.clean(3) - self.clean(3) - self.run_tesser(True, True, False, False) - - self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers - - return self.result_captcha diff --git a/module/plugins/ocr/ShareonlineBiz.py b/module/plugins/ocr/ShareonlineBiz.py deleted file mode 100644 index 0ad018bf9..000000000 --- a/module/plugins/ocr/ShareonlineBiz.py +++ /dev/null @@ -1,38 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR - - -class ShareonlineBiz(OCR): - __name__ = "ShareonlineBiz" - __type__ = "ocr" - __version__ = "0.1" - - __description__ = """Shareonline.biz ocr plugin""" - __author_name__ = "RaNaN" - __author_mail__ = "RaNaN@pyload.org" - - - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.to_greyscale() - self.image = self.image.resize((160, 50)) - self.pixels = self.image.load() - self.threshold(1.85) - #self.eval_black_white(240) - #self.derotate_by_average() - - letters = self.split_captcha_letters() - - final = "" - for letter in letters: - self.image = letter - self.run_tesser(True, True, False, False) - final += self.result_captcha - - return final - - #tesseract at 60% diff --git a/module/plugins/ocr/__init__.py b/module/plugins/ocr/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/module/plugins/ocr/__init__.py +++ /dev/null |