diff options
Diffstat (limited to 'pyload/plugin/ocr/LinksaveIn.py')
-rw-r--r-- | pyload/plugin/ocr/LinksaveIn.py | 157 |
1 files changed, 157 insertions, 0 deletions
# -*- coding: utf-8 -*-
# diff --git a/pyload/plugin/ocr/LinksaveIn.py b/pyload/plugin/ocr/LinksaveIn.py (new file mode 100644, index 000000000..44ab08592)

try:
    from PIL import Image
except ImportError:
    import Image

import glob
import os

from pyload.plugin.OCR import OCR


class LinksaveIn(OCR):
    """OCR plugin for Linksave.in captchas.

    The captcha is loaded frame by frame, its non-black pixels are merged
    into one RGB image, the best matching background template (a GIF below
    ``<data_dir>/bg/``) is blanked out, and the remainder is thresholded to
    black/white before being handed to the helpers inherited from ``OCR``.
    Intermediate images are written into ``data_dir`` as PNG files.
    """
    __name = "LinksaveIn"
    __type = "ocr"
    __version = "0.11"

    __description = """Linksave.in ocr plugin"""
    __license = "GPLv3"
    __authors = [("pyLoad Team", "admin@pyload.org")]


    def __init__(self):
        """Initialise the base OCR and locate the plugin's data directory."""
        OCR.__init__(self)
        # The trailing empty component keeps the trailing path separator that
        # the other methods rely on when they append file names to data_dir.
        self.data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "LinksaveIn", "")


    @staticmethod
    def _palette_lut(img):
        """Return a 256-entry list mapping pixel value -> RGB tuple for *img*.

        A 256x1 copy of the image (same mode/palette) is filled with the
        values 0..255 and converted to RGB, so entry ``i`` is the colour
        that pixel value ``i`` converts to.
        """
        strip = img.resize((256, 1))
        strip.putdata(list(range(256)))
        return list(strip.convert("RGB").getdata())


    def load_image(self, image):
        """Open *image* and merge the non-black pixels of all frames.

        :param image: path or file object accepted by ``Image.open``.

        Sets ``self.image``, ``self.pixels`` and resets
        ``self.result_captcha``; also writes ``unblacked.png`` to
        ``data_dir``.
        """
        im = Image.open(image)
        # The lookup table is taken once, before seeking through the frames.
        lut = self._palette_lut(im)

        new = Image.new("RGB", im.size)
        npix = new.load()
        frame_nr = 0
        while True:
            try:
                im.seek(frame_nr)
            except EOFError:
                # No more frames in the image.
                break
            frame = im.copy()
            pix = frame.load()
            for x in range(frame.size[0]):
                for y in range(frame.size[1]):
                    rgb = lut[pix[x, y]]
                    # Black pixels never overwrite what earlier frames drew.
                    if rgb != (0, 0, 0):
                        npix[x, y] = rgb
            frame_nr += 1
        new.save(self.data_dir + "unblacked.png")
        self.image = new.copy()
        self.pixels = self.image.load()
        self.result_captcha = ''


    def get_bg(self):
        """Return the path of the background template matching best.

        Every ``bg/*.gif`` below ``data_dir`` is compared pixel by pixel with
        the current image; the template with the most identical pixels wins.

        :return: path of the best template, or ``""`` when no template has
                 at least one matching pixel (or no template exists).
        """
        # The captcha side of the comparison does not depend on the template,
        # so it is prepared once instead of once per background.
        img = self.image.convert("P")
        lut = self._palette_lut(img)
        pix = img.load()

        best = ""
        max_p = 0
        for bgpath in glob.glob(self.data_dir + "bg/*.gif"):
            bg = Image.open(bgpath)
            bglut = self._palette_lut(bg)
            bgpix = bg.load()
            matches = 0
            for x in range(bg.size[0]):
                for y in range(bg.size[1]):
                    if bglut[bgpix[x, y]] == lut[pix[x, y]]:
                        matches += 1
            # Strict comparison: a template with zero matches is never chosen.
            if max_p < matches:
                best = bgpath
                max_p = matches
        return best


    def substract_bg(self, bgpath):
        """Paint white every pixel of ``self.image`` equal to the background.

        :param bgpath: path of the background template image to remove.
        """
        bg = Image.open(bgpath)
        img = self.image.convert("P")

        bglut = self._palette_lut(bg)
        lut = self._palette_lut(img)

        bgpix = bg.load()
        pix = img.load()
        orgpix = self.image.load()
        for x in range(bg.size[0]):
            for y in range(bg.size[1]):
                if lut[pix[x, y]] == bglut[bgpix[x, y]]:
                    orgpix[x, y] = (255, 255, 255)


    def eval_black_white(self):
        """Replace ``self.image`` with a 140x75 black/white version.

        A pixel becomes black when one channel dominates the other two by
        more than the threshold, or when green is the weakest channel;
        every other pixel becomes white.
        """
        # NOTE(review): the fixed 140x75 size presumably matches the captcha
        # dimensions; a smaller source image would fail on pixel access.
        new = Image.new("RGB", (140, 75))
        pix = new.load()
        orgpix = self.image.load()
        thresh = 4
        for x in range(new.size[0]):
            for y in range(new.size[1]):
                r, g, b = orgpix[x, y]
                coloured = (r > max(b, g) + thresh
                            or g < min(r, b)
                            or g > max(r, b) + thresh
                            or b > max(r, g) + thresh)
                pix[x, y] = (0, 0, 0) if coloured else (255, 255, 255)
        self.image = new
        self.pixels = self.image.load()


    def get_captcha(self, image):
        """Run the whole recognition pipeline on *image*.

        :param image: path or file object accepted by ``Image.open``.
        :return: concatenation of ``self.result_captcha`` after each letter
                 has been passed to ``run_tesser``.

        Writes ``cleaned_pass1.png``, ``cleaned_pass2.png`` and one
        ``letter<n>.png`` per split letter into ``data_dir``.
        """
        self.load_image(image)
        bg = self.get_bg()
        self.substract_bg(bg)
        self.eval_black_white()
        self.to_greyscale()
        self.image.save(self.data_dir + "cleaned_pass1.png")
        self.clean(4)
        self.clean(4)
        self.image.save(self.data_dir + "cleaned_pass2.png")
        letters = self.split_captcha_letters()
        parts = []
        for n, letter in enumerate(letters):
            self.image = letter
            # Was ``ocr.data_dir``: ``ocr`` is not defined in this module and
            # raised NameError on the first letter.
            self.image.save(self.data_dir + "letter%d.png" % n)
            self.run_tesser(True, True, False, False)
            parts.append(self.result_captcha)

        return "".join(parts)