diff options
Diffstat (limited to 'pyload/plugin/ocr/LinksaveIn.py')
| -rw-r--r-- | pyload/plugin/ocr/LinksaveIn.py | 158 | 
1 files changed, 158 insertions, 0 deletions
| diff --git a/pyload/plugin/ocr/LinksaveIn.py b/pyload/plugin/ocr/LinksaveIn.py new file mode 100644 index 000000000..ddffb190a --- /dev/null +++ b/pyload/plugin/ocr/LinksaveIn.py @@ -0,0 +1,158 @@ +# -*- coding: utf-8 -*- + +try: +    from PIL import Image +except ImportError: +    import Image + +from glob import glob +from os import sep +from os.path import abspath, dirname + +from pyload.plugin.OCR import OCR + + +class LinksaveIn(OCR): +    __name__    = "LinksaveIn" +    __type__    = "ocr" +    __version__ = "0.11" + +    __description__ = """Linksave.in ocr plugin""" +    __license__     = "GPLv3" +    __authors__     = [("pyLoad Team", "admin@pyload.org")] + + +    def __init__(self): +        OCR.__init__(self) +        self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep + + +    def load_image(self, image): +        im = Image.open(image) +        frame_nr = 0 + +        lut = im.resize((256, 1)) +        lut.putdata(range(256)) +        lut = list(lut.convert("RGB").getdata()) + +        new = Image.new("RGB", im.size) +        npix = new.load() +        while True: +            try: +                im.seek(frame_nr) +            except EOFError: +                break +            frame = im.copy() +            pix = frame.load() +            for x in xrange(frame.size[0]): +                for y in xrange(frame.size[1]): +                    if lut[pix[x, y]] != (0, 0, 0): +                        npix[x, y] = lut[pix[x, y]] +            frame_nr += 1 +        new.save(self.data_dir+"unblacked.png") +        self.image = new.copy() +        self.pixels = self.image.load() +        self.result_captcha = '' + + +    def get_bg(self): +        stat = {} +        cstat = {} +        img = self.image.convert("P") +        for bgpath in glob(self.data_dir+"bg/*.gif"): +            stat[bgpath] = 0 +            bg = Image.open(bgpath) + +            bglut = bg.resize((256, 1)) +            bglut.putdata(range(256)) +            bglut = list(bglut.convert("RGB").getdata()) + +            lut = img.resize((256, 1)) +            lut.putdata(range(256)) +            lut = list(lut.convert("RGB").getdata()) + +            bgpix = bg.load() +            pix = img.load() +            for x in xrange(bg.size[0]): +                for y in xrange(bg.size[1]): +                    rgb_bg = bglut[bgpix[x, y]] +                    rgb_c = lut[pix[x, y]] +                    try: +                        cstat[rgb_c] += 1 +                    except Exception: +                        cstat[rgb_c] = 1 +                    if rgb_bg == rgb_c: +                        stat[bgpath] += 1 +        max_p = 0 +        bg = "" +        for bgpath, value in stat.iteritems(): +            if max_p < value: +                bg = bgpath +                max_p = value +        return bg + + +    def substract_bg(self, bgpath): +        bg = Image.open(bgpath) +        img = self.image.convert("P") + +        bglut = bg.resize((256, 1)) +        bglut.putdata(range(256)) +        bglut = list(bglut.convert("RGB").getdata()) + +        lut = img.resize((256, 1)) +        lut.putdata(range(256)) +        lut = list(lut.convert("RGB").getdata()) + +        bgpix = bg.load() +        pix = img.load() +        orgpix = self.image.load() +        for x in xrange(bg.size[0]): +            for y in xrange(bg.size[1]): +                rgb_bg = bglut[bgpix[x, y]] +                rgb_c = lut[pix[x, y]] +                if rgb_c == rgb_bg: +                    orgpix[x, y] = (255, 255, 255) + + +    def eval_black_white(self): +        new = Image.new("RGB", (140, 75)) +        pix = new.load() +        orgpix = self.image.load() +        thresh = 4 +        for x in xrange(new.size[0]): +            for y in xrange(new.size[1]): +                rgb = orgpix[x, y] +                r, g, b = rgb +                pix[x, y] = (255,255,255) +                if r > max(b, g)+thresh: +                    pix[x, y] = (0, 0, 0) +                if g < min(r, b): +                    pix[x, y] = (0, 0, 0) +                if g > max(r, b)+thresh: +                    pix[x, y] = (0, 0, 0) +                if b > max(r, g)+thresh: +                    pix[x, y] = (0, 0, 0) +        self.image = new +        self.pixels = self.image.load() + + +    def get_captcha(self, image): +        self.load_image(image) +        bg = self.get_bg() +        self.substract_bg(bg) +        self.eval_black_white() +        self.to_greyscale() +        self.image.save(self.data_dir+"cleaned_pass1.png") +        self.clean(4) +        self.clean(4) +        self.image.save(self.data_dir+"cleaned_pass2.png") +        letters = self.split_captcha_letters() +        final = "" +        for n, letter in enumerate(letters): +            self.image = letter +            self.image.save(ocr.data_dir+"letter%d.png" % n) +            self.run_tesser(True, True, False, False) +            final += self.result_captcha + +        return final | 
