diff options
Diffstat (limited to 'module/plugins/ocr')
-rw-r--r-- | module/plugins/ocr/GigasizeCom.py | 20 | ||||
-rw-r--r-- | module/plugins/ocr/LinksaveIn.py | 149 | ||||
-rw-r--r-- | module/plugins/ocr/NetloadIn.py | 26 | ||||
-rw-r--r-- | module/plugins/ocr/ShareonlineBiz.py | 53 | ||||
-rw-r--r-- | module/plugins/ocr/__init__.py | 0 |
5 files changed, 0 insertions, 248 deletions
diff --git a/module/plugins/ocr/GigasizeCom.py b/module/plugins/ocr/GigasizeCom.py deleted file mode 100644 index 8f9d78710..000000000 --- a/module/plugins/ocr/GigasizeCom.py +++ /dev/null @@ -1,20 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR - -class GigasizeCom(OCR): - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.threshold(2.8) - self.run_tesser(True, False, False, True) - return self.result_captcha - -if __name__ == '__main__': - ocr = GigasizeCom() - import urllib - urllib.urlretrieve('http://www.gigasize.com/randomImage.php', "gigasize_tmp.jpg") - - print ocr.get_captcha('gigasize_tmp.jpg') diff --git a/module/plugins/ocr/LinksaveIn.py b/module/plugins/ocr/LinksaveIn.py deleted file mode 100644 index 0ddd50a50..000000000 --- a/module/plugins/ocr/LinksaveIn.py +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR -from PIL import Image -from os import sep -from os.path import dirname -from os.path import abspath -from glob import glob - - -class LinksaveIn(OCR): - __name__ = "LinksaveIn" - def __init__(self): - OCR.__init__(self) - self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep - - def load_image(self, image): - im = Image.open(image) - frame_nr = 0 - - lut = im.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - new = Image.new("RGB", im.size) - npix = new.load() - while True: - try: - im.seek(frame_nr) - except EOFError: - break - frame = im.copy() - pix = frame.load() - for x in xrange(frame.size[0]): - for y in xrange(frame.size[1]): - if lut[pix[x, y]] != (0,0,0): - npix[x, y] = lut[pix[x, y]] - frame_nr += 1 - new.save(self.data_dir+"unblacked.png") - self.image = new.copy() - self.pixels = self.image.load() - self.result_captcha = '' - - def get_bg(self): - stat = {} - cstat = {} - img = self.image.convert("P") - for bgpath in glob(self.data_dir+"bg/*.gif"): - stat[bgpath] = 0 - bg = Image.open(bgpath) - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - for x in xrange(bg.size[0]): - for y in xrange(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - try: - cstat[rgb_c] += 1 - except: - cstat[rgb_c] = 1 - if rgb_bg == rgb_c: - stat[bgpath] += 1 - max_p = 0 - bg = "" - for bgpath, value in stat.items(): - if max_p < value: - bg = bgpath - max_p = value - return bg - - def substract_bg(self, bgpath): - bg = Image.open(bgpath) - img = self.image.convert("P") - - bglut = bg.resize((256, 1)) - bglut.putdata(range(256)) - bglut = list(bglut.convert("RGB").getdata()) - - lut = img.resize((256, 1)) - lut.putdata(range(256)) - lut = list(lut.convert("RGB").getdata()) - - bgpix = bg.load() - pix = img.load() - orgpix = self.image.load() - for x in xrange(bg.size[0]): - for y in xrange(bg.size[1]): - rgb_bg = bglut[bgpix[x, y]] - rgb_c = lut[pix[x, y]] - if rgb_c == rgb_bg: - orgpix[x, y] = (255,255,255) - - def eval_black_white(self): - new = Image.new("RGB", (140, 75)) - pix = new.load() - orgpix = self.image.load() - thresh = 4 - for x in xrange(new.size[0]): - for y in xrange(new.size[1]): - rgb = orgpix[x, y] - r, g, b = rgb - pix[x, y] = (255,255,255) - if r > max(b, g)+thresh: - pix[x, y] = (0,0,0) - if g < min(r, b): - pix[x, y] = (0,0,0) - if g > max(r, b)+thresh: - pix[x, y] = (0,0,0) - if b > max(r, g)+thresh: - pix[x, y] = (0,0,0) - self.image = new - self.pixels = self.image.load() - - def get_captcha(self, image): - self.load_image(image) - bg = self.get_bg() - self.substract_bg(bg) - self.eval_black_white() - self.to_greyscale() - self.image.save(self.data_dir+"cleaned_pass1.png") - self.clean(4) - self.clean(4) - self.image.save(self.data_dir+"cleaned_pass2.png") - letters = self.split_captcha_letters() - final = "" - for n, letter in enumerate(letters): - self.image = letter - self.image.save(ocr.data_dir+"letter%d.png" % n) - self.run_tesser(True, True, False, False) - final += self.result_captcha - - return final - -if __name__ == '__main__': - import urllib - ocr = LinksaveIn() - testurl = "http://linksave.in/captcha/cap.php?hsh=2229185&code=ZzHdhl3UffV3lXTH5U4b7nShXj%2Bwma1vyoNBcbc6lcc%3D" - urllib.urlretrieve(testurl, ocr.data_dir+"captcha.gif") - - print ocr.get_captcha(ocr.data_dir+'captcha.gif') diff --git a/module/plugins/ocr/NetloadIn.py b/module/plugins/ocr/NetloadIn.py deleted file mode 100644 index 9fc2f0725..000000000 --- a/module/plugins/ocr/NetloadIn.py +++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- - -from module.plugins.OCR import OCR - -class NetloadIn(OCR): - __name__ = "NetloadIn" - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.to_greyscale() - self.clean(3) - self.clean(3) - self.run_tesser(True, True, False, False) - - self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers - - return self.result_captcha - -if __name__ == '__main__': - import urllib - ocr = NetloadIn() - urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png") - - print ocr.get_captcha('captcha.png') diff --git a/module/plugins/ocr/ShareonlineBiz.py b/module/plugins/ocr/ShareonlineBiz.py deleted file mode 100644 index db72449d1..000000000 --- a/module/plugins/ocr/ShareonlineBiz.py +++ /dev/null @@ -1,53 +0,0 @@ -# -*- coding: utf-8 -*- - -# -#Copyright (C) 2009 kingzero, RaNaN -# -#This program is free software; you can redistribute it and/or modify -#it under the terms of the GNU General Public License as published by -#the Free Software Foundation; either version 3 of the License, -#or (at your option) any later version. -# -#This program is distributed in the hope that it will be useful, -#but WITHOUT ANY WARRANTY; without even the implied warranty of -#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -#See the GNU General Public License for more details. -# -#You should have received a copy of the GNU General Public License -# along with this program; if not, see <http://www.gnu.org/licenses/>. -# -### -from module.plugins.OCR import OCR - -class ShareonlineBiz(OCR): - __name__ = "ShareonlineBiz" - - def __init__(self): - OCR.__init__(self) - - def get_captcha(self, image): - self.load_image(image) - self.to_greyscale() - self.image = self.image.resize((160, 50)) - self.pixels = self.image.load() - self.threshold(1.85) - #self.eval_black_white(240) - #self.derotate_by_average() - - letters = self.split_captcha_letters() - - final = "" - for letter in letters: - self.image = letter - self.run_tesser(True, True, False, False) - final += self.result_captcha - - return final - - #tesseract at 60% - -if __name__ == '__main__': - import urllib - ocr = ShareonlineBiz() - urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") - print ocr.get_captcha('captcha.jpeg') diff --git a/module/plugins/ocr/__init__.py b/module/plugins/ocr/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/module/plugins/ocr/__init__.py +++ /dev/null |