diff options
author | mkaay <mkaay@mkaay.de> | 2009-12-05 22:36:19 +0100 |
---|---|---|
committer | mkaay <mkaay@mkaay.de> | 2009-12-05 22:36:19 +0100 |
commit | 8ee17f2be2576dc9f8be17aadc44a8014edbb1ab (patch) | |
tree | 007456e51107855c042b2788a614e8b7600a7681 | |
parent | linksave.in captcha test (diff) | |
download | pyload-8ee17f2be2576dc9f8be17aadc44a8014edbb1ab.tar.xz |
linksave captcha 2nd try
-rw-r--r-- | module/captcha/LinksaveIn.py | 113 | ||||
-rw-r--r-- | module/captcha/LinksaveIn/bg/flecken_1.gif | bin | 0 -> 10744 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/flecken_2.gif | bin | 0 -> 11076 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/gewebe_fein.gif | bin | 0 -> 10504 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/gewebe_grob.gif | bin | 0 -> 10127 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/gitter.gif | bin | 0 -> 8151 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/mauer_horizontal.gif | bin | 0 -> 9105 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/mauer_vertikal.gif | bin | 0 -> 10830 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/scheckig.gif | bin | 0 -> 10214 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/bg/wellen.gif | bin | 0 -> 10041 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/cleaned_pass1.png | bin | 0 -> 1979 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/cleaned_pass2.png | bin | 0 -> 1162 bytes | |||
-rw-r--r-- | module/captcha/LinksaveIn/tesser_conf | 1 | ||||
-rw-r--r-- | module/captcha/LinksaveIn/unblacked.png | bin | 0 -> 10488 bytes |
14 files changed, 113 insertions, 1 deletion
diff --git a/module/captcha/LinksaveIn.py b/module/captcha/LinksaveIn.py index cd4e97f87..4219f03b5 100644 --- a/module/captcha/LinksaveIn.py +++ b/module/captcha/LinksaveIn.py @@ -1,9 +1,17 @@ from captcha import OCR import Image +from os import sep +from os.path import dirname +from os.path import abspath +from glob import glob +import tempfile + +from pprint import pprint class LinksaveIn(OCR): def __init__(self): OCR.__init__(self) + self.data_dir = dirname(abspath(__file__)) + sep + "LinksaveIn" + sep def load_image(self, image): im = Image.open(image) @@ -27,12 +35,115 @@ class LinksaveIn(OCR): if lut[pix[x, y]] != (0,0,0): npix[x, y] = lut[pix[x, y]] frame_nr += 1 + new.save(self.data_dir+"unblacked.png") self.image = new.copy() self.pixels = self.image.load() self.result_captcha = '' + + def get_bg(self): + stat = {} + cstat = {} + img = self.image.convert("P") + for bgpath in glob(self.data_dir+"bg/*.gif"): + stat[bgpath] = 0 + bg = Image.open(bgpath) + + bglut = bg.resize((256, 1)) + bglut.putdata(range(256)) + bglut = list(bglut.convert("RGB").getdata()) + + lut = img.resize((256, 1)) + lut.putdata(range(256)) + lut = list(lut.convert("RGB").getdata()) + + bgpix = bg.load() + pix = img.load() + for x in range(bg.size[0]): + for y in range(bg.size[1]): + rgb_bg = bglut[bgpix[x, y]] + rgb_c = lut[pix[x, y]] + try: + cstat[rgb_c] += 1 + except: + cstat[rgb_c] = 1 + if rgb_bg == rgb_c: + stat[bgpath] += 1 + max_p = 0 + bg = "" + for bgpath, value in stat.items(): + if max_p < value: + bg = bgpath + max_p = value + return bg + + def substract_bg(self, bgpath): + bg = Image.open(bgpath) + img = self.image.convert("P") + + bglut = bg.resize((256, 1)) + bglut.putdata(range(256)) + bglut = list(bglut.convert("RGB").getdata()) + + lut = img.resize((256, 1)) + lut.putdata(range(256)) + lut = list(lut.convert("RGB").getdata()) + + bgpix = bg.load() + pix = img.load() + orgpix = self.image.load() + for x in range(bg.size[0]): + for y in range(bg.size[1]): + rgb_bg = 
bglut[bgpix[x, y]] + rgb_c = lut[pix[x, y]] + if rgb_c == rgb_bg: + orgpix[x, y] = (255,255,255) + + def eval_black_white(self): + new = Image.new("RGB", (140, 75)) + pix = new.load() + orgpix = self.image.load() + thresh = 4 + for x in range(new.size[0]): + for y in range(new.size[1]): + rgb = orgpix[x, y] + r, g, b = rgb + pix[x, y] = (255,255,255) + if r > max(b, g)+thresh: + pix[x, y] = (0,0,0) + if g < min(r, b): + pix[x, y] = (0,0,0) + if g > max(r, b)+thresh: + pix[x, y] = (0,0,0) + if b > max(r, g)+thresh: + pix[x, y] = (0,0,0) + self.image = new + self.pixels = self.image.load() + + def run_tesser(self): + self.logger.debug("create tmp tif") + tmp = tempfile.NamedTemporaryFile(suffix=".tif") + self.logger.debug("create tmp txt") + tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + self.logger.debug("save tiff") + self.image.save(tmp.name, 'TIFF') + self.logger.debug("run tesseract") + self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", ""), "nobatch", self.data_dir+"tesser_conf"]) + self.logger.debug("read txt") + + with open(tmpTxt.name, 'r') as f: + self.result_captcha = f.read().replace("\n", "") def get_captcha(self, image): self.load_image(image) + bg = self.get_bg() + self.substract_bg(bg) + self.eval_black_white() + self.to_greyscale() + self.image.save(self.data_dir+"cleaned_pass1.png") + self.clean(6) + self.image.save(self.data_dir+"cleaned_pass2.png") + letters = self.split_captcha_letters() + self.run_tesser() return self.result_captcha @@ -42,5 +153,5 @@ if __name__ == '__main__': ocr = LinksaveIn() testurl = "http://linksave.in/captcha/cap.php?hsh=2229185&code=ZzHdhl3UffV3lXTH5U4b7nShXj%2Bwma1vyoNBcbc6lcc%3D" urllib.urlretrieve(testurl, "captcha.gif") - + print ocr.get_captcha('captcha.gif') diff --git a/module/captcha/LinksaveIn/bg/flecken_1.gif b/module/captcha/LinksaveIn/bg/flecken_1.gif Binary files differnew file mode 100644 index 000000000..df2f51217 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/flecken_1.gif diff --git 
a/module/captcha/LinksaveIn/bg/flecken_2.gif b/module/captcha/LinksaveIn/bg/flecken_2.gif Binary files differ new file mode 100644 index 000000000..838276188 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/flecken_2.gif diff --git a/module/captcha/LinksaveIn/bg/gewebe_fein.gif b/module/captcha/LinksaveIn/bg/gewebe_fein.gif Binary files differ new file mode 100644 index 000000000..502f18cc4 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gewebe_fein.gif diff --git a/module/captcha/LinksaveIn/bg/gewebe_grob.gif b/module/captcha/LinksaveIn/bg/gewebe_grob.gif Binary files differ new file mode 100644 index 000000000..e66a365ad --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gewebe_grob.gif diff --git a/module/captcha/LinksaveIn/bg/gitter.gif b/module/captcha/LinksaveIn/bg/gitter.gif Binary files differ new file mode 100644 index 000000000..ec52ef68d --- /dev/null +++ b/module/captcha/LinksaveIn/bg/gitter.gif diff --git a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif b/module/captcha/LinksaveIn/bg/mauer_horizontal.gif Binary files differ new file mode 100644 index 000000000..3d75fafa8 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/mauer_horizontal.gif diff --git a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif b/module/captcha/LinksaveIn/bg/mauer_vertikal.gif Binary files differ new file mode 100644 index 000000000..2ada6fdae --- /dev/null +++ b/module/captcha/LinksaveIn/bg/mauer_vertikal.gif diff --git a/module/captcha/LinksaveIn/bg/scheckig.gif b/module/captcha/LinksaveIn/bg/scheckig.gif Binary files differ new file mode 100644 index 000000000..8bfb45c56 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/scheckig.gif diff --git a/module/captcha/LinksaveIn/bg/wellen.gif b/module/captcha/LinksaveIn/bg/wellen.gif Binary files differ new file mode 100644 index 000000000..a181ebe74 --- /dev/null +++ b/module/captcha/LinksaveIn/bg/wellen.gif diff --git a/module/captcha/LinksaveIn/cleaned_pass1.png b/module/captcha/LinksaveIn/cleaned_pass1.png Binary files differ new file mode 
100644 index 000000000..f0cc0ae47 --- /dev/null +++ b/module/captcha/LinksaveIn/cleaned_pass1.png diff --git a/module/captcha/LinksaveIn/cleaned_pass2.png b/module/captcha/LinksaveIn/cleaned_pass2.png Binary files differ new file mode 100644 index 000000000..fb2ef44cd --- /dev/null +++ b/module/captcha/LinksaveIn/cleaned_pass2.png diff --git a/module/captcha/LinksaveIn/tesser_conf b/module/captcha/LinksaveIn/tesser_conf new file mode 100644 index 000000000..34ca8fa02 --- /dev/null +++ b/module/captcha/LinksaveIn/tesser_conf @@ -0,0 +1 @@ +tessedit_char_whitelist 0123456789 diff --git a/module/captcha/LinksaveIn/unblacked.png b/module/captcha/LinksaveIn/unblacked.png Binary files differ new file mode 100644 index 000000000..4fa056d3d --- /dev/null +++ b/module/captcha/LinksaveIn/unblacked.png |