diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-22 00:06:19 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-22 00:06:19 +0200 |
commit | 0cf40f51365e21c3025824ec9f8a8065876ad18f (patch) | |
tree | 4deee3bd211cf6f686ffb160e7077d0e3d4338a1 | |
parent | Added Shareonline.biz captcha (diff) | |
download | pyload-0cf40f51365e21c3025824ec9f8a8065876ad18f.tar.xz |
Shareonline captcha method
-rw-r--r-- | captcha/ShareonlineBiz.py | 25 | ||||
-rw-r--r-- | captcha/captcha.py | 38 |
2 files changed, 53 insertions, 10 deletions
diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 038f6639e..1e3fc6214 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -6,16 +6,23 @@ class ShareonlineBiz(OCR): OCR.__init__(self) def get_captcha(self, image): - urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") self.load_image(image) - #self.to_greyscale() - #self.image.save('grey.jpeg') - self.image.threshold(32500) - #self.threshold(1.3) - self.run_tesser() - self.image.save('captcha_bla.jpeg') - return self.result_captcha + self.to_greyscale() + self.image = self.image.resize((160, 50)) + self.pixels = self.image.load() + self.threshold(1.85) + self.eval_black_white(240) + + letters = self.split_captcha_letters() + + final = "" + for letter in letters: + self.image = letter + self.run_tesser() + final += self.result_captcha + + return final if __name__ == '__main__': ocr = ShareonlineBiz() - print ocr.get_captcha('captcha.jpeg') + print ocr.get_captcha('captcha.php3.jpeg') diff --git a/captcha/captcha.py b/captcha/captcha.py index 539e51932..48816940d 100644 --- a/captcha/captcha.py +++ b/captcha/captcha.py @@ -72,6 +72,7 @@ class OCR(object): self.pixels = self.image.load() def eval_black_white(self, limit): + self.pixels = self.image.load() w, h = self.image.size for x in xrange(w): for y in xrange(h): @@ -115,7 +116,7 @@ class OCR(object): self.pixels = pixels - def derotate_by_avergage(self): + def derotate_by_average(self): """rotate by checking each angle and guess most suitable""" w, h = self.image.size @@ -189,7 +190,42 @@ class OCR(object): self.pixels = pixels + def split_captcha_letters(self): + captcha = self.image + started = False + letters = [] + width, height = captcha.size + bottomY, topY = 0, height + pixels = captcha.load() + + for x in xrange(width): + black_pixel_in_col = False + for y in xrange(height): + if pixels[x, y] != 255: + if started == False: + started = True + firstX = x + lastX = x + + if y > bottomY: 
bottomY = y + if y < topY: topY = y + if x > lastX: lastX = x + + black_pixel_in_col = True + + if black_pixel_in_col == False and started == True: + rect = (firstX, topY, lastX, bottomY) + new_captcha = captcha.crop(rect) + + letters.append(new_captcha) + + started = False + bottomY, topY = 0, height + + return letters + + if __name__ == '__main__': ocr = OCR() ocr.load_image("B.jpg") |