diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-24 13:43:22 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-24 13:43:22 +0200 |
commit | 04b1015aa6ceefd3735de21d21519eb01316f1dc (patch) | |
tree | 7dccfce9d421b22cf846d41cebf3d6af1dd85c1c /captcha | |
parent | fixed some bugs (diff) | |
download | pyload-04b1015aa6ceefd3735de21d21519eb01316f1dc.tar.xz |
fixed occasionally appearing cli bug, captcha method for sharebiz @ ~60%
Diffstat (limited to 'captcha')
-rw-r--r-- | captcha/ShareonlineBiz.py | 48 | ||||
-rw-r--r-- | captcha/captcha.py | 6 |
2 files changed, 43 insertions, 11 deletions
diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 5c8e682f4..5d0eb37b8 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -1,33 +1,63 @@ -from captcha import OCR +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +#Copyright (C) 2009 kingzero, RaNaN +# +#This program is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, +#or (at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +#See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +### import urllib +from captcha import OCR + class ShareonlineBiz(OCR): def __init__(self): OCR.__init__(self) - def get_captcha(self, image): - urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") + def get_captcha(self, image): self.load_image(image) self.to_greyscale() self.image = self.image.resize((160, 50)) self.pixels = self.image.load() self.threshold(1.85) - self.eval_black_white(240) - self.derotate_by_average() + #self.eval_black_white(240) + #self.derotate_by_average() letters = self.split_captcha_letters() final = "" - i = 0 for letter in letters: self.image = letter - self.image.save(str(i) + ".jpeg") - self.run_gocr() + self.run_tesser() final += self.result_captcha - i += 1 + + #replace common errors + final = final.replace("A", "4") + final = final.replace("‘5", "3") + final = final.replace("‘1", "7") + final = final.replace("‘L", "2") + final = final.replace("T", "7") + final = final.replace("b", "6") + final = final.replace("B", "2") + final = final.replace("I", "1") + final = final.replace("X", "1") 
return final + #tesseract at 60% + if __name__ == '__main__': ocr = ShareonlineBiz() + urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") print ocr.get_captcha('captcha.jpeg') diff --git a/captcha/captcha.py b/captcha/captcha.py index 48816940d..7092e21c1 100644 --- a/captcha/captcha.py +++ b/captcha/captcha.py @@ -216,8 +216,10 @@ class OCR(object): if black_pixel_in_col == False and started == True: rect = (firstX, topY, lastX, bottomY) new_captcha = captcha.crop(rect) - - letters.append(new_captcha) + + w, h = new_captcha.size + if w > 5 and h > 5: + letters.append(new_captcha) started = False bottomY, topY = 0, height |