diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-30 13:11:55 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2009-06-30 13:11:55 +0200 |
commit | 5ce2a1b305825da5bd1d13d77c38af21a0803141 (patch) | |
tree | 35069f4a078870ddbd151e1f4c2720aa392e4444 /captcha | |
parent | rapidshare fix (diff) | |
download | pyload-5ce2a1b305825da5bd1d13d77c38af21a0803141.tar.xz |
little captcha improvements
Diffstat (limited to 'captcha')
-rw-r--r-- | captcha/NetloadIn.py | 10 | ||||
-rw-r--r-- | captcha/ShareonlineBiz.py | 20 | ||||
-rw-r--r-- | captcha/captcha.py | 23 |
3 files changed, 40 insertions, 13 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py index 94103f78b..9799a6a2b 100644 --- a/captcha/NetloadIn.py +++ b/captcha/NetloadIn.py @@ -10,8 +10,16 @@ class NetloadIn(OCR): self.clean(3) self.clean(3) self.run_tesser() + + self.correct({ + ("$", "g"): "5", + }) + return self.result_captcha if __name__ == '__main__': + import urllib ocr = NetloadIn() - print ocr.get_captcha('captchas/netload/captcha.php10.png') + urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png") + + print ocr.get_captcha('captcha.png') diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 5d0eb37b8..91124f181 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -17,8 +17,6 @@ # along with this program; if not, see <http://www.gnu.org/licenses/>. # ### -import urllib - from captcha import OCR class ShareonlineBiz(OCR): @@ -43,21 +41,21 @@ class ShareonlineBiz(OCR): final += self.result_captcha #replace common errors - final = final.replace("A", "4") - final = final.replace("‘5", "3") - final = final.replace("‘1", "7") - final = final.replace("‘L", "2") - final = final.replace("T", "7") - final = final.replace("b", "6") - final = final.replace("B", "2") - final = final.replace("I", "1") - final = final.replace("X", "1") + final = self.correct({ + "A": "4", + "‘5": "3", + ("‘1", "T"): "7", + ("‘L", "B", "'L"): "2", + "b": "6", + ("I", "X"): "1" + }, final) return final #tesseract at 60% if __name__ == '__main__': + import urllib ocr = ShareonlineBiz() urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") print ocr.get_captcha('captcha.jpeg') diff --git a/captcha/captcha.py b/captcha/captcha.py index 7092e21c1..22c097f38 100644 --- a/captcha/captcha.py +++ b/captcha/captcha.py @@ -226,8 +226,29 @@ class OCR(object): return letters + def correct(self, values, var=None): + + if var: + result = var + else: + result = self.result_captcha + + for key, item in values.iteritems(): + + if key.__class__
== str: + print key, "->", item + result = result.replace(key, item) + else: + for expr in key: + print expr, "->", item + result = result.replace(expr, item) + + if var: + return result + else: + self.result_captcha = result + - if __name__ == '__main__': ocr = OCR() ocr.load_image("B.jpg") |