summary refs log tree commit diff stats
path: root/captcha
diff options
context:
space:
mode:
Diffstat (limited to 'captcha')
-rw-r--r-- captcha/ShareonlineBiz.py 48
-rw-r--r-- captcha/captcha.py 6
2 files changed, 43 insertions, 11 deletions
diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py
index 5c8e682f4..5d0eb37b8 100644
--- a/captcha/ShareonlineBiz.py
+++ b/captcha/ShareonlineBiz.py
@@ -1,33 +1,63 @@
-from captcha import OCR
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+#Copyright (C) 2009 kingzero, RaNaN
+#
+#This program is free software; you can redistribute it and/or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License,
+#or (at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#See the GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+###
import urllib
+from captcha import OCR
+
class ShareonlineBiz(OCR):
def __init__(self):
OCR.__init__(self)
- def get_captcha(self, image):
- urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
+ def get_captcha(self, image):
self.load_image(image)
self.to_greyscale()
self.image = self.image.resize((160, 50))
self.pixels = self.image.load()
self.threshold(1.85)
- self.eval_black_white(240)
- self.derotate_by_average()
+ #self.eval_black_white(240)
+ #self.derotate_by_average()
letters = self.split_captcha_letters()
final = ""
- i = 0
for letter in letters:
self.image = letter
- self.image.save(str(i) + ".jpeg")
- self.run_gocr()
+ self.run_tesser()
final += self.result_captcha
- i += 1
+
+ #replace common errors
+ final = final.replace("A", "4")
+ final = final.replace("‘5", "3")
+ final = final.replace("‘1", "7")
+ final = final.replace("‘L", "2")
+ final = final.replace("T", "7")
+ final = final.replace("b", "6")
+ final = final.replace("B", "2")
+ final = final.replace("I", "1")
+ final = final.replace("X", "1")
return final
+ #tesseract at 60%
+
if __name__ == '__main__':
ocr = ShareonlineBiz()
+ urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
print ocr.get_captcha('captcha.jpeg')
diff --git a/captcha/captcha.py b/captcha/captcha.py
index 48816940d..7092e21c1 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -216,8 +216,10 @@ class OCR(object):
if black_pixel_in_col == False and started == True:
rect = (firstX, topY, lastX, bottomY)
new_captcha = captcha.crop(rect)
-
- letters.append(new_captcha)
+
+ w, h = new_captcha.size
+ if w > 5 and h > 5:
+ letters.append(new_captcha)
started = False
bottomY, topY = 0, height