summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: RaNaN <Mast3rRaNaN@hotmail.de> 2009-06-30 13:11:55 +0200
committer: RaNaN <Mast3rRaNaN@hotmail.de> 2009-06-30 13:11:55 +0200
commit: 5ce2a1b305825da5bd1d13d77c38af21a0803141 (patch)
tree: 35069f4a078870ddbd151e1f4c2720aa392e4444
parent: rapidshare fix (diff)
download: pyload-5ce2a1b305825da5bd1d13d77c38af21a0803141.tar.xz
little captcha improvements
-rw-r--r-- captcha/NetloadIn.py 10
-rw-r--r-- captcha/ShareonlineBiz.py 20
-rw-r--r-- captcha/captcha.py 23
3 files changed, 40 insertions, 13 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py
index 94103f78b..9799a6a2b 100644
--- a/captcha/NetloadIn.py
+++ b/captcha/NetloadIn.py
@@ -10,8 +10,16 @@ class NetloadIn(OCR):
self.clean(3)
self.clean(3)
self.run_tesser()
+
+ self.correct({
+ ("$", "g"): "5",
+ })
+
return self.result_captcha
if __name__ == '__main__':
+ import urllib
ocr = NetloadIn()
- print ocr.get_captcha('captchas/netload/captcha.php10.png')
+ urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png")
+
+ print ocr.get_captcha('captcha.png')
diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py
index 5d0eb37b8..91124f181 100644
--- a/captcha/ShareonlineBiz.py
+++ b/captcha/ShareonlineBiz.py
@@ -17,8 +17,6 @@
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
###
-import urllib
-
from captcha import OCR
class ShareonlineBiz(OCR):
@@ -43,21 +41,21 @@ class ShareonlineBiz(OCR):
final += self.result_captcha
#replace common errors
- final = final.replace("A", "4")
- final = final.replace("‘5", "3")
- final = final.replace("‘1", "7")
- final = final.replace("‘L", "2")
- final = final.replace("T", "7")
- final = final.replace("b", "6")
- final = final.replace("B", "2")
- final = final.replace("I", "1")
- final = final.replace("X", "1")
+ final = self.correct({
+ "A": "4",
+ "‘5": "3",
+ ("‘1", "T"): "7",
+ ("‘L", "B", "'L"): "2",
+ "b": "6",
+ ("I", "X"): "1"
+ }, final)
return final
#tesseract at 60%
if __name__ == '__main__':
+ import urllib
ocr = ShareonlineBiz()
urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")
print ocr.get_captcha('captcha.jpeg')
diff --git a/captcha/captcha.py b/captcha/captcha.py
index 7092e21c1..22c097f38 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -226,8 +226,29 @@ class OCR(object):
return letters
+ def correct(self, values, var=None):
+
+ if var:
+ result = var
+ else:
+ result = self.result_captcha
+
+ for key, item in values.iteritems():
+
+ if key.__class__ == str:
+ print key, "->", item
+ result = result.replace(key, item)
+ else:
+ for expr in key:
+ print expr, "->", item
+ result = result.replace(expr, item)
+
+ if var:
+ return result
+ else:
+ self.result_captcha = result
+
-
if __name__ == '__main__':
ocr = OCR()
ocr.load_image("B.jpg")