summary refs log tree commit diff stats
path: root/captcha
diff options
context:
space:
mode:
Diffstat (limited to 'captcha')
-rw-r--r-- captcha/NetloadIn.py 16
-rw-r--r-- captcha/captcha.py 55
2 files changed, 70 insertions, 1 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py
new file mode 100644
index 000000000..a8fc38757
--- /dev/null
+++ b/captcha/NetloadIn.py
@@ -0,0 +1,16 @@
+from captcha import Ocr
+
+class NetloadIn(Ocr):
+ def __init__(self, image):
+ Ocr.__init__(self, image)
+
+ def get_captcha(self):
+ self.to_greyscale()
+ self.clean(3)
+ self.clean(3)
+ self.run_tesser()
+ return self.result_captcha
+
+if __name__ == '__main__':
+ ocr = NetloadIn('captchas/netload/captcha.php10.png')
+ print ocr.get_captcha()
diff --git a/captcha/captcha.py b/captcha/captcha.py
index b57fa1b7e..361893fa3 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -5,6 +5,7 @@ import subprocess
class Ocr(object):
def __init__(self, image):
self.image = Image.open(image)
+ self.pixels = self.image.load()
self.image_name = 'captcha_clean.png'
self.result_captcha = ''
@@ -17,9 +18,61 @@ class Ocr(object):
cmd = ['gocr', self.image_name]
self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+ def run_tesser(self):
+ self.image.save('captcha.tif', 'TIFF')
+ cmd = ['tesseract', 'captcha.tif', '0']
+ self.result_captcha = subprocess.Popen(cmd)
+ self.result_captcha.wait()
+ cmd = ['cat', '0.txt']
+ self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+
def get_captcha(self):
pass
-
+
+ def to_greyscale(self):
+ if self.image.mode != 'L':
+ self.image = self.image.convert('L')
+
+ self.pixels = self.image.load()
+
+
+ def clean(self, allowed):
+ pixels = self.pixels
+
+ w, h = self.image.size
+
+ for x in xrange(w):
+ for y in xrange(h):
+ # no point in processing white pixels since we only want to remove black pixels
+ if pixels[x, y] == 255: continue
+
+ count = 0
+
+ try:
+ if pixels[x-1, y-1] != 255: count += 1
+ if pixels[x-1, y ] != 255: count += 1
+ if pixels[x-1, y+1] != 255: count += 1
+ if pixels[x, y+1 ] != 255: count += 1
+ if pixels[x+1, y+1] != 255: count += 1
+ if pixels[x+1, y ] != 255: count += 1
+ if pixels[x+1, y-1] != 255: count += 1
+ if pixels[x, y-1 ] != 255: count += 1
+ except:
+ pass
+
+ # not enough neighbors are dark pixels so mark this pixel
+ # to be changed to white
+ if count < allowed:
+ pixels[x, y] = 1
+
+ # second pass: this time set all 1's to 255 (white)
+ for x in xrange(w):
+ for y in xrange(h):
+ if pixels[x, y] == 1: pixels[x, y] = 255
+
+ self.pixels = pixels
+
+
if __name__ == '__main__':
ocr = Ocr('gigasize-com/7.jpg')
print ocr.get_captcha()