netload.in captcha method, new captcha functions, see #6

author: RaNaN <Mast3rRaNaN@hotmail.de> 2009-06-17 14:58:06 +0200
committer: RaNaN <Mast3rRaNaN@hotmail.de> 2009-06-17 14:58:06 +0200
commit: 6316018bf3313c1c80f55bc5ec80f931e8ce8204 (patch)
tree: 5c54c3dd2663da6f95bc3ea3993eda2b7b3bb20b
parent: new server functions (diff)
download: pyload-6316018bf3313c1c80f55bc5ec80f931e8ce8204.tar.xz
2 files changed, 70 insertions, 1 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py
new file mode 100644
index 000000000..a8fc38757
--- /dev/null
+++ b/captcha/NetloadIn.py
@@ -0,0 +1,16 @@
+from captcha import Ocr
+
+class NetloadIn(Ocr):  # Ocr subclass that defines the netload.in captcha pipeline
+    def __init__(self, image):  # image: passed unchanged to Ocr.__init__
+        Ocr.__init__(self, image)
+
+    def get_captcha(self):  # greyscale, two clean(3) passes, then tesseract; returns self.result_captcha
+        self.to_greyscale()
+        self.clean(3)  # whiten non-white pixels with fewer than 3 non-white neighbours
+        self.clean(3)  # repeated: pixels exposed by the first pass can now fall below the threshold
+        self.run_tesser()
+        return self.result_captcha
+
+if __name__ == '__main__':  # manual check against a sample captcha image
+    ocr = NetloadIn('captchas/netload/captcha.php10.png')
+    print  ocr.get_captcha()
diff --git a/captcha/captcha.py b/captcha/captcha.py
index b57fa1b7e..361893fa3 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -5,6 +5,7 @@ import subprocess
 class Ocr(object):
     def __init__(self, image):
         self.image = Image.open(image)
+        self.pixels = self.image.load()  # pixel access object, indexed as pixels[x, y] by clean()
         self.image_name = 'captcha_clean.png'
         self.result_captcha = ''
 
@@ -17,9 +18,61 @@ class Ocr(object):
         cmd = ['gocr', self.image_name]
         self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
 
+    def run_tesser(self):  # run the external `tesseract` program on the image; text ends up in self.result_captcha
+        self.image.save('captcha.tif', 'TIFF')  # relative path, so written to the current working directory
+        cmd = ['tesseract', 'captcha.tif', '0']  # NOTE(review): '0' looks like the output base name (-> 0.txt) - confirm
+        self.result_captcha = subprocess.Popen(cmd)  # attribute temporarily holds the process handle
+        self.result_captcha.wait()  # block until tesseract has exited
+        cmd = ['cat', '0.txt']  # NOTE(review): reads the result back through an external `cat` process
+        self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')  # output with newlines removed
+
     def get_captcha(self):
         pass
-        
+
+    def to_greyscale(self):  # convert self.image to mode 'L' when it is not already, then refresh self.pixels
+        if self.image.mode != 'L':
+            self.image = self.image.convert('L')
+
+        self.pixels = self.image.load()  # reload pixel access for the (possibly replaced) image
+
+
+    def clean(self, allowed):  # whiten non-white pixels that have fewer than `allowed` non-white neighbours
+        pixels = self.pixels
+
+        w, h = self.image.size
+
+        for x in xrange(w):
+            for y in xrange(h):
+                # white (255) pixels are skipped: only non-white pixels are candidates for removal
+                if pixels[x, y] == 255: continue
+
+                count = 0
+
+                try:  # count how many of the 8 surrounding pixels are not white
+                    if pixels[x-1, y-1] != 255: count += 1
+                    if pixels[x-1, y  ] != 255: count += 1
+                    if pixels[x-1, y+1] != 255: count += 1
+                    if pixels[x, y+1  ] != 255: count += 1
+                    if pixels[x+1, y+1] != 255: count += 1
+                    if pixels[x+1, y  ] != 255: count += 1
+                    if pixels[x+1, y-1] != 255: count += 1
+                    if pixels[x, y-1  ] != 255: count += 1
+                except:  # NOTE(review): bare except, presumably for out-of-range border lookups; count stays partial
+                    pass
+
+                # too few non-white neighbours: tag the pixel with the marker value 1 for now,
+                # so it still counts as non-white for pixels visited later in this pass
+                if count < allowed:
+                    pixels[x, y] = 1
+                    
+        # second pass: every pixel whose value is 1 (the marker) is set to 255 (white)
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 1: pixels[x, y] = 255
+
+        self.pixels = pixels
+
+
 if __name__ == '__main__':
     ocr = Ocr('gigasize-com/7.jpg')
     print  ocr.get_captcha()
author	RaNaN <Mast3rRaNaN@hotmail.de>	2009-06-17 14:58:06 +0200
committer	RaNaN <Mast3rRaNaN@hotmail.de>	2009-06-17 14:58:06 +0200
commit	6316018bf3313c1c80f55bc5ec80f931e8ce8204 (patch)
tree	5c54c3dd2663da6f95bc3ea3993eda2b7b3bb20b
parent	new server functions (diff)
download	pyload-6316018bf3313c1c80f55bc5ec80f931e8ce8204.tar.xz