diff options
Diffstat (limited to 'captcha')
-rw-r--r-- | captcha/NetloadIn.py | 16 | ||||
-rw-r--r-- | captcha/captcha.py | 55 |
2 files changed, 70 insertions, 1 deletions
diff --git a/captcha/NetloadIn.py b/captcha/NetloadIn.py
new file mode 100644
index 000000000..a8fc38757
--- /dev/null
+++ b/captcha/NetloadIn.py
@@ -0,0 +1,16 @@
+from captcha import Ocr
+
+class NetloadIn(Ocr):
+    def __init__(self, image):
+        Ocr.__init__(self, image)
+
+    def get_captcha(self):
+        self.to_greyscale()
+        self.clean(3)
+        self.clean(3)
+        self.run_tesser()
+        return self.result_captcha
+
+if __name__ == '__main__':
+    ocr = NetloadIn('captchas/netload/captcha.php10.png')
+    print ocr.get_captcha()
diff --git a/captcha/captcha.py b/captcha/captcha.py
index b57fa1b7e..361893fa3 100644
--- a/captcha/captcha.py
+++ b/captcha/captcha.py
@@ -5,6 +5,7 @@ import subprocess
 class Ocr(object):
     def __init__(self, image):
         self.image = Image.open(image)
+        self.pixels = self.image.load()
         self.image_name = 'captcha_clean.png'
         self.result_captcha = ''
 
@@ -17,9 +18,61 @@ class Ocr(object):
         cmd = ['gocr', self.image_name]
         self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
 
+    def run_tesser(self):
+        self.image.save('captcha.tif', 'TIFF')
+        cmd = ['tesseract', 'captcha.tif', '0']
+        self.result_captcha = subprocess.Popen(cmd)
+        self.result_captcha.wait()
+        cmd = ['cat', '0.txt']
+        self.result_captcha = subprocess.Popen(cmd, stdout=subprocess.PIPE).communicate()[0].replace('\n','')
+
     def get_captcha(self):
         pass
-
+
+    def to_greyscale(self):
+        if self.image.mode != 'L':
+            self.image = self.image.convert('L')
+
+        self.pixels = self.image.load()
+
+
+    def clean(self, allowed):
+        pixels = self.pixels
+
+        w, h = self.image.size
+
+        for x in xrange(w):
+            for y in xrange(h):
+                # no point in processing white pixels since we only want to remove black pixels
+                if pixels[x, y] == 255: continue
+
+                count = 0
+
+                try:
+                    if pixels[x-1, y-1] != 255: count += 1
+                    if pixels[x-1, y ] != 255: count += 1
+                    if pixels[x-1, y+1] != 255: count += 1
+                    if pixels[x, y+1 ] != 255: count += 1
+                    if pixels[x+1, y+1] != 255: count += 1
+                    if pixels[x+1, y ] != 255: count += 1
+                    if pixels[x+1, y-1] != 255: count += 1
+                    if pixels[x, y-1 ] != 255: count += 1
+                except:
+                    pass
+
+                # not enough neighbors are dark pixels so mark this pixel
+                # to be changed to white
+                if count < allowed:
+                    pixels[x, y] = 1
+
+        # second pass: this time set all 1's to 255 (white)
+        for x in xrange(w):
+            for y in xrange(h):
+                if pixels[x, y] == 1: pixels[x, y] = 255
+
+        self.pixels = pixels
+
+
 if __name__ == '__main__':
     ocr = Ocr('gigasize-com/7.jpg')
     print ocr.get_captcha()