diff options
| author | 2009-06-22 00:06:19 +0200 | |
|---|---|---|
| committer | 2009-06-22 00:06:19 +0200 | |
| commit | 0cf40f51365e21c3025824ec9f8a8065876ad18f (patch) | |
| tree | 4deee3bd211cf6f686ffb160e7077d0e3d4338a1 | |
| parent | Added Shareonline.biz captcha (diff) | |
| download | pyload-0cf40f51365e21c3025824ec9f8a8065876ad18f.tar.xz | |
Shareonline captcha method
| -rw-r--r-- | captcha/ShareonlineBiz.py | 25 | ||||
| -rw-r--r-- | captcha/captcha.py | 38 | 
2 files changed, 53 insertions, 10 deletions
| diff --git a/captcha/ShareonlineBiz.py b/captcha/ShareonlineBiz.py index 038f6639e..1e3fc6214 100644 --- a/captcha/ShareonlineBiz.py +++ b/captcha/ShareonlineBiz.py @@ -6,16 +6,23 @@ class ShareonlineBiz(OCR):          OCR.__init__(self)      def get_captcha(self, image): -        urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg")           self.load_image(image) -        #self.to_greyscale() -        #self.image.save('grey.jpeg') -        self.image.threshold(32500) -        #self.threshold(1.3) -        self.run_tesser() -        self.image.save('captcha_bla.jpeg') -        return self.result_captcha +        self.to_greyscale() +        self.image = self.image.resize((160, 50)) +        self.pixels = self.image.load() +        self.threshold(1.85) +        self.eval_black_white(240) + +        letters = self.split_captcha_letters() +         +        final = "" +        for letter in letters: +            self.image = letter +            self.run_tesser() +            final += self.result_captcha + +        return final  if __name__ == '__main__':      ocr = ShareonlineBiz() -    print  ocr.get_captcha('captcha.jpeg') +    print  ocr.get_captcha('captcha.php3.jpeg') diff --git a/captcha/captcha.py b/captcha/captcha.py index 539e51932..48816940d 100644 --- a/captcha/captcha.py +++ b/captcha/captcha.py @@ -72,6 +72,7 @@ class OCR(object):          self.pixels = self.image.load()      def eval_black_white(self, limit): +        self.pixels = self.image.load()          w, h = self.image.size          for x in xrange(w):              for y in xrange(h): @@ -115,7 +116,7 @@ class OCR(object):          self.pixels = pixels -    def derotate_by_avergage(self): +    def derotate_by_average(self):          """rotate by checking each angle and guess most suitable"""          w, h = self.image.size @@ -189,7 +190,42 @@ class OCR(object):          self.pixels = pixels +    def split_captcha_letters(self): +        captcha = self.image +        
started = False +        letters = [] +        width, height = captcha.size +        bottomY, topY = 0, height +        pixels = captcha.load() +   +        for x in xrange(width): +            black_pixel_in_col = False +            for y in xrange(height): +                if pixels[x, y] != 255: +                    if started == False: +                        started = True +                        firstX = x +                        lastX = x +    +                    if y > bottomY: bottomY = y +                    if y < topY: topY = y +                    if x > lastX: lastX = x +    +                    black_pixel_in_col = True +    +            if black_pixel_in_col == False and started == True: +                rect = (firstX, topY, lastX, bottomY) +                new_captcha = captcha.crop(rect) +   +                letters.append(new_captcha) +   +                started = False +                bottomY, topY = 0, height +   +        return letters + +          if __name__ == '__main__':      ocr = OCR()      ocr.load_image("B.jpg") | 
