summaryrefslogtreecommitdiffstats
path: root/module/plugins/captcha/captcha.py
diff options
context:
space:
mode:
Diffstat (limited to 'module/plugins/captcha/captcha.py')
-rw-r--r--module/plugins/captcha/captcha.py163
1 files changed, 84 insertions, 79 deletions
diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py
index 7e4dec697..0f233ec00 100644
--- a/module/plugins/captcha/captcha.py
+++ b/module/plugins/captcha/captcha.py
@@ -1,56 +1,49 @@
# -*- coding: utf-8 -*-
-#
-#Copyright (C) 2009 kingzero, RaNaN
-#
-#This program is free software; you can redistribute it and/or modify
-#it under the terms of the GNU General Public License as published by
-#the Free Software Foundation; either version 3 of the License,
-#or (at your option) any later version.
-#
-#This program is distributed in the hope that it will be useful,
-#but WITHOUT ANY WARRANTY; without even the implied warranty of
-#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#See the GNU General Public License for more details.
-#
-#You should have received a copy of the GNU General Public License
-# along with this program; if not, see <http://www.gnu.org/licenses/>.
-#
-###
from __future__ import with_statement
-import os
-from os.path import join
-from os.path import abspath
+
+try:
+ from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+except ImportError:
+ import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin
+
import logging
+import os
import subprocess
#import tempfile
-import Image
-import TiffImagePlugin
-import PngImagePlugin
-import GifImagePlugin
-import JpegImagePlugin
+from os.path import abspath, join
class OCR(object):
+ __name__ = "OCR"
+ __type__ = "ocr"
+ __version__ = "0.10"
+
+ __description__ = """OCR base plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
- __name__ = "OCR"
def __init__(self):
self.logger = logging.getLogger("log")
+
def load_image(self, image):
self.image = Image.open(image)
self.pixels = self.image.load()
self.result_captcha = ''
+
def unload(self):
"""delete all tmp images"""
pass
+
def threshold(self, value):
self.image = self.image.point(lambda a: a * value + 10)
+
def run(self, command):
"""Run a command"""
@@ -61,42 +54,46 @@ class OCR(object):
popen.stderr.close()
self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
+
def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
- #self.logger.debug("create tmp tif")
+ #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
+ try:
+ tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
+ tmpTif.close()
+
+ #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+ tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
+ tmpTxt.close()
- #tmp = tempfile.NamedTemporaryFile(suffix=".tif")
- tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
- tmp.close()
- #self.logger.debug("create tmp txt")
- #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
- tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
- tmpTxt.close()
+ except IOError, e:
+ self.logError(e)
+ return
self.logger.debug("save tiff")
- self.image.save(tmp.name, 'TIFF')
+ self.image.save(tmpTif.name, 'TIFF')
if os.name == "nt":
- tessparams = [join(pypath,"tesseract","tesseract.exe")]
+ tessparams = [join(pypath, "tesseract", "tesseract.exe")]
else:
- tessparams = ['tesseract']
+ tessparams = ["tesseract"]
- tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] )
+ tessparams.extend( [abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")] )
if subset and (digits or lowercase or uppercase):
- #self.logger.debug("create temp subset config")
#tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
- tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb")
- tmpSub.write("tessedit_char_whitelist ")
- if digits:
- tmpSub.write("0123456789")
- if lowercase:
- tmpSub.write("abcdefghijklmnopqrstuvwxyz")
- if uppercase:
- tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
- tmpSub.write("\n")
- tessparams.append("nobatch")
- tessparams.append(abspath(tmpSub.name))
- tmpSub.close()
+ with open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub:
+ tmpSub.write("tessedit_char_whitelist ")
+
+ if digits:
+ tmpSub.write("0123456789")
+ if lowercase:
+ tmpSub.write("abcdefghijklmnopqrstuvwxyz")
+ if uppercase:
+ tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+
+ tmpSub.write("\n")
+ tessparams.append("nobatch")
+ tessparams.append(abspath(tmpSub.name))
self.logger.debug("run tesseract")
self.run(tessparams)
@@ -110,22 +107,25 @@ class OCR(object):
self.logger.debug(self.result_captcha)
try:
- os.remove(tmp.name)
+ os.remove(tmpTif.name)
os.remove(tmpTxt.name)
if subset and (digits or lowercase or uppercase):
os.remove(tmpSub.name)
except:
pass
+
def get_captcha(self, name):
raise NotImplementedError
+
def to_greyscale(self):
if self.image.mode != 'L':
self.image = self.image.convert('L')
self.pixels = self.image.load()
+
def eval_black_white(self, limit):
self.pixels = self.image.load()
w, h = self.image.size
@@ -136,6 +136,7 @@ class OCR(object):
else:
self.pixels[x, y] = 0
+
def clean(self, allowed):
pixels = self.pixels
@@ -143,19 +144,28 @@ class OCR(object):
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 255: continue
- # no point in processing white pixels since we only want to remove black pixel
+ if pixels[x, y] == 255:
+ continue
+ # No point in processing white pixels since we only want to remove black pixel
count = 0
try:
- if pixels[x-1, y-1] != 255: count += 1
- if pixels[x-1, y] != 255: count += 1
- if pixels[x-1, y + 1] != 255: count += 1
- if pixels[x, y + 1] != 255: count += 1
- if pixels[x + 1, y + 1] != 255: count += 1
- if pixels[x + 1, y] != 255: count += 1
- if pixels[x + 1, y-1] != 255: count += 1
- if pixels[x, y-1] != 255: count += 1
+ if pixels[x-1, y-1] != 255:
+ count += 1
+ if pixels[x-1, y] != 255:
+ count += 1
+ if pixels[x-1, y + 1] != 255:
+ count += 1
+ if pixels[x, y + 1] != 255:
+ count += 1
+ if pixels[x + 1, y + 1] != 255:
+ count += 1
+ if pixels[x + 1, y] != 255:
+ count += 1
+ if pixels[x + 1, y-1] != 255:
+ count += 1
+ if pixels[x, y-1] != 255:
+ count += 1
except:
pass
@@ -167,10 +177,12 @@ class OCR(object):
# second pass: this time set all 1's to 255 (white)
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 1: pixels[x, y] = 255
+ if pixels[x, y] == 1:
+ pixels[x, y] = 255
self.pixels = pixels
+
def derotate_by_average(self):
"""rotate by checking each angle and guess most suitable"""
@@ -245,6 +257,7 @@ class OCR(object):
self.pixels = pixels
+
def split_captcha_letters(self):
captcha = self.image
started = False
@@ -262,13 +275,16 @@ class OCR(object):
firstX = x
lastX = x
- if y > bottomY: bottomY = y
- if y < topY: topY = y
- if x > lastX: lastX = x
+ if y > bottomY:
+ bottomY = y
+ if y < topY:
+ topY = y
+ if x > lastX:
+ lastX = x
black_pixel_in_col = True
- if black_pixel_in_col == False and started == True:
+ if black_pixel_in_col is False and started is True:
rect = (firstX, topY, lastX, bottomY)
new_captcha = captcha.crop(rect)
@@ -281,8 +297,8 @@ class OCR(object):
return letters
- def correct(self, values, var=None):
+ def correct(self, values, var=None):
if var:
result = var
else:
@@ -300,14 +316,3 @@ class OCR(object):
return result
else:
self.result_captcha = result
-
-
-if __name__ == '__main__':
- ocr = OCR()
- ocr.load_image("B.jpg")
- ocr.to_greyscale()
- ocr.eval_black_white(140)
- ocr.derotate_by_average()
- ocr.run_tesser()
- print "Tesseract", ocr.result_captcha
- ocr.image.save("derotated.jpg")