diff options
Diffstat (limited to 'module/plugins/captcha/captcha.py')
-rw-r--r-- | module/plugins/captcha/captcha.py | 20 |
1 file changed, 18 insertions, 2 deletions
diff --git a/module/plugins/captcha/captcha.py b/module/plugins/captcha/captcha.py
index 283b171e0..452952533 100644
--- a/module/plugins/captcha/captcha.py
+++ b/module/plugins/captcha/captcha.py
@@ -82,15 +82,31 @@ class OCR(object):
         self.image.save(tmp)
         self.result_captcha = self.run(['gocr', tmp.name]).replace("\n", "")
 
-    def run_tesser(self):
+    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True ):
         self.logger.debug("create tmp tif")
         tmp = tempfile.NamedTemporaryFile(suffix=".tif")
         self.logger.debug("create tmp txt")
         tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
         self.logger.debug("save tiff")
         self.image.save(tmp.name, 'TIFF')
+
+        tessparams = ['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")
+
+        if subset and (digits or lowercase or uppercase):
+            self.logger.debug("create temp subset config")
+            tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
+            tmpSub.write("tessedit_char_whitelist ")
+            if digits:
+                tmpSub.write("0123456789")
+            if lowercase:
+                tmpSub.write("abcdefghijklmnopqrstuvwxyz")
+            if uppercase:
+                tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+            tessparams.append("nobatch")
+            tessparams.append(tmpSub.name)
+
         self.logger.debug("run tesseract")
-        self.run(['tesseract', tmp.name, tmpTxt.name.replace(".txt", "")])
+        self.run(tessparams)
 
         self.logger.debug("read txt")
         with open(tmpTxt.name, 'r') as f: