diff options
Diffstat (limited to 'pyload/plugin/OCR.py')
-rw-r--r-- | pyload/plugin/OCR.py | 30 |
1 files changed, 10 insertions, 20 deletions
diff --git a/pyload/plugin/OCR.py b/pyload/plugin/OCR.py index 01ba6d534..df32b9f23 100644 --- a/pyload/plugin/OCR.py +++ b/pyload/plugin/OCR.py @@ -11,7 +11,7 @@ except ImportError: import logging import os import subprocess -#import tempfile +# import tempfile from pyload.plugin.Plugin import Base from pyload.utils import fs_join @@ -20,32 +20,27 @@ from pyload.utils import fs_join class OCR(Base): __name = "OCR" __type = "ocr" - __version = "0.11" + __version = "0.12" __description = """OCR base plugin""" __license = "GPLv3" __authors = [("pyLoad Team", "admin@pyload.org")] - def __init__(self): self.logger = logging.getLogger("log") - def load_image(self, image): self.image = Image.open(image) self.pixels = self.image.load() self.result_captcha = '' - def deactivate(self): """delete all tmp images""" pass - def threshold(self, value): self.image = self.image.point(lambda a: a * value + 10) - def run(self, command): """Run a command""" @@ -56,14 +51,13 @@ class OCR(Base): popen.stderr.close() self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) - - def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): - #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None): + # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") try: tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__class__.__name__), "wb") tmpTif.close() - #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__class__.__name__), "wb") tmpTxt.close() @@ -79,10 +73,13 @@ class OCR(Base): else: tessparams = ["tesseract"] - tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] ) + tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]) + + if pagesegmode: + tessparams.extend(["-psm", str(pagesegmode)]) if subset and (digits or lowercase or uppercase): - #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") with open(fs_join("tmp", "tmpSub_%s.subset" % self.__class__.__name__), "wb") as tmpSub: tmpSub.write("tessedit_char_whitelist ") @@ -116,18 +113,15 @@ class OCR(Base): except Exception: pass - def get_captcha(self, name): raise NotImplementedError - def to_greyscale(self): if self.image.mode != 'L': self.image = self.image.convert('L') self.pixels = self.image.load() - def eval_black_white(self, limit): self.pixels = self.image.load() w, h = self.image.size @@ -138,7 +132,6 @@ class OCR(Base): else: self.pixels[x, y] = 0 - def clean(self, allowed): pixels = self.pixels @@ -184,7 +177,6 @@ class OCR(Base): self.pixels = pixels - def derotate_by_average(self): """rotate by checking each angle and guess most suitable""" @@ -258,7 +250,6 @@ class OCR(Base): self.pixels = pixels - def split_captcha_letters(self): captcha = self.image started = False @@ -298,7 +289,6 @@ class OCR(Base): return letters - def correct(self, values, var=None): if var: result = var |