summaryrefslogtreecommitdiffstats
path: root/pyload/plugin/OCR.py
diff options
context:
space:
mode:
Diffstat (limited to 'pyload/plugin/OCR.py')
-rw-r--r--pyload/plugin/OCR.py30
1 files changed, 10 insertions, 20 deletions
diff --git a/pyload/plugin/OCR.py b/pyload/plugin/OCR.py
index 01ba6d534..df32b9f23 100644
--- a/pyload/plugin/OCR.py
+++ b/pyload/plugin/OCR.py
@@ -11,7 +11,7 @@ except ImportError:
import logging
import os
import subprocess
-#import tempfile
+# import tempfile
from pyload.plugin.Plugin import Base
from pyload.utils import fs_join
@@ -20,32 +20,27 @@ from pyload.utils import fs_join
class OCR(Base):
__name = "OCR"
__type = "ocr"
- __version = "0.11"
+ __version = "0.12"
__description = """OCR base plugin"""
__license = "GPLv3"
__authors = [("pyLoad Team", "admin@pyload.org")]
-
def __init__(self):
self.logger = logging.getLogger("log")
-
def load_image(self, image):
self.image = Image.open(image)
self.pixels = self.image.load()
self.result_captcha = ''
-
def deactivate(self):
"""delete all tmp images"""
pass
-
def threshold(self, value):
self.image = self.image.point(lambda a: a * value + 10)
-
def run(self, command):
"""Run a command"""
@@ -56,14 +51,13 @@ class OCR(Base):
popen.stderr.close()
self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
-
- def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
- #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
+ def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):
+ # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
try:
tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__class__.__name__), "wb")
tmpTif.close()
- #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+ # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__class__.__name__), "wb")
tmpTxt.close()
@@ -79,10 +73,13 @@ class OCR(Base):
else:
tessparams = ["tesseract"]
- tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] )
+ tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")])
+
+ if pagesegmode:
+ tessparams.extend(["-psm", str(pagesegmode)])
if subset and (digits or lowercase or uppercase):
- #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
+ # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
with open(fs_join("tmp", "tmpSub_%s.subset" % self.__class__.__name__), "wb") as tmpSub:
tmpSub.write("tessedit_char_whitelist ")
@@ -116,18 +113,15 @@ class OCR(Base):
except Exception:
pass
-
def get_captcha(self, name):
raise NotImplementedError
-
def to_greyscale(self):
if self.image.mode != 'L':
self.image = self.image.convert('L')
self.pixels = self.image.load()
-
def eval_black_white(self, limit):
self.pixels = self.image.load()
w, h = self.image.size
@@ -138,7 +132,6 @@ class OCR(Base):
else:
self.pixels[x, y] = 0
-
def clean(self, allowed):
pixels = self.pixels
@@ -184,7 +177,6 @@ class OCR(Base):
self.pixels = pixels
-
def derotate_by_average(self):
"""rotate by checking each angle and guess most suitable"""
@@ -258,7 +250,6 @@ class OCR(Base):
self.pixels = pixels
-
def split_captcha_letters(self):
captcha = self.image
started = False
@@ -298,7 +289,6 @@ class OCR(Base):
return letters
-
def correct(self, values, var=None):
if var:
result = var