summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--module/plugins/internal/OCR.py89
1 files changed, 47 insertions, 42 deletions
diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py
index b4e28ca0f..8d080e436 100644
--- a/module/plugins/internal/OCR.py
+++ b/module/plugins/internal/OCR.py
@@ -14,13 +14,13 @@ import subprocess
# import tempfile
from module.plugins.internal.Plugin import Plugin
-from module.plugins.internal.utils import fs_join
+from module.plugins.internal.misc import encode, fsjoin
class OCR(Plugin):
__name__ = "OCR"
__type__ = "ocr"
- __version__ = "0.21"
+ __version__ = "0.22"
__status__ = "stable"
__description__ = """OCR base plugin"""
@@ -28,15 +28,15 @@ class OCR(Plugin):
__authors__ = [("pyLoad Team", "admin@pyload.org")]
- def __init__(self, plugin):
- self._init(plugin.pyload)
- self.plugin = plugin
+ def __init__(self, pyfile):
+ self._init(pyfile.m.core)
+ self.pyfile = pyfile
self.init()
def _log(self, level, plugintype, pluginname, messages):
messages = (self.__name__,) + messages
- return self.plugin._log(level, plugintype, self.plugin.__name__, messages)
+ return self.pyfile.plugin._log(level, plugintype, self.pyfile.plugin.__name__, messages)
def load_image(self, image):
@@ -56,26 +56,33 @@ class OCR(Plugin):
self.image = self.image.point(lambda a: a * value + 10)
- def run(self, command):
+ def call_cmd(self, command, *args, **kwargs):
"""
Run a command
"""
- popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ call = [command] + args
+ self.log_debug("EXECUTE " + " ".join(call))
+
+ call = map(encode, call)
+ popen = subprocess.Popen(call, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
popen.wait()
+
output = popen.stdout.read() + " | " + popen.stderr.read()
+
popen.stdout.close()
popen.stderr.close()
+
self.log_debug("Tesseract ReturnCode %d" % popen.returncode, "Output: %s" % output)
def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):
# tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
try:
- tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.classname), "wb")
+ tmpTif = open(fsjoin("tmp", "tmpTif_%s.tif" % self.classname), "wb")
tmpTif.close()
# tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
- tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.classname), "wb")
+ tmpTxt = open(fsjoin("tmp", "tmpTxt_%s.txt" % self.classname), "wb")
tmpTxt.close()
except IOError, e:
@@ -86,18 +93,18 @@ class OCR(Plugin):
self.image.save(tmpTif.name, 'TIFF')
if os.name is "nt":
- tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")]
+ command = os.path.join(pypath, "tesseract", "tesseract.exe")
else:
- tessparams = ["tesseract"]
+ command = "tesseract"
- tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")])
+ args = [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]
if pagesegmode:
- tessparams.extend(["-psm", str(pagesegmode)])
+ args.extend(["-psm", str(pagesegmode)])
if subset and (digits or lowercase or uppercase):
# tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
- with open(fs_join("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub:
+ with open(fsjoin("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub:
tmpSub.write("tessedit_char_whitelist ")
if digits:
@@ -108,11 +115,11 @@ class OCR(Plugin):
tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
tmpSub.write("\n")
- tessparams.append("nobatch")
- tessparams.append(os.path.abspath(tmpSub.name))
+ args.append("nobatch")
+ args.append(os.path.abspath(tmpSub.name))
self.log_debug("Running tesseract...")
- self.run(tessparams)
+ self.call_cmd(command, *args)
self.log_debug("Reading txt...")
try:
@@ -123,14 +130,12 @@ class OCR(Plugin):
self.result_captcha = ""
self.log_info(_("OCR result: ") + self.result_captcha)
- try:
- os.remove(tmpTif.name)
- os.remove(tmpTxt.name)
- if subset and (digits or lowercase or uppercase):
- os.remove(tmpSub.name)
- except OSError, e:
- self.log_warning(e)
+ self.remove(tmpTif.name, trash=False)
+ self.remove(tmpTxt.name, trash=False)
+
+ if subset and (digits or lowercase or uppercase):
+ self.remove(tmpSub.name, trash=False)
def recognize(self, name):
@@ -162,34 +167,34 @@ class OCR(Plugin):
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 255:
+ if pixels[x, y] is 255:
continue
#: No point in processing white pixels since we only want to remove black pixel
count = 0
try:
- if pixels[x - 1, y - 1] != 255:
+ if pixels[x - 1, y - 1] is not 255:
count += 1
- if pixels[x - 1, y] != 255:
+ if pixels[x - 1, y] is not 255:
count += 1
- if pixels[x - 1, y + 1] != 255:
+ if pixels[x - 1, y + 1] is not 255:
count += 1
- if pixels[x, y + 1] != 255:
+ if pixels[x, y + 1] is not 255:
count += 1
- if pixels[x + 1, y + 1] != 255:
+ if pixels[x + 1, y + 1] is not 255:
count += 1
- if pixels[x + 1, y] != 255:
+ if pixels[x + 1, y] is not 255:
count += 1
- if pixels[x + 1, y - 1] != 255:
+ if pixels[x + 1, y - 1] is not 255:
count += 1
- if pixels[x, y - 1] != 255:
+ if pixels[x, y - 1] is not 255:
count += 1
except Exception:
@@ -203,7 +208,7 @@ class OCR(Plugin):
#: Second pass: this time set all 1's to 255 (white)
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 1:
+ if pixels[x, y] is 1:
pixels[x, y] = 255
self.pixels = pixels
@@ -218,7 +223,7 @@ class OCR(Plugin):
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 0:
+ if pixels[x, y] is 0:
pixels[x, y] = 155
highest = {}
@@ -234,7 +239,7 @@ class OCR(Plugin):
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 0:
+ if pixels[x, y] is 0:
pixels[x, y] = 255
count = {}
@@ -242,14 +247,14 @@ class OCR(Plugin):
for x in xrange(w):
count[x] = 0
for y in xrange(h):
- if pixels[x, y] == 155:
+ if pixels[x, y] is 155:
count[x] += 1
sum = 0
cnt = 0
for x in count.values():
- if x != 0:
+ if x is not 0:
sum += x
cnt += 1
@@ -275,10 +280,10 @@ class OCR(Plugin):
for x in xrange(w):
for y in xrange(h):
- if pixels[x, y] == 0:
+ if pixels[x, y] is 0:
pixels[x, y] = 255
- if pixels[x, y] == 155:
+ if pixels[x, y] is 155:
pixels[x, y] = 0
self.pixels = pixels
@@ -295,7 +300,7 @@ class OCR(Plugin):
for x in xrange(width):
black_pixel_in_col = False
for y in xrange(height):
- if pixels[x, y] != 255:
+ if pixels[x, y] is not 255:
if not started:
started = True
firstX = x