1 files changed, 64 insertions, 59 deletions
diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py
index b4e28ca0f..217305459 100644
--- a/module/plugins/internal/OCR.py
+++ b/module/plugins/internal/OCR.py
@@ -14,13 +14,13 @@ import subprocess
 # import tempfile
 
 from module.plugins.internal.Plugin import Plugin
-from module.plugins.internal.utils import fs_join
+from module.plugins.internal.misc import encode, fsjoin
 
 
 class OCR(Plugin):
     __name__    = "OCR"
     __type__    = "ocr"
-    __version__ = "0.21"
+    __version__ = "0.22"
     __status__  = "stable"
 
     __description__ = """OCR base plugin"""
@@ -28,20 +28,20 @@ class OCR(Plugin):
     __authors__     = [("pyLoad Team", "admin@pyload.org")]
 
 
-    def __init__(self, plugin):
-        self._init(plugin.pyload)
-        self.plugin = plugin
+    def __init__(self, pyfile):
+        self._init(pyfile.m.core)
+        self.pyfile = pyfile
         self.init()
 
 
     def _log(self, level, plugintype, pluginname, messages):
         messages = (self.__name__,) + messages
-        return self.plugin._log(level, plugintype, self.plugin.__name__, messages)
+        return self.pyfile.plugin._log(level, plugintype, self.pyfile.plugin.__name__, messages)
 
 
     def load_image(self, image):
-        self.image = Image.open(image)
-        self.pixels = self.image.load()
+        self.img = Image.open(image)
+        self.pixels = self.img.load()
         self.result_captcha = ""
 
 
@@ -53,29 +53,36 @@ class OCR(Plugin):
 
 
     def threshold(self, value):
-        self.image = self.image.point(lambda a: a * value + 10)
+        self.img = self.img.point(lambda a: a * value + 10)
 
 
-    def run(self, command):
+    def call_cmd(self, command, *args, **kwargs):
         """
         Run a command
         """
-        popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        call = [command] + list(args)
+        self.log_debug("EXECUTE " + " ".join(call))
+
+        call = map(encode, call)
+        popen = subprocess.Popen(call, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         popen.wait()
+
         output = popen.stdout.read() + " | " + popen.stderr.read()
+
         popen.stdout.close()
         popen.stderr.close()
+
         self.log_debug("Tesseract ReturnCode %d" % popen.returncode, "Output: %s" % output)
 
 
     def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):
         # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
         try:
-            tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.classname), "wb")
+            tmpTif = open(fsjoin("tmp", "tmpTif_%s.tif" % self.classname), "wb")
             tmpTif.close()
 
             # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
-            tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.classname), "wb")
+            tmpTxt = open(fsjoin("tmp", "tmpTxt_%s.txt" % self.classname), "wb")
             tmpTxt.close()
 
         except IOError, e:
@@ -83,21 +90,21 @@ class OCR(Plugin):
             return
 
         self.log_debug("Saving tiff...")
-        self.image.save(tmpTif.name, 'TIFF')
+        self.img.save(tmpTif.name, 'TIFF')
 
         if os.name is "nt":
-            tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")]
+            command = os.path.join(pypath, "tesseract", "tesseract.exe")
         else:
-            tessparams = ["tesseract"]
+            command = "tesseract"
 
-        tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")])
+        args = [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]
 
         if pagesegmode:
-            tessparams.extend(["-psm", str(pagesegmode)])
+            args.extend(["-psm", str(pagesegmode)])
 
         if subset and (digits or lowercase or uppercase):
             # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
-            with open(fs_join("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub:
+            with open(fsjoin("tmp", "tmpSub_%s.subset" % self.classname), "wb") as tmpSub:
                 tmpSub.write("tessedit_char_whitelist ")
 
                 if digits:
@@ -108,11 +115,11 @@ class OCR(Plugin):
                     tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
 
                 tmpSub.write("\n")
-                tessparams.append("nobatch")
-                tessparams.append(os.path.abspath(tmpSub.name))
+                args.append("nobatch")
+                args.append(os.path.abspath(tmpSub.name))
 
         self.log_debug("Running tesseract...")
-        self.run(tessparams)
+        self.call_cmd(command, *args)
         self.log_debug("Reading txt...")
 
         try:
@@ -123,30 +130,28 @@ class OCR(Plugin):
             self.result_captcha = ""
 
         self.log_info(_("OCR result: ") + self.result_captcha)
-        try:
-            os.remove(tmpTif.name)
-            os.remove(tmpTxt.name)
-            if subset and (digits or lowercase or uppercase):
-                os.remove(tmpSub.name)
 
-        except OSError, e:
-            self.log_warning(e)
+        self.remove(tmpTif.name, trash=False)
+        self.remove(tmpTxt.name, trash=False)
+
+        if subset and (digits or lowercase or uppercase):
+            self.remove(tmpSub.name, trash=False)
 
 
-    def recognize(self, name):
+    def recognize(self, image):
         raise NotImplementedError
 
 
     def to_greyscale(self):
-        if self.image.mode != 'L':
-            self.image = self.image.convert('L')
+        if self.img.mode != 'L':
+            self.img = self.img.convert('L')
 
-        self.pixels = self.image.load()
+        self.pixels = self.img.load()
 
 
     def eval_black_white(self, limit):
-        self.pixels = self.image.load()
-        w, h = self.image.size
+        self.pixels = self.img.load()
+        w, h = self.img.size
         for x in xrange(w):
             for y in xrange(h):
                 if self.pixels[x, y] > limit:
@@ -158,38 +163,38 @@ class OCR(Plugin):
     def clean(self, allowed):
         pixels = self.pixels
 
-        w, h = self.image.size
+        w, h = self.img.size
 
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 255:
+                if pixels[x, y] == 255:
                     continue
                 #: No point in processing white pixels since we only want to remove black pixel
                 count = 0
 
                 try:
-                    if pixels[x - 1, y - 1] != 255:
+                    if pixels[x - 1, y - 1] != 255:
                         count += 1
 
-                    if pixels[x - 1, y] != 255:
+                    if pixels[x - 1, y] != 255:
                         count += 1
 
-                    if pixels[x - 1, y + 1] != 255:
+                    if pixels[x - 1, y + 1] != 255:
                         count += 1
 
-                    if pixels[x, y + 1] != 255:
+                    if pixels[x, y + 1] != 255:
                         count += 1
 
-                    if pixels[x + 1, y + 1] != 255:
+                    if pixels[x + 1, y + 1] != 255:
                         count += 1
 
-                    if pixels[x + 1, y] != 255:
+                    if pixels[x + 1, y] != 255:
                         count += 1
 
-                    if pixels[x + 1, y - 1] != 255:
+                    if pixels[x + 1, y - 1] != 255:
                         count += 1
 
-                    if pixels[x, y - 1] != 255:
+                    if pixels[x, y - 1] != 255:
                         count += 1
 
                 except Exception:
@@ -203,7 +208,7 @@ class OCR(Plugin):
         #: Second pass: this time set all 1's to 255 (white)
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 1:
+                if pixels[x, y] == 1:
                     pixels[x, y] = 255
 
         self.pixels = pixels
@@ -213,12 +218,12 @@ class OCR(Plugin):
         """
         Rotate by checking each angle and guess most suitable
         """
-        w, h = self.image.size
+        w, h = self.img.size
         pixels = self.pixels
 
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 0:
+                if pixels[x, y] == 0:
                     pixels[x, y] = 155
 
         highest = {}
@@ -226,15 +231,15 @@ class OCR(Plugin):
 
         for angle in xrange(-45, 45):
 
-            tmpimage = self.image.rotate(angle)
+            tmpimage = self.img.rotate(angle)
 
             pixels = tmpimage.load()
 
-            w, h = self.image.size
+            w, h = self.img.size
 
             for x in xrange(w):
                 for y in xrange(h):
-                    if pixels[x, y] == 0:
+                    if pixels[x, y] == 0:
                         pixels[x, y] = 255
 
             count = {}
@@ -242,14 +247,14 @@ class OCR(Plugin):
             for x in xrange(w):
                 count[x] = 0
                 for y in xrange(h):
-                    if pixels[x, y] == 155:
+                    if pixels[x, y] == 155:
                         count[x] += 1
 
             sum = 0
             cnt = 0
 
             for x in count.values():
-                if x != 0:
+                if x != 0:
                     sum += x
                     cnt += 1
 
@@ -270,22 +275,22 @@ class OCR(Plugin):
                 hkey = key
                 hvalue = value
 
-        self.image = self.image.rotate(hkey)
-        pixels = self.image.load()
+        self.img = self.img.rotate(hkey)
+        pixels = self.img.load()
 
         for x in xrange(w):
             for y in xrange(h):
-                if pixels[x, y] == 0:
+                if pixels[x, y] == 0:
                     pixels[x, y] = 255
 
-                if pixels[x, y] == 155:
+                if pixels[x, y] == 155:
                     pixels[x, y] = 0
                     pixels[x, y] = 0
 
         self.pixels = pixels
 
 
     def split_captcha_letters(self):
-        captcha = self.image
+        captcha = self.img
         started = False
         letters = []
         width, height = captcha.size
@@ -295,7 +300,7 @@ class OCR(Plugin):
         for x in xrange(width):
             black_pixel_in_col = False
             for y in xrange(height):
-                if pixels[x, y] != 255:
+                if pixels[x, y] != 255:
                     if not started:
                         started = True
                         firstX = x