summary refs log tree commit diff stats
path: root/module/plugins/internal/OCR.py
diff options
context:
space:
mode:
Diffstat (limited to 'module/plugins/internal/OCR.py')
-rw-r--r-- module/plugins/internal/OCR.py 43
1 files changed, 23 insertions, 20 deletions
diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py
index 1782e17f0..2d41ab39e 100644
--- a/module/plugins/internal/OCR.py
+++ b/module/plugins/internal/OCR.py
@@ -11,12 +11,13 @@ except ImportError:
import logging
import os
import subprocess
-#import tempfile
+# import tempfile
+from module.plugins.internal.Plugin import Plugin
from module.utils import save_join as fs_join
-class OCR(object):
+class OCR(Plugin):
__name__ = "OCR"
__type__ = "ocr"
__version__ = "0.11"
@@ -37,7 +38,7 @@ class OCR(object):
def deactivate(self):
- """delete all tmp images"""
+ """Delete all tmp images"""
pass
@@ -48,21 +49,21 @@ class OCR(object):
def run(self, command):
"""Run a command"""
- popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
popen.wait()
- output = popen.stdout.read() +" | "+ popen.stderr.read()
+ output = popen.stdout.read() + " | " + popen.stderr.read()
popen.stdout.close()
popen.stderr.close()
self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))
- def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
- #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
+ def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None):
+ # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif")
try:
tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
tmpTif.close()
- #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
+ # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt")
tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
tmpTxt.close()
@@ -78,10 +79,13 @@ class OCR(object):
else:
tessparams = ["tesseract"]
- tessparams.extend( [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] )
+ tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")])
+
+ if pagesegmode:
+ tessparams.extend(["-psm", str(pagesegmode)])
if subset and (digits or lowercase or uppercase):
- #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
+ # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset")
with open(fs_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub:
tmpSub.write("tessedit_char_whitelist ")
@@ -151,11 +155,11 @@ class OCR(object):
count = 0
try:
- if pixels[x-1, y-1] != 255:
+ if pixels[x - 1, y - 1] != 255:
count += 1
- if pixels[x-1, y] != 255:
+ if pixels[x - 1, y] != 255:
count += 1
- if pixels[x-1, y + 1] != 255:
+ if pixels[x - 1, y + 1] != 255:
count += 1
if pixels[x, y + 1] != 255:
count += 1
@@ -163,19 +167,19 @@ class OCR(object):
count += 1
if pixels[x + 1, y] != 255:
count += 1
- if pixels[x + 1, y-1] != 255:
+ if pixels[x + 1, y - 1] != 255:
count += 1
- if pixels[x, y-1] != 255:
+ if pixels[x, y - 1] != 255:
count += 1
except Exception:
pass
- # not enough neighbors are dark pixels so mark this pixel
- # to be changed to white
+ # not enough neighbors are dark pixels so mark this pixel
+ # to be changed to white
if count < allowed:
pixels[x, y] = 1
- # second pass: this time set all 1's to 255 (white)
+ # second pass: this time set all 1's to 255 (white)
for x in xrange(w):
for y in xrange(h):
if pixels[x, y] == 1:
@@ -185,7 +189,7 @@ class OCR(object):
def derotate_by_average(self):
- """rotate by checking each angle and guess most suitable"""
+ """Rotate by checking each angle and guess most suitable"""
w, h = self.image.size
pixels = self.pixels
@@ -211,7 +215,6 @@ class OCR(object):
if pixels[x, y] == 0:
pixels[x, y] = 255
-
count = {}
for x in xrange(w):