diff options
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/Captcha.py | 10 | ||||
-rw-r--r-- | module/plugins/internal/Extractor.py | 27 | ||||
-rw-r--r-- | module/plugins/internal/Hoster.py | 78 | ||||
-rw-r--r-- | module/plugins/internal/OCR.py | 43 |
4 files changed, 90 insertions, 68 deletions
diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index 8dbc33da2..4629c9522 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -3,7 +3,6 @@ from module.plugins.internal.Plugin import Plugin -#@TODO: Extend (new) Plugin class; remove all `html` args class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" @@ -11,16 +10,15 @@ class Captcha(Plugin): __description__ = """Base captcha service plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - key = None #: last key detected + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] def __init__(self, plugin): - self.plugin = plugin super(Captcha, self).__init__(plugin.core) + self.plugin = plugin + self.key = None #: last key detected + #@TODO: Recheck in 0.4.10 def retrieve_key(self, html): diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 1a98060d9..2aa26e64a 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -4,6 +4,7 @@ import os import re from module.PyFile import PyFile +from module.plugins.internal.Plugin import Plugin class ArchiveError(Exception): @@ -18,8 +19,9 @@ class PasswordError(Exception): pass -class Extractor: +class Extractor(Plugin): __name__ = "Extractor" + __type__ = "extractor" __version__ = "0.24" __description__ = """Base extractor plugin""" @@ -46,7 +48,8 @@ class Extractor: @classmethod def isUsable(cls): - """ Check if system statisfy dependencies + """ + Check if system statisfy dependencies :return: boolean """ return None @@ -54,7 +57,8 @@ class Extractor: @classmethod def getTargets(cls, files_ids): - """ Filter suited targets from list of filename id tuple list + """ + Filter suited targets from list of filename id tuple list :param files_ids: List of filepathes :return: List of targets, id tuple list """ @@ -78,7 +82,7 @@ class Extractor: delete='No', keepbroken=False, fid=None): - """ Initialize extractor for specific file """ + """Initialize extractor for specific file""" self.manager = manager self.filename = filename self.out = out @@ -95,12 +99,13 @@ class Extractor: def init(self): - """ Initialize additional data structures """ + """Initialize additional data structures""" pass def check(self): - """Quick Check by listing content of archive. + """ + Quick Check by listing content of archive. Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -109,8 +114,10 @@ class Extractor: """ raise NotImplementedError + def verify(self): - """Testing with Extractors buildt-in method + """ + Testing with Extractors buildt-in method Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -125,7 +132,8 @@ class Extractor: def extract(self, password=None): - """Extract the archive. Raise specific errors in case of failure. + """ + Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status :param password password to use @@ -138,7 +146,8 @@ class Extractor: def getDeleteFiles(self): - """Return list of files to delete, do *not* delete them here. + """ + Return list of files to delete, do *not* delete them here. :return: List with paths of files to delete """ diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 834a200ac..40430c38d 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -178,11 +178,11 @@ class Hoster(Plugin): :param seconds: wait time in seconds :param reconnect: True if a reconnect would avoid wait time """ - wait_time = int(seconds) + 1 - wait_until = time.time() + wait_time + wait_time = max(int(seconds), 1) + wait_until = time.time() + wait_time + 1 self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.pyfile.waitUntil), - "Wait: %d seconds" % wait_time) + "Wait: %d(+1) seconds" % wait_time) self.pyfile.waitUntil = wait_until @@ -190,13 +190,13 @@ class Hoster(Plugin): self.setReconnect(reconnect) - def wait(self, seconds=None, reconnect=None): + def wait(self, seconds=0, reconnect=None): """ Waits the time previously set """ pyfile = self.pyfile - if seconds is not None: + if seconds > 0: self.setWait(seconds) if reconnect is not None: @@ -284,7 +284,7 @@ class Hoster(Plugin): self.retries[id] = 0 if 0 < max_tries <= self.retries[id]: - self.fail(reason or _("Max retries reached"), "retry") + self.fail(reason or _("Max retries reached"), _("retry")) self.wait(wait_time, False) @@ -480,7 +480,7 @@ class Hoster(Plugin): return self.lastDownload - def checkDownload(self, rules, delete=True, file_size=None, size_tolerance=1000, read_size=100000): + def checkDownload(self, rules, delete=True, file_size=0, size_tolerance=1000, read_size=100000): """ Checks the content of the last downloaded file, re match is saved to `lastCheck` @@ -491,40 +491,52 @@ class Hoster(Plugin): :param read_size: amount of bytes to read from files :return: dictionary key of the first rule that matched """ + do_delete = False lastDownload = fs_encode(self.lastDownload) if not self.lastDownload or not os.path.exists(lastDownload): self.lastDownload = "" self.fail(self.pyfile.error or _("No file downloaded")) - download_size = os.stat(lastDownload).st_size + try: + download_size = os.stat(lastDownload).st_size + + if download_size < 1: + do_delete = True + self.fail(_("Empty file")) + + elif file_size > 0: + diff = abs(file_size - download_size) + + if diff > size_tolerance: + do_delete = True + self.fail(_("File size mismatch")) + + elif diff != 0: + self.logWarning(_("File size is not equal to expected size")) - if download_size < 1 or (file_size and abs(file_size - download_size) > size_tolerance): - if delete: + self.logDebug("Download Check triggered") + + with open(lastDownload, "rb") as f: + content = f.read(read_size) + + # produces encoding errors, better log to other file in the future? + # self.logDebug("Content: %s" % content) + for name, rule in rules.iteritems(): + if isinstance(rule, basestring): + if rule in content: + do_delete = True + return name + + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + do_delete = True + self.lastCheck = m + return name + finally: + if delete and do_delete: os.remove(lastDownload) - self.fail(_("Empty file")) - - self.logDebug("Download Check triggered") - - with open(lastDownload, "rb") as f: - content = f.read(read_size) - - # produces encoding errors, better log to other file in the future? - # self.logDebug("Content: %s" % content) - for name, rule in rules.iteritems(): - if isinstance(rule, basestring): - if rule in content: - if delete: - os.remove(lastDownload) - return name - - elif hasattr(rule, "search"): - m = rule.search(content) - if m: - if delete: - os.remove(lastDownload) - self.lastCheck = m - return name def directLink(self, url, follow_location=None): diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py index 1782e17f0..2d41ab39e 100644 --- a/module/plugins/internal/OCR.py +++ b/module/plugins/internal/OCR.py @@ -11,12 +11,13 @@ except ImportError: import logging import os import subprocess -#import tempfile +# import tempfile +from module.plugins.internal.Plugin import Plugin from module.utils import save_join as fs_join -class OCR(object): +class OCR(Plugin): __name__ = "OCR" __type__ = "ocr" __version__ = "0.11" @@ -37,7 +38,7 @@ class OCR(object): def deactivate(self): - """delete all tmp images""" + """Delete all tmp images""" pass @@ -48,21 +49,21 @@ class OCR(object): def run(self, command): """Run a command""" - popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) popen.wait() - output = popen.stdout.read() +" | "+ popen.stderr.read() + output = popen.stdout.read() + " | " + popen.stderr.read() popen.stdout.close() popen.stderr.close() self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) - def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): - #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None): + # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") try: tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") tmpTif.close() - #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") tmpTxt.close() @@ -78,10 +79,13 @@ class OCR(object): else: tessparams = ["tesseract"] - tessparams.extend( [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] ) + tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]) + + if pagesegmode: + tessparams.extend(["-psm", str(pagesegmode)]) if subset and (digits or lowercase or uppercase): - #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") with open(fs_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub: tmpSub.write("tessedit_char_whitelist ") @@ -151,11 +155,11 @@ class OCR(object): count = 0 try: - if pixels[x-1, y-1] != 255: + if pixels[x - 1, y - 1] != 255: count += 1 - if pixels[x-1, y] != 255: + if pixels[x - 1, y] != 255: count += 1 - if pixels[x-1, y + 1] != 255: + if pixels[x - 1, y + 1] != 255: count += 1 if pixels[x, y + 1] != 255: count += 1 @@ -163,19 +167,19 @@ class OCR(object): count += 1 if pixels[x + 1, y] != 255: count += 1 - if pixels[x + 1, y-1] != 255: + if pixels[x + 1, y - 1] != 255: count += 1 - if pixels[x, y-1] != 255: + if pixels[x, y - 1] != 255: count += 1 except Exception: pass - # not enough neighbors are dark pixels so mark this pixel - # to be changed to white + # not enough neighbors are dark pixels so mark this pixel + # to be changed to white if count < allowed: pixels[x, y] = 1 - # second pass: this time set all 1's to 255 (white) + # second pass: this time set all 1's to 255 (white) for x in xrange(w): for y in xrange(h): if pixels[x, y] == 1: @@ -185,7 +189,7 @@ class OCR(object): def derotate_by_average(self): - """rotate by checking each angle and guess most suitable""" + """Rotate by checking each angle and guess most suitable""" w, h = self.image.size pixels = self.pixels @@ -211,7 +215,6 @@ class OCR(object): if pixels[x, y] == 0: pixels[x, y] = 255 - count = {} for x in xrange(w): |