diff options
author | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-06-17 11:23:08 +0200 |
---|---|---|
committer | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-06-24 22:42:18 +0200 |
commit | d99d6eddb6af637580bb6fc72013f913077525d6 (patch) | |
tree | 3a2be1b471d34ae293ec96f2110e9fc452a6e638 | |
parent | [Hoster] Import fixup (diff) | |
download | pyload-d99d6eddb6af637580bb6fc72013f913077525d6.tar.xz |
Spare fixes
-rw-r--r-- | module/plugins/hoster/EuroshareEu.py | 17 | ||||
-rw-r--r-- | module/plugins/hoster/HighWayMe.py | 2 | ||||
-rw-r--r-- | module/plugins/hoster/OverLoadMe.py | 4 | ||||
-rw-r--r-- | module/plugins/hoster/YourfilesTo.py | 3 | ||||
-rw-r--r-- | module/plugins/internal/Captcha.py | 10 | ||||
-rw-r--r-- | module/plugins/internal/Extractor.py | 27 | ||||
-rw-r--r-- | module/plugins/internal/Hoster.py | 78 | ||||
-rw-r--r-- | module/plugins/internal/OCR.py | 43 |
8 files changed, 99 insertions, 85 deletions
diff --git a/module/plugins/hoster/EuroshareEu.py b/module/plugins/hoster/EuroshareEu.py index c4bd54667..deb138ec7 100644 --- a/module/plugins/hoster/EuroshareEu.py +++ b/module/plugins/hoster/EuroshareEu.py @@ -23,20 +23,20 @@ class EuroshareEu(SimpleHoster): LINK_FREE_PATTERN = r'<a href="(/file/\d+/[^/]*/download/)"><div class="downloadButton"' - ERR_PARDL_PATTERN = r'<h2>Prebieha s.ahovanie</h2>|<p>Naraz je z jednej IP adresy mo.n. s.ahova. iba jeden s.bor' - ERR_NOT_LOGGED_IN_PATTERN = r'href="/customer-zone/login/"' + DL_LIMIT_PATTERN = r'<h2>Prebieha s.ahovanie</h2>|<p>Naraz je z jednej IP adresy mo.n. s.ahova. iba jeden s.bor' + ERROR_PATTERN = r'href="/customer-zone/login/"' URL_REPLACEMENTS = [(r"(http://[^/]*\.)(sk|cz|hu|pl)/", r"\1eu/")] def handlePremium(self, pyfile): - if self.ERR_NOT_LOGGED_IN_PATTERN in self.html: + if self.ERROR_PATTERN in self.html: self.account.relogin(self.user) self.retry(reason=_("User not logged in")) self.link = pyfile.url.rstrip('/') + "/download/" - check = self.checkDownload({"login": re.compile(self.ERR_NOT_LOGGED_IN_PATTERN), + check = self.checkDownload({"login": re.compile(self.ERROR_PATTERN), "json" : re.compile(r'\{"status":"error".*?"message":"(.*?)"')}) if check == "login" or (check == "json" and self.lastCheck.group(1) == "Access token expired"): @@ -48,7 +48,7 @@ class EuroshareEu(SimpleHoster): def handleFree(self, pyfile): - if re.search(self.ERR_PARDL_PATTERN, self.html): + if re.search(self.DL_LIMIT_PATTERN, self.html): self.wait(5 * 60, 12, _("Download limit reached")) m = re.search(self.LINK_FREE_PATTERN, self.html) @@ -58,11 +58,4 @@ class EuroshareEu(SimpleHoster): self.link = "http://euroshare.eu%s" % m.group(1) - def checkFile(self): - if self.checkDownload({"multi-dl": re.compile(self.ERR_PARDL_PATTERN)}) - self.wait(5 * 60, 12, _("Download limit reached")) - - return super(EuroshareEu, self).checkFile() - - getInfo = create_getInfo(EuroshareEu) diff --git a/module/plugins/hoster/HighWayMe.py b/module/plugins/hoster/HighWayMe.py index 9f39c21e8..dc7a9fc12 100644 --- a/module/plugins/hoster/HighWayMe.py +++ b/module/plugins/hoster/HighWayMe.py @@ -24,7 +24,7 @@ class HighWayMe(MultiHoster): def checkErrors(self): - if self.html.get('code') == 302: #@NOTE: This is not working. It should by if 302 Moved Temporarily then ... But I don't now how to implement it. + if self.html.get('code') == 302: #@NOTE: This is not working. It should by if 302 Moved Temporarily then... But I don't now how to implement it. self.account.relogin(self.user) self.retry() diff --git a/module/plugins/hoster/OverLoadMe.py b/module/plugins/hoster/OverLoadMe.py index bc3ca87c4..8ba310b52 100644 --- a/module/plugins/hoster/OverLoadMe.py +++ b/module/plugins/hoster/OverLoadMe.py @@ -40,12 +40,10 @@ class OverLoadMe(MultiHoster): self.logWarning(data['msg']) self.tempOffline() else: + self.link = data['downloadlink'] if pyfile.name and pyfile.name.endswith('.tmp') and data['filename']: pyfile.name = data['filename'] pyfile.size = parseFileSize(data['filesize']) - http_repl = ["http://", "https://"] - self.link = data['downloadlink'].replace(*http_repl if self.getConfig('ssl') else *http_repl[::-1]) - getInfo = create_getInfo(OverLoadMe) diff --git a/module/plugins/hoster/YourfilesTo.py b/module/plugins/hoster/YourfilesTo.py index 0f6ca49df..4d316eabf 100644 --- a/module/plugins/hoster/YourfilesTo.py +++ b/module/plugins/hoster/YourfilesTo.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- -import reimport urllib +import re +import urllib from module.plugins.internal.Hoster import Hoster diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index 8dbc33da2..4629c9522 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -3,7 +3,6 @@ from module.plugins.internal.Plugin import Plugin -#@TODO: Extend (new) Plugin class; remove all `html` args class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" @@ -11,16 +10,15 @@ class Captcha(Plugin): __description__ = """Base captcha service plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - key = None #: last key detected + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] def __init__(self, plugin): - self.plugin = plugin super(Captcha, self).__init__(plugin.core) + self.plugin = plugin + self.key = None #: last key detected + #@TODO: Recheck in 0.4.10 def retrieve_key(self, html): diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 1a98060d9..2aa26e64a 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -4,6 +4,7 @@ import os import re from module.PyFile import PyFile +from module.plugins.internal.Plugin import Plugin class ArchiveError(Exception): @@ -18,8 +19,9 @@ class PasswordError(Exception): pass -class Extractor: +class Extractor(Plugin): __name__ = "Extractor" + __type__ = "extractor" __version__ = "0.24" __description__ = """Base extractor plugin""" @@ -46,7 +48,8 @@ class Extractor: @classmethod def isUsable(cls): - """ Check if system statisfy dependencies + """ + Check if system statisfy dependencies :return: boolean """ return None @@ -54,7 +57,8 @@ class Extractor: @classmethod def getTargets(cls, files_ids): - """ Filter suited targets from list of filename id tuple list + """ + Filter suited targets from list of filename id tuple list :param files_ids: List of filepathes :return: List of targets, id tuple list """ @@ -78,7 +82,7 @@ class Extractor: delete='No', keepbroken=False, fid=None): - """ Initialize extractor for specific file """ + """Initialize extractor for specific file""" self.manager = manager self.filename = filename self.out = out @@ -95,12 +99,13 @@ class Extractor: def init(self): - """ Initialize additional data structures """ + """Initialize additional data structures""" pass def check(self): - """Quick Check by listing content of archive. + """ + Quick Check by listing content of archive. Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -109,8 +114,10 @@ class Extractor: """ raise NotImplementedError + def verify(self): - """Testing with Extractors buildt-in method + """ + Testing with Extractors buildt-in method Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -125,7 +132,8 @@ class Extractor: def extract(self, password=None): - """Extract the archive. Raise specific errors in case of failure. + """ + Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status :param password password to use @@ -138,7 +146,8 @@ class Extractor: def getDeleteFiles(self): - """Return list of files to delete, do *not* delete them here. + """ + Return list of files to delete, do *not* delete them here. :return: List with paths of files to delete """ diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 834a200ac..40430c38d 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -178,11 +178,11 @@ class Hoster(Plugin): :param seconds: wait time in seconds :param reconnect: True if a reconnect would avoid wait time """ - wait_time = int(seconds) + 1 - wait_until = time.time() + wait_time + wait_time = max(int(seconds), 1) + wait_until = time.time() + wait_time + 1 self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.pyfile.waitUntil), - "Wait: %d seconds" % wait_time) + "Wait: %d(+1) seconds" % wait_time) self.pyfile.waitUntil = wait_until @@ -190,13 +190,13 @@ class Hoster(Plugin): self.setReconnect(reconnect) - def wait(self, seconds=None, reconnect=None): + def wait(self, seconds=0, reconnect=None): """ Waits the time previously set """ pyfile = self.pyfile - if seconds is not None: + if seconds > 0: self.setWait(seconds) if reconnect is not None: @@ -284,7 +284,7 @@ class Hoster(Plugin): self.retries[id] = 0 if 0 < max_tries <= self.retries[id]: - self.fail(reason or _("Max retries reached"), "retry") + self.fail(reason or _("Max retries reached"), _("retry")) self.wait(wait_time, False) @@ -480,7 +480,7 @@ class Hoster(Plugin): return self.lastDownload - def checkDownload(self, rules, delete=True, file_size=None, size_tolerance=1000, read_size=100000): + def checkDownload(self, rules, delete=True, file_size=0, size_tolerance=1000, read_size=100000): """ Checks the content of the last downloaded file, re match is saved to `lastCheck` @@ -491,40 +491,52 @@ class Hoster(Plugin): :param read_size: amount of bytes to read from files :return: dictionary key of the first rule that matched """ + do_delete = False lastDownload = fs_encode(self.lastDownload) if not self.lastDownload or not os.path.exists(lastDownload): self.lastDownload = "" self.fail(self.pyfile.error or _("No file downloaded")) - download_size = os.stat(lastDownload).st_size + try: + download_size = os.stat(lastDownload).st_size + + if download_size < 1: + do_delete = True + self.fail(_("Empty file")) + + elif file_size > 0: + diff = abs(file_size - download_size) + + if diff > size_tolerance: + do_delete = True + self.fail(_("File size mismatch")) + + elif diff != 0: + self.logWarning(_("File size is not equal to expected size")) - if download_size < 1 or (file_size and abs(file_size - download_size) > size_tolerance): - if delete: + self.logDebug("Download Check triggered") + + with open(lastDownload, "rb") as f: + content = f.read(read_size) + + # produces encoding errors, better log to other file in the future? + # self.logDebug("Content: %s" % content) + for name, rule in rules.iteritems(): + if isinstance(rule, basestring): + if rule in content: + do_delete = True + return name + + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + do_delete = True + self.lastCheck = m + return name + finally: + if delete and do_delete: os.remove(lastDownload) - self.fail(_("Empty file")) - - self.logDebug("Download Check triggered") - - with open(lastDownload, "rb") as f: - content = f.read(read_size) - - # produces encoding errors, better log to other file in the future? - # self.logDebug("Content: %s" % content) - for name, rule in rules.iteritems(): - if isinstance(rule, basestring): - if rule in content: - if delete: - os.remove(lastDownload) - return name - - elif hasattr(rule, "search"): - m = rule.search(content) - if m: - if delete: - os.remove(lastDownload) - self.lastCheck = m - return name def directLink(self, url, follow_location=None): diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py index 1782e17f0..2d41ab39e 100644 --- a/module/plugins/internal/OCR.py +++ b/module/plugins/internal/OCR.py @@ -11,12 +11,13 @@ except ImportError: import logging import os import subprocess -#import tempfile +# import tempfile +from module.plugins.internal.Plugin import Plugin from module.utils import save_join as fs_join -class OCR(object): +class OCR(Plugin): __name__ = "OCR" __type__ = "ocr" __version__ = "0.11" @@ -37,7 +38,7 @@ class OCR(object): def deactivate(self): - """delete all tmp images""" + """Delete all tmp images""" pass @@ -48,21 +49,21 @@ class OCR(object): def run(self, command): """Run a command""" - popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) popen.wait() - output = popen.stdout.read() +" | "+ popen.stderr.read() + output = popen.stdout.read() + " | " + popen.stderr.read() popen.stdout.close() popen.stderr.close() self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) - def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): - #tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None): + # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") try: tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") tmpTif.close() - #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") tmpTxt.close() @@ -78,10 +79,13 @@ class OCR(object): else: tessparams = ["tesseract"] - tessparams.extend( [os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")] ) + tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]) + + if pagesegmode: + tessparams.extend(["-psm", str(pagesegmode)]) if subset and (digits or lowercase or uppercase): - #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") with open(fs_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub: tmpSub.write("tessedit_char_whitelist ") @@ -151,11 +155,11 @@ class OCR(object): count = 0 try: - if pixels[x-1, y-1] != 255: + if pixels[x - 1, y - 1] != 255: count += 1 - if pixels[x-1, y] != 255: + if pixels[x - 1, y] != 255: count += 1 - if pixels[x-1, y + 1] != 255: + if pixels[x - 1, y + 1] != 255: count += 1 if pixels[x, y + 1] != 255: count += 1 @@ -163,19 +167,19 @@ class OCR(object): count += 1 if pixels[x + 1, y] != 255: count += 1 - if pixels[x + 1, y-1] != 255: + if pixels[x + 1, y - 1] != 255: count += 1 - if pixels[x, y-1] != 255: + if pixels[x, y - 1] != 255: count += 1 except Exception: pass - # not enough neighbors are dark pixels so mark this pixel - # to be changed to white + # not enough neighbors are dark pixels so mark this pixel + # to be changed to white if count < allowed: pixels[x, y] = 1 - # second pass: this time set all 1's to 255 (white) + # second pass: this time set all 1's to 255 (white) for x in xrange(w): for y in xrange(h): if pixels[x, y] == 1: @@ -185,7 +189,7 @@ class OCR(object): def derotate_by_average(self): - """rotate by checking each angle and guess most suitable""" + """Rotate by checking each angle and guess most suitable""" w, h = self.image.size pixels = self.pixels @@ -211,7 +215,6 @@ class OCR(object): if pixels[x, y] == 0: pixels[x, y] = 255 - count = {} for x in xrange(w): |