diff options
Diffstat (limited to 'module/plugins/internal/Hoster.py')
-rw-r--r-- | module/plugins/internal/Hoster.py | 595 |
1 files changed, 142 insertions, 453 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index a0cdb1e2e..26da436a5 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -2,352 +2,111 @@ from __future__ import with_statement -import inspect import os -import random -import time -import traceback -import urlparse - -from module.plugins.internal.Captcha import Captcha -from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip, - chunks, encode, exists, fixurl as _fixurl, replace_patterns, - seconds_to_midnight, set_cookie, set_cookies, parse_html_form, - parse_html_tag_attr_value, timestamp) -from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename - - -#@TODO: Remove in 0.4.10 -def parse_fileInfo(klass, url="", html=""): - info = klass.get_info(url, html) - return info['name'], info['size'], info['status'], info['url'] - - -#@TODO: Remove in 0.4.10 -def getInfo(urls): - #: result = [ .. (name, size, status, url) .. ] - pass +import re - -#@TODO: Remove in 0.4.10 -def create_getInfo(klass): - def get_info(urls): - for url in urls: - if hasattr(klass, "URL_REPLACEMENTS"): - url = replace_patterns(url, klass.URL_REPLACEMENTS) - yield parse_fileInfo(klass, url) - - return get_info +from module.plugins.internal.Base import Base, check_abort, create_getInfo, getInfo, parse_fileInfo +from module.plugins.internal.Plugin import Fail, Retry, encode, exists, fixurl, parse_name +from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename -class Hoster(Plugin): +class Hoster(Base): __name__ = "Hoster" __type__ = "hoster" - __version__ = "0.19" + __version__ = "0.34" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [] #: [("name", "type", "desc", "default")] + __config__ = [("use_premium" , "bool", "Use premium account if available" , True), + ("fallback_premium", "bool", "Fallback to free download if premium fails", True), + ("chk_filesize" , "bool", "Check file size" , True)] __description__ = """Base hoster plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), - ("spoob" , "spoob@pyload.org" ), - ("mkaay" , "mkaay@mkaay.de" ), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] def __init__(self, pyfile): - self._init(pyfile.m.core) - - #: Engage wan reconnection - self.wantReconnect = False #@TODO: Change to `want_reconnect` in 0.4.10 + super(Hoster, self).__init__(pyfile) #: Enable simultaneous processing of multiple downloads - self.multiDL = True #@TODO: Change to `multi_dl` in 0.4.10 self.limitDL = 0 #@TODO: Change to `limit_dl` in 0.4.10 - #: time.time() + wait in seconds - self.wait_until = 0 - self.waiting = False - - #: Account handler instance, see :py:class:`Account` - self.account = None - self.user = None - self.req = None #: Browser instance, see `network.Browser` - - #: Associated pyfile instance, see `PyFile` - self.pyfile = pyfile - - self.thread = None #: Holds thread in future - #: Location where the last call to download was saved - self.last_download = "" + self.last_download = None #: Re match of the last call to `checkDownload` self.last_check = None - #: Js engine, see `JsEngine` - self.js = self.pyload.js - - #: Captcha stuff - self.captcha = Captcha(self) - - #: Some plugins store html code here - self.html = None - - #: Dict of the amount of retries already made - self.retries = {} - self.retry_free = False #@TODO: Recheck in 0.4.10 + #: Restart flag + self.rst_free = False #@TODO: Recheck in 0.4.10 self._setup() self.init() - @classmethod - def get_info(cls, url="", html=""): - url = _fixurl(url) - url_p = urlparse.urlparse(url) - return {'name' : (url_p.path.split('/')[-1] or - url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or - url_p.netloc.split('.', 1)[0]), - 'size' : 0, - 'status': 3 if url else 8, - 'url' : url} - - - def init(self): - """ - Initialize the plugin (in addition to `__init__`) - """ - pass - - - def setup(self): - """ - Setup for enviroment and other things, called before downloading (possibly more than one time) - """ - pass - - def _setup(self): - if self.account: - self.req = self.pyload.requestFactory.getRequest(self.__name__, self.user) - self.chunk_limit = -1 #: -1 for unlimited - self.resume_download = True - self.premium = self.account.is_premium(self.user) - else: - self.req = self.pyload.requestFactory.getRequest(self.__name__) - self.chunk_limit = 1 - self.resume_download = False - self.premium = False - - - def load_account(self): - if self.req: - self.req.close() + super(Hoster, self)._setup() - if not self.account: - self.account = self.pyload.accountManager.getAccountPlugin(self.__name__) + self.last_download = None + self.last_check = None + self.rst_free = False - if self.account: - if not self.user: - self.user = self.account.select()[0] - if not self.user or not self.account.is_logged(self.user, True): - self.account = False + def load_account(self): + if self.rst_free: + self.account = False + self.user = None #@TODO: Remove in 0.4.10 + else: + super(Hoster, self).load_account() + # self.rst_free = False - def preprocessing(self, thread): + def _process(self, thread): """ Handles important things to do before starting """ self.thread = thread - if self.retry_free: - self.account = False - else: - self.load_account() #@TODO: Move to PluginThread in 0.4.10 - self.retry_free = False - self._setup() - self.setup() - - self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10 - if self.pyfile.abort: - self.abort() + # self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10 + self.check_abort() self.pyfile.setStatus("starting") - self.log_debug("PROCESS URL " + self.pyfile.url, "PLUGIN VERSION %s" % self.__version__) - - return self.process(self.pyfile) - - - def process(self, pyfile): - """ - The 'main' method of every plugin, you **have to** overwrite it - """ - raise NotImplementedError - - - def set_reconnect(self, reconnect): - reconnect = bool(reconnect) - - self.log_info(_("RECONNECT ") + ("enabled" if reconnect else "disabled")) - self.log_debug("Previous wantReconnect: %s" % self.wantReconnect) - - self.wantReconnect = reconnect - - - def set_wait(self, seconds, reconnect=None): - """ - Set a specific wait time later used with `wait` - - :param seconds: wait time in seconds - :param reconnect: True if a reconnect would avoid wait time - """ - wait_time = max(int(seconds), 1) - wait_until = time.time() + wait_time + 1 - - self.log_info(_("WAIT %d seconds") % wait_time) - self.log_debug("Previous waitUntil: %f" % self.pyfile.waitUntil) - - self.pyfile.waitUntil = wait_until - - if reconnect is not None: - self.set_reconnect(reconnect) - - - def wait(self, seconds=None, reconnect=None): - """ - Waits the time previously set - """ - pyfile = self.pyfile - - if seconds is not None: - self.set_wait(seconds) - - if reconnect is not None: - self.set_reconnect(reconnect) - - self.waiting = True - - status = pyfile.status #@NOTE: Remove in 0.4.10 - pyfile.setStatus("waiting") - - if not self.wantReconnect or self.account: - if self.account: - self.log_warning("Ignore reconnection due logged account") - - while pyfile.waitUntil > time.time(): - if pyfile.abort: - self.abort() - - time.sleep(2) - - else: - while pyfile.waitUntil > time.time(): - if pyfile.abort: - self.abort() - - if self.thread.m.reconnecting.isSet(): - self.waiting = False - self.wantReconnect = False - raise Reconnect - - self.thread.m.reconnecting.wait(2) - time.sleep(2) - - self.waiting = False - pyfile.status = status #@NOTE: Remove in 0.4.10 - - - def skip(self, reason=""): - """ - Skip and give reason - """ - raise Skip(encode(reason)) #@TODO: Remove `encode` in 0.4.10 - - - def abort(self, reason=""): - """ - Abort and give reason - """ - #@TODO: Remove in 0.4.10 - if reason: - self.pyfile.error = encode(reason) - - raise Abort - - - def offline(self, reason=""): - """ - Fail and indicate file is offline - """ - #@TODO: Remove in 0.4.10 - if reason: - self.pyfile.error = encode(reason) - - raise Fail("offline") - - - def temp_offline(self, reason=""): - """ - Fail and indicates file ist temporary offline, the core may take consequences - """ - #@TODO: Remove in 0.4.10 - if reason: - self.pyfile.error = encode(reason) - - raise Fail("temp. offline") + try: + self.log_debug("PROCESS URL " + self.pyfile.url, "PLUGIN VERSION %s" % self.__version__) #@TODO: Remove in 0.4.10 + self.process(self.pyfile) - def retry(self, max_tries=5, wait_time=1, reason=""): - """ - Retries and begin again from the beginning + self.check_abort() - :param max_tries: number of maximum retries - :param wait_time: time to wait in seconds - :param reason: reason for retrying, will be passed to fail if max_tries reached - """ - id = inspect.currentframe().f_back.f_lineno - if id not in self.retries: - self.retries[id] = 0 + self.log_debug("CHECK DOWNLOAD") #@TODO: Recheck in 0.4.10 + self._check_download() - if 0 < max_tries <= self.retries[id]: - self.fail(reason or _("Max retries reached")) + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 + if self.get_config('fallback_premium', True) and self.premium: + self.log_warning(_("Premium download failed"), e) + self.restart() - self.wait(wait_time, False) - - self.retries[id] += 1 - raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10 + else: + raise Fail(e) - def restart(self, reason=None, nopremium=False): - if not reason: - reason = _("Fallback to free download") if nopremium else _("Restart") + def restart(self, msg="", premium=False): + if not msg: + msg = _("Simple restart") if premium else _("Fallback to free download") - if nopremium: + if not premium: if self.premium: - self.retry_free = True + self.rst_free = True else: - self.fail("%s | %s" % (reason, _("Download was already free"))) + self.fail("%s | %s" % (msg, _("Download was already free"))) - raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10 - - - def fixurl(self, url): - url = _fixurl(url) - - if not urlparse.urlparse(url).scheme: - url_p = urlparse.urlparse(self.pyfile.url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - url = urlparse.urljoin(baseurl, url) - - return url + raise Retry(encode(msg)) #@TODO: Remove `encode` in 0.4.10 + @check_abort def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=True): """ Downloads the content at url to download folder @@ -361,23 +120,18 @@ class Hoster(Plugin): the filename will be changed if needed :return: The location where the file was saved """ - if self.pyfile.abort: - self.abort() - - url = self.fixurl(url) - - if not url or not isinstance(url, basestring): - self.fail(_("No url given")) - if self.pyload.debug: self.log_debug("DOWNLOAD URL " + url, - *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")]) + *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url", "_[1]")]) + + url = self.fixurl(url) - name = _fixurl(self.pyfile.name) - self.pyfile.name = urlparse.urlparse(name).path.split('/')[-1] or name + self.pyfile.name = parse_name(self.pyfile.name) #: Safe check self.captcha.correct() - self.check_for_same_files() + + if self.pyload.config.get("download", "skip_existing"): + self.check_filedupe() self.pyfile.setStatus("downloading") @@ -387,6 +141,7 @@ class Hoster(Plugin): if not exists(download_location): try: os.makedirs(download_location) + except Exception, e: self.fail(e) @@ -397,8 +152,7 @@ class Hoster(Plugin): self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, url, filename) - if self.pyfile.abort: - self.abort() + self.check_abort() try: newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, @@ -409,30 +163,60 @@ class Hoster(Plugin): #@TODO: Recheck in 0.4.10 if disposition and newname: - finalname = urlparse.urlparse(newname).path.split('/')[-1].split(' filename*=')[0] + finalname = parse_name(newname).split(' filename*=')[0] - if finalname != newname != self.pyfile.name: + if finalname != newname: try: - os.rename(fs_join(location, newname), fs_join(location, finalname)) + oldname_enc = fs_join(download_location, newname) + newname_enc = fs_join(download_location, finalname) + os.rename(oldname_enc, newname_enc) except OSError, e: self.log_warning(_("Error renaming `%s` to `%s`") % (newname, finalname), e) finalname = newname self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, finalname)) - self.pyfile.name = finalname - filename = os.path.join(location, finalname) + + self.pyfile.name = finalname + filename = os.path.join(location, finalname) self.set_permissions(fs_encode(filename)) self.last_download = filename - return self.last_download + return filename + + + def check_filesize(self, file_size, size_tolerance=1024): + """ + Checks the file size of the last downloaded file + + :param file_size: expected file size + :param size_tolerance: size check tolerance + """ + if not self.last_download: + return + + download_location = fs_encode(self.last_download) + download_size = os.stat(download_location).st_size + + if download_size < 1: + self.fail(_("Empty file")) + + elif file_size > 0: + diff = abs(file_size - download_size) + + if diff > size_tolerance: + self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s") + % (file_size, download_size)) + + elif diff != 0: + self.log_warning(_("File size is not equal to expected size")) - def check_download(self, rules, delete=False, file_size=0, size_tolerance=1024, read_size=1048576): + def check_file(self, rules, delete=False, read_size=1048576, file_size=0, size_tolerance=1024): """ - Checks the content of the last downloaded file, re match is saved to `lastCheck` + Checks the content of the last downloaded file, re match is saved to `last_check` :param rules: dict with names and rules to match (compiled regexp or strings) :param delete: delete if matched @@ -442,29 +226,13 @@ class Hoster(Plugin): :return: dictionary key of the first rule that matched """ do_delete = False - last_download = fs_encode(self.last_download) + last_download = fs_encode(self.last_download) #@TODO: Recheck in 0.4.10 if not self.last_download or not exists(last_download): - self.last_download = "" self.fail(self.pyfile.error or _("No file downloaded")) try: - download_size = os.stat(last_download).st_size - - if download_size < 1: - do_delete = True - self.fail(_("Empty file")) - - elif file_size > 0: - diff = abs(file_size - download_size) - - if diff > size_tolerance: - do_delete = True - self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s") - % (file_size, download_size)) - - elif diff != 0: - self.log_warning(_("File size is not equal to expected size")) + self.check_filesize(file_size, size_tolerance) with open(last_download, "rb") as f: content = f.read(read_size) @@ -479,7 +247,7 @@ class Hoster(Plugin): elif hasattr(rule, "search"): m = rule.search(content) - if m: + if m is not None: do_delete = True self.last_check = m return name @@ -490,133 +258,45 @@ class Hoster(Plugin): except OSError, e: self.log_warning(_("Error removing: %s") % last_download, e) - if self.pyload.debug: - traceback.print_exc() else: - self.last_download = "" - self.log_info(_("File deleted")) - - - def direct_link(self, url, follow_location=None): - link = "" - - if follow_location is None: - redirect = 1 - - elif type(follow_location) is int: - redirect = max(follow_location, 1) - - else: - redirect = self.get_config("maxredirs", 10, "UserAgentSwitcher") + self.log_info(_("File deleted: ") + self.last_download) + self.last_download = "" #: Recheck in 0.4.10 - for i in xrange(redirect): - try: - self.log_debug("Redirect #%d to: %s" % (i, url)) - header = self.load(url, just_header=True) - - except Exception: #: Bad bad bad... rewrite this part in 0.4.10 - res = self.load(url, - just_header=True, - req=self.pyload.requestFactory.getRequest()) - - header = {'code': req.code} - for line in res.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - - key, none, value = line.partition(":") - key = key.lower().strip() - value = value.strip() - - if key in header: - if type(header[key]) is list: - header[key].append(value) - else: - header[key] = [header[key], value] - else: - header[key] = value - - if 'content-disposition' in header: - link = url - - elif 'location' in header and header['location']: - location = header['location'] - - if not urlparse.urlparse(location).scheme: - url_p = urlparse.urlparse(url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - location = urlparse.urljoin(baseurl, location) - - if 'code' in header and header['code'] == 302: - link = location - - if follow_location: - url = location - continue - - else: - extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] - - if 'content-type' in header and header['content-type']: - mimetype = header['content-type'].split(';')[0].strip() - - elif extension: - mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" - else: - mimetype = "" - - if mimetype and (link or 'html' not in mimetype): - link = url - else: - link = "" + def _check_download(self): + if self.captcha.task and not self.last_download: + self.retry_captcha() - break + elif self.check_file({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}, + delete=True): + self.error(_("Empty file")) - else: - try: - self.log_error(_("Too many redirects")) - except Exception: - pass + elif self.get_config('chk_filesize', False) and self.info.get('size'): + # 10485760 is 10MB, tolerance is used when comparing displayed size on the hoster website to real size + # For example displayed size can be 1.46GB for example, but real size can be 1.4649853GB + self.check_filesize(self.info['size'], size_tolerance=10485760) - return link - - def parse_html_form(self, attr_str="", input_names={}): - return parse_html_form(attr_str, self.html, input_names) - - - def check_traffic_left(self): + def check_traffic(self): if not self.account: return True - traffic = self.account.get_data(self.user, True)['trafficleft'] + traffic = self.account.get_data('trafficleft') if traffic is None: return False - elif traffic == -1: + + elif traffic is -1: return True + else: - size = self.pyfile.size / 1024 - self.log_info(_("Filesize: %s KiB, Traffic left for user %s: %s KiB") % (size, self.user, traffic)) + size = self.pyfile.size / 1024 #@TODO: Remove in 0.4.10 + self.log_info(_("Filesize: %s KiB, Traffic left for user %s: %s KiB") % (size, self.account.user, traffic)) #@TODO: Rewrite in 0.4.10 return size <= traffic - def get_password(self): - """ - Get the password the user provided in the package - """ - return self.pyfile.package().password or "" - - - #: Deprecated method, use `check_for_same_files` instead (Remove in 0.4.10) - def checkForSameFiles(self, *args, **kwargs): - return self.check_for_same_files(*args, **kwargs) - - - def check_for_same_files(self, starting=False): + def check_filedupe(self): """ Checks if same file was/is downloaded within same package @@ -626,23 +306,32 @@ class Hoster(Plugin): pack = self.pyfile.package() for pyfile in self.pyload.files.cache.values(): - if pyfile != self.pyfile and pyfile.name is self.pyfile.name and pyfile.package().folder is pack.folder: - if pyfile.status in (0, 12): #: Finished or downloading - self.skip(pyfile.pluginname) - elif pyfile.status in (5, 7) and starting: #: A download is waiting/starting and was appenrently started before - self.skip(pyfile.pluginname) + if pyfile is self.pyfile: + continue - download_folder = self.pyload.config.get("general", "download_folder") - location = fs_join(download_folder, pack.folder, self.pyfile.name) + if pyfile.name != self.pyfile.name or pyfile.package().folder != pack.folder: + continue - if starting and self.pyload.config.get("download", "skip_existing") and exists(location): - size = os.stat(location).st_size - if size >= self.pyfile.size: - self.skip("File exists") + if pyfile.status in (0, 5, 7, 12): #: (finished, waiting, starting, downloading) + self.skip(pyfile.pluginname) - pyfile = self.pyload.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) + download_folder = self.pyload.config.get("general", "download_folder") + package_folder = pack.folder if self.pyload.config.get("general", "folder_per_package") else "" + download_location = fs_join(download_folder, package_folder, self.pyfile.name) + + if not exists(download_location): + return + + pyfile = self.pyload.db.findDuplicates(self.pyfile.id, package_folder, self.pyfile.name) if pyfile: - if exists(location): - self.skip(pyfile[0]) + self.skip(pyfile[0]) + + size = os.stat(download_location).st_size + if size >= self.pyfile.size: + self.skip(_("File exists")) - self.log_debug("File %s not skipped, because it does not exists." % self.pyfile.name) + + #: Deprecated method, use `check_filedupe` instead (Remove in 0.4.10) + def checkForSameFiles(self, *args, **kwargs): + if self.pyload.config.get("download", "skip_existing"): + return self.check_filedupe() |