Diffstat (limited to 'module/plugins/internal/Hoster.py')
-rw-r--r--  module/plugins/internal/Hoster.py  648
1 files changed, 648 insertions, 0 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py
new file mode 100644
index 000000000..a0cdb1e2e
--- /dev/null
+++ b/module/plugins/internal/Hoster.py
@@ -0,0 +1,648 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+import inspect
+import mimetypes
+import os
+import random
+import time
+import traceback
+import urlparse
+
+from module.plugins.internal.Captcha import Captcha
+from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip,
+ chunks, encode, exists, fixurl as _fixurl, replace_patterns,
+ seconds_to_midnight, set_cookie, set_cookies, parse_html_form,
+ parse_html_tag_attr_value, timestamp)
+from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename
+
+
+#@TODO: Remove in 0.4.10
+def parse_fileInfo(klass, url="", html=""):
+ info = klass.get_info(url, html)
+ return info['name'], info['size'], info['status'], info['url']
+
+
+#@TODO: Remove in 0.4.10
+def getInfo(urls):
+ #: result = [ .. (name, size, status, url) .. ]
+ pass
+
+
+#@TODO: Remove in 0.4.10
+def create_getInfo(klass):
+ def get_info(urls):
+ for url in urls:
+ if hasattr(klass, "URL_REPLACEMENTS"):
+ url = replace_patterns(url, klass.URL_REPLACEMENTS)
+ yield parse_fileInfo(klass, url)
+
+ return get_info
+
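+# Illustrative usage (assumption, not part of the original file): a concrete hoster
+# module would typically expose bulk status checks by ending with
+#   getInfo = create_getInfo(MyHosterPlugin)
+# where MyHosterPlugin is the hypothetical plugin class defined in that module.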
+
+class Hoster(Plugin):
+ __name__ = "Hoster"
+ __type__ = "hoster"
+ __version__ = "0.19"
+ __status__ = "testing"
+
+ __pattern__ = r'^unmatchable$'
+ __config__ = [] #: [("name", "type", "desc", "default")]
+
+ __description__ = """Base hoster plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("RaNaN" , "RaNaN@pyload.org" ),
+ ("spoob" , "spoob@pyload.org" ),
+ ("mkaay" , "mkaay@mkaay.de" ),
+ ("Walter Purcaro", "vuolter@gmail.com")]
+
+
+ def __init__(self, pyfile):
+ self._init(pyfile.m.core)
+
+ #: Engage wan reconnection
+ self.wantReconnect = False #@TODO: Change to `want_reconnect` in 0.4.10
+
+ #: Enable simultaneous processing of multiple downloads
+ self.multiDL = True #@TODO: Change to `multi_dl` in 0.4.10
+ self.limitDL = 0 #@TODO: Change to `limit_dl` in 0.4.10
+
+ #: time.time() + wait in seconds
+ self.wait_until = 0
+ self.waiting = False
+
+ #: Account handler instance, see :py:class:`Account`
+ self.account = None
+ self.user = None
+ self.req = None #: Browser instance, see `network.Browser`
+
+ #: Associated pyfile instance, see `PyFile`
+ self.pyfile = pyfile
+
+ self.thread = None #: Holds the processing thread (set in `preprocessing`)
+
+ #: Location where the last call to download was saved
+ self.last_download = ""
+
+ #: Regex match from the last call to `check_download`
+ self.last_check = None
+
+ #: Js engine, see `JsEngine`
+ self.js = self.pyload.js
+
+ #: Captcha stuff
+ self.captcha = Captcha(self)
+
+ #: Some plugins store html code here
+ self.html = None
+
+ #: Dict of the amount of retries already made
+ self.retries = {}
+ self.retry_free = False #@TODO: Recheck in 0.4.10
+
+ self._setup()
+ self.init()
+
+
+ @classmethod
+ def get_info(cls, url="", html=""):
+ url = _fixurl(url)
+ url_p = urlparse.urlparse(url)
+ return {'name' : (url_p.path.split('/')[-1] or
+ url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or
+ url_p.netloc.split('.', 1)[0]),
+ 'size' : 0,
+ 'status': 3 if url else 8,
+ 'url' : url}
+
+
+ def init(self):
+ """
+ Initialize the plugin (in addition to `__init__`)
+ """
+ pass
+
+
+ def setup(self):
+ """
+ Set up the environment and other prerequisites; called before downloading (possibly more than once)
+ """
+ pass
+
+
+ def _setup(self):
+ if self.account:
+ self.req = self.pyload.requestFactory.getRequest(self.__name__, self.user)
+ self.chunk_limit = -1 #: -1 for unlimited
+ self.resume_download = True
+ self.premium = self.account.is_premium(self.user)
+ else:
+ self.req = self.pyload.requestFactory.getRequest(self.__name__)
+ self.chunk_limit = 1
+ self.resume_download = False
+ self.premium = False
+
+
+ def load_account(self):
+ if self.req:
+ self.req.close()
+
+ if not self.account:
+ self.account = self.pyload.accountManager.getAccountPlugin(self.__name__)
+
+ if self.account:
+ if not self.user:
+ self.user = self.account.select()[0]
+
+ if not self.user or not self.account.is_logged(self.user, True):
+ self.account = False
+
+
+ def preprocessing(self, thread):
+ """
+ Handle the preparations required before processing starts
+ """
+ self.thread = thread
+
+ if self.retry_free:
+ self.account = False
+ else:
+ self.load_account() #@TODO: Move to PluginThread in 0.4.10
+ self.retry_free = False
+
+ self._setup()
+ self.setup()
+
+ self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10
+
+ if self.pyfile.abort:
+ self.abort()
+
+ self.pyfile.setStatus("starting")
+ self.log_debug("PROCESS URL " + self.pyfile.url, "PLUGIN VERSION %s" % self.__version__)
+
+ return self.process(self.pyfile)
+
+
+ def process(self, pyfile):
+ """
+ The 'main' method of every hoster plugin, you **have to** override it
+ """
+ raise NotImplementedError
+
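+ # Illustrative sketch (assumption, not from the original file): a minimal subclass
+ # could implement process() roughly like this, where FILE_OFFLINE_PATTERN and the
+ # handle_free helper are hypothetical plugin-specific details and `re` is imported
+ # by the subclass:
+ #
+ #     def process(self, pyfile):
+ #         self.html = self.load(pyfile.url)
+ #         if re.search(self.FILE_OFFLINE_PATTERN, self.html):
+ #             self.offline()
+ #         link = self.fixurl(self.handle_free(pyfile))  #: plugin-specific helper
+ #         self.download(link, disposition=True)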
+
+ def set_reconnect(self, reconnect):
+ reconnect = bool(reconnect)
+
+ self.log_info(_("RECONNECT ") + ("enabled" if reconnect else "disabled"))
+ self.log_debug("Previous wantReconnect: %s" % self.wantReconnect)
+
+ self.wantReconnect = reconnect
+
+
+ def set_wait(self, seconds, reconnect=None):
+ """
+ Set a specific wait time later used with `wait`
+
+ :param seconds: wait time in seconds
+ :param reconnect: True if a reconnect would avoid wait time
+ """
+ wait_time = max(int(seconds), 1)
+ wait_until = time.time() + wait_time + 1
+
+ self.log_info(_("WAIT %d seconds") % wait_time)
+ self.log_debug("Previous waitUntil: %f" % self.pyfile.waitUntil)
+
+ self.pyfile.waitUntil = wait_until
+
+ if reconnect is not None:
+ self.set_reconnect(reconnect)
+
+
+ def wait(self, seconds=None, reconnect=None):
+ """
+ Waits the time previously set
+ """
+ pyfile = self.pyfile
+
+ if seconds is not None:
+ self.set_wait(seconds)
+
+ if reconnect is not None:
+ self.set_reconnect(reconnect)
+
+ self.waiting = True
+
+ status = pyfile.status #@NOTE: Remove in 0.4.10
+ pyfile.setStatus("waiting")
+
+ if not self.wantReconnect or self.account:
+ if self.account:
+ self.log_warning("Ignore reconnection due logged account")
+
+ while pyfile.waitUntil > time.time():
+ if pyfile.abort:
+ self.abort()
+
+ time.sleep(2)
+
+ else:
+ while pyfile.waitUntil > time.time():
+ if pyfile.abort:
+ self.abort()
+
+ if self.thread.m.reconnecting.isSet():
+ self.waiting = False
+ self.wantReconnect = False
+ raise Reconnect
+
+ self.thread.m.reconnecting.wait(2)
+ time.sleep(2)
+
+ self.waiting = False
+ pyfile.status = status #@NOTE: Remove in 0.4.10
+
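+ # Illustrative usage (assumption, not from the original docs): plugins usually
+ # pair set_wait() and wait(), e.g.
+ #   self.set_wait(60, reconnect=False)
+ #   self.wait()
+ # or simply self.wait(60) to set the wait time and wait in one call.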
+
+ def skip(self, reason=""):
+ """
+ Skip and give reason
+ """
+ raise Skip(encode(reason)) #@TODO: Remove `encode` in 0.4.10
+
+
+ def abort(self, reason=""):
+ """
+ Abort and give reason
+ """
+ #@TODO: Remove in 0.4.10
+ if reason:
+ self.pyfile.error = encode(reason)
+
+ raise Abort
+
+
+ def offline(self, reason=""):
+ """
+ Fail and indicate file is offline
+ """
+ #@TODO: Remove in 0.4.10
+ if reason:
+ self.pyfile.error = encode(reason)
+
+ raise Fail("offline")
+
+
+ def temp_offline(self, reason=""):
+ """
+ Fail and indicate the file is temporarily offline; the core may take consequences
+ """
+ #@TODO: Remove in 0.4.10
+ if reason:
+ self.pyfile.error = encode(reason)
+
+ raise Fail("temp. offline")
+
+
+ def retry(self, max_tries=5, wait_time=1, reason=""):
+ """
+ Retry and begin processing again from the beginning
+
+ :param max_tries: maximum number of retries
+ :param wait_time: time to wait in seconds before retrying
+ :param reason: reason for retrying; passed to `fail` if max_tries is reached
+ """
+ id = inspect.currentframe().f_back.f_lineno
+ if id not in self.retries:
+ self.retries[id] = 0
+
+ if 0 < max_tries <= self.retries[id]:
+ self.fail(reason or _("Max retries reached"))
+
+ self.wait(wait_time, False)
+
+ self.retries[id] += 1
+ raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10
+
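+ # Illustrative usage (assumption): a subclass hitting a transient server error
+ # might call
+ #   self.retry(max_tries=3, wait_time=60, reason=_("Server busy"))
+ # which waits 60 seconds and re-runs process() up to three times before failing.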
+
+ def restart(self, reason=None, nopremium=False):
+ if not reason:
+ reason = _("Fallback to free download") if nopremium else _("Restart")
+
+ if nopremium:
+ if self.premium:
+ self.retry_free = True
+ else:
+ self.fail("%s | %s" % (reason, _("Download was already free")))
+
+ raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10
+
+
+ def fixurl(self, url):
+ url = _fixurl(url)
+
+ if not urlparse.urlparse(url).scheme:
+ url_p = urlparse.urlparse(self.pyfile.url)
+ baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+ url = urlparse.urljoin(baseurl, url)
+
+ return url
+
+
+ def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=True):
+ """
+ Downloads the content at url to download folder
+
+ :param url:
+ :param get:
+ :param post:
+ :param ref:
+ :param cookies:
+ :param disposition: if True and server provides content-disposition header\
+ the filename will be changed if needed
+ :return: The location where the file was saved
+ """
+ if self.pyfile.abort:
+ self.abort()
+
+ url = self.fixurl(url)
+
+ if not url or not isinstance(url, basestring):
+ self.fail(_("No url given"))
+
+ if self.pyload.debug:
+ self.log_debug("DOWNLOAD URL " + url,
+ *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])
+
+ name = _fixurl(self.pyfile.name)
+ self.pyfile.name = urlparse.urlparse(name).path.split('/')[-1] or name
+
+ self.captcha.correct()
+ self.check_for_same_files()
+
+ self.pyfile.setStatus("downloading")
+
+ download_folder = self.pyload.config.get("general", "download_folder")
+ download_location = fs_join(download_folder, self.pyfile.package().folder)
+
+ if not exists(download_location):
+ try:
+ os.makedirs(download_location)
+ except Exception, e:
+ self.fail(e)
+
+ self.set_permissions(download_location)
+
+ location = fs_decode(download_location)
+ filename = os.path.join(location, safe_filename(self.pyfile.name)) #@TODO: Move `safe_filename` check to HTTPDownload in 0.4.10
+
+ self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, url, filename)
+
+ if self.pyfile.abort:
+ self.abort()
+
+ try:
+ newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies,
+ chunks=self.get_chunk_count(), resume=self.resume_download,
+ progressNotify=self.pyfile.setProgress, disposition=disposition)
+ finally:
+ self.pyfile.size = self.req.size
+
+ #@TODO: Recheck in 0.4.10
+ if disposition and newname:
+ finalname = urlparse.urlparse(newname).path.split('/')[-1].split(' filename*=')[0]
+
+ if finalname != newname != self.pyfile.name:
+ try:
+ os.rename(fs_join(location, newname), fs_join(location, finalname))
+
+ except OSError, e:
+ self.log_warning(_("Error renaming `%s` to `%s`") % (newname, finalname), e)
+ finalname = newname
+
+ self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, finalname))
+ self.pyfile.name = finalname
+ filename = os.path.join(location, finalname)
+
+ self.set_permissions(fs_encode(filename))
+
+ self.last_download = filename
+
+ return self.last_download
+
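+ # Illustrative usage (assumption): inside process() a plugin would typically call
+ #   self.download(link, disposition=True)
+ # and may afterwards inspect self.last_download or run self.check_download(...).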
+
+ def check_download(self, rules, delete=False, file_size=0, size_tolerance=1024, read_size=1048576):
+ """
+ Check the content of the last downloaded file; the regex match is saved to `last_check`
+
+ :param rules: dict with names and rules to match (compiled regexp or strings)
+ :param delete: delete if matched
+ :param file_size: expected file size
+ :param size_tolerance: size check tolerance
+ :param read_size: amount of bytes to read from files
+ :return: dictionary key of the first rule that matched
+ """
+ do_delete = False
+ last_download = fs_encode(self.last_download)
+
+ if not self.last_download or not exists(last_download):
+ self.last_download = ""
+ self.fail(self.pyfile.error or _("No file downloaded"))
+
+ try:
+ download_size = os.stat(last_download).st_size
+
+ if download_size < 1:
+ do_delete = True
+ self.fail(_("Empty file"))
+
+ elif file_size > 0:
+ diff = abs(file_size - download_size)
+
+ if diff > size_tolerance:
+ do_delete = True
+ self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s")
+ % (file_size, download_size))
+
+ elif diff != 0:
+ self.log_warning(_("File size is not equal to expected size"))
+
+ with open(last_download, "rb") as f:
+ content = f.read(read_size)
+
+ #: Logging the raw content produces encoding errors; better to log it to a separate file in the future?
+ # self.log_debug("Content: %s" % content)
+ for name, rule in rules.items():
+ if isinstance(rule, basestring):
+ if rule in content:
+ do_delete = True
+ return name
+
+ elif hasattr(rule, "search"):
+ m = rule.search(content)
+ if m:
+ do_delete = True
+ self.last_check = m
+ return name
+ finally:
+ if delete and do_delete:
+ try:
+ os.remove(last_download)
+
+ except OSError, e:
+ self.log_warning(_("Error removing: %s") % last_download, e)
+ if self.pyload.debug:
+ traceback.print_exc()
+
+ else:
+ self.last_download = ""
+ self.log_info(_("File deleted"))
+
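+ # Illustrative usage (assumption, `re` imported by the subclass): after download()
+ # a plugin might verify the payload, e.g.
+ #   check = self.check_download({'html' : re.compile(r'<html'),
+ #                                'error': "Download limit exceeded"})
+ #   if check == "error":
+ #       self.retry(wait_time=600, reason=_("Download limit exceeded"))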
+
+ def direct_link(self, url, follow_location=None):
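+ """
+ Check whether `url` already points to the file (content-disposition) or follow
+ HTTP redirects to find it; returns the direct link or an empty string
+ """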
+ link = ""
+
+ if follow_location is None:
+ redirect = 1
+
+ elif type(follow_location) is int:
+ redirect = max(follow_location, 1)
+
+ else:
+ redirect = self.get_config("maxredirs", 10, "UserAgentSwitcher")
+
+ for i in xrange(redirect):
+ try:
+ self.log_debug("Redirect #%d to: %s" % (i, url))
+ header = self.load(url, just_header=True)
+
+ except Exception: #: Bad bad bad... rewrite this part in 0.4.10
+ req = self.pyload.requestFactory.getRequest()
+ res = self.load(url, just_header=True, req=req)
+
+ header = {'code': req.code}
+ for line in res.splitlines():
+ line = line.strip()
+ if not line or ":" not in line:
+ continue
+
+ key, none, value = line.partition(":")
+ key = key.lower().strip()
+ value = value.strip()
+
+ if key in header:
+ if type(header[key]) is list:
+ header[key].append(value)
+ else:
+ header[key] = [header[key], value]
+ else:
+ header[key] = value
+
+ if 'content-disposition' in header:
+ link = url
+
+ elif 'location' in header and header['location']:
+ location = header['location']
+
+ if not urlparse.urlparse(location).scheme:
+ url_p = urlparse.urlparse(url)
+ baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+ location = urlparse.urljoin(baseurl, location)
+
+ if 'code' in header and header['code'] == 302:
+ link = location
+
+ if follow_location:
+ url = location
+ continue
+
+ else:
+ extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
+
+ if 'content-type' in header and header['content-type']:
+ mimetype = header['content-type'].split(';')[0].strip()
+
+ elif extension:
+ mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+ else:
+ mimetype = ""
+
+ if mimetype and (link or 'html' not in mimetype):
+ link = url
+ else:
+ link = ""
+
+ break
+
+ else:
+ try:
+ self.log_error(_("Too many redirects"))
+ except Exception:
+ pass
+
+ return link
+
+
+ def parse_html_form(self, attr_str="", input_names={}):
+ return parse_html_form(attr_str, self.html, input_names)
+
+
+ def check_traffic_left(self):
+ if not self.account:
+ return True
+
+ traffic = self.account.get_data(self.user, True)['trafficleft']
+
+ if traffic is None:
+ return False
+ elif traffic == -1:
+ return True
+ else:
+ size = self.pyfile.size / 1024
+ self.log_info(_("Filesize: %s KiB, Traffic left for user %s: %s KiB") % (size, self.user, traffic))
+ return size <= traffic
+
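+ # Illustrative usage (assumption): account-aware plugins commonly guard premium
+ # downloads with
+ #   if not self.check_traffic_left():
+ #       self.fail(_("Not enough traffic left"))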
+
+ def get_password(self):
+ """
+ Get the password the user provided in the package
+ """
+ return self.pyfile.package().password or ""
+
+
+ #: Deprecated method, use `check_for_same_files` instead (Remove in 0.4.10)
+ def checkForSameFiles(self, *args, **kwargs):
+ return self.check_for_same_files(*args, **kwargs)
+
+
+ def check_for_same_files(self, starting=False):
+ """
+ Checks if same file was/is downloaded within same package
+
+ :param starting: indicates that the current download is going to start
+ :raises Skip:
+ """
+ pack = self.pyfile.package()
+
+ for pyfile in self.pyload.files.cache.values():
+ if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder:
+ if pyfile.status in (0, 12): #: Finished or downloading
+ self.skip(pyfile.pluginname)
+ elif pyfile.status in (5, 7) and starting: #: A download is waiting/starting and was apparently started before
+ self.skip(pyfile.pluginname)
+
+ download_folder = self.pyload.config.get("general", "download_folder")
+ location = fs_join(download_folder, pack.folder, self.pyfile.name)
+
+ if starting and self.pyload.config.get("download", "skip_existing") and exists(location):
+ size = os.stat(location).st_size
+ if size >= self.pyfile.size:
+ self.skip("File exists")
+
+ pyfile = self.pyload.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name)
+ if pyfile:
+ if exists(location):
+ self.skip(pyfile[0])
+
+ self.log_debug("File %s not skipped, because it does not exists." % self.pyfile.name)