diff options
Diffstat (limited to 'module/plugins/internal/Hoster.py')
-rw-r--r-- | module/plugins/internal/Hoster.py | 704 |
1 files changed, 681 insertions, 23 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 3fed8a7c6..9db22d1d7 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -1,33 +1,691 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +from __future__ import with_statement - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import inspect +import os +import random +import time +import urlparse - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +if os.name != "nt": + import grp + import pwd - @author: mkaay -""" +from module.plugins.internal.Plugin import Plugin, Abort, Fail, Reconnect, Retry, Skip +from module.utils import fs_decode, fs_encode, save_join as fs_join -from module.plugins.internal.Plugin import Plugin -def getInfo(self): - #result = [ .. (name, size, status, url) .. ] - return +def getInfo(urls): + #result = [ .. (name, size, status, url) .. ] + pass + class Hoster(Plugin): - __name__ = "Hoster" - __version__ = "0.02" - __pattern__ = None - __type__ = "hoster" + __name__ = "Hoster" + __type__ = "hoster" + __version__ = "0.03" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + __description__ = """Base hoster plugin""" - __author_name__ = ("mkaay") - __author_mail__ = ("mkaay@mkaay.de") + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, pyfile): + super(Hoster, self).__init__(pyfile.m.core) + + #: engage wan reconnection + self.wantReconnect = False + + #: enable simultaneous processing of multiple downloads + self.multiDL = True + self.limitDL = 0 + + #: chunk limit + self.chunkLimit = 1 + self.resumeDownload = False + + #: time.time() + wait in seconds + self.waitUntil = 0 + self.waiting = False + + #: captcha reader instance + self.ocr = None + + #: account handler instance, see :py:class:`Account` + self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + + #: premium status + self.premium = False + + #: username/login + self.user = None + + if self.account and not self.account.canUse(): + self.account = None + + if self.account: + self.user, data = self.account.selectAccount() + + #: Browser instance, see `network.Browser` + self.req = self.account.getAccountRequest(self.user) + self.chunkLimit = -1 #: chunk limit, -1 for unlimited + + #: enables resume (will be ignored if server dont accept chunks) + self.resumeDownload = True + + #: premium status + self.premium = self.account.isPremium(self.user) + else: + self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) + + #: associated pyfile instance, see `PyFile` + self.pyfile = pyfile + + self.thread = None #: holds thread in future + + #: location where the last call to download was saved + self.lastDownload = "" + + #: re match of the last call to `checkDownload` + self.lastCheck = None + + #: js engine, see `JsEngine` + self.js = self.core.js + + #: captcha task + self.cTask = None + + #: some plugins store html code here + self.html = None + + #: dict of the amount of retries already made + self.retries = {} + + self.init() + + + def init(self): + """ + Initialize the plugin (in addition to `__init__`) + """ + pass + + + def setup(self): + """ + Setup for enviroment and other things, called before downloading (possibly more than one time) + """ + pass + + + def preprocessing(self, thread): + """ + Handles important things to do before starting + """ + self.thread = thread + + if self.account: + self.account.checkLogin(self.user) + else: + self.req.clearCookies() + + self.setup() + + self.pyfile.setStatus("starting") + + return self.process(self.pyfile) + + + def process(self, pyfile): + """ + The 'main' method of every plugin, you **have to** overwrite it + """ + raise NotImplementedError + + + def getChunkCount(self): + if self.chunkLimit <= 0: + return self.core.config.get("download", "chunks") + return min(self.core.config.get("download", "chunks"), self.chunkLimit) + + + def resetAccount(self): + """ + Don't use account and retry download + """ + self.account = None + self.req = self.core.requestFactory.getRequest(self.__name__) + self.retry() + + + def setReconnect(self, reconnect): + reconnect = bool(reconnect) + self.logDebug("Set wantReconnect to: %s (previous: %s)" % (reconnect, self.wantReconnect)) + self.wantReconnect = reconnect + + + def setWait(self, seconds, reconnect=None): + """ + Set a specific wait time later used with `wait` + + :param seconds: wait time in seconds + :param reconnect: True if a reconnect would avoid wait time + """ + wait_time = int(seconds) + 1 + wait_until = time.time() + wait_time + + self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.pyfile.waitUntil), + "Wait: %d seconds" % wait_time) + + self.pyfile.waitUntil = wait_until + + if reconnect is not None: + self.setReconnect(reconnect) + + + def wait(self, seconds=None, reconnect=None): + """ + Waits the time previously set + """ + pyfile = self.pyfile + + if seconds is not None: + self.setWait(seconds) + + if reconnect is not None: + self.setReconnect(reconnect) + + self.waiting = True + + status = pyfile.status + pyfile.setStatus("waiting") + + self.logInfo(_("Wait: %d seconds") % (pyfile.waitUntil - time.time()), + _("Reconnect: %s") % self.wantReconnect) + + if self.account: + self.logDebug("Ignore reconnection due account logged") + + while pyfile.waitUntil > time.time(): + if pyfile.abort: + self.abort() + + time.sleep(1) + else: + while pyfile.waitUntil > time.time(): + self.thread.m.reconnecting.wait(2) + + if pyfile.abort: + self.abort() + + if self.thread.m.reconnecting.isSet(): + self.waiting = False + self.wantReconnect = False + raise Reconnect + + time.sleep(1) + + self.waiting = False + + pyfile.status = status + + + def skip(self, reason=""): + """ + Skip and give reason + """ + raise Skip(fs_encode(reason)) + + + def abort(self, reason=""): + """ + Abort and give reason + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Abort + + + def offline(self, reason=""): + """ + Fail and indicate file is offline + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Fail("offline") + + + def tempOffline(self, reason=""): + """ + Fail and indicates file ist temporary offline, the core may take consequences + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Fail("temp. offline") + + + def retry(self, max_tries=5, wait_time=1, reason=""): + """ + Retries and begin again from the beginning + + :param max_tries: number of maximum retries + :param wait_time: time to wait in seconds + :param reason: reason for retrying, will be passed to fail if max_tries reached + """ + id = inspect.currentframe().f_back.f_lineno + if id not in self.retries: + self.retries[id] = 0 + + if 0 < max_tries <= self.retries[id]: + self.fail(reason or _("Max retries reached"), "retry") + + self.wait(wait_time, False) + + self.retries[id] += 1 + raise Retry(reason) + + + def invalidCaptcha(self): + self.logError(_("Invalid captcha")) + if self.cTask: + self.cTask.invalid() + + + def correctCaptcha(self): + self.logInfo(_("Correct captcha")) + if self.cTask: + self.cTask.correct() + + + def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, + imgtype='jpg', result_type='textual'): + """ + Loads a captcha and decrypts it with ocr, plugin, user input + + :param url: url of captcha image + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param forceUser: if True, ocr is not used + :param imgtype: Type of the Image + :param result_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + + :return: result of decrypting + """ + + img = self.load(url, get=get, post=post, cookies=cookies) + + id = ("%.2f" % time.time())[-6:].replace(".", "") + + with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") as tmpCaptcha: + tmpCaptcha.write(img) + + has_plugin = self.__name__ in self.core.pluginManager.ocrPlugins + + if self.core.captcha: + Ocr = self.core.pluginManager.loadClass("ocr", self.__name__) + else: + Ocr = None + + if Ocr and not forceUser: + time.sleep(random.randint(3000, 5000) / 1000.0) + if self.pyfile.abort: + self.abort() + + ocr = Ocr() + result = ocr.get_captcha(tmpCaptcha.name) + else: + captchaManager = self.core.captchaManager + task = captchaManager.newTask(img, imgtype, tmpCaptcha.name, result_type) + self.cTask = task + captchaManager.handleCaptcha(task) + + while task.isWaiting(): + if self.pyfile.abort: + captchaManager.removeTask(task) + self.abort() + time.sleep(1) + + captchaManager.removeTask(task) + + if task.error and has_plugin: #: ignore default error message since the user could use OCR + self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) + elif task.error: + self.fail(task.error) + elif not task.result: + self.fail(_("No captcha result obtained in appropiate time by any of the plugins")) + + result = task.result + self.logDebug("Received captcha result: %s" % result) + + if not self.core.debug: + try: + os.remove(tmpCaptcha.name) + except Exception: + pass + + return result + + + def fixurl(self, url): + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + + url = super(Hoster, self).fixurl(url) + + if not urlparse.urlparse(url).scheme: + url = urlparse.urljoin(baseurl, url) + + return url + + + def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): + """ + Downloads the content at url to download folder + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param disposition: if True and server provides content-disposition header\ + the filename will be changed if needed + :return: The location where the file was saved + """ + if self.pyfile.abort: + self.abort() + + url = self.fixurl(url) + + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) + + if self.core.debug: + self.logDebug("Download url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) + + self.correctCaptcha() + self.checkForSameFiles() + + self.pyfile.setStatus("downloading") + + if disposition: + self.pyfile.name = urlparse.urlparse(url).path.split('/')[-1] or self.pyfile.name + + download_folder = self.core.config.get("general", "download_folder") + + location = fs_join(download_folder, self.pyfile.package().folder) + + if not os.path.exists(location): + try: + os.makedirs(location, int(self.core.config.get("permission", "folder"), 8)) + + if self.core.config.get("permission", "change_dl") and os.name != "nt": + uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2] + gid = grp.getgrnam(self.core.config.get("permission", "group"))[2] + os.chown(location, uid, gid) + + except Exception, e: + self.fail(e) + + # convert back to unicode + location = fs_decode(location) + name = safe_filename(self.pyfile.name) + + filename = os.path.join(location, name) + + self.core.addonManager.dispatchEvent("download-start", self.pyfile, url, filename) + + try: + newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, + chunks=self.getChunkCount(), resume=self.resumeDownload, + progressNotify=self.pyfile.setProgress, disposition=disposition) + finally: + self.pyfile.size = self.req.size + + if newname: + newname = urlparse.urlparse(newname).path.split('/')[-1] + + if disposition and newname != name: + self.logInfo(_("%(name)s saved as %(newname)s") % {"name": name, "newname": newname}) + self.pyfile.name = newname + filename = os.path.join(location, newname) + + fs_filename = fs_encode(filename) + + if self.core.config.get("permission", "change_file"): + try: + os.chmod(fs_filename, int(self.core.config.get("permission", "file"), 8)) + except Exception, e: + self.logWarning(_("Setting file mode failed"), e) + + if self.core.config.get("permission", "change_dl") and os.name != "nt": + try: + uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2] + gid = grp.getgrnam(self.core.config.get("permission", "group"))[2] + os.chown(fs_filename, uid, gid) + + except Exception, e: + self.logWarning(_("Setting User and Group failed"), e) + + self.lastDownload = filename + return self.lastDownload + + + def checkDownload(self, rules, delete=True, file_size=None, size_tolerance=1000, read_size=100000): + """ + Checks the content of the last downloaded file, re match is saved to `lastCheck` + + :param rules: dict with names and rules to match (compiled regexp or strings) + :param delete: delete if matched + :param file_size: expected file size + :param size_tolerance: size check tolerance + :param read_size: amount of bytes to read from files + :return: dictionary key of the first rule that matched + """ + lastDownload = fs_encode(self.lastDownload) + + if not self.lastDownload or not os.path.exists(lastDownload): + self.lastDownload = "" + self.fail(self.pyfile.error or _("No file downloaded")) + + download_size = os.stat(lastDownload).st_size + + if download_size < 1 or (file_size and abs(file_size - download_size) > size_tolerance): + if delete: + os.remove(lastDownload) + self.fail(_("Empty file")) + + self.logDebug("Download Check triggered") + + with open(lastDownload, "rb") as f: + content = f.read(read_size) + + # produces encoding errors, better log to other file in the future? + # self.logDebug("Content: %s" % content) + for name, rule in rules.iteritems(): + if isinstance(rule, basestring): + if rule in content: + if delete: + os.remove(lastDownload) + return name + + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + if delete: + os.remove(lastDownload) + self.lastCheck = m + return name + + + def directLink(self, url, follow_location=None): + link = "" + + if follow_location is None: + redirect = 1 + + elif type(follow_location) is int: + redirect = max(follow_location, 1) + + else: + redirect = self.getConfig("maxredirs", plugin="UserAgentSwitcher") + + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, just_header=True) + + except Exception: #: Bad bad bad... rewrite this part in 0.4.10 + req = pyreq.getHTTPRequest() + res = self.load(url, just_header=True) + + req.close() + + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + + if 'content-disposition' in header: + link = url + + elif 'location' in header and header['location']: + location = header['location'] + + if not urlparse.urlparse(location).scheme: + url_p = urlparse.urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urlparse.urljoin(baseurl, location) + + if 'code' in header and header['code'] == 302: + link = location + + if follow_location: + url = location + continue + + else: + extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] + + if 'content-type' in header and header['content-type']: + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" + + break + + else: + try: + self.logError(_("Too many redirects")) + except Exception: + pass + + return link + + + def parseHtmlForm(self, attr_str="", input_names={}): + return parseHtmlForm(attr_str, self.html, input_names) + + + def checkTrafficLeft(self): + if not self.account: + return True + + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] + + if traffic is None: + return False + elif traffic == -1: + return True + else: + size = self.pyfile.size / 1024 + self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) + return size <= traffic + + + def getPassword(self): + """ + Get the password the user provided in the package + """ + return self.pyfile.package().password or "" + + + def checkForSameFiles(self, starting=False): + """ + Checks if same file was/is downloaded within same package + + :param starting: indicates that the current download is going to start + :raises Skip: + """ + pack = self.pyfile.package() + + for pyfile in self.core.files.cache.values(): + if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder: + if pyfile.status in (0, 12): #: finished or downloading + self.skip(pyfile.pluginname) + elif pyfile.status in (5, 7) and starting: #: a download is waiting/starting and was appenrently started before + self.skip(pyfile.pluginname) + + download_folder = self.core.config.get("general", "download_folder") + location = fs_join(download_folder, pack.folder, self.pyfile.name) + + if starting and self.core.config.get("download", "skip_existing") and os.path.exists(location): + size = os.stat(location).st_size + if size >= self.pyfile.size: + self.skip("File exists") + + pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) + if pyfile: + if os.path.exists(location): + self.skip(pyfile[0]) + + self.logDebug("File %s not skipped, because it does not exists." % self.pyfile.name) + + + def clean(self): + """ + Clean everything and remove references + """ + if hasattr(self, "pyfile"): + del self.pyfile + + if hasattr(self, "req"): + self.req.close() + del self.req + + if hasattr(self, "thread"): + del self.thread + + if hasattr(self, "html"): + del self.html |