diff options
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/Account.py | 11 | ||||
-rw-r--r-- | module/plugins/internal/AdYouLike.py | 2 | ||||
-rw-r--r-- | module/plugins/internal/AdsCaptcha.py | 2 | ||||
-rw-r--r-- | module/plugins/internal/Captcha.py | 6 | ||||
-rw-r--r-- | module/plugins/internal/Crypter.py | 21 | ||||
-rw-r--r-- | module/plugins/internal/Hook.py | 3 | ||||
-rw-r--r-- | module/plugins/internal/Hoster.py | 704 | ||||
-rw-r--r-- | module/plugins/internal/MultiHook.py | 15 | ||||
-rw-r--r-- | module/plugins/internal/Plugin.py | 608 | ||||
-rw-r--r-- | module/plugins/internal/ReCaptcha.py | 33 | ||||
-rw-r--r-- | module/plugins/internal/SimpleCrypter.py | 19 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 259 | ||||
-rw-r--r-- | module/plugins/internal/SolveMedia.py | 6 | ||||
-rw-r--r-- | module/plugins/internal/XFSAccount.py | 13 | ||||
-rw-r--r-- | module/plugins/internal/XFSCrypter.py | 2 | ||||
-rw-r--r-- | module/plugins/internal/XFSHoster.py | 8 |
16 files changed, 874 insertions, 838 deletions
diff --git a/module/plugins/internal/Account.py b/module/plugins/internal/Account.py index aa472f297..0e2996aaa 100644 --- a/module/plugins/internal/Account.py +++ b/module/plugins/internal/Account.py @@ -154,7 +154,9 @@ class Account(Plugin): if not type(infos) == dict: raise Exception("Wrong return format") except Exception, e: - infos = {"error": str(e)} + infos = super(self.__class__, self).loadAccountInfo(name, req) + infos['error'] = str(e) + if self.core.debug: traceback.print_exc() @@ -206,11 +208,8 @@ class Account(Plugin): def getAccountRequest(self, user=None): if not user: user, data = self.selectAccount() - if not user: - return None - req = self.core.requestFactory.getRequest(self.__name__, user) - return req + return self.core.requestFactory.getRequest(self.__name__, user) def getAccountCookies(self, user=None): @@ -264,7 +263,7 @@ class Account(Plugin): return self.selectAccount() != (None, None) - def parseTraffic(self, value, unit=None): #: return bytes + def parseTraffic(self, value, unit=None): #: return kilobytes if not unit and not isinstance(value, basestring): unit = "KB" return parseFileSize(value, unit) diff --git a/module/plugins/internal/AdYouLike.py b/module/plugins/internal/AdYouLike.py index a9c194dda..b7324ef8e 100644 --- a/module/plugins/internal/AdYouLike.py +++ b/module/plugins/internal/AdYouLike.py @@ -41,7 +41,7 @@ class AdYouLike(Captcha): # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} ayl = json_loads(ayl) - html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", + html = self.plugin.load("http://api-ayl.appspot.com/challenge", get={'key' : ayl['adyoulike']['key'], 'env' : ayl['all']['env'], 'callback': callback}) diff --git a/module/plugins/internal/AdsCaptcha.py b/module/plugins/internal/AdsCaptcha.py index 9cab99151..e058352dd 100644 --- a/module/plugins/internal/AdsCaptcha.py +++ b/module/plugins/internal/AdsCaptcha.py @@ -37,7 +37,7 @@ class AdsCaptcha(Captcha): def challenge(self, key=None, html=None): PublicKey, CaptchaId = key or self.retrieve_key(html) - html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", + html = self.plugin.load("http://api.adscaptcha.com/Get.aspx", get={'CaptchaId': CaptchaId, 'PublicKey': PublicKey}) try: diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index 8d040515c..8dbc33da2 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -23,12 +23,6 @@ class Captcha(Plugin): #@TODO: Recheck in 0.4.10 - def fail(self, reason): - self.plugin.fail(reason) - raise AttributeError(reason) - - - #@TODO: Recheck in 0.4.10 def retrieve_key(self, html): if self.detect_key(html): return self.key diff --git a/module/plugins/internal/Crypter.py b/module/plugins/internal/Crypter.py index 39b09129f..a713c7da6 100644 --- a/module/plugins/internal/Crypter.py +++ b/module/plugins/internal/Crypter.py @@ -2,14 +2,14 @@ import urlparse -from module.plugins.internal.Plugin import Plugin -from module.utils import decode, save_path as safe_filename +from module.plugins.internal.Hoster import Hoster +from module.utils import save_path as safe_filename -class Crypter(Plugin): +class Crypter(Hoster): __name__ = "Crypter" __type__ = "crypter" - __version__ = "0.03" + __version__ = "0.04" __pattern__ = r'^unmatchable$' __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config.get("general", "folder_per_package") @@ -26,18 +26,12 @@ class Crypter(Plugin): def __init__(self, pyfile): super(Crypter, self).__init__(pyfile) - #: Provide information in dict here - self.info = {} #@TODO: Move to Plugin - #: Put all packages here. It's a list of tuples like: ( name, [list of links], folder ) self.packages = [] #: List of urls, pyLoad will generate packagenames self.urls = [] - self.multiDL = True - self.limitDL = 0 - def process(self, pyfile): """Main method""" @@ -80,14 +74,13 @@ class Crypter(Plugin): "%d links" % len(links), "Saved to folder: %s" % folder if folder else "Saved to download folder") - links = map(decode, links) - - pid = self.core.api.addPackage(name, links, package_queue) + pid = self.core.api.addPackage(name, self.fixurl(links), package_queue) if package_password: self.core.api.setPackageData(pid, {"password": package_password}) - setFolder = lambda x: self.core.api.setPackageData(pid, {"folder": x or ""}) #@NOTE: Workaround to do not break API addPackage method + # Workaround to do not break API addPackage method + setFolder = lambda x: self.core.api.setPackageData(pid, {"folder": x or ""}) if use_subfolder: if not subfolder_per_package: diff --git a/module/plugins/internal/Hook.py b/module/plugins/internal/Hook.py index 8d620e794..01ffbc5f2 100644 --- a/module/plugins/internal/Hook.py +++ b/module/plugins/internal/Hook.py @@ -42,9 +42,6 @@ class Hook(Plugin): #: `HookManager` self.manager = manager - #: Provide information in dict here, usable by API `getInfo` - self.info = {} - #: automatically register event listeners for functions, attribute will be deleted dont use it yourself self.event_map = {} diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 3fed8a7c6..9db22d1d7 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -1,33 +1,691 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +from __future__ import with_statement - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import inspect +import os +import random +import time +import urlparse - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +if os.name != "nt": + import grp + import pwd - @author: mkaay -""" +from module.plugins.internal.Plugin import Plugin, Abort, Fail, Reconnect, Retry, Skip +from module.utils import fs_decode, fs_encode, save_join as fs_join -from module.plugins.internal.Plugin import Plugin -def getInfo(self): - #result = [ .. (name, size, status, url) .. ] - return +def getInfo(urls): + #result = [ .. (name, size, status, url) .. ] + pass + class Hoster(Plugin): - __name__ = "Hoster" - __version__ = "0.02" - __pattern__ = None - __type__ = "hoster" + __name__ = "Hoster" + __type__ = "hoster" + __version__ = "0.03" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + __description__ = """Base hoster plugin""" - __author_name__ = ("mkaay") - __author_mail__ = ("mkaay@mkaay.de") + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, pyfile): + super(Hoster, self).__init__(pyfile.m.core) + + #: engage wan reconnection + self.wantReconnect = False + + #: enable simultaneous processing of multiple downloads + self.multiDL = True + self.limitDL = 0 + + #: chunk limit + self.chunkLimit = 1 + self.resumeDownload = False + + #: time.time() + wait in seconds + self.waitUntil = 0 + self.waiting = False + + #: captcha reader instance + self.ocr = None + + #: account handler instance, see :py:class:`Account` + self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + + #: premium status + self.premium = False + + #: username/login + self.user = None + + if self.account and not self.account.canUse(): + self.account = None + + if self.account: + self.user, data = self.account.selectAccount() + + #: Browser instance, see `network.Browser` + self.req = self.account.getAccountRequest(self.user) + self.chunkLimit = -1 #: chunk limit, -1 for unlimited + + #: enables resume (will be ignored if server dont accept chunks) + self.resumeDownload = True + + #: premium status + self.premium = self.account.isPremium(self.user) + else: + self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) + + #: associated pyfile instance, see `PyFile` + self.pyfile = pyfile + + self.thread = None #: holds thread in future + + #: location where the last call to download was saved + self.lastDownload = "" + + #: re match of the last call to `checkDownload` + self.lastCheck = None + + #: js engine, see `JsEngine` + self.js = self.core.js + + #: captcha task + self.cTask = None + + #: some plugins store html code here + self.html = None + + #: dict of the amount of retries already made + self.retries = {} + + self.init() + + + def init(self): + """ + Initialize the plugin (in addition to `__init__`) + """ + pass + + + def setup(self): + """ + Setup for enviroment and other things, called before downloading (possibly more than one time) + """ + pass + + + def preprocessing(self, thread): + """ + Handles important things to do before starting + """ + self.thread = thread + + if self.account: + self.account.checkLogin(self.user) + else: + self.req.clearCookies() + + self.setup() + + self.pyfile.setStatus("starting") + + return self.process(self.pyfile) + + + def process(self, pyfile): + """ + The 'main' method of every plugin, you **have to** overwrite it + """ + raise NotImplementedError + + + def getChunkCount(self): + if self.chunkLimit <= 0: + return self.core.config.get("download", "chunks") + return min(self.core.config.get("download", "chunks"), self.chunkLimit) + + + def resetAccount(self): + """ + Don't use account and retry download + """ + self.account = None + self.req = self.core.requestFactory.getRequest(self.__name__) + self.retry() + + + def setReconnect(self, reconnect): + reconnect = bool(reconnect) + self.logDebug("Set wantReconnect to: %s (previous: %s)" % (reconnect, self.wantReconnect)) + self.wantReconnect = reconnect + + + def setWait(self, seconds, reconnect=None): + """ + Set a specific wait time later used with `wait` + + :param seconds: wait time in seconds + :param reconnect: True if a reconnect would avoid wait time + """ + wait_time = int(seconds) + 1 + wait_until = time.time() + wait_time + + self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.pyfile.waitUntil), + "Wait: %d seconds" % wait_time) + + self.pyfile.waitUntil = wait_until + + if reconnect is not None: + self.setReconnect(reconnect) + + + def wait(self, seconds=None, reconnect=None): + """ + Waits the time previously set + """ + pyfile = self.pyfile + + if seconds is not None: + self.setWait(seconds) + + if reconnect is not None: + self.setReconnect(reconnect) + + self.waiting = True + + status = pyfile.status + pyfile.setStatus("waiting") + + self.logInfo(_("Wait: %d seconds") % (pyfile.waitUntil - time.time()), + _("Reconnect: %s") % self.wantReconnect) + + if self.account: + self.logDebug("Ignore reconnection due account logged") + + while pyfile.waitUntil > time.time(): + if pyfile.abort: + self.abort() + + time.sleep(1) + else: + while pyfile.waitUntil > time.time(): + self.thread.m.reconnecting.wait(2) + + if pyfile.abort: + self.abort() + + if self.thread.m.reconnecting.isSet(): + self.waiting = False + self.wantReconnect = False + raise Reconnect + + time.sleep(1) + + self.waiting = False + + pyfile.status = status + + + def skip(self, reason=""): + """ + Skip and give reason + """ + raise Skip(fs_encode(reason)) + + + def abort(self, reason=""): + """ + Abort and give reason + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Abort + + + def offline(self, reason=""): + """ + Fail and indicate file is offline + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Fail("offline") + + + def tempOffline(self, reason=""): + """ + Fail and indicates file ist temporary offline, the core may take consequences + """ + if reason: + self.pyfile.error = fs_encode(reason) + raise Fail("temp. offline") + + + def retry(self, max_tries=5, wait_time=1, reason=""): + """ + Retries and begin again from the beginning + + :param max_tries: number of maximum retries + :param wait_time: time to wait in seconds + :param reason: reason for retrying, will be passed to fail if max_tries reached + """ + id = inspect.currentframe().f_back.f_lineno + if id not in self.retries: + self.retries[id] = 0 + + if 0 < max_tries <= self.retries[id]: + self.fail(reason or _("Max retries reached"), "retry") + + self.wait(wait_time, False) + + self.retries[id] += 1 + raise Retry(reason) + + + def invalidCaptcha(self): + self.logError(_("Invalid captcha")) + if self.cTask: + self.cTask.invalid() + + + def correctCaptcha(self): + self.logInfo(_("Correct captcha")) + if self.cTask: + self.cTask.correct() + + + def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, + imgtype='jpg', result_type='textual'): + """ + Loads a captcha and decrypts it with ocr, plugin, user input + + :param url: url of captcha image + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param forceUser: if True, ocr is not used + :param imgtype: Type of the Image + :param result_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + + :return: result of decrypting + """ + + img = self.load(url, get=get, post=post, cookies=cookies) + + id = ("%.2f" % time.time())[-6:].replace(".", "") + + with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") as tmpCaptcha: + tmpCaptcha.write(img) + + has_plugin = self.__name__ in self.core.pluginManager.ocrPlugins + + if self.core.captcha: + Ocr = self.core.pluginManager.loadClass("ocr", self.__name__) + else: + Ocr = None + + if Ocr and not forceUser: + time.sleep(random.randint(3000, 5000) / 1000.0) + if self.pyfile.abort: + self.abort() + + ocr = Ocr() + result = ocr.get_captcha(tmpCaptcha.name) + else: + captchaManager = self.core.captchaManager + task = captchaManager.newTask(img, imgtype, tmpCaptcha.name, result_type) + self.cTask = task + captchaManager.handleCaptcha(task) + + while task.isWaiting(): + if self.pyfile.abort: + captchaManager.removeTask(task) + self.abort() + time.sleep(1) + + captchaManager.removeTask(task) + + if task.error and has_plugin: #: ignore default error message since the user could use OCR + self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) + elif task.error: + self.fail(task.error) + elif not task.result: + self.fail(_("No captcha result obtained in appropiate time by any of the plugins")) + + result = task.result + self.logDebug("Received captcha result: %s" % result) + + if not self.core.debug: + try: + os.remove(tmpCaptcha.name) + except Exception: + pass + + return result + + + def fixurl(self, url): + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + + url = super(Hoster, self).fixurl(url) + + if not urlparse.urlparse(url).scheme: + url = urlparse.urljoin(baseurl, url) + + return url + + + def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): + """ + Downloads the content at url to download folder + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param disposition: if True and server provides content-disposition header\ + the filename will be changed if needed + :return: The location where the file was saved + """ + if self.pyfile.abort: + self.abort() + + url = self.fixurl(url) + + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) + + if self.core.debug: + self.logDebug("Download url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) + + self.correctCaptcha() + self.checkForSameFiles() + + self.pyfile.setStatus("downloading") + + if disposition: + self.pyfile.name = urlparse.urlparse(url).path.split('/')[-1] or self.pyfile.name + + download_folder = self.core.config.get("general", "download_folder") + + location = fs_join(download_folder, self.pyfile.package().folder) + + if not os.path.exists(location): + try: + os.makedirs(location, int(self.core.config.get("permission", "folder"), 8)) + + if self.core.config.get("permission", "change_dl") and os.name != "nt": + uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2] + gid = grp.getgrnam(self.core.config.get("permission", "group"))[2] + os.chown(location, uid, gid) + + except Exception, e: + self.fail(e) + + # convert back to unicode + location = fs_decode(location) + name = safe_filename(self.pyfile.name) + + filename = os.path.join(location, name) + + self.core.addonManager.dispatchEvent("download-start", self.pyfile, url, filename) + + try: + newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, + chunks=self.getChunkCount(), resume=self.resumeDownload, + progressNotify=self.pyfile.setProgress, disposition=disposition) + finally: + self.pyfile.size = self.req.size + + if newname: + newname = urlparse.urlparse(newname).path.split('/')[-1] + + if disposition and newname != name: + self.logInfo(_("%(name)s saved as %(newname)s") % {"name": name, "newname": newname}) + self.pyfile.name = newname + filename = os.path.join(location, newname) + + fs_filename = fs_encode(filename) + + if self.core.config.get("permission", "change_file"): + try: + os.chmod(fs_filename, int(self.core.config.get("permission", "file"), 8)) + except Exception, e: + self.logWarning(_("Setting file mode failed"), e) + + if self.core.config.get("permission", "change_dl") and os.name != "nt": + try: + uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2] + gid = grp.getgrnam(self.core.config.get("permission", "group"))[2] + os.chown(fs_filename, uid, gid) + + except Exception, e: + self.logWarning(_("Setting User and Group failed"), e) + + self.lastDownload = filename + return self.lastDownload + + + def checkDownload(self, rules, delete=True, file_size=None, size_tolerance=1000, read_size=100000): + """ + Checks the content of the last downloaded file, re match is saved to `lastCheck` + + :param rules: dict with names and rules to match (compiled regexp or strings) + :param delete: delete if matched + :param file_size: expected file size + :param size_tolerance: size check tolerance + :param read_size: amount of bytes to read from files + :return: dictionary key of the first rule that matched + """ + lastDownload = fs_encode(self.lastDownload) + + if not self.lastDownload or not os.path.exists(lastDownload): + self.lastDownload = "" + self.fail(self.pyfile.error or _("No file downloaded")) + + download_size = os.stat(lastDownload).st_size + + if download_size < 1 or (file_size and abs(file_size - download_size) > size_tolerance): + if delete: + os.remove(lastDownload) + self.fail(_("Empty file")) + + self.logDebug("Download Check triggered") + + with open(lastDownload, "rb") as f: + content = f.read(read_size) + + # produces encoding errors, better log to other file in the future? + # self.logDebug("Content: %s" % content) + for name, rule in rules.iteritems(): + if isinstance(rule, basestring): + if rule in content: + if delete: + os.remove(lastDownload) + return name + + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + if delete: + os.remove(lastDownload) + self.lastCheck = m + return name + + + def directLink(self, url, follow_location=None): + link = "" + + if follow_location is None: + redirect = 1 + + elif type(follow_location) is int: + redirect = max(follow_location, 1) + + else: + redirect = self.getConfig("maxredirs", plugin="UserAgentSwitcher") + + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, just_header=True) + + except Exception: #: Bad bad bad... rewrite this part in 0.4.10 + req = pyreq.getHTTPRequest() + res = self.load(url, just_header=True) + + req.close() + + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + + if 'content-disposition' in header: + link = url + + elif 'location' in header and header['location']: + location = header['location'] + + if not urlparse.urlparse(location).scheme: + url_p = urlparse.urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urlparse.urljoin(baseurl, location) + + if 'code' in header and header['code'] == 302: + link = location + + if follow_location: + url = location + continue + + else: + extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] + + if 'content-type' in header and header['content-type']: + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" + + break + + else: + try: + self.logError(_("Too many redirects")) + except Exception: + pass + + return link + + + def parseHtmlForm(self, attr_str="", input_names={}): + return parseHtmlForm(attr_str, self.html, input_names) + + + def checkTrafficLeft(self): + if not self.account: + return True + + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] + + if traffic is None: + return False + elif traffic == -1: + return True + else: + size = self.pyfile.size / 1024 + self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) + return size <= traffic + + + def getPassword(self): + """ + Get the password the user provided in the package + """ + return self.pyfile.package().password or "" + + + def checkForSameFiles(self, starting=False): + """ + Checks if same file was/is downloaded within same package + + :param starting: indicates that the current download is going to start + :raises Skip: + """ + pack = self.pyfile.package() + + for pyfile in self.core.files.cache.values(): + if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder: + if pyfile.status in (0, 12): #: finished or downloading + self.skip(pyfile.pluginname) + elif pyfile.status in (5, 7) and starting: #: a download is waiting/starting and was appenrently started before + self.skip(pyfile.pluginname) + + download_folder = self.core.config.get("general", "download_folder") + location = fs_join(download_folder, pack.folder, self.pyfile.name) + + if starting and self.core.config.get("download", "skip_existing") and os.path.exists(location): + size = os.stat(location).st_size + if size >= self.pyfile.size: + self.skip("File exists") + + pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) + if pyfile: + if os.path.exists(location): + self.skip(pyfile[0]) + + self.logDebug("File %s not skipped, because it does not exists." % self.pyfile.name) + + + def clean(self): + """ + Clean everything and remove references + """ + if hasattr(self, "pyfile"): + del self.pyfile + + if hasattr(self, "req"): + self.req.close() + del self.req + + if hasattr(self, "thread"): + del self.thread + + if hasattr(self, "html"): + del self.html diff --git a/module/plugins/internal/MultiHook.py b/module/plugins/internal/MultiHook.py index 10b2e7b0e..0bcab5915 100644 --- a/module/plugins/internal/MultiHook.py +++ b/module/plugins/internal/MultiHook.py @@ -55,8 +55,6 @@ class MultiHook(Hook): def setup(self): - self.info = {} #@TODO: Remove in 0.4.10 - self.plugins = [] self.supported = [] self.new_supported = [] @@ -97,19 +95,6 @@ class MultiHook(Hook): self.init_periodical(threaded=True) - def getURL(self, *args, **kwargs): #@TODO: Remove in 0.4.10 - """See HTTPRequest for argument list""" - h = pyreq.getHTTPRequest(timeout=120) - try: - if not 'decode' in kwargs: - kwargs['decode'] = True - rep = h.load(*args, **kwargs) - finally: - h.close() - - return rep - - def pluginsCached(self): if self.plugins: return self.plugins diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py index da597ef42..0d4c3b165 100644 --- a/module/plugins/internal/Plugin.py +++ b/module/plugins/internal/Plugin.py @@ -1,65 +1,108 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +from __future__ import with_statement - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import os +import re +import urllib - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 +from module.utils import fs_encode, fs_decode, html_unescape, save_join as fs_join - @author: RaNaN, spoob, mkaay -""" -from time import time, sleep -from random import randint +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + return string -import os -from os import remove, makedirs, chmod, stat -from os.path import exists, join -if os.name != "nt": - from os import chown - from pwd import getpwnam - from grp import getgrnam +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + domain, name, value = cookie + cj.setCookie(domain, name, value) -from itertools import islice -from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 -from module.utils import save_join as fs_join, save_path as safe_filename, fs_encode, fs_decode +def parseHtmlTagAttrValue(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parseHtmlForm(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.S | re.I): + inputs = {} + action = parseHtmlTagAttrValue("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): + name = parseHtmlTagAttrValue("name", inputtag.group(1)) + if name: + value = parseHtmlTagAttrValue("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or "" + else: + inputs[name] = value + + if input_names: + # check input attributes + for key, val in input_names.iteritems(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] == val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break #: attibute value does not match + else: + break #: attibute name does not match + else: + return action, inputs #: passed attribute check + else: + # no attribute check + return action, inputs + + return {}, None #: no matching form found + def chunks(iterable, size): - it = iter(iterable) + it = iter(iterable) item = list(islice(it, size)) while item: yield item item = list(islice(it, size)) -class Base(object): - """ - A Base class with log/config/db methods *all* plugin types can use - """ +class Plugin(object): + __name__ = "Plugin" + __type__ = "hoster" + __version__ = "0.11" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + def __init__(self, core): - #: Core instance self.core = core + #: Provide information in dict here + self.info = {} + - #: Log functions def _log(self, level, args): log = getattr(self.core.log, level) - msg = " | ".join((fs_encode(a) if isinstance(a, unicode) else #@NOTE: `fs_encode` -> `encode` in 0.4.10 - str(a)).strip() for a in args if a) + msg = fs_encode(" | ".join((a if isinstance(a, basestring) else str(a)).strip() for a in args if a)) #@NOTE: `fs_encode` -> `encode` in 0.4.10 log("%(plugin)s%(id)s: %(msg)s" % {'plugin': self.__name__, - 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", - 'msg' : msg or _(level.upper() + " MARK")}) + 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", + 'msg' : msg or _(level.upper() + " MARK")}) def logDebug(self, *args): @@ -100,7 +143,7 @@ class Base(object): return self.setConfig(*args, **kwargs) - def getConfig(self, option, default=""): + def getConfig(self, option, default="", plugin=None): """ Returns config value for current plugin @@ -108,9 +151,10 @@ class Base(object): :return: """ try: - return self.core.config.getPlugin(self.__name__, option) + return self.core.config.getPlugin(plugin or self.__name__, option) except KeyError: + self.logWarning(_("Config option or plugin not found")) return default @@ -149,11 +193,11 @@ class Base(object): def fail(self, reason): """Fail and give reason""" - raise Fail(reason) + raise Fail(fs_encode(reason)) def error(self, reason="", type=_("Parse")): - if not reason and not type: + if not reason: type = _("Unknown") msg = _("%s error") % type.strip().capitalize() if type else _("Error") @@ -163,313 +207,73 @@ class Base(object): raise Fail(msg) -class Plugin(Base): - __name__ = "Plugin" - __type__ = "hoster" - __version__ = "0.10" + def fixurl(self, url): + return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip() - __pattern__ = r'^unmatchable$' - __config__ = [] #: [("name", "type", "desc", "default")] - __description__ = """Base plugin""" - __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("spoob", "spoob@pyload.org"), - ("mkaay", "mkaay@mkaay.de" )] - - - def __init__(self, pyfile): - super(Plugin, self).__init__(pyfile.m.core) - - self.wantReconnect = False - #: enables simultaneous processing of multiple downloads - self.multiDL = True - self.limitDL = 0 - #: chunk limit - self.chunkLimit = 1 - self.resumeDownload = False - - #: time() + wait in seconds - self.waitUntil = 0 - self.waiting = False - - self.ocr = None #captcha reader instance - #: account handler instance, see :py:class:`Account` - self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) - - #: premium status - self.premium = False - #: username/login - self.user = None - - if self.account and not self.account.canUse(): self.account = None - if self.account: - self.user, data = self.account.selectAccount() - #: Browser instance, see `network.Browser` - self.req = self.account.getAccountRequest(self.user) - self.chunkLimit = -1 # chunk limit, -1 for unlimited - #: enables resume (will be ignored if server dont accept chunks) - self.resumeDownload = True - self.multiDL = True #every hoster with account should provide multiple downloads - #: premium status - self.premium = self.account.isPremium(self.user) - else: - self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) - - #: associated pyfile instance, see `PyFile` - self.pyfile = pyfile - self.thread = None # holds thread in future - - #: location where the last call to download was saved - self.lastDownload = "" - #: re match of the last call to `checkDownload` - self.lastCheck = None - #: js engine, see `JsEngine` - self.js = self.core.js - self.cTask = None #captcha task - - self.retries = 0 # amount of retries already made - self.html = None # some plugins store html code here - - self.init() - - def getChunkCount(self): - if self.chunkLimit <= 0: - return self.core.config["download"]["chunks"] - return min(self.core.config["download"]["chunks"], self.chunkLimit) - - def __call__(self): - return self.__name__ - - def init(self): - """initialize the plugin (in addition to `__init__`)""" - pass - - def setup(self): - """ setup for enviroment and other things, called before downloading (possibly more than one time)""" - pass - - def preprocessing(self, thread): - """ handles important things to do before starting """ - self.thread = thread - - if self.account: - self.account.checkLogin(self.user) - else: - self.req.clearCookies() - - self.setup() - - self.pyfile.setStatus("starting") - - return self.process(self.pyfile) - - - def process(self, pyfile): - """the 'main' method of every plugin, you **have to** overwrite it""" - raise NotImplementedError - - def resetAccount(self): - """ dont use account and retry download """ - self.account = None - self.req = self.core.requestFactory.getRequest(self.__name__) - self.retry() - - def checksum(self, local_file=None): - """ - return codes: - 0 - checksum ok - 1 - checksum wrong - 5 - can't get checksum - 10 - not implemented - 20 - unknown error - """ - #@TODO checksum check hook - - return True, 10 - - - def setWait(self, seconds, reconnect=False): - """Set a specific wait time later used with `wait` - - :param seconds: wait time in seconds - :param reconnect: True if a reconnect would avoid wait time + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, req=None): """ - if reconnect: - self.wantReconnect = True - self.pyfile.waitUntil = time() + int(seconds) - - def wait(self): - """ waits the time previously set """ - self.waiting = True - self.pyfile.setStatus("waiting") - - while self.pyfile.waitUntil > time(): - self.thread.m.reconnecting.wait(2) - - if self.pyfile.abort: raise Abort - if self.thread.m.reconnecting.isSet(): - self.waiting = False - self.wantReconnect = False - raise Reconnect - - self.waiting = False - self.pyfile.setStatus("starting") - - def offline(self): - """ fail and indicate file is offline """ - raise Fail("offline") - - def tempOffline(self): - """ fail and indicates file ist temporary offline, the core may take consequences """ - raise Fail("temp. offline") - - def skip(self, reason): - raise Skip(reason) - - def retry(self, max_tries=3, wait_time=1, reason=""): - """Retries and begin again from the beginning - - :param max_tries: number of maximum retries - :param wait_time: time to wait in seconds - :param reason: reason for retrying, will be passed to fail if max_tries reached - """ - if 0 < max_tries <= self.retries: - if not reason: reason = "Max retries reached" - raise Fail(reason) - - self.wantReconnect = False - self.setWait(wait_time) - self.wait() - - self.retries += 1 - raise Retry(reason) - - def invalidCaptcha(self): - if self.cTask: - self.cTask.invalid() - - def correctCaptcha(self): - if self.cTask: - self.cTask.correct() - - def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype='jpg', - result_type='textual'): - """ Loads a captcha and decrypts it with ocr, plugin, user input - - :param url: url of captcha image - :param get: get part for request - :param post: post part for request - :param cookies: True if cookies should be enabled - :param forceUser: if True, ocr is not used - :param imgtype: Type of the Image - :param result_type: 'textual' if text is written on the captcha\ - or 'positional' for captcha where the user have to click\ - on a specific region on the captcha - - :return: result of decrypting - """ - - img = self.load(url, get=get, post=post, cookies=cookies) - - id = ("%.2f" % time())[-6:].replace(".", "") - temp_file = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") - temp_file.write(img) - temp_file.close() - - has_plugin = self.__name__ in self.core.pluginManager.captchaPlugins - - if self.core.captcha: - Ocr = self.core.pluginManager.loadClass("captcha", self.__name__) - else: - Ocr = None - - if Ocr and not forceUser: - sleep(randint(3000, 5000) / 1000.0) - if self.pyfile.abort: raise Abort - - ocr = Ocr() - result = ocr.get_captcha(temp_file.name) - else: - captchaManager = self.core.captchaManager - task = captchaManager.newTask(img, imgtype, temp_file.name, result_type) - self.cTask = task - captchaManager.handleCaptcha(task) - - while task.isWaiting(): - if self.pyfile.abort: - captchaManager.removeTask(task) - raise Abort - sleep(1) - - captchaManager.removeTask(task) - - if task.error and has_plugin: #ignore default error message since the user could use OCR - self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) - elif task.error: - self.fail(task.error) - elif not task.result: - self.fail(_("No captcha result obtained in appropiate time by any of the plugins.")) - - result = task.result - self.core.log.debug("Received captcha result: %s" % str(result)) - - if not self.core.debug: - try: - remove(temp_file.name) - except: - pass - - return result - - - def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): - """Load content at url and returns it + Load content at url and returns it :param url: :param get: :param post: :param ref: :param cookies: - :param just_header: if True only the header will be retrieved and returned as dict + :param just_header: If True only the header will be retrieved and returned as dict :param decode: Wether to decode the output according to http header, should be True in most cases :return: Loaded content """ - if self.pyfile.abort: raise Abort - #utf8 vs decode -> please use decode attribute in all future plugins - if type(url) == unicode: url = str(url) + if hasattr(self, 'pyfile') and self.pyfile.abort: + self.abort() + + url = self.fixurl(url) - res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode) + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) if self.core.debug: - from inspect import currentframe + self.logDebug("Load url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) + + if req is None: + if hasattr(self, "req"): + req = self.req + else: + req = self.core.requestFactory.getRequest(self.__name__) - frame = currentframe() - if not exists(join("tmp", self.__name__)): - makedirs(join("tmp", self.__name__)) + res = req.load(url, get, post, ref, cookies, just_header, True, bool(decode)) - f = open( - join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) - , "wb") - del frame # delete the frame or it wont be cleaned + if decode: + res = html_unescape(res) + if isinstance(decode, basestring): + res = res.decode(decode) + + if self.core.debug: + import inspect + + frame = inspect.currentframe() + framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) try: - tmp = res.encode("utf8") - except: - tmp = res + if not os.path.exists(os.path.join("tmp", self.__name__)): + os.makedirs(os.path.join("tmp", self.__name__)) - f.write(tmp) - f.close() + with open(framefile, "wb") as f: + del frame #: delete the frame or it wont be cleaned + f.write(res.encode('utf8')) + except IOError, e: + self.logError(e) if just_header: - #parse header - header = {"code": self.req.code} + # parse header + header = {"code": req.code} for line in res.splitlines(): line = line.strip() - if not line or ":" not in line: continue + if not line or ":" not in line: + continue key, none, value = line.partition(":") - key = key.lower().strip() + key = key.strip().lower() value = value.strip() if key in header: @@ -481,164 +285,4 @@ class Plugin(Base): header[key] = value res = header - return res - - def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): - """Downloads the content at url to download folder - - :param url: - :param get: - :param post: - :param ref: - :param cookies: - :param disposition: if True and server provides content-disposition header\ - the filename will be changed if needed - :return: The location where the file was saved - """ - - self.checkForSameFiles() - - self.pyfile.setStatus("downloading") - - download_folder = self.core.config['general']['download_folder'] - - location = fs_join(download_folder, self.pyfile.package().folder) - - if not exists(location): - makedirs(location, int(self.core.config["permission"]["folder"], 8)) - - if self.core.config["permission"]["change_dl"] and os.name != "nt": - try: - uid = getpwnam(self.core.config["permission"]["user"])[2] - gid = getgrnam(self.core.config["permission"]["group"])[2] - - chown(location, uid, gid) - except Exception, e: - self.core.log.warning(_("Setting User and Group failed: %s") % str(e)) - - # convert back to unicode - location = fs_decode(location) - name = safe_filename(self.pyfile.name) - - filename = join(location, name) - - self.core.hookManager.dispatchEvent("downloadStarts", self.pyfile, url, filename) - - try: - newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, - chunks=self.getChunkCount(), resume=self.resumeDownload, - progressNotify=self.pyfile.setProgress, disposition=disposition) - finally: - self.pyfile.size = self.req.size - - if disposition and newname and newname != name: #triple check, just to be sure - self.core.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname}) - self.pyfile.name = newname - filename = join(location, newname) - - fs_filename = fs_encode(filename) - - if self.core.config["permission"]["change_file"]: - chmod(fs_filename, int(self.core.config["permission"]["file"], 8)) - - if self.core.config["permission"]["change_dl"] and os.name != "nt": - try: - uid = getpwnam(self.core.config["permission"]["user"])[2] - gid = getgrnam(self.core.config["permission"]["group"])[2] - - chown(fs_filename, uid, gid) - except Exception, e: - self.core.log.warning(_("Setting User and Group failed: %s") % str(e)) - - self.lastDownload = filename - return self.lastDownload - - def checkDownload(self, rules, api_size=0, max_size=50000, delete=True, read_size=0): - """ checks the content of the last downloaded file, re match is saved to `lastCheck` - - :param rules: dict with names and rules to match (compiled regexp or strings) - :param api_size: expected file size - :param max_size: if the file is larger then it wont be checked - :param delete: delete if matched - :param read_size: amount of bytes to read from files larger then max_size - :return: dictionary key of the first rule that matched - """ - lastDownload = fs_encode(self.lastDownload) - if not exists(lastDownload): return None - - size = stat(lastDownload) - size = size.st_size - - if api_size and api_size <= size: return None - elif size > max_size and not read_size: return None - self.core.log.debug("Download Check triggered") - f = open(lastDownload, "rb") - content = f.read(read_size if read_size else -1) - f.close() - #produces encoding errors, better log to other file in the future? - #self.core.log.debug("Content: %s" % content) - for name, rule in rules.iteritems(): - if type(rule) in (str, unicode): - if rule in content: - if delete: - remove(lastDownload) - return name - elif hasattr(rule, "search"): - m = rule.search(content) - if m: - if delete: - remove(lastDownload) - self.lastCheck = m - return name - - - def getPassword(self): - """ get the password the user provided in the package""" - password = self.pyfile.package().password - if not password: return "" - return password - - - def checkForSameFiles(self, starting=False): - """ checks if same file was/is downloaded within same package - - :param starting: indicates that the current download is going to start - :raises Skip: - """ - - pack = self.pyfile.package() - - for pyfile in self.core.files.cache.values(): - if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder: - if pyfile.status in (0, 12): #finished or downloading - self.skip(pyfile.pluginname) - elif pyfile.status in ( - 5, 7) and starting: #a download is waiting/starting and was appenrently started before - self.skip(pyfile.pluginname) - - download_folder = self.core.config['general']['download_folder'] - location = fs_join(download_folder, pack.folder, self.pyfile.name) - - if starting and self.core.config['download']['skip_existing'] and exists(location): - size = os.stat(location).st_size - if size >= self.pyfile.size: - self.skip("File exists") - - pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) - if pyfile: - if exists(location): - self.skip(pyfile[0]) - - self.core.log.debug("File %s not skipped, because it does not exists" % self.pyfile.name) - - def clean(self): - """ clean everything and remove references """ - if hasattr(self, "pyfile"): - del self.pyfile - if hasattr(self, "req"): - self.req.close() - del self.req - if hasattr(self, "thread"): - del self.thread - if hasattr(self, "html"): - del self.html + return res
\ No newline at end of file diff --git a/module/plugins/internal/ReCaptcha.py b/module/plugins/internal/ReCaptcha.py index a9d0f3752..40faff5f0 100644 --- a/module/plugins/internal/ReCaptcha.py +++ b/module/plugins/internal/ReCaptcha.py @@ -51,7 +51,7 @@ class ReCaptcha(Captcha): def _challenge_v1(self, key): - html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", + html = self.plugin.load("http://www.google.com/recaptcha/api/challenge", get={'k': key}) try: challenge = re.search("challenge : '(.+?)',", html).group(1) @@ -66,8 +66,8 @@ class ReCaptcha(Captcha): def result(self, server, challenge, key): - self.plugin.req.load("http://www.google.com/recaptcha/api/js/recaptcha.js") - html = self.plugin.req.load("http://www.google.com/recaptcha/api/reload", + self.plugin.load("http://www.google.com/recaptcha/api/js/recaptcha.js") + html = self.plugin.load("http://www.google.com/recaptcha/api/reload", get={'c' : challenge, 'k' : key, 'reason': "i", @@ -92,7 +92,7 @@ class ReCaptcha(Captcha): def _collectApiInfo(self): - html = self.plugin.req.load("http://www.google.com/recaptcha/api.js") + html = self.plugin.load("http://www.google.com/recaptcha/api.js") a = re.search(r'po.src = \'(.*?)\';', html).group(1) vers = a.split("/")[5] @@ -102,7 +102,7 @@ class ReCaptcha(Captcha): self.logDebug("API language: %s" % language) - html = self.plugin.req.load("https://apis.google.com/js/api.js") + html = self.plugin.load("https://apis.google.com/js/api.js") b = re.search(r'"h":"(.*?)","', html).group(1) jsh = b.decode('unicode-escape') @@ -112,7 +112,7 @@ class ReCaptcha(Captcha): def _prepareTimeAndRpc(self): - self.plugin.req.load("http://www.google.com/recaptcha/api2/demo") + self.plugin.load("http://www.google.com/recaptcha/api2/demo") millis = int(round(time.time() * 1000)) @@ -139,7 +139,7 @@ class ReCaptcha(Captcha): vers, language, jsh = self._collectApiInfo() millis, rpc = self._prepareTimeAndRpc() - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/anchor", + html = self.plugin.load("https://www.google.com/recaptcha/api2/anchor", get={'k' : key, 'hl' : language, 'v' : vers, @@ -152,14 +152,15 @@ class ReCaptcha(Captcha): token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html) self.logDebug("Token #1: %s" % token1.group(1)) - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/frame", - get={'c' : token1.group(1), - 'hl' : language, - 'v' : vers, - 'bg' : botguardstring, - 'k' : key, - 'usegapi': "1", - 'jsh' : jsh}).decode('unicode-escape') + html = self.plugin.load("https://www.google.com/recaptcha/api2/frame", + get={'c' : token1.group(1), + 'hl' : language, + 'v' : vers, + 'bg' : botguardstring, + 'k' : key, + 'usegapi': "1", + 'jsh' : jsh}, + decode="unicode-escape") token2 = re.search(r'"finput","(.*?)",', html) self.logDebug("Token #2: %s" % token2.group(1)) @@ -179,7 +180,7 @@ class ReCaptcha(Captcha): timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500) - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/userverify", + html = self.plugin.load("https://www.google.com/recaptcha/api2/userverify", post={'k' : key, 'c' : token3.group(1), 'response': response, diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index 3e8b89f79..db253d950 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -54,7 +54,7 @@ class SimpleCrypter(Crypter, SimpleHoster): #@TODO: Remove in 0.4.10 def init(self): account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") - account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) + account = self.core.accountManager.getAccountPlugin(account_name) if account and account.canUse(): self.user, data = account.selectAccount() @@ -68,9 +68,9 @@ class SimpleCrypter(Crypter, SimpleHoster): self.pyfile.error = "" #@TODO: Remove in 0.4.10 self.info = {} - self.html = "" - self.link = "" #@TODO: Move to Hoster in 0.4.10 - self.links = [] #@TODO: Move to Hoster in 0.4.10 + self.html = "" #@TODO: Recheck in 0.4.10 + self.link = "" #@TODO: Recheck in 0.4.10 + self.links = [] if self.LOGIN_PREMIUM and not self.premium: self.fail(_("Required premium account not found")) @@ -87,11 +87,11 @@ class SimpleCrypter(Crypter, SimpleHoster): def handleDirect(self, pyfile): - for i in xrange(10): #@TODO: Use `pycurl.MAXREDIRS` value in 0.4.10 + for i in xrange(self.getConfig("maxredirs", plugin="UserAgentSwitcher")): redirect = self.link or pyfile.url self.logDebug("Redirect #%d to: %s" % (i, redirect)) - header = self.load(redirect, just_header=True, decode=True) + header = self.load(redirect, just_header=True) if 'location' in header and header['location']: self.link = header['location'] else: @@ -107,13 +107,13 @@ class SimpleCrypter(Crypter, SimpleHoster): self.handleDirect(pyfile) if self.link: - self.urls = self.fixurls([self.link]) + self.urls = [self.link] else: self.preload() self.checkInfo() - self.links = self.fixurls(self.getLinks()) or list() + self.links = self.getLinks() or list() if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): self.handlePages(pyfile) @@ -123,9 +123,6 @@ class SimpleCrypter(Crypter, SimpleHoster): if self.links: self.packages = [(self.info['name'], self.links, self.info['folder'])] - elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 - self.fail(_("No link grabbed")) - def checkNameSize(self, getinfo=True): if not self.info or getinfo: diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index cec5f55f1..979da24a9 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -11,12 +11,11 @@ import urllib import urlparse from module.PyFile import statusMap as _statusMap -from module.network.CookieJar import CookieJar from module.network.HTTPRequest import BadHeader from module.network.RequestFactory import getURL from module.plugins.internal.Hoster import Hoster -from module.plugins.internal.Plugin import Fail, Retry -from module.utils import fixup, fs_encode, html_unescape, parseFileSize +from module.plugins.internal.Plugin import Fail, Retry, replace_patterns, set_cookies +from module.utils import fixup, fs_encode, parseFileSize #@TODO: Adapt and move to PyFile in 0.4.10 @@ -24,73 +23,6 @@ statusMap = dict((v, k) for k, v in _statusMap.iteritems()) #@TODO: Remove in 0.4.10 -def _wait(self, seconds, reconnect): - if seconds: - self.setWait(int(seconds) + 1) - - if reconnect is not None: - self.wantReconnect = reconnect - - super(SimpleHoster, self).wait() - - -def replace_patterns(string, ruleslist): - for r in ruleslist: - rf, rt = r - string = re.sub(rf, rt, string) - return string - - -def set_cookies(cj, cookies): - for cookie in cookies: - if isinstance(cookie, tuple) and len(cookie) == 3: - domain, name, value = cookie - cj.setCookie(domain, name, value) - - -def parseHtmlTagAttrValue(attr_name, tag): - m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) - return m.group(2) if m else None - - -def parseHtmlForm(attr_str, html, input_names={}): - for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, - html, re.S | re.I): - inputs = {} - action = parseHtmlTagAttrValue("action", form.group('TAG')) - - for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): - name = parseHtmlTagAttrValue("name", inputtag.group(1)) - if name: - value = parseHtmlTagAttrValue("value", inputtag.group(1)) - if not value: - inputs[name] = inputtag.group(3) or "" - else: - inputs[name] = value - - if input_names: - # check input attributes - for key, val in input_names.iteritems(): - if key in inputs: - if isinstance(val, basestring) and inputs[key] == val: - continue - elif isinstance(val, tuple) and inputs[key] in val: - continue - elif hasattr(val, "search") and re.match(val, inputs[key]): - continue - break #: attibute value does not match - else: - break #: attibute name does not match - else: - return action, inputs #: passed attribute check - else: - # no attribute check - return action, inputs - - return {}, None #: no matching form found - - -#@TODO: Remove in 0.4.10 def parseFileInfo(plugin, url="", html=""): if hasattr(plugin, "getInfo"): info = plugin.getInfo(url, html) @@ -123,91 +55,6 @@ def timestamp(): return int(time.time() * 1000) -#@TODO: Move to Hoster in 0.4.10 -def getFileURL(self, url, follow_location=None): - link = "" - redirect = 1 - - if type(follow_location) is int: - redirect = max(follow_location, 1) - else: - redirect = 10 - - for i in xrange(redirect): - try: - self.logDebug("Redirect #%d to: %s" % (i, url)) - header = self.load(url, just_header=True, decode=True) - - except Exception: #: Bad bad bad... rewrite this part in 0.4.10 - req = pyreq.getHTTPRequest() - res = req.load(url, just_header=True, decode=True) - - req.close() - - header = {"code": req.code} - for line in res.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - - key, none, value = line.partition(":") - key = key.lower().strip() - value = value.strip() - - if key in header: - if type(header[key]) == list: - header[key].append(value) - else: - header[key] = [header[key], value] - else: - header[key] = value - - if 'content-disposition' in header: - link = url - - elif 'location' in header and header['location']: - location = header['location'] - - if not urlparse.urlparse(location).scheme: - url_p = urlparse.urlparse(url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - location = urlparse.urljoin(baseurl, location) - - if 'code' in header and header['code'] == 302: - link = location - - if follow_location: - url = location - continue - - else: - extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] - - if 'content-type' in header and header['content-type']: - mimetype = header['content-type'].split(';')[0].strip() - - elif extension: - mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" - - else: - mimetype = "" - - if mimetype and (link or 'html' not in mimetype): - link = url - else: - link = "" - - break - - else: - try: - self.logError(_("Too many redirects")) - except Exception: - pass - - return link - - def secondsToMidnight(gmt=0): now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) @@ -313,14 +160,11 @@ class SimpleHoster(Hoster): LOGIN_ACCOUNT = False #: Set to True to require account login LOGIN_PREMIUM = False #: Set to True to require premium account login MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) - TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + TEXT_ENCODING = True #: Set to encoding name if encoding value in http header is not correct LINK_PATTERN = None - directLink = getFileURL #@TODO: Remove in 0.4.10 - - @classmethod def apiInfo(cls, url): url = urllib.unquote(url) @@ -351,10 +195,7 @@ class SimpleHoster(Hoster): elif info['status'] is 3: try: - html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) - - if isinstance(cls.TEXT_ENCODING, basestring): - html = unicode(html, cls.TEXT_ENCODING) + html = getURL(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING) except BadHeader, e: info['error'] = "%d: %s" % (e.code, e.content) @@ -424,10 +265,10 @@ class SimpleHoster(Hoster): self.pyfile.error = "" #@TODO: Remove in 0.4.10 self.info = {} - self.html = "" - self.link = "" #@TODO: Move to Hoster in 0.4.10 - self.directDL = False #@TODO: Move to Hoster in 0.4.10 - self.multihost = False #@TODO: Move to Hoster in 0.4.10 + self.html = "" #@TODO: Recheck in 0.4.10 + self.link = "" #@TODO: Recheck in 0.4.10 + self.directDL = False + self.multihost = False if not self.getConfig('use_premium', True): self.retryFree() @@ -465,10 +306,10 @@ class SimpleHoster(Hoster): def preload(self): - self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), ref=False, decode=not self.TEXT_ENCODING) - - if isinstance(self.TEXT_ENCODING, basestring): - self.html = unicode(self.html, self.TEXT_ENCODING) + self.html = self.load(self.pyfile.url, + cookies=bool(self.COOKIES), + ref=False, + decode=self.TEXT_ENCODING) def process(self, pyfile): @@ -517,29 +358,6 @@ class SimpleHoster(Hoster): raise Fail(err) - def fixurl(self, url): - return self.fixurls([url])[0] - - - def fixurls(self, urls): - url_p = urlparse.urlparse(self.pyfile.url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - - urls = (html_unescape(url.decode('unicode-escape').strip()) for url in urls) - - return [urlparse.urljoin(baseurl, url) if not urlparse.urlparse(url).scheme else url \ - for url in urls] - - - def download(self, url, *args, **kwargs): - if not url or not isinstance(url, basestring): - return - - self.correctCaptcha() - - return super(SimpleHoster, self).download(self.fixurl(url), *args, **kwargs) - - def checkFile(self): lastDownload = fs_encode(self.lastDownload) @@ -547,15 +365,10 @@ class SimpleHoster(Hoster): self.invalidCaptcha() self.retry(10, reason=_("Wrong captcha")) - elif not self.lastDownload or not os.path.exists(lastDownload): - self.lastDownload = "" - self.error(self.pyfile.error or _("No file downloaded")) + elif self.checkDownload({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}, file_size=self.info['size']): + self.error(_("Empty file")) else: - #@TODO: Move to Hoster in 0.4.10 - if os.stat(lastDownload).st_size < 1 or self.checkDownload({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}): - self.error(_("Empty file")) - self.logDebug("Checking last downloaded file with built-in rules") for r, p in self.FILE_ERRORS: errmsg = self.checkDownload({r: re.compile(p)}) @@ -577,7 +390,7 @@ class SimpleHoster(Hoster): self.html = f.read(50000) #@TODO: Recheck in 0.4.10 self.checkErrors() - self.logDebug("No file errors found") + self.logDebug("No file errors found") def checkErrors(self): @@ -790,50 +603,10 @@ class SimpleHoster(Hoster): self.link = m.group(1) - def longWait(self, wait_time=None, max_tries=3): - if wait_time and isinstance(wait_time, (int, long, float)): - time_str = "%dh %dm" % divmod(wait_time / 60, 60) - else: - wait_time = 900 - time_str = _("(unknown time)") - max_tries = 100 - - self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) - - self.wait(wait_time, True) - self.retry(max_tries=max_tries, reason=_("Download limit reached")) - - - def parseHtmlForm(self, attr_str="", input_names={}): - return parseHtmlForm(attr_str, self.html, input_names) - - - def checkTrafficLeft(self): - if not self.account: - return True - - traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] - - if traffic is None: - return False - elif traffic == -1: - return True - else: - size = self.pyfile.size / 1024 - self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) - return size <= traffic - - def retryFree(self): if not self.premium: return self.premium = False self.account = None - self.req = self.core.requestFactory.getRequest(self.__name__) - self.retries = -1 + self.req = self.core.requestFactory.getRequest(self.__name__) raise Retry(_("Fallback to free download")) - - - #@TODO: Remove in 0.4.10 - def wait(self, seconds=0, reconnect=None): - return _wait(self, seconds, reconnect) diff --git a/module/plugins/internal/SolveMedia.py b/module/plugins/internal/SolveMedia.py index d600ef03c..dde6223ed 100644 --- a/module/plugins/internal/SolveMedia.py +++ b/module/plugins/internal/SolveMedia.py @@ -35,7 +35,7 @@ class SolveMedia(Captcha): def challenge(self, key=None, html=None): key = key or self.retrieve_key(html) - html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", + html = self.plugin.load("http://api.solvemedia.com/papi/challenge.noscript", get={'k': key}) for i in xrange(1, 11): @@ -64,7 +64,7 @@ class SolveMedia(Captcha): self.plugin.invalidCaptcha() result = None - html = self.plugin.req.load("http://api.solvemedia.com/papi/verify.noscript", + html = self.plugin.load("http://api.solvemedia.com/papi/verify.noscript", post={'adcopy_response' : result, 'k' : key, 'l' : "en", @@ -83,7 +83,7 @@ class SolveMedia(Captcha): if "error" in html: self.logWarning("Captcha code was invalid") self.logDebug("Retry #%d" % i) - html = self.plugin.req.load(redirect) + html = self.plugin.load(redirect) else: break diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py index f230cedf3..c26a91775 100644 --- a/module/plugins/internal/XFSAccount.py +++ b/module/plugins/internal/XFSAccount.py @@ -5,7 +5,7 @@ import time import urlparse from module.plugins.internal.Account import Account -from module.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies +from module.plugins.internal.Plugin import parseHtmlForm, set_cookies class XFSAccount(Account): @@ -38,11 +38,6 @@ class XFSAccount(Account): LOGIN_FAIL_PATTERN = r'Incorrect Login or Password|account was banned|Error<' - def __init__(self, manager, accounts): #@TODO: remove in 0.4.10 - self.init() - return super(XFSAccount, self).__init__(manager, accounts) - - def init(self): if not self.HOSTER_DOMAIN: self.logError(_("Missing HOSTER_DOMAIN")) @@ -69,7 +64,7 @@ class XFSAccount(Account): 'leechtraffic': leechtraffic, 'premium' : premium} - html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) + html = self.load(self.HOSTER_URL, get={'op': "my_account"}, req=req) premium = True if re.search(self.PREMIUM_PATTERN, html) else False @@ -160,7 +155,7 @@ class XFSAccount(Account): if not self.LOGIN_URL: self.LOGIN_URL = urlparse.urljoin(self.HOSTER_URL, "login.html") - html = req.load(self.LOGIN_URL, decode=True) + html = self.load(self.LOGIN_URL, req=req) action, inputs = parseHtmlForm('name="FL"', html) if not inputs: @@ -175,7 +170,7 @@ class XFSAccount(Account): else: url = self.HOSTER_URL - html = req.load(url, post=inputs, decode=True) + html = self.load(url, post=inputs, req=req) if re.search(self.LOGIN_FAIL_PATTERN, html): self.wrongPassword() diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py index 84317c615..0f5bfd5d7 100644 --- a/module/plugins/internal/XFSCrypter.py +++ b/module/plugins/internal/XFSCrypter.py @@ -32,7 +32,7 @@ class XFSCrypter(SimpleCrypter): account = self.account else: account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") - account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) + account = self.core.accountManager.getAccountPlugin(account_name) if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: self.HOSTER_DOMAIN = account.HOSTER_DOMAIN diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index b2642bf1a..fbce5e2fb 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -61,7 +61,7 @@ class XFSHoster(SimpleHoster): if self.account: account = self.account else: - account = self.pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + account = self.core.accountManager.getAccountPlugin(self.__name__) if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: self.HOSTER_DOMAIN = account.HOSTER_DOMAIN @@ -95,7 +95,7 @@ class XFSHoster(SimpleHoster): self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.html = self.load(pyfile.url, post=data, decode=True) + self.html = self.load(pyfile.url, post=data) self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 1) @@ -110,7 +110,7 @@ class XFSHoster(SimpleHoster): self.logError(data['op'] if 'op' in data else _("UNKNOWN")) return "" - self.link = m.group(1).strip() #@TODO: Remove `.strip()` in 0.4.10 + self.link = m.group(1) def handlePremium(self, pyfile): @@ -166,7 +166,7 @@ class XFSHoster(SimpleHoster): if m is None: self.error(_("LINK_LEECH_PATTERN not found")) - header = self.load(m.group(1), just_header=True, decode=True) + header = self.load(m.group(1), just_header=True) if 'location' in header: #: Direct download link self.link = header['location'] |