diff options
Diffstat (limited to 'module/plugins/internal')
26 files changed, 2829 insertions, 1177 deletions
diff --git a/module/plugins/internal/Account.py b/module/plugins/internal/Account.py new file mode 100644 index 000000000..7632d15c2 --- /dev/null +++ b/module/plugins/internal/Account.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay +""" + +from random import choice +from time import time +from traceback import print_exc +from threading import RLock + +from Plugin import Base +from module.utils import compare_time, parseFileSize, lock + +class WrongPassword(Exception): + pass + + +class Account(Base): + """ + Base class for every Account plugin. + Just overwrite `login` and cookies will be stored and account becomes accessible in\ + associated hoster plugin. Plugin should also provide `loadAccountInfo` + """ + __name__ = "Account" + __version__ = "0.02" + __type__ = "account" + __description__ = """Account Plugin""" + __author_name__ = ("mkaay") + __author_mail__ = ("mkaay@mkaay.de") + + #: after that time [in minutes] pyload will relogin the account + login_timeout = 600 + #: account data will be reloaded after this time + info_threshold = 600 + + + def __init__(self, manager, accounts): + Base.__init__(self, manager.core) + + self.manager = manager + self.accounts = {} + self.infos = {} # cache for account information + self.lock = RLock() + + self.timestamps = {} + self.setAccounts(accounts) + self.init() + + def init(self): + pass + + def login(self, user, data, req): + """login into account, the cookies will be saved so user can be recognized + + :param user: loginname + :param data: data dictionary + :param req: `Request` instance + """ + pass + + @lock + def _login(self, user, data): + # set timestamp for login + self.timestamps[user] = time() + + req = self.getAccountRequest(user) + try: + self.login(user, data, req) + except WrongPassword: + self.logWarning( + _("Could not login with account %(user)s | %(msg)s") % {"user": user + , "msg": _("Wrong Password")}) + data["valid"] = False + + except Exception, e: + self.logWarning( + _("Could not login with account %(user)s | %(msg)s") % {"user": user + , "msg": e}) + data["valid"] = False + if self.core.debug: + print_exc() + finally: + if req: req.close() + + def relogin(self, user): + req = self.getAccountRequest(user) + if req: + req.cj.clear() + req.close() + if user in self.infos: + del self.infos[user] #delete old information + + self._login(user, self.accounts[user]) + + def setAccounts(self, accounts): + self.accounts = accounts + for user, data in self.accounts.iteritems(): + self._login(user, data) + self.infos[user] = {} + + def updateAccounts(self, user, password=None, options={}): + """ updates account and return true if anything changed """ + + if user in self.accounts: + self.accounts[user]["valid"] = True #do not remove or accounts will not login + if password: + self.accounts[user]["password"] = password + self.relogin(user) + return True + if options: + before = self.accounts[user]["options"] + self.accounts[user]["options"].update(options) + return self.accounts[user]["options"] != before + else: + self.accounts[user] = {"password": password, "options": options, "valid": True} + self._login(user, self.accounts[user]) + return True + + def removeAccount(self, user): + if user in self.accounts: + del self.accounts[user] + if user in self.infos: + del self.infos[user] + if user in self.timestamps: + del self.timestamps[user] + + @lock + def getAccountInfo(self, name, force=False): + """retrieve account infos for an user, do **not** overwrite this method!\\ + just use it to retrieve infos in hoster plugins. see `loadAccountInfo` + + :param name: username + :param force: reloads cached account information + :return: dictionary with information + """ + data = Account.loadAccountInfo(self, name) + + if force or name not in self.infos: + self.logDebug("Get Account Info for %s" % name) + req = self.getAccountRequest(name) + + try: + infos = self.loadAccountInfo(name, req) + if not type(infos) == dict: + raise Exception("Wrong return format") + except Exception, e: + infos = {"error": str(e)} + + if req: req.close() + + self.logDebug("Account Info: %s" % str(infos)) + + infos["timestamp"] = time() + self.infos[name] = infos + elif "timestamp" in self.infos[name] and self.infos[name][ + "timestamp"] + self.info_threshold * 60 < time(): + self.logDebug("Reached timeout for account data") + self.scheduleRefresh(name) + + data.update(self.infos[name]) + return data + + def isPremium(self, user): + info = self.getAccountInfo(user) + return info["premium"] + + def loadAccountInfo(self, name, req=None): + """this should be overwritten in account plugin,\ + and retrieving account information for user + + :param name: + :param req: `Request` instance + :return: + """ + return { + "validuntil": None, # -1 for unlimited + "login": name, + #"password": self.accounts[name]["password"], #@XXX: security + "options": self.accounts[name]["options"], + "valid": self.accounts[name]["valid"], + "trafficleft": None, # in kb, -1 for unlimited + "maxtraffic": None, + "premium": True, #useful for free accounts + "timestamp": 0, #time this info was retrieved + "type": self.__name__, + } + + def getAllAccounts(self, force=False): + return [self.getAccountInfo(user, force) for user, data in self.accounts.iteritems()] + + def getAccountRequest(self, user=None): + if not user: + user, data = self.selectAccount() + if not user: + return None + + req = self.core.requestFactory.getRequest(self.__name__, user) + return req + + def getAccountCookies(self, user=None): + if not user: + user, data = self.selectAccount() + if not user: + return None + + cj = self.core.requestFactory.getCookieJar(self.__name__, user) + return cj + + def getAccountData(self, user): + return self.accounts[user] + + def selectAccount(self): + """ returns an valid account name and data""" + usable = [] + for user, data in self.accounts.iteritems(): + if not data["valid"]: continue + + if "time" in data["options"] and data["options"]["time"]: + time_data = "" + try: + time_data = data["options"]["time"][0] + start, end = time_data.split("-") + if not compare_time(start.split(":"), end.split(":")): + continue + except: + self.logWarning(_("Your Time %s has wrong format, use: 1:22-3:44") % time_data) + + if user in self.infos: + if "validuntil" in self.infos[user]: + if self.infos[user]["validuntil"] > 0 and time() > self.infos[user]["validuntil"]: + continue + if "trafficleft" in self.infos[user]: + if self.infos[user]["trafficleft"] == 0: + continue + + usable.append((user, data)) + + if not usable: return None, None + return choice(usable) + + def canUse(self): + return False if self.selectAccount() == (None, None) else True + + def parseTraffic(self, string): #returns kbyte + return parseFileSize(string) / 1024 + + def wrongPassword(self): + raise WrongPassword + + def empty(self, user): + if user in self.infos: + self.logWarning(_("Account %s has not enough traffic, checking again in 30min") % user) + + self.infos[user].update({"trafficleft": 0}) + self.scheduleRefresh(user, 30 * 60) + + def expired(self, user): + if user in self.infos: + self.logWarning(_("Account %s is expired, checking again in 1h") % user) + + self.infos[user].update({"validuntil": time() - 1}) + self.scheduleRefresh(user, 60 * 60) + + def scheduleRefresh(self, user, time=0, force=True): + """ add task to refresh account info to sheduler """ + self.logDebug("Scheduled Account refresh for %s in %s seconds." % (user, time)) + self.core.scheduler.addJob(time, self.getAccountInfo, [user, force]) + + @lock + def checkLogin(self, user): + """ checks if user is still logged in """ + if user in self.timestamps: + if self.timestamps[user] + self.login_timeout * 60 < time(): + self.logDebug("Reached login timeout for %s" % user) + self.relogin(user) + return False + + return True diff --git a/module/plugins/internal/AdYouLike.py b/module/plugins/internal/AdYouLike.py new file mode 100644 index 000000000..a9c194dda --- /dev/null +++ b/module/plugins/internal/AdYouLike.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +import re + +from module.common.json_layer import json_loads +from module.plugins.internal.Captcha import Captcha + + +class AdYouLike(Captcha): + __name__ = "AdYouLike" + __type__ = "captcha" + __version__ = "0.06" + + __description__ = """AdYouLike captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' + CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' + + + def detect_key(self, html=None): + html = html or self.retrieve_html() + + m = re.search(self.AYL_PATTERN, html) + n = re.search(self.CALLBACK_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) + self.logDebug("Ayl: %s | Callback: %s" % self.key) + return self.key #: key is the tuple(ayl, callback) + else: + self.logWarning("Ayl or callback pattern not found") + return None + + + def challenge(self, key=None, html=None): + ayl, callback = key or self.retrieve_key(html) + + # {"adyoulike":{"key":"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, + # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} + ayl = json_loads(ayl) + + html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", + get={'key' : ayl['adyoulike']['key'], + 'env' : ayl['all']['env'], + 'callback': callback}) + try: + challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) + + except AttributeError: + self.fail(_("AdYouLike challenge pattern not found")) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(ayl, challenge), challenge + + + def result(self, server, challenge): + # Adyoulike.g._jsonp_5579316662423138 + # ({"translations":{"fr":{"instructions_visual":"Recopiez « Soonnight » ci-dessous :"}}, + # "site_under":true,"clickable":true,"pixels":{"VIDEO_050":[],"DISPLAY":[],"VIDEO_000":[],"VIDEO_100":[], + # "VIDEO_025":[],"VIDEO_075":[]},"medium_type":"image/adyoulike", + # "iframes":{"big":"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" + # height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},"shares":{},"id":256, + # "token":"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1","formats":{"small":{"y":300,"x":0,"w":300,"h":60}, + # "big":{"y":0,"x":0,"w":300,"h":250},"hover":{"y":440,"x":0,"w":300,"h":60}}, + # "tid":"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) + + if isinstance(server, basestring): + server = json_loads(server) + + if isinstance(challenge, basestring): + challenge = json_loads(challenge) + + try: + instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] + result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() + + except AttributeError: + self.fail(_("AdYouLike result not found")) + + result = {'_ayl_captcha_engine' : "adyoulike", + '_ayl_env' : server['all']['env'], + '_ayl_tid' : challenge['tid'], + '_ayl_token_challenge': challenge['token'], + '_ayl_response' : response} + + self.logDebug("Result: %s" % result) + + return result diff --git a/module/plugins/internal/AdsCaptcha.py b/module/plugins/internal/AdsCaptcha.py new file mode 100644 index 000000000..9cab99151 --- /dev/null +++ b/module/plugins/internal/AdsCaptcha.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +import random +import re + +from module.plugins.internal.Captcha import Captcha + + +class AdsCaptcha(Captcha): + __name__ = "AdsCaptcha" + __type__ = "captcha" + __version__ = "0.09" + + __description__ = """AdsCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?CaptchaId=(\d+)' + PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)' + + + def detect_key(self, html=None): + html = html or self.retrieve_html() + + m = re.search(self.PUBLICKEY_PATTERN, html) + n = re.search(self.CAPTCHAID_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) #: key is the tuple(PublicKey, CaptchaId) + self.logDebug("Key: %s | ID: %s" % self.key) + return self.key + else: + self.logWarning("Key or id pattern not found") + return None + + + def challenge(self, key=None, html=None): + PublicKey, CaptchaId = key or self.retrieve_key(html) + + html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", + get={'CaptchaId': CaptchaId, + 'PublicKey': PublicKey}) + try: + challenge = re.search("challenge: '(.+?)',", html).group(1) + server = re.search("server: '(.+?)',", html).group(1) + + except AttributeError: + self.fail(_("AdsCaptcha challenge pattern not found")) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(server, challenge), challenge + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, + get={'cid': challenge, 'dummy': random.random()}, + cookies=True, + imgtype="jpg") + + self.logDebug("Result: %s" % result) + + return result diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py new file mode 100644 index 000000000..e5a44d750 --- /dev/null +++ b/module/plugins/internal/Captcha.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.Plugin import Base + + +#@TODO: Extend (new) Plugin class; remove all `html` args +class Captcha(Base): + __name__ = "Captcha" + __type__ = "captcha" + __version__ = "0.30" + + __description__ = """Base captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + key = None #: last key detected + + + def __init__(self, plugin): + self.plugin = plugin + super(Captcha, self).__init__(plugin.core) + + + #@TODO: Recheck in 0.4.10 + def fail(self, reason): + self.plugin.fail(reason) + raise AttributeError(reason) + + + #@TODO: Recheck in 0.4.10 + def retrieve_key(self, html): + if self.detect_key(html): + return self.key + else: + self.fail(_("%s key not found") % self.__name__) + + + #@TODO: Recheck in 0.4.10 + def retrieve_html(self): + if hasattr(self.plugin, "html") and self.plugin.html: + return self.plugin.html + else: + self.fail(_("%s html not found") % self.__name__) + + + def detect_key(self, html=None): + raise NotImplementedError + + + def challenge(self, key=None, html=None): + raise NotImplementedError + + + def result(self, server, challenge): + raise NotImplementedError diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py deleted file mode 100644 index b429fd6e0..000000000 --- a/module/plugins/internal/CaptchaService.py +++ /dev/null @@ -1,353 +0,0 @@ -# -*- coding: utf-8 -*- - -import re - -from base64 import urlsafe_b64encode -from random import random - -from module.common.json_layer import json_loads - - -class CaptchaService: - __name__ = "CaptchaService" - __version__ = "0.16" - - __description__ = """Base captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - KEY_PATTERN = None - - key = None #: last key detected - - - def __init__(self, plugin): - self.plugin = plugin - - - def detect_key(self, html=None): - if not html: - if hasattr(self.plugin, "html") and self.plugin.html: - html = self.plugin.html - else: - errmsg = _("%s html not found") % self.__name__ - self.plugin.fail(errmsg) #@TODO: replace all plugin.fail(errmsg) with plugin.error(errmsg) in 0.4.10 - raise TypeError(errmsg) - - m = re.search(self.KEY_PATTERN, html) - if m: - self.key = m.group(1).strip() - self.plugin.logDebug("%s key: %s" % (self.__name__, self.key)) - return self.key - else: - self.plugin.logDebug("%s key not found" % self.__name__) - return None - - - def challenge(self, key=None): - raise NotImplementedError - - - def result(self, server, challenge): - raise NotImplementedError - - -class ReCaptcha(CaptchaService): - __name__ = "ReCaptcha" - __version__ = "0.09" - - __description__ = """ReCaptcha captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - KEY_PATTERN = r'(?:class="g-recaptcha"\s+data-sitekey="|recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=)([\w-]+)' - KEY_AJAX_PATTERN = r'Recaptcha\.create\s*\(\s*["\']([\w-]+)' - - - def detect_key(self, html=None): - if not html: - if hasattr(self.plugin, "html") and self.plugin.html: - html = self.plugin.html - else: - errmsg = _("ReCaptcha html not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - m = re.search(self.KEY_PATTERN, html) or re.search(self.KEY_AJAX_PATTERN, html) - if m: - self.key = m.group(1).strip() - self.plugin.logDebug("ReCaptcha key: %s" % self.key) - return self.key - else: - self.plugin.logDebug("ReCaptcha key not found") - return None - - - def challenge(self, key=None, userverify=False): - if not key: - if self.detect_key(): - key = self.key - else: - errmsg = _("ReCaptcha key not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={'k': key}) - try: - challenge = re.search("challenge : '(.+?)',", html).group(1) - server = re.search("server : '(.+?)',", html).group(1) - - except AttributeError: - errmsg = _("ReCaptcha challenge pattern not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - self.plugin.logDebug("ReCaptcha challenge: %s" % challenge) - - response = challenge, self.result(server, challenge) - - return self.userverify(*response) if userverify else response - - - def userverify(self, challenge, result): - response = self.plugin.req.load("https://www.google.com/recaptcha/api2/userverify", - post={'c' : challenge, - 'response': urlsafe_b64encode('{"response":"%s"}' % result)}) - try: - return re.search(r'"uvresp","(.+?)"', response).group(1) - - except AttributeError: - errmsg = _("ReCaptcha userverify response not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - - def result(self, server, challenge): - result = self.plugin.decryptCaptcha("%simage" % server, - get={'c': challenge}, - cookies=True, - forceUser=True, - imgtype="jpg") - - self.plugin.logDebug("ReCaptcha result: %s" % result) - - return result - - -class AdsCaptcha(CaptchaService): - __name__ = "AdsCaptcha" - __version__ = "0.06" - - __description__ = """AdsCaptcha captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*CaptchaId=(\d+)' - PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*PublicKey=([\w-]+)' - - - def detect_key(self, html=None): - if not html: - if hasattr(self.plugin, "html") and self.plugin.html: - html = self.plugin.html - else: - errmsg = _("AdsCaptcha html not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - m = re.search(self.PUBLICKEY_PATTERN, html) - n = re.search(self.CAPTCHAID_PATTERN, html) - if m and n: - self.key = (m.group(1).strip(), n.group(1).strip()) #: key is the tuple(PublicKey, CaptchaId) - self.plugin.logDebug("AdsCaptcha key|id: %s | %s" % self.key) - return self.key - else: - self.plugin.logDebug("AdsCaptcha key or id not found") - return None - - - def challenge(self, key=None): - if not key: - if self.detect_key(): - key = self.key - else: - errmsg = _("AdsCaptcha key not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - PublicKey, CaptchaId = key - - html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", get={'CaptchaId': CaptchaId, 'PublicKey': PublicKey}) - try: - challenge = re.search("challenge: '(.+?)',", html).group(1) - server = re.search("server: '(.+?)',", html).group(1) - - except AttributeError: - errmsg = _("AdsCaptcha challenge pattern not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - self.plugin.logDebug("AdsCaptcha challenge: %s" % challenge) - - return challenge, self.result(server, challenge) - - - def result(self, server, challenge): - result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, - get={'cid': challenge, 'dummy': random()}, - cookies=True, - imgtype="jpg") - - self.plugin.logDebug("AdsCaptcha result: %s" % result) - - return result - - -class SolveMedia(CaptchaService): - __name__ = "SolveMedia" - __version__ = "0.06" - - __description__ = """SolveMedia captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' - - - def challenge(self, key=None): - if not key: - if self.detect_key(): - key = self.key - else: - errmsg = _("SolveMedia key not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", get={'k': key}) - try: - challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="([^"]+)">', - html).group(1) - server = "http://api.solvemedia.com/papi/media" - - except AttributeError: - errmsg = _("SolveMedia challenge pattern not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - self.plugin.logDebug("SolveMedia challenge: %s" % challenge) - - return challenge, self.result(server, challenge) - - - def result(self, server, challenge): - result = self.plugin.decryptCaptcha(server, get={'c': challenge}, imgtype="gif") - - self.plugin.logDebug("SolveMedia result: %s" % result) - - return result - - -class AdYouLike(CaptchaService): - __name__ = "AdYouLike" - __version__ = "0.02" - - __description__ = """AdYouLike captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - - - AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' - CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' - - - def detect_key(self, html=None): - if not html: - if hasattr(self.plugin, "html") and self.plugin.html: - html = self.plugin.html - else: - errmsg = _("AdYouLike html not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - m = re.search(self.AYL_PATTERN, html) - n = re.search(self.CALLBACK_PATTERN, html) - if m and n: - self.key = (m.group(1).strip(), n.group(1).strip()) - self.plugin.logDebug("AdYouLike ayl|callback: %s | %s" % self.key) - return self.key #: key is the tuple(ayl, callback) - else: - self.plugin.logDebug("AdYouLike ayl or callback not found") - return None - - - def challenge(self, key=None): - if not key: - if self.detect_key(): - key = self.key - else: - errmsg = _("AdYouLike key not found") - self.plugin.fail(errmsg) - raise TypeError(errmsg) - - ayl, callback = key - - # {"adyoulike":{"key":"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, - # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} - ayl = json_loads(ayl) - - html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", - get={'key' : ayl['adyoulike']['key'], - 'env' : ayl['all']['env'], - 'callback': callback}) - try: - challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) - - except AttributeError: - errmsg = _("AdYouLike challenge pattern not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - self.plugin.logDebug("AdYouLike challenge: %s" % challenge) - - return self.result(ayl, challenge) - - - def result(self, server, challenge): - # Adyoulike.g._jsonp_5579316662423138 - # ({"translations":{"fr":{"instructions_visual":"Recopiez « Soonnight » ci-dessous :"}}, - # "site_under":true,"clickable":true,"pixels":{"VIDEO_050":[],"DISPLAY":[],"VIDEO_000":[],"VIDEO_100":[], - # "VIDEO_025":[],"VIDEO_075":[]},"medium_type":"image/adyoulike", - # "iframes":{"big":"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" - # height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},"shares":{},"id":256, - # "token":"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1","formats":{"small":{"y":300,"x":0,"w":300,"h":60}, - # "big":{"y":0,"x":0,"w":300,"h":250},"hover":{"y":440,"x":0,"w":300,"h":60}}, - # "tid":"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) - - if isinstance(server, basestring): - server = json_loads(server) - - if isinstance(challenge, basestring): - challenge = json_loads(challenge) - - try: - instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] - result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() - - except AttributeError: - errmsg = _("AdYouLike result not found") - self.plugin.fail(errmsg) - raise AttributeError(errmsg) - - result = {'_ayl_captcha_engine' : "adyoulike", - '_ayl_env' : server['all']['env'], - '_ayl_tid' : challenge['tid'], - '_ayl_token_challenge': challenge['token'], - '_ayl_response' : response} - - self.plugin.logDebug("AdYouLike result: %s" % result) - - return result diff --git a/module/plugins/internal/Container.py b/module/plugins/internal/Container.py new file mode 100644 index 000000000..43ca95e77 --- /dev/null +++ b/module/plugins/internal/Container.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay +""" + +from module.plugins.internal.Crypter import Crypter + +from os.path import join, exists, basename +from os import remove +import re + +class Container(Crypter): + __name__ = "Container" + __version__ = "0.02" + __pattern__ = None + __type__ = "container" + __description__ = """Base container plugin""" + __author_name__ = ("mkaay") + __author_mail__ = ("mkaay@mkaay.de") + + + def preprocessing(self, thread): + """prepare""" + + self.setup() + self.thread = thread + + self.loadToDisk() + + self.decrypt(self.pyfile) + self.deleteTmp() + + self.createPackages() + + + def loadToDisk(self): + """loads container to disk if its stored remotely and overwrite url, + or check existent on several places at disk""" + + if self.pyfile.url.startswith("http"): + self.pyfile.name = re.findall("([^\/=]+)", self.pyfile.url)[-1] + content = self.load(self.pyfile.url) + self.pyfile.url = join(self.config["general"]["download_folder"], self.pyfile.name) + f = open(self.pyfile.url, "wb" ) + f.write(content) + f.close() + + else: + self.pyfile.name = basename(self.pyfile.url) + if not exists(self.pyfile.url): + if exists(join(pypath, self.pyfile.url)): + self.pyfile.url = join(pypath, self.pyfile.url) + else: + self.fail(_("File not exists.")) + + + def deleteTmp(self): + if self.pyfile.name.startswith("tmp_"): + remove(self.pyfile.url) + + diff --git a/module/plugins/internal/Crypter.py b/module/plugins/internal/Crypter.py new file mode 100644 index 000000000..b87ed1882 --- /dev/null +++ b/module/plugins/internal/Crypter.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay +""" + +from module.plugins.internal.Plugin import Plugin + +class Crypter(Plugin): + __name__ = "Crypter" + __version__ = "0.02" + __pattern__ = None + __type__ = "container" + __description__ = """Base crypter plugin""" + __author_name__ = ("mkaay") + __author_mail__ = ("mkaay@mkaay.de") + + def __init__(self, pyfile): + Plugin.__init__(self, pyfile) + + #: Put all packages here. It's a list of tuples like: ( name, [list of links], folder ) + self.packages = [] + + #: List of urls, pyLoad will generate packagenames + self.urls = [] + + self.multiDL = True + self.limitDL = 0 + + + def preprocessing(self, thread): + """prepare""" + self.setup() + self.thread = thread + + self.decrypt(self.pyfile) + + self.createPackages() + + + def decrypt(self, pyfile): + raise NotImplementedError + + def createPackages(self): + """ create new packages from self.packages """ + for pack in self.packages: + + self.log.debug("Parsed package %(name)s with %(len)d links" % { "name" : pack[0], "len" : len(pack[1]) } ) + + links = [x.decode("utf-8") for x in pack[1]] + + pid = self.core.api.addPackage(pack[0], links, self.pyfile.package().queue) + + if self.pyfile.package().password: + self.core.api.setPackageData(pid, {"password": self.pyfile.package().password}) + + if self.urls: + self.core.api.generateAndAddPackages(self.urls) + diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py index 0fa23eef3..ef0d12b91 100644 --- a/module/plugins/internal/DeadCrypter.py +++ b/module/plugins/internal/DeadCrypter.py @@ -1,27 +1,24 @@ # -*- coding: utf-8 -*- -from urllib import unquote -from urlparse import urlparse - +from module.plugins.internal.Crypter import Crypter from module.plugins.internal.SimpleCrypter import create_getInfo -from module.plugins.Crypter import Crypter as _Crypter -class DeadCrypter(_Crypter): +class DeadCrypter(Crypter): __name__ = "DeadCrypter" __type__ = "crypter" - __version__ = "0.04" + __version__ = "0.06" __pattern__ = r'^unmatchable$' - __description__ = """ Crypter is no longer available """ + __description__ = """Crypter is no longer available""" __license__ = "GPLv3" __authors__ = [("stickell", "l.stickell@yahoo.it")] @classmethod - def apiInfo(cls, url="", get={}, post={}): - api = super(DeadCrypter, self).apiInfo(url, get, post) + def apiInfo(cls, *args, **kwargs): + api = super(DeadCrypter, cls).apiInfo(*args, **kwargs) api['status'] = 1 return api diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py index cc7adf4df..accb15a78 100644 --- a/module/plugins/internal/DeadHoster.py +++ b/module/plugins/internal/DeadHoster.py @@ -1,27 +1,24 @@ # -*- coding: utf-8 -*- -from urllib import unquote -from urlparse import urlparse - +from module.plugins.internal.Hoster import Hoster from module.plugins.internal.SimpleHoster import create_getInfo -from module.plugins.Hoster import Hoster as _Hoster -class DeadHoster(_Hoster): +class DeadHoster(Hoster): __name__ = "DeadHoster" __type__ = "hoster" - __version__ = "0.14" + __version__ = "0.16" __pattern__ = r'^unmatchable$' - __description__ = """ Hoster is no longer available """ + __description__ = """Hoster is no longer available""" __license__ = "GPLv3" __authors__ = [("zoidberg", "zoidberg@mujmail.cz")] @classmethod - def apiInfo(cls, url="", get={}, post={}): - api = super(DeadHoster, self).apiInfo(url, get, post) + def apiInfo(cls, *args, **kwargs): + api = super(DeadHoster, cls).apiInfo(*args, **kwargs) api['status'] = 1 return api diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 0b2462dac..1a98060d9 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -1,5 +1,11 @@ # -*- coding: utf-8 -*- +import os +import re + +from module.PyFile import PyFile + + class ArchiveError(Exception): pass @@ -14,28 +20,36 @@ class PasswordError(Exception): class Extractor: __name__ = "Extractor" - __version__ = "0.13" + __version__ = "0.24" __description__ = """Base extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "ranan@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com"), + ("Immenz" , "immenz@gmx.net" )] EXTENSIONS = [] + REPAIR = False + VERSION = "" @classmethod - def checkDeps(cls): - """ Check if system statisfy dependencies - :return: boolean - """ - return True + def isArchive(cls, filename): + name = os.path.basename(filename).lower() + return any(name.endswith(ext) for ext in cls.EXTENSIONS) @classmethod - def isArchive(cls, file): - raise NotImplementedError + def isMultipart(cls, filename): + return False + + + @classmethod + def isUsable(cls): + """ Check if system statisfy dependencies + :return: boolean + """ + return None @classmethod @@ -45,28 +59,29 @@ class Extractor: :return: List of targets, id tuple list """ targets = [] - - for file, id in files_ids: - if cls.isArchive(file): - targets.append((file, id)) - + processed = [] + + for fname, id, fout in files_ids: + if cls.isArchive(fname): + pname = re.sub(cls.re_multipart, '', fname) if cls.isMultipart(fname) else os.path.splitext(fname)[0] + if pname not in processed: + processed.append(pname) + targets.append((fname, id, fout)) return targets - def __init__(self, m, file, out, password, fullpath, overwrite, excludefiles, renice, delete, keepbroken): - """Initialize extractor for specific file - - :param m: ExtractArchive Hook plugin - :param file: Absolute filepath - :param out: Absolute path to destination directory - :param fullpath: extract to fullpath - :param overwrite: Overwrite existing archives - :param renice: Renice value - """ - self.m = m - self.file = file + def __init__(self, manager, filename, out, + fullpath=True, + overwrite=False, + excludefiles=[], + renice=0, + delete='No', + keepbroken=False, + fid=None): + """ Initialize extractor for specific file """ + self.manager = manager + self.filename = filename self.out = out - self.password = password self.fullpath = fullpath self.overwrite = overwrite self.excludefiles = excludefiles @@ -75,50 +90,45 @@ class Extractor: self.keepbroken = keepbroken self.files = [] #: Store extracted files here + pyfile = self.manager.core.files.getFile(fid) if fid else None + self.notifyProgress = lambda x: pyfile.setProgress(x) if pyfile else lambda x: None + def init(self): """ Initialize additional data structures """ pass - def verify(self): - """Check if password if needed. Raise ArchiveError if integrity is - questionable. + def check(self): + """Quick Check by listing content of archive. + Raises error if password is needed, integrity is questionable or else. + :raises PasswordError + :raises CRCError :raises ArchiveError """ - pass - + raise NotImplementedError - def isPassword(self, password): - """ Check if the given password is/might be correct. - If it can not be decided at this point return true. + def verify(self): + """Testing with Extractors buildt-in method + Raises error if password is needed, integrity is questionable or else. - :param password: - :return: boolean + :raises PasswordError + :raises CRCError + :raises ArchiveError """ - if isinstance(password, basestring): - return True - else: - return False - - - def setPassword(self, password): - if self.isPassword(password): - self.password = password - return True - else: - return False + raise NotImplementedError def repair(self): - return False + return None - def extract(self, progress=lambda x: None): + def extract(self, password=None): """Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status + :param password password to use :raises PasswordError :raises CRCError :raises ArchiveError @@ -132,9 +142,9 @@ class Extractor: :return: List with paths of files to delete """ - raise NotImplementedError + return [self.filename] - def getExtractedFiles(self): + def list(self, password=None): """Populate self.files at some point while extracting""" return self.files diff --git a/module/plugins/internal/Hook.py b/module/plugins/internal/Hook.py new file mode 100644 index 000000000..5f2639dce --- /dev/null +++ b/module/plugins/internal/Hook.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay + @interface-version: 0.2 +""" + +from traceback import print_exc + +from Plugin import Base + +class Expose(object): + """ used for decoration to declare rpc services """ + + def __new__(cls, f, *args, **kwargs): + hookManager.addRPC(f.__module__, f.func_name, f.func_doc) + return f + +def threaded(f): + def run(*args,**kwargs): + hookManager.startThread(f, *args, **kwargs) + return run + +class Hook(Base): + """ + Base class for hook plugins. + """ + __name__ = "Hook" + __version__ = "0.02" + __type__ = "hook" + __threaded__ = [] + __config__ = [ ("name", "type", "desc" , "default") ] + __description__ = """interface for hook""" + __author_name__ = ("mkaay", "RaNaN") + __author_mail__ = ("mkaay@mkaay.de", "RaNaN@pyload.org") + + #: automatically register event listeners for functions, attribute will be deleted dont use it yourself + event_map = None + + # Alternative to event_map + #: List of events the plugin can handle, name the functions exactly like eventname. + event_list = None # dont make duplicate entries in event_map + + + #: periodic call interval in secondc + interval = 60 + + def __init__(self, core, manager): + Base.__init__(self, core) + + #: Provide information in dict here, usable by API `getInfo` + self.info = None + + #: Callback of periodical job task, used by hookmanager + self.cb = None + + #: `HookManager` + self.manager = manager + + #register events + if self.event_map: + for event, funcs in self.event_map.iteritems(): + if type(funcs) in (list, tuple): + for f in funcs: + self.manager.addEvent(event, getattr(self,f)) + else: + self.manager.addEvent(event, getattr(self,funcs)) + + #delete for various reasons + self.event_map = None + + if self.event_list: + for f in self.event_list: + self.manager.addEvent(f, getattr(self,f)) + + self.event_list = None + + self.initPeriodical() + self.setup() + + def initPeriodical(self): + if self.interval >=1: + self.cb = self.core.scheduler.addJob(0, self._periodical, threaded=False) + + def _periodical(self): + try: + if self.isActivated(): self.periodical() + except Exception, e: + self.core.log.error(_("Error executing hooks: %s") % str(e)) + if self.core.debug: + print_exc() + + self.cb = self.core.scheduler.addJob(self.interval, self._periodical, threaded=False) + + + def __repr__(self): + return "<Hook %s>" % self.__name__ + + def setup(self): + """ more init stuff if needed """ + pass + + def unload(self): + """ called when hook was deactivated """ + pass + + def isActivated(self): + """ checks if hook is activated""" + return self.config.getPlugin(self.__name__, "activated") + + + #event methods - overwrite these if needed + def coreReady(self): + pass + + def coreExiting(self): + pass + + def downloadPreparing(self, pyfile): + pass + + def downloadFinished(self, pyfile): + pass + + def downloadFailed(self, pyfile): + pass + + def packageFinished(self, pypack): + pass + + def beforeReconnecting(self, ip): + pass + + def afterReconnecting(self, ip): + pass + + def periodical(self): + pass + + def newCaptchaTask(self, task): + """ new captcha task for the plugin, it MUST set the handler and timeout or will be ignored """ + pass + + def captchaCorrect(self, task): + pass + + def captchaInvalid(self, task): + pass
\ No newline at end of file diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py new file mode 100644 index 000000000..3fed8a7c6 --- /dev/null +++ b/module/plugins/internal/Hoster.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay +""" + +from module.plugins.internal.Plugin import Plugin + +def getInfo(self): + #result = [ .. (name, size, status, url) .. ] + return + +class Hoster(Plugin): + __name__ = "Hoster" + __version__ = "0.02" + __pattern__ = None + __type__ = "hoster" + __description__ = """Base hoster plugin""" + __author_name__ = ("mkaay") + __author_mail__ = ("mkaay@mkaay.de") diff --git a/module/plugins/internal/MultiHook.py b/module/plugins/internal/MultiHook.py index a3b266679..4a2c6654b 100644 --- a/module/plugins/internal/MultiHook.py +++ b/module/plugins/internal/MultiHook.py @@ -1,38 +1,32 @@ # -*- coding: utf-8 -*- import re +import time +import traceback -from time import sleep - -from module.plugins.Hook import Hook +from module.plugins.internal.Hook import Hook from module.utils import decode, remove_chars class MultiHook(Hook): __name__ = "MultiHook" __type__ = "hook" - __version__ = "0.35" + __version__ = "0.46" - __config__ = [("pluginmode" , "all;listed;unlisted", "Use for plugins" , "all"), - ("pluginlist" , "str" , "Plugin list (comma separated)" , "" ), - ("revertfailed" , "bool" , "Revert to standard download if fails", True ), - ("retry" , "int" , "Number of retries before revert" , 10 ), - ("retryinterval" , "int" , "Retry interval in minutes" , 1 ), - ("reload" , "bool" , "Reload plugin list" , True ), - ("reloadinterval", "int" , "Reload interval in hours" , 12 )] + __config__ = [("pluginmode" , "all;listed;unlisted", "Use for plugins" , "all"), + ("pluginlist" , "str" , "Plugin list (comma separated)", "" ), + ("reload" , "bool" , "Reload plugin list" , True ), + ("reloadinterval", "int" , "Reload interval in hours" , 12 )] __description__ = """Hook plugin for multi hoster/crypter""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org"), + __authors__ = [("pyLoad Team" , "admin@pyload.org" ), ("Walter Purcaro", "vuolter@gmail.com")] - MIN_INTERVAL = 1 * 60 * 60 + MIN_RELOAD_INTERVAL = 1 * 60 * 60 #: 1 hour DOMAIN_REPLACEMENTS = [(r'180upload\.com' , "hundredeightyupload.com"), - (r'1fichier\.com' , "onefichier.com" ), - (r'2shared\.com' , "twoshared.com" ), - (r'4shared\.com' , "fourshared.com" ), (r'bayfiles\.net' , "bayfiles.com" ), (r'cloudnator\.com' , "shragle.com" ), (r'dfiles\.eu' , "depositfiles.com" ), @@ -48,10 +42,21 @@ class MultiHook(Hook): (r'uploaded\.net' , "uploaded.to" ), (r'uploadhero\.co' , "uploadhero.com" ), (r'zshares\.net' , "zshare.net" ), - (r'\d+.+' , "X\0" )] + (r'^1' , "one" ), + (r'^2' , "two" ), + (r'^3' , "three" ), + (r'^4' , "four" ), + (r'^5' , "five" ), + (r'^6' , "six" ), + (r'^7' , "seven" ), + (r'^8' , "eight" ), + (r'^9' , "nine" ), + (r'^0' , "zero" )] def setup(self): + self.info = {} #@TODO: Remove in 0.4.10 + self.plugins = [] self.supported = [] self.new_supported = [] @@ -62,23 +67,22 @@ class MultiHook(Hook): self.pluginname = None self.plugintype = None - self._initPlugin() + self.initPlugin() - def _initPlugin(self): - plugin, type = self.core.pluginManager.findPlugin(self.__name__) + def initPlugin(self): + self.pluginname = self.__name__.rsplit("Hook", 1)[0] + plugin, self.plugintype = self.core.pluginManager.findPlugin(self.pluginname) - if not plugin: + if plugin: + self.pluginmodule = self.core.pluginManager.loadModule(self.plugintype, self.pluginname) + self.pluginclass = getattr(self.pluginmodule, self.pluginname) + else: self.logWarning("Hook plugin will be deactivated due missing plugin reference") self.setConfig('activated', False) - else: - self.pluginname = self.__name__ - self.plugintype = type - self.pluginmodule = self.core.pluginManager.loadModule(type, self.__name__) - self.pluginclass = getattr(self.pluginmodule, self.__name__) - def _loadAccount(self): + def loadAccount(self): self.account = self.core.accountManager.getAccountPlugin(self.pluginname) if self.account and not self.account.canUse(): @@ -89,10 +93,6 @@ class MultiHook(Hook): self.setConfig('activated', False) - def coreReady(self): - self._loadAccount() - - def getURL(self, *args, **kwargs): #@TODO: Remove in 0.4.10 """ see HTTPRequest for argument list """ h = pyreq.getHTTPRequest(timeout=120) @@ -106,7 +106,7 @@ class MultiHook(Hook): return rep - def getConfig(self, option, default=''): + def getConfig(self, option, default=''): #@TODO: Remove in 0.4.10 """getConfig with default value - sublass may not implements all config options""" try: return self.getConf(option) @@ -118,18 +118,18 @@ class MultiHook(Hook): def pluginsCached(self): if self.plugins: return self.plugins - - for _i in xrange(3): + + for _i in xrange(2): try: - pluginset = self._pluginSet(self.getHosters() if self.plugintype == "hoster" else self.getCrypters()) - - except Exception, e: - self.logError(e, "Waiting 1 minute and retry") - sleep(60) - - else: + pluginset = self._pluginSet(self.getHosters()) break + + except Exception, e: + self.logDebug(e, "Waiting 1 minute and retry") + time.sleep(60) else: + self.logWarning(_("Fallback to default reload interval due plugin parse error")) + self.interval = self.MIN_RELOAD_INTERVAL return list() try: @@ -152,17 +152,15 @@ class MultiHook(Hook): def _pluginSet(self, plugins): - plugins = set((decode(x).strip().lower() for x in plugins if '.' in x)) + regexp = re.compile(r'^[\w\-.^_]{3,63}\.[a-zA-Z]{2,}$', re.U) + plugins = [decode(p.strip()).lower() for p in plugins if regexp.match(p.strip())] - for rf, rt in self.DOMAIN_REPLACEMENTS: - regex = re.compile(rf) - for p in filter(lambda x: regex.match(x), plugins): - plugins.remove(p) - plugins.add(re.sub(rf, rt, p)) + for r in self.DOMAIN_REPLACEMENTS: + rf, rt = r + repr = re.compile(rf, re.I|re.U) + plugins = [re.sub(rf, rt, p) if repr.match(p) else p for p in plugins] - plugins.discard('') - - return plugins + return set(plugins) def getHosters(self): @@ -173,16 +171,30 @@ class MultiHook(Hook): raise NotImplementedError - def getCrypters(self): - """Load list of supported crypters + #: Threaded _periodical, remove in 0.4.10 and use built-in flag for that + def _periodical(self): + try: + if self.isActivated(): + self.periodical() - :return: List of domain names - """ - raise NotImplementedError + except Exception, e: + self.core.log.error(_("Error executing hooks: %s") % str(e)) + if self.core.debug: + traceback.print_exc() + + self.cb = self.core.scheduler.addJob(self.interval, self._periodical) def periodical(self): """reload plugin list periodically""" + self.loadAccount() + + if self.getConfig("reload", True): + self.interval = max(self.getConfig("reloadinterval", 12) * 60 * 60, self.MIN_RELOAD_INTERVAL) + else: + self.core.scheduler.removeJob(self.cb) + self.cb = None + self.logInfo(_("Reloading supported %s list") % self.plugintype) old_supported = self.supported @@ -200,12 +212,6 @@ class MultiHook(Hook): for plugin in old_supported: self.unloadPlugin(plugin) - if self.getConfig("reload", True): - self.interval = max(self.getConfig("reloadinterval", 12), self.MIN_INTERVAL) - else: - self.core.scheduler.removeJob(self.cb) - self.cb = None - def overridePlugins(self): excludedList = [] @@ -249,7 +255,7 @@ class MultiHook(Hook): self.logDebug("New %ss: %s" % (self.plugintype, ", ".join(plugins))) # create new regexp - regexp = r'.*(?P<DOMAIN>%s).*' % "|".join([x.replace(".", "\.") for x in plugins]) + regexp = r'.*(?P<DOMAIN>%s).*' % "|".join(x.replace('.', '\.') for x in plugins) if hasattr(self.pluginclass, "__pattern__") and isinstance(self.pluginclass.__pattern__, basestring) and '://' in self.pluginclass.__pattern__: regexp = r'%s|%s' % (self.pluginclass.__pattern__, regexp) @@ -263,11 +269,11 @@ class MultiHook(Hook): def unloadPlugin(self, plugin): hdict = self.core.pluginManager.plugins[self.plugintype][plugin] if "module" in hdict: - del hdict['module'] + hdict.pop('module', None) if "new_module" in hdict: - del hdict['new_module'] - del hdict['new_name'] + hdict.pop('new_module', None) + hdict.pop('new_name', None) def unload(self): @@ -280,25 +286,3 @@ class MultiHook(Hook): hdict['pattern'] = getattr(self.pluginclass, "__pattern__", r'^unmatchable$') hdict['re'] = re.compile(hdict['pattern']) - - - def downloadFailed(self, pyfile): - """remove plugin override if download fails but not if file is offline/temp.offline""" - if pyfile.status != 8 or not self.getConfig("revertfailed", True): - return - - hdict = self.core.pluginManager.plugins[self.plugintype][pyfile.pluginname] - if "new_name" in hdict and hdict['new_name'] == self.pluginname: - if pyfile.error == "MultiHook": - self.logDebug("Unload MultiHook", pyfile.pluginname, hdict) - self.unloadPlugin(pyfile.pluginname) - pyfile.setStatus("queued") - else: - retries = max(self.getConfig("retry", 10), 0) - wait_time = max(self.getConfig("retryinterval", 1), 0) - - if 0 < retries > pyfile.plugin.retries: - pyfile.setCustomStatus("MultiHook", "queued") - pyfile.plugin.retries += 1 - pyfile.plugin.setWait(wait_time) - pyfile.plugin.wait() diff --git a/module/plugins/internal/MultiHoster.py b/module/plugins/internal/MultiHoster.py index 8ca4d427f..2b3eb8941 100644 --- a/module/plugins/internal/MultiHoster.py +++ b/module/plugins/internal/MultiHoster.py @@ -2,22 +2,24 @@ import re +from module.plugins.internal.Plugin import Fail, Retry from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies class MultiHoster(SimpleHoster): __name__ = "MultiHoster" __type__ = "hoster" - __version__ = "0.32" + __version__ = "0.42" __pattern__ = r'^unmatchable$' + __config__ = [("use_premium" , "bool", "Use premium account if available" , True), + ("revertfailed", "bool", "Revert to standard download if fails", True)] __description__ = """Multi hoster plugin""" __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - CHECK_TRAFFIC = True LOGIN_ACCOUNT = True @@ -28,9 +30,13 @@ class MultiHoster(SimpleHoster): def prepare(self): - self.info = {} - self.link = "" #@TODO: Move to hoster class in 0.4.10 - self.directDL = False #@TODO: Move to hoster class in 0.4.10 + self.info = {} + self.html = "" + self.link = "" #@TODO: Move to Hoster in 0.4.10 + self.directDL = False #@TODO: Move to Hoster in 0.4.10 + + if not self.getConfig('use_premium', True): + self.retryFree() if self.LOGIN_ACCOUNT and not self.account: self.fail(_("Required account not found")) @@ -41,47 +47,64 @@ class MultiHoster(SimpleHoster): set_cookies(self.req.cj, self.COOKIES) if self.DIRECT_LINK is None: - self.directDL = self.__pattern__ != r'^unmatchable$' + self.directDL = self.__pattern__ != r'^unmatchable$' and re.match(self.__pattern__, self.pyfile.url) else: self.directDL = self.DIRECT_LINK - self.pyfile.url = replace_patterns(self.pyfile.url, - self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) def process(self, pyfile): - self.prepare() + try: + self.prepare() + + if self.directDL: + self.checkInfo() + self.logDebug("Looking for direct download link...") + self.handleDirect(pyfile) + + if not self.link and not self.lastDownload: + self.preload() + + self.checkErrors() + self.checkStatus(getinfo=False) + + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium(pyfile) + + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as free download") + self.handleFree(pyfile) - if self.__pattern__ != r'^unmatchable$' and re.match(self.__pattern__, pyfile.url): - self.checkInfo() + self.download(self.link, ref=False, disposition=True) + self.checkFile() - if self.directDL: - self.logDebug("Looking for direct download link...") - self.handleDirect(pyfile) + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 + err = str(e) #@TODO: Recheck in 0.4.10 - if not self.link and not self.lastDownload: - self.preload() + if self.premium: + self.logWarning(_("Premium download failed")) + self.retryFree() - self.checkErrors() - self.checkStatus(getinfo=False) + elif self.getConfig("revertfailed", True) \ + and "new_module" in self.core.pluginManager.hosterPlugins[self.__name__]: + hdict = self.core.pluginManager.hosterPlugins[self.__name__] - if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium(pyfile) + tmp_module = hdict['new_module'] + tmp_name = hdict['new_name'] + hdict.pop('new_module', None) + hdict.pop('new_name', None) - elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as free download") - self.handleFree(pyfile) + pyfile.initPlugin() - self.downloadLink(self.link) - self.checkFile() + hdict['new_module'] = tmp_module + hdict['new_name'] = tmp_name + raise Retry(_("Revert to original hoster plugin")) - #@TODO: Remove in 0.4.10 - def downloadLink(self, link): - if link and isinstance(link, basestring): - self.correctCaptcha() - self.download(link, disposition=True) + else: + raise Fail(err) def handlePremium(self, pyfile): diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py new file mode 100644 index 000000000..228685ee5 --- /dev/null +++ b/module/plugins/internal/Plugin.py @@ -0,0 +1,598 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN, spoob, mkaay +""" + +from time import time, sleep +from random import randint + +import os +from os import remove, makedirs, chmod, stat +from os.path import exists, join + +if os.name != "nt": + from os import chown + from pwd import getpwnam + from grp import getgrnam + +from itertools import islice + +from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload +from module.utils import save_join, save_path, fs_encode, fs_decode + +def chunks(iterable, size): + it = iter(iterable) + item = list(islice(it, size)) + while item: + yield item + item = list(islice(it, size)) + + +class Base(object): + """ + A Base class with log/config/db methods *all* plugin types can use + """ + + def __init__(self, core): + #: Core instance + self.core = core + #: logging instance + self.log = core.log + #: core config + self.config = core.config + + #log functions + def logInfo(self, *args): + self.log.info("%s: %s" % (self.__name__, " | ".join([a if isinstance(a, basestring) else str(a) for a in args]))) + + def logWarning(self, *args): + self.log.warning("%s: %s" % (self.__name__, " | ".join([a if isinstance(a, basestring) else str(a) for a in args]))) + + def logError(self, *args): + self.log.error("%s: %s" % (self.__name__, " | ".join([a if isinstance(a, basestring) else str(a) for a in args]))) + + def logDebug(self, *args): + self.log.debug("%s: %s" % (self.__name__, " | ".join([a if isinstance(a, basestring) else str(a) for a in args]))) + + + def setConf(self, option, value): + """ see `setConfig` """ + self.core.config.setPlugin(self.__name__, option, value) + + def setConfig(self, option, value): + """ Set config value for current plugin + + :param option: + :param value: + :return: + """ + self.setConf(option, value) + + def getConf(self, option): + """ see `getConfig` """ + return self.core.config.getPlugin(self.__name__, option) + + def getConfig(self, option): + """ Returns config value for current plugin + + :param option: + :return: + """ + return self.getConf(option) + + def setStorage(self, key, value): + """ Saves a value persistently to the database """ + self.core.db.setStorage(self.__name__, key, value) + + def store(self, key, value): + """ same as `setStorage` """ + self.core.db.setStorage(self.__name__, key, value) + + def getStorage(self, key=None, default=None): + """ Retrieves saved value or dict of all saved entries if key is None """ + if key is not None: + return self.core.db.getStorage(self.__name__, key) or default + return self.core.db.getStorage(self.__name__, key) + + def retrieve(self, *args, **kwargs): + """ same as `getStorage` """ + return self.getStorage(*args, **kwargs) + + def delStorage(self, key): + """ Delete entry in db """ + self.core.db.delStorage(self.__name__, key) + + +class Plugin(Base): + """ + Base plugin for hoster/crypter. + Overwrite `process` / `decrypt` in your subclassed plugin. + """ + __name__ = "Plugin" + __version__ = "0.05" + __pattern__ = None + __type__ = "hoster" + __config__ = [("name", "type", "desc", "default")] + __description__ = """Base Plugin""" + __author_name__ = ("RaNaN", "spoob", "mkaay") + __author_mail__ = ("RaNaN@pyload.org", "spoob@pyload.org", "mkaay@mkaay.de") + + def __init__(self, pyfile): + Base.__init__(self, pyfile.m.core) + + self.wantReconnect = False + #: enables simultaneous processing of multiple downloads + self.multiDL = True + self.limitDL = 0 + #: chunk limit + self.chunkLimit = 1 + self.resumeDownload = False + + #: time() + wait in seconds + self.waitUntil = 0 + self.waiting = False + + self.ocr = None #captcha reader instance + #: account handler instance, see :py:class:`Account` + self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + + #: premium status + self.premium = False + #: username/login + self.user = None + + if self.account and not self.account.canUse(): self.account = None + if self.account: + self.user, data = self.account.selectAccount() + #: Browser instance, see `network.Browser` + self.req = self.account.getAccountRequest(self.user) + self.chunkLimit = -1 # chunk limit, -1 for unlimited + #: enables resume (will be ignored if server dont accept chunks) + self.resumeDownload = True + self.multiDL = True #every hoster with account should provide multiple downloads + #: premium status + self.premium = self.account.isPremium(self.user) + else: + self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) + + #: associated pyfile instance, see `PyFile` + self.pyfile = pyfile + self.thread = None # holds thread in future + + #: location where the last call to download was saved + self.lastDownload = "" + #: re match of the last call to `checkDownload` + self.lastCheck = None + #: js engine, see `JsEngine` + self.js = self.core.js + self.cTask = None #captcha task + + self.retries = 0 # amount of retries already made + self.html = None # some plugins store html code here + + self.init() + + def getChunkCount(self): + if self.chunkLimit <= 0: + return self.config["download"]["chunks"] + return min(self.config["download"]["chunks"], self.chunkLimit) + + def __call__(self): + return self.__name__ + + def init(self): + """initialize the plugin (in addition to `__init__`)""" + pass + + def setup(self): + """ setup for enviroment and other things, called before downloading (possibly more than one time)""" + pass + + def preprocessing(self, thread): + """ handles important things to do before starting """ + self.thread = thread + + if self.account: + self.account.checkLogin(self.user) + else: + self.req.clearCookies() + + self.setup() + + self.pyfile.setStatus("starting") + + return self.process(self.pyfile) + + + def process(self, pyfile): + """the 'main' method of every plugin, you **have to** overwrite it""" + raise NotImplementedError + + def resetAccount(self): + """ dont use account and retry download """ + self.account = None + self.req = self.core.requestFactory.getRequest(self.__name__) + self.retry() + + def checksum(self, local_file=None): + """ + return codes: + 0 - checksum ok + 1 - checksum wrong + 5 - can't get checksum + 10 - not implemented + 20 - unknown error + """ + #@TODO checksum check hook + + return True, 10 + + + def setWait(self, seconds, reconnect=False): + """Set a specific wait time later used with `wait` + + :param seconds: wait time in seconds + :param reconnect: True if a reconnect would avoid wait time + """ + if reconnect: + self.wantReconnect = True + self.pyfile.waitUntil = time() + int(seconds) + + def wait(self): + """ waits the time previously set """ + self.waiting = True + self.pyfile.setStatus("waiting") + + while self.pyfile.waitUntil > time(): + self.thread.m.reconnecting.wait(2) + + if self.pyfile.abort: raise Abort + if self.thread.m.reconnecting.isSet(): + self.waiting = False + self.wantReconnect = False + raise Reconnect + + self.waiting = False + self.pyfile.setStatus("starting") + + def fail(self, reason): + """ fail and give reason """ + raise Fail(reason) + + def offline(self): + """ fail and indicate file is offline """ + raise Fail("offline") + + def tempOffline(self): + """ fail and indicates file ist temporary offline, the core may take consequences """ + raise Fail("temp. offline") + + def retry(self, max_tries=3, wait_time=1, reason=""): + """Retries and begin again from the beginning + + :param max_tries: number of maximum retries + :param wait_time: time to wait in seconds + :param reason: reason for retrying, will be passed to fail if max_tries reached + """ + if 0 < max_tries <= self.retries: + if not reason: reason = "Max retries reached" + raise Fail(reason) + + self.wantReconnect = False + self.setWait(wait_time) + self.wait() + + self.retries += 1 + raise Retry(reason) + + def invalidCaptcha(self): + if self.cTask: + self.cTask.invalid() + + def correctCaptcha(self): + if self.cTask: + self.cTask.correct() + + def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype='jpg', + result_type='textual'): + """ Loads a captcha and decrypts it with ocr, plugin, user input + + :param url: url of captcha image + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param forceUser: if True, ocr is not used + :param imgtype: Type of the Image + :param result_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + + :return: result of decrypting + """ + + img = self.load(url, get=get, post=post, cookies=cookies) + + id = ("%.2f" % time())[-6:].replace(".", "") + temp_file = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") + temp_file.write(img) + temp_file.close() + + has_plugin = self.__name__ in self.core.pluginManager.captchaPlugins + + if self.core.captcha: + Ocr = self.core.pluginManager.loadClass("captcha", self.__name__) + else: + Ocr = None + + if Ocr and not forceUser: + sleep(randint(3000, 5000) / 1000.0) + if self.pyfile.abort: raise Abort + + ocr = Ocr() + result = ocr.get_captcha(temp_file.name) + else: + captchaManager = self.core.captchaManager + task = captchaManager.newTask(img, imgtype, temp_file.name, result_type) + self.cTask = task + captchaManager.handleCaptcha(task) + + while task.isWaiting(): + if self.pyfile.abort: + captchaManager.removeTask(task) + raise Abort + sleep(1) + + captchaManager.removeTask(task) + + if task.error and has_plugin: #ignore default error message since the user could use OCR + self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) + elif task.error: + self.fail(task.error) + elif not task.result: + self.fail(_("No captcha result obtained in appropiate time by any of the plugins.")) + + result = task.result + self.log.debug("Received captcha result: %s" % str(result)) + + if not self.core.debug: + try: + remove(temp_file.name) + except: + pass + + return result + + + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): + """Load content at url and returns it + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param just_header: if True only the header will be retrieved and returned as dict + :param decode: Wether to decode the output according to http header, should be True in most cases + :return: Loaded content + """ + if self.pyfile.abort: raise Abort + #utf8 vs decode -> please use decode attribute in all future plugins + if type(url) == unicode: url = str(url) + + res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode) + + if self.core.debug: + from inspect import currentframe + + frame = currentframe() + if not exists(join("tmp", self.__name__)): + makedirs(join("tmp", self.__name__)) + + f = open( + join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) + , "wb") + del frame # delete the frame or it wont be cleaned + + try: + tmp = res.encode("utf8") + except: + tmp = res + + f.write(tmp) + f.close() + + if just_header: + #parse header + header = {"code": self.req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + res = header + + return res + + def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): + """Downloads the content at url to download folder + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param disposition: if True and server provides content-disposition header\ + the filename will be changed if needed + :return: The location where the file was saved + """ + + self.checkForSameFiles() + + self.pyfile.setStatus("downloading") + + download_folder = self.config['general']['download_folder'] + + location = save_join(download_folder, self.pyfile.package().folder) + + if not exists(location): + makedirs(location, int(self.core.config["permission"]["folder"], 8)) + + if self.core.config["permission"]["change_dl"] and os.name != "nt": + try: + uid = getpwnam(self.config["permission"]["user"])[2] + gid = getgrnam(self.config["permission"]["group"])[2] + + chown(location, uid, gid) + except Exception, e: + self.log.warning(_("Setting User and Group failed: %s") % str(e)) + + # convert back to unicode + location = fs_decode(location) + name = save_path(self.pyfile.name) + + filename = join(location, name) + + self.core.hookManager.dispatchEvent("downloadStarts", self.pyfile, url, filename) + + try: + newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, + chunks=self.getChunkCount(), resume=self.resumeDownload, + progressNotify=self.pyfile.setProgress, disposition=disposition) + finally: + self.pyfile.size = self.req.size + + if disposition and newname and newname != name: #triple check, just to be sure + self.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname}) + self.pyfile.name = newname + filename = join(location, newname) + + fs_filename = fs_encode(filename) + + if self.core.config["permission"]["change_file"]: + chmod(fs_filename, int(self.core.config["permission"]["file"], 8)) + + if self.core.config["permission"]["change_dl"] and os.name != "nt": + try: + uid = getpwnam(self.config["permission"]["user"])[2] + gid = getgrnam(self.config["permission"]["group"])[2] + + chown(fs_filename, uid, gid) + except Exception, e: + self.log.warning(_("Setting User and Group failed: %s") % str(e)) + + self.lastDownload = filename + return self.lastDownload + + def checkDownload(self, rules, api_size=0, max_size=50000, delete=True, read_size=0): + """ checks the content of the last downloaded file, re match is saved to `lastCheck` + + :param rules: dict with names and rules to match (compiled regexp or strings) + :param api_size: expected file size + :param max_size: if the file is larger then it wont be checked + :param delete: delete if matched + :param read_size: amount of bytes to read from files larger then max_size + :return: dictionary key of the first rule that matched + """ + lastDownload = fs_encode(self.lastDownload) + if not exists(lastDownload): return None + + size = stat(lastDownload) + size = size.st_size + + if api_size and api_size <= size: return None + elif size > max_size and not read_size: return None + self.log.debug("Download Check triggered") + f = open(lastDownload, "rb") + content = f.read(read_size if read_size else -1) + f.close() + #produces encoding errors, better log to other file in the future? + #self.log.debug("Content: %s" % content) + for name, rule in rules.iteritems(): + if type(rule) in (str, unicode): + if rule in content: + if delete: + remove(lastDownload) + return name + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + if delete: + remove(lastDownload) + self.lastCheck = m + return name + + + def getPassword(self): + """ get the password the user provided in the package""" + password = self.pyfile.package().password + if not password: return "" + return password + + + def checkForSameFiles(self, starting=False): + """ checks if same file was/is downloaded within same package + + :param starting: indicates that the current download is going to start + :raises SkipDownload: + """ + + pack = self.pyfile.package() + + for pyfile in self.core.files.cache.values(): + if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder: + if pyfile.status in (0, 12): #finished or downloading + raise SkipDownload(pyfile.pluginname) + elif pyfile.status in ( + 5, 7) and starting: #a download is waiting/starting and was appenrently started before + raise SkipDownload(pyfile.pluginname) + + download_folder = self.config['general']['download_folder'] + location = save_join(download_folder, pack.folder, self.pyfile.name) + + if starting and self.core.config['download']['skip_existing'] and exists(location): + size = os.stat(location).st_size + if size >= self.pyfile.size: + raise SkipDownload("File exists.") + + pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) + if pyfile: + if exists(location): + raise SkipDownload(pyfile[0]) + + self.log.debug("File %s not skipped, because it does not exists." % self.pyfile.name) + + def clean(self): + """ clean everything and remove references """ + if hasattr(self, "pyfile"): + del self.pyfile + if hasattr(self, "req"): + self.req.close() + del self.req + if hasattr(self, "thread"): + del self.thread + if hasattr(self, "html"): + del self.html diff --git a/module/plugins/internal/ReCaptcha.py b/module/plugins/internal/ReCaptcha.py new file mode 100644 index 000000000..a9d0f3752 --- /dev/null +++ b/module/plugins/internal/ReCaptcha.py @@ -0,0 +1,195 @@ +# -*- coding: utf-8 -*- + +import random +import re +import time +import urlparse + +from base64 import b64encode + +from module.plugins.internal.Captcha import Captcha + + +class ReCaptcha(Captcha): + __name__ = "ReCaptcha" + __type__ = "captcha" + __version__ = "0.17" + + __description__ = """ReCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("zapp-brannigan", "fuerst.reinje@web.de")] + + + KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)' + KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)' + + + def detect_key(self, html=None): + html = html or self.retrieve_html() + + m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.logDebug("Key: %s" % self.key) + return self.key + else: + self.logWarning("Key pattern not found") + return None + + + def challenge(self, key=None, html=None, version=None): + key = key or self.retrieve_key(html) + + if version in (1, 2): + return getattr(self, "_challenge_v%s" % version)(key) + + else: + return self.challenge(key, + version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_html()) else 1) + + + def _challenge_v1(self, key): + html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", + get={'k': key}) + try: + challenge = re.search("challenge : '(.+?)',", html).group(1) + server = re.search("server : '(.+?)',", html).group(1) + + except AttributeError: + self.fail(_("ReCaptcha challenge pattern not found")) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(server, challenge, key) + + + def result(self, server, challenge, key): + self.plugin.req.load("http://www.google.com/recaptcha/api/js/recaptcha.js") + html = self.plugin.req.load("http://www.google.com/recaptcha/api/reload", + get={'c' : challenge, + 'k' : key, + 'reason': "i", + 'type' : "image"}) + + try: + challenge = re.search('\(\'(.+?)\',',html).group(1) + + except AttributeError: + self.fail(_("ReCaptcha second challenge pattern not found")) + + self.logDebug("Second challenge: %s" % challenge) + result = self.plugin.decryptCaptcha("%simage" % server, + get={'c': challenge}, + cookies=True, + forceUser=True, + imgtype="jpg") + + self.logDebug("Result: %s" % result) + + return result, challenge + + + def _collectApiInfo(self): + html = self.plugin.req.load("http://www.google.com/recaptcha/api.js") + a = re.search(r'po.src = \'(.*?)\';', html).group(1) + vers = a.split("/")[5] + + self.logDebug("API version: %s" % vers) + + language = a.split("__")[1].split(".")[0] + + self.logDebug("API language: %s" % language) + + html = self.plugin.req.load("https://apis.google.com/js/api.js") + b = re.search(r'"h":"(.*?)","', html).group(1) + jsh = b.decode('unicode-escape') + + self.logDebug("API jsh-string: %s" % jsh) + + return vers, language, jsh + + + def _prepareTimeAndRpc(self): + self.plugin.req.load("http://www.google.com/recaptcha/api2/demo") + + millis = int(round(time.time() * 1000)) + + self.logDebug("Time: %s" % millis) + + rand = random.randint(1, 99999999) + a = "0.%s" % str(rand * 2147483647) + rpc = int(100000000 * float(a)) + + self.logDebug("Rpc-token: %s" % rpc) + + return millis, rpc + + + def _challenge_v2(self, key, parent=None): + if parent is None: + try: + parent = urlparse.urljoin("http://", urlparse.urlparse(self.plugin.pyfile.url).netloc) + + except Exception: + parent = "" + + botguardstring = "!A" + vers, language, jsh = self._collectApiInfo() + millis, rpc = self._prepareTimeAndRpc() + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/anchor", + get={'k' : key, + 'hl' : language, + 'v' : vers, + 'usegapi' : "1", + 'jsh' : "%s#id=IO_%s" % (jsh, millis), + 'parent' : parent, + 'pfname' : "", + 'rpctoken': rpc}) + + token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html) + self.logDebug("Token #1: %s" % token1.group(1)) + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/frame", + get={'c' : token1.group(1), + 'hl' : language, + 'v' : vers, + 'bg' : botguardstring, + 'k' : key, + 'usegapi': "1", + 'jsh' : jsh}).decode('unicode-escape') + + token2 = re.search(r'"finput","(.*?)",', html) + self.logDebug("Token #2: %s" % token2.group(1)) + + token3 = re.search(r'"rresp","(.*?)",', html) + self.logDebug("Token #3: %s" % token3.group(1)) + + millis_captcha_loading = int(round(time.time() * 1000)) + captcha_response = self.plugin.decryptCaptcha("https://www.google.com/recaptcha/api2/payload", + get={'c':token3.group(1), 'k':key}, + cookies=True, + forceUser=True) + response = b64encode('{"response":"%s"}' % captcha_response) + + self.logDebug("Result: %s" % response) + + timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading + timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500) + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/userverify", + post={'k' : key, + 'c' : token3.group(1), + 'response': response, + 't' : timeToSolve, + 'ct' : timeToSolveMore, + 'bg' : botguardstring}) + + token4 = re.search(r'"uvresp","(.*?)",', html) + self.logDebug("Token #4: %s" % token4.group(1)) + + result = token4.group(1) + + return result, None diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py new file mode 100644 index 000000000..624f6c939 --- /dev/null +++ b/module/plugins/internal/SevenZip.py @@ -0,0 +1,153 @@ +# -*- coding: utf-8 -*- + +import os +import re +import subprocess + +from module.plugins.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice +from module.utils import fs_encode, save_join + + +class SevenZip(UnRar): + __name__ = "SevenZip" + __version__ = "0.11" + + __description__ = """7-Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Michael Nowak" , "" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + CMD = "7z" + VERSION = "" + + EXTENSIONS = [".7z", ".xz", ".zip", ".gz", ".gzip", ".tgz", ".bz2", ".bzip2", + ".tbz2", ".tbz", ".tar", ".wim", ".swm", ".lzma", ".rar", ".cab", + ".arj", ".z", ".taz", ".cpio", ".rpm", ".deb", ".lzh", ".lha", + ".chm", ".chw", ".hxs", ".iso", ".msi", ".doc", ".xls", ".ppt", + ".dmg", ".xar", ".hfs", ".exe", ".ntfs", ".fat", ".vhd", ".mbr", + ".squashfs", ".cramfs", ".scap"] + + + #@NOTE: there are some more uncovered 7z formats + re_filelist = re.compile(r'([\d\:]+)\s+([\d\:]+)\s+([\w\.]+)\s+(\d+)\s+(\d+)\s+(.+)') + re_wrongpwd = re.compile(r'(Can not open encrypted archive|Wrong password|Encrypted\s+\=\s+\+)', re.I) + re_wrongcrc = re.compile(r'CRC Failed|Can not open file', re.I) + re_version = re.compile(r'7-Zip\s(?:\[64\]\s)?(\d+\.\d+)', re.I) + + + @classmethod + def isUsable(cls): + if os.name == "nt": + cls.CMD = os.path.join(pypath, "7z.exe") + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + else: + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + + m = cls.re_version.search(out) + cls.VERSION = m.group(1) if m else '(version unknown)' + + return True + + + def verify(self, password): + # 7z can't distinguish crc and pw error in test + p = self.call_cmd("l", "-slt", fs_encode(self.filename)) + out, err = p.communicate() + + if self.re_wrongpwd.search(out): + raise PasswordError + + if self.re_wrongpwd.search(err): + raise PasswordError + + if self.re_wrongcrc.search(err): + raise CRCError(err) + + + + def check(self, password): + p = self.call_cmd("l", "-slt", fs_encode(self.filename)) + out, err = p.communicate() + + # check if output or error macthes the 'wrong password'-Regexp + if self.re_wrongpwd.search(out): + raise PasswordError + + if self.re_wrongcrc.search(out): + raise CRCError(_("Header protected")) + + + def repair(self): + return False + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, '-o' + self.out, fs_encode(self.filename), password=password) + + renice(p.pid, self.renice) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + self.files = self.list(password) + + + def list(self, password=None): + command = "l" if self.fullpath else "l" + + p = self.call_cmd(command, fs_encode(self.filename), password=password) + out, err = p.communicate() + + if "Can not open" in err: + raise ArchiveError(_("Cannot open file")) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + result = set() + for groups in self.re_filelist.findall(out): + f = groups[-1].strip() + result.add(save_join(self.out, f)) + + return list(result) + + + def call_cmd(self, command, *xargs, **kwargs): + args = [] + + #overwrite flag + if self.overwrite: + args.append("-y") + + #set a password + if "password" in kwargs and kwargs["password"]: + args.append("-p%s" % kwargs["password"]) + else: + args.append("-p-") + + #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + + self.manager.logDebug(" ".join(call)) + + p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return p diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index 43b1347fd..be0d5e9d5 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -1,29 +1,25 @@ # -*- coding: utf-8 -*- import re +import urlparse -from urlparse import urlparse - -from module.plugins.Crypter import Crypter +from module.plugins.internal.Crypter import Crypter from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies -from module.utils import fixup +from module.utils import fixup, html_unescape class SimpleCrypter(Crypter, SimpleHoster): __name__ = "SimpleCrypter" __type__ = "crypter" - __version__ = "0.38" + __version__ = "0.53" __pattern__ = r'^unmatchable$' - __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config['general']['folder_per_package'] - ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + __config__ = [("use_subfolder" , "bool", "Save package to subfolder" , True), #: Overrides core.config['general']['folder_per_package'] + ("subfolder_per_pack", "bool", "Create a subfolder for each package", True)] __description__ = """Simple decrypter plugin""" __license__ = "GPLv3" - __authors__ = [("stickell", "l.stickell@yahoo.it"), - ("zoidberg", "zoidberg@mujmail.cz"), - ("Walter Purcaro", "vuolter@gmail.com")] - + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] """ Following patterns should be defined by each crypter: @@ -56,27 +52,15 @@ class SimpleCrypter(Crypter, SimpleHoster): return the html of the page number page_n """ - LINK_PATTERN = None - - NAME_REPLACEMENTS = [("&#?\w+;", fixup)] - URL_REPLACEMENTS = [] - - TEXT_ENCODING = False #: Set to True or encoding name if encoding in http header is not correct - COOKIES = True #: or False or list of tuples [(domain, name, value)] - - LOGIN_ACCOUNT = False - LOGIN_PREMIUM = False - - #@TODO: Remove in 0.4.10 def init(self): account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") - account = self.core.accountManager.getAccountPlugin(account_name) + account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) if account and account.canUse(): self.user, data = account.selectAccount() - self.req = account.getAccountRequest(self.user) - self.premium = account.isPremium(self.user) + self.req = account.getAccountRequest(self.user) + self.premium = account.isPremium(self.user) self.account = account @@ -85,7 +69,9 @@ class SimpleCrypter(Crypter, SimpleHoster): self.pyfile.error = "" #@TODO: Remove in 0.4.10 self.info = {} - self.links = [] #@TODO: Move to hoster class in 0.4.10 + self.html = "" + self.link = "" #@TODO: Move to Hoster in 0.4.10 + self.links = [] #@TODO: Move to Hoster in 0.4.10 if self.LOGIN_PREMIUM and not self.premium: self.fail(_("Required premium account not found")) @@ -101,24 +87,46 @@ class SimpleCrypter(Crypter, SimpleHoster): self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + def handleDirect(self, pyfile): + for i in xrange(10): #@TODO: Use `pycurl.MAXREDIRS` value in 0.4.10 + redirect = self.link or pyfile.url + self.logDebug("Redirect #%d to: %s" % (i, redirect)) + + header = self.load(redirect, just_header=True, decode=True) + if 'location' in header and header['location']: + self.link = header['location'] + else: + break + else: + self.logError(_("Too many redirects")) + + def decrypt(self, pyfile): self.prepare() - self.preload() - self.checkInfo() + self.logDebug("Looking for link redirect...") + self.handleDirect(pyfile) + + if self.link: + self.urls = [self.link] - self.links = self.getLinks() + else: + self.preload() + self.checkInfo() - if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): - self.handlePages(pyfile) + self.links = self.getLinks() - self.logDebug("Package has %d links" % len(self.links)) + if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): + self.handlePages(pyfile) + + self.logDebug("Package has %d links" % len(self.links)) if self.links: + self.links = [html_unescape(l.decode('unicode-escape').strip()) for l in self.links] #@TODO: Move to Crypter in 0.4.10 self.packages = [(self.info['name'], self.links, self.info['folder'])] elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 - self.fail("No link grabbed") + self.fail(_("No link grabbed")) def checkNameSize(self, getinfo=True): @@ -128,8 +136,8 @@ class SimpleCrypter(Crypter, SimpleHoster): self.logDebug("File info (AFTER): %s" % self.info) try: - url = self.info['url'] - name = self.info['name'] + url = self.info['url'].strip() + name = self.info['name'].strip() if name and name != url: self.pyfile.name = name @@ -151,13 +159,17 @@ class SimpleCrypter(Crypter, SimpleHoster): Returns the links extracted from self.html You should override this only if it's impossible to extract links using only the LINK_PATTERN. """ - return re.findall(self.LINK_PATTERN, self.html) + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + + return [urlparse.urljoin(baseurl, link) if not urlparse.urlparse(link).scheme else link \ + for link in re.findall(self.LINK_PATTERN, self.html)] def handlePages(self, pyfile): try: pages = int(re.search(self.PAGES_PATTERN, self.html).group(1)) - except: + except Exception: pages = 1 for p in xrange(2, pages + 1): diff --git a/module/plugins/internal/SimpleDereferer.py b/module/plugins/internal/SimpleDereferer.py deleted file mode 100644 index 53b80f827..000000000 --- a/module/plugins/internal/SimpleDereferer.py +++ /dev/null @@ -1,99 +0,0 @@ -# -*- coding: utf-8 -*- - -import re - -from urllib import unquote - -from module.plugins.Crypter import Crypter -from module.plugins.internal.SimpleHoster import directLink, set_cookies - - -class SimpleDereferer(Crypter): - __name__ = "SimpleDereferer" - __type__ = "crypter" - __version__ = "0.03" - - __pattern__ = r'^unmatchable$' - __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), - ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] - - __description__ = """Simple dereferer plugin""" - __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - - - """ - Following patterns should be defined by each crypter: - - LINK_PATTERN: Regex to catch the redirect url in group(1) - example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' - - OFFLINE_PATTERN: (optional) Checks if the page is unreachable - example: OFFLINE_PATTERN = r'File (deleted|not found)' - - TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable - example: TEMP_OFFLINE_PATTERN = r'Server maintainance' - - - You can override the getLinks method if you need a more sophisticated way to extract the redirect url. - """ - - LINK_PATTERN = None - - TEXT_ENCODING = False - COOKIES = True - - - def decrypt(self, pyfile): - link = directLink(self, pyfile.url) - - if not link: - try: - link = unquote(re.match(self.__pattern__, pyfile.url).group('LINK')) - - except Exception: - self.prepare() - self.preload() - - if self.html is None: - self.fail(_("No html retrieved")) - - self.checkStatus() - - link = self.getLink() - - if link.strip(): - self.urls = [link.strip()] #@TODO: Remove `.strip()` in 0.4.10 - - elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 - self.fail("No link grabbed") - - - def prepare(self): - self.req.setOption("timeout", 120) - - if isinstance(self.COOKIES, list): - set_cookies(self.req.cj, self.COOKIES) - - - def preload(self): - self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) - - if isinstance(self.TEXT_ENCODING, basestring): - self.html = unicode(self.html, self.TEXT_ENCODING) - - - def checkStatus(self): - if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): - self.offline() - - elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): - self.tempOffline() - - - def getLink(self): - try: - return re.search(self.LINK_PATTERN, self.html).group(1) - - except Exception: - pass diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index fa1ea45f2..4a3b7dcf7 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,20 +1,22 @@ # -*- coding: utf-8 -*- -import re +from __future__ import with_statement -from inspect import isclass -from os.path import exists -from time import time -from urllib import unquote -from urlparse import urljoin, urlparse +import datetime +import mimetypes +import os +import re +import time +import urllib +import urlparse from module.PyFile import statusMap as _statusMap from module.network.CookieJar import CookieJar from module.network.HTTPRequest import BadHeader from module.network.RequestFactory import getURL -from module.plugins.Hoster import Hoster -from module.plugins.Plugin import Fail -from module.utils import fixup, fs_encode, parseFileSize +from module.plugins.internal.Hoster import Hoster +from module.plugins.internal.Plugin import Fail, Retry +from module.utils import decode, fixup, fs_encode, html_unescape, parseFileSize #@TODO: Adapt and move to PyFile in 0.4.10 @@ -27,7 +29,7 @@ def _error(self, reason, type): type = "unknown" msg = _("%s error") % type.strip().capitalize() if type else _("Error") - msg += ": %s" % reason.strip() if reason else "" + msg += (": %s" % reason.strip()) if reason else "" msg += _(" | Plugin may be out of date") raise Fail(msg) @@ -74,7 +76,7 @@ def parseHtmlForm(attr_str, html, input_names={}): if name: value = parseHtmlTagAttrValue("value", inputtag.group(1)) if not value: - inputs[name] = inputtag.group(3) or '' + inputs[name] = inputtag.group(3) or "" else: inputs[name] = value @@ -100,43 +102,77 @@ def parseHtmlForm(attr_str, html, input_names={}): return {}, None #: no matching form found -#: Deprecated +#@TODO: Remove in 0.4.10 def parseFileInfo(plugin, url="", html=""): if hasattr(plugin, "getInfo"): info = plugin.getInfo(url, html) res = info['name'], info['size'], info['status'], info['url'] else: - res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url + url = urllib.unquote(url) + url_p = urlparse.urlparse(url) + res = ((url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 0, + 3 if url else 8, + url) return res #@TODO: Remove in 0.4.10 -#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10 def create_getInfo(plugin): + def getInfo(urls): + for url in urls: + if hasattr(plugin, "URL_REPLACEMENTS"): + url = replace_patterns(url, plugin.URL_REPLACEMENTS) + yield parseFileInfo(plugin, url) + + return getInfo + + +def timestamp(): + return int(time.time() * 1000) + - def generator(list): - for x in list: - yield x +#@TODO: Move to Hoster in 0.4.10 +def getFileURL(self, url, follow_location=None): + link = "" + redirect = 1 - if hasattr(plugin, "parseInfos"): - fn = lambda urls: generator((info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)) + if type(follow_location) is int: + redirect = max(follow_location, 1) else: - fn = lambda urls: generator(parseFileInfo(url) for url in urls) + redirect = 10 - return fn + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, just_header=True, decode=True) + except Exception: #: Bad bad bad... rewrite this part in 0.4.10 + req = pyreq.getHTTPRequest() + res = req.load(url, just_header=True, decode=True) -def timestamp(): - return int(time() * 1000) + req.close() + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue -#@TODO: Move to hoster class in 0.4.10 -def directLink(self, url, resumable=False): - link = "" + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() - for i in xrange(5 if resumable else 1): - header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value if 'content-disposition' in header: link = url @@ -144,42 +180,59 @@ def directLink(self, url, resumable=False): elif 'location' in header and header['location']: location = header['location'] - if not urlparse(location).scheme: - p = urlparse(url) - base = "%s://%s" % (p.scheme, p.netloc) - location = urljoin(base, location) + if not urlparse.urlparse(location).scheme: + url_p = urlparse.urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urlparse.urljoin(baseurl, location) - if resumable: + if 'code' in header and header['code'] == 302: + link = location + + if follow_location: url = location - self.logDebug("Redirect #%d to: %s" % (++i, location)) continue - elif 'code' in header and header['code'] == 302: - link = location + else: + extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] - elif 'content-type' in header and header['content-type'] and "html" not in header['content-type']: - link = url + if 'content-type' in header and header['content-type']: + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" break + else: - self.logError(_("Too many redirects")) + try: + self.logError(_("Too many redirects")) + except Exception: + pass return link def secondsToMidnight(gmt=0): - now = datetime.utcnow() + timedelta(hours=gmt) + now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) if now.hour is 0 and now.minute < 10: midnight = now else: - midnight = now + timedelta(days=1) + midnight = now + datetime.timedelta(days=1) td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now if hasattr(td, 'total_seconds'): res = td.total_seconds() - else: #@NOTE: work-around for python 2.5 and 2.6 missing timedelta.total_seconds + else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 return int(res) @@ -188,39 +241,39 @@ def secondsToMidnight(gmt=0): class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.96" + __version__ = "1.66" __pattern__ = r'^unmatchable$' + __config__ = [("use_premium", "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails", True)] __description__ = """Simple hoster plugin""" __license__ = "GPLv3" - __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), - ("stickell", "l.stickell@yahoo.it"), - ("Walter Purcaro", "vuolter@gmail.com")] - + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] """ - Info patterns should be defined by each hoster: + Info patterns: - INFO_PATTERN: (optional) Name and Size of the file + INFO_PATTERN: (mandatory) Name and Size of the file example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' or - NAME_PATTERN: (optional) Name that will be set for the file + NAME_PATTERN: (mandatory) Name that will be set for the file example: NAME_PATTERN = r'(?P<N>file_name)' - SIZE_PATTERN: (optional) Size that will be checked for the file + + SIZE_PATTERN: (mandatory) Size that will be checked for the file example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' HASHSUM_PATTERN: (optional) Hash code and type of the file example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' - OFFLINE_PATTERN: (optional) Check if the page is unreachable + OFFLINE_PATTERN: (mandatory) Check if the page is unreachable example: OFFLINE_PATTERN = r'File (deleted|not found)' TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' - Error handling patterns are all optional: + Error patterns: WAIT_PATTERN: (optional) Detect waiting time example: WAIT_PATTERN = r'' @@ -228,46 +281,65 @@ class SimpleHoster(Hoster): PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account example: PREMIUM_ONLY_PATTERN = r'Premium account required' + HAPPY_HOUR_PATTERN: (optional) + example: HAPPY_HOUR_PATTERN = r'Happy hour' + + IP_BLOCKED_PATTERN: (optional) + example: IP_BLOCKED_PATTERN = r'in your country' + + DL_LIMIT_PATTERN: (optional) + example: DL_LIMIT_PATTERN = r'download limit' + + SIZE_LIMIT_PATTERN: (optional) + example: SIZE_LIMIT_PATTERN = r'up to' + ERROR_PATTERN: (optional) Detect any error preventing download example: ERROR_PATTERN = r'' - Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download: + Instead overriding handleFree and handlePremium methods you may define the following patterns for basic link handling: - LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download - example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' + LINK_PATTERN: (optional) group(1) should be the direct link for free and premium download + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + or + LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download + example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' - LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download - example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' + LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download + example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' """ NAME_REPLACEMENTS = [("&#?\w+;", fixup)] SIZE_REPLACEMENTS = [] URL_REPLACEMENTS = [] - TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct - COOKIES = True #: or False or list of tuples [(domain, name, value)] + FILE_ERRORS = [('Html error' , r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)'), + ('Request error', r'([Aa]n error occured while processing your request)' ), + ('Html file' , r'\A\s*<!DOCTYPE html' )] + + CHECK_FILE = True #: Set to False to not check the last downloaded file with declared error patterns CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account + COOKIES = True #: or False or list of tuples [(domain, name, value)] DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False - MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + DISPOSITION = True #: Set to True to use any content-disposition value in http header as file name LOGIN_ACCOUNT = False #: Set to True to require account login + LOGIN_PREMIUM = False #: Set to True to require premium account login + MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct - directLink = directLink #@TODO: Remove in 0.4.10 + LINK_PATTERN = None - @classmethod - def parseInfos(cls, urls): #@TODO: Built-in in 0.4.10 core, then remove from plugins - for url in urls: - url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 - yield cls.getInfo(url) + directLink = getFileURL #@TODO: Remove in 0.4.10 @classmethod - def apiInfo(cls, url="", get={}, post={}): - url = unquote(url) - return {'name' : (urlparse(url).path.split('/')[-1] - or urlparse(url).query.split('=', 1)[::-1][0].split('&', 1)[0] - or _("Unknown")), + def apiInfo(cls, url): + url = urllib.unquote(url) + url_p = urlparse.urlparse(url) + return {'name' : (url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), 'size' : 0, 'status': 3 if url else 8, 'url' : url} @@ -276,7 +348,7 @@ class SimpleHoster(Hoster): @classmethod def getInfo(cls, url="", html=""): info = cls.apiInfo(url) - online = False + online = True if info['status'] is 2 else False try: info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here @@ -284,7 +356,7 @@ class SimpleHoster(Hoster): except Exception: info['pattern'] = {} - if not html: + if not html and not online: if not url: info['error'] = "missing url" info['status'] = 1 @@ -305,21 +377,18 @@ class SimpleHoster(Hoster): elif e.code is 503: info['status'] = 6 + except Exception: + pass + if html: if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): info['status'] = 1 - elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10 - info['status'] = 1 - elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): info['status'] = 6 else: - for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN", - "FILE_NAME_PATTERN", "NAME_PATTERN", - "FILE_SIZE_PATTERN", "SIZE_PATTERN", - "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 + for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"): try: attr = getattr(cls, pattern) pdict = re.search(attr, html).groupdict() @@ -333,19 +402,16 @@ class SimpleHoster(Hoster): else: online = True - if not info['pattern']: - info.pop('pattern', None) - if online: info['status'] = 2 if 'N' in info['pattern']: - info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), - cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10 + info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()), + cls.NAME_REPLACEMENTS) if 'S' in info['pattern']: size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], - cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10 + cls.SIZE_REPLACEMENTS) info['size'] = parseFileSize(size) elif isinstance(info['size'], basestring): @@ -356,9 +422,49 @@ class SimpleHoster(Hoster): hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" info[hashtype] = info['pattern']['H'] + if not info['pattern']: + info.pop('pattern', None) + return info + #@TODO: Move to Hoster in 0.4.10 + def _log(self, level, args): + log = getattr(self.core.log, level) + msg = " | ".join((fs_encode(a) if isinstance(a, unicode) else #@NOTE: `fs_encode` -> `encode` in 0.4.10 + decode(a) if isinstance(a, str) else + str(a)).strip() for a in args if a) + log("%(plugin)s[%(id)s]: %(msg)s" % {'plugin': self.__name__, + 'id' : self.pyfile.id, + 'msg' : msg or _(level.upper() + " MARK")}) + + + #@TODO: Move to Hoster in 0.4.10 + def logDebug(self, *args): + if self.core.debug: + return self._log("debug", args) + + + #@TODO: Move to Hoster in 0.4.10 + def logInfo(self, *args): + return self._log("info", args) + + + #@TODO: Move to Hoster in 0.4.10 + def logWarning(self, *args): + return self._log("warning", args) + + + #@TODO: Move to Hoster in 0.4.10 + def logError(self, *args): + return self._log("error", args) + + + #@TODO: Move to Hoster in 0.4.10 + def logCritical(self, *args): + return self._log("critical", args) + + def setup(self): self.resumeDownload = self.multiDL = self.premium @@ -367,9 +473,16 @@ class SimpleHoster(Hoster): self.pyfile.error = "" #@TODO: Remove in 0.4.10 self.info = {} - self.link = "" #@TODO: Move to hoster class in 0.4.10 - self.directDL = False #@TODO: Move to hoster class in 0.4.10 - self.multihost = False #@TODO: Move to hoster class in 0.4.10 + self.html = "" + self.link = "" #@TODO: Move to Hoster in 0.4.10 + self.directDL = False #@TODO: Move to Hoster in 0.4.10 + self.multihost = False #@TODO: Move to Hoster in 0.4.10 + + if not self.getConfig('use_premium', True): + self.retryFree() + + if self.LOGIN_PREMIUM and not self.premium: + self.fail(_("Required premium account not found")) if self.LOGIN_ACCOUNT and not self.account: self.fail(_("Required account not found")) @@ -379,6 +492,13 @@ class SimpleHoster(Hoster): if isinstance(self.COOKIES, list): set_cookies(self.req.cj, self.COOKIES) + if self.LINK_PATTERN: + if not hasattr(self, 'LINK_FREE_PATTERN'): + self.LINK_FREE_PATTERN = self.LINK_PATTERN + + if not hasattr(self, 'LINK_PREMIUM_PATTERN'): + self.LINK_PREMIUM_PATTERN = self.LINK_PATTERN + if (self.MULTI_HOSTER and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern'] or re.match(self.__pattern__, self.pyfile.url) is None)): @@ -390,80 +510,116 @@ class SimpleHoster(Hoster): else: self.directDL = self.DIRECT_LINK - self.pyfile.url = replace_patterns(self.pyfile.url, - self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) def preload(self): - self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), ref=False, decode=not self.TEXT_ENCODING) if isinstance(self.TEXT_ENCODING, basestring): self.html = unicode(self.html, self.TEXT_ENCODING) def process(self, pyfile): - self.prepare() - self.checkInfo() + try: + self.prepare() + self.checkInfo() - if self.directDL: - self.logDebug("Looking for direct download link...") - self.handleDirect(pyfile) + if self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect(pyfile) - if self.multihost and not self.link and not self.lastDownload: - self.logDebug("Looking for leeched download link...") - self.handleMulti(pyfile) + if self.multihost and not self.link and not self.lastDownload: + self.logDebug("Looking for leeched download link...") + self.handleMulti(pyfile) + + if not self.link and not self.lastDownload: + self.MULTI_HOSTER = False + self.retry(1, reason=_("Multi hoster fails")) if not self.link and not self.lastDownload: - self.MULTI_HOSTER = False - self.retry(1, reason="Multi hoster fails") + self.preload() + self.checkInfo() + + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium(pyfile) + + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as free download") + self.handleFree(pyfile) + + self.download(self.link, ref=False, disposition=self.DISPOSITION) + self.checkFile() + + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 + err = str(e) #@TODO: Recheck in 0.4.10 + + if err == _("No captcha result obtained in appropiate time by any of the plugins."): #@TODO: Fix in 0.4.10 + self.checkFile() + + elif self.getConfig('fallback', True) and self.premium: + self.logWarning(_("Premium download failed"), e) + self.retryFree() + + else: + raise Fail(err) - if not self.link and not self.lastDownload: - self.preload() - self.checkInfo() - if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium(pyfile) + def download(self, url, *args, **kwargs): + if not url or not isinstance(url, basestring): + return - elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as free download") - self.handleFree(pyfile) + self.correctCaptcha() - self.downloadLink(self.link) - self.checkFile() + url = html_unescape(url.decode('unicode-escape').strip()) #@TODO: Move to Hoster in 0.4.10 + if not urlparse.urlparse(url).scheme: + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + url = urlparse.urljoin(baseurl, url) - def downloadLink(self, link): - if link and isinstance(link, basestring): - self.correctCaptcha() - self.download(link, disposition=False) #@TODO: Set `disposition=True` in 0.4.10 + return super(SimpleHoster, self).download(url, *args, **kwargs) def checkFile(self): + lastDownload = fs_encode(self.lastDownload) + if self.cTask and not self.lastDownload: self.invalidCaptcha() self.retry(10, reason=_("Wrong captcha")) - elif not self.lastDownload or not exists(fs_encode(self.lastDownload)): + elif not self.lastDownload or not os.path.exists(lastDownload): self.lastDownload = "" - self.fail(self.pyfile.error or _("No file downloaded")) + self.error(self.pyfile.error or _("No file downloaded")) else: - rules = {'empty file': re.compile(r'\A\Z'), - 'html file' : re.compile(r'\A\s*<!DOCTYPE html'), - 'html error': re.compile(r'\A\s*(<.+>)?\d{3}(\Z|\s+)')} + #@TODO: Move to Hoster in 0.4.10 + if os.stat(lastDownload).st_size < 1 or self.checkDownload({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}): + self.error(_("Empty file")) - if hasattr(self, 'ERROR_PATTERN'): - rules['error'] = re.compile(self.ERROR_PATTERN) + self.logDebug("Checking last downloaded file with built-in rules") + for r, p in self.FILE_ERRORS: + errmsg = self.checkDownload({r: re.compile(p)}) + if errmsg is not None: + errmsg = errmsg.strip().capitalize() + + try: + errmsg += " | " + self.lastCheck.group(1).strip() + except Exception: + pass - check = self.checkDownload(rules) - if check: #@TODO: Move to hoster in 0.4.10 - errmsg = check.strip().capitalize() - if self.lastCheck: - errmsg += " | " + self.lastCheck.group(0).strip() + self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry") + self.wantReconnect = True + self.retry(wait_time=60, reason=errmsg) + else: + if self.CHECK_FILE: + self.logDebug("Checking last downloaded file with custom rules") + with open(lastDownload, "rb") as f: + self.html = f.read(50000) #@TODO: Recheck in 0.4.10 + self.checkErrors() - self.lastDownload = "" - self.retry(10, 60, errmsg) + self.logDebug("No file errors found") def checkErrors(self): @@ -471,30 +627,111 @@ class SimpleHoster(Hoster): self.logWarning(_("No html code to check")) return - if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): - self.fail(_("Link require a premium account to be handled")) + if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html): + self.fail(_("Connection from your current IP address is not allowed")) + + elif not self.premium: + if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + self.fail(_("File can be downloaded by premium users only")) + + elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html): + self.fail(_("File too large for free download")) + + elif hasattr(self, 'DL_LIMIT_PATTERN') and re.search(self.DL_LIMIT_PATTERN, self.html): + m = re.search(self.DL_LIMIT_PATTERN, self.html) + try: + errmsg = m.group(1).strip() + except Exception: + errmsg = m.group(0).strip() + + self.info['error'] = re.sub(r'<.*?>', " ", errmsg) + self.logWarning(self.info['error']) + + if re.search('da(il)?y|today', errmsg, re.I): + wait_time = secondsToMidnight(gmt=2) + else: + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) - elif hasattr(self, 'ERROR_PATTERN'): + self.wantReconnect = wait_time > 300 + self.retry(1, wait_time, _("Download limit exceeded")) + + if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html): + self.multiDL = True + + if hasattr(self, 'ERROR_PATTERN'): m = re.search(self.ERROR_PATTERN, self.html) if m: - errmsg = self.info['error'] = m.group(1) - self.error(errmsg) + try: + errmsg = m.group(1).strip() + except Exception: + errmsg = m.group(0).strip() + + self.info['error'] = re.sub(r'<.*?>', " ", errmsg) + self.logWarning(self.info['error']) + + if re.search('limit|wait|slot', errmsg, re.I): + if re.search("da(il)?y|today", errmsg): + wait_time = secondsToMidnight(gmt=2) + else: + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) + + self.wantReconnect = wait_time > 300 + self.retry(1, wait_time, _("Download limit exceeded")) + + elif re.search('country|ip|region|nation', errmsg, re.I): + self.fail(_("Connection from your current IP address is not allowed")) + + elif re.search('captcha|code', errmsg, re.I): + self.invalidCaptcha() + self.retry(10, reason=_("Wrong captcha")) + + elif re.search('countdown|expired', errmsg, re.I): + self.retry(10, 60, _("Link expired")) + + elif re.search('maintenance|maintainance|temp', errmsg, re.I): + self.tempOffline() + + elif re.search('up to|size', errmsg, re.I): + self.fail(_("File too large for free download")) + + elif re.search('offline|delet|remov|not? (found|(longer)? available)', errmsg, re.I): + self.offline() + + elif re.search('filename', errmsg, re.I): + url_p = urlparse.urlparse(self.pyfile.url) + self.pyfile.url = "%s://%s/%s" % (url_p.scheme, url_p.netloc, url_p.path.strip('/').split('/')[0]) + self.retry(1, reason=_("Wrong url")) + + elif re.search('premium', errmsg, re.I): + self.fail(_("File can be downloaded by premium users only")) + + else: + self.wantReconnect = True + self.retry(wait_time=60, reason=errmsg) elif hasattr(self, 'WAIT_PATTERN'): m = re.search(self.WAIT_PATTERN, self.html) if m: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)]) + try: + waitmsg = m.group(1).strip() + except Exception: + waitmsg = m.group(0).strip() + + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I)) self.wait(wait_time, wait_time > 300) - return self.info.pop('error', None) def checkStatus(self, getinfo=True): if not self.info or getinfo: - self.logDebug("File info (BEFORE): %s" % self.info) + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("Current file info: %s" % self.info) try: status = self.info['status'] @@ -506,22 +743,22 @@ class SimpleHoster(Hoster): self.tempOffline() elif status is 8: - self.fail() + self.fail(self.info['error'] if 'error' in self.info else _("Failed")) finally: - self.logDebug("File status: %s" % statusMap[status], - "File info: %s" % self.info) + self.logDebug("File status: %s" % statusMap[status]) def checkNameSize(self, getinfo=True): if not self.info or getinfo: - self.logDebug("File info (BEFORE): %s" % self.info) + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) self.info.update(self.getInfo(self.pyfile.url, self.html)) - self.logDebug("File info (AFTER): %s" % self.info) + self.logDebug("Current file info: %s" % self.info) try: - url = self.info['url'] - name = self.info['name'] + url = self.info['url'].strip() + name = self.info['name'].strip() if name and name != url: self.pyfile.name = name @@ -537,7 +774,7 @@ class SimpleHoster(Hoster): pass self.logDebug("File name: %s" % self.pyfile.name, - "File size: %s" % self.pyfile.size if self.pyfile.size > 0 else "Unknown") + "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown") def checkInfo(self): @@ -562,7 +799,6 @@ class SimpleHoster(Hoster): if link: self.logInfo(_("Direct download link detected")) - self.link = link else: self.logDebug("Direct download link not found") @@ -576,16 +812,12 @@ class SimpleHoster(Hoster): if not hasattr(self, 'LINK_FREE_PATTERN'): self.logError(_("Free download not implemented")) - try: - m = re.search(self.LINK_FREE_PATTERN, self.html) - if m is None: - self.error(_("Free download link not found")) - + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def handlePremium(self, pyfile): if not hasattr(self, 'LINK_PREMIUM_PATTERN'): @@ -593,16 +825,12 @@ class SimpleHoster(Hoster): self.logDebug("Handled as free download") self.handleFree(pyfile) - try: - m = re.search(self.LINK_PREMIUM_PATTERN, self.html) - if m is None: - self.error(_("Premium download link not found")) - + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def longWait(self, wait_time=None, max_tries=3): if wait_time and isinstance(wait_time, (int, long, float)): @@ -614,8 +842,7 @@ class SimpleHoster(Hoster): self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) - self.setWait(wait_time, True) - self.wait() + self.wait(wait_time, True) self.retry(max_tries=max_tries, reason=_("Download limit reached")) @@ -640,6 +867,26 @@ class SimpleHoster(Hoster): #@TODO: Remove in 0.4.10 + def getConfig(self, option, default=''): + """getConfig with default value - sublass may not implements all config options""" + try: + return self.getConf(option) + + except KeyError: + return default + + + def retryFree(self): + if not self.premium: + return + self.premium = False + self.account = None + self.req = self.core.requestFactory.getRequest(self.__name__) + self.retries = -1 + raise Retry(_("Fallback to free download")) + + + #@TODO: Remove in 0.4.10 def wait(self, seconds=0, reconnect=None): return _wait(self, seconds, reconnect) diff --git a/module/plugins/internal/SolveMedia.py b/module/plugins/internal/SolveMedia.py new file mode 100644 index 000000000..d600ef03c --- /dev/null +++ b/module/plugins/internal/SolveMedia.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- + +import re + +from module.plugins.internal.Plugin import Fail +from module.plugins.internal.Captcha import Captcha + + +class SolveMedia(Captcha): + __name__ = "SolveMedia" + __type__ = "captcha" + __version__ = "0.14" + + __description__ = """SolveMedia captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' + + + def detect_key(self, html=None): + html = html or self.retrieve_html() + + m = re.search(self.KEY_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.logDebug("Key: %s" % self.key) + return self.key + else: + self.logWarning("Key pattern not found") + return None + + + def challenge(self, key=None, html=None): + key = key or self.retrieve_key(html) + + html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", + get={'k': key}) + + for i in xrange(1, 11): + try: + magic = re.search(r'name="magic" value="(.+?)"', html).group(1) + + except AttributeError: + self.logWarning("Magic pattern not found") + magic = None + + try: + challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="(.+?)">', + html).group(1) + + except AttributeError: + self.fail(_("SolveMedia challenge pattern not found")) + + else: + self.logDebug("Challenge: %s" % challenge) + + try: + result = self.result("http://api.solvemedia.com/papi/media", challenge) + + except Fail, e: + self.logWarning(e) + self.plugin.invalidCaptcha() + result = None + + html = self.plugin.req.load("http://api.solvemedia.com/papi/verify.noscript", + post={'adcopy_response' : result, + 'k' : key, + 'l' : "en", + 't' : "img", + 's' : "standard", + 'magic' : magic, + 'adcopy_challenge': challenge, + 'ref' : self.plugin.pyfile.url}) + try: + redirect = re.search(r'URL=(.+?)">', html).group(1) + + except AttributeError: + self.fail(_("SolveMedia verify pattern not found")) + + else: + if "error" in html: + self.logWarning("Captcha code was invalid") + self.logDebug("Retry #%d" % i) + html = self.plugin.req.load(redirect) + else: + break + + else: + self.fail(_("SolveMedia max retries exceeded")) + + return result, challenge + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha(server, + get={'c': challenge}, + cookies=True, + imgtype="gif") + + self.logDebug("Result: %s" % result) + + return result diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 572fe95b9..5b9f2e1c3 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -2,105 +2,108 @@ import os import re +import subprocess from glob import glob -from os.path import basename, dirname, join from string import digits -from subprocess import Popen, PIPE from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError -from module.utils import save_join, decode +from module.utils import fs_decode, fs_encode, save_join def renice(pid, value): - if os.name != "nt" and value: + if value and os.name != "nt": try: - Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) - except: - print "Renice failed" + subprocess.Popen(["renice", str(value), str(pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1) + + except Exception: + pass class UnRar(Extractor): __name__ = "UnRar" - __version__ = "1.01" + __version__ = "1.20" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("Walter Purcaro", "vuolter@gmail.com"), + ("Immenz" , "immenz@gmx.net" )] CMD = "unrar" + VERSION = "" + EXTENSIONS = [".rar"] - EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"] + re_multipart = re.compile(r'\.(part|r)(\d+)(?:\.rar)?(\.rev|\.bad)?',re.I) - #@NOTE: there are some more uncovered rar formats - re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) - re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I) + re_filefixed = re.compile(r'Building (.+)') + re_filelist = re.compile(r'^(.)(\s*[\w\.\-]+)\s+(\d+\s+)+(?:\d+\%\s+)?[\d\-]{8}\s+[\d\:]{5}', re.M|re.I) - re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') re_wrongpwd = re.compile(r'password', re.I) - re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error|corrupt', re.I) + + re_version = re.compile(r'(?:UN)?RAR\s(\d+\.\d+)', re.I) @classmethod - def checkDeps(cls): + def isUsable(cls): if os.name == "nt": - cls.CMD = join(pypath, "UnRAR.exe") - p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() - else: try: - p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() + cls.CMD = os.path.join(pypath, "RAR.exe") + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + cls.__name__ = "RAR" + cls.REPAIR = True except OSError: - # fallback to rar - cls.CMD = "rar" - p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() + cls.CMD = os.path.join(pypath, "UnRAR.exe") + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + else: + try: + p = subprocess.Popen(["rar"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + cls.__name__ = "RAR" + cls.REPAIR = True + + except OSError: #: fallback to unrar + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + + m = cls.re_version.search(out) + cls.VERSION = m.group(1) if m else '(version unknown)' return True @classmethod - def isArchive(cls, file): - f = basename(file).lower() - return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS) + def isMultipart(cls, filename): + return True if cls.re_multipart.search(filename) else False - @classmethod - def getTargets(cls, files_ids): - targets = [] - - for file, id in files_ids: - if not cls.isArchive(file): - continue - - m = cls.re_rarpart.findall(file) - if m: - # only add first parts - if int(m[0][1]) == 1: - targets.append((file, id)) - else: - targets.append((file, id)) + def verify(self, password): + p = self.call_cmd("t", "-v", fs_encode(self.filename), password=password) + self._progress(p) + err = p.stderr.read().strip() - return targets + if self.re_wrongpwd.search(err): + raise PasswordError + if self.re_wrongcrc.search(err): + raise CRCError(err) - def check(self, out="", err=""): - if not out or not err: - return - if err.strip(): - if self.re_wrongpwd.search(err): - raise PasswordError + def check(self, password): + p = self.call_cmd("l", "-v", fs_encode(self.filename), password=password) + out, err = p.communicate() - elif self.re_wrongcrc.search(err): - raise CRCError + if self.re_wrongpwd.search(err): + raise PasswordError - else: #: raise error if anything is on stderr - raise ArchiveError(err.strip()) + if self.re_wrongcrc.search(err): + raise CRCError(err) # output only used to check if passworded files are present for attr in self.re_filelist.findall(out): @@ -108,114 +111,99 @@ class UnRar(Extractor): raise PasswordError - def verify(self): - p = self.call_cmd("l", "-v", self.file, password=self.password) - - self.check(*p.communicate()) - - if p and p.returncode: - raise ArchiveError("Process terminated") - - if not self.list(): - raise ArchiveError("Empty archive") - - - def isPassword(self, password): - if isinstance(password, basestring): - p = self.call_cmd("l", "-v", self.file, password=password) - out, err = p.communicate() - - if not self.re_wrongpwd.search(err): - return True - - return False - - def repair(self): - p = self.call_cmd("rc", self.file) - out, err = p.communicate() - - if p.returncode or err.strip(): - p = self.call_cmd("r", self.file) - out, err = p.communicate() - - if p.returncode or err.strip(): - return False - else: - self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0)) + p = self.call_cmd("rc", fs_encode(self.filename)) + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + if err or p.returncode: + return False return True - def extract(self, progress=lambda x: None): - self.verify() - - progress(0) - - command = "x" if self.fullpath else "e" - - p = self.call_cmd(command, self.file, self.out, password=self.password) - - renice(p.pid, self.renice) - - progressstring = "" + def _progress(self, process): + s = "" while True: - c = p.stdout.read(1) + c = process.stdout.read(1) # quit loop on eof if not c: break # reading a percentage sign -> set progress and restart - if c is '%': - progress(int(progressstring)) - progressstring = "" + if c == '%': + self.notifyProgress(int(s)) + s = "" # not reading a digit -> therefore restart elif c not in digits: - progressstring = "" + s = "" # add digit to progressstring else: - progressstring += c + s += c + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" - progress(100) + p = self.call_cmd(command, fs_encode(self.filename), self.out, password=password) - self.files = self.list() + renice(p.pid, self.renice) - # retrieve stderr - self.check(err=p.stderr.read()) + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) if p.returncode: - raise ArchiveError("Process terminated") + raise ArchiveError(_("Process return code: %d") % p.returncode) + + self.files = self.list(password) def getDeleteFiles(self): - if ".part" in basename(self.file): - return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) + dir, name = os.path.split(self.filename) + + # actually extracted file + files = [self.filename] - # get files which matches .r* and filter unsuited files out - parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) + # eventually Multipart Files + files.extend(save_join(dir, os.path.basename(file)) for file in filter(self.isMultipart, os.listdir(dir)) + if re.sub(self.re_multipart,".rar",name) == re.sub(self.re_multipart,".rar",file)) - return filter(lambda x: self.re_rarfile.match(x), parts) + return files - def list(self): + def list(self, password=None): command = "vb" if self.fullpath else "lb" - p = self.call_cmd(command, "-v", self.file, password=self.password) + p = self.call_cmd(command, "-v", fs_encode(self.filename), password=password) out, err = p.communicate() - if err.strip(): - self.m.logError(err) - if "Cannot open" in err: - return list() + if "Cannot open" in err: + raise ArchiveError(_("Cannot open file")) - if p.returncode: - self.m.logError("Process terminated") - return list() + if err.strip(): #: only log error at this point + self.manager.logError(err.strip()) result = set() - - for f in decode(out).splitlines(): - f = f.strip() - result.add(save_join(self.out, f)) + if not self.fullpath and self.VERSION.startswith('5'): + # NOTE: Unrar 5 always list full path + for f in fs_decode(out).splitlines(): + f = save_join(self.out, os.path.basename(f.strip())) + if os.path.isfile(f): + result.add(save_join(self.out, os.path.basename(f))) + else: + for f in fs_decode(out).splitlines(): + f = f.strip() + result.add(save_join(self.out, f)) return list(result) @@ -228,11 +216,11 @@ class UnRar(Extractor): args.append("-o+") else: args.append("-o-") - if self.delete: + if self.delete != 'No': args.append("-or") for word in self.excludefiles: - args.append("-x%s" % word.strip()) + args.append("-x'%s'" % word.strip()) # assume yes on all queries args.append("-y") @@ -248,6 +236,8 @@ class UnRar(Extractor): # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) - self.m.logDebug(" ".join(call)) - return Popen(call, stdout=PIPE, stderr=PIPE) + self.manager.logDebug(" ".join(call)) + + p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + return p diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 5ec56cbdf..4c18a0e35 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -2,85 +2,71 @@ from __future__ import with_statement +import os import sys import zipfile from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from module.utils import fs_encode class UnZip(Extractor): __name__ = "UnZip" - __version__ = "1.01" + __version__ = "1.12" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - EXTENSIONS = ["zip", "zip64"] + VERSION ="(python %s.%s.%s)" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]) + EXTENSIONS = [".zip", ".zip64"] @classmethod - def checkDeps(cls): + def isUsable(cls): return sys.version_info[:2] >= (2, 6) - @classmethod - def isArchive(cls, file): - return zipfile.is_zipfile(file) + def list(self, password=None): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + return z.namelist() - def verify(self): - try: - with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: - z.setpassword(self.password) - badcrc = z.testzip() - - except (BadZipfile, LargeZipFile), e: - raise ArchiveError(e) + def check(self, password): + pass - except RuntimeError, e: - if 'encrypted' in e: - raise PasswordError - else: - raise ArchiveError(e) - else: - if badcrc: - raise CRCError + def verify(self): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + badfile = z.testzip() - if not self.list(): - raise ArchiveError("Empty archive") + if badfile: + raise CRCError(badfile) + else: + raise PasswordError - def list(self): + def extract(self, password=None): try: - with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: - z.setpassword(self.password) - return z.namelist() - except Exception: - return list() + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + badfile = z.testzip() - def extract(self, progress=lambda x: None): - try: - with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: - progress(0) - z.extractall(self.out, pwd=self.password) - progress(100) + if badfile: + raise CRCError(badfile) + else: + z.extractall(self.out) - except (BadZipfile, LargeZipFile), e: + except (zipfile.BadZipfile, zipfile.LargeZipFile), e: raise ArchiveError(e) except RuntimeError, e: - if e is "Bad password for file": + if "encrypted" in e: raise PasswordError else: raise ArchiveError(e) - - finally: - self.files = self.list() - - - def getDeleteFiles(self): - return [self.file] + else: + self.files = z.namelist() diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py index c350729ac..f230cedf3 100644 --- a/module/plugins/internal/XFSAccount.py +++ b/module/plugins/internal/XFSAccount.py @@ -1,29 +1,29 @@ # -*- coding: utf-8 -*- import re +import time +import urlparse -from time import gmtime, mktime, strptime -from urlparse import urljoin - -from module.plugins.Account import Account +from module.plugins.internal.Account import Account from module.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies class XFSAccount(Account): __name__ = "XFSAccount" __type__ = "account" - __version__ = "0.35" + __version__ = "0.38" __description__ = """XFileSharing account plugin""" __license__ = "GPLv3" - __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("zoidberg" , "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com" )] HOSTER_DOMAIN = None HOSTER_URL = None + LOGIN_URL = None - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + COOKIES = True PREMIUM_PATTERN = r'\(Premium only\)' @@ -46,9 +46,15 @@ class XFSAccount(Account): def init(self): if not self.HOSTER_DOMAIN: self.logError(_("Missing HOSTER_DOMAIN")) + self.COOKIES = False + + else: + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN - if not self.HOSTER_URL: - self.HOSTER_URL = "http://www.%s/" % (self.HOSTER_DOMAIN or "") + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + set_cookies(req.cj, self.COOKIES) def loadAccountInfo(self, user, req): @@ -57,6 +63,12 @@ class XFSAccount(Account): leechtraffic = None premium = None + if not self.HOSTER_URL: #@TODO: Remove in 0.4.10 + return {'validuntil' : validuntil, + 'trafficleft' : trafficleft, + 'leechtraffic': leechtraffic, + 'premium' : premium} + html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) premium = True if re.search(self.PREMIUM_PATTERN, html) else False @@ -67,7 +79,7 @@ class XFSAccount(Account): self.logDebug("Expire date: " + expiredate) try: - validuntil = mktime(strptime(expiredate, "%d %B %Y")) + validuntil = time.mktime(time.strptime(expiredate, "%d %B %Y")) except Exception, e: self.logError(e) @@ -75,7 +87,7 @@ class XFSAccount(Account): else: self.logDebug("Valid until: %s" % validuntil) - if validuntil > mktime(gmtime()): + if validuntil > time.mktime(time.gmtime()): premium = True trafficleft = -1 else: @@ -136,25 +148,34 @@ class XFSAccount(Account): else: self.logDebug("LEECH_TRAFFIC_PATTERN not found") - return {'validuntil': validuntil, 'trafficleft': trafficleft, 'leechtraffic': leechtraffic, 'premium': premium} + return {'validuntil' : validuntil, + 'trafficleft' : trafficleft, + 'leechtraffic': leechtraffic, + 'premium' : premium} def login(self, user, data, req): - if isinstance(self.COOKIES, list): - set_cookies(req.cj, self.COOKIES) + if not self.HOSTER_URL: #@TODO: Remove in 0.4.10 + raise Exception(_("Missing HOSTER_DOMAIN")) - url = urljoin(self.HOSTER_URL, "login.html") - html = req.load(url, decode=True) + if not self.LOGIN_URL: + self.LOGIN_URL = urlparse.urljoin(self.HOSTER_URL, "login.html") + html = req.load(self.LOGIN_URL, decode=True) action, inputs = parseHtmlForm('name="FL"', html) if not inputs: - inputs = {'op': "login", + inputs = {'op' : "login", 'redirect': self.HOSTER_URL} - inputs.update({'login': user, + inputs.update({'login' : user, 'password': data['password']}) - html = req.load(self.HOSTER_URL, post=inputs, decode=True) + if action: + url = urlparse.urljoin("http://", action) + else: + url = self.HOSTER_URL + + html = req.load(url, post=inputs, decode=True) if re.search(self.LOGIN_FAIL_PATTERN, html): self.wrongPassword() diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py index 3cb837aac..8b333b45c 100644 --- a/module/plugins/internal/XFSCrypter.py +++ b/module/plugins/internal/XFSCrypter.py @@ -6,7 +6,7 @@ from module.plugins.internal.SimpleCrypter import SimpleCrypter, create_getInfo class XFSCrypter(SimpleCrypter): __name__ = "XFSCrypter" __type__ = "crypter" - __version__ = "0.05" + __version__ = "0.09" __pattern__ = r'^unmatchable$' @@ -19,10 +19,27 @@ class XFSCrypter(SimpleCrypter): URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] - - LINK_PATTERN = r'<(?:td|TD).*?>\s*<a href="(.+?)".*?>.+?(?:</a>)?\s*</(?:td|TD)>' - NAME_PATTERN = r'<[tT]itle>.*?\: (?P<N>.+) folder</[tT]itle>' + NAME_PATTERN = r'<[Tt]itle>.*?\: (?P<N>.+) folder</[Tt]itle>' + LINK_PATTERN = r'<(?:td|TD).*?>\s*(?:<.+>\s*)?<a href="(.+?)".*?>.+?(?:</a>)?\s*(?:<.+>\s*)?</(?:td|TD)>' OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' + + + def prepare(self): + if not self.HOSTER_DOMAIN: + if self.account: + account = self.account + else: + account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") + account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) + + if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: + self.HOSTER_DOMAIN = account.HOSTER_DOMAIN + else: + self.fail(_("Missing HOSTER_DOMAIN")) + + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + + return super(XFSCrypter, self).prepare() diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index b32f5978f..0591f1baf 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -1,13 +1,12 @@ # -*- coding: utf-8 -*- +import pycurl +import random import re +import urlparse -from random import random -from time import sleep - -from pycurl import FOLLOWLOCATION, LOW_SPEED_TIME - -from module.plugins.internal.CaptchaService import ReCaptcha, SolveMedia +from module.plugins.internal.ReCaptcha import ReCaptcha +from module.plugins.internal.SolveMedia import SolveMedia from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, secondsToMidnight from module.utils import html_unescape @@ -15,36 +14,33 @@ from module.utils import html_unescape class XFSHoster(SimpleHoster): __name__ = "XFSHoster" __type__ = "hoster" - __version__ = "0.35" + __version__ = "0.53" __pattern__ = r'^unmatchable$' __description__ = """XFileSharing hoster plugin""" __license__ = "GPLv3" - __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), - ("stickell", "l.stickell@yahoo.it"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("zoidberg" , "zoidberg@mujmail.cz"), + ("stickell" , "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com" )] HOSTER_DOMAIN = None - TEXT_ENCODING = False - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] - DIRECT_LINK = None - MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... + MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... NAME_PATTERN = r'(Filename[ ]*:[ ]*</b>(</td><td nowrap>)?|name="fname"[ ]+value="|<[\w^_]+ class="(file)?name">)\s*(?P<N>.+?)(\s*<|")' SIZE_PATTERN = r'(Size[ ]*:[ ]*</b>(</td><td>)?|File:.*>|</font>\s*\(|<[\w^_]+ class="size">)\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' - OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed|no longer available)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' - WAIT_PATTERN = r'<span id="countdown_str">.*?>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' + WAIT_PATTERN = r'<span id="countdown_str".*>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' PREMIUM_ONLY_PATTERN = r'>This file is available for Premium Users only' + HAPPY_HOUR_PATTERN = r'>[Hh]appy hour' ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' LINK_LEECH_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' - LINK_PATTERN = None #: final download url pattern CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' CAPTCHA_BLOCK_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>' @@ -56,48 +52,39 @@ class XFSHoster(SimpleHoster): def setup(self): - self.chunkLimit = 1 + self.chunkLimit = -1 if self.premium else 1 self.resumeDownload = self.multiDL = self.premium def prepare(self): """ Initialize important variables """ if not self.HOSTER_DOMAIN: - self.fail(_("Missing HOSTER_DOMAIN")) + if self.account: + account = self.account + else: + account = self.pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + + if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: + self.HOSTER_DOMAIN = account.HOSTER_DOMAIN + else: + self.fail(_("Missing HOSTER_DOMAIN")) + + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) if not self.LINK_PATTERN: - pattern = r'(https?://(www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' + pattern = r'(?:file: "(.+?)"|(https?://(?:www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<])' self.LINK_PATTERN = pattern % self.HOSTER_DOMAIN.replace('.', '\.') - self.captcha = None - self.errmsg = None - super(XFSHoster, self).prepare() if self.DIRECT_LINK is None: - self.directDL = bool(self.premium) - - - def downloadLink(self, link): - if link and isinstance(link, basestring): - if self.captcha: - self.correctCaptcha() - - self.download(link, ref=True, cookies=True, disposition=False) #@TODO: Set `disposition=True` in 0.4.10 - - elif self.errmsg: - if 'captcha' in self.errmsg: - self.fail(_("No valid captcha code entered")) - else: - self.fail(self.errmsg) - - else: - self.fail(_("Download link not found")) + self.directDL = self.premium def handleFree(self, pyfile): for i in xrange(1, 6): - self.logDebug("Getting download link: #%d" % i) + self.logDebug("Getting download link #%d" % i) self.checkErrors() @@ -107,11 +94,11 @@ class XFSHoster(SimpleHoster): data = self.getPostParameters() - self.req.http.c.setopt(FOLLOWLOCATION, 0) + self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.html = self.load(pyfile.url, post=data, ref=True, decode=True) + self.html = self.load(pyfile.url, post=data, decode=True) - self.req.http.c.setopt(FOLLOWLOCATION, 1) + self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 1) m = re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I) if m and not "op=" in m.group(1): @@ -124,9 +111,7 @@ class XFSHoster(SimpleHoster): self.logError(data['op'] if 'op' in data else _("UNKNOWN")) return "" - self.errmsg = None - - self.link = m.group(1).strip() #@TODO: Remove .strip() in 0.4.10 + self.link = m.group(1).strip() #@TODO: Remove `.strip()` in 0.4.10 def handlePremium(self, pyfile): @@ -142,7 +127,7 @@ class XFSHoster(SimpleHoster): action, inputs = self.parseHtmlForm() - upload_id = "%012d" % int(random() * 10 ** 12) + upload_id = "%012d" % int(random.random() * 10 ** 12) action += upload_id + "&js_on=1&utype=prem&upload_type=url" inputs['tos'] = '1' @@ -159,10 +144,7 @@ class XFSHoster(SimpleHoster): action, inputs = self.parseHtmlForm('F1') if not inputs: - if self.errmsg: - self.retry(reason=self.errmsg) - else: - self.error(_("TEXTAREA F1 not found")) + self.retry(reason=self.info['error'] if 'error' in self.info else _("TEXTAREA F1 not found")) self.logDebug(inputs) @@ -189,61 +171,6 @@ class XFSHoster(SimpleHoster): if 'location' in header: #: Direct download link self.link = header['location'] - else: - self.fail(_("Download link not found")) - - - def checkErrors(self): - m = re.search(self.ERROR_PATTERN, self.html) - if m is None: - self.errmsg = None - else: - self.errmsg = m.group(1).strip() - - self.logWarning(re.sub(r"<.*?>", " ", self.errmsg)) - - if 'wait' in self.errmsg: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)]) - self.wait(wait_time, True) - - elif 'country' in self.errmsg: - self.fail(_("Downloads are disabled for your country")) - - elif 'captcha' in self.errmsg: - self.invalidCaptcha() - - elif 'premium' in self.errmsg and 'require' in self.errmsg: - self.fail(_("File can be downloaded by premium users only")) - - elif 'limit' in self.errmsg: - if 'days' in self.errmsg: - delay = secondsToMidnight(gmt=2) - retries = 3 - else: - delay = 1 * 60 * 60 - retries = 24 - - self.wantReconnect = True - self.retry(retries, delay, _("Download limit exceeded")) - - elif 'countdown' in self.errmsg or 'Expired' in self.errmsg: - self.retry(reason=_("Link expired")) - - elif 'maintenance' in self.errmsg or 'maintainance' in self.errmsg: - self.tempOffline() - - elif 'up to' in self.errmsg: - self.fail(_("File too large for free download")) - - else: - self.wantReconnect = True - self.retry(wait_time=60, reason=self.errmsg) - - if self.errmsg: - self.info['error'] = self.errmsg - else: - self.info.pop('error', None) def getPostParameters(self): @@ -255,10 +182,7 @@ class XFSHoster(SimpleHoster): if not inputs: action, inputs = self.parseHtmlForm('F1') if not inputs: - if self.errmsg: - self.retry(reason=self.errmsg) - else: - self.error(_("TEXTAREA F1 not found")) + self.retry(reason=self.info['error'] if 'error' in self.info else _("TEXTAREA F1 not found")) self.logDebug(inputs) @@ -276,8 +200,7 @@ class XFSHoster(SimpleHoster): wait_time = int(m.group(1)) self.setWait(wait_time, False) - self.captcha = self.handleCaptcha(inputs) - + self.handleCaptcha(inputs) self.wait() else: inputs['referer'] = self.pyfile.url @@ -297,39 +220,43 @@ class XFSHoster(SimpleHoster): if m: captcha_url = m.group(1) inputs['code'] = self.decryptCaptcha(captcha_url) - return 1 + return m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S) if m: captcha_div = m.group(1) numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) + self.logDebug(captcha_div) - inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + + inputs['code'] = "".join(a[1] for a in sorted(numerals, key=lambda num: int(num[0]))) + self.logDebug("Captcha code: %s" % inputs['code'], numerals) - return 2 + return recaptcha = ReCaptcha(self) try: captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) - except: + + except Exception: captcha_key = recaptcha.detect_key() + else: self.logDebug("ReCaptcha key: %s" % captcha_key) if captcha_key: - inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) - return 3 + inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = recaptcha.challenge(captcha_key) + return solvemedia = SolveMedia(self) try: captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) - except: + + except Exception: captcha_key = solvemedia.detect_key() + else: self.logDebug("SolveMedia key: %s" % captcha_key) if captcha_key: - inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) - return 4 - - return 0 + inputs['adcopy_response'], inputs['adcopy_challenge'] = solvemedia.challenge(captcha_key) |