diff options
Diffstat (limited to 'module/plugins/internal')
29 files changed, 3115 insertions, 1540 deletions
diff --git a/module/plugins/internal/Account.py b/module/plugins/internal/Account.py new file mode 100644 index 000000000..2713e8da4 --- /dev/null +++ b/module/plugins/internal/Account.py @@ -0,0 +1,397 @@ +# -*- coding: utf-8 -*- + +import copy +import random +import time +import threading +import traceback + +from module.plugins.internal.Plugin import Plugin +from module.utils import compare_time, lock, parseFileSize as parse_size + + +class Account(Plugin): + __name__ = "Account" + __type__ = "account" + __version__ = "0.17" + __status__ = "testing" + + __description__ = """Base account plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + LOGIN_TIMEOUT = 10 * 60 #: After that time (in minutes) pyload will relogin the account + INFO_THRESHOLD = 30 * 60 #: After that time (in minutes) account data will be reloaded + + + def __init__(self, manager, accounts): + self._init(manager.core) + + self.lock = threading.RLock() + self.accounts = accounts #@TODO: Remove in 0.4.10 + + self.init() + self.init_accounts(accounts) + + + def init(self): + """ + Initialize additional data structures + """ + pass + + + def login(self, user, password, data, req): + """ + Login into account, the cookies will be saved so user can be recognized + """ + pass + + + @lock + def _login(self, user): + try: + info = self.info[user] + info['login']['timestamp'] = time.time() #: Set timestamp for login + + self.req = self.get_request(user) + self.login(user, info['login']['password'], info['data'], self.req) + + except Exception, e: + self.log_warning(_("Could not login user `%s`") % user, e) + res = info['login']['valid'] = False + self.accounts[user]['valid'] = False #@TODO: Remove in 0.4.10 + + if self.pyload.debug: + traceback.print_exc() + + else: + res = info['login']['valid'] = True + self.accounts[user]['valid'] = True #@TODO: Remove in 0.4.10 + + finally: + self.clean() + return res + + + def relogin(self, user): + self.log_info(_("Relogin user `%s`...") % user) + + req = self.get_request(user) + if req: + req.clearCookies() + self.clean() + + return self._login(user) + + + #@TODO: Rewrite in 0.4.10 + def init_accounts(self, accounts): + for user, data in accounts.items(): + self.add(user, data['password'], data['options']) + + + @lock + def add(self, user, password=None, options={}): + if user not in self.info: + self.info[user] = {'login': {'valid' : None, + 'password' : password or "", + 'timestamp': 0}, + 'data' : {'options' : options, + 'premium' : None, + 'validuntil' : None, + 'trafficleft': None, + 'maxtraffic' : None}} + + #@TODO: Remove in 0.4.10 + self.accounts[user] = self.info[user]['data'] + self.accounts[user].update({'login' : user, + 'type' : self.__name__, + 'valid' : self.info[user]['login']['valid'], + 'password': self.info[user]['login']['password']}) + + self.log_info(_("Login user `%s`...") % user) + self._login(user) + return True + + else: + self.log_error(_("Error adding user `%s`") % user, _("User already exists")) + + + @lock + def update(self, user, password=None, options={}): + """ + Updates account and return true if anything changed + """ + if not (password or options): + return + + if user not in self.info: + return self.add(user, password, options) + + else: + if password: + self.info[user]['login']['password'] = password + self.accounts[user]['password'] = password #@TODO: Remove in 0.4.10 + self.relogin(user) + + if options: + before = self.info[user]['data']['options'] + self.info[user]['data']['options'].update(options) + return self.info[user]['data']['options'] != before + + return True + + + #: Deprecated method, use `update` instead (Remove in 0.4.10) + def updateAccounts(self, *args, **kwargs): + return self.update(*args, **kwargs) + + + def remove(self, user=None): # -> def remove + if not user: + self.info.clear() + self.accounts.clear() #@TODO: Remove in 0.4.10 + + elif user in self.info: + self.info.pop(user, None) + self.accounts.pop(user, None) #@TODO: Remove in 0.4.10 + + + #: Deprecated method, use `remove` instead (Remove in 0.4.10) + def removeAccount(self, *args, **kwargs): + return self.remove(*args, **kwargs) + + + #@NOTE: Remove in 0.4.10? + def get_data(self, user, reload=False): + if not user: + return + + info = self.get_info(user, reload) + if info and 'data' in info: + return info['data'] + + + #: Deprecated method, use `get_data` instead (Remove in 0.4.10) + def getAccountData(self, *args, **kwargs): + if 'force' in kwargs: + kwargs['reload'] = kwargs['force'] + kwargs.pop('force', None) + + data = self.get_data(*args, **kwargs) or {} + if 'options' not in data: + data['options'] = {'limitdl': ['0']} + + return data + + + def get_info(self, user, reload=False): + """ + Retrieve account infos for an user, do **not** overwrite this method!\\ + just use it to retrieve infos in hoster plugins. see `parse_info` + + :param user: username + :param reload: reloads cached account information + :return: dictionary with information + """ + if user not in self.info: + self.log_error(_("User `%s` not found while retrieving account info") % user) + return + + elif reload: + self.log_info(_("Parsing account info for user `%s`...") % user) + info = self._parse_info(user) + + safe_info = copy.deepcopy(info) + safe_info['login']['password'] = "**********" + safe_info['data']['password'] = "**********" #@TODO: Remove in 0.4.10 + self.log_debug("Account info for user `%s`: %s" % (user, safe_info)) + + elif self.INFO_THRESHOLD > 0 and self.info[user]['login']['timestamp'] + self.INFO_THRESHOLD < time.time(): + self.log_debug("Reached data timeout for %s" % user) + info = self.get_info(user, True) + + else: + info = self.info[user] + + return info + + + def is_premium(self, user): + if not user: + return False + + info = self.get_info(user) + return info['data']['premium'] + + + def _parse_info(self, user): + info = self.info[user] + + if not info['login']['valid']: + return info + + try: + self.req = self.get_request(user) + extra_info = self.parse_info(user, info['login']['password'], info, self.req) + + if extra_info and isinstance(extra_info, dict): + info['data'].update(extra_info) + + except (Fail, Exception), e: + self.log_warning(_("Error loading info for user `%s`") % user, e) + + if self.pyload.debug: + traceback.print_exc() + + finally: + self.clean() + + self.info[user].update(info) + return info + + + def parse_info(self, user, password, info, req): + """ + This should be overwritten in account plugin + and retrieving account information for user + + :param user: + :param req: `Request` instance + :return: + """ + pass + + + #: Remove in 0.4.10 + def getAllAccounts(self, *args, **kwargs): + return [self.getAccountData(user, *args, **kwargs) for user, info in self.info.items()] + + + def login_fail(self, reason=_("Login handshake has failed")): + return self.fail(reason) + + + def get_request(self, user=None): + if not user: + user, info = self.select() + + return self.pyload.requestFactory.getRequest(self.__name__, user) + + + def get_cookies(self, user=None): + if not user: + user, info = self.select() + + return self.pyload.requestFactory.getCookieJar(self.__name__, user) + + + def select(self): + """ + Returns a valid account name and info + """ + free_accounts = {} + premium_accounts = {} + + for user, info in self.info.items(): + if not info['login']['valid']: + continue + + data = info['data'] + + if "time" in data['options'] and data['options']['time']: + time_data = "" + try: + time_data = data['options']['time'][0] + start, end = time_data.split("-") + + if not compare_time(start.split(":"), end.split(":")): + continue + + except Exception: + self.log_warning(_("Wrong time format `%s` for account `%s`, use 1:22-3:44") % (user, time_data)) + + if data['trafficleft'] == 0: + continue + + if time.time() > data['validuntil'] > 0: + continue + + if data['premium']: + premium_accounts[user] = info + + else: + free_accounts[user] = info + + account_list = (premium_accounts or free_accounts).items() + + if not account_list: + return None, None + + validuntil_list = [(user, info) for user, info in account_list if info['data']['validuntil']] + + if not validuntil_list: + return random.choice(account_list) #@TODO: Random account?! Recheck in 0.4.10 + + return sorted(validuntil_list, + key=lambda a: a[1]['data']['validuntil'], + reverse=True)[0] + + + def parse_traffic(self, value, unit=None): #: Return kilobytes + if not unit and not isinstance(value, basestring): + unit = "KB" + + return parse_size(value, unit) + + + def empty(self, user): + if user not in self.info: + return + + self.log_warning(_("Account `%s` has not enough traffic") % user, _("Checking again in 30 minutes")) + + self.info[user]['data']['trafficleft'] = 0 + self.schedule_refresh(user, 30 * 60) + + + def expired(self, user): + if user not in self.info: + return + + self.log_warning(_("Account `%s` is expired") % user, _("Checking again in 60 minutes")) + + self.info[user]['data']['validuntil'] = time.time() - 1 + self.schedule_refresh(user, 60 * 60) + + + def schedule_refresh(self, user, time=0): + """ + Add task to refresh account info to sheduler + """ + self.log_debug("Scheduled refresh for user `%s` in %s seconds" % (user, time)) + self.pyload.scheduler.addJob(time, self.get_info, [user, True]) + + + #: Deprecated method, use `schedule_refresh` instead (Remove in 0.4.10) + def scheduleRefresh(self, *args, **kwargs): + if 'force' in kwargs: + kwargs.pop('force', None) #@TODO: Recheck in 0.4.10 + return self.schedule_refresh(*args, **kwargs) + + + @lock + def is_logged(self, user, relogin=False): + """ + Checks if user is still logged in + """ + if user in self.info: + if self.LOGIN_TIMEOUT > 0 and self.info[user]['login']['timestamp'] + self.LOGIN_TIMEOUT < time.time(): + self.log_debug("Reached login timeout for %s" % user) + return self.relogin(user) if relogin else False + else: + return True + else: + return False diff --git a/module/plugins/internal/AdYouLike.py b/module/plugins/internal/AdYouLike.py deleted file mode 100644 index a9c194dda..000000000 --- a/module/plugins/internal/AdYouLike.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- - -import re - -from module.common.json_layer import json_loads -from module.plugins.internal.Captcha import Captcha - - -class AdYouLike(Captcha): - __name__ = "AdYouLike" - __type__ = "captcha" - __version__ = "0.06" - - __description__ = """AdYouLike captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - - - AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' - CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' - - - def detect_key(self, html=None): - html = html or self.retrieve_html() - - m = re.search(self.AYL_PATTERN, html) - n = re.search(self.CALLBACK_PATTERN, html) - if m and n: - self.key = (m.group(1).strip(), n.group(1).strip()) - self.logDebug("Ayl: %s | Callback: %s" % self.key) - return self.key #: key is the tuple(ayl, callback) - else: - self.logWarning("Ayl or callback pattern not found") - return None - - - def challenge(self, key=None, html=None): - ayl, callback = key or self.retrieve_key(html) - - # {"adyoulike":{"key":"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, - # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} - ayl = json_loads(ayl) - - html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", - get={'key' : ayl['adyoulike']['key'], - 'env' : ayl['all']['env'], - 'callback': callback}) - try: - challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) - - except AttributeError: - self.fail(_("AdYouLike challenge pattern not found")) - - self.logDebug("Challenge: %s" % challenge) - - return self.result(ayl, challenge), challenge - - - def result(self, server, challenge): - # Adyoulike.g._jsonp_5579316662423138 - # ({"translations":{"fr":{"instructions_visual":"Recopiez « Soonnight » ci-dessous :"}}, - # "site_under":true,"clickable":true,"pixels":{"VIDEO_050":[],"DISPLAY":[],"VIDEO_000":[],"VIDEO_100":[], - # "VIDEO_025":[],"VIDEO_075":[]},"medium_type":"image/adyoulike", - # "iframes":{"big":"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" - # height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},"shares":{},"id":256, - # "token":"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1","formats":{"small":{"y":300,"x":0,"w":300,"h":60}, - # "big":{"y":0,"x":0,"w":300,"h":250},"hover":{"y":440,"x":0,"w":300,"h":60}}, - # "tid":"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) - - if isinstance(server, basestring): - server = json_loads(server) - - if isinstance(challenge, basestring): - challenge = json_loads(challenge) - - try: - instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] - result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() - - except AttributeError: - self.fail(_("AdYouLike result not found")) - - result = {'_ayl_captcha_engine' : "adyoulike", - '_ayl_env' : server['all']['env'], - '_ayl_tid' : challenge['tid'], - '_ayl_token_challenge': challenge['token'], - '_ayl_response' : response} - - self.logDebug("Result: %s" % result) - - return result diff --git a/module/plugins/internal/Addon.py b/module/plugins/internal/Addon.py new file mode 100644 index 000000000..45ca98eac --- /dev/null +++ b/module/plugins/internal/Addon.py @@ -0,0 +1,245 @@ +# -*- coding: utf-8 -*- + +import traceback + +from module.plugins.internal.Plugin import Plugin + + +class Expose(object): + """ + Used for decoration to declare rpc services + """ + def __new__(cls, f, *args, **kwargs): + hookManager.addRPC(f.__module__, f.func_name, f.func_doc) + return f + + +def threaded(fn): + + def run(*args, **kwargs): + hookManager.startThread(fn, *args, **kwargs) + + return run + + +class Addon(Plugin): + __name__ = "Addon" + __type__ = "hook" #@TODO: Change to `addon` in 0.4.10 + __version__ = "0.04" + __status__ = "testing" + + __config__ = [] #: [("name", "type", "desc", "default")] + __threaded__ = [] #@TODO: Remove in 0.4.10 + + __description__ = """Base addon plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, core, manager): + self._init(core) + + #: `HookManager` + self.manager = manager + + #: Automatically register event listeners for functions, attribute will be deleted dont use it yourself + self.event_map = {} + + #: Deprecated alternative to event_map + #: List of events the plugin can handle, name the functions exactly like eventname. + self.event_list = [] #@NOTE: dont make duplicate entries in event_map + + #: Callback of periodical job task, used by HookManager + self.cb = None + self.interval = 60 + + self.init() + self.init_events() + + + def init_events(self): + if self.event_map: + for event, funcs in self.event_map.items(): + if type(funcs) in (list, tuple): + for f in funcs: + self.manager.addEvent(event, getattr(self, f)) + else: + self.manager.addEvent(event, getattr(self, funcs)) + + #: Delete for various reasons + self.event_map = None + + if self.event_list: + self.log_debug("Deprecated method `event_list`, use `event_map` instead") + + for f in self.event_list: + self.manager.addEvent(f, getattr(self, f)) + + self.event_list = None + + + def init_periodical(self, delay=0, threaded=False): + self.cb = self.pyload.scheduler.addJob(max(0, delay), self._periodical, [threaded], threaded=threaded) + + + #: Deprecated method, use `init_periodical` instead (Remove in 0.4.10) + def initPeriodical(self, *args, **kwargs): + return self.init_periodical(*args, **kwargs) + + + def _periodical(self, threaded): + if self.interval < 0: + self.cb = None + return + + try: + self.periodical() + + except Exception, e: + self.log_error(_("Error executing periodical task: %s") % e) + if self.pyload.debug: + traceback.print_exc() + + self.cb = self.pyload.scheduler.addJob(self.interval, self._periodical, [threaded], threaded=threaded) + + + def periodical(self): + pass + + + def __repr__(self): + return "<Addon %s>" % self.__name__ + + + def is_activated(self): + """ + Checks if addon is activated + """ + return self.get_config("activated") + + + #: Deprecated method, use `is_activated` instead (Remove in 0.4.10) + def isActivated(self, *args, **kwargs): + return self.is_activated(*args, **kwargs) + + + def deactivate(self): + """ + Called when addon was deactivated + """ + pass + + + #: Deprecated method, use `deactivate` instead (Remove in 0.4.10) + def unload(self, *args, **kwargs): + return self.deactivate(*args, **kwargs) + + + def activate(self): + """ + Called when addon was activated + """ + pass + + + #: Deprecated method, use `activate` instead (Remove in 0.4.10) + def coreReady(self, *args, **kwargs): + return self.activate(*args, **kwargs) + + + def exit(self): + """ + Called by core.shutdown just before pyLoad exit + """ + pass + + + #: Deprecated method, use `exit` instead (Remove in 0.4.10) + def coreExiting(self, *args, **kwargs): + self.unload(*args, **kwargs) #@TODO: Fix in 0.4.10 + return self.exit(*args, **kwargs) + + + def download_preparing(self, pyfile): + pass + + + #: Deprecated method, use `download_preparing` instead (Remove in 0.4.10) + def downloadPreparing(self, pyfile): + if pyfile.plugin.req is not None: #@TODO: Remove in 0.4.10 + return self.download_preparing(pyfile) + + + def download_finished(self, pyfile): + pass + + + #: Deprecated method, use `download_finished` instead (Remove in 0.4.10) + def downloadFinished(self, *args, **kwargs): + return self.download_finished(*args, **kwargs) + + + def download_failed(self, pyfile): + pass + + + #: Deprecated method, use `download_failed` instead (Remove in 0.4.10) + def downloadFailed(self, *args, **kwargs): + return self.download_failed(*args, **kwargs) + + + def package_finished(self, pypack): + pass + + + #: Deprecated method, use `package_finished` instead (Remove in 0.4.10) + def packageFinished(self, *args, **kwargs): + return self.package_finished(*args, **kwargs) + + + def before_reconnect(self, ip): + pass + + + #: Deprecated method, use `before_reconnect` instead (Remove in 0.4.10) + def beforeReconnecting(self, *args, **kwargs): + return self.before_reconnect(*args, **kwargs) + + + def after_reconnect(self, ip, oldip): + pass + + + #: Deprecated method, use `after_reconnect` instead (Remove in 0.4.10) + def afterReconnecting(self, ip): + return self.after_reconnect(ip, None) + + + def captcha_task(self, task): + """ + New captcha task for the plugin, it MUST set the handler and timeout or will be ignored + """ + pass + + + #: Deprecated method, use `captcha_task` instead (Remove in 0.4.10) + def newCaptchaTask(self, *args, **kwargs): + return self.captcha_task(*args, **kwargs) + + + def captcha_correct(self, task): + pass + + + #: Deprecated method, use `captcha_correct` instead (Remove in 0.4.10) + def captchaCorrect(self, *args, **kwargs): + return self.captcha_correct(*args, **kwargs) + + + def captcha_invalid(self, task): + pass + + + #: Deprecated method, use `captcha_invalid` instead (Remove in 0.4.10) + def captchaInvalid(self, *args, **kwargs): + return self.captcha_invalid(*args, **kwargs) diff --git a/module/plugins/internal/AdsCaptcha.py b/module/plugins/internal/AdsCaptcha.py deleted file mode 100644 index 9cab99151..000000000 --- a/module/plugins/internal/AdsCaptcha.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: utf-8 -*- - -import random -import re - -from module.plugins.internal.Captcha import Captcha - - -class AdsCaptcha(Captcha): - __name__ = "AdsCaptcha" - __type__ = "captcha" - __version__ = "0.09" - - __description__ = """AdsCaptcha captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?CaptchaId=(\d+)' - PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)' - - - def detect_key(self, html=None): - html = html or self.retrieve_html() - - m = re.search(self.PUBLICKEY_PATTERN, html) - n = re.search(self.CAPTCHAID_PATTERN, html) - if m and n: - self.key = (m.group(1).strip(), n.group(1).strip()) #: key is the tuple(PublicKey, CaptchaId) - self.logDebug("Key: %s | ID: %s" % self.key) - return self.key - else: - self.logWarning("Key or id pattern not found") - return None - - - def challenge(self, key=None, html=None): - PublicKey, CaptchaId = key or self.retrieve_key(html) - - html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", - get={'CaptchaId': CaptchaId, - 'PublicKey': PublicKey}) - try: - challenge = re.search("challenge: '(.+?)',", html).group(1) - server = re.search("server: '(.+?)',", html).group(1) - - except AttributeError: - self.fail(_("AdsCaptcha challenge pattern not found")) - - self.logDebug("Challenge: %s" % challenge) - - return self.result(server, challenge), challenge - - - def result(self, server, challenge): - result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, - get={'cid': challenge, 'dummy': random.random()}, - cookies=True, - imgtype="jpg") - - self.logDebug("Result: %s" % result) - - return result diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index b4af46493..c08050ee8 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -1,56 +1,141 @@ # -*- coding: utf-8 -*- -from module.plugins.Plugin import Base +from __future__ import with_statement +import os +import time +import traceback -#@TODO: Extend (new) Plugin class; remove all `html` args -class Captcha(Base): +from module.plugins.internal.Plugin import Plugin + + +class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" - __version__ = "0.29" + __version__ = "0.42" + __status__ = "testing" - __description__ = """Base captcha service plugin""" + __description__ = """Base anti-captcha plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - key = None #: last key detected + def __init__(self, plugin): #@TODO: Pass pyfile instead plugin, so store plugin's html in its associated pyfile as data + self._init(plugin.pyload) - def __init__(self, plugin): self.plugin = plugin - super(Captcha, self).__init__(plugin.core) + self.task = None #: captchaManager task + + self.init() + + + def init(self): + """ + Initialize additional data structures + """ + pass + + + def _log(self, level, plugintype, pluginname, messages): + return self.plugin._log(level, + plugintype, + self.plugin.__name__, + (self.__name__,) + messages) + + + def recognize(self, image): + """ + Extend to build your custom anti-captcha ocr + """ + pass + + + def decrypt(self, url, get={}, post={}, ref=False, cookies=False, decode=False, + input_type='jpg', output_type='textual', ocr=True, timeout=120): + img = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode) + return self._decrypt(img, input_type, output_type, ocr, timeout) + + + #@TODO: Definitely choose a better name for this method! + def _decrypt(self, raw, input_type='jpg', output_type='textual', ocr=False, timeout=120): + """ + Loads a captcha and decrypts it with ocr, plugin, user input + + :param raw: image raw data + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param input_type: Type of the Image + :param output_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + :param ocr: if True, ocr is not used + + :return: result of decrypting + """ + result = "" + time_ref = ("%.2f" % time.time())[-6:].replace(".", "") + + with open(os.path.join("tmp", "captcha_image_%s_%s.%s" % (self.plugin.__name__, time_ref, input_type)), "wb") as tmp_img: + tmp_img.write(raw) + + if ocr: + if isinstance(ocr, basestring): + OCR = self.pyload.pluginManager.loadClass("captcha", ocr) #: Rename `captcha` to `ocr` in 0.4.10 + result = OCR(self.plugin).recognize(tmp_img.name) + else: + result = self.recognize(tmp_img.name) + + if not result: + captchaManager = self.pyload.captchaManager + + try: + self.task = captchaManager.newTask(raw, input_type, tmp_img.name, output_type) + + captchaManager.handleCaptcha(self.task) + + self.task.setWaiting(max(timeout, 50)) #@TODO: Move to `CaptchaManager` in 0.4.10 + while self.task.isWaiting(): + if self.plugin.pyfile.abort: + self.plugin.abort() + time.sleep(1) + + finally: + captchaManager.removeTask(self.task) + + if self.task.error: + self.fail(self.task.error) + elif not self.task.result: + self.invalid() + self.plugin.retry(reason=_("No captcha result obtained in appropiate time")) - #@TODO: Recheck in 0.4.10 - def fail(self, reason): - self.plugin.fail(reason) - raise AttributeError(reason) + result = self.task.result + if not self.pyload.debug: + try: + os.remove(tmp_img.name) - #@TODO: Recheck in 0.4.10 - def retrieve_key(self, html): - if self.detect_key(html): - return self.key - else: - self.fail(_("%s key not found") % self.__name__) + except OSError, e: + self.log_warning(_("Error removing: %s") % tmp_img.name, e) + traceback.print_exc() + self.log_info(_("Captcha result: ") + result) #@TODO: Remove from here? - #@TODO: Recheck in 0.4.10 - def retrieve_html(self): - if hasattr(self.plugin, "html") and self.plugin.html: - return self.plugin.html - else: - self.fail(_("%s html not found") % self.__name__) + return result - def detect_key(self, html=None): - raise NotImplementedError + def invalid(self): + if not self.task: + return + self.log_error(_("Invalid captcha")) + self.task.invalid() - def challenge(self, key=None, html=None): - raise NotImplementedError + def correct(self): + if not self.task: + return - def result(self, server, challenge): - raise NotImplementedError + self.log_info(_("Correct captcha")) + self.task.correct() diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py new file mode 100644 index 000000000..20dc60427 --- /dev/null +++ b/module/plugins/internal/CaptchaService.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.Captcha import Captcha + + +class CaptchaService(Captcha): + __name__ = "CaptchaService" + __type__ = "captcha" + __version__ = "0.32" + __status__ = "testing" + + __description__ = """Base anti-captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + def init(self): + self.key = None #: Last key detected + + + #@TODO: Recheck in 0.4.10 + def retrieve_key(self, data): + if self.detect_key(data): + return self.key + else: + self.fail(_("%s key not found") % self.__name__) + + + #@TODO: Recheck in 0.4.10, html is now pyfile.data + def retrieve_data(self): + if hasattr(self.plugin, "html") and self.plugin.html: + return self.plugin.html + else: + self.fail(_("%s data not found") % self.__name__) + + + def detect_key(self, data=None): + raise NotImplementedError + + + def challenge(self, key=None, data=None): + raise NotImplementedError + + + def result(self, server, challenge): + raise NotImplementedError diff --git a/module/plugins/internal/Container.py b/module/plugins/internal/Container.py new file mode 100644 index 000000000..729592a0d --- /dev/null +++ b/module/plugins/internal/Container.py @@ -0,0 +1,91 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +import os +import re +import traceback + +from module.plugins.internal.Crypter import Crypter +from module.plugins.internal.Plugin import exists +from module.utils import save_join as fs_join + + +class Container(Crypter): + __name__ = "Container" + __type__ = "container" + __version__ = "0.06" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base container decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay", "mkaay@mkaay.de")] + + + def process(self, pyfile): + """ + Main method + """ + self._load2disk() + + self.decrypt(pyfile) + + self.delete_tmp() + + if self.urls: + self._generate_packages() + + elif not self.packages: + self.error(_("No link grabbed"), "decrypt") + + self._create_packages() + + + #: Deprecated method, use `_load2disk` instead (Remove in 0.4.10) + def loadToDisk(self, *args, **kwargs): + return self._load2disk(*args, **kwargs) + + + def _load2disk(self): + """ + Loads container to disk if its stored remotely and overwrite url, + or check existent on several places at disk + """ + if self.pyfile.url.startswith("http"): + self.pyfile.name = re.findall("([^\/=]+)", self.pyfile.url)[-1] + content = self.load(self.pyfile.url) + self.pyfile.url = fs_join(self.pyload.config.get("general", "download_folder"), self.pyfile.name) + try: + with open(self.pyfile.url, "wb") as f: + f.write(content) + + except IOError, e: + self.fail(str(e)) #@TODO: Remove `str` in 0.4.10 + + else: + self.pyfile.name = os.path.basename(self.pyfile.url) + if not exists(self.pyfile.url): + if exists(fs_join(pypath, self.pyfile.url)): + self.pyfile.url = fs_join(pypath, self.pyfile.url) + else: + self.fail(_("File not exists")) + + + #: Deprecated method, use `delete_tmp` instead (Remove in 0.4.10) + def deleteTmp(self, *args, **kwargs): + return self.delete_tmp(*args, **kwargs) + + + def delete_tmp(self): + if not self.pyfile.name.startswith("tmp_"): + return + + try: + os.remove(self.pyfile.url) + except OSError, e: + self.log_warning(_("Error removing: %s") % self.pyfile.url, e) + if self.pyload.debug: + traceback.print_exc() diff --git a/module/plugins/internal/Crypter.py b/module/plugins/internal/Crypter.py new file mode 100644 index 000000000..d0e8eb1b4 --- /dev/null +++ b/module/plugins/internal/Crypter.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- + +import urlparse + +from module.plugins.internal.Hoster import Hoster, _fixurl +from module.utils import save_path as safe_filename + + +class Crypter(Hoster): + __name__ = "Crypter" + __type__ = "crypter" + __version__ = "0.07" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides pyload.config.get("general", "folder_per_package") + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description__ = """Base decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + html = None #: Last html loaded #@TODO: Move to Hoster + + + def __init__(self, pyfile): + super(Crypter, self).__init__(pyfile) + + #: Put all packages here. It's a list of tuples like: ( name, [list of links], folder ) + self.packages = [] + + #: List of urls, pyLoad will generate packagenames + self.urls = [] + + + def process(self, pyfile): + """ + Main method + """ + self.decrypt(pyfile) + + if self.urls: + self._generate_packages() + + elif not self.packages: + self.error(_("No link grabbed"), "decrypt") + + self._create_packages() + + + def decrypt(self, pyfile): + raise NotImplementedError + + + def _generate_packages(self): + """ + Generate new packages from self.urls + """ + packages = [(name, links, None) for name, links in self.pyload.api.generatePackages(self.urls).items()] + self.packages.extend(packages) + + + def _create_packages(self): + """ + Create new packages from self.packages + """ + package_folder = self.pyfile.package().folder + package_password = self.pyfile.package().password + package_queue = self.pyfile.package().queue + + folder_per_package = self.pyload.config.get("general", "folder_per_package") + use_subfolder = self.get_config('use_subfolder', folder_per_package) + subfolder_per_package = self.get_config('subfolder_per_package', True) + + for name, links, folder in self.packages: + self.log_debug("Parsed package: %s" % name, + "%d links" % len(links), + "Saved to folder: %s" % folder if folder else "Saved to download folder") + + pid = self.pyload.api.addPackage(name, map(self.fixurl, links), package_queue) + + if package_password: + self.pyload.api.setPackageData(pid, {'password': package_password}) + + #: Workaround to do not break API addPackage method + set_folder = lambda x: self.pyload.api.setPackageData(pid, {'folder': x or ""}) + + if use_subfolder: + if not subfolder_per_package: + set_folder(package_folder) + self.log_debug("Set package %(name)s folder to: %(folder)s" % {'name': name, 'folder': folder}) + + elif not folder_per_package or name is not folder: + if not folder: + folder = urlparse.urlparse(_fixurl(name)).path.split("/")[-1] + + set_folder(safe_filename(folder)) + self.log_debug("Set package %(name)s folder to: %(folder)s" % {'name': name, 'folder': folder}) + + elif folder_per_package: + set_folder(None) diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py index c93447164..e7cc6ff39 100644 --- a/module/plugins/internal/DeadCrypter.py +++ b/module/plugins/internal/DeadCrypter.py @@ -1,31 +1,30 @@ # -*- coding: utf-8 -*- -from module.plugins.internal.SimpleCrypter import create_getInfo -from module.plugins.Crypter import Crypter as _Crypter +from module.plugins.internal.Crypter import Crypter, create_getInfo -class DeadCrypter(_Crypter): +class DeadCrypter(Crypter): __name__ = "DeadCrypter" __type__ = "crypter" - __version__ = "0.05" + __version__ = "0.09" + __status__ = "testing" __pattern__ = r'^unmatchable$' - __description__ = """ Crypter is no longer available """ + __description__ = """Crypter is no longer available""" __license__ = "GPLv3" __authors__ = [("stickell", "l.stickell@yahoo.it")] @classmethod - def apiInfo(cls, url): - api = super(DeadCrypter, cls).apiInfo(url) - api['status'] = 1 - return api + def get_info(cls, *args, **kwargs): + info = super(DeadCrypter, cls).get_info(*args, **kwargs) + info['status'] = 1 + return info def setup(self): - self.pyfile.error = "Crypter is no longer available" - self.offline() #@TODO: self.offline("Crypter is no longer available") + self.offline(_("Crypter is no longer available")) getInfo = create_getInfo(DeadCrypter) diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py index f159ae5fa..250287d25 100644 --- a/module/plugins/internal/DeadHoster.py +++ b/module/plugins/internal/DeadHoster.py @@ -1,31 +1,30 @@ # -*- coding: utf-8 -*- -from module.plugins.internal.SimpleHoster import create_getInfo -from module.plugins.Hoster import Hoster as _Hoster +from module.plugins.internal.Hoster import Hoster, create_getInfo -class DeadHoster(_Hoster): +class DeadHoster(Hoster): __name__ = "DeadHoster" __type__ = "hoster" - __version__ = "0.15" + __version__ = "0.19" + __status__ = "testing" __pattern__ = r'^unmatchable$' - __description__ = """ Hoster is no longer available """ + __description__ = """Hoster is no longer available""" __license__ = "GPLv3" __authors__ = [("zoidberg", "zoidberg@mujmail.cz")] @classmethod - def apiInfo(cls, url): - api = super(DeadHoster, cls).apiInfo(url) - api['status'] = 1 - return api + def get_info(cls, *args, **kwargs): + info = super(DeadHoster, cls).get_info(*args, **kwargs) + info['status'] = 1 + return info def setup(self): - self.pyfile.error = "Hoster is no longer available" - self.offline() #@TODO: self.offline("Hoster is no longer available") + self.offline(_("Hoster is no longer available")) getInfo = create_getInfo(DeadHoster) diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 159b65ffe..7f5212090 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -4,6 +4,7 @@ import os import re from module.PyFile import PyFile +from module.plugins.internal.Plugin import Plugin class ArchiveError(Exception): @@ -18,9 +19,11 @@ class PasswordError(Exception): pass -class Extractor: +class Extractor(Plugin): __name__ = "Extractor" - __version__ = "0.24" + __type__ = "extractor" + __version__ = "0.33" + __status__ = "testing" __description__ = """Base extractor plugin""" __license__ = "GPLv3" @@ -29,32 +32,34 @@ class Extractor: EXTENSIONS = [] - VERSION = "" REPAIR = False + VERSION = None @classmethod - def isArchive(cls, filename): + def is_archive(cls, filename): name = os.path.basename(filename).lower() return any(name.endswith(ext) for ext in cls.EXTENSIONS) @classmethod - def isMultipart(cls, filename): + def is_multipart(cls, filename): return False @classmethod - def isUsable(cls): - """ Check if system statisfy dependencies + def find(cls): + """ + Check if system statisfy dependencies :return: boolean """ - return None + pass @classmethod - def getTargets(cls, files_ids): - """ Filter suited targets from list of filename id tuple list + def get_targets(cls, files_ids): + """ + Filter suited targets from list of filename id tuple list :param files_ids: List of filepathes :return: List of targets, id tuple list """ @@ -62,15 +67,15 @@ class Extractor: processed = [] for fname, id, fout in files_ids: - if cls.isArchive(fname): - pname = re.sub(cls.re_multipart, '', fname) if cls.isMultipart(fname) else os.path.splitext(fname)[0] + if cls.is_archive(fname): + pname = re.sub(cls.re_multipart, "", fname) if cls.is_multipart(fname) else os.path.splitext(fname)[0] if pname not in processed: processed.append(pname) targets.append((fname, id, fout)) return targets - def __init__(self, manager, filename, out, + def __init__(self, plugin, filename, out, fullpath=True, overwrite=False, excludefiles=[], @@ -78,8 +83,12 @@ class Extractor: delete='No', keepbroken=False, fid=None): - """ Initialize extractor for specific file """ - self.manager = manager + """ + Initialize extractor for specific file + """ + self._init(plugin.pyload) + + self.plugin = plugin self.filename = filename self.out = out self.fullpath = fullpath @@ -90,17 +99,29 @@ class Extractor: self.keepbroken = keepbroken self.files = [] #: Store extracted files here - pyfile = self.manager.core.files.getFile(fid) if fid else None - self.notifyProgress = lambda x: pyfile.setProgress(x) if pyfile else lambda x: None + pyfile = self.pyload.files.getFile(fid) if fid else None + self.notify_progress = lambda x: pyfile.setProgress(x) if pyfile else lambda x: None + + self.init() def init(self): - """ Initialize additional data structures """ + """ + Initialize additional data structures + """ pass + def _log(self, level, plugintype, pluginname, messages): + return self.plugin._log(level, + plugintype, + self.plugin.__name__, + (self.__name__,) + messages) + + def check(self): - """Quick Check by listing content of archive. + """ + Quick Check by listing content of archive. Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -109,8 +130,10 @@ class Extractor: """ raise NotImplementedError + def verify(self): - """Testing with Extractors buildt-in method + """ + Testing with Extractors buildt-in method Raises error if password is needed, integrity is questionable or else. :raises PasswordError @@ -125,7 +148,8 @@ class Extractor: def extract(self, password=None): - """Extract the archive. Raise specific errors in case of failure. + """ + Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status :param password password to use @@ -137,8 +161,9 @@ class Extractor: raise NotImplementedError - def getDeleteFiles(self): - """Return list of files to delete, do *not* delete them here. + def get_delete_files(self): + """ + Return list of files to delete, do *not* delete them here. :return: List with paths of files to delete """ @@ -146,5 +171,7 @@ class Extractor: def list(self, password=None): - """Populate self.files at some point while extracting""" + """ + Populate self.files at some point while extracting + """ return self.files diff --git a/module/plugins/internal/Hook.py b/module/plugins/internal/Hook.py new file mode 100644 index 000000000..1f566f824 --- /dev/null +++ b/module/plugins/internal/Hook.py @@ -0,0 +1,28 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.Addon import Addon, threaded + + +class Hook(Addon): + __name__ = "Hook" + __type__ = "hook" + __version__ = "0.13" + __status__ = "testing" + + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base hook plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay" , "mkaay@mkaay.de" ), + ("RaNaN" , "RaNaN@pyload.org" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, core, manager): + super(Hook, self).__init__(core, manager) + self.init_periodical(10) + + + #@TODO: Remove in 0.4.10 + def _log(self, level, plugintype, pluginname, messages): + return super(Addon, self)._log(level, plugintype, pluginname.replace("Hook", ""), messages) diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py new file mode 100644 index 000000000..a0cdb1e2e --- /dev/null +++ b/module/plugins/internal/Hoster.py @@ -0,0 +1,648 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +import inspect +import os +import random +import time +import traceback +import urlparse + +from module.plugins.internal.Captcha import Captcha +from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip, + chunks, encode, exists, fixurl as _fixurl, replace_patterns, + seconds_to_midnight, set_cookie, set_cookies, parse_html_form, + parse_html_tag_attr_value, timestamp) +from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename + + +#@TODO: Remove in 0.4.10 +def parse_fileInfo(klass, url="", html=""): + info = klass.get_info(url, html) + return info['name'], info['size'], info['status'], info['url'] + + +#@TODO: Remove in 0.4.10 +def getInfo(urls): + #: result = [ .. (name, size, status, url) .. ] + pass + + +#@TODO: Remove in 0.4.10 +def create_getInfo(klass): + def get_info(urls): + for url in urls: + if hasattr(klass, "URL_REPLACEMENTS"): + url = replace_patterns(url, klass.URL_REPLACEMENTS) + yield parse_fileInfo(klass, url) + + return get_info + + +class Hoster(Plugin): + __name__ = "Hoster" + __type__ = "hoster" + __version__ = "0.19" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, pyfile): + self._init(pyfile.m.core) + + #: Engage wan reconnection + self.wantReconnect = False #@TODO: Change to `want_reconnect` in 0.4.10 + + #: Enable simultaneous processing of multiple downloads + self.multiDL = True #@TODO: Change to `multi_dl` in 0.4.10 + self.limitDL = 0 #@TODO: Change to `limit_dl` in 0.4.10 + + #: time.time() + wait in seconds + self.wait_until = 0 + self.waiting = False + + #: Account handler instance, see :py:class:`Account` + self.account = None + self.user = None + self.req = None #: Browser instance, see `network.Browser` + + #: Associated pyfile instance, see `PyFile` + self.pyfile = pyfile + + self.thread = None #: Holds thread in future + + #: Location where the last call to download was saved + self.last_download = "" + + #: Re match of the last call to `checkDownload` + self.last_check = None + + #: Js engine, see `JsEngine` + self.js = self.pyload.js + + #: Captcha stuff + self.captcha = Captcha(self) + + #: Some plugins store html code here + self.html = None + + #: Dict of the amount of retries already made + self.retries = {} + self.retry_free = False #@TODO: Recheck in 0.4.10 + + self._setup() + self.init() + + + @classmethod + def get_info(cls, url="", html=""): + url = _fixurl(url) + url_p = urlparse.urlparse(url) + return {'name' : (url_p.path.split('/')[-1] or + url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or + url_p.netloc.split('.', 1)[0]), + 'size' : 0, + 'status': 3 if url else 8, + 'url' : url} + + + def init(self): + """ + Initialize the plugin (in addition to `__init__`) + """ + pass + + + def setup(self): + """ + Setup for enviroment and other things, called before downloading (possibly more than one time) + """ + pass + + + def _setup(self): + if self.account: + self.req = self.pyload.requestFactory.getRequest(self.__name__, self.user) + self.chunk_limit = -1 #: -1 for unlimited + self.resume_download = True + self.premium = self.account.is_premium(self.user) + else: + self.req = self.pyload.requestFactory.getRequest(self.__name__) + self.chunk_limit = 1 + self.resume_download = False + self.premium = False + + + def load_account(self): + if self.req: + self.req.close() + + if not self.account: + self.account = self.pyload.accountManager.getAccountPlugin(self.__name__) + + if self.account: + if not self.user: + self.user = self.account.select()[0] + + if not self.user or not self.account.is_logged(self.user, True): + self.account = False + + + def preprocessing(self, thread): + """ + Handles important things to do before starting + """ + self.thread = thread + + if self.retry_free: + self.account = False + else: + self.load_account() #@TODO: Move to PluginThread in 0.4.10 + self.retry_free = False + + self._setup() + self.setup() + + self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10 + + if self.pyfile.abort: + self.abort() + + self.pyfile.setStatus("starting") + self.log_debug("PROCESS URL " + self.pyfile.url, "PLUGIN VERSION %s" % self.__version__) + + return self.process(self.pyfile) + + + def process(self, pyfile): + """ + The 'main' method of every plugin, you **have to** overwrite it + """ + raise NotImplementedError + + + def set_reconnect(self, reconnect): + reconnect = bool(reconnect) + + self.log_info(_("RECONNECT ") + ("enabled" if reconnect else "disabled")) + self.log_debug("Previous wantReconnect: %s" % self.wantReconnect) + + self.wantReconnect = reconnect + + + def set_wait(self, seconds, reconnect=None): + """ + Set a specific wait time later used with `wait` + + :param seconds: wait time in seconds + :param reconnect: True if a reconnect would avoid wait time + """ + wait_time = max(int(seconds), 1) + wait_until = time.time() + wait_time + 1 + + self.log_info(_("WAIT %d seconds") % wait_time) + self.log_debug("Previous waitUntil: %f" % self.pyfile.waitUntil) + + self.pyfile.waitUntil = wait_until + + if reconnect is not None: + self.set_reconnect(reconnect) + + + def wait(self, seconds=None, reconnect=None): + """ + Waits the time previously set + """ + pyfile = self.pyfile + + if seconds is not None: + self.set_wait(seconds) + + if reconnect is not None: + self.set_reconnect(reconnect) + + self.waiting = True + + status = pyfile.status #@NOTE: Remove in 0.4.10 + pyfile.setStatus("waiting") + + if not self.wantReconnect or self.account: + if self.account: + self.log_warning("Ignore reconnection due logged account") + + while pyfile.waitUntil > time.time(): + if pyfile.abort: + self.abort() + + time.sleep(2) + + else: + while pyfile.waitUntil > time.time(): + if pyfile.abort: + self.abort() + + if self.thread.m.reconnecting.isSet(): + self.waiting = False + self.wantReconnect = False + raise Reconnect + + self.thread.m.reconnecting.wait(2) + time.sleep(2) + + self.waiting = False + pyfile.status = status #@NOTE: Remove in 0.4.10 + + + def skip(self, reason=""): + """ + Skip and give reason + """ + raise Skip(encode(reason)) #@TODO: Remove `encode` in 0.4.10 + + + def abort(self, reason=""): + """ + Abort and give reason + """ + #@TODO: Remove in 0.4.10 + if reason: + self.pyfile.error = encode(reason) + + raise Abort + + + def offline(self, reason=""): + """ + Fail and indicate file is offline + """ + #@TODO: Remove in 0.4.10 + if reason: + self.pyfile.error = encode(reason) + + raise Fail("offline") + + + def temp_offline(self, reason=""): + """ + Fail and indicates file ist temporary offline, the core may take consequences + """ + #@TODO: Remove in 0.4.10 + if reason: + self.pyfile.error = encode(reason) + + raise Fail("temp. offline") + + + def retry(self, max_tries=5, wait_time=1, reason=""): + """ + Retries and begin again from the beginning + + :param max_tries: number of maximum retries + :param wait_time: time to wait in seconds + :param reason: reason for retrying, will be passed to fail if max_tries reached + """ + id = inspect.currentframe().f_back.f_lineno + if id not in self.retries: + self.retries[id] = 0 + + if 0 < max_tries <= self.retries[id]: + self.fail(reason or _("Max retries reached")) + + self.wait(wait_time, False) + + self.retries[id] += 1 + raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10 + + + def restart(self, reason=None, nopremium=False): + if not reason: + reason = _("Fallback to free download") if nopremium else _("Restart") + + if nopremium: + if self.premium: + self.retry_free = True + else: + self.fail("%s | %s" % (reason, _("Download was already free"))) + + raise Retry(encode(reason)) #@TODO: Remove `encode` in 0.4.10 + + + def fixurl(self, url): + url = _fixurl(url) + + if not urlparse.urlparse(url).scheme: + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + url = urlparse.urljoin(baseurl, url) + + return url + + + def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=True): + """ + Downloads the content at url to download folder + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param disposition: if True and server provides content-disposition header\ + the filename will be changed if needed + :return: The location where the file was saved + """ + if self.pyfile.abort: + self.abort() + + url = self.fixurl(url) + + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) + + if self.pyload.debug: + self.log_debug("DOWNLOAD URL " + url, + *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")]) + + name = _fixurl(self.pyfile.name) + self.pyfile.name = urlparse.urlparse(name).path.split('/')[-1] or name + + self.captcha.correct() + self.check_for_same_files() + + self.pyfile.setStatus("downloading") + + download_folder = self.pyload.config.get("general", "download_folder") + download_location = fs_join(download_folder, self.pyfile.package().folder) + + if not exists(download_location): + try: + os.makedirs(download_location) + except Exception, e: + self.fail(e) + + self.set_permissions(download_location) + + location = fs_decode(download_location) + filename = os.path.join(location, safe_filename(self.pyfile.name)) #@TODO: Move `safe_filename` check to HTTPDownload in 0.4.10 + + self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, url, filename) + + if self.pyfile.abort: + self.abort() + + try: + newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, + chunks=self.get_chunk_count(), resume=self.resume_download, + progressNotify=self.pyfile.setProgress, disposition=disposition) + finally: + self.pyfile.size = self.req.size + + #@TODO: Recheck in 0.4.10 + if disposition and newname: + finalname = urlparse.urlparse(newname).path.split('/')[-1].split(' filename*=')[0] + + if finalname != newname != self.pyfile.name: + try: + os.rename(fs_join(location, newname), fs_join(location, finalname)) + + except OSError, e: + self.log_warning(_("Error renaming `%s` to `%s`") % (newname, finalname), e) + finalname = newname + + self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, finalname)) + self.pyfile.name = finalname + filename = os.path.join(location, finalname) + + self.set_permissions(fs_encode(filename)) + + self.last_download = filename + + return self.last_download + + + def check_download(self, rules, delete=False, file_size=0, size_tolerance=1024, read_size=1048576): + """ + Checks the content of the last downloaded file, re match is saved to `lastCheck` + + :param rules: dict with names and rules to match (compiled regexp or strings) + :param delete: delete if matched + :param file_size: expected file size + :param size_tolerance: size check tolerance + :param read_size: amount of bytes to read from files + :return: dictionary key of the first rule that matched + """ + do_delete = False + last_download = fs_encode(self.last_download) + + if not self.last_download or not exists(last_download): + self.last_download = "" + self.fail(self.pyfile.error or _("No file downloaded")) + + try: + download_size = os.stat(last_download).st_size + + if download_size < 1: + do_delete = True + self.fail(_("Empty file")) + + elif file_size > 0: + diff = abs(file_size - download_size) + + if diff > size_tolerance: + do_delete = True + self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s") + % (file_size, download_size)) + + elif diff != 0: + self.log_warning(_("File size is not equal to expected size")) + + with open(last_download, "rb") as f: + content = f.read(read_size) + + #: Produces encoding errors, better log to other file in the future? + # self.log_debug("Content: %s" % content) + for name, rule in rules.items(): + if isinstance(rule, basestring): + if rule in content: + do_delete = True + return name + + elif hasattr(rule, "search"): + m = rule.search(content) + if m: + do_delete = True + self.last_check = m + return name + finally: + if delete and do_delete: + try: + os.remove(last_download) + + except OSError, e: + self.log_warning(_("Error removing: %s") % last_download, e) + if self.pyload.debug: + traceback.print_exc() + + else: + self.last_download = "" + self.log_info(_("File deleted")) + + + def direct_link(self, url, follow_location=None): + link = "" + + if follow_location is None: + redirect = 1 + + elif type(follow_location) is int: + redirect = max(follow_location, 1) + + else: + redirect = self.get_config("maxredirs", 10, "UserAgentSwitcher") + + for i in xrange(redirect): + try: + self.log_debug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, just_header=True) + + except Exception: #: Bad bad bad... rewrite this part in 0.4.10 + res = self.load(url, + just_header=True, + req=self.pyload.requestFactory.getRequest()) + + header = {'code': req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) is list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + + if 'content-disposition' in header: + link = url + + elif 'location' in header and header['location']: + location = header['location'] + + if not urlparse.urlparse(location).scheme: + url_p = urlparse.urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urlparse.urljoin(baseurl, location) + + if 'code' in header and header['code'] == 302: + link = location + + if follow_location: + url = location + continue + + else: + extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] + + if 'content-type' in header and header['content-type']: + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" + + break + + else: + try: + self.log_error(_("Too many redirects")) + except Exception: + pass + + return link + + + def parse_html_form(self, attr_str="", input_names={}): + return parse_html_form(attr_str, self.html, input_names) + + + def check_traffic_left(self): + if not self.account: + return True + + traffic = self.account.get_data(self.user, True)['trafficleft'] + + if traffic is None: + return False + elif traffic == -1: + return True + else: + size = self.pyfile.size / 1024 + self.log_info(_("Filesize: %s KiB, Traffic left for user %s: %s KiB") % (size, self.user, traffic)) + return size <= traffic + + + def get_password(self): + """ + Get the password the user provided in the package + """ + return self.pyfile.package().password or "" + + + #: Deprecated method, use `check_for_same_files` instead (Remove in 0.4.10) + def checkForSameFiles(self, *args, **kwargs): + return self.check_for_same_files(*args, **kwargs) + + + def check_for_same_files(self, starting=False): + """ + Checks if same file was/is downloaded within same package + + :param starting: indicates that the current download is going to start + :raises Skip: + """ + pack = self.pyfile.package() + + for pyfile in self.pyload.files.cache.values(): + if pyfile != self.pyfile and pyfile.name is self.pyfile.name and pyfile.package().folder is pack.folder: + if pyfile.status in (0, 12): #: Finished or downloading + self.skip(pyfile.pluginname) + elif pyfile.status in (5, 7) and starting: #: A download is waiting/starting and was appenrently started before + self.skip(pyfile.pluginname) + + download_folder = self.pyload.config.get("general", "download_folder") + location = fs_join(download_folder, pack.folder, self.pyfile.name) + + if starting and self.pyload.config.get("download", "skip_existing") and exists(location): + size = os.stat(location).st_size + if size >= self.pyfile.size: + self.skip("File exists") + + pyfile = self.pyload.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) + if pyfile: + if exists(location): + self.skip(pyfile[0]) + + self.log_debug("File %s not skipped, because it does not exists." % self.pyfile.name) diff --git a/module/plugins/internal/MultiCrypter.py b/module/plugins/internal/MultiCrypter.py new file mode 100644 index 000000000..ca7b03941 --- /dev/null +++ b/module/plugins/internal/MultiCrypter.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.SimpleCrypter import SimpleCrypter + + +class MultiCrypter(SimpleCrypter): + __name__ = "MultiCrypter" + __type__ = "hoster" + __version__ = "0.02" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder" , "bool", "Save package to subfolder" , True), + ("subfolder_per_pack", "bool", "Create a subfolder for each package", True)] + + __description__ = """Multi decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + def init(self): + self.CRYPTER_NAME = self.pyload.pluginManager.crypterPlugins[self.__name__]['name'] + + + def _log(self, level, plugintype, pluginname, messages): + return super(MultiCrypter, self)._log(level, + plugintype, + pluginname, + (self.CRYPTER_NAME,) + messages) diff --git a/module/plugins/internal/MultiHook.py b/module/plugins/internal/MultiHook.py index 01ff4b07d..42a1985b5 100644 --- a/module/plugins/internal/MultiHook.py +++ b/module/plugins/internal/MultiHook.py @@ -4,14 +4,15 @@ import re import time import traceback -from module.plugins.Hook import Hook +from module.plugins.internal.Hook import Hook from module.utils import decode, remove_chars class MultiHook(Hook): __name__ = "MultiHook" __type__ = "hook" - __version__ = "0.45" + __version__ = "0.54" + __status__ = "testing" __config__ = [("pluginmode" , "all;listed;unlisted", "Use for plugins" , "all"), ("pluginlist" , "str" , "Plugin list (comma separated)", "" ), @@ -54,9 +55,7 @@ class MultiHook(Hook): (r'^0' , "zero" )] - def setup(self): - self.info = {} #@TODO: Remove in 0.4.10 - + def init(self): self.plugins = [] self.supported = [] self.new_supported = [] @@ -67,76 +66,58 @@ class MultiHook(Hook): self.pluginname = None self.plugintype = None - self.initPlugin() + self.init_plugin() - def initPlugin(self): + def init_plugin(self): self.pluginname = self.__name__.rsplit("Hook", 1)[0] - plugin, self.plugintype = self.core.pluginManager.findPlugin(self.pluginname) + plugin, self.plugintype = self.pyload.pluginManager.findPlugin(self.pluginname) if plugin: - self.pluginmodule = self.core.pluginManager.loadModule(self.plugintype, self.pluginname) + self.pluginmodule = self.pyload.pluginManager.loadModule(self.plugintype, self.pluginname) self.pluginclass = getattr(self.pluginmodule, self.pluginname) else: - self.logWarning("Hook plugin will be deactivated due missing plugin reference") - self.setConfig('activated', False) + self.log_warning(_("Hook plugin will be deactivated due missing plugin reference")) + self.set_config('activated', False) - def loadAccount(self): - self.account = self.core.accountManager.getAccountPlugin(self.pluginname) + def load_account(self): + self.account = self.pyload.accountManager.getAccountPlugin(self.pluginname) - if self.account and not self.account.canUse(): - self.account = None + if self.account and not self.account.select()[0]: + self.account = False if not self.account and hasattr(self.pluginclass, "LOGIN_ACCOUNT") and self.pluginclass.LOGIN_ACCOUNT: - self.logWarning("Hook plugin will be deactivated due missing account reference") - self.setConfig('activated', False) - + self.log_warning(_("Hook plugin will be deactivated due missing account reference")) + self.set_config('activated', False) - def getURL(self, *args, **kwargs): #@TODO: Remove in 0.4.10 - """ see HTTPRequest for argument list """ - h = pyreq.getHTTPRequest(timeout=120) - try: - if not 'decode' in kwargs: - kwargs['decode'] = True - rep = h.load(*args, **kwargs) - finally: - h.close() - return rep + def activate(self): + self.init_periodical(threaded=True) - def getConfig(self, option, default=''): #@TODO: Remove in 0.4.10 - """getConfig with default value - sublass may not implements all config options""" - try: - return self.getConf(option) - - except KeyError: - return default - - - def pluginsCached(self): + def plugins_cached(self): if self.plugins: return self.plugins - for _i in xrange(2): + for _i in xrange(5): try: - pluginset = self._pluginSet(self.getHosters()) + pluginset = self._plugin_set(self.get_hosters()) break except Exception, e: - self.logDebug(e, "Waiting 1 minute and retry") + self.log_warning(e, _("Waiting 1 minute and retry")) time.sleep(60) else: - self.logWarning(_("Fallback to default reload interval due plugin parse error")) + self.log_error(_("No hoster list retrieved")) self.interval = self.MIN_RELOAD_INTERVAL return list() try: - configmode = self.getConfig("pluginmode", 'all') + configmode = self.get_config('pluginmode', 'all') if configmode in ("listed", "unlisted"): - pluginlist = self.getConfig("pluginlist", '').replace('|', ',').replace(';', ',').split(',') - configset = self._pluginSet(pluginlist) + pluginlist = self.get_config('pluginlist', '').replace('|', ',').replace(';', ',').split(',') + configset = self._plugin_set(pluginlist) if configmode == "listed": pluginset &= configset @@ -144,14 +125,14 @@ class MultiHook(Hook): pluginset -= configset except Exception, e: - self.logError(e) + self.log_error(e) self.plugins = list(pluginset) return self.plugins - def _pluginSet(self, plugins): + def _plugin_set(self, plugins): regexp = re.compile(r'^[\w\-.^_]{3,63}\.[a-zA-Z]{2,}$', re.U) plugins = [decode(p.strip()).lower() for p in plugins if regexp.match(p.strip())] @@ -163,39 +144,28 @@ class MultiHook(Hook): return set(plugins) - def getHosters(self): - """Load list of supported hoster + def get_hosters(self): + """ + Load list of supported hoster :return: List of domain names """ raise NotImplementedError - #: Threaded _periodical, remove in 0.4.10 and use built-in flag for that - def _periodical(self): - try: - if self.isActivated(): - self.periodical() - - except Exception, e: - self.core.log.error(_("Error executing hooks: %s") % str(e)) - if self.core.debug: - traceback.print_exc() - - self.cb = self.core.scheduler.addJob(self.interval, self._periodical) - - def periodical(self): - """reload plugin list periodically""" - self.loadAccount() + """ + Reload plugin list periodically + """ + self.load_account() - if self.getConfig("reload", True): - self.interval = max(self.getConfig("reloadinterval", 12) * 60 * 60, self.MIN_RELOAD_INTERVAL) + if self.get_config('reload', True): + self.interval = max(self.get_config('reloadinterval', 12) * 60 * 60, self.MIN_RELOAD_INTERVAL) else: - self.core.scheduler.removeJob(self.cb) + self.pyload.scheduler.removeJob(self.cb) self.cb = None - self.logInfo(_("Reloading supported %s list") % self.plugintype) + self.log_info(_("Reloading supported %s list") % self.plugintype) old_supported = self.supported @@ -203,27 +173,27 @@ class MultiHook(Hook): self.new_supported = [] self.plugins = [] - self.overridePlugins() + self.override_plugins() old_supported = [plugin for plugin in old_supported if plugin not in self.supported] if old_supported: - self.logDebug("Unload: %s" % ", ".join(old_supported)) + self.log_debug("Unload: %s" % ", ".join(old_supported)) for plugin in old_supported: - self.unloadPlugin(plugin) + self.unload_plugin(plugin) - def overridePlugins(self): + def override_plugins(self): excludedList = [] if self.plugintype == "hoster": - pluginMap = dict((name.lower(), name) for name in self.core.pluginManager.hosterPlugins.iterkeys()) - accountList = [account.type.lower() for account in self.core.api.getAccounts(False) if account.valid and account.premium] + pluginMap = dict((name.lower(), name) for name in self.pyload.pluginManager.hosterPlugins.keys()) + accountList = [account.type.lower() for account in self.pyload.api.getAccounts(False) if account.valid and account.premium] else: pluginMap = {} - accountList = [name[::-1].replace("Folder"[::-1], "", 1).lower()[::-1] for name in self.core.pluginManager.crypterPlugins.iterkeys()] + accountList = [name[::-1].replace("Folder"[::-1], "", 1).lower()[::-1] for name in self.pyload.pluginManager.crypterPlugins.keys()] - for plugin in self.pluginsCached(): + for plugin in self.plugins_cached(): name = remove_chars(plugin, "-.") if name in accountList: @@ -235,39 +205,39 @@ class MultiHook(Hook): self.new_supported.append(plugin) if not self.supported and not self.new_supported: - self.logError(_("No %s loaded") % self.plugintype) + self.log_error(_("No %s loaded") % self.plugintype) return - # inject plugin plugin - self.logDebug("Overwritten %ss: %s" % (self.plugintype, ", ".join(sorted(self.supported)))) + #: Inject plugin plugin + self.log_debug("Overwritten %ss: %s" % (self.plugintype, ", ".join(sorted(self.supported)))) for plugin in self.supported: - hdict = self.core.pluginManager.plugins[self.plugintype][plugin] + hdict = self.pyload.pluginManager.plugins[self.plugintype][plugin] hdict['new_module'] = self.pluginmodule hdict['new_name'] = self.pluginname if excludedList: - self.logInfo(_("%ss not overwritten: %s") % (self.plugintype.capitalize(), ", ".join(sorted(excludedList)))) + self.log_info(_("%ss not overwritten: %s") % (self.plugintype.capitalize(), ", ".join(sorted(excludedList)))) if self.new_supported: plugins = sorted(self.new_supported) - self.logDebug("New %ss: %s" % (self.plugintype, ", ".join(plugins))) + self.log_debug("New %ss: %s" % (self.plugintype, ", ".join(plugins))) - # create new regexp + #: Create new regexp regexp = r'.*(?P<DOMAIN>%s).*' % "|".join(x.replace('.', '\.') for x in plugins) - if hasattr(self.pluginclass, "__pattern__") and isinstance(self.pluginclass.__pattern__, basestring) and '://' in self.pluginclass.__pattern__: + if hasattr(self.pluginclass, "__pattern__") and isinstance(self.pluginclass.__pattern__, basestring) and "://" in self.pluginclass.__pattern__: regexp = r'%s|%s' % (self.pluginclass.__pattern__, regexp) - self.logDebug("Regexp: %s" % regexp) + self.log_debug("Regexp: %s" % regexp) - hdict = self.core.pluginManager.plugins[self.plugintype][self.pluginname] + hdict = self.pyload.pluginManager.plugins[self.plugintype][self.pluginname] hdict['pattern'] = regexp hdict['re'] = re.compile(regexp) - def unloadPlugin(self, plugin): - hdict = self.core.pluginManager.plugins[self.plugintype][plugin] + def unload_plugin(self, plugin): + hdict = self.pyload.pluginManager.plugins[self.plugintype][plugin] if "module" in hdict: hdict.pop('module', None) @@ -276,13 +246,15 @@ class MultiHook(Hook): hdict.pop('new_name', None) - def unload(self): - """Remove override for all plugins. Scheduler job is removed by hookmanager""" + def deactivate(self): + """ + Remove override for all plugins. Scheduler job is removed by hookmanager + """ for plugin in self.supported: - self.unloadPlugin(plugin) + self.unload_plugin(plugin) - # reset pattern - hdict = self.core.pluginManager.plugins[self.plugintype][self.pluginname] + #: Reset pattern + hdict = self.pyload.pluginManager.plugins[self.plugintype][self.pluginname] hdict['pattern'] = getattr(self.pluginclass, "__pattern__", r'^unmatchable$') hdict['re'] = re.compile(hdict['pattern']) diff --git a/module/plugins/internal/MultiHoster.py b/module/plugins/internal/MultiHoster.py index ff4414034..c0c928a45 100644 --- a/module/plugins/internal/MultiHoster.py +++ b/module/plugins/internal/MultiHoster.py @@ -2,14 +2,15 @@ import re -from module.plugins.Plugin import Fail, Retry -from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies +from module.plugins.internal.Plugin import Fail, encode +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookie, set_cookies class MultiHoster(SimpleHoster): __name__ = "MultiHoster" __type__ = "hoster" - __version__ = "0.40" + __version__ = "0.50" + __status__ = "testing" __pattern__ = r'^unmatchable$' __config__ = [("use_premium" , "bool", "Use premium account if available" , True), @@ -20,76 +21,90 @@ class MultiHoster(SimpleHoster): __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + HOSTER_NAME = None + + LEECH_HOSTER = False LOGIN_ACCOUNT = True - def setup(self): - self.chunkLimit = 1 - self.multiDL = bool(self.account) - self.resumeDownload = self.premium + def init(self): + self.HOSTER_NAME = self.pyload.pluginManager.hosterPlugins[self.__name__]['name'] - def prepare(self): - self.info = {} - self.html = "" - self.link = "" #@TODO: Move to hoster class in 0.4.10 - self.directDL = False #@TODO: Move to hoster class in 0.4.10 + def _log(self, level, plugintype, pluginname, messages): + return super(MultiHoster, self)._log(level, + plugintype, + pluginname, + (self.HOSTER_NAME,) + messages) - if not self.getConfig('use_premium', True): - self.retryFree() - if self.LOGIN_ACCOUNT and not self.account: - self.fail(_("Required account not found")) + def setup(self): + self.chunk_limit = 1 + self.multiDL = bool(self.account) + self.resume_download = self.premium + - self.req.setOption("timeout", 120) + def prepare(self): + #@TODO: Recheck in 0.4.10 + plugin = self.pyload.pluginManager.hosterPlugins[self.__name__] + name = plugin['name'] + module = plugin['module'] + klass = getattr(module, name) - if isinstance(self.COOKIES, list): - set_cookies(self.req.cj, self.COOKIES) + self.get_info = klass.get_info if self.DIRECT_LINK is None: - self.directDL = self.__pattern__ != r'^unmatchable$' and re.match(self.__pattern__, self.pyfile.url) + direct_dl = self.__pattern__ != r'^unmatchable$' and re.match(self.__pattern__, self.pyfile.url) else: - self.directDL = self.DIRECT_LINK + direct_dl = self.DIRECT_LINK + + super(MultiHoster, self).prepare() - self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + self.direct_dl = direct_dl def process(self, pyfile): try: self.prepare() + self.check_info() #@TODO: Remove in 0.4.10 + + if self.direct_dl: + self.log_info(_("Looking for direct download link...")) + self.handle_direct(pyfile) - if self.directDL: - self.checkInfo() - self.logDebug("Looking for direct download link...") - self.handleDirect(pyfile) + if self.link or was_downloaded(): + self.log_info(_("Direct download link detected")) + else: + self.log_info(_("Direct download link not found")) - if not self.link and not self.lastDownload: + if not self.link and not self.last_download: self.preload() - self.checkErrors() - self.checkStatus(getinfo=False) + self.check_errors() + self.check_status(getinfo=False) - if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium(pyfile) + if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic_left()): + self.log_info(_("Processing as premium download...")) + self.handle_premium(pyfile) - elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as free download") - self.handleFree(pyfile) + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.check_traffic_left()): + self.log_info(_("Processing as free download...")) + self.handle_free(pyfile) - self.downloadLink(self.link, True) - self.checkFile() + if not self.last_download: + self.log_info(_("Downloading file...")) + self.download(self.link, disposition=self.DISPOSITION) - except Fail, e: #@TODO: Move to PluginThread in 0.4.10 - err = str(e) #@TODO: Recheck in 0.4.10 + self.check_file() + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 if self.premium: - self.logWarning(_("Premium download failed")) - self.retryFree() + self.log_warning(_("Premium download failed")) + self.restart(nopremium=True) - elif self.getConfig("revertfailed", True) \ - and "new_module" in self.core.pluginManager.hosterPlugins[self.__name__]: - hdict = self.core.pluginManager.hosterPlugins[self.__name__] + elif self.get_config("revertfailed", True) \ + and "new_module" in self.pyload.pluginManager.hosterPlugins[self.__name__]: + hdict = self.pyload.pluginManager.hosterPlugins[self.__name__] tmp_module = hdict['new_module'] tmp_name = hdict['new_name'] @@ -101,17 +116,17 @@ class MultiHoster(SimpleHoster): hdict['new_module'] = tmp_module hdict['new_name'] = tmp_name - raise Retry(_("Revert to original hoster plugin")) + self.restart(_("Revert to original hoster plugin")) else: - raise Fail(err) + raise Fail(encode(e)) #@TODO: Remove `encode` in 0.4.10 - def handlePremium(self, pyfile): - return self.handleFree(pyfile) + def handle_premium(self, pyfile): + return self.handle_free(pyfile) - def handleFree(self, pyfile): + def handle_free(self, pyfile): if self.premium: raise NotImplementedError else: diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py new file mode 100644 index 000000000..b24b3058b --- /dev/null +++ b/module/plugins/internal/OCR.py @@ -0,0 +1,346 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +try: + from PIL import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin + +except ImportError: + import Image, GifImagePlugin, JpegImagePlugin, PngImagePlugin, TiffImagePlugin + +import logging +import os +import subprocess +# import tempfile +import traceback + +from module.plugins.internal.Plugin import Plugin +from module.utils import save_join as fs_join + + +class OCR(Plugin): + __name__ = "OCR" + __type__ = "ocr" + __version__ = "0.19" + __status__ = "testing" + + __description__ = """OCR base plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + def __init__(self, plugin): + self._init(plugin.pyload) + self.plugin = plugin + self.init() + + + def init(self): + """ + Initialize additional data structures + """ + pass + + + def _log(self, level, plugintype, pluginname, messages): + return self.plugin._log(level, + plugintype, + self.plugin.__name__, + (self.__name__,) + messages) + + + def load_image(self, image): + self.image = Image.open(image) + self.pixels = self.image.load() + self.result_captcha = "" + + + def deactivate(self): + """ + Delete all tmp images + """ + pass + + + def threshold(self, value): + self.image = self.image.point(lambda a: a * value + 10) + + + def run(self, command): + """ + Run a command + """ + popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + popen.wait() + output = popen.stdout.read() + " | " + popen.stderr.read() + popen.stdout.close() + popen.stderr.close() + self.pyload.log_debug("Tesseract ReturnCode " + popen.returncode, "Output: " + output) + + + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True, pagesegmode=None): + # tmpTif = tempfile.NamedTemporaryFile(suffix=".tif") + try: + tmpTif = open(fs_join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") + tmpTif.close() + + # tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + tmpTxt = open(fs_join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") + tmpTxt.close() + + except IOError, e: + self.log_error(e) + return + + self.pyload.log_debug("Saving tiff...") + self.image.save(tmpTif.name, 'TIFF') + + if os.name == "nt": + tessparams = [os.path.join(pypath, "tesseract", "tesseract.exe")] + else: + tessparams = ["tesseract"] + + tessparams.extend([os.path.abspath(tmpTif.name), os.path.abspath(tmpTxt.name).replace(".txt", "")]) + + if pagesegmode: + tessparams.extend(["-psm", str(pagesegmode)]) + + if subset and (digits or lowercase or uppercase): + # tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + with open(fs_join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") as tmpSub: + tmpSub.write("tessedit_char_whitelist ") + + if digits: + tmpSub.write("0123456789") + if lowercase: + tmpSub.write("abcdefghijklmnopqrstuvwxyz") + if uppercase: + tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + + tmpSub.write("\n") + tessparams.append("nobatch") + tessparams.append(os.path.abspath(tmpSub.name)) + + self.pyload.log_debug("Running tesseract...") + self.run(tessparams) + self.pyload.log_debug("Reading txt...") + + try: + with open(tmpTxt.name, 'r') as f: + self.result_captcha = f.read().replace("\n", "") + except Exception: + self.result_captcha = "" + + self.pyload.log_info(_("OCR result: ") + self.result_captcha) + try: + os.remove(tmpTif.name) + os.remove(tmpTxt.name) + if subset and (digits or lowercase or uppercase): + os.remove(tmpSub.name) + except OSError, e: + self.log_warning(e) + if self.pyload.debug: + traceback.print_exc() + + + def recognize(self, name): + raise NotImplementedError + + + def to_greyscale(self): + if self.image.mode != 'L': + self.image = self.image.convert('L') + + self.pixels = self.image.load() + + + def eval_black_white(self, limit): + self.pixels = self.image.load() + w, h = self.image.size + for x in xrange(w): + for y in xrange(h): + if self.pixels[x, y] > limit: + self.pixels[x, y] = 255 + else: + self.pixels[x, y] = 0 + + + def clean(self, allowed): + pixels = self.pixels + + w, h = self.image.size + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 255: + continue + #: No point in processing white pixels since we only want to remove black pixel + count = 0 + + try: + if pixels[x - 1, y - 1] != 255: + count += 1 + if pixels[x - 1, y] != 255: + count += 1 + if pixels[x - 1, y + 1] != 255: + count += 1 + if pixels[x, y + 1] != 255: + count += 1 + if pixels[x + 1, y + 1] != 255: + count += 1 + if pixels[x + 1, y] != 255: + count += 1 + if pixels[x + 1, y - 1] != 255: + count += 1 + if pixels[x, y - 1] != 255: + count += 1 + except Exception: + pass + + #: Not enough neighbors are dark pixels so mark this pixel + #: To be changed to white + if count < allowed: + pixels[x, y] = 1 + + #: Second pass: this time set all 1's to 255 (white) + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 1: + pixels[x, y] = 255 + + self.pixels = pixels + + + def derotate_by_average(self): + """ + Rotate by checking each angle and guess most suitable + """ + w, h = self.image.size + pixels = self.pixels + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 155 + + highest = {} + counts = {} + + for angle in xrange(-45, 45): + + tmpimage = self.image.rotate(angle) + + pixels = tmpimage.load() + + w, h = self.image.size + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 255 + + count = {} + + for x in xrange(w): + count[x] = 0 + for y in xrange(h): + if pixels[x, y] == 155: + count[x] += 1 + + sum = 0 + cnt = 0 + + for x in count.values(): + if x != 0: + sum += x + cnt += 1 + + avg = sum / cnt + counts[angle] = cnt + highest[angle] = 0 + for x in count.values(): + if x > highest[angle]: + highest[angle] = x + + highest[angle] = highest[angle] - avg + + hkey = 0 + hvalue = 0 + + for key, value in highest.items(): + if value > hvalue: + hkey = key + hvalue = value + + self.image = self.image.rotate(hkey) + pixels = self.image.load() + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 255 + + if pixels[x, y] == 155: + pixels[x, y] = 0 + + self.pixels = pixels + + + def split_captcha_letters(self): + captcha = self.image + started = False + letters = [] + width, height = captcha.size + bottomY, topY = 0, height + pixels = captcha.load() + + for x in xrange(width): + black_pixel_in_col = False + for y in xrange(height): + if pixels[x, y] != 255: + if not started: + started = True + firstX = x + lastX = x + + if y > bottomY: + bottomY = y + if y < topY: + topY = y + if x > lastX: + lastX = x + + black_pixel_in_col = True + + if black_pixel_in_col is False and started is True: + rect = (firstX, topY, lastX, bottomY) + new_captcha = captcha.crop(rect) + + w, h = new_captcha.size + if w > 5 and h > 5: + letters.append(new_captcha) + + started = False + bottomY, topY = 0, height + + return letters + + + def correct(self, values, var=None): + if var: + result = var + else: + result = self.result_captcha + + for key, item in values.items(): + + if key.__class__ is str: + result = result.replace(key, item) + else: + for expr in key: + result = result.replace(expr, item) + + if var: + return result + else: + self.result_captcha = result diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py new file mode 100644 index 000000000..7b45c40a8 --- /dev/null +++ b/module/plugins/internal/Plugin.py @@ -0,0 +1,399 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +import datetime +import inspect +import os +import re +import urllib + +if os.name != "nt": + import grp + import pwd + +from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 +from module.utils import fs_encode, fs_decode, html_unescape, save_join as fs_join + + +#@TODO: Move to utils in 0.4.10 +def decode(string, encoding='utf8'): + """ Decode string to unicode with utf8 """ + if type(string) is str: + return string.decode(encoding, "replace") + else: + return string + + +#@TODO: Move to utils in 0.4.10 +def encode(string, encoding='utf8'): + """ Decode string to utf8 """ + if type(string) is unicode: + return string.encode(encoding, "replace") + else: + return string + + +#@TODO: Move to utils in 0.4.10 +def exists(path): + if os.path.exists(path): + if os.name == "nt": + dir, name = os.path.split(path) + return name in os.listdir(dir) + else: + return True + else: + return False + + +#@TODO: Move to utils in 0.4.10 +def fixurl(url): + return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip().rstrip('/') + + +#@TODO: Move to utils in 0.4.10 +def timestamp(): + return int(time.time() * 1000) + + +def seconds_to_midnight(gmt=0): + now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) + + if now.hour == 0 and now.minute < 10: + midnight = now + else: + midnight = now + datetime.timedelta(days=1) + + td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now + + if hasattr(td, 'total_seconds'): + res = td.total_seconds() + else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds + res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + return int(res) + + +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + return string + + +#@TODO: Remove in 0.4.10 and fix CookieJar.setCookie +def set_cookie(cj, domain, name, value): + return cj.setCookie(domain, name, encode(value)) + + +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + set_cookie(cj, *cookie) + + +def parse_html_tag_attr_value(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parse_html_form(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.S | re.I): + inputs = {} + action = parse_html_tag_attr_value("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): + name = parse_html_tag_attr_value("name", inputtag.group(1)) + if name: + value = parse_html_tag_attr_value("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or "" + else: + inputs[name] = value + + if input_names: + #: Check input attributes + for key, val in input_names.items(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] is val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break #: Attibute value does not match + else: + break #: Attibute name does not match + else: + return action, inputs #: Passed attribute check + else: + #: No attribute check + return action, inputs + + return {}, None #: No matching form found + + +#@TODO: Move to utils in 0.4.10 +def chunks(iterable, size): + it = iter(iterable) + item = list(islice(it, size)) + while item: + yield item + item = list(islice(it, size)) + + +class Plugin(object): + __name__ = "Plugin" + __type__ = "hoster" + __version__ = "0.30" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + + + def __init__(self, core): + self._init(core) + self.init() + + + def _init(self, core): + self.pyload = core + self.info = {} #: Provide information in dict here + self.req = None + + + def init(self): + """ + Initialize the plugin (in addition to `__init__`) + """ + pass + + + def _log(self, level, plugintype, pluginname, messages): + log = getattr(self.pyload.log, level) + msg = encode(" | ".join((a if isinstance(a, basestring) else str(a)).strip() for a in messages if a)) + log("%(plugintype)s %(pluginname)s%(id)s: %(msg)s" + % {'plugintype': plugintype.upper(), + 'pluginname': pluginname, + 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", + 'msg' : msg}) + + + def log_debug(self, *args): + if self.pyload.debug: + return self._log("debug", self.__type__, self.__name__, args) + + + def log_info(self, *args): + return self._log("info", self.__type__, self.__name__, args) + + + def log_warning(self, *args): + return self._log("warning", self.__type__, self.__name__, args) + + + def log_error(self, *args): + return self._log("error", self.__type__, self.__name__, args) + + + def log_critical(self, *args): + return self._log("critical", self.__type__, self.__name__, args) + + + def set_permissions(self, path): + if not os.path.exists(path): + return + + try: + if self.pyload.config.get("permission", "change_file"): + if os.path.isfile(path): + os.chmod(path, int(self.pyload.config.get("permission", "file"), 8)) + + elif os.path.isdir(path): + os.chmod(path, int(self.pyload.config.get("permission", "folder"), 8)) + + except OSError, e: + self.log_warning(_("Setting path mode failed"), e) + + try: + if os.name != "nt" and self.pyload.config.get("permission", "change_dl"): + uid = pwd.getpwnam(self.pyload.config.get("permission", "user"))[2] + gid = grp.getgrnam(self.pyload.config.get("permission", "group"))[2] + os.chown(path, uid, gid) + + except OSError, e: + self.log_warning(_("Setting owner and group failed"), e) + + + def get_chunk_count(self): + if self.chunk_limit <= 0: + return self.pyload.config.get("download", "chunks") + return min(self.pyload.config.get("download", "chunks"), self.chunk_limit) + + + def set_config(self, option, value): + """ + Set config value for current plugin + + :param option: + :param value: + :return: + """ + self.pyload.config.setPlugin(self.__name__, option, value) + + + def get_config(self, option, default="", plugin=None): + """ + Returns config value for current plugin + + :param option: + :return: + """ + try: + return self.pyload.config.getPlugin(plugin or self.__name__, option) + + except KeyError: + self.log_debug("Config option `%s` not found, use default `%s`" % (option, default or None)) #@TODO: Restore to `log_warning` in 0.4.10 + return default + + + def store(self, key, value): + """ + Saves a value persistently to the database + """ + self.pyload.db.setStorage(self.__name__, key, value) + + + def retrieve(self, key, default=None): + """ + Retrieves saved value or dict of all saved entries if key is None + """ + return self.pyload.db.getStorage(self.__name__, key) or default + + + def delete(self, key): + """ + Delete entry in db + """ + self.pyload.db.delStorage(self.__name__, key) + + + def fail(self, reason): + """ + Fail and give reason + """ + raise Fail(encode(reason)) #@TODO: Remove `encode` in 0.4.10 + + + def error(self, reason="", type=_("Parse")): + if not reason: + type = _("Unknown") + + msg = _("%s error") % type.strip().capitalize() if type else _("Error") + msg += (": %s" % reason.strip()) if reason else "" + msg += _(" | Plugin may be out of date") + + raise Fail(encode(msg)) #@TODO: Remove `encode` in 0.4.10 + + + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None): + """ + Load content at url and returns it + + :param url: + :param get: + :param post: + :param ref: + :param cookies: + :param just_header: If True only the header will be retrieved and returned as dict + :param decode: Wether to decode the output according to http header, should be True in most cases + :return: Loaded content + """ + if hasattr(self, 'pyfile') and self.pyfile.abort: + self.abort() + + url = fixurl(url) + + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) + + if self.pyload.debug: + self.log_debug("LOAD URL " + url, + *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")]) + + if req is None: + req = self.req or self.pyload.requestFactory.getRequest(self.__name__) + + #@TODO: Move to network in 0.4.10 + if isinstance(cookies, list): + set_cookies(req.cj, cookies) + + res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True) #@TODO: Fix network multipart in 0.4.10 + + #@TODO: Move to network in 0.4.10 + if decode: + res = html_unescape(res) + + #@TODO: Move to network in 0.4.10 + if isinstance(decode, basestring): + res = decode(res, decode) + + if self.pyload.debug: + frame = inspect.currentframe() + framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) + try: + if not exists(os.path.join("tmp", self.__name__)): + os.makedirs(os.path.join("tmp", self.__name__)) + + with open(framefile, "wb") as f: + del frame #: Delete the frame or it wont be cleaned + f.write(encode(res)) + + except IOError, e: + self.log_error(e) + + if just_header: + #: Parse header + header = {'code': req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.strip().lower() + value = value.strip() + + if key in header: + if type(header[key]) is list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + res = header + + return res + + + def clean(self): + """ + Clean everything and remove references + """ + try: + self.req.close() + except Exception: + pass + + for a in ("pyfile", "thread", "html", "req"): + if hasattr(self, a): + setattr(self, a, None) diff --git a/module/plugins/internal/ReCaptcha.py b/module/plugins/internal/ReCaptcha.py deleted file mode 100644 index dea714a62..000000000 --- a/module/plugins/internal/ReCaptcha.py +++ /dev/null @@ -1,195 +0,0 @@ -# -*- coding: utf-8 -*- - -import random -import re -import time -import urlparse - -from base64 import b64encode - -from module.plugins.internal.Captcha import Captcha - - -class ReCaptcha(Captcha): - __name__ = "ReCaptcha" - __type__ = "captcha" - __version__ = "0.17" - - __description__ = """ReCaptcha captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com"), - ("zapp-brannigan", "fuerst.reinje@web.de")] - - - KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)' - KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)' - - - def detect_key(self, html=None): - html = html or self.retrieve_html() - - m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) - if m: - self.key = m.group(1).strip() - self.logDebug("Key: %s" % self.key) - return self.key - else: - self.logWarning("Key pattern not found") - return None - - - def challenge(self, key=None, html=None, version=None): - key = key or self.retrieve_key(html) - - if version in (1, 2): - return getattr(self, "_challenge_v%s" % version)(key) - - else: - return self.challenge(key, - version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_html()) else 1) - - - def _challenge_v1(self, key): - html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", - get={'k': key}) - try: - challenge = re.search("challenge : '(.+?)',", html).group(1) - server = re.search("server : '(.+?)',", html).group(1) - - except AttributeError: - self.fail(_("ReCaptcha challenge pattern not found")) - - self.logDebug("Challenge: %s" % challenge) - - return self.result(server, challenge, key) - - - def result(self, server, challenge, key): - self.plugin.req.load("http://www.google.com/recaptcha/api/js/recaptcha.js") - html = self.plugin.req.load("http://www.google.com/recaptcha/api/reload", - get={'c' : challenge, - 'k' : key, - 'reason': "i", - 'type' : "image"}) - - try: - challenge = re.search('\(\'(.+?)\',',html).group(1) - - except AttributeError: - self.fail(_("ReCaptcha second challenge pattern not found")) - - self.logDebug("Second challenge: %s" % challenge) - result = self.plugin.decryptCaptcha("%simage" % server, - get={'c': challenge}, - cookies=True, - forceUser=True, - imgtype="jpg") - - self.logDebug("Result: %s" % result) - - return result, challenge - - - def _collectApiInfo(self): - html = self.plugin.req.load("http://www.google.com/recaptcha/api.js") - a = re.search(r'po.src = \'(.*?)\';', html).group(1) - vers = a.split("/")[5] - - self.logDebug("API version: %s" % vers) - - language = a.split("__")[1].split(".")[0] - - self.logDebug("API language: %s" % language) - - html = self.plugin.req.load("https://apis.google.com/js/api.js") - b = re.search(r'"h":"(.*?)","', html).group(1) - jsh = b.decode('unicode-escape') - - self.logDebug("API jsh-string: %s" % jsh) - - return vers, language, jsh - - - def _prepareTimeAndRpc(self): - self.plugin.req.load("http://www.google.com/recaptcha/api2/demo") - - millis = int(round(time.time() * 1000)) - - self.logDebug("Time: %s" % millis) - - rand = random.randint(1, 99999999) - a = "0.%s" % str(rand * 2147483647) - rpc = int(100000000 * float(a)) - - self.logDebug("Rpc-token: %s" % rpc) - - return millis, rpc - - - def _challenge_v2(self, key, parent=None): - if parent is None: - try: - parent = urlparse.urljoin("http://", urlparse.urlparse(self.plugin.pyfile.url).netloc) - - except Exception: - parent = "" - - botguardstring = "!A" - vers, language, jsh = self._collectApiInfo() - millis, rpc = self._prepareTimeAndRpc() - - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/anchor", - get={'k' : key, - 'hl' : language, - 'v' : vers, - 'usegapi' : "1", - 'jsh' : "%s#id=IO_%s" % (jsh, millis), - 'parent' : parent, - 'pfname' : "", - 'rpctoken': rpc}) - - token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html) - self.logDebug("Token #1: %s" % token1.group(1)) - - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/frame", - get={'c' : token1.group(1), - 'hl' : language, - 'v' : vers, - 'bg' : botguardstring, - 'k' : key, - 'usegapi': "1", - 'jsh' : jsh}).decode('unicode-escape') - - token2 = re.search(r'"finput","(.*?)",', html) - self.logDebug("Token #2: %s" % token2.group(1)) - - token3 = re.search(r'"rresp","(.*?)",', html) - self.logDebug("Token #3: %s" % token3.group(1)) - - millis_captcha_loading = int(round(time.time() * 1000)) - captcha_response = self.plugin.decryptCaptcha("https://www.google.com/recaptcha/api2/payload", - get={'c':token3.group(1), 'k':key}, - cookies=True, - forceUser=True) - response = b64encode('{"response":"%s"}' % captcha_response) - - self.logDebug("Result: %s" % response) - - timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading - timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500) - - html = self.plugin.req.load("https://www.google.com/recaptcha/api2/userverify", - post={'k' : key, - 'c' : token3.group(1), - 'response': response, - 't' : timeToSolve, - 'ct' : timeToSolveMore, - 'bg' : botguardstring}) - - token4 = re.search(r'"uvresp","(.*?)",', html) - self.logDebug("Token #4: %s" % token4.group(1)) - - result = token4.group(1) - - return result, None diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py index 624f6c939..5811c28de 100644 --- a/module/plugins/internal/SevenZip.py +++ b/module/plugins/internal/SevenZip.py @@ -5,12 +5,13 @@ import re import subprocess from module.plugins.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice -from module.utils import fs_encode, save_join +from module.utils import fs_encode, save_join as fs_join class SevenZip(UnRar): __name__ = "SevenZip" - __version__ = "0.11" + __version__ = "0.14" + __status__ = "testing" __description__ = """7-Zip extractor plugin""" __license__ = "GPLv3" @@ -18,9 +19,7 @@ class SevenZip(UnRar): ("Walter Purcaro", "vuolter@gmail.com")] - CMD = "7z" - VERSION = "" - + CMD = "7z" EXTENSIONS = [".7z", ".xz", ".zip", ".gz", ".gzip", ".tgz", ".bz2", ".bzip2", ".tbz2", ".tbz", ".tar", ".wim", ".swm", ".lzma", ".rar", ".cab", ".arj", ".z", ".taz", ".cpio", ".rpm", ".deb", ".lzh", ".lha", @@ -37,23 +36,27 @@ class SevenZip(UnRar): @classmethod - def isUsable(cls): - if os.name == "nt": - cls.CMD = os.path.join(pypath, "7z.exe") - p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - else: + def find(cls): + try: + if os.name == "nt": + cls.CMD = os.path.join(pypath, "7z.exe") + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() - m = cls.re_version.search(out) - cls.VERSION = m.group(1) if m else '(version unknown)' + except OSError: + return False + + else: + m = cls.re_version.search(out) + if m is not None: + cls.VERSION = m.group(1) - return True + return True def verify(self, password): - # 7z can't distinguish crc and pw error in test + #: 7z can't distinguish crc and pw error in test p = self.call_cmd("l", "-slt", fs_encode(self.filename)) out, err = p.communicate() @@ -72,7 +75,7 @@ class SevenZip(UnRar): p = self.call_cmd("l", "-slt", fs_encode(self.filename)) out, err = p.communicate() - # check if output or error macthes the 'wrong password'-Regexp + #: Check if output or error macthes the 'wrong password'-Regexp if self.re_wrongpwd.search(out): raise PasswordError @@ -91,7 +94,7 @@ class SevenZip(UnRar): renice(p.pid, self.renice) - # communicate and retrieve stderr + #: Communicate and retrieve stderr self._progress(p) err = p.stderr.read().strip() @@ -102,7 +105,7 @@ class SevenZip(UnRar): elif self.re_wrongcrc.search(err): raise CRCError(err) - else: #: raise error if anything is on stderr + else: #: Raise error if anything is on stderr raise ArchiveError(err) if p.returncode > 1: @@ -126,7 +129,7 @@ class SevenZip(UnRar): result = set() for groups in self.re_filelist.findall(out): f = groups[-1].strip() - result.add(save_join(self.out, f)) + result.add(fs_join(self.out, f)) return list(result) @@ -134,20 +137,20 @@ class SevenZip(UnRar): def call_cmd(self, command, *xargs, **kwargs): args = [] - #overwrite flag + #: Overwrite flag if self.overwrite: args.append("-y") - #set a password - if "password" in kwargs and kwargs["password"]: - args.append("-p%s" % kwargs["password"]) + #: Set a password + if "password" in kwargs and kwargs['password']: + args.append("-p%s" % kwargs['password']) else: args.append("-p-") #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) - self.manager.logDebug(" ".join(call)) + self.log_debug(" ".join(call)) p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return p diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index b843a28f0..6a3f91a5b 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -1,27 +1,26 @@ # -*- coding: utf-8 -*- import re -import urlparse -from module.plugins.Crypter import Crypter -from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies +from module.plugins.internal.Crypter import Crypter +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookie, set_cookies from module.utils import fixup, html_unescape class SimpleCrypter(Crypter, SimpleHoster): __name__ = "SimpleCrypter" __type__ = "crypter" - __version__ = "0.46" + __version__ = "0.60" + __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("use_subfolder" , "bool", "Save package to subfolder" , True), #: Overrides core.config['general']['folder_per_package'] + __config__ = [("use_subfolder" , "bool", "Save package to subfolder" , True), #: Overrides pyload.config['general']['folder_per_package'] ("subfolder_per_pack", "bool", "Create a subfolder for each package", True)] __description__ = """Simple decrypter plugin""" __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - """ Following patterns should be defined by each crypter: @@ -49,121 +48,93 @@ class SimpleCrypter(Crypter, SimpleHoster): and its loadPage method: - def loadPage(self, page_n): + def load_page(self, page_n): return the html of the page number page_n """ - LINK_PATTERN = None - - NAME_REPLACEMENTS = [("&#?\w+;", fixup)] - URL_REPLACEMENTS = [] - - TEXT_ENCODING = False #: Set to True or encoding name if encoding in http header is not correct - COOKIES = True #: or False or list of tuples [(domain, name, value)] - - LOGIN_ACCOUNT = False - LOGIN_PREMIUM = False + DIRECT_LINK = True + LEECH_HOSTER = False #@TODO: Remove in 0.4.10 - def init(self): - account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") - account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) - - if account and account.canUse(): - self.user, data = account.selectAccount() - self.req = account.getAccountRequest(self.user) - self.premium = account.isPremium(self.user) + def _setup(self): + orig_name = self.__name__ + self.__name__ = (orig_name + ".py").replace("Folder.py", "").replace(".py", "") - self.account = account + super(SimpleCrypter, self)._setup() + self.__name__ = orig_name - def prepare(self): - self.pyfile.error = "" #@TODO: Remove in 0.4.10 - self.info = {} - self.html = "" - self.links = [] #@TODO: Move to hoster class in 0.4.10 + #@TODO: Remove in 0.4.10 + def load_account(self): + orig_name = self.__name__ + self.__name__ = (orig_name + ".py").replace("Folder.py", "").replace(".py", "") - if self.LOGIN_PREMIUM and not self.premium: - self.fail(_("Required premium account not found")) + super(SimpleCrypter, self).load_account() - if self.LOGIN_ACCOUNT and not self.account: - self.fail(_("Required account not found")) + self.__name__ = orig_name - self.req.setOption("timeout", 120) - if isinstance(self.COOKIES, list): - set_cookies(self.req.cj, self.COOKIES) + def handle_direct(self, pyfile): + for i in xrange(self.get_config("maxredirs", plugin="UserAgentSwitcher")): + redirect = self.link or pyfile.url + self.log_debug("Redirect #%d to: %s" % (i, redirect)) - self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + header = self.load(redirect, just_header=True) + if 'location' in header and header['location']: + self.link = header['location'] + else: + break + else: + self.log_error(_("Too many redirects")) def decrypt(self, pyfile): self.prepare() + self.check_info() #@TODO: Remove in 0.4.10 - self.preload() - self.checkInfo() - - self.links = self.getLinks() + if self.direct_dl: + self.log_debug(_("Looking for direct download link...")) + self.handle_direct(pyfile) - if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): - self.handlePages(pyfile) + if self.link or self.links or self.urls or self.packages: + self.log_info(_("Direct download link detected")) + else: + self.log_info(_("Direct download link not found")) - self.logDebug("Package has %d links" % len(self.links)) + if not (self.link or self.links or self.urls or self.packages): + self.preload() - if self.links: - self.packages = [(self.info['name'], self.links, self.info['folder'])] + self.links = self.get_links() or list() - elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 - self.fail(_("No link grabbed")) + if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): + self.handle_pages(pyfile) + self.log_debug("Package has %d links" % len(self.links)) - def checkNameSize(self, getinfo=True): - if not self.info or getinfo: - self.logDebug("File info (BEFORE): %s" % self.info) - self.info.update(self.getInfo(self.pyfile.url, self.html)) - self.logDebug("File info (AFTER): %s" % self.info) - - try: - url = self.info['url'].strip() - name = self.info['name'].strip() - if name and name != url: - self.pyfile.name = name + if self.link: + self.urls.append(self.link) - except Exception: - pass - - try: - folder = self.info['folder'] = self.pyfile.name - - except Exception: - pass - - self.logDebug("File name: %s" % self.pyfile.name, - "File folder: %s" % self.pyfile.name) + if self.links: + name = folder = pyfile.name + self.packages.append((name, self.links, folder)) - def getLinks(self): + def get_links(self): """ Returns the links extracted from self.html You should override this only if it's impossible to extract links using only the LINK_PATTERN. """ - url_p = urlparse.urlparse(self.pyfile.url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - - links = [urlparse.urljoin(baseurl, link) if not urlparse.urlparse(link).scheme else link \ - for link in re.findall(self.LINK_PATTERN, self.html)] - - return [html_unescape(l.strip().decode('unicode-escape')) for l in links] + return re.findall(self.LINK_PATTERN, self.html) - def handlePages(self, pyfile): + def handle_pages(self, pyfile): try: pages = int(re.search(self.PAGES_PATTERN, self.html).group(1)) except Exception: pages = 1 for p in xrange(2, pages + 1): - self.html = self.loadPage(p) - self.links += self.getLinks() + self.html = self.load_page(p) + self.links += self.get_links() diff --git a/module/plugins/internal/SimpleDereferer.py b/module/plugins/internal/SimpleDereferer.py deleted file mode 100644 index 2e7e08321..000000000 --- a/module/plugins/internal/SimpleDereferer.py +++ /dev/null @@ -1,107 +0,0 @@ -# -*- coding: utf-8 -*- - -import re - -from module.plugins.Crypter import Crypter -from module.plugins.internal.SimpleHoster import create_getInfo, set_cookies -from module.utils import html_unescape - - -class SimpleDereferer(Crypter): - __name__ = "SimpleDereferer" - __type__ = "crypter" - __version__ = "0.14" - - __pattern__ = r'^unmatchable$' - __config__ = [] #@TODO: Remove in 0.4.10 - - __description__ = """Simple dereferer plugin""" - __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - - - """ - Following patterns should be defined by each crypter: - - LINK_PATTERN: Regex to catch the redirect url in group(1) - example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' - - OFFLINE_PATTERN: (optional) Checks if the page is unreachable - example: OFFLINE_PATTERN = r'File (deleted|not found)' - - TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable - example: TEMP_OFFLINE_PATTERN = r'Server maintainance' - - - You can override the getLinks method if you need a more sophisticated way to extract the redirect url. - """ - - URL_REPLACEMENTS = [] - - LINK_PATTERN = None - - TEXT_ENCODING = False - COOKIES = True - - - def handleDirect(self, pyfile): - header = self.load(pyfile.url, just_header=True, decode=True) - if 'location' in header and header['location']: - self.link = header['location'] - - - def decrypt(self, pyfile): - self.prepare() - - self.handleDirect(pyfile) - - if not self.link: - self.preload() - self.checkStatus() - - self.link = self.getLink() - - if self.link: - self.urls = [self.link] - - elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 - self.fail(_("No link grabbed")) - - - def prepare(self): - self.info = {} - self.html = "" - self.link = "" #@TODO: Move to hoster class in 0.4.10 - - self.req.setOption("timeout", 120) - - if isinstance(self.COOKIES, list): - set_cookies(self.req.cj, self.COOKIES) - - self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) - - - def preload(self): - self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) - - if isinstance(self.TEXT_ENCODING, basestring): - self.html = unicode(self.html, self.TEXT_ENCODING) - - - def checkStatus(self): - if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): - self.offline() - - elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): - self.tempOffline() - - - def getLink(self): - try: - link = re.search(self.LINK_PATTERN, self.html).group(1) - - except Exception, e: - self.logWarning(e) - - else: - return html_unescape(link.strip().decode('unicode-escape')) #@TODO: Move this check to plugin `load` method in 0.4.10 diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 1d44a6642..9c310ca27 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,245 +1,30 @@ # -*- coding: utf-8 -*- -import datetime +from __future__ import with_statement + import mimetypes import os import re import time -import urllib import urlparse from module.PyFile import statusMap as _statusMap -from module.network.CookieJar import CookieJar from module.network.HTTPRequest import BadHeader -from module.network.RequestFactory import getURL -from module.plugins.Hoster import Hoster -from module.plugins.Plugin import Fail, Retry -from module.utils import fixup, fs_encode, html_unescape, parseFileSize +from module.network.RequestFactory import getURL as get_url +from module.plugins.internal.Hoster import Hoster, create_getInfo, parse_fileInfo +from module.plugins.internal.Plugin import Fail, encode, fixurl, replace_patterns, seconds_to_midnight, set_cookie, set_cookies +from module.utils import fixup, fs_encode, parseFileSize as parse_size #@TODO: Adapt and move to PyFile in 0.4.10 -statusMap = dict((v, k) for k, v in _statusMap.iteritems()) - - -#@TODO: Remove in 0.4.10 and redirect to self.error instead -def _error(self, reason, type): - if not reason and not type: - type = "unknown" - - msg = _("%s error") % type.strip().capitalize() if type else _("Error") - msg += (": %s" % reason.strip()) if reason else "" - msg += _(" | Plugin may be out of date") - - raise Fail(msg) - - -#@TODO: Remove in 0.4.10 -def _wait(self, seconds, reconnect): - if seconds: - self.setWait(int(seconds) + 1) - - if reconnect is not None: - self.wantReconnect = reconnect - - super(SimpleHoster, self).wait() - - -def replace_patterns(string, ruleslist): - for r in ruleslist: - rf, rt = r - string = re.sub(rf, rt, string) - return string - - -def set_cookies(cj, cookies): - for cookie in cookies: - if isinstance(cookie, tuple) and len(cookie) == 3: - domain, name, value = cookie - cj.setCookie(domain, name, value) - - -def parseHtmlTagAttrValue(attr_name, tag): - m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) - return m.group(2) if m else None - - -def parseHtmlForm(attr_str, html, input_names={}): - for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, - html, re.S | re.I): - inputs = {} - action = parseHtmlTagAttrValue("action", form.group('TAG')) - - for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): - name = parseHtmlTagAttrValue("name", inputtag.group(1)) - if name: - value = parseHtmlTagAttrValue("value", inputtag.group(1)) - if not value: - inputs[name] = inputtag.group(3) or "" - else: - inputs[name] = value - - if input_names: - # check input attributes - for key, val in input_names.iteritems(): - if key in inputs: - if isinstance(val, basestring) and inputs[key] == val: - continue - elif isinstance(val, tuple) and inputs[key] in val: - continue - elif hasattr(val, "search") and re.match(val, inputs[key]): - continue - break #: attibute value does not match - else: - break #: attibute name does not match - else: - return action, inputs #: passed attribute check - else: - # no attribute check - return action, inputs - - return {}, None #: no matching form found - - -#@TODO: Remove in 0.4.10 -def parseFileInfo(plugin, url="", html=""): - if hasattr(plugin, "getInfo"): - info = plugin.getInfo(url, html) - res = info['name'], info['size'], info['status'], info['url'] - else: - url = urllib.unquote(url) - url_p = urlparse.urlparse(url) - res = ((url_p.path.split('/')[-1] - or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] - or url_p.netloc.split('.', 1)[0]), - 0, - 3 if url else 8, - url) - - return res - - -#@TODO: Remove in 0.4.10 -def create_getInfo(plugin): - def getInfo(urls): - for url in urls: - if hasattr(plugin, "URL_REPLACEMENTS"): - url = replace_patterns(url, plugin.URL_REPLACEMENTS) - yield parseFileInfo(plugin, url) - - return getInfo - - -def timestamp(): - return int(time.time() * 1000) - - -#@TODO: Move to hoster class in 0.4.10 -def getFileURL(self, url, follow_location=None): - link = "" - redirect = 1 - - if type(follow_location) is int: - redirect = max(follow_location, 1) - else: - redirect = 10 - - for i in xrange(redirect): - try: - self.logDebug("Redirect #%d to: %s" % (i, url)) - header = self.load(url, just_header=True, decode=True) - - except Exception: #: Bad bad bad... rewrite this part in 0.4.10 - req = pyreq.getHTTPRequest() - res = req.load(url, just_header=True, decode=True) - - req.close() - - header = {"code": req.code} - for line in res.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - - key, none, value = line.partition(":") - key = key.lower().strip() - value = value.strip() - - if key in header: - if type(header[key]) == list: - header[key].append(value) - else: - header[key] = [header[key], value] - else: - header[key] = value - - if 'content-disposition' in header: - link = url - - elif 'location' in header and header['location']: - location = header['location'] - - if not urlparse.urlparse(location).scheme: - url_p = urlparse.urlparse(url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - location = urlparse.urljoin(baseurl, location) - - if 'code' in header and header['code'] == 302: - link = location - - if follow_location: - url = location - continue - - else: - extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] - - if 'content-type' in header and header['content-type']: - mimetype = header['content-type'].split(';')[0].strip() - - elif extension: - mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" - - else: - mimetype = "" - - if mimetype and (link or 'html' not in mimetype): - link = url - else: - link = "" - - break - - else: - try: - self.logError(_("Too many redirects")) - except Exception: - pass - - return link - - -def secondsToMidnight(gmt=0): - now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) - - if now.hour is 0 and now.minute < 10: - midnight = now - else: - midnight = now + datetime.timedelta(days=1) - - td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now - - if hasattr(td, 'total_seconds'): - res = td.total_seconds() - else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds - res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 - - return int(res) +statusMap = dict((v, k) for k, v in _statusMap.items()) class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "1.50" + __version__ = "1.80" + __status__ = "testing" __pattern__ = r'^unmatchable$' __config__ = [("use_premium", "bool", "Use premium account if available" , True), @@ -249,7 +34,6 @@ class SimpleHoster(Hoster): __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - """ Info patterns: @@ -258,6 +42,7 @@ class SimpleHoster(Hoster): or NAME_PATTERN: (mandatory) Name that will be set for the file example: NAME_PATTERN = r'(?P<N>file_name)' + SIZE_PATTERN: (mandatory) Size that will be checked for the file example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' @@ -285,8 +70,8 @@ class SimpleHoster(Hoster): IP_BLOCKED_PATTERN: (optional) example: IP_BLOCKED_PATTERN = r'in your country' - DOWNLOAD_LIMIT_PATTERN: (optional) - example: DOWNLOAD_LIMIT_PATTERN = r'download limit' + DL_LIMIT_PATTERN: (optional) + example: DL_LIMIT_PATTERN = r'download limit' SIZE_LIMIT_PATTERN: (optional) example: SIZE_LIMIT_PATTERN = r'up to' @@ -295,49 +80,50 @@ class SimpleHoster(Hoster): example: ERROR_PATTERN = r'' - Instead overriding handleFree and handlePremium methods you may define the following patterns for basic link handling: + Instead overriding handle_free and handle_premium methods you may define the following patterns for basic link handling: - LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download - example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' + LINK_PATTERN: (optional) group(1) should be the direct link for free and premium download + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + or + LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download + example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' - LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download - example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' + LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download + example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' """ - NAME_REPLACEMENTS = [("&#?\w+;", fixup)] SIZE_REPLACEMENTS = [] URL_REPLACEMENTS = [] - TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + FILE_ERRORS = [('Html error' , r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)'), + ('Request error', r'([Aa]n error occured while processing your request)' ), + ('Html file' , r'\A\s*<!DOCTYPE html' )] + + CHECK_FILE = True #: Set to False to not check the last downloaded file with declared error patterns + CHECK_TRAFFIC = False #: Set to True to reload checking traffic left for premium account COOKIES = True #: or False or list of tuples [(domain, name, value)] - CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account - DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False - MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) - LOGIN_ACCOUNT = False #: Set to True to require account login + DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handle_direct method), set to None to do it if self.account is True else False DISPOSITION = True #: Set to True to use any content-disposition value in http header as file name + LOGIN_ACCOUNT = False #: Set to True to require account login + LOGIN_PREMIUM = False #: Set to True to require premium account login + LEECH_HOSTER = False #: Set to True to leech other hoster link (as defined in handle_multi method) + TEXT_ENCODING = True #: Set to encoding name if encoding value in http header is not correct - directLink = getFileURL #@TODO: Remove in 0.4.10 + LINK_PATTERN = None @classmethod - def apiInfo(cls, url): - url = urllib.unquote(url) - url_p = urlparse.urlparse(url) - return {'name' : (url_p.path.split('/')[-1] - or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] - or url_p.netloc.split('.', 1)[0]), - 'size' : 0, - 'status': 3 if url else 8, - 'url' : url} + def api_info(cls, url): + return super(SimpleHoster, cls).get_info(url) @classmethod - def getInfo(cls, url="", html=""): - info = cls.apiInfo(url) - online = True if info['status'] is 2 else False + def get_info(cls, url="", html=""): + info = cls.api_info(url) + online = True if info['status'] == 2 else False try: - info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here + info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: Pattern groups will be saved here except Exception: info['pattern'] = {} @@ -347,20 +133,17 @@ class SimpleHoster(Hoster): info['error'] = "missing url" info['status'] = 1 - elif info['status'] is 3: + elif info['status'] == 3: try: - html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) - - if isinstance(cls.TEXT_ENCODING, basestring): - html = unicode(html, cls.TEXT_ENCODING) + html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING) except BadHeader, e: info['error'] = "%d: %s" % (e.code, e.content) - if e.code is 404: + if e.code == 404: info['status'] = 1 - elif e.code is 503: + elif e.code == 503: info['status'] = 6 except Exception: @@ -392,17 +175,17 @@ class SimpleHoster(Hoster): info['status'] = 2 if 'N' in info['pattern']: - info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()), + info['name'] = replace_patterns(fixurl(info['pattern']['N']), cls.NAME_REPLACEMENTS) if 'S' in info['pattern']: size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], cls.SIZE_REPLACEMENTS) - info['size'] = parseFileSize(size) + info['size'] = parse_size(size) elif isinstance(info['size'], basestring): unit = info['units'] if 'units' in info else None - info['size'] = parseFileSize(info['size'], unit) + info['size'] = parse_size(info['size'], unit) if 'H' in info['pattern']: hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" @@ -415,156 +198,155 @@ class SimpleHoster(Hoster): def setup(self): - self.resumeDownload = self.multiDL = self.premium + self.resume_download = self.multiDL = self.premium def prepare(self): - self.pyfile.error = "" #@TODO: Remove in 0.4.10 + self.pyfile.error = "" #@TODO: Remove in 0.4.10 + self.html = "" #@TODO: Recheck in 0.4.10 + self.link = "" #@TODO: Recheck in 0.4.10 + self.last_download = "" + self.direct_dl = False + self.leech_dl = False - self.info = {} - self.html = "" - self.link = "" #@TODO: Move to hoster class in 0.4.10 - self.directDL = False #@TODO: Move to hoster class in 0.4.10 - self.multihost = False #@TODO: Move to hoster class in 0.4.10 + if not self.get_config('use_premium', True): + self.restart(nopremium=True) - if not self.getConfig('use_premium', True): - self.retryFree() + if self.LOGIN_PREMIUM and not self.premium: + self.fail(_("Required premium account not found")) + self.LOGIN_ACCOUNT = True if self.LOGIN_ACCOUNT and not self.account: self.fail(_("Required account not found")) self.req.setOption("timeout", 120) - if isinstance(self.COOKIES, list): - set_cookies(self.req.cj, self.COOKIES) + if self.LINK_PATTERN: + if not hasattr(self, 'LINK_FREE_PATTERN'): + self.LINK_FREE_PATTERN = self.LINK_PATTERN - if (self.MULTI_HOSTER - and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern'] - or re.match(self.__pattern__, self.pyfile.url) is None)): - self.multihost = True - return + if not hasattr(self, 'LINK_PREMIUM_PATTERN'): + self.LINK_PREMIUM_PATTERN = self.LINK_PATTERN + + if (self.LEECH_HOSTER + and (self.__pattern__ is not self.pyload.pluginManager.hosterPlugins[self.__name__]['pattern'] + and re.match(self.__pattern__, self.pyfile.url) is None)): + self.leech_dl = True + + if self.leech_dl: + self.direct_dl = False - if self.DIRECT_LINK is None: - self.directDL = bool(self.account) + elif self.DIRECT_LINK is None: + self.direct_dl = bool(self.account) else: - self.directDL = self.DIRECT_LINK + self.direct_dl = self.DIRECT_LINK - self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + if not self.leech_dl: + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) def preload(self): - self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) - - if isinstance(self.TEXT_ENCODING, basestring): - self.html = unicode(self.html, self.TEXT_ENCODING) + self.html = self.load(self.pyfile.url, + cookies=self.COOKIES, + ref=False, + decode=self.TEXT_ENCODING) def process(self, pyfile): try: self.prepare() - self.checkInfo() - - if self.directDL: - self.logDebug("Looking for direct download link...") - self.handleDirect(pyfile) + self.check_info() #@TODO: Remove in 0.4.10 - if self.multihost and not self.link and not self.lastDownload: - self.logDebug("Looking for leeched download link...") - self.handleMulti(pyfile) + if self.leech_dl: + self.log_info(_("Processing as debrid download...")) + self.handle_multi(pyfile) - if not self.link and not self.lastDownload: - self.MULTI_HOSTER = False - self.retry(1, reason="Multi hoster fails") + if not self.link and not was_downloaded(): + self.log_info(_("Failed to leech url")) - if not self.link and not self.lastDownload: - self.preload() - self.checkInfo() - - if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium(pyfile) - - elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as free download") - self.handleFree(pyfile) - - self.downloadLink(self.link, self.DISPOSITION) - self.checkFile() + else: + if not self.link and self.direct_dl and not self.last_download: + self.log_info(_("Looking for direct download link...")) + self.handle_direct(pyfile) - except Fail, e: #@TODO: Move to PluginThread in 0.4.10 - err = str(e) #@TODO: Recheck in 0.4.10 + if self.link or self.last_download: + self.log_info(_("Direct download link detected")) + else: + self.log_info(_("Direct download link not found")) - if err == _("No captcha result obtained in appropiate time by any of the plugins."): #@TODO: Fix in 0.4.10 - self.checkFile() + if not self.link and not self.last_download: + self.preload() - elif self.getConfig('fallback', True) and self.premium: - self.logWarning(_("Premium download failed"), e) - self.retryFree() + if 'status' not in self.info or self.info['status'] is 3: #@TODO: Recheck in 0.4.10 + self.check_info() - else: - raise Fail(err) + if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic_left()): + self.log_info(_("Processing as premium download...")) + self.handle_premium(pyfile) + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.check_traffic_left()): + self.log_info(_("Processing as free download...")) + self.handle_free(pyfile) - def downloadLink(self, link, disposition=True): - if not link or not isinstance(link, basestring): - return + if not self.last_download: + self.log_info(_("Downloading file...")) + self.download(self.link, disposition=self.DISPOSITION) - self.correctCaptcha() + self.check_file() - link = html_unescape(link.strip().decode('unicode-escape')) #@TODO: Move this check to plugin `load` method in 0.4.10 + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 + if self.get_config('fallback', True) and self.premium: + self.log_warning(_("Premium download failed"), e) + self.restart(nopremium=True) - if not urlparse.urlparse(link).scheme: - url_p = urlparse.urlparse(self.pyfile.url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - link = urlparse.urljoin(baseurl, link) + else: + raise Fail(encode(e)) #@TODO: Remove `encode` in 0.4.10 - self.download(link, ref=False, disposition=disposition) + def check_file(self): + self.log_info(_("Checking file...")) - def checkFile(self, rules={}): - if self.cTask and not self.lastDownload: - self.invalidCaptcha() + if self.captcha.task and not self.last_download: + self.captcha.invalid() self.retry(10, reason=_("Wrong captcha")) - elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)): - self.lastDownload = "" - self.error(self.pyfile.error or _("No file downloaded")) + # 10485760 is 10MB, tolerance is used when comparing displayed size on the hoster website to real size + # For example displayed size can be 1.46GB for example, but real size can be 1.4649853GB + elif self.check_download({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}, + file_size=self.info['size'] if 'size' in self.info else 0, + size_tolerance=10485760, + delete=True): + self.error(_("Empty file")) else: - errmsg = self.checkDownload({'Empty file': re.compile(r'\A\s*\Z'), - 'Html error': re.compile(r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)')}) - - if not errmsg: - for r, p in [('Html file' , re.compile(r'\A\s*<!DOCTYPE html') ), - ('Request error', re.compile(r'([Aa]n error occured while processing your request)'))]: - if r not in rules: - rules[r] = p - - for r, a in [('Error' , "ERROR_PATTERN" ), - ('Premium only', "PREMIUM_ONLY_PATTERN"), - ('Wait error' , "WAIT_PATTERN" )]: - if r not in rules and hasattr(self, a): - rules[r] = getattr(self, a) - - errmsg = self.checkDownload(rules) + self.log_debug("Using default check rules...") + for r, p in self.FILE_ERRORS: + errmsg = self.check_download({r: re.compile(p)}) + if errmsg is not None: + errmsg = errmsg.strip().capitalize() - if not errmsg: - return - - errmsg = errmsg.strip().capitalize() + try: + errmsg += " | " + self.last_check.group(1).strip() + except Exception: + pass - try: - errmsg += " | " + self.lastCheck.group(1).strip() - except Exception: - pass + self.log_warning(_("Check result: ") + errmsg, _("Waiting 1 minute and retry")) + self.wantReconnect = True + self.retry(wait_time=60, reason=errmsg) + else: + if self.CHECK_FILE: + self.log_debug("Using custom check rules...") + with open(fs_encode(self.last_download), "rb") as f: + self.html = f.read(1048576) #@TODO: Recheck in 0.4.10 + self.check_errors() - self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry") - self.retry(3, 60, errmsg) + self.log_info(_("No errors found")) + self.pyfile.error = "" - def checkErrors(self): + def check_errors(self): if not self.html: - self.logWarning(_("No html code to check")) + self.log_warning(_("No html code to check")) return if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html): @@ -577,20 +359,20 @@ class SimpleHoster(Hoster): elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html): self.fail(_("File too large for free download")) - elif hasattr(self, 'DOWNLOAD_LIMIT_PATTERN') and re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html): - m = re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html) + elif hasattr(self, 'DL_LIMIT_PATTERN') and re.search(self.DL_LIMIT_PATTERN, self.html): + m = re.search(self.DL_LIMIT_PATTERN, self.html) try: errmsg = m.group(1).strip() except Exception: errmsg = m.group(0).strip() self.info['error'] = re.sub(r'<.*?>', " ", errmsg) - self.logWarning(self.info['error']) + self.log_warning(self.info['error']) if re.search('da(il)?y|today', errmsg, re.I): - wait_time = secondsToMidnight(gmt=2) + wait_time = seconds_to_midnight(gmt=2) else: - wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) self.wantReconnect = wait_time > 300 @@ -608,13 +390,13 @@ class SimpleHoster(Hoster): errmsg = m.group(0).strip() self.info['error'] = re.sub(r'<.*?>', " ", errmsg) - self.logWarning(self.info['error']) + self.log_warning(self.info['error']) - if re.search('limit|wait', errmsg, re.I): + if re.search('limit|wait|slot', errmsg, re.I): if re.search("da(il)?y|today", errmsg): - wait_time = secondsToMidnight(gmt=2) + wait_time = seconds_to_midnight(gmt=2) else: - wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) self.wantReconnect = wait_time > 300 @@ -624,20 +406,26 @@ class SimpleHoster(Hoster): self.fail(_("Connection from your current IP address is not allowed")) elif re.search('captcha|code', errmsg, re.I): - self.invalidCaptcha() + self.captcha.invalid() + self.retry(10, reason=_("Wrong captcha")) elif re.search('countdown|expired', errmsg, re.I): - self.retry(wait_time=60, reason=_("Link expired")) + self.retry(10, 60, _("Link expired")) elif re.search('maintenance|maintainance|temp', errmsg, re.I): - self.tempOffline() + self.temp_offline() - elif re.search('up to', errmsg, re.I): + elif re.search('up to|size', errmsg, re.I): self.fail(_("File too large for free download")) - elif re.search('offline|delet|remov|not (found|available)', errmsg, re.I): + elif re.search('offline|delet|remov|not? (found|(longer)? available)', errmsg, re.I): self.offline() + elif re.search('filename', errmsg, re.I): + url_p = urlparse.urlparse(self.pyfile.url) + self.pyfile.url = "%s://%s/%s" % (url_p.scheme, url_p.netloc, url_p.path.split('/')[0]) + self.retry(1, reason=_("Wrong url")) + elif re.search('premium', errmsg, re.I): self.fail(_("File can be downloaded by premium users only")) @@ -653,98 +441,102 @@ class SimpleHoster(Hoster): except Exception: waitmsg = m.group(0).strip() - wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I)) self.wait(wait_time, wait_time > 300) self.info.pop('error', None) - def checkStatus(self, getinfo=True): + def check_status(self, getinfo=True): if not self.info or getinfo: - self.logDebug("Update file info...") - self.logDebug("Previous file info: %s" % self.info) - self.info.update(self.getInfo(self.pyfile.url, self.html)) - self.logDebug("Current file info: %s" % self.info) + self.log_info(_("Updating file info...")) + old_info = self.info.copy() + self.info.update(self.get_info(self.pyfile.url, self.html)) + self.log_debug("File info: %s" % self.info) + self.log_debug("Previous file info: %s" % old_info) try: - status = self.info['status'] + status = self.info['status'] or None - if status is 1: + if status == 1: self.offline() - elif status is 6: - self.tempOffline() + elif status == 6: + self.temp_offline() - elif status is 8: - self.fail(self.info['error'] if 'error' in self.info else _("Failed")) + elif status == 8: + if 'error' in self.info: + self.fail(self.info['error']) + else: + self.fail(_("File status: " + statusMap[status])) finally: - self.logDebug("File status: %s" % statusMap[status]) + self.log_info(_("File status: ") + (statusMap[status] if status else _("Unknown"))) - def checkNameSize(self, getinfo=True): + def check_name_size(self, getinfo=True): if not self.info or getinfo: - self.logDebug("Update file info...") - self.logDebug("Previous file info: %s" % self.info) - self.info.update(self.getInfo(self.pyfile.url, self.html)) - self.logDebug("Current file info: %s" % self.info) + self.log_info(_("Updating file info...")) + old_info = self.info.copy() + self.info.update(self.get_info(self.pyfile.url, self.html)) + self.log_debug("File info: %s" % self.info) + self.log_debug("Previous file info: %s" % old_info) try: url = self.info['url'].strip() name = self.info['name'].strip() - if name and name != url: - self.pyfile.name = name - except Exception: + except KeyError: pass - try: - size = self.info['size'] - if size > 0: - self.pyfile.size = size + else: + if name and name is not url: + self.pyfile.name = name - except Exception: - pass + if 'size' in self.info and self.info['size'] > 0: + self.pyfile.size = int(self.info['size']) #@TODO: Fix int conversion in 0.4.10 + + # self.pyfile.sync() + + name = self.pyfile.name + size = self.pyfile.size + folder = self.info['folder'] = name - self.logDebug("File name: %s" % self.pyfile.name, - "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown") + self.log_info(_("File name: ") + name) + self.log_info(_("File size: %s bytes") % size if size > 0 else _("File size: Unknown")) + # self.log_info("File folder: " + folder) - def checkInfo(self): - self.checkNameSize() + #@TODO: Rewrite in 0.4.10 + def check_info(self): + self.check_name_size() if self.html: - self.checkErrors() - self.checkNameSize() + self.check_errors() + self.check_name_size() - self.checkStatus(getinfo=False) + self.check_status(getinfo=False) - #: Deprecated - def getFileInfo(self): + #: Deprecated method (Remove in 0.4.10) + def get_fileInfo(self): self.info = {} - self.checkInfo() + self.check_info() return self.info - def handleDirect(self, pyfile): - link = self.directLink(pyfile.url, self.resumeDownload) - - if link: - self.logInfo(_("Direct download link detected")) - self.link = link - else: - self.logDebug("Direct download link not found") + def handle_direct(self, pyfile): + self.link = self.direct_link(pyfile.url, self.resume_download) - def handleMulti(self, pyfile): #: Multi-hoster handler + def handle_multi(self, pyfile): #: Multi-hoster handler pass - def handleFree(self, pyfile): + def handle_free(self, pyfile): if not hasattr(self, 'LINK_FREE_PATTERN'): - self.logError(_("Free download not implemented")) + self.log_error(_("Free download not implemented")) m = re.search(self.LINK_FREE_PATTERN, self.html) if m is None: @@ -753,77 +545,14 @@ class SimpleHoster(Hoster): self.link = m.group(1) - def handlePremium(self, pyfile): + def handle_premium(self, pyfile): if not hasattr(self, 'LINK_PREMIUM_PATTERN'): - self.logError(_("Premium download not implemented")) - self.logDebug("Handled as free download") - self.handleFree(pyfile) + self.log_error(_("Premium download not implemented")) + self.log_info(_("Processing as free download...")) + self.handle_free(pyfile) m = re.search(self.LINK_PREMIUM_PATTERN, self.html) if m is None: self.error(_("Premium download link not found")) else: self.link = m.group(1) - - - def longWait(self, wait_time=None, max_tries=3): - if wait_time and isinstance(wait_time, (int, long, float)): - time_str = "%dh %dm" % divmod(wait_time / 60, 60) - else: - wait_time = 900 - time_str = _("(unknown time)") - max_tries = 100 - - self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) - - self.wait(wait_time, True) - self.retry(max_tries=max_tries, reason=_("Download limit reached")) - - - def parseHtmlForm(self, attr_str="", input_names={}): - return parseHtmlForm(attr_str, self.html, input_names) - - - def checkTrafficLeft(self): - if not self.account: - return True - - traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] - - if traffic is None: - return False - elif traffic == -1: - return True - else: - size = self.pyfile.size / 1024 - self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) - return size <= traffic - - - #@TODO: Remove in 0.4.10 - def getConfig(self, option, default=''): - """getConfig with default value - sublass may not implements all config options""" - try: - return self.getConf(option) - - except KeyError: - return default - - - def retryFree(self): - if not self.premium: - return - self.premium = False - self.account = None - self.req = self.core.requestFactory.getRequest(self.__name__) - self.retries = -1 - raise Retry(_("Fallback to free download")) - - - #@TODO: Remove in 0.4.10 - def wait(self, seconds=0, reconnect=None): - return _wait(self, seconds, reconnect) - - - def error(self, reason="", type="parse"): - return _error(self, reason, type) diff --git a/module/plugins/internal/SolveMedia.py b/module/plugins/internal/SolveMedia.py deleted file mode 100644 index 7f5de51e1..000000000 --- a/module/plugins/internal/SolveMedia.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- - -import re - -from module.plugins.Plugin import Fail -from module.plugins.internal.Captcha import Captcha - - -class SolveMedia(Captcha): - __name__ = "SolveMedia" - __type__ = "captcha" - __version__ = "0.13" - - __description__ = """SolveMedia captcha service plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' - - - def detect_key(self, html=None): - html = html or self.retrieve_html() - - m = re.search(self.KEY_PATTERN, html) - if m: - self.key = m.group(1).strip() - self.logDebug("Key: %s" % self.key) - return self.key - else: - self.logWarning("Key pattern not found") - return None - - - def challenge(self, key=None, html=None): - key = key or self.retrieve_key(html) - - html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", - get={'k': key}) - - for i in xrange(1, 11): - try: - magic = re.search(r'name="magic" value="(.+?)"', html).group(1) - - except AttributeError: - self.logWarning("Magic pattern not found") - magic = None - - try: - challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="(.+?)">', - html).group(1) - - except AttributeError: - self.fail(_("SolveMedia challenge pattern not found")) - - else: - self.logDebug("Challenge: %s" % challenge) - - try: - result = self.result("http://api.solvemedia.com/papi/media", challenge) - - except Fail, e: - self.logWarning(e) - self.plugin.invalidCaptcha() - result = None - - html = self.plugin.req.load("http://api.solvemedia.com/papi/verify.noscript", - post={'adcopy_response' : result, - 'k' : key, - 'l' : "en", - 't' : "img", - 's' : "standard", - 'magic' : magic, - 'adcopy_challenge': challenge, - 'ref' : self.plugin.pyfile.url}) - try: - redirect = re.search(r'URL=(.+?)">', html).group(1) - - except AttributeError: - self.fail(_("SolveMedia verify pattern not found")) - - else: - if "error" in html: - self.logWarning("Captcha code was invalid") - self.logDebug("Retry #%d" % i) - html = self.plugin.req.load(redirect) - else: - break - - else: - self.fail(_("SolveMedia max retries exceeded")) - - return result, challenge - - - def result(self, server, challenge): - result = self.plugin.decryptCaptcha(server, - get={'c': challenge}, - cookies=True, - imgtype="gif") - - self.logDebug("Result: %s" % result) - - return result diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 5b9f2e1c3..0386991d9 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -8,7 +8,7 @@ from glob import glob from string import digits from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError -from module.utils import fs_decode, fs_encode, save_join +from module.utils import fs_decode, fs_encode, save_join as fs_join def renice(pid, value): @@ -22,7 +22,8 @@ def renice(pid, value): class UnRar(Extractor): __name__ = "UnRar" - __version__ = "1.20" + __version__ = "1.25" + __status__ = "testing" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" @@ -31,12 +32,10 @@ class UnRar(Extractor): ("Immenz" , "immenz@gmx.net" )] - CMD = "unrar" - VERSION = "" + CMD = "unrar" EXTENSIONS = [".rar"] - - re_multipart = re.compile(r'\.(part|r)(\d+)(?:\.rar)?(\.rev|\.bad)?',re.I) + re_multipart = re.compile(r'\.(part|r)(\d+)(?:\.rar)?(\.rev|\.bad)?', re.I) re_filefixed = re.compile(r'Building (.+)') re_filelist = re.compile(r'^(.)(\s*[\w\.\-]+)\s+(\d+\s+)+(?:\d+\%\s+)?[\d\-]{8}\s+[\d\:]{5}', re.M|re.I) @@ -48,38 +47,40 @@ class UnRar(Extractor): @classmethod - def isUsable(cls): - if os.name == "nt": - try: + def find(cls): + try: + if os.name == "nt": cls.CMD = os.path.join(pypath, "RAR.exe") - p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - cls.__name__ = "RAR" - cls.REPAIR = True + else: + cls.CMD = "rar" - except OSError: - cls.CMD = os.path.join(pypath, "UnRAR.exe") - p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - else: + p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = p.communicate() + # cls.__name__ = "RAR" + cls.REPAIR = True + + except OSError: try: - p = subprocess.Popen(["rar"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - out, err = p.communicate() - cls.__name__ = "RAR" - cls.REPAIR = True + if os.name == "nt": + cls.CMD = os.path.join(pypath, "UnRAR.exe") + else: + cls.CMD = "unrar" - except OSError: #: fallback to unrar p = subprocess.Popen([cls.CMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() + except OSError: + return False + m = cls.re_version.search(out) - cls.VERSION = m.group(1) if m else '(version unknown)' + if m is not None: + cls.VERSION = m.group(1) return True @classmethod - def isMultipart(cls, filename): + def is_multipart(cls, filename): return True if cls.re_multipart.search(filename) else False @@ -105,7 +106,7 @@ class UnRar(Extractor): if self.re_wrongcrc.search(err): raise CRCError(err) - # output only used to check if passworded files are present + #: Output only used to check if passworded files are present for attr in self.re_filelist.findall(out): if attr[0].startswith("*"): raise PasswordError @@ -114,7 +115,7 @@ class UnRar(Extractor): def repair(self): p = self.call_cmd("rc", fs_encode(self.filename)) - # communicate and retrieve stderr + #: Communicate and retrieve stderr self._progress(p) err = p.stderr.read().strip() if err or p.returncode: @@ -126,17 +127,17 @@ class UnRar(Extractor): s = "" while True: c = process.stdout.read(1) - # quit loop on eof + #: Quit loop on eof if not c: break - # reading a percentage sign -> set progress and restart - if c == '%': - self.notifyProgress(int(s)) + #: Reading a percentage sign -> set progress and restart + if c == "%": + self.notify_progress(int(s)) s = "" - # not reading a digit -> therefore restart + #: Not reading a digit -> therefore restart elif c not in digits: s = "" - # add digit to progressstring + #: Add digit to progressstring else: s += c @@ -148,7 +149,7 @@ class UnRar(Extractor): renice(p.pid, self.renice) - # communicate and retrieve stderr + #: Communicate and retrieve stderr self._progress(p) err = p.stderr.read().strip() @@ -159,7 +160,7 @@ class UnRar(Extractor): elif self.re_wrongcrc.search(err): raise CRCError(err) - else: #: raise error if anything is on stderr + else: #: Raise error if anything is on stderr raise ArchiveError(err) if p.returncode: @@ -168,15 +169,15 @@ class UnRar(Extractor): self.files = self.list(password) - def getDeleteFiles(self): + def get_delete_files(self): dir, name = os.path.split(self.filename) - # actually extracted file + #: Actually extracted file files = [self.filename] - # eventually Multipart Files - files.extend(save_join(dir, os.path.basename(file)) for file in filter(self.isMultipart, os.listdir(dir)) - if re.sub(self.re_multipart,".rar",name) == re.sub(self.re_multipart,".rar",file)) + #: eventually Multipart Files + files.extend(fs_join(dir, os.path.basename(file)) for file in filter(self.is_multipart, os.listdir(dir)) + if re.sub(self.re_multipart, ".rar", name) == re.sub(self.re_multipart, ".rar", file)) return files @@ -190,20 +191,19 @@ class UnRar(Extractor): if "Cannot open" in err: raise ArchiveError(_("Cannot open file")) - if err.strip(): #: only log error at this point - self.manager.logError(err.strip()) + if err.strip(): #: Only log error at this point + self.log_error(err.strip()) result = set() if not self.fullpath and self.VERSION.startswith('5'): - # NOTE: Unrar 5 always list full path + #@NOTE: Unrar 5 always list full path for f in fs_decode(out).splitlines(): - f = save_join(self.out, os.path.basename(f.strip())) + f = fs_join(self.out, os.path.basename(f.strip())) if os.path.isfile(f): - result.add(save_join(self.out, os.path.basename(f))) + result.add(fs_join(self.out, os.path.basename(f))) else: for f in fs_decode(out).splitlines(): - f = f.strip() - result.add(save_join(self.out, f)) + result.add(fs_join(self.out, f.strip())) return list(result) @@ -211,7 +211,7 @@ class UnRar(Extractor): def call_cmd(self, command, *xargs, **kwargs): args = [] - # overwrite flag + #: Overwrite flag if self.overwrite: args.append("-o+") else: @@ -222,10 +222,10 @@ class UnRar(Extractor): for word in self.excludefiles: args.append("-x'%s'" % word.strip()) - # assume yes on all queries + #: Assume yes on all queries args.append("-y") - # set a password + #: Set a password if "password" in kwargs and kwargs['password']: args.append("-p%s" % kwargs['password']) else: @@ -234,10 +234,10 @@ class UnRar(Extractor): if self.keepbroken: args.append("-kb") - # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) - self.manager.logDebug(" ".join(call)) + self.log_debug(" ".join(call)) p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return p diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 8d3fec370..9a01611bf 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -12,19 +12,20 @@ from module.utils import fs_encode class UnZip(Extractor): __name__ = "UnZip" - __version__ = "1.12" + __version__ = "1.15" + __status__ = "testing" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + VERSION = "%s.%s.%s" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]) EXTENSIONS = [".zip", ".zip64"] - VERSION ="(python %s.%s.%s)" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]) @classmethod - def isUsable(cls): + def find(cls): return sys.version_info[:2] >= (2, 6) diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py index e619cb038..e0f6b1ee8 100644 --- a/module/plugins/internal/XFSAccount.py +++ b/module/plugins/internal/XFSAccount.py @@ -4,14 +4,15 @@ import re import time import urlparse -from module.plugins.Account import Account -from module.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies +from module.plugins.internal.Account import Account +from module.plugins.internal.Plugin import parse_html_form, set_cookie class XFSAccount(Account): __name__ = "XFSAccount" __type__ = "account" - __version__ = "0.37" + __version__ = "0.42" + __status__ = "testing" __description__ = """XFileSharing account plugin""" __license__ = "GPLv3" @@ -30,34 +31,15 @@ class XFSAccount(Account): VALID_UNTIL_PATTERN = r'Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})' TRAFFIC_LEFT_PATTERN = r'Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' - TRAFFIC_LEFT_UNIT = "MB" #: used only if no group <U> was found + TRAFFIC_LEFT_UNIT = "MB" #: Used only if no group <U> was found LEECH_TRAFFIC_PATTERN = r'Leech Traffic left:<b>.*?(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' - LEECH_TRAFFIC_UNIT = "MB" #: used only if no group <U> was found + LEECH_TRAFFIC_UNIT = "MB" #: Used only if no group <U> was found LOGIN_FAIL_PATTERN = r'Incorrect Login or Password|account was banned|Error<' - def __init__(self, manager, accounts): #@TODO: remove in 0.4.10 - self.init() - return super(XFSAccount, self).__init__(manager, accounts) - - - def init(self): - if not self.HOSTER_DOMAIN: - self.logError(_("Missing HOSTER_DOMAIN")) - self.COOKIES = False - - else: - if not self.HOSTER_URL: - self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN - - if isinstance(self.COOKIES, list): - self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) - set_cookies(req.cj, self.COOKIES) - - - def loadAccountInfo(self, user, req): + def parse_info(self, user, password, data, req): validuntil = None trafficleft = None leechtraffic = None @@ -69,32 +51,34 @@ class XFSAccount(Account): 'leechtraffic': leechtraffic, 'premium' : premium} - html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) + html = self.load(self.HOSTER_URL, + get={'op': "my_account"}, + cookies=self.COOKIES) premium = True if re.search(self.PREMIUM_PATTERN, html) else False m = re.search(self.VALID_UNTIL_PATTERN, html) if m: expiredate = m.group(1).strip() - self.logDebug("Expire date: " + expiredate) + self.log_debug("Expire date: " + expiredate) try: validuntil = time.mktime(time.strptime(expiredate, "%d %B %Y")) except Exception, e: - self.logError(e) + self.log_error(e) else: - self.logDebug("Valid until: %s" % validuntil) + self.log_debug("Valid until: %s" % validuntil) if validuntil > time.mktime(time.gmtime()): premium = True trafficleft = -1 else: premium = False - validuntil = None #: registered account type (not premium) + validuntil = None #: Registered account type (not premium) else: - self.logDebug("VALID_UNTIL_PATTERN not found") + self.log_debug("VALID_UNTIL_PATTERN not found") m = re.search(self.TRAFFIC_LEFT_PATTERN, html) if m: @@ -114,12 +98,12 @@ class XFSAccount(Account): else: unit = "" - trafficleft = self.parseTraffic(size + unit) + trafficleft = self.parse_traffic(size + unit) except Exception, e: - self.logError(e) + self.log_error(e) else: - self.logDebug("TRAFFIC_LEFT_PATTERN not found") + self.log_debug("TRAFFIC_LEFT_PATTERN not found") leech = [m.groupdict() for m in re.finditer(self.LEECH_TRAFFIC_PATTERN, html)] if leech: @@ -141,12 +125,12 @@ class XFSAccount(Account): else: unit = "" - leechtraffic += self.parseTraffic(size + unit) + leechtraffic += self.parse_traffic(size + unit) except Exception, e: - self.logError(e) + self.log_error(e) else: - self.logDebug("LEECH_TRAFFIC_PATTERN not found") + self.log_debug("LEECH_TRAFFIC_PATTERN not found") return {'validuntil' : validuntil, 'trafficleft' : trafficleft, @@ -154,28 +138,39 @@ class XFSAccount(Account): 'premium' : premium} - def login(self, user, data, req): - if not self.HOSTER_URL: #@TODO: Remove in 0.4.10 - raise Exception(_("Missing HOSTER_DOMAIN")) + def login(self, user, password, data, req): + if self.HOSTER_DOMAIN: + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN + + if self.COOKIES: + if isinstance(self.COOKIES, list) and not self.COOKIES.count((self.HOSTER_DOMAIN, "lang", "english")): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + else: + set_cookie(self.req.cj, self.HOSTER_DOMAIN, "lang", "english") + + if not self.HOSTER_URL: + self.login_fail(_("Missing HOSTER_URL")) if not self.LOGIN_URL: self.LOGIN_URL = urlparse.urljoin(self.HOSTER_URL, "login.html") - html = req.load(self.LOGIN_URL, decode=True) - action, inputs = parseHtmlForm('name="FL"', html) + html = self.load(self.LOGIN_URL, cookies=self.COOKIES) + + action, inputs = parse_html_form('name="FL"', html) if not inputs: inputs = {'op' : "login", 'redirect': self.HOSTER_URL} inputs.update({'login' : user, - 'password': data['password']}) + 'password': password}) if action: url = urlparse.urljoin("http://", action) else: url = self.HOSTER_URL - html = req.load(url, post=inputs, decode=True) + html = self.load(url, post=inputs, cookies=self.COOKIES) if re.search(self.LOGIN_FAIL_PATTERN, html): - self.wrongPassword() + self.login_fail() diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py index 80eff53ea..4c059d647 100644 --- a/module/plugins/internal/XFSCrypter.py +++ b/module/plugins/internal/XFSCrypter.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- +from module.plugins.internal.Plugin import set_cookie from module.plugins.internal.SimpleCrypter import SimpleCrypter, create_getInfo class XFSCrypter(SimpleCrypter): __name__ = "XFSCrypter" __type__ = "crypter" - __version__ = "0.09" + __version__ = "0.13" + __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -19,10 +21,10 @@ class XFSCrypter(SimpleCrypter): URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] - LINK_PATTERN = r'<(?:td|TD).*?>\s*(?:<.+>\s*)?<a href="(.+?)".*?>.+?(?:</a>)?\s*(?:<.+>\s*)?</(?:td|TD)>' NAME_PATTERN = r'<[Tt]itle>.*?\: (?P<N>.+) folder</[Tt]itle>' + LINK_PATTERN = r'<(?:td|TD).*?>\s*(?:<.+>\s*)?<a href="(.+?)".*?>.+?(?:</a>)?\s*(?:<.+>\s*)?</(?:td|TD)>' - OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + OFFLINE_PATTERN = r'>\s*(No such user|\w+ (Not Found|file (was|has been) removed|no longer available)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' @@ -32,14 +34,17 @@ class XFSCrypter(SimpleCrypter): account = self.account else: account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") - account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) + account = self.pyload.accountManager.getAccountPlugin(account_name) if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: self.HOSTER_DOMAIN = account.HOSTER_DOMAIN else: self.fail(_("Missing HOSTER_DOMAIN")) - if isinstance(self.COOKIES, list): - self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + if self.COOKIES: + if isinstance(self.COOKIES, list) and not self.COOKIES.count((self.HOSTER_DOMAIN, "lang", "english")): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + else: + set_cookie(self.req.cj, self.HOSTER_DOMAIN, "lang", "english") return super(XFSCrypter, self).prepare() diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index 34e319500..5e0830dc6 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -3,18 +3,19 @@ import pycurl import random import re -import urlparse -from module.plugins.internal.ReCaptcha import ReCaptcha -from module.plugins.internal.SolveMedia import SolveMedia -from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, secondsToMidnight +from module.plugins.captcha.ReCaptcha import ReCaptcha +from module.plugins.captcha.SolveMedia import SolveMedia +from module.plugins.internal.Plugin import set_cookie +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, seconds_to_midnight from module.utils import html_unescape class XFSHoster(SimpleHoster): __name__ = "XFSHoster" __type__ = "hoster" - __version__ = "0.52" + __version__ = "0.57" + __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -27,14 +28,12 @@ class XFSHoster(SimpleHoster): HOSTER_DOMAIN = None - TEXT_ENCODING = False - DIRECT_LINK = None - MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... + LEECH_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... NAME_PATTERN = r'(Filename[ ]*:[ ]*</b>(</td><td nowrap>)?|name="fname"[ ]+value="|<[\w^_]+ class="(file)?name">)\s*(?P<N>.+?)(\s*<|")' SIZE_PATTERN = r'(Size[ ]*:[ ]*</b>(</td><td>)?|File:.*>|</font>\s*\(|<[\w^_]+ class="size">)\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' - OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed|no longer available)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' WAIT_PATTERN = r'<span id="countdown_str".*>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' @@ -43,7 +42,6 @@ class XFSHoster(SimpleHoster): ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' LINK_LEECH_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' - LINK_PATTERN = None #: final download url pattern CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' CAPTCHA_BLOCK_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>' @@ -51,29 +49,34 @@ class XFSHoster(SimpleHoster): SOLVEMEDIA_PATTERN = None FORM_PATTERN = None - FORM_INPUTS_MAP = None #: dict passed as input_names to parseHtmlForm + FORM_INPUTS_MAP = None #: Dict passed as input_names to parse_html_form def setup(self): - self.chunkLimit = -1 if self.premium else 1 - self.resumeDownload = self.multiDL = self.premium + self.chunk_limit = -1 if self.premium else 1 + self.resume_download = self.multiDL = self.premium def prepare(self): - """ Initialize important variables """ + """ + Initialize important variables + """ if not self.HOSTER_DOMAIN: if self.account: account = self.account else: - account = self.pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + account = self.pyload.accountManager.getAccountPlugin(self.__name__) if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: self.HOSTER_DOMAIN = account.HOSTER_DOMAIN else: self.fail(_("Missing HOSTER_DOMAIN")) - if isinstance(self.COOKIES, list): - self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + if self.COOKIES: + if isinstance(self.COOKIES, list) and not self.COOKIES.count((self.HOSTER_DOMAIN, "lang", "english")): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + else: + set_cookie(self.req.cj, self.HOSTER_DOMAIN, "lang", "english") if not self.LINK_PATTERN: pattern = r'(?:file: "(.+?)"|(https?://(?:www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<])' @@ -82,24 +85,24 @@ class XFSHoster(SimpleHoster): super(XFSHoster, self).prepare() if self.DIRECT_LINK is None: - self.directDL = self.premium + self.direct_dl = self.premium - def handleFree(self, pyfile): + def handle_free(self, pyfile): for i in xrange(1, 6): - self.logDebug("Getting download link #%d" % i) + self.log_debug("Getting download link #%d" % i) - self.checkErrors() + self.check_errors() m = re.search(self.LINK_PATTERN, self.html, re.S) if m: break - data = self.getPostParameters() + data = self.get_post_parameters() self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.html = self.load(pyfile.url, post=data, decode=True) + self.html = self.load(pyfile.url, post=data) self.req.http.c.setopt(pycurl.FOLLOWLOCATION, 1) @@ -111,24 +114,24 @@ class XFSHoster(SimpleHoster): if m: break else: - self.logError(data['op'] if 'op' in data else _("UNKNOWN")) - return "" + if 'op' in data: + self.error(_("Missing OP data after: ") + data['op']) - self.link = m.group(1).strip() #@TODO: Remove .strip() in 0.4.10 + self.link = m.group(1) - def handlePremium(self, pyfile): - return self.handleFree(pyfile) + def handle_premium(self, pyfile): + return self.handle_free(pyfile) - def handleMulti(self, pyfile): + def handle_multi(self, pyfile): if not self.account: self.fail(_("Only registered or premium users can use url leech feature")) - #only tested with easybytez.com + #: Only tested with easybytez.com self.html = self.load("http://www.%s/" % self.HOSTER_DOMAIN) - action, inputs = self.parseHtmlForm() + action, inputs = self.parse_html_form() upload_id = "%012d" % int(random.random() * 10 ** 12) action += upload_id + "&js_on=1&utype=prem&upload_type=url" @@ -137,19 +140,19 @@ class XFSHoster(SimpleHoster): inputs['url_mass'] = pyfile.url inputs['up1oad_type'] = 'url' - self.logDebug(action, inputs) + self.log_debug(action, inputs) - self.req.setOption("timeout", 600) #: wait for file to upload to easybytez.com + self.req.setOption("timeout", 600) #: Wait for file to upload to easybytez.com self.html = self.load(action, post=inputs) - self.checkErrors() + self.check_errors() - action, inputs = self.parseHtmlForm('F1') + action, inputs = self.parse_html_form('F1') if not inputs: self.retry(reason=self.info['error'] if 'error' in self.info else _("TEXTAREA F1 not found")) - self.logDebug(inputs) + self.log_debug(inputs) stmsg = inputs['st'] @@ -160,38 +163,38 @@ class XFSHoster(SimpleHoster): self.retry(20, 3 * 60, _("Can not leech file")) elif 'today' in stmsg: - self.retry(wait_time=secondsToMidnight(gmt=2), reason=_("You've used all Leech traffic today")) + self.retry(wait_time=seconds_to_midnight(gmt=2), reason=_("You've used all Leech traffic today")) else: self.fail(stmsg) - #get easybytez.com link for uploaded file + #: Get easybytez.com link for uploaded file m = re.search(self.LINK_LEECH_PATTERN, self.html) if m is None: self.error(_("LINK_LEECH_PATTERN not found")) - header = self.load(m.group(1), just_header=True, decode=True) + header = self.load(m.group(1), just_header=True) if 'location' in header: #: Direct download link self.link = header['location'] - def getPostParameters(self): + def get_post_parameters(self): if self.FORM_PATTERN or self.FORM_INPUTS_MAP: - action, inputs = self.parseHtmlForm(self.FORM_PATTERN or "", self.FORM_INPUTS_MAP or {}) + action, inputs = self.parse_html_form(self.FORM_PATTERN or "", self.FORM_INPUTS_MAP or {}) else: - action, inputs = self.parseHtmlForm(input_names={'op': re.compile(r'^download')}) + action, inputs = self.parse_html_form(input_names={'op': re.compile(r'^download')}) if not inputs: - action, inputs = self.parseHtmlForm('F1') + action, inputs = self.parse_html_form('F1') if not inputs: self.retry(reason=self.info['error'] if 'error' in self.info else _("TEXTAREA F1 not found")) - self.logDebug(inputs) + self.log_debug(inputs) if 'op' in inputs: if "password" in inputs: - password = self.getPassword() + password = self.get_password() if password: inputs['password'] = password else: @@ -201,9 +204,9 @@ class XFSHoster(SimpleHoster): m = re.search(self.WAIT_PATTERN, self.html) if m: wait_time = int(m.group(1)) - self.setWait(wait_time, False) + self.set_wait(wait_time, False) - self.handleCaptcha(inputs) + self.handle_captcha(inputs) self.wait() else: inputs['referer'] = self.pyfile.url @@ -218,11 +221,11 @@ class XFSHoster(SimpleHoster): return inputs - def handleCaptcha(self, inputs): + def handle_captcha(self, inputs): m = re.search(self.CAPTCHA_PATTERN, self.html) if m: captcha_url = m.group(1) - inputs['code'] = self.decryptCaptcha(captcha_url) + inputs['code'] = self.captcha.decrypt(captcha_url) return m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S) @@ -230,11 +233,11 @@ class XFSHoster(SimpleHoster): captcha_div = m.group(1) numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) - self.logDebug(captcha_div) + self.log_debug(captcha_div) inputs['code'] = "".join(a[1] for a in sorted(numerals, key=lambda num: int(num[0]))) - self.logDebug("Captcha code: %s" % inputs['code'], numerals) + self.log_debug("Captcha code: %s" % inputs['code'], numerals) return recaptcha = ReCaptcha(self) @@ -245,7 +248,7 @@ class XFSHoster(SimpleHoster): captcha_key = recaptcha.detect_key() else: - self.logDebug("ReCaptcha key: %s" % captcha_key) + self.log_debug("ReCaptcha key: %s" % captcha_key) if captcha_key: inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = recaptcha.challenge(captcha_key) @@ -259,7 +262,7 @@ class XFSHoster(SimpleHoster): captcha_key = solvemedia.detect_key() else: - self.logDebug("SolveMedia key: %s" % captcha_key) + self.log_debug("SolveMedia key: %s" % captcha_key) if captcha_key: inputs['adcopy_response'], inputs['adcopy_challenge'] = solvemedia.challenge(captcha_key) |