diff options
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/AbstractExtractor.py | 109 | ||||
-rw-r--r-- | module/plugins/internal/BasePlugin.py | 71 | ||||
-rw-r--r-- | module/plugins/internal/CaptchaService.py | 522 | ||||
-rw-r--r-- | module/plugins/internal/DeadCrypter.py | 9 | ||||
-rw-r--r-- | module/plugins/internal/DeadHoster.py | 9 | ||||
-rw-r--r-- | module/plugins/internal/Extractor.py | 139 | ||||
-rw-r--r-- | module/plugins/internal/MultiHook.py | 308 | ||||
-rw-r--r-- | module/plugins/internal/MultiHoster.py | 219 | ||||
-rw-r--r-- | module/plugins/internal/SevenZip.py | 155 | ||||
-rw-r--r-- | module/plugins/internal/SimpleCrypter.py | 77 | ||||
-rw-r--r-- | module/plugins/internal/SimpleDereferer.py | 98 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 549 | ||||
-rw-r--r-- | module/plugins/internal/UnRar.py | 271 | ||||
-rw-r--r-- | module/plugins/internal/UnZip.py | 65 | ||||
-rw-r--r-- | module/plugins/internal/XFSAccount.py | 53 | ||||
-rw-r--r-- | module/plugins/internal/XFSCrypter.py | 26 | ||||
-rw-r--r-- | module/plugins/internal/XFSHoster.py | 125 |
17 files changed, 2020 insertions, 785 deletions
diff --git a/module/plugins/internal/AbstractExtractor.py b/module/plugins/internal/AbstractExtractor.py deleted file mode 100644 index 5e09c6755..000000000 --- a/module/plugins/internal/AbstractExtractor.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- - -class ArchiveError(Exception): - pass - - -class CRCError(Exception): - pass - - -class WrongPassword(Exception): - pass - - -class AbtractExtractor(object): - __name__ = "AbtractExtractor" - __version__ = "0.10" - - __description__ = """Abtract extractor plugin""" - __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - - - @staticmethod - def checkDeps(): - """ Check if system statisfy dependencies - :return: boolean - """ - return True - - - @staticmethod - def getTargets(files_ids): - """ Filter suited targets from list of filename id tuple list - :param files_ids: List of filepathes - :return: List of targets, id tuple list - """ - raise NotImplementedError - - - def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice): - """Initialize extractor for specific file - - :param m: ExtractArchive Addon plugin - :param file: Absolute filepath - :param out: Absolute path to destination directory - :param fullpath: extract to fullpath - :param overwrite: Overwrite existing archives - :param renice: Renice value - """ - self.m = m - self.file = file - self.out = out - self.fullpath = fullpath - self.overwrite = overwrite - self.excludefiles = excludefiles - self.renice = renice - self.files = [] #: Store extracted files here - - - def init(self): - """ Initialize additional data structures """ - pass - - - def checkArchive(self): - """Check if password if needed. Raise ArchiveError if integrity is - questionable. - - :return: boolean - :raises ArchiveError - """ - return False - - - def checkPassword(self, password): - """ Check if the given password is/might be correct. - If it can not be decided at this point return true. - - :param password: - :return: boolean - """ - return True - - - def extract(self, progress, password=None): - """Extract the archive. Raise specific errors in case of failure. - - :param progress: Progress function, call this to update status - :param password password to use - :raises WrongPassword - :raises CRCError - :raises ArchiveError - :return: - """ - raise NotImplementedError - - - def getDeleteFiles(self): - """Return list of files to delete, do *not* delete them here. - - :return: List with paths of files to delete - """ - raise NotImplementedError - - - def getExtractedFiles(self): - """Populate self.files at some point while extracting""" - return self.files diff --git a/module/plugins/internal/BasePlugin.py b/module/plugins/internal/BasePlugin.py index 954fe4e22..792497449 100644 --- a/module/plugins/internal/BasePlugin.py +++ b/module/plugins/internal/BasePlugin.py @@ -5,15 +5,15 @@ import re from urllib import unquote from urlparse import urljoin, urlparse -from pyload.network.HTTPRequest import BadHeader -from pyload.plugin.internal.SimpleHoster import create_getInfo -from pyload.plugin.Hoster import Hoster +from module.network.HTTPRequest import BadHeader +from module.plugins.internal.SimpleHoster import create_getInfo, fileUrl +from module.plugins.Hoster import Hoster class BasePlugin(Hoster): __name__ = "BasePlugin" __type__ = "hoster" - __version__ = "0.25" + __version__ = "0.34" __pattern__ = r'^unmatchable$' @@ -25,11 +25,19 @@ class BasePlugin(Hoster): @classmethod def getInfo(cls, url="", html=""): #@TODO: Move to hoster class in 0.4.10 - return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': unquote(url) or ""} + url = unquote(url) + url_p = urlparse(url) + return {'name' : (url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 'size' : 0, + 'status': 3 if url else 8, + 'url' : url} def setup(self): - self.chunkLimit = -1 + self.chunkLimit = -1 + self.multiDL = True self.resumeDownload = True @@ -43,7 +51,12 @@ class BasePlugin(Hoster): for _i in xrange(5): try: - self.downloadFile(pyfile) + link = fileUrl(self, unquote(pyfile.url)) + + if link: + self.download(link, ref=False, disposition=True) + else: + self.fail(_("File not found")) except BadHeader, e: if e.code is 404: @@ -58,12 +71,11 @@ class BasePlugin(Hoster): if server in servers: self.logDebug("Logging on to %s" % server) - self.req.addAuth(account.accounts[server]['password']) + self.req.addAuth(account.getAccountData(server)['password']) else: - for pwd in self.getPassword().splitlines(): - if ":" in pwd: - self.req.addAuth(pwd.strip()) - break + pwd = self.getPassword() + if ':' in pwd: + self.req.addAuth(pwd) else: self.fail(_("Authorization required")) else: @@ -73,34 +85,11 @@ class BasePlugin(Hoster): else: self.fail(_("No file downloaded")) #@TODO: Move to hoster class in 0.4.10 - if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10 - self.fail(_("Empty file")) - + check = self.checkDownload({'empty file': re.compile(r'\A\Z'), + 'html file' : re.compile(r'\A\s*<!DOCTYPE html'), + 'html error': re.compile(r'\A\s*(<.+>)?\d{3}(\Z|\s+)')}) + if check: + self.fail(check.capitalize()) - def downloadFile(self, pyfile): - url = pyfile.url - - for i in xrange(1, 7): #@TODO: retrieve the pycurl.MAXREDIRS value set by req - header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) - - if 'location' not in header or not header['location']: - if 'code' in header and header['code'] not in (200, 201, 203, 206): - self.logDebug("Received HTTP status code: %d" % header['code']) - self.fail(_("File not found")) - else: - break - - location = header['location'] - - self.logDebug("Redirect #%d to: %s" % (i, location)) - - if urlparse(location).scheme: - url = location - else: - p = urlparse(url) - base = "%s://%s" % (p.scheme, p.netloc) - url = urljoin(base, location) - else: - self.fail(_("Too many redirects")) - self.download(unquote(url), disposition=True) +getInfo = create_getInfo(BasePlugin) diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py new file mode 100644 index 000000000..6f2c8e06d --- /dev/null +++ b/module/plugins/internal/CaptchaService.py @@ -0,0 +1,522 @@ +# -*- coding: utf-8 -*- + +import re +import time + +from base64 import b64encode +from random import random, randint +from urlparse import urljoin, urlparse + +from module.common.json_layer import json_loads +from module.plugins.Plugin import Base + + +#@TODO: Extend (new) Plugin class; remove all `html` args +class CaptchaService(Base): + __name__ = "CaptchaService" + __version__ = "0.25" + + __description__ = """Base captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + key = None #: last key detected + + + def __init__(self, plugin): + self.plugin = plugin + super(CaptchaService, self).__init__(plugin.core) + + + def detect_key(self, html=None): + raise NotImplementedError + + + def challenge(self, key=None, html=None): + raise NotImplementedError + + + def result(self, server, challenge): + raise NotImplementedError + + +class ReCaptcha(CaptchaService): + __name__ = "ReCaptcha" + __version__ = "0.14" + + __description__ = """ReCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("zapp-brannigan", "fuerst.reinje@web.de")] + + + KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)' + KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("ReCaptcha html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.logDebug("Key: %s" % self.key) + return self.key + else: + self.logDebug("Key not found") + return None + + + def challenge(self, key=None, html=None, version=None): + if not key: + if self.detect_key(html): + key = self.key + else: + errmsg = _("ReCaptcha key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + if version in (1, 2): + return getattr(self, "_challenge_v%s" % version)(key) + + elif not html and hasattr(self.plugin, "html") and self.plugin.html: + version = 2 if re.search(self.KEY_V2_PATTERN, self.plugin.html) else 1 + return self.challenge(key, self.plugin.html, version) + + else: + errmsg = _("ReCaptcha html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + + def _challenge_v1(self, key): + html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", + get={'k': key}) + try: + challenge = re.search("challenge : '(.+?)',", html).group(1) + server = re.search("server : '(.+?)',", html).group(1) + + except AttributeError: + errmsg = _("ReCaptcha challenge pattern not found") + self.plugin.fail(errmsg) + raise AttributeError(errmsg) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(server, challenge), challenge + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha("%simage" % server, + get={'c': challenge}, + cookies=True, + forceUser=True, + imgtype="jpg") + + self.logDebug("Result: %s" % result) + + return result + + + def _collectApiInfo(self): + html = self.plugin.req.load("http://www.google.com/recaptcha/api.js") + a = re.search(r'po.src = \'(.*?)\';', html).group(1) + vers = a.split("/")[5] + + self.logDebug("API version: %s" %vers) + + language = a.split("__")[1].split(".")[0] + + self.logDebug("API language: %s" % language) + + html = self.plugin.req.load("https://apis.google.com/js/api.js") + b = re.search(r'"h":"(.*?)","', html).group(1) + jsh = b.decode('unicode-escape') + + self.logDebug("API jsh-string: %s" % jsh) + + return vers, language, jsh + + + def _prepareTimeAndRpc(self): + self.plugin.req.load("http://www.google.com/recaptcha/api2/demo") + + millis = int(round(time.time() * 1000)) + + self.logDebug("Time: %s" % millis) + + rand = randint(1, 99999999) + a = "0.%s" % str(rand * 2147483647) + rpc = int(100000000 * float(a)) + + self.logDebug("Rpc-token: %s" % rpc) + + return millis, rpc + + + def _challenge_v2(self, key, parent=None): + if parent is None: + try: + parent = urljoin("http://", urlparse(self.plugin.pyfile.url).netloc) + + except Exception: + parent = "" + + botguardstring = "!A" + vers, language, jsh = self._collectApiInfo() + millis, rpc = self._prepareTimeAndRpc() + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/anchor", + get={'k' : key, + 'hl' : language, + 'v' : vers, + 'usegapi' : "1", + 'jsh' : "%s#id=IO_%s" % (jsh, millis), + 'parent' : parent, + 'pfname' : "", + 'rpctoken': rpc}) + + token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html) + self.logDebug("Token #1: %s" % token1.group(1)) + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/frame", + get={'c' : token1.group(1), + 'hl' : language, + 'v' : vers, + 'bg' : botguardstring, + 'k' : key, + 'usegapi': "1", + 'jsh' : jsh}).decode('unicode-escape') + + token2 = re.search(r'"finput","(.*?)",', html) + self.logDebug("Token #2: %s" % token2.group(1)) + + token3 = re.search(r'."asconf".\s,".*?".\s,"(.*?)".', html) + self.logDebug("Token #3: %s" % token3.group(1)) + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/reload", + post={'k' : key, + 'c' : token2.group(1), + 'reason': "fi", + 'fbg' : token3.group(1)}) + + token4 = re.search(r'"rresp","(.*?)",', html) + self.logDebug("Token #4: %s" % token4.group(1)) + + millis_captcha_loading = int(round(time.time() * 1000)) + captcha_response = self.plugin.decryptCaptcha("https://www.google.com/recaptcha/api2/payload", + get={'c':token4.group(1), 'k':key}, + cookies=True, + forceUser=True) + response = b64encode('{"response":"%s"}' % captcha_response) + + self.logDebug("Result: %s" % response) + + timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading + timeToSolveMore = timeToSolve + int(float("0." + str(randint(1, 99999999))) * 500) + + html = self.plugin.req.load("https://www.google.com/recaptcha/api2/userverify", + post={'k' : key, + 'c' : token4.group(1), + 'response': response, + 't' : timeToSolve, + 'ct' : timeToSolveMore, + 'bg' : botguardstring}) + + token5 = re.search(r'"uvresp","(.*?)",', html) + self.logDebug("Token #5: %s" % token5.group(1)) + + result = token5.group(1) + + return result, None + + + +class AdsCaptcha(CaptchaService): + __name__ = "AdsCaptcha" + __version__ = "0.08" + + __description__ = """AdsCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*CaptchaId=(\d+)' + PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*PublicKey=([\w-]+)' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("AdsCaptcha html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.PUBLICKEY_PATTERN, html) + n = re.search(self.CAPTCHAID_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) #: key is the tuple(PublicKey, CaptchaId) + self.logDebug("Key|id: %s | %s" % self.key) + return self.key + else: + self.logDebug("Key or id not found") + return None + + + def challenge(self, key=None, html=None): + if not key: + if self.detect_key(html): + key = self.key + else: + errmsg = _("AdsCaptcha key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + PublicKey, CaptchaId = key + + html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", + get={'CaptchaId': CaptchaId, + 'PublicKey': PublicKey}) + try: + challenge = re.search("challenge: '(.+?)',", html).group(1) + server = re.search("server: '(.+?)',", html).group(1) + + except AttributeError: + errmsg = _("AdsCaptcha challenge pattern not found") + self.plugin.fail(errmsg) + raise AttributeError(errmsg) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(server, challenge), challenge + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, + get={'cid': challenge, 'dummy': random()}, + cookies=True, + imgtype="jpg") + + self.logDebug("Result: %s" % result) + + return result + + +class SolveMedia(CaptchaService): + __name__ = "SolveMedia" + __version__ = "0.12" + + __description__ = """SolveMedia captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("SolveMedia html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.KEY_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.logDebug("Key: %s" % self.key) + return self.key + else: + self.logDebug("Key not found") + return None + + + def challenge(self, key=None, html=None): + if not key: + if self.detect_key(html): + key = self.key + else: + errmsg = _("SolveMedia key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", + get={'k': key}) + try: + challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="([^"]+)">', + html).group(1) + server = "http://api.solvemedia.com/papi/media" + + except AttributeError: + errmsg = _("SolveMedia challenge pattern not found") + self.plugin.fail(errmsg) + raise AttributeError(errmsg) + + self.logDebug("Challenge: %s" % challenge) + + result = self.result(server, challenge) + + try: + magic = re.search(r'name="magic" value="(.+?)"', html).group(1) + + except AttributeError: + self.logDebug("Magic code not found") + + else: + if not self._verify(key, magic, result, challenge): + self.logDebug("Captcha code was invalid") + + return result, challenge + + + def _verify(self, key, magic, result, challenge, ref=None): #@TODO: Clean up + if ref is None: + try: + ref = self.plugin.pyfile.url + + except Exception: + ref = "" + + html = self.plugin.req.load("http://api.solvemedia.com/papi/verify.noscript", + post={'adcopy_response' : result, + 'k' : key, + 'l' : "en", + 't' : "img", + 's' : "standard", + 'magic' : magic, + 'adcopy_challenge' : challenge, + 'ref' : ref}) + try: + html = self.plugin.req.load(re.search(r'URL=(.+?)">', html).group(1)) + gibberish = re.search(r'id=gibberish>(.+?)</textarea>', html).group(1) + + except Exception: + return False + + else: + return True + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha(server, + get={'c': challenge}, + cookies=True, + imgtype="gif") + + self.logDebug("Result: %s" % result) + + return result + + +class AdYouLike(CaptchaService): + __name__ = "AdYouLike" + __version__ = "0.05" + + __description__ = """AdYouLike captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' + CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("AdYouLike html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.AYL_PATTERN, html) + n = re.search(self.CALLBACK_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) + self.logDebug("Ayl|callback: %s | %s" % self.key) + return self.key #: key is the tuple(ayl, callback) + else: + self.logDebug("Ayl or callback not found") + return None + + + def challenge(self, key=None, html=None): + if not key: + if self.detect_key(html): + key = self.key + else: + errmsg = _("AdYouLike key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + ayl, callback = key + + # {"adyoulike":{"key":"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, + # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} + ayl = json_loads(ayl) + + html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", + get={'key' : ayl['adyoulike']['key'], + 'env' : ayl['all']['env'], + 'callback': callback}) + try: + challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) + + except AttributeError: + errmsg = _("AdYouLike challenge pattern not found") + self.plugin.fail(errmsg) + raise AttributeError(errmsg) + + self.logDebug("Challenge: %s" % challenge) + + return self.result(ayl, challenge), challenge + + + def result(self, server, challenge): + # Adyoulike.g._jsonp_5579316662423138 + # ({"translations":{"fr":{"instructions_visual":"Recopiez « Soonnight » ci-dessous :"}}, + # "site_under":true,"clickable":true,"pixels":{"VIDEO_050":[],"DISPLAY":[],"VIDEO_000":[],"VIDEO_100":[], + # "VIDEO_025":[],"VIDEO_075":[]},"medium_type":"image/adyoulike", + # "iframes":{"big":"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" + # height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},"shares":{},"id":256, + # "token":"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1","formats":{"small":{"y":300,"x":0,"w":300,"h":60}, + # "big":{"y":0,"x":0,"w":300,"h":250},"hover":{"y":440,"x":0,"w":300,"h":60}}, + # "tid":"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) + + if isinstance(server, basestring): + server = json_loads(server) + + if isinstance(challenge, basestring): + challenge = json_loads(challenge) + + try: + instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] + result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() + + except AttributeError: + errmsg = _("AdYouLike result not found") + self.plugin.fail(errmsg) + raise AttributeError(errmsg) + + result = {'_ayl_captcha_engine' : "adyoulike", + '_ayl_env' : server['all']['env'], + '_ayl_tid' : challenge['tid'], + '_ayl_token_challenge': challenge['token'], + '_ayl_response' : response} + + self.logDebug("Result: %s" % result) + + return result diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py index 97d99021e..ce56947fc 100644 --- a/module/plugins/internal/DeadCrypter.py +++ b/module/plugins/internal/DeadCrypter.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -from urllib import unquote -from urlparse import urlparse - from pyload.plugin.Crypter import Crypter as _Crypter from pyload.plugin.internal.SimpleCrypter import create_getInfo @@ -20,8 +17,10 @@ class DeadCrypter(_Crypter): @classmethod - def getInfo(cls, url="", html=""): - return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} + def apiInfo(cls, url="", get={}, post={}): + api = super(DeadCrypter, self).apiInfo(url, get, post) + api['status'] = 1 + return api def setup(self): diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py index 27036a139..132e4741a 100644 --- a/module/plugins/internal/DeadHoster.py +++ b/module/plugins/internal/DeadHoster.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- -from urllib import unquote -from urlparse import urlparse - from pyload.plugin.Hoster import Hoster as _Hoster from pyload.plugin.internal.SimpleHoster import create_getInfo @@ -20,8 +17,10 @@ class DeadHoster(_Hoster): @classmethod - def getInfo(cls, url="", html=""): - return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} + def apiInfo(cls, url="", get={}, post={}): + api = super(DeadHoster, self).apiInfo(url, get, post) + api['status'] = 1 + return api def setup(self): diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py new file mode 100644 index 000000000..b445f1497 --- /dev/null +++ b/module/plugins/internal/Extractor.py @@ -0,0 +1,139 @@ +# -*- coding: utf-8 -*- + +import os + +from module.PyFile import PyFile + + +class ArchiveError(Exception): + pass + + +class CRCError(Exception): + pass + + +class PasswordError(Exception): + pass + + +class Extractor: + __name__ = "Extractor" + __version__ = "0.20" + + __description__ = """Base extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN", "ranan@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("Immenz", "immenz@gmx.net")] + + + EXTENSIONS = [] + VERSION = "" + + + @classmethod + def isArchive(cls, filename): + name = os.path.basename(filename).lower() + return any(name.endswith(ext) for ext in cls.EXTENSIONS) and not cls.isMultipart(filename) + + + @classmethod + def isMultipart(cls,filename): + return False + + + @classmethod + def isUsable(cls): + """ Check if system statisfy dependencies + :return: boolean + """ + return None + + + @classmethod + def getTargets(cls, files_ids): + """ Filter suited targets from list of filename id tuple list + :param files_ids: List of filepathes + :return: List of targets, id tuple list + """ + return [(fname, id, fout) for fname, id, fout in files_ids if cls.isArchive(fname)] + + + def __init__(self, manager, filename, out, + fullpath=True, + overwrite=False, + excludefiles=[], + renice=0, + delete=False, + keepbroken=False, + fid=None): + """ Initialize extractor for specific file """ + self.manager = manager + self.filename = filename + self.out = out + self.fullpath = fullpath + self.overwrite = overwrite + self.excludefiles = excludefiles + self.renice = renice + self.delete = delete + self.keepbroken = keepbroken + self.files = [] #: Store extracted files here + + pyfile = self.manager.core.files.getFile(fid) if fid else None + self.notifyProgress = lambda x: pyfile.setProgress(x) if pyfile else lambda x: None + + + def init(self): + """ Initialize additional data structures """ + pass + + + def check(self): + """Check if password if needed. Raise ArchiveError if integrity is + questionable. + + :return: boolean + :raises ArchiveError + """ + raise PasswordError + + + def isPassword(self, password): + """ Check if the given password is/might be correct. + If it can not be decided at this point return true. + + :param password: + :return: boolean + """ + return None + + + def repair(self): + return None + + + def extract(self, password=None): + """Extract the archive. Raise specific errors in case of failure. + + :param progress: Progress function, call this to update status + :param password password to use + :raises PasswordError + :raises CRCError + :raises ArchiveError + :return: + """ + raise NotImplementedError + + + def getDeleteFiles(self): + """Return list of files to delete, do *not* delete them here. + + :return: List with paths of files to delete + """ + return [self.filename] + + + def list(self, password=None): + """Populate self.files at some point while extracting""" + return self.files diff --git a/module/plugins/internal/MultiHook.py b/module/plugins/internal/MultiHook.py new file mode 100644 index 000000000..652443098 --- /dev/null +++ b/module/plugins/internal/MultiHook.py @@ -0,0 +1,308 @@ +# -*- coding: utf-8 -*- + +import re + +from time import sleep + +from module.plugins.Hook import Hook +from module.utils import decode, remove_chars + + +class MultiHook(Hook): + __name__ = "MultiHook" + __type__ = "hook" + __version__ = "0.37" + + __config__ = [("pluginmode" , "all;listed;unlisted", "Use for plugins" , "all"), + ("pluginlist" , "str" , "Plugin list (comma separated)" , "" ), + ("revertfailed" , "bool" , "Revert to standard download if fails", True ), + ("retry" , "int" , "Number of retries before revert" , 10 ), + ("retryinterval" , "int" , "Retry interval in minutes" , 1 ), + ("reload" , "bool" , "Reload plugin list" , True ), + ("reloadinterval", "int" , "Reload interval in hours" , 12 )] + + __description__ = """Hook plugin for multi hoster/crypter""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + MIN_INTERVAL = 1 * 60 * 60 + + DOMAIN_REPLACEMENTS = [(r'180upload\.com' , "hundredeightyupload.com"), + (r'1fichier\.com' , "onefichier.com" ), + (r'2shared\.com' , "twoshared.com" ), + (r'4shared\.com' , "fourshared.com" ), + (r'bayfiles\.net' , "bayfiles.com" ), + (r'cloudnator\.com' , "shragle.com" ), + (r'dfiles\.eu' , "depositfiles.com" ), + (r'easy-share\.com' , "crocko.com" ), + (r'freakshare\.net' , "freakshare.com" ), + (r'hellshare\.com' , "hellshare.cz" ), + (r'ifile\.it' , "filecloud.io" ), + (r'nowdownload\.\w+', "nowdownload.sx" ), + (r'nowvideo\.\w+' , "nowvideo.sx" ), + (r'putlocker\.com' , "firedrive.com" ), + (r'share-?rapid\.cz', "multishare.cz" ), + (r'ul\.to' , "uploaded.to" ), + (r'uploaded\.net' , "uploaded.to" ), + (r'uploadhero\.co' , "uploadhero.com" ), + (r'zshares\.net' , "zshare.net" ), + (r'(\d+.+)' , "X\1" )] + + + def setup(self): + self.plugins = [] + self.supported = [] + self.new_supported = [] + + self.account = None + self.pluginclass = None + self.pluginmodule = None + self.pluginname = None + self.plugintype = None + + self._initPlugin() + + + def _initPlugin(self): + plugin, type = self.core.pluginManager.findPlugin(self.__name__) + + if not plugin: + self.logWarning("Hook plugin will be deactivated due missing plugin reference") + self.setConfig('activated', False) + else: + self.pluginname = self.__name__ + self.plugintype = type + self.pluginmodule = self.core.pluginManager.loadModule(type, self.__name__) + self.pluginclass = getattr(self.pluginmodule, self.__name__) + + + def _loadAccount(self): + self.account = self.core.accountManager.getAccountPlugin(self.pluginname) + + if self.account and not self.account.canUse(): + self.account = None + + if not self.account and hasattr(self.pluginclass, "LOGIN_ACCOUNT") and self.pluginclass.LOGIN_ACCOUNT: + self.logWarning("Hook plugin will be deactivated due missing account reference") + self.setConfig('activated', False) + + + def coreReady(self): + self._loadAccount() + + + def getURL(self, *args, **kwargs): #@TODO: Remove in 0.4.10 + """ see HTTPRequest for argument list """ + h = pyreq.getHTTPRequest(timeout=120) + try: + if not 'decode' in kwargs: + kwargs['decode'] = True + rep = h.load(*args, **kwargs) + finally: + h.close() + + return rep + + + def getConfig(self, option, default=''): + """getConfig with default value - sublass may not implements all config options""" + try: + return self.getConf(option) + + except KeyError: + return default + + + def pluginsCached(self): + if self.plugins: + return self.plugins + + for _i in xrange(3): + try: + pluginset = self._pluginSet(self.getHosters() if self.plugintype == "hoster" else self.getCrypters()) + + except Exception, e: + self.logError(e, "Waiting 1 minute and retry") + sleep(60) + + else: + break + else: + return list() + + try: + configmode = self.getConfig("pluginmode", 'all') + if configmode in ("listed", "unlisted"): + pluginlist = self.getConfig("pluginlist", '').replace('|', ',').replace(';', ',').split(',') + configset = self._pluginSet(pluginlist) + + if configmode == "listed": + pluginset &= configset + else: + pluginset -= configset + + except Exception, e: + self.logError(e) + + self.plugins = list(pluginset) + + return self.plugins + + + def _pluginSet(self, plugins): + plugins = set((decode(x).strip().lower() for x in plugins if '.' in x)) + + for rf, rt in self.DOMAIN_REPLACEMENTS: + regex = re.compile(rf) + for p in filter(lambda x: regex.match(x), plugins): + plugins.remove(p) + plugins.add(re.sub(rf, rt, p)) + + plugins.discard('') + + return plugins + + + def getHosters(self): + """Load list of supported hoster + + :return: List of domain names + """ + raise NotImplementedError + + + def getCrypters(self): + """Load list of supported crypters + + :return: List of domain names + """ + raise NotImplementedError + + + def periodical(self): + """reload plugin list periodically""" + self.logInfo(_("Reloading supported %s list") % self.plugintype) + + old_supported = self.supported + + self.supported = [] + self.new_supported = [] + self.plugins = [] + + self.overridePlugins() + + old_supported = [plugin for plugin in old_supported if plugin not in self.supported] + + if old_supported: + self.logDebug("Unload: %s" % ", ".join(old_supported)) + for plugin in old_supported: + self.unloadPlugin(plugin) + + if self.getConfig("reload", True): + self.interval = max(self.getConfig("reloadinterval", 12) * 60 * 60, self.MIN_INTERVAL) + else: + self.core.scheduler.removeJob(self.cb) + self.cb = None + + + def overridePlugins(self): + excludedList = [] + + if self.plugintype == "hoster": + pluginMap = dict((name.lower(), name) for name in self.core.pluginManager.hosterPlugins.iterkeys()) + accountList = [account.type.lower() for account in self.core.api.getAccounts(False) if account.valid and account.premium] + else: + pluginMap = {} + accountList = [name[::-1].replace("Folder"[::-1], "", 1).lower()[::-1] for name in self.core.pluginManager.crypterPlugins.iterkeys()] + + for plugin in self.pluginsCached(): + name = remove_chars(plugin, "-.") + + if name in accountList: + excludedList.append(plugin) + else: + if name in pluginMap: + self.supported.append(pluginMap[name]) + else: + self.new_supported.append(plugin) + + if not self.supported and not self.new_supported: + self.logError(_("No %s loaded") % self.plugintype) + return + + # inject plugin plugin + self.logDebug("Overwritten %ss: %s" % (self.plugintype, ", ".join(sorted(self.supported)))) + + for plugin in self.supported: + hdict = self.core.pluginManager.plugins[self.plugintype][plugin] + hdict['new_module'] = self.pluginmodule + hdict['new_name'] = self.pluginname + + if excludedList: + self.logInfo(_("%ss not overwritten: %s") % (self.plugintype.capitalize(), ", ".join(sorted(excludedList)))) + + if self.new_supported: + plugins = sorted(self.new_supported) + + self.logDebug("New %ss: %s" % (self.plugintype, ", ".join(plugins))) + + # create new regexp + regexp = r'.*(?P<DOMAIN>%s).*' % "|".join([x.replace(".", "\.") for x in plugins]) + if hasattr(self.pluginclass, "__pattern__") and isinstance(self.pluginclass.__pattern__, basestring) and '://' in self.pluginclass.__pattern__: + regexp = r'%s|%s' % (self.pluginclass.__pattern__, regexp) + + self.logDebug("Regexp: %s" % regexp) + + hdict = self.core.pluginManager.plugins[self.plugintype][self.pluginname] + hdict['pattern'] = regexp + hdict['re'] = re.compile(regexp) + + + def unloadPlugin(self, plugin): + hdict = self.core.pluginManager.plugins[self.plugintype][plugin] + if "module" in hdict: + del hdict['module'] + + if "new_module" in hdict: + del hdict['new_module'] + del hdict['new_name'] + + + def unload(self): + """Remove override for all plugins. Scheduler job is removed by hookmanager""" + for plugin in self.supported: + self.unloadPlugin(plugin) + + # reset pattern + hdict = self.core.pluginManager.plugins[self.plugintype][self.pluginname] + + hdict['pattern'] = getattr(self.pluginclass, "__pattern__", r'^unmatchable$') + hdict['re'] = re.compile(hdict['pattern']) + + + def downloadFailed(self, pyfile): + """remove plugin override if download fails but not if file is offline/temp.offline""" + if pyfile.status != 8 or not self.getConfig("revertfailed", True): + return + + hdict = self.core.pluginManager.plugins[self.plugintype][pyfile.pluginname] + if "new_name" in hdict and hdict['new_name'] == self.pluginname: + if pyfile.error == "MultiHook": + self.logDebug("Unload MultiHook", pyfile.pluginname, hdict) + self.unloadPlugin(pyfile.pluginname) + pyfile.setStatus("queued") + pyfile.sync() + else: + retries = max(self.getConfig("retry", 10), 0) + wait_time = max(self.getConfig("retryinterval", 1), 0) + + if 0 < retries > pyfile.plugin.retries: + self.logInfo(_("Retrying: %s") % pyfile.name) + pyfile.setCustomStatus("MultiHook", "queued") + pyfile.sync() + + pyfile.plugin.retries += 1 + pyfile.plugin.setWait(wait_time) + pyfile.plugin.wait() diff --git a/module/plugins/internal/MultiHoster.py b/module/plugins/internal/MultiHoster.py index 3b45801f8..63b7d76b1 100644 --- a/module/plugins/internal/MultiHoster.py +++ b/module/plugins/internal/MultiHoster.py @@ -2,201 +2,84 @@ import re -from pyload.plugin.Addon import Addon -from pyload.utils import remove_chars +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies -class MultiHoster(Addon): +class MultiHoster(SimpleHoster): __name__ = "MultiHoster" - __type__ = "addon" - __version__ = "0.20" + __type__ = "hoster" + __version__ = "0.37" - __description__ = """Base multi-hoster plugin""" + __pattern__ = r'^unmatchable$' + + __description__ = """Multi hoster plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - HOSTER_REPLACEMENTS = [("1fichier.com", "onefichier.com"), ("2shared.com", "twoshared.com"), - ("4shared.com", "fourshared.com"), ("cloudnator.com", "shragle.com"), - ("easy-share.com", "crocko.com"), ("freakshare.net", "freakshare.com"), - ("hellshare.com", "hellshare.cz"), ("ifile.it", "filecloud.io"), - ("putlocker.com", "firedrive.com"), ("share-rapid.cz", "multishare.cz"), - ("sharerapid.cz", "multishare.cz"), ("ul.to", "uploaded.to"), - ("uploaded.net", "uploaded.to")] - HOSTER_EXCLUDED = [] + LOGIN_ACCOUNT = True def setup(self): - self.interval = 12 * 60 * 60 #: reload hosters every 12h - self.hosters = [] - self.supported = [] - self.new_supported = [] - - - def getConfig(self, option, default=''): - """getConfig with default value - subclass may not implements all config options""" - try: - # Fixed loop due to getConf deprecation in 0.4.10 - return super(MultiHoster, self).getConfig(option) - except KeyError: - return default - - - def getHosterCached(self): - if not self.hosters: - try: - hosterSet = self.toHosterSet(self.getHoster()) - set(self.HOSTER_EXCLUDED) - except Exception, e: - self.logError(e) - return [] - - try: - configMode = self.getConfig('hosterListMode', 'all') - if configMode in ("listed", "unlisted"): - configSet = self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) - - if configMode == "listed": - hosterSet &= configSet - else: - hosterSet -= configSet - - except Exception, e: - self.logError(e) - - self.hosters = list(hosterSet) - - return self.hosters - - - def toHosterSet(self, hosters): - hosters = set((str(x).strip().lower() for x in hosters)) - - for rep in self.HOSTER_REPLACEMENTS: - if rep[0] in hosters: - hosters.remove(rep[0]) - hosters.add(rep[1]) - - hosters.discard('') - return hosters - + self.chunkLimit = 1 + self.multiDL = bool(self.account) + self.resumeDownload = self.premium - def getHoster(self): - """Load list of supported hoster - :return: List of domain names - """ - raise NotImplementedError + def prepare(self): + self.info = {} + self.html = "" + self.link = "" #@TODO: Move to hoster class in 0.4.10 + self.directDL = False #@TODO: Move to hoster class in 0.4.10 + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) - def activate(self): - if self.cb: - self.core.scheduler.removeJob(self.cb) + self.req.setOption("timeout", 120) - self.setConfig("activated", True) #: config not in sync after plugin reload + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) - cfg_interval = self.getConfig("interval", None) #: reload interval in hours - if cfg_interval is not None: - self.interval = cfg_interval * 60 * 60 - - if self.interval: - self._periodical() + if self.DIRECT_LINK is None: + self.directDL = self.__pattern__ != r'^unmatchable$' and re.match(self.__pattern__, self.pyfile.url) else: - self.periodical() - - - def periodical(self): - """reload hoster list periodically""" - self.logInfo(_("Reloading supported hoster list")) - - old_supported = self.supported - self.supported = [] - self.new_supported = [] - self.hosters = [] - - self.overridePlugins() - - old_supported = [hoster for hoster in old_supported if hoster not in self.supported] - if old_supported: - self.logDebug("UNLOAD", ", ".join(old_supported)) - for hoster in old_supported: - self.unloadHoster(hoster) - + self.directDL = self.DIRECT_LINK - def overridePlugins(self): - pluginMap = dict((name.lower(), name) for name in self.core.pluginManager.hosterPlugins.keys()) - accountList = [name.lower() for name, data in self.core.accountManager.accounts.iteritems() if data] - excludedList = [] + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) - for hoster in self.getHosterCached(): - name = remove_chars(hoster.lower(), "-.") - if name in accountList: - excludedList.append(hoster) - else: - if name in pluginMap: - self.supported.append(pluginMap[name]) - else: - self.new_supported.append(hoster) + def process(self, pyfile): + self.prepare() - if not self.supported and not self.new_supported: - self.logError(_("No Hoster loaded")) - return + if self.directDL: + self.checkInfo() + self.logDebug("Looking for direct download link...") + self.handleDirect(pyfile) - module = self.core.pluginManager.getPlugin(self.__type__, self.__name__) - klass = getattr(module, self.__name__) + if not self.link and not self.lastDownload: + self.preload() - # inject plugin plugin - self.logDebug("Overwritten Hosters", ", ".join(sorted(self.supported))) - for hoster in self.supported: - dict = self.core.pluginManager.hosterPlugins[hoster] - dict['new_module'] = module - dict['new_name'] = self.__name__ + self.checkErrors() + self.checkStatus(getinfo=False) - if excludedList: - self.logInfo(_("The following hosters were not overwritten - account exists"), ", ".join(sorted(excludedList))) + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium(pyfile) - if self.new_supported: - self.logDebug("New Hosters", ", ".join(sorted(self.new_supported))) + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as free download") + self.handleFree(pyfile) - # create new regexp - regexp = r'.*(%s).*' % "|".join([x.replace(".", "\.") for x in self.new_supported]) - if hasattr(klass, "__pattern__") and isinstance(klass.__pattern__, basestring) and '://' in klass.__pattern__: - regexp = r'%s|%s' % (klass.__pattern__, regexp) + self.downloadLink(self.link, True) + self.checkFile() - self.logDebug("Regexp", regexp) - dict = self.core.pluginManager.hosterPlugins[self.__name__] - dict['pattern'] = regexp - dict['re'] = re.compile(regexp) + def handlePremium(self, pyfile): + return self.handleFree(pyfile) - def unloadHoster(self, hoster): - dict = self.core.pluginManager.hosterPlugins[hoster] - if "module" in dict: - del dict['module'] - - if "new_module" in dict: - del dict['new_module'] - del dict['new_name'] - - - def deactivate(self): - """Remove override for all hosters. Scheduler job is removed by AddonManager""" - for hoster in self.supported: - self.unloadHoster(hoster) - - # reset pattern - klass = getattr(self.core.pluginManager.getPlugin(self.__type__, self.__name__), self.__name__) - dict = self.core.pluginManager.hosterPlugins[self.__name__] - dict['pattern'] = getattr(klass, "__pattern__", r'^unmatchable$') - dict['re'] = re.compile(dict['pattern']) - - - def downloadFailed(self, pyfile): - """remove plugin override if download fails but not if file is offline/temp.offline""" - if pyfile.hasStatus("failed") and self.getConfig("unloadFailing", True): - hdict = self.core.pluginManager.hosterPlugins[pyfile.pluginname] - if "new_name" in hdict and hdict['new_name'] == self.__name__: - self.logDebug("Unload MultiHoster", pyfile.pluginname, hdict) - self.unloadHoster(pyfile.pluginname) - pyfile.setStatus("queued") + def handleFree(self, pyfile): + if self.premium: + raise NotImplementedError + else: + self.fail(_("Required premium account not found")) diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py new file mode 100644 index 000000000..7ad6b0d7a --- /dev/null +++ b/module/plugins/internal/SevenZip.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from subprocess import Popen, PIPE + +from module.plugins.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice +from module.utils import fs_encode, save_join + + +class SevenZip(UnRar): + __name__ = "SevenZip" + __version__ = "0.08" + + __description__ = """7-Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Michael Nowak", ""), + ("Walter Purcaro", "vuolter@gmail.com")] + + + CMD = "7z" + VERSION = "" + + EXTENSIONS = [".7z", ".xz", ".zip", ".gz", ".gzip", ".tgz", ".bz2", ".bzip2", + ".tbz2", ".tbz", ".tar", ".wim", ".swm", ".lzma", ".rar", ".cab", + ".arj", ".z", ".taz", ".cpio", ".rpm", ".deb", ".lzh", ".lha", + ".chm", ".chw", ".hxs", ".iso", ".msi", ".doc", ".xls", ".ppt", + ".dmg", ".xar", ".hfs", ".exe", ".ntfs", ".fat", ".vhd", ".mbr", + ".squashfs", ".cramfs", ".scap"] + + + #@NOTE: there are some more uncovered 7z formats + re_filelist = re.compile(r'([\d\:]+)\s+([\d\:]+)\s+([\w\.]+)\s+(\d+)\s+(\d+)\s+(.+)') + re_wrongpwd = re.compile(r'(Can not open encrypted archive|Wrong password)', re.I) + re_wrongcrc = re.compile(r'Encrypted\s+\=\s+\+', re.I) + re_version = re.compile(r'7-Zip\s(?:\[64\]\s)?(\d+\.\d+)', re.I) + + + @classmethod + def isUsable(cls): + if os.name == "nt": + cls.CMD = os.path.join(pypath, "7z.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out,err = p.communicate() + else: + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + + cls.VERSION = cls.re_version.search(out).group(1) + + return True + + + def check(self): + file = fs_encode(self.filename) + + p = self.call_cmd("t", file) + out, err = p.communicate() + + if p.returncode > 1: + raise CRCError(err) + + p = self.call_cmd("l", "-slt", file) + out, err = p.communicate() + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + # check if output or error macthes the 'wrong password'-Regexp + if self.re_wrongpwd.search(out): + raise PasswordError + + # check if output matches 'Encrypted = +' + if self.re_wrongcrc.search(out): + raise CRCError(_("Header protected")) + + + def isPassword(self, password): + p = self.call_cmd("l", fs_encode(self.filename), password=password) + p.communicate() + return p.returncode == 0 + + + def repair(self): + return False + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, '-o' + self.out, fs_encode(self.filename), password=password) + + renice(p.pid, self.renice) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + self.files = self.list(password) + + + def list(self, password=None): + command = "l" if self.fullpath else "l" + + p = self.call_cmd(command, fs_encode(self.filename), password=password) + out, err = p.communicate() + + if "Can not open" in err: + raise ArchiveError(_("Cannot open file")) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + result = set() + for groups in self.re_filelist.findall(out): + f = groups[-1].strip() + result.add(save_join(self.out, f)) + + return list(result) + + + def call_cmd(self, command, *xargs, **kwargs): + args = [] + + #overwrite flag + if self.overwrite: + args.append("-y") + + #set a password + if "password" in kwargs and kwargs["password"]: + args.append("-p'%s'" % kwargs["password"]) + else: + args.append("-p-") + + #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + + self.manager.logDebug(" ".join(call)) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + return p diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index 5115692ad..dc34f864f 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -2,7 +2,7 @@ import re -from urlparse import urlparse +from urlparse import urljoin, urlparse from pyload.plugin.Crypter import Crypter from pyload.plugin.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies @@ -12,7 +12,7 @@ from pyload.utils import fixup class SimpleCrypter(Crypter, SimpleHoster): __name__ = "SimpleCrypter" __type__ = "crypter" - __version__ = "0.32" + __version__ = "0.43" __pattern__ = r'^unmatchable$' __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config['general']['folder_per_package'] @@ -28,16 +28,16 @@ class SimpleCrypter(Crypter, SimpleHoster): """ Following patterns should be defined by each crypter: - LINK_PATTERN: group(1) must be a download link or a regex to catch more links + LINK_PATTERN: Download link or regex to catch links in group(1) example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' - NAME_PATTERN: (optional) folder name or webpage title + NAME_PATTERN: (optional) folder name or page title example: NAME_PATTERN = r'<title>Files of: (?P<N>[^<]+) folder</title>' - OFFLINE_PATTERN: (optional) Checks if the file is yet available online + OFFLINE_PATTERN: (optional) Checks if the page is unreachable example: OFFLINE_PATTERN = r'File (deleted|not found)' - TEMP_OFFLINE_PATTERN: (optional) Checks if the file is temporarily offline + TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable example: TEMP_OFFLINE_PATTERN = r'Server maintainance' @@ -69,15 +69,18 @@ class SimpleCrypter(Crypter, SimpleHoster): def prepare(self): - self.info = {} - self.links = [] + self.pyfile.error = "" #@TODO: Remove in 0.4.10 - if self.LOGIN_ACCOUNT and not self.account: - self.fail(_("Required account not found")) + self.info = {} + self.html = "" + self.links = [] #@TODO: Move to hoster class in 0.4.10 if self.LOGIN_PREMIUM and not self.premium: self.fail(_("Required premium account not found")) + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + self.req.setOption("timeout", 120) if isinstance(self.COOKIES, list): @@ -90,46 +93,45 @@ class SimpleCrypter(Crypter, SimpleHoster): self.prepare() self.preload() - - if self.html is None: - self.fail(_("No html retrieved")) - self.checkInfo() self.links = self.getLinks() if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): - self.handleMultiPages() + self.handlePages(pyfile) self.logDebug("Package has %d links" % len(self.links)) if self.links: self.packages = [(self.info['name'], self.links, self.info['folder'])] + elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 + self.fail(_("No link grabbed")) - def checkStatus(self): - status = self.info['status'] - - if status is 1: - self.offline() - elif status is 6: - self.tempOffline() + def checkNameSize(self, getinfo=True): + if not self.info or getinfo: + self.logDebug("File info (BEFORE): %s" % self.info) + self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("File info (AFTER): %s" % self.info) + try: + url = self.info['url'].strip() + name = self.info['name'].strip() + if name and name != url: + self.pyfile.name = name - def checkNameSize(self): - name = self.info['name'] - url = self.info['url'] + except Exception: + pass - if name and name != url: - self.pyfile.name = name - else: - self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] + try: + folder = self.info['folder'] = self.pyfile.name - folder = self.info['folder'] = name + except Exception: + pass - self.logDebug("File name: %s" % name, - "File folder: %s" % folder) + self.logDebug("File name: %s" % self.pyfile.name, + "File folder: %s" % self.pyfile.name) def getLinks(self): @@ -137,13 +139,16 @@ class SimpleCrypter(Crypter, SimpleHoster): Returns the links extracted from self.html You should override this only if it's impossible to extract links using only the LINK_PATTERN. """ - return re.findall(self.LINK_PATTERN, self.html) + url_p = urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + + return [urljoin(baseurl, link) if not urlparse(link).scheme else link \ + for link in re.findall(self.LINK_PATTERN, self.html)] - def handleMultiPages(self): + def handlePages(self, pyfile): try: - m = re.search(self.PAGES_PATTERN, self.html) - pages = int(m.group(1)) + pages = int(re.search(self.PAGES_PATTERN, self.html).group(1)) except Exception: pages = 1 diff --git a/module/plugins/internal/SimpleDereferer.py b/module/plugins/internal/SimpleDereferer.py new file mode 100644 index 000000000..bd00f5d25 --- /dev/null +++ b/module/plugins/internal/SimpleDereferer.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- + +import re + +from urllib import unquote + +from module.plugins.Crypter import Crypter +from module.plugins.internal.SimpleHoster import fileUrl, set_cookies + + +class SimpleDereferer(Crypter): + __name__ = "SimpleDereferer" + __type__ = "crypter" + __version__ = "0.07" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description__ = """Simple dereferer plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Following patterns should be defined by each crypter: + + LINK_PATTERN: Regex to catch the redirect url in group(1) + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + + OFFLINE_PATTERN: (optional) Checks if the page is unreachable + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + + You can override the getLinks method if you need a more sophisticated way to extract the redirect url. + """ + + LINK_PATTERN = None + + TEXT_ENCODING = False + COOKIES = True + + + def decrypt(self, pyfile): + link = fileUrl(self, pyfile.url) + + if not link: + try: + link = unquote(re.match(self.__pattern__, pyfile.url).group('LINK')) + + except Exception: + self.prepare() + self.preload() + self.checkStatus() + + link = self.getLink() + + if link.strip(): + self.urls = [link.strip()] #@TODO: Remove `.strip()` in 0.4.10 + + elif not self.urls and not self.packages: #@TODO: Remove in 0.4.10 + self.fail(_("No link grabbed")) + + + def prepare(self): + self.info = {} + self.html = "" + + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + + def preload(self): + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + + + def checkStatus(self): + if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): + self.offline() + + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): + self.tempOffline() + + + def getLink(self): + try: + return re.search(self.LINK_PATTERN, self.html).group(1) + + except Exception: + pass diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 84c47f3b4..e4ff1a2d8 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,22 +1,51 @@ # -*- coding: utf-8 -*- +import mimetypes +import os import re +from datetime import datetime, timedelta +from inspect import isclass from time import time from urllib import unquote from urlparse import urljoin, urlparse -from pyload.datatype.File import statusMap as _statusMap -from pyload.network.CookieJar import CookieJar -from pyload.network.RequestFactory import getURL -from pyload.plugin.Hoster import Hoster -from pyload.utils import fixup, formatSize, parseFileSize +from module.PyFile import statusMap as _statusMap +from module.network.CookieJar import CookieJar +from module.network.HTTPRequest import BadHeader +from module.network.RequestFactory import getURL +from module.plugins.Hoster import Hoster +from module.plugins.Plugin import Fail +from module.utils import fixup, fs_encode, parseFileSize #@TODO: Adapt and move to PyFile in 0.4.10 statusMap = dict((v, k) for k, v in _statusMap.iteritems()) +#@TODO: Remove in 0.4.10 and redirect to self.error instead +def _error(self, reason, type): + if not reason and not type: + type = "unknown" + + msg = _("%s error") % type.strip().capitalize() if type else _("Error") + msg += ": %s" % reason.strip() if reason else "" + msg += _(" | Plugin may be out of date") + + raise Fail(msg) + + +#@TODO: Remove in 0.4.10 +def _wait(self, seconds, reconnect): + if seconds: + self.setWait(int(seconds) + 1) + + if reconnect is not None: + self.wantReconnect = reconnect + + super(SimpleHoster, self).wait() + + def replace_patterns(string, ruleslist): for r in ruleslist: rf, rt = r @@ -75,14 +104,36 @@ def parseHtmlForm(attr_str, html, input_names={}): #: Deprecated def parseFileInfo(plugin, url="", html=""): - info = plugin.getInfo(url, html) - return info['name'], info['size'], info['status'], info['url'] + if hasattr(plugin, "getInfo"): + info = plugin.getInfo(url, html) + res = info['name'], info['size'], info['status'], info['url'] + else: + url = unquote(url) + url_p = urlparse(url) + res = ((url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 0, + 3 if url else 8, + url) + + return res #@TODO: Remove in 0.4.10 #@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10 def create_getInfo(plugin): - return lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)] + + def generator(list): + for x in list: + yield x + + if hasattr(plugin, "parseInfos"): + fn = lambda urls: generator((info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)) + else: + fn = lambda urls: generator(parseFileInfo(url) for url in urls) + + return fn def timestamp(): @@ -90,37 +141,112 @@ def timestamp(): #@TODO: Move to hoster class in 0.4.10 -def _isDirectLink(self, url, resumable=True): - header = self.load(url, ref=True, just_header=True, decode=True) +def fileUrl(self, url, follow_location=None): + link = "" + redirect = 1 - if not 'location' in header or not header['location']: - return "" + if type(follow_location) is int: + redirect = max(follow_location, 1) + else: + redirect = 5 - location = header['location'] + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) - resumable = False #@NOTE: Testing... + except Exception: #: Bad bad bad... + req = pyreq.getHTTPRequest() + res = req.load(url, cookies=True, just_header=True, decode=True) - if resumable: #: sometimes http code may be wrong... - if 'location' in self.load(location, ref=True, cookies=True, just_header=True, decode=True): - return "" - else: - if not 'code' in header or header['code'] != 302: - return "" + req.close() + + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value + + if 'content-disposition' in header: + link = url + + elif 'location' in header and header['location'].strip(): + location = header['location'] + + if not urlparse(location).scheme: + url_p = urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urljoin(baseurl, location) + + if 'code' in header and header['code'] == 302: + link = location + + if follow_location: + url = location + continue + + else: + extension = os.path.splitext(urlparse(url).path.split('/')[-1])[-1] + + if 'content-type' in header and header['content-type'].strip(): + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" + + break - if urlparse(location).scheme: - link = location else: - p = urlparse(url) - base = "%s://%s" % (p.scheme, p.netloc) - link = urljoin(base, location) + try: + self.logError(_("Too many redirects")) + except Exception: + pass return link +def secondsToMidnight(gmt=0): + now = datetime.utcnow() + timedelta(hours=gmt) + + if now.hour is 0 and now.minute < 10: + midnight = now + else: + midnight = now + timedelta(days=1) + + td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now + + if hasattr(td, 'total_seconds'): + res = td.total_seconds() + else: #@NOTE: work-around for python 2.5 and 2.6 missing timedelta.total_seconds + res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + return int(res) + + class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.72" + __version__ = "1.15" __pattern__ = r'^unmatchable$' @@ -145,10 +271,10 @@ class SimpleHoster(Hoster): HASHSUM_PATTERN: (optional) Hash code and type of the file example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' - OFFLINE_PATTERN: (optional) Check if the file is yet available online + OFFLINE_PATTERN: (optional) Check if the page is unreachable example: OFFLINE_PATTERN = r'File (deleted|not found)' - TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline + TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' @@ -177,31 +303,53 @@ class SimpleHoster(Hoster): SIZE_REPLACEMENTS = [] URL_REPLACEMENTS = [] - TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct - COOKIES = True #: or False or list of tuples [(domain, name, value)] - FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account - CHECK_DIRECT_LINK = None #: Set to True to check for direct link, set to None to do it only if self.account is True - MULTI_HOSTER = False #: Set to True to leech other hoster link (according its multihoster hook if available) + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account + DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False + MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + LOGIN_ACCOUNT = False #: Set to True to require account login + DISPOSITION = True #: Work-around to `filename*=UTF-8` bug; remove in 0.4.10 + + directLink = fileUrl #@TODO: Remove in 0.4.10 @classmethod - def parseInfos(cls, urls): + def parseInfos(cls, urls): #@TODO: Built-in in 0.4.10 core, then remove from plugins for url in urls: url = replace_patterns(url, cls.URL_REPLACEMENTS) yield cls.getInfo(url) @classmethod + def apiInfo(cls, url="", get={}, post={}): + url = unquote(url) + url_p = urlparse(url) + return {'name' : (url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 'size' : 0, + 'status': 3 if url else 8, + 'url' : url} + + + @classmethod def getInfo(cls, url="", html=""): - info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url} + info = cls.apiInfo(url) + online = False if info['status'] != 2 else True + + try: + info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here + + except Exception: + info['pattern'] = {} - if not html: - try: - if not url: - info['error'] = "missing url" - info['status'] = 1 - raise + if not html and not online: + if not url: + info['error'] = "missing url" + info['status'] = 1 + elif info['status'] is 3 and not fileUrl(None, url): try: html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) @@ -213,50 +361,41 @@ class SimpleHoster(Hoster): if e.code is 404: info['status'] = 1 - raise - if e.code is 503: + elif e.code is 503: info['status'] = 6 - raise - except Exception: - return info - online = False + if html: + if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): + info['status'] = 1 - if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): - info['status'] = 1 + elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): + info['status'] = 6 - elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): - info['status'] = 6 - - else: - try: - info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api'] - except Exception: - pass - - for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"): - try: - attr = getattr(cls, pattern) - dict = re.search(attr, html).groupdict() + else: + for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"): + try: + attr = getattr(cls, pattern) + pdict = re.search(attr, html).groupdict() - if all(True for k in dict if k not in info['pattern']): - info['pattern'].update(dict) + if all(True for k in pdict if k not in info['pattern']): + info['pattern'].update(pdict) - except AttributeError: - continue + except AttributeError: + continue - else: - online = True + else: + online = True if online: info['status'] = 2 if 'N' in info['pattern']: - info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), cls.NAME_REPLACEMENTS) + info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), + cls.NAME_REPLACEMENTS) if 'S' in info['pattern']: - size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'], + size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], cls.SIZE_REPLACEMENTS) info['size'] = parseFileSize(size) @@ -268,6 +407,9 @@ class SimpleHoster(Hoster): hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" info[hashtype] = info['pattern']['H'] + if not info['pattern']: + info.pop('pattern', None) + return info @@ -276,11 +418,17 @@ class SimpleHoster(Hoster): def prepare(self): + self.pyfile.error = "" #@TODO: Remove in 0.4.10 + self.info = {} + self.html = "" self.link = "" #@TODO: Move to hoster class in 0.4.10 self.directDL = False #@TODO: Move to hoster class in 0.4.10 self.multihost = False #@TODO: Move to hoster class in 0.4.10 + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + self.req.setOption("timeout", 120) if isinstance(self.COOKIES, list): @@ -289,17 +437,13 @@ class SimpleHoster(Hoster): if (self.MULTI_HOSTER and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern'] or re.match(self.__pattern__, self.pyfile.url) is None)): + self.multihost = True + return - self.logInfo("Multi hoster detected") - - if self.account: - self.multihost = True - return - else: - self.fail(_("Only registered or premium users can use url leech feature")) - - if self.CHECK_DIRECT_LINK is None: + if self.DIRECT_LINK is None: self.directDL = bool(self.account) + else: + self.directDL = self.DIRECT_LINK self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) @@ -313,125 +457,159 @@ class SimpleHoster(Hoster): def process(self, pyfile): self.prepare() + self.checkInfo() + + if self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect(pyfile) - if self.multihost: + if self.multihost and not self.link and not self.lastDownload: self.logDebug("Looking for leeched download link...") - self.handleMulti() + self.handleMulti(pyfile) - elif self.directDL: - self.logDebug("Looking for direct download link...") - self.handleDirect() + if not self.link and not self.lastDownload: + self.MULTI_HOSTER = False + self.retry(1, reason="Multi hoster fails") - if not self.link: + if not self.link and not self.lastDownload: self.preload() + self.checkInfo() - if self.html is None: - self.fail(_("No html retrieved")) - - self.checkErrors() + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium(pyfile) - premium_only = 'error' in self.info and self.info['error'] == "premium-only" + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as free download") + self.handleFree(pyfile) - self._updateInfo(self.getInfo(pyfile.url, self.html)) + self.downloadLink(self.link, self.DISPOSITION) #: Remove `self.DISPOSITION` in 0.4.10 + self.checkFile() - self.checkNameSize() - #: Usually premium only pages doesn't show any file information - if not premium_only: - self.checkStatus() + def downloadLink(self, link, disposition=True): + if link and isinstance(link, basestring): + self.correctCaptcha() - if self.premium and (not self.FORCE_CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium() + if not urlparse(link).scheme: + url_p = urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + link = urljoin(baseurl, link) - elif premium_only: - self.fail(_("Link require a premium account to be handled")) + self.download(link, ref=False, disposition=disposition) - else: - self.logDebug("Handled as free download") - self.handleFree() - self.downloadLink(self.link) - self.checkFile() + def checkFile(self): + if self.cTask and not self.lastDownload: + self.invalidCaptcha() + self.retry(10, reason=_("Wrong captcha")) + elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)): + self.lastDownload = "" + self.error(self.pyfile.error or _("No file downloaded")) - def downloadLink(self, link): - if not link: - return + else: + rules = {'empty file': re.compile(r'\A\Z'), + 'html file' : re.compile(r'\A\s*<!DOCTYPE html'), + 'html error': re.compile(r'\A\s*(<.+>)?\d{3}(\Z|\s+)')} - self.download(link, disposition=True) + if hasattr(self, 'ERROR_PATTERN'): + rules['error'] = re.compile(self.ERROR_PATTERN) + check = self.checkDownload(rules) + if check: #@TODO: Move to hoster in 0.4.10 + errmsg = check.strip().capitalize() + if self.lastCheck: + errmsg += " | " + self.lastCheck.group(0).strip() - def checkFile(self): - if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10 - self.fail(_("Empty file")) + self.lastDownload = "" + self.retry(10, 60, errmsg) def checkErrors(self): - if hasattr(self, 'ERROR_PATTERN'): + if not self.html: + self.logWarning(_("No html code to check")) + return + + if hasattr(self, 'PREMIUM_ONLY_PATTERN') and not self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + self.fail(_("Link require a premium account to be handled")) + + elif hasattr(self, 'ERROR_PATTERN'): m = re.search(self.ERROR_PATTERN, self.html) if m: errmsg = self.info['error'] = m.group(1) self.error(errmsg) - if hasattr(self, 'PREMIUM_ONLY_PATTERN'): - m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) - if m: - self.info['error'] = "premium-only" - return - - if hasattr(self, 'WAIT_PATTERN'): + elif hasattr(self, 'WAIT_PATTERN'): m = re.search(self.WAIT_PATTERN, self.html) if m: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) - self.wait(wait_time, False) + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)) + self.wait(wait_time, wait_time > 300) return self.info.pop('error', None) - def checkStatus(self): - status = self.info['status'] + def checkStatus(self, getinfo=True): + if not self.info or getinfo: + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) + self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("Current file info: %s" % self.info) - if status is 1: - self.offline() + try: + status = self.info['status'] - elif status is 6: - self.tempOffline() + if status is 1: + self.offline() - elif status is not 2: - self.logInfo(_("File status: %s") % statusMap[status], - _("File info: %s") % self.info) - self.error(_("No file info retrieved")) + elif status is 6: + self.tempOffline() + elif status is 8: + self.fail() - def checkNameSize(self): - name = self.info['name'] - size = self.info['size'] - url = self.info['url'] + finally: + self.logDebug("File status: %s" % statusMap[status]) - if name and name != url: - self.pyfile.name = name - else: - self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] - if size > 0: - self.pyfile.size = size - else: - size = "Unknown" + def checkNameSize(self, getinfo=True): + if not self.info or getinfo: + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) + self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("Current file info: %s" % self.info) - self.logDebug("File name: %s" % name, - "File size: %s" % size) + try: + url = self.info['url'].strip() + name = self.info['name'].strip() + if name and name != url: + self.pyfile.name = name + except Exception: + pass - def checkInfo(self): - self.checkErrors() + try: + size = self.info['size'] + if size > 0: + self.pyfile.size = size + + except Exception: + pass - self._updateInfo(self.getInfo(self.pyfile.url, self.html or "")) + self.logDebug("File name: %s" % self.pyfile.name, + "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown") + + def checkInfo(self): self.checkNameSize() - self.checkStatus() + + if self.html: + self.checkErrors() + self.checkNameSize() + + self.checkStatus(getinfo=False) #: Deprecated @@ -441,59 +619,44 @@ class SimpleHoster(Hoster): return self.info - def _updateInfo(self, info): - self.logDebug(_("File info (before update): %s") % self.info) - self.info.update(info) - self.logDebug(_("File info (after update): %s") % self.info) - - - def handleDirect(self): - link = _isDirectLink(self, self.pyfile.url, self.resumeDownload) + def handleDirect(self, pyfile): + link = self.directLink(pyfile.url, self.resumeDownload) if link: self.logInfo(_("Direct download link detected")) self.link = link - - self._updateInfo(self.getInfo(self.pyfile.url)) - self.checkNameSize() else: - self.logDebug(_("Direct download link not found")) + self.logDebug("Direct download link not found") - def handleMulti(self): #: Multi-hoster handler + def handleMulti(self, pyfile): #: Multi-hoster handler pass - def handleFree(self): + def handleFree(self, pyfile): if not hasattr(self, 'LINK_FREE_PATTERN'): - self.fail(_("Free download not implemented")) - - try: - m = re.search(self.LINK_FREE_PATTERN, self.html) - if m is None: - self.error(_("Free download link not found")) + self.logError(_("Free download not implemented")) + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - - def handlePremium(self): + def handlePremium(self, pyfile): if not hasattr(self, 'LINK_PREMIUM_PATTERN'): - self.fail(_("Premium download not implemented")) - - try: - m = re.search(self.LINK_PREMIUM_PATTERN, self.html) - if m is None: - self.error(_("Premium download link not found")) + self.logError(_("Premium download not implemented")) + self.logDebug("Handled as free download") + self.handleFree(pyfile) + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def longWait(self, wait_time=None, max_tries=3): if wait_time and isinstance(wait_time, (int, long, float)): @@ -505,8 +668,7 @@ class SimpleHoster(Hoster): self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) - self.setWait(wait_time, True) - self.wait() + self.wait(wait_time, True) self.retry(max_tries=max_tries, reason=_("Download limit reached")) @@ -515,6 +677,9 @@ class SimpleHoster(Hoster): def checkTrafficLeft(self): + if not self.account: + return True + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] if traffic is None: @@ -522,9 +687,15 @@ class SimpleHoster(Hoster): elif traffic == -1: return True else: - self.logInfo(_("Filesize: %s, Traffic left for user %s: %s") % (formatSize(size), self.user, formatSize(traffic))) - return self.pyfile.size <= traffic + size = self.pyfile.size / 1024 + self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) + return size <= traffic + + + #@TODO: Remove in 0.4.10 + def wait(self, seconds=0, reconnect=None): + return _wait(self, seconds, reconnect) def error(self, reason="", type="parse"): - return super(SimpleHoster, self).error(self, reason, type) + return _error(self, reason, type) diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 20da4177e..54d64c430 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -4,204 +4,228 @@ import os import re from glob import glob -from os.path import basename, join from string import digits from subprocess import Popen, PIPE -from pyload.plugin.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError -from pyload.utils import safe_join, decode +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from module.utils import decode, fs_encode, save_join def renice(pid, value): - if os.name != "nt" and value: + if value and os.name != "nt": try: Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + except Exception: - print "Renice failed" + pass -class UnRar(AbtractExtractor): +class UnRar(Extractor): __name__ = "UnRar" - __version__ = "0.19" + __version__ = "1.13" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org")] + __authors__ = [("RaNaN", "RaNaN@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("Immenz", "immenz@gmx.net"),] CMD = "unrar" + VERSION = "" + + EXTENSIONS = [".rar"] + + + re_multipart = re.compile(r'\.(part|r)(\d+)(?:\.rar)?',re.I) - # there are some more uncovered rar formats - re_version = re.compile(r'UNRAR ([\w .]+?) freeware') - re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) - re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I) - re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+') - re_filelist5 = re.compile(r'(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') - re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I) + re_filefixed = re.compile(r'Building (.+)') + re_filelist = re.compile(r'^(.)(\s*[\w\.\-]+)\s+(\d+\s+)+(?:\d+\%\s+)?[\d\-]{8}\s+[\d\:]{5}', re.M|re.I) + re_wrongpwd = re.compile(r'password', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) - @staticmethod - def checkDeps(): + re_version = re.compile(r'UNRAR\s(\d+\.\d+)', re.I) + + + @classmethod + def isUsable(cls): if os.name == "nt": - UnRar.CMD = join(pypath, "UnRAR.exe") - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() + cls.CMD = os.path.join(pypath, "UnRAR.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() else: try: - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() - except OSError: + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() - # fallback to rar - UnRar.CMD = "rar" - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) - p.communicate() + except OSError: #: fallback to rar + cls.CMD = "rar" + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + + cls.VERSION = cls.re_version.search(out).group(1) return True - @staticmethod - def getTargets(files_ids): - result = [] + @classmethod + def isMultipart(cls,filename): + multipart = cls.re_multipart.search(filename) + if multipart: + # First Multipart file (part1.rar for *.part1-9.rar format or *.rar for .r1-9 format) handled as normal Archive + return False if (multipart.group(1) == "part" and int(multipart.group(2)) == 1) else True - for file, id in files_ids: - if not file.endswith(".rar"): - continue + return False - match = UnRar.re_splitfile.findall(file) - if match: - # only add first parts - if int(match[0][1]) == 1: - result.append((file, id)) - else: - result.append((file, id)) - return result + def check(self): + p = self.call_cmd("l", "-v", fs_encode(self.filename)) + out, err = p.communicate() + + if self.re_wrongpwd.search(err): + raise PasswordError + if self.re_wrongcrc.search(err): + raise CRCError(err) - def init(self): - self.passwordProtected = False - self.headerProtected = False #: list files will not work without password - self.smallestFile = None #: small file to test passwords - self.password = "" #: save the correct password + # output only used to check if passworded files are present + for attr in self.re_filelist.findall(out): + if attr[0].startswith("*"): + raise PasswordError - def checkArchive(self): - p = self.call_unrar("l", "-v", self.file) + def isPassword(self, password): + # at this point we can only verify header protected files + p = self.call_cmd("l", "-v", fs_encode(self.filename), password=password) out, err = p.communicate() - if self.re_wrongpwd.search(err): - self.passwordProtected = True - self.headerProtected = True - return True + return False if self.re_wrongpwd.search(err) else True - # output only used to check if passworded files are present - if self.re_version.search(out): - for attr, size, name in self.re_filelist5.findall(out): - if attr.startswith("*"): - self.passwordProtected = True - return True - else: - for name, size, packed in self.re_filelist.findall(out): - if name.startswith("*"): - self.passwordProtected = True - return True - self.listContent() - if not self.files: - raise ArchiveError("Empty Archive") + def repair(self): + p = self.call_cmd("rc", fs_encode(self.filename)) - return False + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + if err or p.returncode: + p = self.call_cmd("r", fs_encode(self.filename)) - def checkPassword(self, password): - # at this point we can only verify header protected files - if self.headerProtected: - p = self.call_unrar("l", "-v", self.file, password=password) - out, err = p.communicate() - if self.re_wrongpwd.search(err): - return False + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() - return True + if err or p.returncode: + return False + else: + dir = os.path.dirname(filename) + name = re_filefixed.search(out).group(1) + self.filename = os.path.join(dir, name) - def extract(self, progress, password=None): - command = "x" if self.fullpath else "e" + return True - p = self.call_unrar(command, self.file, self.out, password=password) - renice(p.pid, self.renice) - progress(0) - progressstring = "" + def _progress(self, process): + s = "" while True: - c = p.stdout.read(1) + c = process.stdout.read(1) # quit loop on eof if not c: break # reading a percentage sign -> set progress and restart if c == '%': - progress(int(progressstring)) - progressstring = "" + self.notifyProgress(int(s)) + s = "" # not reading a digit -> therefore restart elif c not in digits: - progressstring = "" + s = "" # add digit to progressstring else: - progressstring = progressstring + c - progress(100) - - # retrieve stderr - err = p.stderr.read() - - if "CRC failed" in err and not password and not self.passwordProtected: - raise CRCError - elif "CRC failed" in err: - raise WrongPassword - if err.strip(): #: raise error if anything is on stderr - raise ArchiveError(err.strip()) + s += c + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, fs_encode(self.filename), self.out, password=password) + + renice(p.pid, self.renice) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) + if p.returncode: - raise ArchiveError("Process terminated") + raise ArchiveError(_("Process return code: %d") % p.returncode) - if not self.files: - self.password = password - self.listContent() + self.files = self.list(password) def getDeleteFiles(self): - if ".part" in basename(self.file): - return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) - # get files which matches .r* and filter unsuited files out - parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) - return filter(lambda x: self.re_partfiles.match(x), parts) + dir, name = os.path.split(self.filename) + + # actually extracted file + files = [self.filename] + + # eventually Multipart Files + files.extend(save_join(dir, os.path.basename(file)) for file in filter(self.isMultipart, os.listdir(dir)) + if re.sub(self.re_multipart,".rar",name) == re.sub(self.re_multipart,".rar",file)) + return files - def listContent(self): + + def list(self, password=None): command = "vb" if self.fullpath else "lb" - p = self.call_unrar(command, "-v", self.file, password=self.password) + + p = self.call_cmd(command, "-v", fs_encode(self.filename), password=password) out, err = p.communicate() if "Cannot open" in err: - raise ArchiveError("Cannot open file") + raise ArchiveError(_("Cannot open file")) if err.strip(): #: only log error at this point - self.m.logError(err.strip()) + self.manager.logError(err.strip()) result = set() + if not self.fullpath and self.VERSION.startswith('5'): + # NOTE: Unrar 5 always list full path + for f in decode(out).splitlines(): + f = save_join(self.out, os.path.basename(f.strip())) + if os.path.isfile(f): + result.add(save_join(self.out, os.path.basename(f))) + else: + for f in decode(out).splitlines(): + f = f.strip() + result.add(save_join(self.out, f)) - for f in decode(out).splitlines(): - f = f.strip() - result.add(safe_join(self.out, f)) - - self.files = result + return list(result) - def call_unrar(self, command, *xargs, **kwargs): + def call_cmd(self, command, *xargs, **kwargs): args = [] + # overwrite flag - args.append("-o+") if self.overwrite else args.append("-o-") + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") - if self.excludefiles: - for word in self.excludefiles.split(';'): - args.append("-x%s" % word) + for word in self.excludefiles: + args.append("-x'%s'" % word.strip()) # assume yes on all queries args.append("-y") @@ -212,10 +236,13 @@ class UnRar(AbtractExtractor): else: args.append("-p-") + if self.keepbroken: + args.append("-kb") + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) - self.m.logDebug(" ".join(call)) - p = Popen(call, stdout=PIPE, stderr=PIPE) + self.manager.logDebug(" ".join(call)) + p = Popen(call, stdout=PIPE, stderr=PIPE) return p diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index dded6158e..704b49ca0 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,41 +1,68 @@ # -*- coding: utf-8 -*- +from __future__ import with_statement + +import os import sys import zipfile -from pyload.plugin.internal.AbstractExtractor import AbtractExtractor +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from module.utils import fs_encode -class UnZip(AbtractExtractor): +class UnZip(Extractor): __name__ = "UnZip" - __version__ = "0.10" + __version__ = "1.10" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = [".zip", ".zip64"] + VERSION ="(python %s.%s.%s)" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]) - @staticmethod - def checkDeps(): + @classmethod + def isUsable(cls): return sys.version_info[:2] >= (2, 6) - @staticmethod - def getTargets(files_ids): - result = [] + def list(self, password=None): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + return z.namelist() + + + def check(self): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + badfile = z.testzip() + + if badfile: + raise CRCError(badfile) + else: + raise PasswordError - for file, id in files_ids: - if file.endswith(".zip"): - result.append((file, id)) - return result + def extract(self, password=None): + try: + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + badfile = z.testzip() - def extract(self, progress, password=None): - z = zipfile.ZipFile(self.file) - self.files = z.namelist() - z.extractall(self.out) + if badfile: + raise CRCError(badfile) + else: + z.extractall(self.out) + except (zipfile.BadZipfile, zipfile.LargeZipFile), e: + raise ArchiveError(e) - def getDeleteFiles(self): - return [self.file] + except RuntimeError, e: + if "encrypted" in e: + raise PasswordError + else: + raise ArchiveError(e) + else: + self.files = z.namelist() diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py index 7a9db0e79..2e6b7dc50 100644 --- a/module/plugins/internal/XFSAccount.py +++ b/module/plugins/internal/XFSAccount.py @@ -12,7 +12,7 @@ from pyload.plugin.internal.SimpleHoster import parseHtmlForm, set_cookies class XFSAccount(Account): __name__ = "XFSAccount" __type__ = "account" - __version__ = "0.32" + __version__ = "0.36" __description__ = """XFileSharing account plugin""" __license__ = "GPLv3" @@ -22,8 +22,9 @@ class XFSAccount(Account): HOSTER_DOMAIN = None HOSTER_URL = None + LOGIN_URL = None - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + COOKIES = True PREMIUM_PATTERN = r'\(Premium only\)' @@ -35,15 +36,21 @@ class XFSAccount(Account): LEECH_TRAFFIC_PATTERN = r'Leech Traffic left:<b>.*?(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' LEECH_TRAFFIC_UNIT = "MB" #: used only if no group <U> was found - LOGIN_FAIL_PATTERN = r'>\s*(Incorrect Login or Password|Error<)' + LOGIN_FAIL_PATTERN = r'Incorrect Login or Password|account was banned|Error<' def init(self): - # if not self.HOSTER_DOMAIN: - # self.fail(_("Missing HOSTER_DOMAIN")) + if not self.HOSTER_DOMAIN: + self.logError(_("Missing HOSTER_DOMAIN")) + self.COOKIES = False - if not self.HOSTER_URL: - self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN + else: + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN + + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + set_cookies(req.cj, self.COOKIES) def loadAccountInfo(self, user, req): @@ -52,6 +59,12 @@ class XFSAccount(Account): leechtraffic = None premium = None + if not self.HOSTER_URL: #@TODO: Remove in 0.4.10 + return {'validuntil' : validuntil, + 'trafficleft' : trafficleft, + 'leechtraffic': leechtraffic, + 'premium' : premium} + html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) premium = True if re.search(self.PREMIUM_PATTERN, html) else False @@ -71,10 +84,10 @@ class XFSAccount(Account): self.logDebug("Valid until: %s" % validuntil) if validuntil > mktime(gmtime()): - premium = True + premium = True trafficleft = -1 else: - premium = False + premium = False validuntil = None #: registered account type (not premium) else: self.logDebug("VALID_UNTIL_PATTERN not found") @@ -131,25 +144,31 @@ class XFSAccount(Account): else: self.logDebug("LEECH_TRAFFIC_PATTERN not found") - return {'validuntil': validuntil, 'trafficleft': trafficleft, 'leechtraffic': leechtraffic, 'premium': premium} + return {'validuntil' : validuntil, + 'trafficleft' : trafficleft, + 'leechtraffic': leechtraffic, + 'premium' : premium} def login(self, user, data, req): - if isinstance(self.COOKIES, list): - set_cookies(req.cj, self.COOKIES) + if not self.HOSTER_URL: #@TODO: Remove in 0.4.10 + raise Exception(_("Missing HOSTER_DOMAIN")) - url = urljoin(self.HOSTER_URL, "login.html") - html = req.load(url, decode=True) + if not self.LOGIN_URL: + self.LOGIN_URL = urljoin(self.HOSTER_URL, "login.html") + html = req.load(self.LOGIN_URL, decode=True) action, inputs = parseHtmlForm('name="FL"', html) if not inputs: - inputs = {'op': "login", + inputs = {'op' : "login", 'redirect': self.HOSTER_URL} - inputs.update({'login': user, + inputs.update({'login' : user, 'password': data['password']}) - html = req.load(self.HOSTER_URL, post=inputs, decode=True) + if not action: + action = self.HOSTER_URL + html = req.load(action, post=inputs, decode=True) if re.search(self.LOGIN_FAIL_PATTERN, html): self.wrongPassword() diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py index a3053b60c..665e13b18 100644 --- a/module/plugins/internal/XFSCrypter.py +++ b/module/plugins/internal/XFSCrypter.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -from pyload.plugin.internal.SimpleCrypter import SimpleCrypter +from module.plugins.internal.SimpleCrypter import SimpleCrypter, create_getInfo class XFSCrypter(SimpleCrypter): __name__ = "XFSCrypter" __type__ = "crypter" - __version__ = "0.04" + __version__ = "0.06" __pattern__ = r'^unmatchable$' @@ -16,14 +16,30 @@ class XFSCrypter(SimpleCrypter): HOSTER_DOMAIN = None - HOSTER_NAME = None URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] - LINK_PATTERN = r'<(?:td|TD).*?>\s*<a href="(.+?)".*?>.+?(?:</a>)?\s*</(?:td|TD)>' NAME_PATTERN = r'<[tT]itle>.*?\: (?P<N>.+) folder</[tT]itle>' OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' + + + def prepare(self): + if not self.HOSTER_DOMAIN: + if self.account: + account = self.account + else: + account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") + account = self.pyfile.m.core.accountManager.getAccountPlugin(account_name) + + if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: + self.HOSTER_DOMAIN = account.HOSTER_DOMAIN + else: + self.fail(_("Missing HOSTER_DOMAIN")) + + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) + + return super(XFSCrypter, self).prepare() diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index 0a7853966..ecdd2f125 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -4,17 +4,17 @@ import re from random import random from time import sleep +from urlparse import urljoin, urlparse -from pyload.plugin.hoster.UnrestrictLi import secondsToMidnight from pyload.plugin.internal.captcha import ReCaptcha, SolveMedia -from pyload.plugin.internal.SimpleHoster import SimpleHoster, create_getInfo +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, secondsToMidnight from pyload.utils import html_unescape class XFSHoster(SimpleHoster): __name__ = "XFSHoster" __type__ = "hoster" - __version__ = "0.27" + __version__ = "0.44" __pattern__ = r'^unmatchable$' @@ -26,15 +26,13 @@ class XFSHoster(SimpleHoster): HOSTER_DOMAIN = None - HOSTER_NAME = None - TEXT_ENCODING = False - COOKIES = [(HOSTER_DOMAIN, "lang", "english")] - CHECK_DIRECT_LINK = None - MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... + TEXT_ENCODING = False + DIRECT_LINK = None + MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... - NAME_PATTERN = r'(>Filename:</b></td><td nowrap>|name="fname" value="|<span class="name">)(?P<N>.+?)(\s*<|")' - SIZE_PATTERN = r'(>Size:</b></td><td>|>File:.*>|<span class="size">)(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' + NAME_PATTERN = r'(Filename[ ]*:[ ]*</b>(</td><td nowrap>)?|name="fname"[ ]+value="|<[\w^_]+ class="(file)?name">)\s*(?P<N>.+?)(\s*<|")' + SIZE_PATTERN = r'(Size[ ]*:[ ]*</b>(</td><td>)?|File:.*>|</font>\s*\(|<[\w^_]+ class="size">)\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' @@ -43,7 +41,7 @@ class XFSHoster(SimpleHoster): PREMIUM_ONLY_PATTERN = r'>This file is available for Premium Users only' ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' - LEECH_LINK_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' + LINK_LEECH_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' LINK_PATTERN = None #: final download url pattern CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' @@ -56,56 +54,40 @@ class XFSHoster(SimpleHoster): def setup(self): - self.chunkLimit = 1 + self.chunkLimit = -1 if self.premium else 1 self.resumeDownload = self.multiDL = self.premium def prepare(self): """ Initialize important variables """ if not self.HOSTER_DOMAIN: - self.fail(_("Missing HOSTER_DOMAIN")) + if self.account: + account = self.account + else: + account = self.pyfile.m.core.accountManager.getAccountPlugin(self.__name__) + + if account and hasattr(account, "HOSTER_DOMAIN") and account.HOSTER_DOMAIN: + self.HOSTER_DOMAIN = account.HOSTER_DOMAIN + else: + self.fail(_("Missing HOSTER_DOMAIN")) - if not self.HOSTER_NAME: - self.HOSTER_NAME = "".join([str.capitalize() for str in self.HOSTER_DOMAIN.split('.')]) + if isinstance(self.COOKIES, list): + self.COOKIES.insert((self.HOSTER_DOMAIN, "lang", "english")) if not self.LINK_PATTERN: - pattern = r'(https?://(www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' + pattern = r'(https?://(?:www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' self.LINK_PATTERN = pattern % self.HOSTER_DOMAIN.replace('.', '\.') - self.captcha = None - self.errmsg = None - self.passwords = self.getPassword().splitlines() + self.captcha = None + self.errmsg = None super(XFSHoster, self).prepare() - if self.CHECK_DIRECT_LINK is None: - self.directDL = bool(self.premium) - - - def handleFree(self): - link = self.getDownloadLink() - - if link: - if self.captcha: - self.correctCaptcha() - - self.download(link, ref=True, cookies=True, disposition=True) - - elif self.errmsg: - if 'captcha' in self.errmsg: - self.fail(_("No valid captcha code entered")) - else: - self.fail(self.errmsg) - - else: - self.fail(_("Download link not found")) - - - def handlePremium(self): - return self.handleFree() + if self.DIRECT_LINK is None: + self.directDL = self.premium - def getDownloadLink(self): + def handleFree(self, pyfile): for i in xrange(1, 6): self.logDebug("Getting download link: #%d" % i) @@ -130,12 +112,17 @@ class XFSHoster(SimpleHoster): self.logError(data['op'] if 'op' in data else _("UNKNOWN")) return "" - self.errmsg = None + self.link = m.group(1).strip() #@TODO: Remove .strip() in 0.4.10 - return m.group(1) + def handlePremium(self, pyfile): + return self.handleFree(pyfile) + + + def handleMulti(self, pyfile): + if not self.account: + self.fail(_("Only registered or premium users can use url leech feature")) - def handleMulti(self): #only tested with easybytez.com self.html = self.load("http://www.%s/" % self.HOSTER_DOMAIN) @@ -145,7 +132,7 @@ class XFSHoster(SimpleHoster): action += upload_id + "&js_on=1&utype=prem&upload_type=url" inputs['tos'] = '1' - inputs['url_mass'] = self.pyfile.url + inputs['url_mass'] = pyfile.url inputs['up1oad_type'] = 'url' self.logDebug(action, inputs) @@ -180,26 +167,18 @@ class XFSHoster(SimpleHoster): self.fail(stmsg) #get easybytez.com link for uploaded file - m = re.search(self.LEECH_LINK_PATTERN, self.html) + m = re.search(self.LINK_LEECH_PATTERN, self.html) if m is None: - self.error(_("LEECH_LINK_PATTERN not found")) + self.error(_("LINK_LEECH_PATTERN not found")) header = self.load(m.group(1), just_header=True, decode=True) if 'location' in header: #: Direct download link self.link = header['location'] - else: - self.fail(_("Download link not found")) def checkErrors(self): - m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) - if m: - self.info['error'] = "premium-only" - return - m = re.search(self.ERROR_PATTERN, self.html) - if m is None: self.errmsg = None else: @@ -208,8 +187,8 @@ class XFSHoster(SimpleHoster): self.logWarning(re.sub(r"<.*?>", " ", self.errmsg)) if 'wait' in self.errmsg: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)]) + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)) self.wait(wait_time, True) elif 'country' in self.errmsg: @@ -223,10 +202,10 @@ class XFSHoster(SimpleHoster): elif 'limit' in self.errmsg: if 'days' in self.errmsg: - delay = secondsToMidnight(gmt=2) + delay = secondsToMidnight(gmt=2) retries = 3 else: - delay = 1 * 60 * 60 + delay = 1 * 60 * 60 retries = 24 self.wantReconnect = True @@ -238,7 +217,7 @@ class XFSHoster(SimpleHoster): elif 'maintenance' in self.errmsg or 'maintainance' in self.errmsg: self.tempOffline() - elif 'download files up to' in self.errmsg: + elif 'up to' in self.errmsg: self.fail(_("File too large for free download")) else: @@ -269,8 +248,9 @@ class XFSHoster(SimpleHoster): if 'op' in inputs: if "password" in inputs: - if self.passwords: - inputs['password'] = self.passwords.pop(0) + password = self.getPassword() + if password: + inputs['password'] = password else: self.fail(_("Missing password")) @@ -307,33 +287,40 @@ class XFSHoster(SimpleHoster): if m: captcha_div = m.group(1) numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) + self.logDebug(captcha_div) - inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + + inputs['code'] = "".join(a[1] for a in sorted(numerals, key=lambda num: int(num[0]))) + self.logDebug("Captcha code: %s" % inputs['code'], numerals) return 2 recaptcha = ReCaptcha(self) try: captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) + except Exception: captcha_key = recaptcha.detect_key() + else: self.logDebug("ReCaptcha key: %s" % captcha_key) if captcha_key: - inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + inputs['recaptcha_response_field'], inputs['recaptcha_challenge_field'] = recaptcha.challenge(captcha_key) return 3 solvemedia = SolveMedia(self) try: captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) + except Exception: captcha_key = solvemedia.detect_key() + else: self.logDebug("SolveMedia key: %s" % captcha_key) if captcha_key: - inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) + inputs['adcopy_response'], inputs['adcopy_challenge'] = solvemedia.challenge(captcha_key) return 4 return 0 |