diff options
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/CaptchaService.py | 346 | ||||
-rw-r--r-- | module/plugins/internal/DeadCrypter.py | 32 | ||||
-rw-r--r-- | module/plugins/internal/DeadHoster.py | 36 | ||||
-rw-r--r-- | module/plugins/internal/Extractor.py (renamed from module/plugins/internal/AbstractExtractor.py) | 93 | ||||
-rw-r--r-- | module/plugins/internal/MultiHook.py | 218 | ||||
-rw-r--r-- | module/plugins/internal/MultiHoster.py | 209 | ||||
-rw-r--r-- | module/plugins/internal/SimpleCrypter.py | 176 | ||||
-rw-r--r-- | module/plugins/internal/SimpleDereferer.py | 96 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 680 | ||||
-rw-r--r-- | module/plugins/internal/UnRar.py | 277 | ||||
-rw-r--r-- | module/plugins/internal/UnZip.py | 100 | ||||
-rw-r--r-- | module/plugins/internal/XFSAccount.py | 160 | ||||
-rw-r--r-- | module/plugins/internal/XFSCrypter.py | 29 | ||||
-rw-r--r-- | module/plugins/internal/XFSHoster.py | 341 | ||||
-rw-r--r-- | module/plugins/internal/XFSPAccount.py | 82 |
15 files changed, 2137 insertions, 738 deletions
diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py index 400484d26..965799e8e 100644 --- a/module/plugins/internal/CaptchaService.py +++ b/module/plugins/internal/CaptchaService.py @@ -1,106 +1,332 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import re - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +from random import random - @author: zoidberg -""" +from module.common.json_layer import json_loads -import re -from random import random +class CaptchaService: + __name__ = "CaptchaService" + __version__ = "0.16" -class CaptchaService(): - __version__ = "0.04" + __description__ = """Base captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] - def __init__(self, plugin): - self.plugin = plugin + KEY_PATTERN = None -class ReCaptcha(): - RECAPTCHA_KEY_PATTERN = r"https?://(?:www\.)?google\.com/recaptcha/api/challenge\?k=(?P<key>\w+)" - RECAPTCHA_KEY_AJAX_PATTERN = r"Recaptcha\.create\s*\(\s*[\"'](?P<key>\w+)[\"']\s*," + key = None #: last key detected - recaptcha_key = None def __init__(self, plugin): self.plugin = plugin - def detect_key(self, html): - m = re.search(self.RECAPTCHA_KEY_PATTERN, html) - if not m: - m = re.search(self.RECAPTCHA_KEY_AJAX_PATTERN, html) + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("%s html not found") % self.__name__ + self.plugin.fail(errmsg) #@TODO: replace all plugin.fail(errmsg) with plugin.error(errmsg) in 0.4.10 + raise TypeError(errmsg) + + m = re.search(self.KEY_PATTERN, html) if m: - self.recaptcha_key = m.group('key') - return self.recaptcha_key + self.key = m.group(1).strip() + self.plugin.logDebug("%s key: %s" % (self.__name__, self.key)) + return self.key else: + self.plugin.logDebug("%s key not found" % self.__name__) return None + def challenge(self, key=None): - if not key and self.recaptcha_key: - key = self.recaptcha_key - elif not (key or self.recaptcha_key): - raise TypeError("ReCaptcha key not found") + raise NotImplementedError + - js = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={"k": key}, cookies=True) + def result(self, server, challenge): + raise NotImplementedError + + +class ReCaptcha(CaptchaService): + __name__ = "ReCaptcha" + __version__ = "0.08" + + __description__ = """ReCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = r'recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=([\w-]+)' + KEY_AJAX_PATTERN = r'Recaptcha\.create\s*\(\s*["\']([\w-]+)' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("ReCaptcha html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.KEY_PATTERN, html) or re.search(self.KEY_AJAX_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.plugin.logDebug("ReCaptcha key: %s" % self.key) + return self.key + else: + self.plugin.logDebug("ReCaptcha key not found") + return None + + def challenge(self, key=None): + if not key: + if self.detect_key(): + key = self.key + else: + errmsg = _("ReCaptcha key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + html = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={'k': key}) try: - challenge = re.search("challenge : '(.*?)',", js).group(1) - server = re.search("server : '(.*?)',", js).group(1) + challenge = re.search("challenge : '(.+?)',", html).group(1) + server = re.search("server : '(.+?)',", html).group(1) except: - self.plugin.fail("recaptcha error") - result = self.result(server, challenge) + errmsg = _("ReCaptcha challenge pattern not found") + self.plugin.fail(errmsg) + raise ValueError(errmsg) + + self.plugin.logDebug("ReCaptcha challenge: %s" % challenge) + + return challenge, self.result(server, challenge) - return challenge, result def result(self, server, challenge): - return self.plugin.decryptCaptcha("%simage" % server, get={"c": challenge}, - cookies=True, forceUser=True, imgtype="jpg") + result = self.plugin.decryptCaptcha("%simage" % server, + get={'c': challenge}, + cookies=True, + forceUser=True, + imgtype="jpg") + + self.plugin.logDebug("ReCaptcha result: %s" % result) + + return result class AdsCaptcha(CaptchaService): - def challenge(self, src): - js = self.plugin.req.load(src, cookies=True) + __name__ = "AdsCaptcha" + __version__ = "0.06" + + __description__ = """AdsCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*CaptchaId=(\d+)' + PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?[^"\']*PublicKey=([\w-]+)' + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("AdsCaptcha html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.PUBLICKEY_PATTERN, html) + n = re.search(self.CAPTCHAID_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) #: key is the tuple(PublicKey, CaptchaId) + self.plugin.logDebug("AdsCaptcha key|id: %s | %s" % self.key) + return self.key + else: + self.plugin.logDebug("AdsCaptcha key or id not found") + return None + + + def challenge(self, key=None): + if not key: + if self.detect_key(): + key = self.key + else: + errmsg = _("AdsCaptcha key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + PublicKey, CaptchaId = key + + html = self.plugin.req.load("http://api.adscaptcha.com/Get.aspx", get={'CaptchaId': CaptchaId, 'PublicKey': PublicKey}) try: - challenge = re.search("challenge: '(.*?)',", js).group(1) - server = re.search("server: '(.*?)',", js).group(1) + challenge = re.search("challenge: '(.+?)',", html).group(1) + server = re.search("server: '(.+?)',", html).group(1) except: - self.plugin.fail("adscaptcha error") - result = self.result(server, challenge) + errmsg = _("AdsCaptcha challenge pattern not found") + self.plugin.fail(errmsg) + raise ValueError(errmsg) + + self.plugin.logDebug("AdsCaptcha challenge: %s" % challenge) + + return challenge, self.result(server, challenge) - return challenge, result def result(self, server, challenge): - return self.plugin.decryptCaptcha("%sChallenge.aspx" % server, get={"cid": challenge, "dummy": random()}, - cookies=True, imgtype="jpg") + result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, + get={'cid': challenge, 'dummy': random()}, + cookies=True, + imgtype="jpg") + + self.plugin.logDebug("AdsCaptcha result: %s" % result) + + return result class SolveMedia(CaptchaService): - def __init__(self, plugin): - self.plugin = plugin + __name__ = "SolveMedia" + __version__ = "0.06" + + __description__ = """SolveMedia captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' - def challenge(self, src): - html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript?k=%s" % src, cookies=True) + + def challenge(self, key=None): + if not key: + if self.detect_key(): + key = self.key + else: + errmsg = _("SolveMedia key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript", get={'k': key}) try: challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="([^"]+)">', html).group(1) + server = "http://api.solvemedia.com/papi/media" except: - self.plugin.fail("solvemedia error") - result = self.result(challenge) + errmsg = _("SolveMedia challenge pattern not found") + self.plugin.fail(errmsg) + raise ValueError(errmsg) + + self.plugin.logDebug("SolveMedia challenge: %s" % challenge) + + return challenge, self.result(server, challenge) + + + def result(self, server, challenge): + result = self.plugin.decryptCaptcha(server, get={'c': challenge}, imgtype="gif") + + self.plugin.logDebug("SolveMedia result: %s" % result) + + return result + + +class AdYouLike(CaptchaService): + __name__ = "AdYouLike" + __version__ = "0.02" + + __description__ = """AdYouLike captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' + CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' + + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("AdYouLike html not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + m = re.search(self.AYL_PATTERN, html) + n = re.search(self.CALLBACK_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) + self.plugin.logDebug("AdYouLike ayl|callback: %s | %s" % self.key) + return self.key #: key is the tuple(ayl, callback) + else: + self.plugin.logDebug("AdYouLike ayl or callback not found") + return None + + + def challenge(self, key=None): + if not key: + if self.detect_key(): + key = self.key + else: + errmsg = _("AdYouLike key not found") + self.plugin.fail(errmsg) + raise TypeError(errmsg) + + ayl, callback = key + + # {"adyoulike":{"key":"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, + # "all":{"element_id":"ayl_private_cap_92300","lang":"fr","env":"prod"}} + ayl = json_loads(ayl) + + html = self.plugin.req.load("http://api-ayl.appspot.com/challenge", + get={'key' : ayl['adyoulike']['key'], + 'env' : ayl['all']['env'], + 'callback': callback}) + try: + challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) + except: + errmsg = _("AdYouLike challenge pattern not found") + self.plugin.fail(errmsg) + raise ValueError(errmsg) + + self.plugin.logDebug("AdYouLike challenge: %s" % challenge) + + return self.result(ayl, challenge) + + + def result(self, server, challenge): + # Adyoulike.g._jsonp_5579316662423138 + # ({"translations":{"fr":{"instructions_visual":"Recopiez « Soonnight » ci-dessous :"}}, + # "site_under":true,"clickable":true,"pixels":{"VIDEO_050":[],"DISPLAY":[],"VIDEO_000":[],"VIDEO_100":[], + # "VIDEO_025":[],"VIDEO_075":[]},"medium_type":"image/adyoulike", + # "iframes":{"big":"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" + # height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},"shares":{},"id":256, + # "token":"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1","formats":{"small":{"y":300,"x":0,"w":300,"h":60}, + # "big":{"y":0,"x":0,"w":300,"h":250},"hover":{"y":440,"x":0,"w":300,"h":60}}, + # "tid":"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) + + if isinstance(server, basestring): + server = json_loads(server) + + if isinstance(challenge, basestring): + challenge = json_loads(challenge) + + try: + instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] + result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() + except: + errmsg = _("AdYouLike result not found") + self.plugin.fail(errmsg) + raise ValueError(errmsg) + + result = {'_ayl_captcha_engine' : "adyoulike", + '_ayl_env' : server['all']['env'], + '_ayl_tid' : challenge['tid'], + '_ayl_token_challenge': challenge['token'], + '_ayl_response' : response} - return challenge, result + self.plugin.logDebug("AdYouLike result: %s" % result) - def result(self, challenge): - return self.plugin.decryptCaptcha("http://api.solvemedia.com/papi/media?c=%s" % challenge, imgtype="gif") + return result diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py index 10eccb9bd..07c5c3881 100644 --- a/module/plugins/internal/DeadCrypter.py +++ b/module/plugins/internal/DeadCrypter.py @@ -1,16 +1,32 @@ # -*- coding: utf-8 -*- +from urllib import unquote +from urlparse import urlparse + +from module.plugins.internal.SimpleCrypter import create_getInfo from module.plugins.Crypter import Crypter as _Crypter class DeadCrypter(_Crypter): - __name__ = "DeadCrypter" - __type__ = "crypter" - __pattern__ = None - __version__ = "0.01" - __description__ = """Crypter is no longer available""" - __author_name__ = "stickell" - __author_mail__ = "l.stickell@yahoo.it" + __name__ = "DeadCrypter" + __type__ = "crypter" + __version__ = "0.04" + + __pattern__ = r'^unmatchable$' + + __description__ = """ Crypter is no longer available """ + __license__ = "GPLv3" + __authors__ = [("stickell", "l.stickell@yahoo.it")] + + + @classmethod + def getInfo(cls, url="", html=""): + return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} + def setup(self): - self.fail("Crypter is no longer available") + self.pyfile.error = "Crypter is no longer available" + self.offline() #@TODO: self.offline("Crypter is no longer available") + + +getInfo = create_getInfo(DeadCrypter) diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py index 201835e2b..6f3252f70 100644 --- a/module/plugins/internal/DeadHoster.py +++ b/module/plugins/internal/DeadHoster.py @@ -1,22 +1,32 @@ # -*- coding: utf-8 -*- +from urllib import unquote +from urlparse import urlparse + +from module.plugins.internal.SimpleHoster import create_getInfo from module.plugins.Hoster import Hoster as _Hoster -def create_getInfo(plugin): - def getInfo(urls): - yield [('#N/A: ' + url, 0, 1, url) for url in urls] - return getInfo +class DeadHoster(_Hoster): + __name__ = "DeadHoster" + __type__ = "hoster" + __version__ = "0.14" + + __pattern__ = r'^unmatchable$' + __description__ = """ Hoster is no longer available """ + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz")] + + + @classmethod + def getInfo(cls, url="", html=""): + return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} -class DeadHoster(_Hoster): - __name__ = "DeadHoster" - __type__ = "hoster" - __pattern__ = None - __version__ = "0.11" - __description__ = """Hoster is no longer available""" - __author_name__ = "zoidberg" - __author_mail__ = "zoidberg@mujmail.cz" def setup(self): - self.fail("Hoster is no longer available") + self.pyfile.error = "Hoster is no longer available" + self.offline() #@TODO: self.offline("Hoster is no longer available") + + +getInfo = create_getInfo(DeadHoster) diff --git a/module/plugins/internal/AbstractExtractor.py b/module/plugins/internal/Extractor.py index 0ecc11f06..0b2462dac 100644 --- a/module/plugins/internal/AbstractExtractor.py +++ b/module/plugins/internal/Extractor.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- - class ArchiveError(Exception): pass @@ -9,30 +8,52 @@ class CRCError(Exception): pass -class WrongPassword(Exception): +class PasswordError(Exception): pass -class AbtractExtractor: +class Extractor: + __name__ = "Extractor" + __version__ = "0.13" + + __description__ = """Base extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN", "ranan@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = [] - __version__ = "0.1" - @staticmethod - def checkDeps(): + @classmethod + def checkDeps(cls): """ Check if system statisfy dependencies :return: boolean """ return True - @staticmethod - def getTargets(files_ids): + + @classmethod + def isArchive(cls, file): + raise NotImplementedError + + + @classmethod + def getTargets(cls, files_ids): """ Filter suited targets from list of filename id tuple list :param files_ids: List of filepathes :return: List of targets, id tuple list """ - raise NotImplementedError + targets = [] + + for file, id in files_ids: + if cls.isArchive(file): + targets.append((file, id)) + + return targets - def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice): + + def __init__(self, m, file, out, password, fullpath, overwrite, excludefiles, renice, delete, keepbroken): """Initialize extractor for specific file :param m: ExtractArchive Hook plugin @@ -42,49 +63,70 @@ class AbtractExtractor: :param overwrite: Overwrite existing archives :param renice: Renice value """ - self.m = m - self.file = file - self.out = out - self.fullpath = fullpath - self.overwrite = overwrite + self.m = m + self.file = file + self.out = out + self.password = password + self.fullpath = fullpath + self.overwrite = overwrite self.excludefiles = excludefiles - self.renice = renice - self.files = [] #: Store extracted files here + self.renice = renice + self.delete = delete + self.keepbroken = keepbroken + self.files = [] #: Store extracted files here + def init(self): """ Initialize additional data structures """ pass - def checkArchive(self): + + def verify(self): """Check if password if needed. Raise ArchiveError if integrity is questionable. - :return: boolean :raises ArchiveError """ - return False + pass + - def checkPassword(self, password): + def isPassword(self, password): """ Check if the given password is/might be correct. If it can not be decided at this point return true. :param password: :return: boolean """ - return True + if isinstance(password, basestring): + return True + else: + return False + + + def setPassword(self, password): + if self.isPassword(password): + self.password = password + return True + else: + return False - def extract(self, progress, password=None): + + def repair(self): + return False + + + def extract(self, progress=lambda x: None): """Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status - :param password password to use - :raises WrongPassword + :raises PasswordError :raises CRCError :raises ArchiveError :return: """ raise NotImplementedError + def getDeleteFiles(self): """Return list of files to delete, do *not* delete them here. @@ -92,6 +134,7 @@ class AbtractExtractor: """ raise NotImplementedError + def getExtractedFiles(self): """Populate self.files at some point while extracting""" return self.files diff --git a/module/plugins/internal/MultiHook.py b/module/plugins/internal/MultiHook.py new file mode 100644 index 000000000..78de1ed0a --- /dev/null +++ b/module/plugins/internal/MultiHook.py @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- + +import re + +from module.plugins.Hook import Hook +from module.utils import remove_chars + + +class MultiHook(Hook): + __name__ = "MultiHook" + __type__ = "hook" + __version__ = "0.23" + + __description__ = """Hook plugin for MultiHoster""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + interval = 12 * 60 * 60 #: reload hosters every 12h + + HOSTER_REPLACEMENTS = [("1fichier.com" , "onefichier.com"), + ("2shared.com" , "twoshared.com" ), + ("4shared.com" , "fourshared.com"), + ("cloudnator.com" , "shragle.com" ), + ("easy-share.com" , "crocko.com" ), + ("fileparadox.com", "fileparadox.in"), + ("freakshare.net" , "freakshare.com"), + ("hellshare.com" , "hellshare.cz" ), + ("ifile.it" , "filecloud.io" ), + ("nowdownload.ch" , "nowdownload.sx"), + ("nowvideo.co" , "nowvideo.sx" ), + ("putlocker.com" , "firedrive.com" ), + ("share-rapid.cz" , "multishare.cz" ), + ("sharerapid.cz" , "multishare.cz" ), + ("ul.to" , "uploaded.to" ), + ("uploaded.net" , "uploaded.to" )] + HOSTER_EXCLUDED = [] + + + def setup(self): + self.hosters = [] + self.supported = [] + self.new_supported = [] + + + def getConfig(self, option, default=''): + """getConfig with default value - sublass may not implements all config options""" + try: + return self.getConf(option) + + except KeyError: + return default + + + def getHosterCached(self): + if not self.hosters: + try: + hosterSet = self.toHosterSet(self.getHoster()) - set(self.HOSTER_EXCLUDED) + except Exception, e: + self.logError(e) + return [] + + try: + configMode = self.getConfig('hosterListMode', 'all') + if configMode in ("listed", "unlisted"): + configSet = self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) + + if configMode == "listed": + hosterSet &= configSet + else: + hosterSet -= configSet + + except Exception, e: + self.logError(e) + + self.hosters = list(hosterSet) + + return self.hosters + + + def toHosterSet(self, hosters): + hosters = set((str(x).strip().lower() for x in hosters)) + + for rep in self.HOSTER_REPLACEMENTS: + if rep[0] in hosters: + hosters.remove(rep[0]) + hosters.add(rep[1]) + + hosters.discard('') + return hosters + + + def getHoster(self): + """Load list of supported hoster + + :return: List of domain names + """ + raise NotImplementedError + + + def coreReady(self): + if self.cb: + self.core.scheduler.removeJob(self.cb) + + self.setConfig("activated", True) #: config not in sync after plugin reload + + cfg_interval = self.getConfig("interval", None) #: reload interval in hours + if cfg_interval is not None: + self.interval = cfg_interval * 60 * 60 + + if self.interval: + self._periodical() + else: + self.periodical() + + + def initPeriodical(self): + pass + + + def periodical(self): + """reload hoster list periodically""" + self.logInfo(_("Reloading supported hoster list")) + + old_supported = self.supported + self.supported = [] + self.new_supported = [] + self.hosters = [] + + self.overridePlugins() + + old_supported = [hoster for hoster in old_supported if hoster not in self.supported] + if old_supported: + self.logDebug("Unload: %s" % ", ".join(old_supported)) + for hoster in old_supported: + self.unloadHoster(hoster) + + + def overridePlugins(self): + pluginMap = dict((name.lower(), name) for name in self.core.pluginManager.hosterPlugins.iterkeys()) + accountList = [account.type.lower() for account in self.core.api.getAccounts(False) if account.valid and account.premium] + excludedList = [] + + for hoster in self.getHosterCached(): + name = remove_chars(hoster, "-.") + + if name in accountList: + excludedList.append(hoster) + else: + if name in pluginMap: + self.supported.append(pluginMap[name]) + else: + self.new_supported.append(hoster) + + if not self.supported and not self.new_supported: + self.logError(_("No Hoster loaded")) + return + + module = self.core.pluginManager.getPlugin(self.__name__) + klass = getattr(module, self.__name__) + + # inject plugin plugin + self.logDebug("Overwritten Hosters: %s" % ", ".join(sorted(self.supported))) + for hoster in self.supported: + hdict = self.core.pluginManager.hosterPlugins[hoster] + hdict['new_module'] = module + hdict['new_name'] = self.__name__ + + if excludedList: + self.logInfo(_("Hosters not overwritten: %s" % ", ".join(sorted(excludedList)))) + + if self.new_supported: + hosters = sorted(self.new_supported) + + self.logDebug("New Hosters: %s" % ", ".join(hosters)) + + # create new regexp + regexp = r'.*(%s).*' % "|".join([x.replace(".", "\.") for x in hosters]) + if hasattr(klass, "__pattern__") and isinstance(klass.__pattern__, basestring) and '://' in klass.__pattern__: + regexp = r'%s|%s' % (klass.__pattern__, regexp) + + self.logDebug("Regexp: %s" % regexp) + + hdict = self.core.pluginManager.hosterPlugins[self.__name__] + hdict['pattern'] = regexp + hdict['re'] = re.compile(regexp) + + + def unloadHoster(self, hoster): + hdict = self.core.pluginManager.hosterPlugins[hoster] + if "module" in hdict: + del hdict['module'] + + if "new_module" in hdict: + del hdict['new_module'] + del hdict['new_name'] + + + def unload(self): + """Remove override for all hosters. Scheduler job is removed by hookmanager""" + for hoster in self.supported: + self.unloadHoster(hoster) + + # reset pattern + klass = getattr(self.core.pluginManager.getPlugin(self.__name__), self.__name__) + hdict = self.core.pluginManager.hosterPlugins[self.__name__] + hdict['pattern'] = getattr(klass, "__pattern__", r'^unmatchable$') + hdict['re'] = re.compile(hdict['pattern']) + + + def downloadFailed(self, pyfile): + """remove plugin override if download fails but not if file is offline/temp.offline""" + if pyfile.hasStatus("failed") and self.getConfig("unloadFailing", True): + hdict = self.core.pluginManager.hosterPlugins[pyfile.pluginname] + if "new_name" in hdict and hdict['new_name'] == self.__name__: + self.logDebug("Unload MultiHook", pyfile.pluginname, hdict) + self.unloadHoster(pyfile.pluginname) + pyfile.setStatus("queued") diff --git a/module/plugins/internal/MultiHoster.py b/module/plugins/internal/MultiHoster.py index 97f3a5996..9d218c3b2 100644 --- a/module/plugins/internal/MultiHoster.py +++ b/module/plugins/internal/MultiHoster.py @@ -2,188 +2,79 @@ import re -from module.utils import remove_chars -from module.plugins.Hook import Hook +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies -class MultiHoster(Hook): - """ - Generic MultiHoster plugin - """ +class MultiHoster(SimpleHoster): + __name__ = "MultiHoster" + __type__ = "hoster" + __version__ = "0.26" - __version__ = "0.19" + __pattern__ = r'^unmatchable$' - replacements = [("2shared.com", "twoshared.com"), ("4shared.com", "fourshared.com"), ("cloudnator.com", "shragle.com"), - ("ifile.it", "filecloud.io"), ("easy-share.com", "crocko.com"), ("freakshare.net", "freakshare.com"), - ("hellshare.com", "hellshare.cz"), ("share-rapid.cz", "sharerapid.com"), ("sharerapid.cz", "sharerapid.com"), - ("ul.to", "uploaded.to"), ("uploaded.net", "uploaded.to"), ("1fichier.com", "onefichier.com")] - ignored = [] - interval = 24 * 60 * 60 #: reload hosters daily + __description__ = """Multi hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - def setup(self): - self.hosters = [] - self.supported = [] - self.new_supported = [] - - def getConfig(self, option, default=''): - """getConfig with default value - sublass may not implements all config options""" - try: - return self.getConf(option) - except KeyError: - return default - - def getHosterCached(self): - if not self.hosters: - - try: - hosterSet = self.toHosterSet(self.getHoster()) - set(self.ignored) - except Exception, e: - self.logError("%s" % str(e)) - return [] - - try: - configMode = self.getConfig('hosterListMode', 'all') - if configMode in ("listed", "unlisted"): - configSet = self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) - - if configMode == "listed": - hosterSet &= configSet - else: - hosterSet -= configSet - except Exception, e: - self.logError("%s" % str(e)) + LOGIN_ACCOUNT = True - self.hosters = list(hosterSet) - return self.hosters + def setup(self): + self.chunkLimit = 1 + self.multiDL = self.premium - def toHosterSet(self, hosters): - hosters = set((str(x).strip().lower() for x in hosters)) - for rep in self.replacements: - if rep[0] in hosters: - hosters.remove(rep[0]) - hosters.add(rep[1]) + def prepare(self): + self.info = {} + self.link = "" #@TODO: Move to hoster class in 0.4.10 + self.directDL = False #@TODO: Move to hoster class in 0.4.10 - hosters.discard('') - return hosters + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) - def getHoster(self): - """Load list of supported hoster + self.req.setOption("timeout", 120) - :return: List of domain names - """ - raise NotImplementedError + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) - def coreReady(self): - if self.cb: - self.core.scheduler.removeJob(self.cb) + if self.DIRECT_LINK is None: + self.directDL = self.__pattern__ != r'^unmatchable$' + else: + self.directDL = self.DIRECT_LINK - self.setConfig("activated", True) #: config not in sync after plugin reload + self.pyfile.url = replace_patterns(self.pyfile.url, + self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 - cfg_interval = self.getConfig("interval", None) #: reload interval in hours - if cfg_interval is not None: - self.interval = cfg_interval * 60 * 60 - if self.interval: - self._periodical() - else: - self.periodical() + def process(self, pyfile): + self.prepare() - def initPeriodical(self): - pass + if self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect() - def periodical(self): - """reload hoster list periodically""" - self.logInfo("Reloading supported hoster list") + if not self.link and not self.lastDownload: + self.preload() - old_supported = self.supported - self.supported, self.new_supported, self.hosters = [], [], [] + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium() - self.overridePlugins() + else: + self.logDebug("Handled as free download") + self.handleFree() - old_supported = [hoster for hoster in old_supported if hoster not in self.supported] - if old_supported: - self.logDebug("UNLOAD: %s" % ", ".join(old_supported)) - for hoster in old_supported: - self.unloadHoster(hoster) + self.downloadLink(self.link) + self.checkFile() - def overridePlugins(self): - pluginMap = {} - for name in self.core.pluginManager.hosterPlugins.keys(): - pluginMap[name.lower()] = name - accountList = [name.lower() for name, data in self.core.accountManager.accounts.items() if data] - excludedList = [] + def handlePremium(self): + return self.handleFree() - for hoster in self.getHosterCached(): - name = remove_chars(hoster.lower(), "-.") - if name in accountList: - excludedList.append(hoster) - else: - if name in pluginMap: - self.supported.append(pluginMap[name]) - else: - self.new_supported.append(hoster) - - if not self.supported and not self.new_supported: - self.logError(_("No Hoster loaded")) - return - - module = self.core.pluginManager.getPlugin(self.__name__) - klass = getattr(module, self.__name__) - - # inject plugin plugin - self.logDebug("Overwritten Hosters: %s" % ", ".join(sorted(self.supported))) - for hoster in self.supported: - dict = self.core.pluginManager.hosterPlugins[hoster] - dict["new_module"] = module - dict["new_name"] = self.__name__ - - if excludedList: - self.logInfo("The following hosters were not overwritten - account exists: %s" % ", ".join(sorted(excludedList))) - - if self.new_supported: - self.logDebug("New Hosters: %s" % ", ".join(sorted(self.new_supported))) - - # create new regexp - regexp = r".*(%s).*" % "|".join([x.replace(".", "\\.") for x in self.new_supported]) - if hasattr(klass, "__pattern__") and isinstance(klass.__pattern__, basestring) and '://' in klass.__pattern__: - regexp = r"%s|%s" % (klass.__pattern__, regexp) - - self.logDebug("Regexp: %s" % regexp) - - dict = self.core.pluginManager.hosterPlugins[self.__name__] - dict["pattern"] = regexp - dict["re"] = re.compile(regexp) - - def unloadHoster(self, hoster): - dict = self.core.pluginManager.hosterPlugins[hoster] - if "module" in dict: - del dict["module"] - - if "new_module" in dict: - del dict["new_module"] - del dict["new_name"] - - def unload(self): - """Remove override for all hosters. Scheduler job is removed by hookmanager""" - for hoster in self.supported: - self.unloadHoster(hoster) - - # reset pattern - klass = getattr(self.core.pluginManager.getPlugin(self.__name__), self.__name__) - dict = self.core.pluginManager.hosterPlugins[self.__name__] - dict["pattern"] = getattr(klass, '__pattern__', r"^unmatchable$") - dict["re"] = re.compile(dict["pattern"]) - - def downloadFailed(self, pyfile): - """remove plugin override if download fails but not if file is offline/temp.offline""" - if pyfile.hasStatus("failed") and self.getConfig("unloadFailing", True): - hdict = self.core.pluginManager.hosterPlugins[pyfile.pluginname] - if "new_name" in hdict and hdict['new_name'] == self.__name__: - self.logDebug("Unload MultiHoster", pyfile.pluginname, hdict) - self.unloadHoster(pyfile.pluginname) - pyfile.setStatus("queued") + def handleFree(self): + if self.premium: + raise NotImplementedError + else: + self.fail(_("Required premium account not found")) diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index b1a18f5e0..428826456 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -1,78 +1,142 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +import re - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +from urlparse import urlparse - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +from module.plugins.Crypter import Crypter +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies +from module.utils import fixup - @author: zoidberg -""" -import re - -from module.plugins.Crypter import Crypter -from module.utils import html_unescape -from module.plugins.internal.SimpleHoster import replace_patterns +class SimpleCrypter(Crypter, SimpleHoster): + __name__ = "SimpleCrypter" + __type__ = "crypter" + __version__ = "0.35" + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config['general']['folder_per_package'] + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] -class SimpleCrypter(Crypter): - __name__ = "SimpleCrypter" - __version__ = "0.07" - __pattern__ = None - __type__ = "crypter" __description__ = """Simple decrypter plugin""" - __author_name__ = ("stickell", "zoidberg") - __author_mail__ = ("l.stickell@yahoo.it", "zoidberg@mujmail.cz") + __license__ = "GPLv3" + __authors__ = [("stickell", "l.stickell@yahoo.it"), + ("zoidberg", "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com")] + + """ - These patterns should be defined by each crypter: + Following patterns should be defined by each crypter: + + LINK_PATTERN: Download link or regex to catch links in group(1) + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + + NAME_PATTERN: (optional) folder name or page title + example: NAME_PATTERN = r'<title>Files of: (?P<N>[^<]+) folder</title>' + + OFFLINE_PATTERN: (optional) Checks if the page is unreachable + example: OFFLINE_PATTERN = r'File (deleted|not found)' - LINK_PATTERN: group(1) must be a download link - example: <div class="link"><a href="(http://speedload.org/\w+) + TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' - TITLE_PATTERN: (optional) the group defined by 'title' should be the title - example: <title>Files of: (?P<title>[^<]+) folder</title> - If it's impossible to extract the links using the LINK_PATTERN only you can override the getLinks method. + You can override the getLinks method if you need a more sophisticated way to extract the links. - If the links are disposed on multiple pages you need to define a pattern: - PAGES_PATTERN: the group defined by 'pages' must be the total number of pages + If the links are splitted on multiple pages you can define the PAGES_PATTERN regex: - and a function: + PAGES_PATTERN: (optional) group(1) should be the number of overall pages containing the links + example: PAGES_PATTERN = r'Pages: (\d+)' - loadPage(self, page_n): - must return the html of the page number 'page_n' + and its loadPage method: + + + def loadPage(self, page_n): + return the html of the page number page_n """ - FILE_URL_REPLACEMENTS = [] + LINK_PATTERN = None + + NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + URL_REPLACEMENTS = [] + + TEXT_ENCODING = False #: Set to True or encoding name if encoding in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + + LOGIN_ACCOUNT = False + LOGIN_PREMIUM = False + + + #@TODO: Remove in 0.4.10 + def init(self): + account_name = (self.__name__ + ".py").replace("Folder.py", "").replace(".py", "") + account = self.core.accountManager.getAccountPlugin(account_name) + + if account and account.canUse(): + self.user, data = account.selectAccount() + self.req = account.getAccountRequest(self.user) + self.premium = account.isPremium(self.user) + + self.account = account + + + def prepare(self): + self.info = {} + self.links = [] #@TODO: Move to hoster class in 0.4.10 + + if self.LOGIN_PREMIUM and not self.premium: + self.fail(_("Required premium account not found")) + + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + def decrypt(self, pyfile): - pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS) + self.prepare() + + self.preload() - self.html = self.load(pyfile.url, decode=True) + if self.html is None: + self.fail(_("No html retrieved")) - package_name, folder_name = self.getPackageNameAndFolder() + self.checkInfo() - self.package_links = self.getLinks() + self.links = self.getLinks() if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): self.handleMultiPages() - self.logDebug('Package has %d links' % len(self.package_links)) + self.logDebug("Package has %d links" % len(self.links)) - if self.package_links: - self.packages = [(package_name, self.package_links, folder_name)] + if self.links: + self.packages = [(self.info['name'], self.links, self.info['folder'])] + + + def checkNameSize(self, getinfo=True): + if getinfo: + self.updateInfo(self.getInfo(self.pyfile.url, self.html)) + + name = self.info['name'] + url = self.info['url'] + + if name and name != url: + self.pyfile.name = name else: - self.fail('Could not extract any links') + self.pyfile.name = self.info['name'] = urlparse(name).path.split('/')[-1] + + folder = self.info['folder'] = self.pyfile.name + + self.logDebug("File name: %s" % self.pyfile.name, + "File folder: %s" % folder) + def getLinks(self): """ @@ -81,26 +145,14 @@ class SimpleCrypter(Crypter): """ return re.findall(self.LINK_PATTERN, self.html) - def getPackageNameAndFolder(self): - if hasattr(self, 'TITLE_PATTERN'): - m = re.search(self.TITLE_PATTERN, self.html) - if m: - name = folder = html_unescape(m.group('title').strip()) - self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder)) - return name, folder - - name = self.pyfile.package().name - folder = self.pyfile.package().folder - self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder)) - return name, folder def handleMultiPages(self): - pages = re.search(self.PAGES_PATTERN, self.html) - if pages: - pages = int(pages.group('pages')) - else: + try: + m = re.search(self.PAGES_PATTERN, self.html) + pages = int(m.group(1)) + except: pages = 1 for p in xrange(2, pages + 1): self.html = self.loadPage(p) - self.package_links += self.getLinks() + self.links += self.getLinks() diff --git a/module/plugins/internal/SimpleDereferer.py b/module/plugins/internal/SimpleDereferer.py new file mode 100644 index 000000000..0ad1098f4 --- /dev/null +++ b/module/plugins/internal/SimpleDereferer.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import re + +from urllib import unquote + +from module.plugins.Crypter import Crypter +from module.plugins.internal.SimpleHoster import _isDirectLink, set_cookies + + +class SimpleDereferer(Crypter): + __name__ = "SimpleDereferer" + __type__ = "crypter" + __version__ = "0.01" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description__ = """Simple dereferer plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Following patterns should be defined by each crypter: + + LINK_PATTERN: Regex to catch the redirect url in group(1) + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + + OFFLINE_PATTERN: (optional) Checks if the page is unreachable + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + + You can override the getLinks method if you need a more sophisticated way to extract the redirect url. + """ + + LINK_PATTERN = None + + TEXT_ENCODING = False + COOKIES = True + + + def decrypt(self, pyfile): + link = _isDirectLink(pyfile.url) + + if not link: + try: + link = unquote(re.match(self.__pattern__, pyfile.url).group('LINK')) + + except Exception: + self.prepare() + self.preload() + + if self.html is None: + self.fail(_("No html retrieved")) + + self.checkStatus() + + link = self.getLink() + + if link.strip(): + self.urls = [link.strip()] #@TODO: Remove `.strip()` in 0.4.10 + + + def prepare(self): + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + + def preload(self): + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + + + def checkStatus(self): + if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): + self.offline() + + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): + self.tempOffline() + + + def getLink(self): + try: + return re.search(self.LINK_PATTERN, self.html).group(1) + + except Exception: + pass diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index f10433e78..d9732d063 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,36 +1,51 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: zoidberg -""" -from urlparse import urlparse import re + +from os.path import exists from time import time +from urllib import unquote +from urlparse import urljoin, urlparse -from module.plugins.Hoster import Hoster -from module.utils import html_unescape, fixup, parseFileSize -from module.network.RequestFactory import getURL +from module.PyFile import statusMap as _statusMap from module.network.CookieJar import CookieJar +from module.network.RequestFactory import getURL +from module.plugins.Hoster import Hoster +from module.plugins.Plugin import Fail +from module.utils import fixup, fs_encode, parseFileSize + + +#@TODO: Adapt and move to PyFile in 0.4.10 +statusMap = dict((v, k) for k, v in _statusMap.iteritems()) + + +#@TODO: Remove in 0.4.10 and redirect to self.error instead +def _error(self, reason, type): + if not reason and not type: + type = "unknown" + + msg = _("%s error") % type.strip().capitalize() if type else _("Error") + msg += ": %s" % reason.strip() if reason else "" + msg += _(" | Plugin may be out of date") + + raise Fail(msg) + + +#@TODO: Remove in 0.4.10 +def _wait(self, seconds, reconnect): + if seconds: + self.setWait(int(seconds) + 1) + + if reconnect is not None: + self.wantReconnect = reconnect + + super(SimpleHoster, self).wait() def replace_patterns(string, ruleslist): for r in ruleslist: rf, rt = r string = re.sub(rf, rt, string) - #self.logDebug(rf, rt, string) return string @@ -46,23 +61,24 @@ def parseHtmlTagAttrValue(attr_name, tag): return m.group(2) if m else None -def parseHtmlForm(attr_str, html, input_names=None): - for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, +def parseHtmlForm(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, html, re.S | re.I): inputs = {} - action = parseHtmlTagAttrValue("action", form.group('tag')) - for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I): + action = parseHtmlTagAttrValue("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): name = parseHtmlTagAttrValue("name", inputtag.group(1)) if name: value = parseHtmlTagAttrValue("value", inputtag.group(1)) - if value is None: + if not value: inputs[name] = inputtag.group(3) or '' else: inputs[name] = value - if isinstance(input_names, dict): + if input_names: # check input attributes - for key, val in input_names.items(): + for key, val in input_names.iteritems(): if key in inputs: if isinstance(val, basestring) and inputs[key] == val: continue @@ -70,219 +86,535 @@ def parseHtmlForm(attr_str, html, input_names=None): continue elif hasattr(val, "search") and re.match(val, inputs[key]): continue - break # attibute value does not match + break #: attibute value does not match else: - break # attibute name does not match + break #: attibute name does not match else: - return action, inputs # passed attribute check + return action, inputs #: passed attribute check else: # no attribute check return action, inputs - return {}, None # no matching form found + return {}, None #: no matching form found -def parseFileInfo(self, url='', html=''): - info = {"name": url, "size": 0, "status": 3} +#: Deprecated +def parseFileInfo(plugin, url="", html=""): + if hasattr(plugin, "getInfo"): + info = plugin.getInfo(url, html) + res = info['name'], info['size'], info['status'], info['url'] + else: + res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url - if hasattr(self, "pyfile"): - url = self.pyfile.url + return res - if hasattr(self, "req") and self.req.http.code == '404': - info['status'] = 1 + +#@TODO: Remove in 0.4.10 +#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10 +def create_getInfo(plugin): + if hasattr(plugin, "parseInfos"): + fn = lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)] else: - if not html and hasattr(self, "html"): - html = self.html - if isinstance(self.SH_BROKEN_ENCODING, (str, unicode)): - html = unicode(html, self.SH_BROKEN_ENCODING) - if hasattr(self, "html"): - self.html = html - - if hasattr(self, "FILE_OFFLINE_PATTERN") and re.search(self.FILE_OFFLINE_PATTERN, html): - # File offline - info['status'] = 1 - else: - online = False - try: - info.update(re.match(self.__pattern__, url).groupdict()) - except: - pass + fn = lambda urls: [parseFileInfo(url) for url in urls] - for pattern in ("FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): - try: - info.update(re.search(getattr(self, pattern), html).groupdict()) - online = True - except AttributeError: - continue + return fn - if online: - # File online, return name and size - info['status'] = 2 - if 'N' in info: - info['name'] = replace_patterns(info['N'], self.FILE_NAME_REPLACEMENTS) - if 'S' in info: - size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], - self.FILE_SIZE_REPLACEMENTS) - info['size'] = parseFileSize(size) - elif isinstance(info['size'], (str, unicode)): - if 'units' in info: - info['size'] += info['units'] - info['size'] = parseFileSize(info['size']) - if hasattr(self, "file_info"): - self.file_info = info +def timestamp(): + return int(time() * 1000) - return info['name'], info['size'], info['status'], url +#@TODO: Move to hoster class in 0.4.10 +def _isDirectLink(self, url, resumable=False): + link = "" -def create_getInfo(plugin): - def getInfo(urls): - for url in urls: - cj = CookieJar(plugin.__name__) - if isinstance(plugin.SH_COOKIES, list): - set_cookies(cj, plugin.SH_COOKIES) - file_info = parseFileInfo(plugin, url, getURL(replace_patterns(url, plugin.FILE_URL_REPLACEMENTS), - decode=not plugin.SH_BROKEN_ENCODING, cookies=cj)) - yield file_info + for i in xrange(5 if resumable else 1): + header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) - return getInfo + if 'content-disposition' in header or 'content-length' in header: + link = url + elif 'location' in header and header['location']: + location = header['location'] -def timestamp(): - return int(time() * 1000) + if not urlparse(location).scheme: + p = urlparse(url) + base = "%s://%s" % (p.scheme, p.netloc) + location = urljoin(base, location) + if 'code' in header and header['code'] == 302: + link = location -class PluginParseError(Exception): - def __init__(self, msg): - Exception.__init__(self) - self.value = 'Parse error (%s) - plugin may be out of date' % msg + elif resumable: + url = location + self.logDebug("Redirect #%d to: %s" % (++i, location)) + continue - def __str__(self): - return repr(self.value) + elif 'content-type' in header and header['content-type' ] and "html" not in header['content-type']: + link = url + + break + else: + self.logError(_("Too many redirects")) + + return link + + +def secondsToMidnight(gmt=0): + now = datetime.utcnow() + timedelta(hours=gmt) + + if now.hour is 0 and now.minute < 10: + midnight = now + else: + midnight = now + timedelta(days=1) + + td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now + + if hasattr(td, 'total_seconds'): + res = td.total_seconds() + else: #@NOTE: work-around for python 2.5 and 2.6 missing timedelta.total_seconds + res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + return int(res) class SimpleHoster(Hoster): - __name__ = "SimpleHoster" - __version__ = "0.33" - __pattern__ = None - __type__ = "hoster" + __name__ = "SimpleHoster" + __type__ = "hoster" + __version__ = "0.82" + + __pattern__ = r'^unmatchable$' + __description__ = """Simple hoster plugin""" - __author_name__ = ("zoidberg", "stickell") - __author_mail__ = ("zoidberg@mujmail.cz", "l.stickell@yahoo.it") + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com")] + + """ - These patterns should be defined by each hoster: - FILE_INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>units)' - or FILE_NAME_PATTERN = r'(?P<N>file_name)' - and FILE_SIZE_PATTERN = r'(?P<S>file_size) (?P<U>units)' - FILE_OFFLINE_PATTERN = r'File (deleted|not found)' - TEMP_OFFLINE_PATTERN = r'Server maintainance' - - You can also define a PREMIUM_ONLY_PATTERN to detect links that can be downloaded only with a premium account. + Info patterns should be defined by each hoster: + + INFO_PATTERN: (optional) Name and Size of the file + example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' + or + NAME_PATTERN: (optional) Name that will be set for the file + example: NAME_PATTERN = r'(?P<N>file_name)' + SIZE_PATTERN: (optional) Size that will be checked for the file + example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' + + HASHSUM_PATTERN: (optional) Hash code and type of the file + example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' + + OFFLINE_PATTERN: (optional) Check if the page is unreachable + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable + example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' + + + Error handling patterns are all optional: + + WAIT_PATTERN: (optional) Detect waiting time + example: WAIT_PATTERN = r'' + + PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account + example: PREMIUM_ONLY_PATTERN = r'Premium account required' + + ERROR_PATTERN: (optional) Detect any error preventing download + example: ERROR_PATTERN = r'' + + + Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download: + + LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download + example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' + + LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download + example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' """ - FILE_SIZE_REPLACEMENTS = [] - FILE_NAME_REPLACEMENTS = [("&#?\w+;", fixup)] - FILE_URL_REPLACEMENTS = [] + NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + SIZE_REPLACEMENTS = [] + URL_REPLACEMENTS = [] + + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account + DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False + MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + + + @classmethod + def parseInfos(cls, urls): + for url in urls: + url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 + yield cls.getInfo(url) + + + @classmethod + def getInfo(cls, url="", html=""): + info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url} + online = False + + try: + info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api'] + except Exception: + pass + + if not html: + try: + if not url: + info['error'] = "missing url" + info['status'] = 1 + raise + + if _isDirectLink(url): + info['error'] = "direct link" + info['status'] = 2 + raise + + try: + html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) + + if isinstance(cls.TEXT_ENCODING, basestring): + html = unicode(html, cls.TEXT_ENCODING) + + except BadHeader, e: + info['error'] = "%d: %s" % (e.code, e.content) + + if e.code is 404: + info['status'] = 1 + raise + + if e.code is 503: + info['status'] = 6 + raise + except: + return info + + if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): + info['status'] = 1 + + elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10 + info['status'] = 1 + + elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): + info['status'] = 6 + + else: + if not 'pattern' in info: + info['pattern'] = {} + + for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN", + "FILE_NAME_PATTERN", "NAME_PATTERN", + "FILE_SIZE_PATTERN", "SIZE_PATTERN", + "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 + try: + attr = getattr(cls, pattern) + pdict = re.search(attr, html).groupdict() + + if all(True for k in pdict if k not in info['pattern']): + info['pattern'].update(pdict) + + except AttributeError: + continue + + else: + online = True + + if not info['pattern']: + info.pop('pattern', None) + + if online: + info['status'] = 2 + + if 'N' in info['pattern']: + info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), + cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10 + + if 'S' in info['pattern']: + size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], + cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10 + info['size'] = parseFileSize(size) + + elif isinstance(info['size'], basestring): + unit = info['units'] if 'units' in info else None + info['size'] = parseFileSize(info['size'], unit) + + if 'H' in info['pattern']: + hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" + info[hashtype] = info['pattern']['H'] - SH_BROKEN_ENCODING = False # Set to True or encoding name if encoding in http header is not correct - SH_COOKIES = True # or False or list of tuples [(domain, name, value)] - SH_CHECK_TRAFFIC = False # True = force check traffic left for a premium account + return info - def init(self): - self.file_info = {} def setup(self): self.resumeDownload = self.multiDL = self.premium - if isinstance(self.SH_COOKIES, list): - set_cookies(self.req.cj, self.SH_COOKIES) - def process(self, pyfile): - pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS) + + def prepare(self): + self.info = {} + self.link = "" #@TODO: Move to hoster class in 0.4.10 + self.directDL = False #@TODO: Move to hoster class in 0.4.10 + self.multihost = False #@TODO: Move to hoster class in 0.4.10 + self.req.setOption("timeout", 120) - # Due to a 0.4.9 core bug self.load would keep previous cookies even if overridden by cookies parameter. - # Workaround using getURL. Can be reverted in 0.5 as the cookies bug has been fixed. - self.html = getURL(pyfile.url, decode=not self.SH_BROKEN_ENCODING, cookies=self.SH_COOKIES) - premium_only = hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html) - if not premium_only: # Usually premium only pages doesn't show the file information - self.getFileInfo() - - if self.premium and (not self.SH_CHECK_TRAFFIC or self.checkTrafficLeft()): - self.handlePremium() - elif premium_only: - self.fail("This link require a premium account") + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + if (self.MULTI_HOSTER + and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern'] + or re.match(self.__pattern__, self.pyfile.url) is None)): + self.multihost = True + return + + if self.DIRECT_LINK is None: + self.directDL = bool(self.account) else: - # This line is required due to the getURL workaround. Can be removed in 0.5 - self.html = self.load(pyfile.url, decode=not self.SH_BROKEN_ENCODING, cookies=self.SH_COOKIES) - self.handleFree() + self.directDL = self.DIRECT_LINK - def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): - if type(url) == unicode: - url = url.encode('utf8') - return Hoster.load(self, url=url, get=get, post=post, ref=ref, cookies=cookies, - just_header=just_header, decode=decode) + self.pyfile.url = replace_patterns(self.pyfile.url, + self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 + + + def preload(self): + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + + + def process(self, pyfile): + self.prepare() + self.checkInfo() + + if self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect() + + if self.multihost and not self.link and not self.lastDownload: + self.logDebug("Looking for leeched download link...") + self.handleMulti() + + if not self.link and not self.lastDownload: + self.MULTI_HOSTER = False + self.retry(1, reason="Multi hoster fails") + + if not self.link and not self.lastDownload: + self.preload() + self.checkInfo() + + if self.html is None: + self.fail(_("No html retrieved")) + + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium() + + else: + self.logDebug("Handled as free download") + self.handleFree() + + self.downloadLink(self.link) + self.checkFile() + + + def downloadLink(self, link): + if link and isinstance(link, basestring): + self.correctCaptcha() + self.download(link, disposition=True) + + + def checkFile(self): + if self.cTask and not self.lastDownload: + self.invalidCaptcha() + self.retry(10, reason=_("Wrong captcha")) + + elif not self.lastDownload or not exists(fs_encode(self.lastDownload)): + errmsg = _("No file downloaded") + if 'error' in self.info: + self.fail(errmsg, self.info['error']) + else: + self.fail(errmsg) + + else: + rules = {'empty file': re.compile(r"^$")} + + if hasattr(self, 'ERROR_PATTERN'): + rules['error'] = re.compile(self.ERROR_PATTERN) + + check = self.checkDownload(rules) + if check: #@TODO: Move to hoster in 0.4.10 + errmsg = check.strip().capitalize() + (" | " + self.lastCheck.strip() if self.lastCheck else "") + self.retry(10, 60, errmsg) - def getFileInfo(self): - self.logDebug("URL: %s" % self.pyfile.url) - if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): - self.tempOffline() - name, size, status = parseFileInfo(self)[:3] + def checkErrors(self): + if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + self.fail(_("Link require a premium account to be handled")) - if status == 1: + if hasattr(self, 'ERROR_PATTERN'): + m = re.search(self.ERROR_PATTERN, self.html) + if m: + errmsg = self.info['error'] = m.group(1) + self.error(errmsg) + + if hasattr(self, 'WAIT_PATTERN'): + m = re.search(self.WAIT_PATTERN, self.html) + if m: + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)]) + self.wait(wait_time, wait_time > 300) + return + + self.info.pop('error', None) + + + def checkStatus(self, getinfo=True): + if getinfo: + self.updateInfo(self.getInfo(self.pyfile.url, self.html)) + + status = self.info['status'] + + if status is 1: self.offline() - elif status != 2: - self.logDebug(self.file_info) - self.parseError('File info') - if name: + elif status is 6: + self.tempOffline() + + elif status is not 2: + self.logDebug("File status: %s" % statusMap[status], + "File info: %s" % self.info) + + + def checkNameSize(self, getinfo=True): + if getinfo: + self.updateInfo(self.getInfo(self.pyfile.url, self.html)) + + name = self.info['name'] + size = self.info['size'] + url = self.info['url'] + + if name and name != url: self.pyfile.name = name else: - self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] - if size: + if size > 0: self.pyfile.size = size else: - self.logError("File size not parsed") + size = "Unknown" + + self.logDebug("File name: %s" % name, + "File size: %s" % size) + + + def checkInfo(self): + self.checkNameSize() + + if self.html: + self.checkErrors() + + self.checkNameSize() + self.checkStatus(getinfo=False) + + + #: Deprecated + def getFileInfo(self): + self.info = {} + self.checkInfo() + return self.info + + + def updateInfo(self, info): + self.logDebug(_("File info (BEFORE): %s") % self.info) + self.info.update(info) + self.logDebug(_("File info (AFTER): %s") % self.info) + + + def handleDirect(self): + link = _isDirectLink(self, self.pyfile.url, self.resumeDownload) + + if link: + self.logInfo(_("Direct download link detected")) + + self.link = link + else: + self.logDebug(_("Direct download link not found")) + + + def handleMulti(self): #: Multi-hoster handler + pass - self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) - return self.file_info def handleFree(self): - self.fail("Free download not implemented") + if not hasattr(self, 'LINK_FREE_PATTERN'): + self.fail(_("Free download not implemented")) + + try: + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + + self.link = m.group(1) + + except Exception, e: + self.fail(e) + def handlePremium(self): - self.fail("Premium download not implemented") + if not hasattr(self, 'LINK_PREMIUM_PATTERN'): + self.fail(_("Premium download not implemented")) + + try: + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + + self.link = m.group(1) + + except Exception, e: + self.fail(e) - def parseError(self, msg): - raise PluginParseError(msg) def longWait(self, wait_time=None, max_tries=3): if wait_time and isinstance(wait_time, (int, long, float)): - time_str = "%dh %dm" % divmod(wait_time / 60, 60) + time_str = "%dh %dm" % divmod(wait_time / 60, 60) else: wait_time = 900 - time_str = "(unknown time)" + time_str = _("(unknown time)") max_tries = 100 - self.logInfo("Download limit reached, reconnect or wait %s" % time_str) + self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) self.setWait(wait_time, True) self.wait() - self.retry(max_tries=max_tries, reason="Download limit reached") + self.retry(max_tries=max_tries, reason=_("Download limit reached")) - def parseHtmlForm(self, attr_str='', input_names=None): + + def parseHtmlForm(self, attr_str="", input_names={}): return parseHtmlForm(attr_str, self.html, input_names) + def checkTrafficLeft(self): - traffic = self.account.getAccountInfo(self.user, True)["trafficleft"] - if traffic == -1: + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] + + if traffic is None: + return False + elif traffic == -1: return True - size = self.pyfile.size / 1024 - self.logInfo("Filesize: %i KiB, Traffic left for user %s: %i KiB" % (size, self.user, traffic)) - return size <= traffic - - # TODO: Remove in 0.5 - def wait(self, seconds=False, reconnect=False): - if seconds: - self.setWait(seconds, reconnect) - super(SimpleHoster, self).wait() + else: + size = self.pyfile.size / 1024 + self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic)) + return size <= traffic + + + #@TODO: Remove in 0.4.10 + def wait(self, seconds=0, reconnect=None): + return _wait(self, seconds, reconnect) + + + def error(self, reason="", type="parse"): + return _error(self, reason, type) diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index e3765602b..572fe95b9 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -1,132 +1,163 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - import os import re -from os.path import join + from glob import glob -from subprocess import Popen, PIPE +from os.path import basename, dirname, join from string import digits +from subprocess import Popen, PIPE +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError from module.utils import save_join, decode -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError -class UnRar(AbtractExtractor): - __name__ = "UnRar" - __version__ = "0.16" +def renice(pid, value): + if os.name != "nt" and value: + try: + Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + except: + print "Renice failed" + + +class UnRar(Extractor): + __name__ = "UnRar" + __version__ = "1.01" + + __description__ = """Rar extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + - # there are some more uncovered rar formats - re_version = re.compile(r"(UNRAR 5[\.\d]+(.*?)freeware)") - re_splitfile = re.compile(r"(.*)\.part(\d+)\.rar$", re.I) - re_partfiles = re.compile(r".*\.(rar|r[0-9]+)", re.I) - re_filelist = re.compile(r"(.+)\s+(\d+)\s+(\d+)\s+") - re_filelist5 = re.compile(r"(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)") - re_wrongpwd = re.compile("(Corrupt file or wrong password|password incorrect)", re.I) CMD = "unrar" - @staticmethod - def checkDeps(): + EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"] + + + #@NOTE: there are some more uncovered rar formats + re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) + re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I) + + re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') + re_wrongpwd = re.compile(r'password', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) + + + @classmethod + def checkDeps(cls): if os.name == "nt": - UnRar.CMD = join(pypath, "UnRAR.exe") - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + cls.CMD = join(pypath, "UnRAR.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() else: try: - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() - except OSError: + except OSError: # fallback to rar - UnRar.CMD = "rar" - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + cls.CMD = "rar" + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() return True - @staticmethod - def getTargets(files_ids): - result = [] + + @classmethod + def isArchive(cls, file): + f = basename(file).lower() + return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS) + + + @classmethod + def getTargets(cls, files_ids): + targets = [] for file, id in files_ids: - if not file.endswith(".rar"): + if not cls.isArchive(file): continue - match = UnRar.re_splitfile.findall(file) - if match: + m = cls.re_rarpart.findall(file) + if m: # only add first parts - if int(match[0][1]) == 1: - result.append((file, id)) + if int(m[0][1]) == 1: + targets.append((file, id)) else: - result.append((file, id)) + targets.append((file, id)) - return result + return targets - def init(self): - self.passwordProtected = False - self.headerProtected = False #: list files will not work without password - self.smallestFile = None #: small file to test passwords - self.password = "" #: save the correct password - def checkArchive(self): - p = self.call_unrar("l", "-v", self.file) - out, err = p.communicate() - if self.re_wrongpwd.search(err): - self.passwordProtected = True - self.headerProtected = True - return True + def check(self, out="", err=""): + if not out or not err: + return + + if err.strip(): + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError + + else: #: raise error if anything is on stderr + raise ArchiveError(err.strip()) # output only used to check if passworded files are present - if self.re_version.search(out): - for attr, size, name in self.re_filelist5.findall(out): - if attr.startswith("*"): - self.passwordProtected = True - return True - else: - for name, size, packed in self.re_filelist.findall(out): - if name.startswith("*"): - self.passwordProtected = True - return True + for attr in self.re_filelist.findall(out): + if attr[0].startswith("*"): + raise PasswordError + + + def verify(self): + p = self.call_cmd("l", "-v", self.file, password=self.password) + + self.check(*p.communicate()) + + if p and p.returncode: + raise ArchiveError("Process terminated") - self.listContent() - if not self.files: - raise ArchiveError("Empty Archive") + if not self.list(): + raise ArchiveError("Empty archive") + + + def isPassword(self, password): + if isinstance(password, basestring): + p = self.call_cmd("l", "-v", self.file, password=password) + out, err = p.communicate() + + if not self.re_wrongpwd.search(err): + return True return False - def checkPassword(self, password): - # at this point we can only verify header protected files - if self.headerProtected: - p = self.call_unrar("l", "-v", self.file, password=password) + + def repair(self): + p = self.call_cmd("rc", self.file) + out, err = p.communicate() + + if p.returncode or err.strip(): + p = self.call_cmd("r", self.file) out, err = p.communicate() - if self.re_wrongpwd.search(err): + + if p.returncode or err.strip(): return False + else: + self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0)) return True - def extract(self, progress, password=None): + + def extract(self, progress=lambda x: None): + self.verify() + + progress(0) + command = "x" if self.fullpath else "e" - p = self.call_unrar(command, self.file, self.out, password=password) + p = self.call_cmd(command, self.file, self.out, password=self.password) + renice(p.pid, self.renice) - progress(0) progressstring = "" while True: c = p.stdout.read(1) @@ -134,7 +165,7 @@ class UnRar(AbtractExtractor): if not c: break # reading a percentage sign -> set progress and restart - if c == '%': + if c is '%': progress(int(progressstring)) progressstring = "" # not reading a digit -> therefore restart @@ -142,42 +173,43 @@ class UnRar(AbtractExtractor): progressstring = "" # add digit to progressstring else: - progressstring = progressstring + c + progressstring += c + progress(100) + self.files = self.list() + # retrieve stderr - err = p.stderr.read() - - if "CRC failed" in err and not password and not self.passwordProtected: - raise CRCError - elif "CRC failed" in err: - raise WrongPassword - if err.strip(): #: raise error if anything is on stderr - raise ArchiveError(err.strip()) + self.check(err=p.stderr.read()) + if p.returncode: raise ArchiveError("Process terminated") - if not self.files: - self.password = password - self.listContent() def getDeleteFiles(self): - if ".part" in self.file: - return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.IGNORECASE)) + if ".part" in basename(self.file): + return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) + # get files which matches .r* and filter unsuited files out - parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.IGNORECASE)) - return filter(lambda x: self.re_partfiles.match(x), parts) + parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) + + return filter(lambda x: self.re_rarfile.match(x), parts) + - def listContent(self): + def list(self): command = "vb" if self.fullpath else "lb" - p = self.call_unrar(command, "-v", self.file, password=self.password) + + p = self.call_cmd(command, "-v", self.file, password=self.password) out, err = p.communicate() - if "Cannot open" in err: - raise ArchiveError("Cannot open file") + if err.strip(): + self.m.logError(err) + if "Cannot open" in err: + return list() - if err.strip(): #: only log error at this point - self.m.logError(err.strip()) + if p.returncode: + self.m.logError("Process terminated") + return list() result = set() @@ -185,38 +217,37 @@ class UnRar(AbtractExtractor): f = f.strip() result.add(save_join(self.out, f)) - self.files = result + return list(result) - def call_unrar(self, command, *xargs, **kwargs): + + def call_cmd(self, command, *xargs, **kwargs): args = [] + # overwrite flag - args.append("-o+") if self.overwrite else args.append("-o-") + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") - if self.excludefiles: - for word in self.excludefiles.split(';'): - args.append("-x%s" % word) + for word in self.excludefiles: + args.append("-x%s" % word.strip()) # assume yes on all queries args.append("-y") # set a password - if "password" in kwargs and kwargs["password"]: - args.append("-p%s" % kwargs["password"]) + if "password" in kwargs and kwargs['password']: + args.append("-p%s" % kwargs['password']) else: args.append("-p-") + if self.keepbroken: + args.append("-kb") + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) self.m.logDebug(" ".join(call)) - p = Popen(call, stdout=PIPE, stderr=PIPE) - - return p - - -def renice(pid, value): - if os.name != "nt" and value: - try: - Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) - except: - print "Renice failed" + return Popen(call, stdout=PIPE, stderr=PIPE) diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 501962442..5ec56cbdf 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,50 +1,86 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +from __future__ import with_statement - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import sys +import zipfile - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError - @author: RaNaN -""" -import zipfile -import sys +class UnZip(Extractor): + __name__ = "UnZip" + __version__ = "1.01" + + __description__ = """Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] -from module.plugins.internal.AbstractExtractor import AbtractExtractor + EXTENSIONS = ["zip", "zip64"] -class UnZip(AbtractExtractor): - __name__ = "UnZip" - __version__ = "0.1" - @staticmethod - def checkDeps(): + @classmethod + def checkDeps(cls): return sys.version_info[:2] >= (2, 6) - @staticmethod - def getTargets(files_ids): - result = [] - for file, id in files_ids: - if file.endswith(".zip"): - result.append((file, id)) + @classmethod + def isArchive(cls, file): + return zipfile.is_zipfile(file) + + + def verify(self): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + badcrc = z.testzip() + + except (BadZipfile, LargeZipFile), e: + raise ArchiveError(e) + + except RuntimeError, e: + if 'encrypted' in e: + raise PasswordError + else: + raise ArchiveError(e) + + else: + if badcrc: + raise CRCError + + if not self.list(): + raise ArchiveError("Empty archive") + + + def list(self): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + return z.namelist() + except Exception: + return list() + + + def extract(self, progress=lambda x: None): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + progress(0) + z.extractall(self.out, pwd=self.password) + progress(100) + + except (BadZipfile, LargeZipFile), e: + raise ArchiveError(e) + + except RuntimeError, e: + if e is "Bad password for file": + raise PasswordError + else: + raise ArchiveError(e) - return result + finally: + self.files = self.list() - def extract(self, progress, password=None): - z = zipfile.ZipFile(self.file) - self.files = z.namelist() - z.extractall(self.out) def getDeleteFiles(self): return [self.file] diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py new file mode 100644 index 000000000..2784ecd0b --- /dev/null +++ b/module/plugins/internal/XFSAccount.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +import re + +from time import gmtime, mktime, strptime +from urlparse import urljoin + +from module.plugins.Account import Account +from module.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies + + +class XFSAccount(Account): + __name__ = "XFSAccount" + __type__ = "account" + __version__ = "0.33" + + __description__ = """XFileSharing account plugin""" + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_URL = None + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + PREMIUM_PATTERN = r'\(Premium only\)' + + VALID_UNTIL_PATTERN = r'Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})' + + TRAFFIC_LEFT_PATTERN = r'Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' + TRAFFIC_LEFT_UNIT = "MB" #: used only if no group <U> was found + + LEECH_TRAFFIC_PATTERN = r'Leech Traffic left:<b>.*?(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' + LEECH_TRAFFIC_UNIT = "MB" #: used only if no group <U> was found + + LOGIN_FAIL_PATTERN = r'>\s*(Incorrect Login or Password|Error<)' + + + def __init__(self, manager, accounts): #@TODO: remove in 0.4.10 + self.init() + return super(XFSAccount, self).__init__(manager, accounts) + + + def init(self): + if not self.HOSTER_DOMAIN: + self.logError(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN or "" + + + def loadAccountInfo(self, user, req): + validuntil = None + trafficleft = None + leechtraffic = None + premium = None + + html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) + + premium = True if re.search(self.PREMIUM_PATTERN, html) else False + + m = re.search(self.VALID_UNTIL_PATTERN, html) + if m: + expiredate = m.group(1).strip() + self.logDebug("Expire date: " + expiredate) + + try: + validuntil = mktime(strptime(expiredate, "%d %B %Y")) + + except Exception, e: + self.logError(e) + + else: + self.logDebug("Valid until: %s" % validuntil) + + if validuntil > mktime(gmtime()): + premium = True + trafficleft = -1 + else: + premium = False + validuntil = None #: registered account type (not premium) + else: + self.logDebug("VALID_UNTIL_PATTERN not found") + + m = re.search(self.TRAFFIC_LEFT_PATTERN, html) + if m: + try: + traffic = m.groupdict() + size = traffic['S'] + + if "nlimited" in size: + trafficleft = -1 + if validuntil is None: + validuntil = -1 + else: + if 'U' in traffic: + unit = traffic['U'] + elif isinstance(self.TRAFFIC_LEFT_UNIT, basestring): + unit = self.TRAFFIC_LEFT_UNIT + else: + unit = "" + + trafficleft = self.parseTraffic(size + unit) + + except Exception, e: + self.logError(e) + else: + self.logDebug("TRAFFIC_LEFT_PATTERN not found") + + leech = [m.groupdict() for m in re.finditer(self.LEECH_TRAFFIC_PATTERN, html)] + if leech: + leechtraffic = 0 + try: + for traffic in leech: + size = traffic['S'] + + if "nlimited" in size: + leechtraffic = -1 + if validuntil is None: + validuntil = -1 + break + else: + if 'U' in traffic: + unit = traffic['U'] + elif isinstance(self.LEECH_TRAFFIC_UNIT, basestring): + unit = self.LEECH_TRAFFIC_UNIT + else: + unit = "" + + leechtraffic += self.parseTraffic(size + unit) + + except Exception, e: + self.logError(e) + else: + self.logDebug("LEECH_TRAFFIC_PATTERN not found") + + return {'validuntil': validuntil, 'trafficleft': trafficleft, 'leechtraffic': leechtraffic, 'premium': premium} + + + def login(self, user, data, req): + if isinstance(self.COOKIES, list): + set_cookies(req.cj, self.COOKIES) + + url = urljoin(self.HOSTER_URL, "login.html") + html = req.load(url, decode=True) + + action, inputs = parseHtmlForm('name="FL"', html) + if not inputs: + inputs = {'op': "login", + 'redirect': self.HOSTER_URL} + + inputs.update({'login': user, + 'password': data['password']}) + + html = req.load(self.HOSTER_URL, post=inputs, decode=True) + + if re.search(self.LOGIN_FAIL_PATTERN, html): + self.wrongPassword() diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py new file mode 100644 index 000000000..4b57dab90 --- /dev/null +++ b/module/plugins/internal/XFSCrypter.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.SimpleCrypter import SimpleCrypter, create_getInfo + + +class XFSCrypter(SimpleCrypter): + __name__ = "XFSCrypter" + __type__ = "crypter" + __version__ = "0.05" + + __pattern__ = r'^unmatchable$' + + __description__ = """XFileSharing decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + LINK_PATTERN = r'<(?:td|TD).*?>\s*<a href="(.+?)".*?>.+?(?:</a>)?\s*</(?:td|TD)>' + NAME_PATTERN = r'<[tT]itle>.*?\: (?P<N>.+) folder</[tT]itle>' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py new file mode 100644 index 000000000..f2168d0c7 --- /dev/null +++ b/module/plugins/internal/XFSHoster.py @@ -0,0 +1,341 @@ +# -*- coding: utf-8 -*- + +import re + +from random import random +from time import sleep + +from pycurl import FOLLOWLOCATION, LOW_SPEED_TIME + +from module.plugins.internal.CaptchaService import ReCaptcha, SolveMedia +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, secondsToMidnight +from module.utils import html_unescape + + +class XFSHoster(SimpleHoster): + __name__ = "XFSHoster" + __type__ = "hoster" + __version__ = "0.33" + + __pattern__ = r'^unmatchable$' + + __description__ = """XFileSharing hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + TEXT_ENCODING = False + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + DIRECT_LINK = None + MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... + + NAME_PATTERN = r'(Filename[ ]*:[ ]*</b>(</td><td nowrap>)?|name="fname"[ ]+value="|<[\w^_]+ class="(file)?name">)\s*(?P<N>.+?)(\s*<|")' + SIZE_PATTERN = r'(Size[ ]*:[ ]*</b>(</td><td>)?|File:.*>|</font>\s*\(|<[\w^_]+ class="size">)\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' + + WAIT_PATTERN = r'<span id="countdown_str">.*?>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' + PREMIUM_ONLY_PATTERN = r'>This file is available for Premium Users only' + ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' + + LINK_LEECH_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' + LINK_PATTERN = None #: final download url pattern + + CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' + CAPTCHA_BLOCK_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>' + RECAPTCHA_PATTERN = None + SOLVEMEDIA_PATTERN = None + + FORM_PATTERN = None + FORM_INPUTS_MAP = None #: dict passed as input_names to parseHtmlForm + + + def setup(self): + self.chunkLimit = 1 + self.resumeDownload = self.multiDL = self.premium + + + def prepare(self): + """ Initialize important variables """ + if not self.HOSTER_DOMAIN: + self.fail(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_NAME: + self.HOSTER_NAME = "".join([str.capitalize() for str in self.HOSTER_DOMAIN.split('.')]) + + if not self.LINK_PATTERN: + pattern = r'(https?://(www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' + self.LINK_PATTERN = pattern % self.HOSTER_DOMAIN.replace('.', '\.') + + self.captcha = None + self.errmsg = None + self.passwords = self.getPassword().splitlines() + + super(XFSHoster, self).prepare() + + if self.DIRECT_LINK is None: + self.directDL = bool(self.premium) + + + def handleFree(self): + link = self.getDownloadLink() + + if link: + if self.captcha: + self.correctCaptcha() + + self.download(link, ref=True, cookies=True, disposition=True) + + elif self.errmsg: + if 'captcha' in self.errmsg: + self.fail(_("No valid captcha code entered")) + else: + self.fail(self.errmsg) + + else: + self.fail(_("Download link not found")) + + + def handlePremium(self): + return self.handleFree() + + + def getDownloadLink(self): + for i in xrange(1, 6): + self.logDebug("Getting download link: #%d" % i) + + self.checkErrors() + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + + data = self.getPostParameters() + + self.req.http.c.setopt(FOLLOWLOCATION, 0) + + self.html = self.load(self.pyfile.url, post=data, ref=True, decode=True) + + self.req.http.c.setopt(FOLLOWLOCATION, 1) + + m = re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I) + if m and not "op=" in m.group(1): + break + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + else: + self.logError(data['op'] if 'op' in data else _("UNKNOWN")) + return "" + + self.errmsg = None + + return m.group(1).strip() #@TODO: Remove .strip() in 0.4.10 + + + def handleMulti(self): + if not self.account: + self.fail(_("Only registered or premium users can use url leech feature")) + + #only tested with easybytez.com + self.html = self.load("http://www.%s/" % self.HOSTER_DOMAIN) + + action, inputs = self.parseHtmlForm() + + upload_id = "%012d" % int(random() * 10 ** 12) + action += upload_id + "&js_on=1&utype=prem&upload_type=url" + + inputs['tos'] = '1' + inputs['url_mass'] = self.pyfile.url + inputs['up1oad_type'] = 'url' + + self.logDebug(action, inputs) + + self.req.setOption("timeout", 600) #: wait for file to upload to easybytez.com + + self.html = self.load(action, post=inputs) + + self.checkErrors() + + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + stmsg = inputs['st'] + + if stmsg == 'OK': + self.html = self.load(action, post=inputs) + + elif 'Can not leech file' in stmsg: + self.retry(20, 3 * 60, _("Can not leech file")) + + elif 'today' in stmsg: + self.retry(wait_time=secondsToMidnight(gmt=2), reason=_("You've used all Leech traffic today")) + + else: + self.fail(stmsg) + + #get easybytez.com link for uploaded file + m = re.search(self.LINK_LEECH_PATTERN, self.html) + if m is None: + self.error(_("LINK_LEECH_PATTERN not found")) + + header = self.load(m.group(1), just_header=True, decode=True) + + if 'location' in header: #: Direct download link + self.link = header['location'] + else: + self.fail(_("Download link not found")) + + + def checkErrors(self): + m = re.search(self.ERROR_PATTERN, self.html) + if m is None: + self.errmsg = None + else: + self.errmsg = m.group(1).strip() + + self.logWarning(re.sub(r"<.*?>", " ", self.errmsg)) + + if 'wait' in self.errmsg: + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)]) + self.wait(wait_time, True) + + elif 'country' in self.errmsg: + self.fail(_("Downloads are disabled for your country")) + + elif 'captcha' in self.errmsg: + self.invalidCaptcha() + + elif 'premium' in self.errmsg and 'require' in self.errmsg: + self.fail(_("File can be downloaded by premium users only")) + + elif 'limit' in self.errmsg: + if 'days' in self.errmsg: + delay = secondsToMidnight(gmt=2) + retries = 3 + else: + delay = 1 * 60 * 60 + retries = 24 + + self.wantReconnect = True + self.retry(retries, delay, _("Download limit exceeded")) + + elif 'countdown' in self.errmsg or 'Expired' in self.errmsg: + self.retry(reason=_("Link expired")) + + elif 'maintenance' in self.errmsg or 'maintainance' in self.errmsg: + self.tempOffline() + + elif 'up to' in self.errmsg: + self.fail(_("File too large for free download")) + + else: + self.wantReconnect = True + self.retry(wait_time=60, reason=self.errmsg) + + if self.errmsg: + self.info['error'] = self.errmsg + else: + self.info.pop('error', None) + + + def getPostParameters(self): + if self.FORM_PATTERN or self.FORM_INPUTS_MAP: + action, inputs = self.parseHtmlForm(self.FORM_PATTERN or "", self.FORM_INPUTS_MAP or {}) + else: + action, inputs = self.parseHtmlForm(input_names={'op': re.compile(r'^download')}) + + if not inputs: + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + if 'op' in inputs: + if "password" in inputs: + if self.passwords: + inputs['password'] = self.passwords.pop(0) + else: + self.fail(_("Missing password")) + + if not self.premium: + m = re.search(self.WAIT_PATTERN, self.html) + if m: + wait_time = int(m.group(1)) + self.setWait(wait_time, False) + + self.captcha = self.handleCaptcha(inputs) + + self.wait() + else: + inputs['referer'] = self.pyfile.url + + if self.premium: + inputs['method_premium'] = "Premium Download" + inputs.pop('method_free', None) + else: + inputs['method_free'] = "Free Download" + inputs.pop('method_premium', None) + + return inputs + + + def handleCaptcha(self, inputs): + m = re.search(self.CAPTCHA_PATTERN, self.html) + if m: + captcha_url = m.group(1) + inputs['code'] = self.decryptCaptcha(captcha_url) + return 1 + + m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S) + if m: + captcha_div = m.group(1) + numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) + self.logDebug(captcha_div) + inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + self.logDebug("Captcha code: %s" % inputs['code'], numerals) + return 2 + + recaptcha = ReCaptcha(self) + try: + captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) + except: + captcha_key = recaptcha.detect_key() + else: + self.logDebug("ReCaptcha key: %s" % captcha_key) + + if captcha_key: + inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + return 3 + + solvemedia = SolveMedia(self) + try: + captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) + except: + captcha_key = solvemedia.detect_key() + else: + self.logDebug("SolveMedia key: %s" % captcha_key) + + if captcha_key: + inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) + return 4 + + return 0 diff --git a/module/plugins/internal/XFSPAccount.py b/module/plugins/internal/XFSPAccount.py deleted file mode 100644 index 76aff54f0..000000000 --- a/module/plugins/internal/XFSPAccount.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: zoidberg -""" - -import re -from time import mktime, strptime -from module.plugins.Account import Account -from module.plugins.internal.SimpleHoster import parseHtmlForm -from module.utils import parseFileSize - - -class XFSPAccount(Account): - __name__ = "XFSPAccount" - __version__ = "0.06" - __type__ = "account" - __description__ = """XFileSharingPro base account plugin""" - __author_name__ = "zoidberg" - __author_mail__ = "zoidberg@mujmail.cz" - - MAIN_PAGE = None - - VALID_UNTIL_PATTERN = r'>Premium.[Aa]ccount expire:</TD><TD><b>([^<]+)</b>' - TRAFFIC_LEFT_PATTERN = r'>Traffic available today:</TD><TD><b>([^<]+)</b>' - LOGIN_FAIL_PATTERN = r'Incorrect Login or Password|>Error<' - PREMIUM_PATTERN = r'>Renew premium<' - - def loadAccountInfo(self, user, req): - html = req.load(self.MAIN_PAGE + "?op=my_account", decode=True) - - validuntil = trafficleft = None - premium = True if re.search(self.PREMIUM_PATTERN, html) else False - - found = re.search(self.VALID_UNTIL_PATTERN, html) - if found: - premium = True - trafficleft = -1 - try: - self.logDebug(found.group(1)) - validuntil = mktime(strptime(found.group(1), "%d %B %Y")) - except Exception, e: - self.logError(e) - else: - found = re.search(self.TRAFFIC_LEFT_PATTERN, html) - if found: - trafficleft = found.group(1) - if "Unlimited" in trafficleft: - premium = True - else: - trafficleft = parseFileSize(trafficleft) / 1024 - - return {"validuntil": validuntil, "trafficleft": trafficleft, "premium": premium} - - def login(self, user, data, req): - html = req.load('%slogin.html' % self.MAIN_PAGE, decode=True) - - action, inputs = parseHtmlForm('name="FL"', html) - if not inputs: - inputs = {"op": "login", - "redirect": self.MAIN_PAGE} - - inputs.update({"login": user, - "password": data['password']}) - - html = req.load(self.MAIN_PAGE, post=inputs, decode=True) - - if re.search(self.LOGIN_FAIL_PATTERN, html): - self.wrongPassword() |