diff options
Diffstat (limited to 'pyload/plugins/internal')
21 files changed, 3178 insertions, 0 deletions
diff --git a/pyload/plugins/internal/AbstractExtractor.py b/pyload/plugins/internal/AbstractExtractor.py new file mode 100644 index 000000000..54ea9b348 --- /dev/null +++ b/pyload/plugins/internal/AbstractExtractor.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +class ArchiveError(Exception): + pass + + +class CRCError(Exception): + pass + + +class WrongPassword(Exception): + pass + + +class AbtractExtractor: + __name__ = "AbtractExtractor" + __version__ = "0.1" + + __description__ = """Abtract extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + @staticmethod + def checkDeps(): + """ Check if system statisfy dependencies + :return: boolean + """ + return True + + + @staticmethod + def getTargets(files_ids): + """ Filter suited targets from list of filename id tuple list + :param files_ids: List of filepathes + :return: List of targets, id tuple list + """ + raise NotImplementedError + + + def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice): + """Initialize extractor for specific file + + :param m: ExtractArchive Addon plugin + :param file: Absolute filepath + :param out: Absolute path to destination directory + :param fullpath: extract to fullpath + :param overwrite: Overwrite existing archives + :param renice: Renice value + """ + self.m = m + self.file = file + self.out = out + self.fullpath = fullpath + self.overwrite = overwrite + self.excludefiles = excludefiles + self.renice = renice + self.files = [] #: Store extracted files here + + + def init(self): + """ Initialize additional data structures """ + pass + + + def checkArchive(self): + """Check if password if needed. Raise ArchiveError if integrity is + questionable. + + :return: boolean + :raises ArchiveError + """ + return False + + + def checkPassword(self, password): + """ Check if the given password is/might be correct. + If it can not be decided at this point return true. 
+ + :param password: + :return: boolean + """ + return True + + + def extract(self, progress, password=None): + """Extract the archive. Raise specific errors in case of failure. + + :param progress: Progress function, call this to update status + :param password password to use + :raises WrongPassword + :raises CRCError + :raises ArchiveError + :return: + """ + raise NotImplementedError + + + def getDeleteFiles(self): + """Return list of files to delete, do *not* delete them here. + + :return: List with paths of files to delete + """ + raise NotImplementedError + + + def getExtractedFiles(self): + """Populate self.files at some point while extracting""" + return self.files diff --git a/pyload/plugins/internal/Account.py b/pyload/plugins/internal/Account.py new file mode 100644 index 000000000..e6895f119 --- /dev/null +++ b/pyload/plugins/internal/Account.py @@ -0,0 +1,305 @@ +# -*- coding: utf-8 -*- + +from random import choice +from time import time +from traceback import print_exc +from threading import RLock + +from pyload.plugins.Plugin import Base +from pyload.utils import compare_time, parseFileSize, lock + + +class WrongPassword(Exception): + pass + + +class Account(Base): + """ + Base class for every Account plugin. + Just overwrite `login` and cookies will be stored and account becomes accessible in\ + associated hoster plugin. 
Plugin should also provide `loadAccountInfo` + """ + __name__ = "Account" + __type__ = "account" + __version__ = "0.03" + + __description__ = """Base account plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay", "mkaay@mkaay.de")] + + + #: after that time (in minutes) pyload will relogin the account + login_timeout = 10 * 60 + #: after that time (in minutes) account data will be reloaded + info_threshold = 10 * 60 + + + def __init__(self, manager, accounts): + Base.__init__(self, manager.core) + + self.manager = manager + self.accounts = {} + self.infos = {} #: cache for account information + self.lock = RLock() + self.timestamps = {} + + self.init() + + self.setAccounts(accounts) + + + def init(self): + pass + + + def login(self, user, data, req): + """login into account, the cookies will be saved so user can be recognized + + :param user: loginname + :param data: data dictionary + :param req: `Request` instance + """ + pass + + + @lock + def _login(self, user, data): + # set timestamp for login + self.timestamps[user] = time() + + req = self.getAccountRequest(user) + try: + self.login(user, data, req) + except WrongPassword: + self.logWarning( + _("Could not login with account %(user)s | %(msg)s") % {"user": user, + "msg": _("Wrong Password")}) + success = data['valid'] = False + except Exception, e: + self.logWarning( + _("Could not login with account %(user)s | %(msg)s") % {"user": user, + "msg": e}) + success = data['valid'] = False + if self.core.debug: + print_exc() + else: + success = True + finally: + if req: + req.close() + return success + + + def relogin(self, user): + req = self.getAccountRequest(user) + if req: + req.cj.clear() + req.close() + if user in self.infos: + del self.infos[user] #delete old information + + return self._login(user, self.accounts[user]) + + + def setAccounts(self, accounts): + self.accounts = accounts + for user, data in self.accounts.iteritems(): + self._login(user, data) + self.infos[user] = {} + + + def 
updateAccounts(self, user, password=None, options={}): + """ updates account and return true if anything changed """ + + if user in self.accounts: + self.accounts[user]['valid'] = True #do not remove or accounts will not login + if password: + self.accounts[user]['password'] = password + self.relogin(user) + return True + if options: + before = self.accounts[user]['options'] + self.accounts[user]['options'].update(options) + return self.accounts[user]['options'] != before + else: + self.accounts[user] = {"password": password, "options": options, "valid": True} + self._login(user, self.accounts[user]) + return True + + + def removeAccount(self, user): + if user in self.accounts: + del self.accounts[user] + if user in self.infos: + del self.infos[user] + if user in self.timestamps: + del self.timestamps[user] + + + @lock + def getAccountInfo(self, name, force=False): + """retrieve account infos for an user, do **not** overwrite this method!\\ + just use it to retrieve infos in hoster plugins. 
see `loadAccountInfo` + + :param name: username + :param force: reloads cached account information + :return: dictionary with information + """ + data = Account.loadAccountInfo(self, name) + + if force or name not in self.infos: + self.logDebug("Get Account Info for %s" % name) + req = self.getAccountRequest(name) + + try: + infos = self.loadAccountInfo(name, req) + if not type(infos) == dict: + raise Exception("Wrong return format") + except Exception, e: + infos = {"error": str(e)} + print_exc() + + if req: + req.close() + + self.logDebug("Account Info: %s" % infos) + + infos['timestamp'] = time() + self.infos[name] = infos + elif "timestamp" in self.infos[name] and self.infos[name][ + "timestamp"] + self.info_threshold * 60 < time(): + self.logDebug("Reached timeout for account data") + self.scheduleRefresh(name) + + data.update(self.infos[name]) + return data + + + def isPremium(self, user): + info = self.getAccountInfo(user) + return info['premium'] + + + def loadAccountInfo(self, name, req=None): + """this should be overwritten in account plugin,\ + and retrieving account information for user + + :param name: + :param req: `Request` instance + :return: + """ + return {"validuntil": None, #: -1 for unlimited + "login": name, + # "password": self.accounts[name]['password'], #: commented due security reason + "options": self.accounts[name]['options'], + "valid": self.accounts[name]['valid'], + "trafficleft": None, #: in kb, -1 for unlimited + "maxtraffic": None, + "premium": None, + "timestamp": 0, #: time this info was retrieved + "type": self.__name__} + + + def getAllAccounts(self, force=False): + return [self.getAccountInfo(user, force) for user, data in self.accounts.iteritems()] + + + def getAccountRequest(self, user=None): + if not user: + user, data = self.selectAccount() + if not user: + return None + + req = self.core.requestFactory.getRequest(self.__name__, user) + return req + + + def getAccountCookies(self, user=None): + if not user: + user, data = 
self.selectAccount() + if not user: + return None + + cj = self.core.requestFactory.getCookieJar(self.__name__, user) + return cj + + + def getAccountData(self, user): + return self.accounts[user] + + + def selectAccount(self): + """ returns an valid account name and data""" + usable = [] + for user, data in self.accounts.iteritems(): + if not data['valid']: continue + + if "time" in data['options'] and data['options']['time']: + time_data = "" + try: + time_data = data['options']['time'][0] + start, end = time_data.split("-") + if not compare_time(start.split(":"), end.split(":")): + continue + except: + self.logWarning(_("Your Time %s has wrong format, use: 1:22-3:44") % time_data) + + if user in self.infos: + if "validuntil" in self.infos[user]: + if self.infos[user]['validuntil'] > 0 and time() > self.infos[user]['validuntil']: + continue + if "trafficleft" in self.infos[user]: + if self.infos[user]['trafficleft'] == 0: + continue + + usable.append((user, data)) + + if not usable: return None, None + return choice(usable) + + + def canUse(self): + return False if self.selectAccount() == (None, None) else True + + + def parseTraffic(self, string): #returns kbyte + return parseFileSize(string) + + + def wrongPassword(self): + raise WrongPassword + + + def empty(self, user): + if user in self.infos: + self.logWarning(_("Account %s has not enough traffic, checking again in 30min") % user) + + self.infos[user].update({"trafficleft": 0}) + self.scheduleRefresh(user, 30 * 60) + + + def expired(self, user): + if user in self.infos: + self.logWarning(_("Account %s is expired, checking again in 1h") % user) + + self.infos[user].update({"validuntil": time() - 1}) + self.scheduleRefresh(user, 60 * 60) + + + def scheduleRefresh(self, user, time=0, force=True): + """ add task to refresh account info to sheduler """ + self.logDebug("Scheduled Account refresh for %s in %s seconds." 
% (user, time)) + self.core.scheduler.addJob(time, self.getAccountInfo, [user, force]) + + + @lock + def checkLogin(self, user): + """ checks if user is still logged in """ + if user in self.timestamps: + if self.login_timeout > 0 and self.timestamps[user] + self.login_timeout * 60 < time(): + self.logDebug("Reached login timeout for %s" % user) + return self.relogin(user) + else: + return True + else: + return False diff --git a/pyload/plugins/internal/Addon.py b/pyload/plugins/internal/Addon.py new file mode 100644 index 000000000..b126b97d6 --- /dev/null +++ b/pyload/plugins/internal/Addon.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- + +from traceback import print_exc + +from pyload.plugins.Plugin import Base + + +class Expose(object): + """ used for decoration to declare rpc services """ + + def __new__(cls, f, *args, **kwargs): + addonManager.addRPC(f.__module__, f.func_name, f.func_doc) + return f + + +def threaded(f): + + def run(*args,**kwargs): + addonManager.startThread(f, *args, **kwargs) + return run + + +class Addon(Base): + """ + Base class for addon plugins. + """ + __name__ = "Addon" + __type__ = "addon" + __version__ = "0.03" + + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base addon/hook plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay", "mkaay@mkaay.de"), + ("RaNaN", "RaNaN@pyload.org")] + + + #: automatically register event listeners for functions, attribute will be deleted dont use it yourself + event_map = {} + + # Alternative to event_map + #: List of events the plugin can handle, name the functions exactly like eventname. 
+ event_list = [] #@NOTE: dont make duplicate entries in event_map + + + def __init__(self, core, manager): + Base.__init__(self, core) + + #: Provide information in dict here, usable by API `getInfo` + self.info = {} + + #: Callback of periodical job task, used by AddonManager + self.cb = None + self.interval = -1 #: disabled + + #: `AddonManager` + self.manager = manager + + #register events + if self.event_map: + for event, funcs in self.event_map.iteritems(): + if type(funcs) in (list, tuple): + for f in funcs: + self.manager.addEvent(event, getattr(self,f)) + else: + self.manager.addEvent(event, getattr(self,funcs)) + + #delete for various reasons + self.event_map = None + + if self.event_list: + for f in self.event_list: + self.manager.addEvent(f, getattr(self,f)) + + self.event_list = None + + self.setup() + + self.initPeriodical() + + + def initPeriodical(self, delay=0, threaded=False): + self.cb = self.core.scheduler.addJob(delay, self._periodical, args=[threaded], threaded=threaded) + + + def _periodical(self, threaded): + if self.interval < 0: + self.cb = None + return + + try: + self.periodical() + + except Exception, e: + self.logError(_("Error executing addon: %s") % e) + if self.core.debug: + print_exc() + + self.cb = self.core.scheduler.addJob(self.interval, self._periodical, threaded=threaded) + + + def __repr__(self): + return "<Addon %s>" % self.__name__ + + + def setup(self): + """ more init stuff if needed """ + pass + + + def unload(self): + """ called when addon was deactivated """ + pass + + + def isActivated(self): + """ checks if addon is activated""" + return self.core.config.getPlugin(self.__name__, "activated") + + + #event methods - overwrite these if needed + def coreReady(self): + pass + + + def coreExiting(self): + pass + + + def downloadPreparing(self, pyfile): + pass + + + def downloadFinished(self, pyfile): + pass + + + def downloadFailed(self, pyfile): + pass + + + def packageFinished(self, pypack): + pass + + + def 
beforeReconnecting(self, ip): + pass + + + def afterReconnecting(self, ip): + pass + + + def periodical(self): + pass + + + def newCaptchaTask(self, task): + """ new captcha task for the plugin, it MUST set the handler and timeout or will be ignored """ + pass + + + def captchaCorrect(self, task): + pass + + + def captchaInvalid(self, task): + pass diff --git a/pyload/plugins/internal/BasePlugin.py b/pyload/plugins/internal/BasePlugin.py new file mode 100644 index 000000000..dd8540578 --- /dev/null +++ b/pyload/plugins/internal/BasePlugin.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +import re + +from urllib import unquote +from urlparse import urlparse + +from pyload.network.HTTPRequest import BadHeader +from pyload.plugins.internal.Hoster import Hoster +from pyload.utils import html_unescape, remove_chars + + +class BasePlugin(Hoster): + __name__ = "BasePlugin" + __type__ = "hoster" + __version__ = "0.20" + + __pattern__ = r'^unmatchable$' + + __description__ = """Base Plugin when any other didnt fit""" + __license__ = "GPLv3" + __authors__ = [("RaNaN", "RaNaN@pyload.org")] + + + def setup(self): + self.chunkLimit = -1 + self.resumeDownload = True + + + def process(self, pyfile): + """main function""" + + #: debug part, for api exerciser + if pyfile.url.startswith("DEBUG_API"): + self.multiDL = False + return + + if pyfile.url.startswith("http"): + + try: + self.downloadFile(pyfile) + except BadHeader, e: + if e.code in (401, 403): + self.logDebug("Auth required") + + account = self.core.accountManager.getAccountPlugin('Http') + servers = [x['login'] for x in account.getAllAccounts()] + server = urlparse(pyfile.url).netloc + + if server in servers: + self.logDebug("Logging on to %s" % server) + self.req.addAuth(account.accounts[server]['password']) + else: + for pwd in pyfile.package().password.splitlines(): + if ":" in pwd: + self.req.addAuth(pwd.strip()) + break + else: + self.fail(_("Authorization required (username:password)")) + + 
self.downloadFile(pyfile) + else: + raise + + else: + self.fail(_("No Plugin matched and not a downloadable url")) + + + def downloadFile(self, pyfile): + url = pyfile.url + + for _i in xrange(5): + header = self.load(url, just_header=True) + + # self.load does not raise a BadHeader on 404 responses, do it here + if 'code' in header and header['code'] == 404: + raise BadHeader(404) + + if 'location' in header: + self.logDebug("Location: " + header['location']) + base = re.match(r'https?://[^/]+', url).group(0) + if header['location'].startswith("http"): + url = header['location'] + elif header['location'].startswith("/"): + url = base + unquote(header['location']) + else: + url = '%s/%s' % (base, unquote(header['location'])) + else: + break + + name = html_unescape(unquote(urlparse(url).path.split("/")[-1])) + + if 'content-disposition' in header: + self.logDebug("Content-Disposition: " + header['content-disposition']) + m = re.search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", header['content-disposition']) + if m: + disp = m.groupdict() + self.logDebug(disp) + if not disp['enc']: + disp['enc'] = 'utf-8' + name = remove_chars(disp['name'], "\"';").strip() + name = unicode(unquote(name), disp['enc']) + + if not name: + name = url + pyfile.name = name + self.logDebug("Filename: %s" % pyfile.name) + self.download(url, disposition=True) diff --git a/pyload/plugins/internal/Captcha.py b/pyload/plugins/internal/Captcha.py new file mode 100644 index 000000000..7197c390e --- /dev/null +++ b/pyload/plugins/internal/Captcha.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +import re + +from pyload.plugins.Plugin import Plugin + + +class Captcha(Plugin): + __name__ = "Captcha" + __type__ = "captcha" + __version__ = "0.14" + + __description__ = """Base captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = None + + key = None #: last key detected + + + def __init__(self, plugin): + self.plugin = plugin 
+ + + def detect_key(self, html=None): + if not html: + if hasattr(self.plugin, "html") and self.plugin.html: + html = self.plugin.html + else: + errmsg = _("%s html not found") % self.__name__ + self.plugin.error(errmsg) + raise TypeError(errmsg) + + m = re.search(self.KEY_PATTERN, html) + if m: + self.key = m.group("KEY") + self.plugin.logDebug("%s key: %s" % (self.__name__, self.key)) + return self.key + else: + self.plugin.logDebug("%s key not found" % self.__name__) + return None + + + def challenge(self, key=None): + raise NotImplementedError + + + def result(self, server, challenge): + raise NotImplementedError diff --git a/pyload/plugins/internal/Container.py b/pyload/plugins/internal/Container.py new file mode 100644 index 000000000..b7dd3aa20 --- /dev/null +++ b/pyload/plugins/internal/Container.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +import re + +from os import remove +from os.path import basename, exists + +from pyload.plugins.internal.Crypter import Crypter +from pyload.utils import safe_join + + +class Container(Crypter): + __name__ = "Container" + __type__ = "container" + __version__ = "0.01" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base container decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay", "mkaay@mkaay.de")] + + + def preprocessing(self, thread): + """prepare""" + + self.setup() + self.thread = thread + + self.loadToDisk() + + self.decrypt(self.pyfile) + self.deleteTmp() + + self.createPackages() + + + def loadToDisk(self): + """loads container to disk if its stored remotely and overwrite url, + or check existent on several places at disk""" + + if self.pyfile.url.startswith("http"): + self.pyfile.name = re.findall("([^\/=]+)", self.pyfile.url)[-1] + content = self.load(self.pyfile.url) + self.pyfile.url = safe_join(self.core.config['general']['download_folder'], self.pyfile.name) + try: + with open(self.pyfile.url, "wb") as f: + 
f.write(content) + except IOError, e: + self.fail(str(e)) + + else: + self.pyfile.name = basename(self.pyfile.url) + if not exists(self.pyfile.url): + if exists(safe_join(pypath, self.pyfile.url)): + self.pyfile.url = safe_join(pypath, self.pyfile.url) + else: + self.fail(_("File not exists")) + + + def deleteTmp(self): + if self.pyfile.name.startswith("tmp_"): + remove(self.pyfile.url) diff --git a/pyload/plugins/internal/Crypter.py b/pyload/plugins/internal/Crypter.py new file mode 100644 index 000000000..76880ca14 --- /dev/null +++ b/pyload/plugins/internal/Crypter.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- + +from urlparse import urlparse + +from pyload.plugins.Plugin import Plugin +from pyload.utils import decode, html_unescape, save_filename + + +class Crypter(Plugin): + __name__ = "Crypter" + __type__ = "crypter" + __version__ = "0.05" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config['general']['folder_per_package'] + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description__ = """Base decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + html = None #: last html loaded + + + def __init__(self, pyfile): + #: Put all packages here. 
It's a list of tuples like: ( name, [list of links], folder ) + self.packages = [] + + #: List of urls, pyLoad will generate packagenames + self.urls = [] + + Plugin.__init__(self, pyfile) + + + def process(self, pyfile): + """ main method """ + + self.decrypt(pyfile) + + if self.urls: + self.generatePackages() + + elif not self.packages: + self.error(_("No link extracted"), "decrypt") + + self.createPackages() + + + def decrypt(self, pyfile): + raise NotImplementedError + + + def generatePackages(self): + """ generate new packages from self.urls """ + + packages = map(lambda name, links: (name, links, None), self.core.api.generatePackages(self.urls).iteritems()) + self.packages.extend(packages) + + + def createPackages(self): + """ create new packages from self.packages """ + + package_folder = self.pyfile.package().folder + package_password = self.pyfile.package().password + package_queue = self.pyfile.package().queue + + folder_per_package = self.core.config['general']['folder_per_package'] + try: + use_subfolder = self.getConfig('use_subfolder') + except: + use_subfolder = folder_per_package + try: + subfolder_per_package = self.getConfig('subfolder_per_package') + except: + subfolder_per_package = True + + for pack in self.packages: + name, links, folder = pack + + self.logDebug("Parsed package: %s" % name, + "%d links" % len(links), + "Saved to folder: %s" % folder if folder else "Saved to download folder") + + links = map(decode, links) + + pid = self.core.api.addPackage(name, links, package_queue) + + if package_password: + self.core.api.setPackageData(pid, {"password": package_password}) + + setFolder = lambda x: self.core.api.setPackageData(pid, {"folder": x or ""}) #: Workaround to do not break API addPackage method + + if use_subfolder: + if not subfolder_per_package: + setFolder(package_folder) + self.logDebug("Set package %(name)s folder to: %(folder)s" % {"name": name, "folder": folder}) + + elif not folder_per_package or name != folder: + if not 
folder: + folder = urlparse(html_unescape(name)).path.split("/")[-1] + + setFolder(folder) + self.logDebug("Set package %(name)s folder to: %(folder)s" % {"name": name, "folder": folder}) + + elif folder_per_package: + setFolder(None) diff --git a/pyload/plugins/internal/DeadCrypter.py b/pyload/plugins/internal/DeadCrypter.py new file mode 100644 index 000000000..bf150f3d5 --- /dev/null +++ b/pyload/plugins/internal/DeadCrypter.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.internal.Crypter import Crypter as _Crypter + + +class DeadCrypter(_Crypter): + __name__ = "DeadCrypter" + __type__ = "crypter" + __version__ = "0.02" + + __pattern__ = r'^unmatchable$' + + __description__ = """Crypter is no longer available""" + __license__ = "GPLv3" + __authors__ = [("stickell", "l.stickell@yahoo.it")] + + + def setup(self): + self.offline("Crypter is no longer available") diff --git a/pyload/plugins/internal/DeadHoster.py b/pyload/plugins/internal/DeadHoster.py new file mode 100644 index 000000000..036ed3cb6 --- /dev/null +++ b/pyload/plugins/internal/DeadHoster.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.internal.Hoster import Hoster as _Hoster + + +def create_getInfo(plugin): + + def getInfo(urls): + yield map(lambda url: ('#N/A: ' + url, 0, 1, url), urls) + + return getInfo + + +class DeadHoster(_Hoster): + __name__ = "DeadHoster" + __type__ = "hoster" + __version__ = "0.12" + + __pattern__ = r'^unmatchable$' + + __description__ = """Hoster is no longer available""" + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz")] + + + def setup(self): + self.offline("Hoster is no longer available") diff --git a/pyload/plugins/internal/Hoster.py b/pyload/plugins/internal/Hoster.py new file mode 100644 index 000000000..ea225262e --- /dev/null +++ b/pyload/plugins/internal/Hoster.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.Plugin import Plugin + + +def getInfo(self): + #result = [ .. 
(name, size, status, url) .. ] + return + + +class Hoster(Plugin): + __name__ = "Hoster" + __type__ = "hoster" + __version__ = "0.02" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("mkaay", "mkaay@mkaay.de")] diff --git a/pyload/plugins/internal/MultiHoster.py b/pyload/plugins/internal/MultiHoster.py new file mode 100644 index 000000000..4eb4a6f31 --- /dev/null +++ b/pyload/plugins/internal/MultiHoster.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- + +import re + +from pyload.plugins.internal.Addon import Addon +from pyload.utils import remove_chars + + +class MultiHoster(Addon): + __name__ = "MultiHoster" + __type__ = "addon" + __version__ = "0.20" + + __description__ = """Base multi-hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + interval = 24 * 60 * 60 #: reload hosters daily + + HOSTER_REPLACEMENTS = [("2shared.com", "twoshared.com"), ("4shared.com", "fourshared.com"), ("cloudnator.com", "shragle.com"), + ("ifile.it", "filecloud.io"), ("easy-share.com", "crocko.com"), ("freakshare.net", "freakshare.com"), + ("hellshare.com", "hellshare.cz"), ("share-rapid.cz", "sharerapid.com"), ("sharerapid.cz", "sharerapid.com"), + ("ul.to", "uploaded.to"), ("uploaded.net", "uploaded.to"), ("1fichier.com", "onefichier.com")] + HOSTER_EXCLUDED = [] + + + def setup(self): + self.hosters = [] + self.supported = [] + self.new_supported = [] + + + def getConfig(self, option, default=''): + """getConfig with default value - subclass may not implements all config options""" + try: + # Fixed loop due to getConf deprecation in 0.4.10 + return super(MultiHoster, self).getConfig(option) + except KeyError: + return default + + + def getHosterCached(self): + if not self.hosters: + try: + hosterSet = self.toHosterSet(self.getHoster()) - set(self.HOSTER_EXCLUDED) + except Exception, e: + self.logError(e) + 
return [] + + try: + configMode = self.getConfig('hosterListMode', 'all') + if configMode in ("listed", "unlisted"): + configSet = self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) + + if configMode == "listed": + hosterSet &= configSet + else: + hosterSet -= configSet + + except Exception, e: + self.logError(e) + + self.hosters = list(hosterSet) + + return self.hosters + + + def toHosterSet(self, hosters): + hosters = set((str(x).strip().lower() for x in hosters)) + + for rep in self.HOSTER_REPLACEMENTS: + if rep[0] in hosters: + hosters.remove(rep[0]) + hosters.add(rep[1]) + + hosters.discard('') + return hosters + + + def getHoster(self): + """Load list of supported hoster + + :return: List of domain names + """ + raise NotImplementedError + + + def coreReady(self): + if self.cb: + self.core.scheduler.removeJob(self.cb) + + self.setConfig("activated", True) #: config not in sync after plugin reload + + cfg_interval = self.getConfig("interval", None) #: reload interval in hours + if cfg_interval is not None: + self.interval = cfg_interval * 60 * 60 + + if self.interval: + self._periodical() + else: + self.periodical() + + + def initPeriodical(self): + pass + + + def periodical(self): + """reload hoster list periodically""" + self.logInfo(_("Reloading supported hoster list")) + + old_supported = self.supported + self.supported, self.new_supported, self.hosters = [], [], [] + + self.overridePlugins() + + old_supported = [hoster for hoster in old_supported if hoster not in self.supported] + if old_supported: + self.logDebug("UNLOAD", ", ".join(old_supported)) + for hoster in old_supported: + self.unloadHoster(hoster) + + + def overridePlugins(self): + pluginMap = {} + for name in self.core.pluginManager.hosterPlugins.keys(): + pluginMap[name.lower()] = name + + accountList = [name.lower() for name, data in self.core.accountManager.accounts.iteritems() if data] + excludedList = [] + + for hoster in 
self.getHosterCached(): + name = remove_chars(hoster.lower(), "-.") + + if name in accountList: + excludedList.append(hoster) + else: + if name in pluginMap: + self.supported.append(pluginMap[name]) + else: + self.new_supported.append(hoster) + + if not self.supported and not self.new_supported: + self.logError(_("No Hoster loaded")) + return + + module = self.core.pluginManager.getPlugin(self.__type__, self.__name__) + klass = getattr(module, self.__name__) + + # inject plugin plugin + self.logDebug("Overwritten Hosters", ", ".join(sorted(self.supported))) + for hoster in self.supported: + dict = self.core.pluginManager.hosterPlugins[hoster] + dict['new_module'] = module + dict['new_name'] = self.__name__ + + if excludedList: + self.logInfo(_("The following hosters were not overwritten - account exists"), ", ".join(sorted(excludedList))) + + if self.new_supported: + self.logDebug("New Hosters", ", ".join(sorted(self.new_supported))) + + # create new regexp + regexp = r'.*(%s).*' % "|".join([x.replace(".", "\\.") for x in self.new_supported]) + if hasattr(klass, "__pattern__") and isinstance(klass.__pattern__, basestring) and '://' in klass.__pattern__: + regexp = r'%s|%s' % (klass.__pattern__, regexp) + + self.logDebug("Regexp", regexp) + + dict = self.core.pluginManager.hosterPlugins[self.__name__] + dict['pattern'] = regexp + dict['re'] = re.compile(regexp) + + + def unloadHoster(self, hoster): + dict = self.core.pluginManager.hosterPlugins[hoster] + if "module" in dict: + del dict['module'] + + if "new_module" in dict: + del dict['new_module'] + del dict['new_name'] + + + def unload(self): + """Remove override for all hosters. 
class OCR(object):
    """Base class for captcha OCR plugins.

    Holds a PIL image plus its pixel access object and offers clean-up helpers
    (greyscale, black/white, noise removal, derotation, letter splitting) and a
    ``tesseract`` command line runner. Subclasses implement ``get_captcha``.
    """
    __name__    = "OCR"
    __type__    = "ocr"
    __version__ = "0.01"

    __description__ = """Base OCR plugin"""
    __license__     = "GPLv3"
    __authors__     = [("pyLoad Team", "admin@pyload.org")]


    def __init__(self):
        self.logger = logging.getLogger("log")


    def load_image(self, image):
        """Open *image* with PIL and reset the recognised text."""
        self.image = Image.open(image)
        self.pixels = self.image.load()
        self.result_captcha = ''


    def unload(self):
        """delete all tmp images"""
        pass


    def threshold(self, value):
        """Map every pixel value ``a`` to ``a * value + 10``."""
        self.image = self.image.point(lambda a: a * value + 10)


    def run(self, command):
        """Run a command and log its return code and output.

        :param command: argument list handed to ``subprocess.Popen``
        """
        popen = subprocess.Popen(command, bufsize=-1, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes while waiting for the child; calling wait()
        # first and reading afterwards can deadlock once the child fills a pipe buffer
        stdout, stderr = popen.communicate()
        output = "%s | %s" % (stdout, stderr)
        self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output))


    def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True):
        """Save the current image, run tesseract on it and store the text in ``result_captcha``.

        :param subset: restrict recognition to a character whitelist
        :param digits: include 0-9 in the whitelist
        :param lowercase: include a-z in the whitelist
        :param uppercase: include A-Z in the whitelist
        :return: None; returns early (after logging) if the temp files cannot be created
        """
        import os  #: imported here because the module header only pulls names from os.path

        use_subset = subset and (digits or lowercase or uppercase)

        try:
            tmpTif = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb")
            tmpTif.close()

            tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb")
            tmpTxt.close()

        except IOError as e:
            self.logger.error(e)  #: this class has a logger attribute but no logError method
            return

        self.logger.debug("save tiff")
        self.image.save(tmpTif.name, 'TIFF')

        if os.name == "nt":
            # NOTE(review): `pypath` is not defined in this block - presumably a global
            # provided by the pyLoad core; confirm
            tessparams = [join(pypath, "tesseract", "tesseract.exe")]
        else:
            tessparams = ['tesseract']

        # tesseract appends ".txt" to the output base name itself
        tessparams.extend([abspath(tmpTif.name), abspath(tmpTxt.name).replace(".txt", "")])

        if use_subset:
            whitelist = ""
            if digits:
                whitelist += "0123456789"
            if lowercase:
                whitelist += "abcdefghijklmnopqrstuvwxyz"
            if uppercase:
                whitelist += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

            tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb")
            try:
                tmpSub.write("tessedit_char_whitelist " + whitelist + "\n")
            finally:
                tmpSub.close()

            tessparams.append("nobatch")
            tessparams.append(abspath(tmpSub.name))

        self.logger.debug("run tesseract")
        self.run(tessparams)
        self.logger.debug("read txt")

        try:
            with open(tmpTxt.name, 'r') as f:
                self.result_captcha = f.read().replace("\n", "")
        except (IOError, OSError):
            self.result_captcha = ""

        self.logger.debug(self.result_captcha)

        leftovers = [tmpTif.name, tmpTxt.name]
        if use_subset:
            leftovers.append(tmpSub.name)

        # remove each file on its own so one failure does not leave the others behind
        for leftover in leftovers:
            try:
                os.remove(leftover)
            except OSError:
                pass


    def get_captcha(self, name):
        """Recognise the captcha image *name*; must be provided by subclasses."""
        raise NotImplementedError


    def to_greyscale(self):
        """Convert the image to mode 'L' if needed and refresh the pixel access object."""
        if self.image.mode != 'L':
            self.image = self.image.convert('L')

        self.pixels = self.image.load()


    def eval_black_white(self, limit):
        """Set every pixel above *limit* to 255 and every other pixel to 0."""
        self.pixels = self.image.load()
        w, h = self.image.size
        for x in xrange(w):
            for y in xrange(h):
                if self.pixels[x, y] > limit:
                    self.pixels[x, y] = 255
                else:
                    self.pixels[x, y] = 0


    def clean(self, allowed):
        """Turn non-white pixels with fewer than *allowed* non-white neighbours white."""
        pixels = self.pixels

        w, h = self.image.size

        # the eight neighbours, in the order they are probed
        neighbours = ((-1, -1), (-1, 0), (-1, 1), (0, 1), (1, 1), (1, 0), (1, -1), (0, -1))

        for x in xrange(w):
            for y in xrange(h):
                if pixels[x, y] == 255:
                    # No point in processing white pixels since we only want to remove black pixel
                    continue

                count = 0

                try:
                    for dx, dy in neighbours:
                        if pixels[x + dx, y + dy] != 255:
                            count += 1
                except Exception:
                    # a failing pixel lookup ends the count for this pixel; the
                    # neighbours counted so far are kept
                    pass

                # not enough neighbors are dark pixels so mark this pixel
                # to be changed to white
                if count < allowed:
                    pixels[x, y] = 1

        # second pass: this time set all 1's to 255 (white)
        for x in xrange(w):
            for y in xrange(h):
                if pixels[x, y] == 1:
                    pixels[x, y] = 255

        self.pixels = pixels


    def derotate_by_average(self):
        """rotate by checking each angle and guess most suitable"""

        w, h = self.image.size
        pixels = self.pixels

        # mark black pixels with 155 so they can be told apart from the black
        # fill that rotate() adds
        for x in xrange(w):
            for y in xrange(h):
                if pixels[x, y] == 0:
                    pixels[x, y] = 155

        highest = {}

        for angle in xrange(-45, 45):

            tmpimage = self.image.rotate(angle)

            pixels = tmpimage.load()

            for x in xrange(w):
                for y in xrange(h):
                    if pixels[x, y] == 0:
                        pixels[x, y] = 255

            # number of marked pixels in every column
            columns = []
            for x in xrange(w):
                marked = 0
                for y in xrange(h):
                    if pixels[x, y] == 155:
                        marked += 1
                columns.append(marked)

            filled = [c for c in columns if c != 0]

            # a rotation without any marked column would divide by zero
            avg = sum(filled) // len(filled) if filled else 0

            highest[angle] = (max(columns) if columns else 0) - avg

        hkey = 0
        hvalue = 0

        for key, value in highest.items():
            if value > hvalue:
                hkey = key
                hvalue = value

        self.image = self.image.rotate(hkey)
        pixels = self.image.load()

        for x in xrange(w):
            for y in xrange(h):
                if pixels[x, y] == 0:
                    pixels[x, y] = 255

                if pixels[x, y] == 155:
                    pixels[x, y] = 0

        self.pixels = pixels


    def split_captcha_letters(self):
        """Cut the image into letters at columns that contain no non-white pixel.

        :return: list of cropped images wider and taller than 5 pixels
        """
        # NOTE(review): a letter is only emitted when an empty column follows it, so a
        # letter touching the right edge is not returned - confirm this is intended
        captcha = self.image
        started = False
        letters = []
        width, height = captcha.size
        bottomY, topY = 0, height
        pixels = captcha.load()

        for x in xrange(width):
            black_pixel_in_col = False
            for y in xrange(height):
                if pixels[x, y] != 255:
                    if not started:
                        started = True
                        firstX = x
                        lastX = x

                    if y > bottomY:
                        bottomY = y
                    if y < topY:
                        topY = y
                    if x > lastX:
                        lastX = x

                    black_pixel_in_col = True

            if black_pixel_in_col is False and started is True:
                rect = (firstX, topY, lastX, bottomY)
                new_captcha = captcha.crop(rect)

                w, h = new_captcha.size
                if w > 5 and h > 5:
                    letters.append(new_captcha)

                started = False
                bottomY, topY = 0, height

        return letters


    def correct(self, values, var=None):
        """Apply the replacements in *values* to *var*, or to ``result_captcha`` if *var* is empty.

        :param values: dict mapping a string, or an iterable of strings, to its replacement
        :param var: text to correct; when falsy ``self.result_captcha`` is corrected in place
        :return: the corrected text when *var* was given, otherwise None
        """
        if var:
            result = var
        else:
            result = self.result_captcha

        for key, item in values.items():

            # isinstance also accepts unicode keys (type(u"") is unicode on Python 2)
            # and str subclasses, which would otherwise be replaced character by character
            if isinstance(key, (str, type(u""))):
                result = result.replace(key, item)
            else:
                for expr in key:
                    result = result.replace(expr, item)

        if var:
            return result
        else:
            self.result_captcha = result
class SimpleCrypter(Crypter, SimpleHoster):
    __name__    = "SimpleCrypter"
    __type__    = "crypter"
    __version__ = "0.31"

    __pattern__ = r'^unmatchable$'
    __config__  = [("use_subfolder", "bool", "Save package to subfolder", True),  #: Overrides core.config['general']['folder_per_package']
                   ("subfolder_per_package", "bool", "Create a subfolder for each package", True)]

    __description__ = """Simple decrypter plugin"""
    __license__     = "GPLv3"
    __authors__     = [("stickell", "l.stickell@yahoo.it"),
                       ("zoidberg", "zoidberg@mujmail.cz"),
                       ("Walter Purcaro", "vuolter@gmail.com")]


    """
    Following patterns should be defined by each crypter:

      LINK_PATTERN: group(1) must be a download link or a regex to catch more links
        example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"'

      NAME_PATTERN: (optional) folder name or webpage title
        example: NAME_PATTERN = r'<title>Files of: (?P<N>[^<]+) folder</title>'

      OFFLINE_PATTERN: (optional) Checks if the file is yet available online
        example: OFFLINE_PATTERN = r'File (deleted|not found)'

      TEMP_OFFLINE_PATTERN: (optional) Checks if the file is temporarily offline
        example: TEMP_OFFLINE_PATTERN = r'Server maintainance'


    You can override the getLinks method if you need a more sophisticated way to extract the links.


    If the links are split over multiple pages you can define the PAGES_PATTERN regex:

      PAGES_PATTERN: (optional) group(1) should be the number of overall pages containing the links
        example: PAGES_PATTERN = r'Pages: (\d+)'

    and its loadPage method:


      def loadPage(self, page_n):
          return the html of the page number page_n
    """

    LINK_PATTERN = None

    NAME_REPLACEMENTS = [(r"&#?\w+;", fixup)]
    URL_REPLACEMENTS  = []

    TEXT_ENCODING = False  #: Set to True or encoding name if encoding in http header is not correct
    COOKIES       = True  #: or False or list of tuples [(domain, name, value)]

    LOGIN_ACCOUNT = False
    LOGIN_PREMIUM = False


    def prepare(self):
        """Reset per-run state, enforce the account requirements and normalise the url."""
        self.info  = {}
        self.links = []

        if self.LOGIN_ACCOUNT and not self.account:
            self.fail(_("Required account not found"))

        if self.LOGIN_PREMIUM and not self.premium:
            self.fail(_("Required premium account not found"))

        self.req.setOption("timeout", 120)

        if isinstance(self.COOKIES, list):
            set_cookies(self.req.cj, self.COOKIES)

        self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS)


    def decrypt(self, pyfile):
        """Load the page, collect its links and publish them as a single package."""
        self.prepare()

        self.preload()

        if self.html is None:
            self.fail(_("No html retrieved"))

        self.checkInfo()

        self.links = self.getLinks()

        if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'):
            self.handleMultiPages()

        self.logDebug("Package has %d links" % len(self.links))

        if self.links:
            self.packages = [(self.info['name'], self.links, self.info['folder'])]


    def checkStatus(self):
        """Report the package offline (status 1) or temporarily offline (status 6)."""
        status = self.info['status']

        # compare by value: `is` on an int only works by interpreter accident
        if status == 1:
            self.offline()

        elif status == 6:
            self.tempOffline()


    def checkNameSize(self):
        """Set the pyfile name and the package folder from the collected info."""
        name = self.info['name']
        url  = self.info['url']

        if name and name != url:
            self.pyfile.name = name
        else:
            # no usable name: fall back to the last path segment
            self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1]

        folder = self.info['folder'] = name

        self.logDebug("File name: %s" % name,
                      "File folder: %s" % folder)


    def getLinks(self):
        """
        Returns the links extracted from self.html
        You should override this only if it's impossible to extract links using only the LINK_PATTERN.
        """
        return re.findall(self.LINK_PATTERN, self.html)


    def handleMultiPages(self):
        """Append the links of pages 2..N, N being group(1) of PAGES_PATTERN (1 if not found)."""
        try:
            m = re.search(self.PAGES_PATTERN, self.html)
            pages = int(m.group(1))
        except (AttributeError, IndexError, TypeError, ValueError):
            # no match, no group 1, or a non-numeric page count
            pages = 1

        for p in xrange(2, pages + 1):
            self.html = self.loadPage(p)
            self.links += self.getLinks()
#@TODO: Move to hoster class in 0.4.10
def _getDirectLink(self, url):
    """Return the redirect target of *url* if the server answers with a ``Location`` header.

    The request is made with curl's FOLLOWLOCATION switched off so the redirect is not
    followed; the option is switched back on afterwards, even when the request fails.

    :param self: plugin instance providing ``req``, ``load`` and ``getInfo``
    :param url: url to probe
    :return: the header value (right-stripped), or None when the response parses as an
             online file page (status 2) or carries no ``Location`` header
    """
    self.req.http.c.setopt(FOLLOWLOCATION, 0)

    try:
        html = self.load(url, ref=True, decode=True)
    finally:
        # never leave the shared request object with redirects disabled
        self.req.http.c.setopt(FOLLOWLOCATION, 1)

    if self.getInfo(url, html)['status'] != 2:
        try:
            return re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I).group(1).rstrip()  #@TODO: Remove .rstrip() in 0.4.10
        except (AttributeError, TypeError):
            # no Location header in the response (or no header text at all)
            pass
"zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Info patterns should be defined by each hoster: + + INFO_PATTERN: (optional) Name and Size of the file + example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' + or + NAME_PATTERN: (optional) Name that will be set for the file + example: NAME_PATTERN = r'(?P<N>file_name)' + SIZE_PATTERN: (optional) Size that will be checked for the file + example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' + + OFFLINE_PATTERN: (optional) Check if the file is yet available online + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline + example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' + + + Error handling patterns are all optional: + + WAIT_PATTERN: (optional) Detect waiting time + example: WAIT_PATTERN = r'' + + PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account + example: PREMIUM_ONLY_PATTERN = r'Premium account required' + + ERROR_PATTERN: (optional) Detect any error preventing download + example: ERROR_PATTERN = r'' + + + Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download: + + LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download + example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' + + LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download + example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' + """ + + NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + SIZE_REPLACEMENTS = [] + URL_REPLACEMENTS = [] + + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic 
@classmethod
def parseInfo(cls, urls):
    """Lazily yield the ``getInfo`` result of every url in *urls*.

    Each url is first rewritten with ``URL_REPLACEMENTS``.

    :param urls: iterable of url strings
    """
    rewritten = (replace_patterns(link, cls.URL_REPLACEMENTS) for link in urls)
    for link in rewritten:
        yield cls.getInfo(link)
def preload(self):
    """Load ``pyfile.url`` into ``self.html``.

    The page is decoded by ``load`` unless TEXT_ENCODING is set; when TEXT_ENCODING is
    an encoding name the page is converted to unicode with that encoding afterwards.
    """
    encoding = self.TEXT_ENCODING
    page = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not encoding)
    self.html = page

    if isinstance(encoding, basestring):
        self.html = unicode(page, encoding)
def checkErrors(self):
    """Scan ``self.html`` with the optional WAIT / PREMIUM_ONLY / ERROR patterns.

    * WAIT_PATTERN match: sums every "<n> hr|hour|min|sec" found in the matched text,
      calls ``wait`` with the total in seconds and returns.
    * PREMIUM_ONLY_PATTERN match: stores ``"premium-only"`` in ``info['error']`` and returns.
    * ERROR_PATTERN match: stores group(1) (the whole match if the pattern has no
      group) in ``info['error']`` and passes it to ``error``.

    When none of the first two matched, a stale ``info['error']`` is removed.
    """
    if hasattr(self, 'WAIT_PATTERN'):
        m = re.search(self.WAIT_PATTERN, self.html)
        if m:
            seconds = {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}
            # findall needs the matched text; passing the match object raises TypeError
            wait_time = sum(int(v) * seconds[u.lower()]
                            for v, u in re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I))
            self.wait(wait_time, False)
            return

    if hasattr(self, 'PREMIUM_ONLY_PATTERN'):
        m = re.search(self.PREMIUM_ONLY_PATTERN, self.html)
        if m:
            self.info['error'] = "premium-only"
            return

    if hasattr(self, 'ERROR_PATTERN'):
        m = re.search(self.ERROR_PATTERN, self.html)
        if m:
            # a pattern without a capture group has no group(1)
            e = self.info['error'] = m.group(1) if m.re.groups else m.group(0)
            self.error(e)

    self.info.pop('error', None)
def error(self, reason="", type="parse"):
    """Delegate to the parent class' ``error`` with "parse" as the default error type.

    :param reason: error description
    :param type: error type forwarded to the parent implementation
    """
    # super() already binds self; passing it again shifts every argument by one
    return super(SimpleHoster, self).error(reason, type)
def renice(pid, value):
    """Change the scheduling priority of process *pid* with the ``renice`` tool.

    Does nothing on Windows or when *value* is falsy. Failures are reported on stdout
    and never raised.

    :param pid: process id
    :param value: first argument passed to ``renice``
    """
    if os.name == "nt" or not value:
        return

    try:
        p = Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1)
        p.communicate()  #: reap the helper and close its pipes instead of leaking them
    except Exception:
        print("Renice failed")
def checkPassword(self, password):
    """Tell whether *password* may be correct.

    Only header protected archives can be verified at this point: the archive is
    listed with *password* and unrar's stderr is searched for a wrong-password message.

    :param password: password candidate
    :return: False if unrar reported a wrong password, True otherwise
    """
    if not self.headerProtected:
        return True

    proc = self.call_unrar("l", "-v", self.file, password=password)
    _out, stderr = proc.communicate()

    return self.re_wrongpwd.search(stderr) is None
def getDeleteFiles(self):
    """Return the files on disk that belong to the archive ``self.file``.

    For ``.partN`` archives the part number is replaced by a glob wildcard; otherwise
    every ``.r*`` sibling that matches ``re_partfiles`` is returned.

    :return: list of file paths
    """
    # The flag is given to re.compile: as 4th positional argument of re.sub it would be
    # taken as `count` and the match would stay case sensitive.
    if ".part" in basename(self.file).lower():
        return glob(re.compile(r"(?<=\.part)([01]+)", re.I).sub("*", self.file))

    # get files which matches .r* and filter unsuited files out
    parts = glob(re.compile(r"(?<=\.r)ar$", re.I).sub("*", self.file))
    return [part for part in parts if self.re_partfiles.match(part)]
def extract(self, progress, password=None):
    """Extract every member of the zip archive ``self.file`` into ``self.out``.

    The member names are stored in ``self.files``.

    :param progress: progress callback; accepted for interface compatibility, not called
    :param password: accepted for interface compatibility, not used
    """
    z = zipfile.ZipFile(self.file)
    try:
        self.files = z.namelist()
        z.extractall(self.out)
    finally:
        # release the handle right away; getDeleteFiles() offers this very file for deletion
        z.close()
@Expose
def autoreloadPlugins(self):
    """ reload and reindex all modified plugins

    Walks the loaded ``pyload.plugins.*`` / ``userplugins.*`` modules, compares the
    mtime of each source file with the value remembered in ``self.mtimes`` and asks the
    plugin manager to reload those that changed. A module seen for the first time is
    only recorded.

    :return: True if the plugin manager reported a successful reload, else False
    """
    prefixes = ("pyload.plugins.", "userplugins.")
    reloads  = []

    # iterate over a copy: an import elsewhere while this runs would change sys.modules
    for module in list(sys.modules.values()):
        if not module:
            continue

        modname = module.__name__
        if not modname.startswith(prefixes) or modname.count(".") < 2:
            continue

        root, plugin_type, plugin_name = modname.rsplit(".", 2)
        if plugin_type not in self.core.pluginManager.plugins:
            continue

        source = module.__file__.replace(".pyc", ".py")
        if not path.isfile(source):
            continue

        key   = (plugin_type, plugin_name)
        mtime = stat(source).st_mtime

        if key not in self.mtimes:
            self.mtimes[key] = mtime
        elif self.mtimes[key] < mtime:
            reloads.append(key)
            self.mtimes[key] = mtime

    return bool(self.core.pluginManager.reloadPlugins(reloads))
+ return getURL(self.SERVER_URL, get={'v': self.core.api.getServerVersion()}).splitlines() + except: + self.logWarning(_("Unable to contact server to get updates")) + + + @threaded + def updateThread(self): + self.updating = True + + status = self.update(onlyplugin=self.getConfig("mode") == "plugins only") + + if status == 2: + self.core.api.restart() + else: + self.updating = False + + + @Expose + def updatePlugins(self): + """ simple wrapper for calling plugin update quickly """ + return self.update(onlyplugin=True) + + + @Expose + def update(self, onlyplugin=False): + """ check for updates """ + data = self.server_request() + + if not data: + exitcode = 0 + + elif data[0] == "None": + self.logInfo(_("No new pyLoad version available")) + updates = data[1:] + exitcode = self._updatePlugins(updates) + + elif onlyplugin: + exitcode = 0 + + else: + newversion = data[0] + self.logInfo(_("*** New pyLoad Version %s available ***") % newversion) + self.logInfo(_("*** Get it here: https://github.com/pyload/pyload/releases ***")) + exitcode = 3 + self.info['pyload'] = True + self.info['version'] = newversion + + return exitcode #: 0 = No plugins updated; 1 = Plugins updated; 2 = Plugins updated, but restart required; 3 = No plugins updated, new pyLoad version available + + + def _updatePlugins(self, updates): + """ check for plugin updates """ + + if self.info['plugins']: + return False #: plugins were already updated + + exitcode = 0 + updated = [] + + vre = re.compile(r'__version__.*=.*("|\')([\d.]+)') + url = updates[0] + schema = updates[1].split('|') + + if "BLACKLIST" in updates: + blacklist = updates[updates.index('BLACKLIST') + 1:] + updates = updates[2:updates.index('BLACKLIST')] + else: + blacklist = None + updates = updates[2:] + + upgradable = sorted(map(lambda x: dict(zip(schema, x.split('|'))), updates), + key=itemgetter("type", "name")) + + for plugin in upgradable: + filename = plugin['name'] + type = plugin['type'] + version = plugin['version'] + + if 
filename.endswith(".pyc"): + name = filename[:filename.find("_")] + else: + name = filename.replace(".py", "") + + plugins = getattr(self.core.pluginManager, "%sPlugins" % type) + + oldver = float(plugins[name]['version']) if name in plugins else None + newver = float(version) + + if not oldver: + msg = "New plugin: [%(type)s] %(name)s (v%(newver).2f)" + elif newver > oldver: + msg = "New version of plugin: [%(type)s] %(name)s (v%(oldver).2f -> v%(newver).2f)" + else: + continue + + self.logInfo(_(msg) % {'type' : type, + 'name' : name, + 'oldver': oldver, + 'newver': newver}) + try: + content = getURL(url % plugin) + m = vre.search(content) + + if m and m.group(2) == version: + f = open(safe_join("userplugins", prefix, filename), "wb") + f.write(content) + f.close() + updated.append((prefix, name)) + else: + raise Exception, _("Version mismatch") + + except Exception, e: + self.logError(_("Error updating plugin %s") % filename, e) + + if blacklist: + blacklisted = map(lambda x: (x.split('|')[0], x.split('|')[1].rsplit('.', 1)[0]), blacklist) + + # Always protect internal plugins from removing + for i, n, t in blacklisted.enumerate(): + if t == "internal": + del blacklisted[i] + + blacklisted = sorted(blacklisted) + removed = self.removePlugins(blacklisted) + for t, n in removed: + self.logInfo(_("Removed blacklisted plugin [%(type)s] %(name)s") % { + 'type': t, + 'name': n, + }) + + if updated: + reloaded = self.core.pluginManager.reloadPlugins(updated) + if reloaded: + self.logInfo(_("Plugins updated and reloaded")) + exitcode = 1 + else: + self.logInfo(_("*** Plugins have been updated, but need a pyLoad restart to be reloaded ***")) + self.info['plugins'] = True + exitcode = 2 + else: + self.logInfo(_("No plugin updates available")) + + return exitcode #: 0 = No plugins updated; 1 = Plugins updated; 2 = Plugins updated, but restart required + + + @Expose + def removePlugins(self, type_plugins): + """ delete plugins from disk """ + + if not type_plugins: + return 
+ + self.logDebug("Requested deletion of plugins: %s" % type_plugins) + + removed = [] + + for type, name in type_plugins: + err = False + file = name + ".py" + + for root in ("userplugins", path.join(pypath, "pyload", "plugins")): + + filename = safe_join(root, type, file) + try: + remove(filename) + except Exception, e: + self.logDebug("Error deleting: %s" % path.basename(filename), e) + err = True + + filename += "c" + if path.isfile(filename): + try: + if type == "addon": + self.manager.deactivateAddon(name) + remove(filename) + except Exception, e: + self.logDebug("Error deleting: %s" % path.basename(filename), e) + err = True + + if not err: + id = (type, name) + removed.append(id) + + return removed #: return a list of the plugins successfully removed diff --git a/pyload/plugins/internal/XFSAccount.py b/pyload/plugins/internal/XFSAccount.py new file mode 100644 index 000000000..1e18c09bd --- /dev/null +++ b/pyload/plugins/internal/XFSAccount.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- + +import re + +from time import gmtime, mktime, strptime +from urlparse import urljoin + +from pyload.plugins.internal.Account import Account +from pyload.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies + + +class XFSAccount(Account): + __name__ = "XFSAccount" + __type__ = "account" + __version__ = "0.26" + + __description__ = """XFileSharing account plugin""" + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_URL = None + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + PREMIUM_PATTERN = r'\(Premium only\)' + + VALID_UNTIL_PATTERN = r'>Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})' + + TRAFFIC_LEFT_PATTERN = r'>Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' + TRAFFIC_LEFT_UNIT = "MB" #: used only if no group <U> was found + + LOGIN_FAIL_PATTERN = r'>(Incorrect Login or Password|Error<)' + + + def 
init(self): + # if not self.HOSTER_DOMAIN: + # self.fail(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN + + + def loadAccountInfo(self, user, req): + validuntil = None + trafficleft = None + premium = None + + html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) + + premium = True if re.search(self.PREMIUM_PATTERN, html) else False + + m = re.search(self.VALID_UNTIL_PATTERN, html) + if m: + expiredate = m.group(1).strip() + self.logDebug("Expire date: " + expiredate) + + try: + validuntil = mktime(strptime(expiredate, "%d %B %Y")) + + except Exception, e: + self.logError(e) + + else: + if validuntil > mktime(gmtime()): + premium = True + else: + premium = False + validuntil = None #: registered account type (not premium) + + m = re.search(self.TRAFFIC_LEFT_PATTERN, html) + if m: + try: + traffic = m.groupdict() + size = traffic['S'] + + if "nlimited" in size: + trafficleft = -1 + if validuntil is None: + validuntil = -1 + else: + if 'U' in traffic: + unit = traffic['U'] + elif isinstance(self.TRAFFIC_LEFT_UNIT, basestring): + unit = self.TRAFFIC_LEFT_UNIT + else: + unit = "" + + trafficleft = self.parseTraffic(size + unit) + + except Exception, e: + self.logError(e) + else: + if premium: + trafficleft = -1 + + return {'validuntil': validuntil, 'trafficleft': trafficleft, 'premium': premium} + + + def login(self, user, data, req): + if isinstance(self.COOKIES, list): + set_cookies(req.cj, self.COOKIES) + + url = urljoin(self.HOSTER_URL, "login.html") + html = req.load(url, decode=True) + + action, inputs = parseHtmlForm('name="FL"', html) + if not inputs: + inputs = {'op': "login", + 'redirect': self.HOSTER_URL} + + inputs.update({'login': user, + 'password': data['password']}) + + html = req.load(self.HOSTER_URL, post=inputs, decode=True) + + if re.search(self.LOGIN_FAIL_PATTERN, html): + self.wrongPassword() diff --git a/pyload/plugins/internal/XFSCrypter.py 
b/pyload/plugins/internal/XFSCrypter.py new file mode 100644 index 000000000..2de39f4bc --- /dev/null +++ b/pyload/plugins/internal/XFSCrypter.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.internal.SimpleCrypter import SimpleCrypter + + +class XFSCrypter(SimpleCrypter): + __name__ = "XFSCrypter" + __type__ = "crypter" + __version__ = "0.04" + + __pattern__ = r'^unmatchable$' + + __description__ = """XFileSharing decrypter plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + LINK_PATTERN = r'<(?:td|TD).*?>\s*<a href="(.+?)".*?>.+?(?:</a>)?\s*</(?:td|TD)>' + NAME_PATTERN = r'<[tT]itle>.*?\: (?P<N>.+) folder</[tT]itle>' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' diff --git a/pyload/plugins/internal/XFSHoster.py b/pyload/plugins/internal/XFSHoster.py new file mode 100644 index 000000000..3ae9ee05a --- /dev/null +++ b/pyload/plugins/internal/XFSHoster.py @@ -0,0 +1,344 @@ +# -*- coding: utf-8 -*- + +import re + +from random import random +from time import sleep + +from pycurl import FOLLOWLOCATION, LOW_SPEED_TIME + +from pyload.plugins.hoster.UnrestrictLi import secondsToMidnight +from pyload.plugins.internal.CaptchaService import ReCaptcha, SolveMedia +from pyload.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo +from pyload.utils import html_unescape + + +class XFSHoster(SimpleHoster): + __name__ = "XFSHoster" + __type__ = "hoster" + __version__ = "0.22" + + __pattern__ = r'^unmatchable$' + + __description__ = """XFileSharing hoster plugin""" + __license__ = "GPLv3" + __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + 
("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + URL_REPLACEMENTS = [(r'/(?:embed-)?(\w{12}).*', r'/\1')] #: plus support embedded files + + TEXT_ENCODING = False + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + CHECK_DIRECT_LINK = None + MULTI_HOSTER = False + + INFO_PATTERN = r'<tr><td align=right><b>Filename:</b></td><td nowrap>(?P<N>[^<]+)</td></tr>\s*.*?<small>\((?P<S>[^<]+)\)</small>' + NAME_PATTERN = r'(>Filename:</b></td><td nowrap>|name="fname" value="|<span class="name">|<[Tt]itle>.*?Download )(?P<N>.+?)(\s*<|")' + SIZE_PATTERN = r'(>Size:</b></td><td>|>File:.*>|<span class="size">)(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' + + WAIT_PATTERN = r'<span id="countdown_str">.*?>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' + PREMIUM_ONLY_PATTERN = r'>This file is available for Premium Users only' + ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' + + OVR_LINK_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' + LINK_PATTERN = None #: final download url pattern + + CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' + CAPTCHA_DIV_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>' + RECAPTCHA_PATTERN = None + SOLVEMEDIA_PATTERN = None + + + def setup(self): + self.chunkLimit = 1 + self.resumeDownload = self.multiDL = self.premium + + + def prepare(self): + """ Initialize important variables """ + if not self.HOSTER_DOMAIN: + self.fail(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_NAME: + self.HOSTER_NAME = "".join([str.capitalize() for str in self.HOSTER_DOMAIN.split('.')]) + + if not self.LINK_PATTERN: + pattern = r'(https?://(www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' + self.LINK_PATTERN = pattern % self.HOSTER_DOMAIN.replace('.', '\.') + + 
self.captcha = None + self.errmsg = None + self.passwords = self.getPassword().splitlines() + + super(XFSHoster, self).prepare() + + if self.CHECK_DIRECT_LINK is None: + self.directDL = bool(self.premium) + + + def handleFree(self): + link = self.getDownloadLink() + + if link: + if self.captcha: + self.correctCaptcha() + + self.download(link, ref=True, cookies=True, disposition=True) + + elif self.errmsg: + if 'captcha' in self.errmsg: + self.fail(_("No valid captcha code entered")) + else: + self.fail(self.errmsg) + + else: + self.fail(_("Download link not found")) + + + def handlePremium(self): + return self.handleFree() + + + def getDownloadLink(self): + for i in xrange(1, 5): + self.logDebug("Getting download link: #%d" % i) + + self.checkErrors() + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + + data = self.getPostParameters() + + self.req.http.c.setopt(FOLLOWLOCATION, 0) + + self.html = self.load(self.pyfile.url, post=data, ref=True, decode=True) + + self.req.http.c.setopt(FOLLOWLOCATION, 1) + + m = re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I) + if m and not "op=" in m.group(1): + break + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + else: + self.logError(data['op'] if 'op' in data else _("UNKNOWN")) + return "" + + self.errmsg = None + + return m.group(1) + + + def handleMulti(self): + #only tested with easybytez.com + self.html = self.load("http://www.%s/" % self.HOSTER_DOMAIN) + + action, inputs = self.parseHtmlForm('') + + upload_id = "%012d" % int(random() * 10 ** 12) + action += upload_id + "&js_on=1&utype=prem&upload_type=url" + + inputs['tos'] = '1' + inputs['url_mass'] = self.pyfile.url + inputs['up1oad_type'] = 'url' + + self.logDebug(action, inputs) + + self.req.setOption("timeout", 600) #: wait for file to upload to easybytez.com + + self.html = self.load(action, post=inputs) + + self.checkErrors() + + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + 
self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + stmsg = inputs['st'] + + if stmsg == 'OK': + self.html = self.load(action, post=inputs) + + elif 'Can not leech file' in stmsg: + self.retry(20, 3 * 60, _("Can not leech file")) + + elif 'today' in stmsg: + self.retry(wait_time=secondsToMidnight(gmt=2), reason=_("You've used all Leech traffic today")) + + else: + self.fail(stmsg) + + #get easybytez.com link for uploaded file + m = re.search(self.OVR_LINK_PATTERN, self.html) + if m is None: + self.error(_("OVR_LINK_PATTERN not found")) + + header = self.load(m.group(1), just_header=True, decode=True) + + if 'location' in header: #: Direct download link + self.link = header['location'] + else: + self.fail(_("Download link not found")) + + + def checkErrors(self): + m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) + if m: + self.info['error'] = "premium-only" + return + + m = re.search(self.ERROR_PATTERN, self.html) + + if m is None: + self.errmsg = None + else: + self.errmsg = m.group(1).strip() + + self.logWarning(re.sub(r"<.*?>", " ", self.errmsg)) + + if 'wait' in self.errmsg: + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)]) + self.wait(wait_time, True) + + elif 'country' in self.errmsg: + self.fail(_("Downloads are disabled for your country")) + + elif 'captcha' in self.errmsg: + self.invalidCaptcha() + + elif 'premium' in self.errmsg and 'require' in self.errmsg: + self.fail(_("File can be downloaded by premium users only")) + + elif 'limit' in self.errmsg: + if 'days' in self.errmsg: + delay = secondsToMidnight(gmt=2) + retries = 3 + else: + delay = 1 * 60 * 60 + retries = 25 + + self.wait(delay, True) + self.retry(retries, reason=_("Download limit exceeded")) + + elif 'countdown' in self.errmsg or 'Expired' in self.errmsg: + self.retry(reason=_("Link expired")) + + elif 'maintenance' in 
self.errmsg or 'maintainance' in self.errmsg: + self.tempOffline() + + elif 'download files up to' in self.errmsg: + self.fail(_("File too large for free download")) + + else: + self.retry(wait_time=60, reason=self.errmsg) + + if self.errmsg: + self.info['error'] = self.errmsg + else: + self.info.pop('error', None) + + return self.errmsg + + + def getPostParameters(self): + if hasattr(self, "FORM_PATTERN"): + action, inputs = self.parseHtmlForm(self.FORM_PATTERN) + else: + action, inputs = self.parseHtmlForm(input_names={"op": re.compile("^download")}) + + if not inputs: + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + if 'op' in inputs: + if "password" in inputs: + if self.passwords: + inputs['password'] = self.passwords.pop(0) + else: + self.fail(_("Missing password")) + + if not self.premium: + m = re.search(self.WAIT_PATTERN, self.html) + if m: + wait_time = int(m.group(1)) + self.setWait(wait_time, False) + + self.captcha = self.handleCaptcha(inputs) + + self.wait() + else: + inputs['referer'] = self.pyfile.url + + if self.premium: + inputs['method_premium'] = "Premium Download" + inputs.pop('method_free', None) + else: + inputs['method_free'] = "Free Download" + inputs.pop('method_premium', None) + + return inputs + + + def handleCaptcha(self, inputs): + m = re.search(self.CAPTCHA_PATTERN, self.html) + if m: + captcha_url = m.group(1) + inputs['code'] = self.decryptCaptcha(captcha_url) + return 1 + + m = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S) + if m: + captcha_div = m.group(1) + self.logDebug(captcha_div) + numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) + inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + self.logDebug("Captcha code: %s" % inputs['code'], numerals) + return 2 + + recaptcha = ReCaptcha(self) + try: 
+ captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) + except: + captcha_key = recaptcha.detect_key() + + if captcha_key: + self.logDebug("ReCaptcha key: %s" % captcha_key) + inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + return 3 + + solvemedia = SolveMedia(self) + try: + captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) + except: + captcha_key = solvemedia.detect_key() + + if captcha_key: + self.logDebug("SolveMedia key: %s" % captcha_key) + inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) + return 4 + + return 0 diff --git a/pyload/plugins/internal/__init__.py b/pyload/plugins/internal/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pyload/plugins/internal/__init__.py |