author | Walter Purcaro <vuolter@gmail.com> | 2014-12-13 15:56:57 +0100
committer | Walter Purcaro <vuolter@gmail.com> | 2014-12-13 15:56:57 +0100
commit | acc46fc3497a66a427b795b4a22c6e71d69185a1 (patch)
tree | 2d315b838a76435fc456b972c99c28d1732b2f70 /pyload/plugin/internal
parent | Code fixes (diff)
download | pyload-acc46fc3497a66a427b795b4a22c6e71d69185a1.tar.xz
Update
Diffstat (limited to 'pyload/plugin/internal')
-rw-r--r-- | pyload/plugin/internal/AbstractExtractor.py | 109
-rw-r--r-- | pyload/plugin/internal/BasePlugin.py | 106
-rw-r--r-- | pyload/plugin/internal/DeadCrypter.py | 32
-rw-r--r-- | pyload/plugin/internal/DeadHoster.py | 32
-rw-r--r-- | pyload/plugin/internal/MultiHoster.py | 202
-rw-r--r-- | pyload/plugin/internal/SimpleCrypter.py | 152
-rw-r--r-- | pyload/plugin/internal/SimpleHoster.py | 530
-rw-r--r-- | pyload/plugin/internal/UnRar.py | 221
-rw-r--r-- | pyload/plugin/internal/UnZip.py | 41
-rw-r--r-- | pyload/plugin/internal/UpdateManager.py | 300
-rw-r--r-- | pyload/plugin/internal/XFSAccount.py | 155
-rw-r--r-- | pyload/plugin/internal/XFSCrypter.py | 29
-rw-r--r-- | pyload/plugin/internal/XFSHoster.py | 339
-rw-r--r-- | pyload/plugin/internal/__init__.py | 1 |
14 files changed, 2249 insertions, 0 deletions
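The centerpiece of the diff below is the pattern-driven SimpleHoster/SimpleCrypter pair. As orientation, here is a minimal sketch of the kind of site plugin those bases are written to carry; the ExampleHost name, domain and regexes are invented for illustration and are not part of this commit:

# -*- coding: utf-8 -*-
# Hypothetical site plugin built on the SimpleHoster base introduced below.

from pyload.plugin.internal.SimpleHoster import SimpleHoster, create_getInfo


class ExampleHost(SimpleHoster):
    __name__    = "ExampleHost"
    __type__    = "hoster"
    __version__ = "0.01"

    __pattern__ = r'https?://(?:www\.)?example-host\.tld/\w+'  #: invented domain

    #: group <N> feeds the file name, <S>/<U> the size (parsed by SimpleHoster.getInfo)
    INFO_PATTERN    = r'<h1>(?P<N>.+?)</h1>\s*\((?P<S>[\d.,]+) (?P<U>[\w^_]+)\)'
    OFFLINE_PATTERN = r'>File not found<'

    #: group(1) becomes self.link in handleFree
    LINK_FREE_PATTERN = r'<a class="download" href="(.+?)"'


getInfo = create_getInfo(ExampleHost)

Everything else — preloading the page, parsing name and size, offline checks and the download itself — is inherited from SimpleHoster.process().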
diff --git a/pyload/plugin/internal/AbstractExtractor.py b/pyload/plugin/internal/AbstractExtractor.py
new file mode 100644
index 000000000..b4fd10895
--- /dev/null
+++ b/pyload/plugin/internal/AbstractExtractor.py
@@ -0,0 +1,109 @@
+# -*- coding: utf-8 -*-
+
+class ArchiveError(Exception):
+    pass
+
+
+class CRCError(Exception):
+    pass
+
+
+class WrongPassword(Exception):
+    pass
+
+
+class AbtractExtractor(object):
+    __name__    = "AbtractExtractor"
+    __version__ = "0.10"
+
+    __description__ = """Abstract extractor plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("pyLoad Team", "admin@pyload.org")]
+
+
+    @staticmethod
+    def checkDeps():
+        """ Check if the system satisfies the dependencies
+        :return: boolean
+        """
+        return True
+
+
+    @staticmethod
+    def getTargets(files_ids):
+        """ Filter suited targets from a list of (filepath, id) tuples
+        :param files_ids: List of (filepath, id) tuples
+        :return: List of (filepath, id) tuples of suited targets
+        """
+        raise NotImplementedError
+
+
+    def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice):
+        """Initialize extractor for a specific file
+
+        :param m: ExtractArchive addon plugin
+        :param file: Absolute filepath
+        :param out: Absolute path to destination directory
+        :param fullpath: Extract to fullpath
+        :param overwrite: Overwrite existing archives
+        :param excludefiles: Filenames to exclude from extraction
+        :param renice: Renice value
+        """
+        self.m = m
+        self.file = file
+        self.out = out
+        self.fullpath = fullpath
+        self.overwrite = overwrite
+        self.excludefiles = excludefiles
+        self.renice = renice
+        self.files = []  #: Store extracted files here
+
+
+    def init(self):
+        """ Initialize additional data structures """
+        pass
+
+
+    def checkArchive(self):
+        """Check if a password is needed. Raise ArchiveError if integrity is
+        questionable.
+
+        :return: boolean
+        :raises ArchiveError
+        """
+        return False
+
+
+    def checkPassword(self, password):
+        """ Check if the given password is/might be correct.
+        If it cannot be decided at this point, return True.
+
+        :param password:
+        :return: boolean
+        """
+        return True
+
+
+    def extract(self, progress, password=None):
+        """Extract the archive. Raise specific errors in case of failure.
+
+        :param progress: Progress function, call this to update status
+        :param password: Password to use
+        :raises WrongPassword
+        :raises CRCError
+        :raises ArchiveError
+        :return:
+        """
+        raise NotImplementedError
+
+
+    def getDeleteFiles(self):
+        """Return list of files to delete, do *not* delete them here.
+
+        :return: List with paths of files to delete
+        """
+        raise NotImplementedError
+
+
+    def getExtractedFiles(self):
+        """Populate self.files at some point while extracting"""
+        return self.files
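To make the contract above concrete, a minimal hypothetical subclass follows; the SevenZip name and the 7z command line are assumptions for illustration (UnRar.py and UnZip.py below are the real implementations shipped by this commit):

# -*- coding: utf-8 -*-
# Hypothetical sketch: wiring an external tool into the AbtractExtractor interface.

from subprocess import Popen, PIPE

from pyload.plugin.internal.AbstractExtractor import AbtractExtractor, ArchiveError


class SevenZip(AbtractExtractor):
    __name__    = "SevenZip"
    __version__ = "0.01"

    CMD = "7z"  #: assumed to be on PATH

    @staticmethod
    def getTargets(files_ids):
        #: claim only .7z archives from the (filepath, id) tuples
        return [(file, id) for file, id in files_ids if file.endswith(".7z")]

    def extract(self, progress, password=None):
        progress(0)
        args = [self.CMD, "x" if self.fullpath else "e", "-y", "-o" + self.out]
        if password:
            args.append("-p" + password)
        p = Popen(args + [self.file], stdout=PIPE, stderr=PIPE)
        out, err = p.communicate()
        if p.returncode:  #: 7z returns non-zero on any failure
            raise ArchiveError(err.strip())
        progress(100)

    def getDeleteFiles(self):
        return [self.file]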
diff --git a/pyload/plugin/internal/BasePlugin.py b/pyload/plugin/internal/BasePlugin.py
new file mode 100644
index 000000000..f4b8e8997
--- /dev/null
+++ b/pyload/plugin/internal/BasePlugin.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from urllib import unquote
+from urlparse import urljoin, urlparse
+
+from pyload.network.HTTPRequest import BadHeader
+from pyload.plugin.internal.SimpleHoster import create_getInfo
+from pyload.plugin.Hoster import Hoster
+
+
+class BasePlugin(Hoster):
+    __name__    = "BasePlugin"
+    __type__    = "hoster"
+    __version__ = "0.25"
+
+    __pattern__ = r'^unmatchable$'
+
+    __description__ = """Base plugin when any other didn't fit"""
+    __license__     = "GPLv3"
+    __authors__     = [("RaNaN", "RaNaN@pyload.org"),
+                       ("Walter Purcaro", "vuolter@gmail.com")]
+
+
+    @classmethod
+    def getInfo(cls, url="", html=""):  #@TODO: Move to hoster class in 0.4.10
+        return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': unquote(url) or ""}
+
+
+    def setup(self):
+        self.chunkLimit = -1
+        self.resumeDownload = True
+
+
+    def process(self, pyfile):
+        """main function"""
+
+        pyfile.name = self.getInfo(pyfile.url)['name']
+
+        if not pyfile.url.startswith("http"):
+            self.fail(_("No plugin matched"))
+
+        for _i in xrange(5):
+            try:
+                self.downloadFile(pyfile)
+
+            except BadHeader, e:
+                if e.code == 404:
+                    self.offline()
+
+                elif e.code in (401, 403):
+                    self.logDebug("Auth required", "Received HTTP status code: %d" % e.code)
+
+                    account = self.core.accountManager.getAccountPlugin('Http')
+                    servers = [x['login'] for x in account.getAllAccounts()]
+                    server = urlparse(pyfile.url).netloc
+
+                    if server in servers:
+                        self.logDebug("Logging on to %s" % server)
+                        self.req.addAuth(account.accounts[server]['password'])
+                    else:
+                        for pwd in self.getPassword().splitlines():
+                            if ":" in pwd:
+                                self.req.addAuth(pwd.strip())
+                                break
+                        else:
+                            self.fail(_("Authorization required"))
+                else:
+                    self.fail(e)
+            else:
+                break
+        else:
+            self.fail(_("No file downloaded"))  #@TODO: Move to hoster class in 0.4.10
+
+        if self.checkDownload({'empty': re.compile(r"^$")}) == "empty":  #@TODO: Move to hoster in 0.4.10
+            self.fail(_("Empty file"))
+
+
+    def downloadFile(self, pyfile):
+        url = pyfile.url
+
+        for i in xrange(1, 7):  #@TODO: retrieve the pycurl.MAXREDIRS value set by req
+            header = self.load(url, ref=True, cookies=True, just_header=True, decode=True)
+
+            if 'location' not in header or not header['location']:
+                if 'code' in header and header['code'] not in (200, 201, 203, 206):
+                    self.logDebug("Received HTTP status code: %d" % header['code'])
+                    self.fail(_("File not found"))
+                else:
+                    break
+
+            location = header['location']
+
+            self.logDebug("Redirect #%d to: %s" % (i, location))
+
+            if urlparse(location).scheme:
+                url = location
+            else:
+                p = urlparse(url)
+                base = "%s://%s" % (p.scheme, p.netloc)
+                url = urljoin(base, location)
+        else:
+            self.fail(_("Too many redirects"))
+
+        self.download(unquote(url), disposition=True)
diff --git a/pyload/plugin/internal/DeadCrypter.py b/pyload/plugin/internal/DeadCrypter.py new file mode 100644 index 000000000..fcc2e6eb2 --- /dev/null +++ b/pyload/plugin/internal/DeadCrypter.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +from urllib import unquote +from urlparse import urlparse + +from pyload.plugin.Crypter import Crypter as
_Crypter +from pyload.plugin.internal.SimpleCrypter import create_getInfo + + +class DeadCrypter(_Crypter): + __name = "DeadCrypter" + __type = "crypter" + __version = "0.04" + + __pattern = r'^unmatchable$' + + __description = """Crypter is no longer available""" + __license = "GPLv3" + __authors = [("stickell", "l.stickell@yahoo.it")] + + + @classmethod + def getInfo(cls, url="", html=""): + return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} + + + def setup(self): + self.pyfile.error = "Crypter is no longer available" + self.offline() #@TODO: self.offline("Crypter is no longer available") + + +getInfo = create_getInfo(DeadCrypter) diff --git a/pyload/plugin/internal/DeadHoster.py b/pyload/plugin/internal/DeadHoster.py new file mode 100644 index 000000000..d8f57801a --- /dev/null +++ b/pyload/plugin/internal/DeadHoster.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +from urllib import unquote +from urlparse import urlparse + +from pyload.plugin.Hoster import Hoster as _Hoster +from pyload.plugin.internal.SimpleHoster import create_getInfo + + +class DeadHoster(_Hoster): + __name = "DeadHoster" + __type = "hoster" + __version = "0.14" + + __pattern = r'^unmatchable$' + + __description = """Hoster is no longer available""" + __license = "GPLv3" + __authors = [("zoidberg", "zoidberg@mujmail.cz")] + + + @classmethod + def getInfo(cls, url="", html=""): + return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url} + + + def setup(self): + self.pyfile.error = "Hoster is no longer available" + self.offline() #@TODO: self.offline("Hoster is no longer available") + + +getInfo = create_getInfo(DeadHoster) diff --git a/pyload/plugin/internal/MultiHoster.py b/pyload/plugin/internal/MultiHoster.py new file mode 100644 index 000000000..93fa13561 --- /dev/null +++ b/pyload/plugin/internal/MultiHoster.py @@ -0,0 +1,202 @@ +# -*- coding: utf-8 -*- + +import re + +from pyload.plugin.Addon import Addon +from pyload.utils import remove_chars + + +class MultiHoster(Addon): + __name = "MultiHoster" + __type = "addon" + __version = "0.20" + + __description = """Base multi-hoster plugin""" + __license = "GPLv3" + __authors = [("pyLoad Team", "admin@pyload.org")] + + + HOSTER_REPLACEMENTS = [("1fichier.com", "onefichier.com"), ("2shared.com", "twoshared.com"), + ("4shared.com", "fourshared.com"), ("cloudnator.com", "shragle.com"), + ("easy-share.com", "crocko.com"), ("freakshare.net", "freakshare.com"), + ("hellshare.com", "hellshare.cz"), ("ifile.it", "filecloud.io"), + ("putlocker.com", "firedrive.com"), ("share-rapid.cz", "multishare.cz"), + ("sharerapid.cz", "multishare.cz"), ("ul.to", "uploaded.to"), + ("uploaded.net", "uploaded.to")] + HOSTER_EXCLUDED = [] + + + def setup(self): + self.interval = 12 * 60 * 60 #: reload hosters every 12h + self.hosters = [] + self.supported = [] + self.new_supported = [] + + + def getConfig(self, option, default=''): + """getConfig with default value - subclass may not implements all config options""" + try: + # Fixed loop due to getConf deprecation in 0.4.10 + return super(MultiHoster, self).getConfig(option) + except KeyError: + return default + + + def getHosterCached(self): + if not self.hosters: + try: + hosterSet = self.toHosterSet(self.getHoster()) - set(self.HOSTER_EXCLUDED) + except Exception, e: + self.logError(e) + return [] + + try: + configMode = self.getConfig('hosterListMode', 'all') + if configMode in ("listed", "unlisted"): + configSet = 
self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) + + if configMode == "listed": + hosterSet &= configSet + else: + hosterSet -= configSet + + except Exception, e: + self.logError(e) + + self.hosters = list(hosterSet) + + return self.hosters + + + def toHosterSet(self, hosters): + hosters = set((str(x).strip().lower() for x in hosters)) + + for rep in self.HOSTER_REPLACEMENTS: + if rep[0] in hosters: + hosters.remove(rep[0]) + hosters.add(rep[1]) + + hosters.discard('') + return hosters + + + def getHoster(self): + """Load list of supported hoster + + :return: List of domain names + """ + raise NotImplementedError + + + def activate(self): + if self.cb: + self.core.scheduler.removeJob(self.cb) + + self.setConfig("activated", True) #: config not in sync after plugin reload + + cfg_interval = self.getConfig("interval", None) #: reload interval in hours + if cfg_interval is not None: + self.interval = cfg_interval * 60 * 60 + + if self.interval: + self._periodical() + else: + self.periodical() + + + def periodical(self): + """reload hoster list periodically""" + self.logInfo(_("Reloading supported hoster list")) + + old_supported = self.supported + self.supported = [] + self.new_supported = [] + self.hosters = [] + + self.overridePlugins() + + old_supported = [hoster for hoster in old_supported if hoster not in self.supported] + if old_supported: + self.logDebug("UNLOAD", ", ".join(old_supported)) + for hoster in old_supported: + self.unloadHoster(hoster) + + + def overridePlugins(self): + pluginMap = dict((name.lower(), name) for name in self.core.pluginManager.hosterPlugins.keys()) + accountList = [name.lower() for name, data in self.core.accountManager.accounts.iteritems() if data] + excludedList = [] + + for hoster in self.getHosterCached(): + name = remove_chars(hoster.lower(), "-.") + + if name in accountList: + excludedList.append(hoster) + else: + if name in pluginMap: + self.supported.append(pluginMap[name]) + else: + self.new_supported.append(hoster) + + if not self.supported and not self.new_supported: + self.logError(_("No Hoster loaded")) + return + + module = self.core.pluginManager.getPlugin(self.__type, self.__name) + klass = getattr(module, self.__name) + + # inject plugin plugin + self.logDebug("Overwritten Hosters", ", ".join(sorted(self.supported))) + for hoster in self.supported: + dict = self.core.pluginManager.hosterPlugins[hoster] + dict['new_module'] = module + dict['new_name'] = self.__name + + if excludedList: + self.logInfo(_("The following hosters were not overwritten - account exists"), ", ".join(sorted(excludedList))) + + if self.new_supported: + self.logDebug("New Hosters", ", ".join(sorted(self.new_supported))) + + # create new regexp + regexp = r'.*(%s).*' % "|".join([x.replace(".", "\.") for x in self.new_supported]) + if hasattr(klass, "__pattern") and isinstance(klass.__pattern, basestring) and '://' in klass.__pattern: + regexp = r'%s|%s' % (klass.__pattern, regexp) + + self.logDebug("Regexp", regexp) + + dict = self.core.pluginManager.hosterPlugins[self.__name] + dict['pattern'] = regexp + dict['re'] = re.compile(regexp) + + + def unloadHoster(self, hoster): + dict = self.core.pluginManager.hosterPlugins[hoster] + if "module" in dict: + del dict['module'] + + if "new_module" in dict: + del dict['new_module'] + del dict['new_name'] + + + def deactivate(self): + """Remove override for all hosters. 
Scheduler job is removed by AddonManager""" + for hoster in self.supported: + self.unloadHoster(hoster) + + # reset pattern + klass = getattr(self.core.pluginManager.getPlugin(self.__type, self.__name), self.__name) + dict = self.core.pluginManager.hosterPlugins[self.__name] + dict['pattern'] = getattr(klass, "__pattern", r'^unmatchable$') + dict['re'] = re.compile(dict['pattern']) + + + def downloadFailed(self, pyfile): + """remove plugin override if download fails but not if file is offline/temp.offline""" + if pyfile.hasStatus("failed") and self.getConfig("unloadFailing", True): + hdict = self.core.pluginManager.hosterPlugins[pyfile.pluginname] + if "new_name" in hdict and hdict['new_name'] == self.__name: + self.logDebug("Unload MultiHoster", pyfile.pluginname, hdict) + self.unloadHoster(pyfile.pluginname) + pyfile.setStatus("queued") diff --git a/pyload/plugin/internal/SimpleCrypter.py b/pyload/plugin/internal/SimpleCrypter.py new file mode 100644 index 000000000..30b455545 --- /dev/null +++ b/pyload/plugin/internal/SimpleCrypter.py @@ -0,0 +1,152 @@ +# -*- coding: utf-8 -*- + +import re + +from urlparse import urlparse + +from pyload.plugin.Crypter import Crypter +from pyload.plugin.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookies +from pyload.utils import fixup + + +class SimpleCrypter(Crypter, SimpleHoster): + __name = "SimpleCrypter" + __type = "crypter" + __version = "0.32" + + __pattern = r'^unmatchable$' + __config = [("use_subfolder", "bool", "Save package to subfolder", True), #: Overrides core.config['general']['folder_per_package'] + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description = """Simple decrypter plugin""" + __license = "GPLv3" + __authors = [("stickell", "l.stickell@yahoo.it"), + ("zoidberg", "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Following patterns should be defined by each crypter: + + LINK_PATTERN: group(1) must be a download link or a regex to catch more links + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + + NAME_PATTERN: (optional) folder name or webpage title + example: NAME_PATTERN = r'<title>Files of: (?P<N>[^<]+) folder</title>' + + OFFLINE_PATTERN: (optional) Checks if the file is yet available online + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Checks if the file is temporarily offline + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + + You can override the getLinks method if you need a more sophisticated way to extract the links. 
+ + + If the links are splitted on multiple pages you can define the PAGES_PATTERN regex: + + PAGES_PATTERN: (optional) group(1) should be the number of overall pages containing the links + example: PAGES_PATTERN = r'Pages: (\d+)' + + and its loadPage method: + + + def loadPage(self, page_n): + return the html of the page number page_n + """ + + LINK_PATTERN = None + + NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + URL_REPLACEMENTS = [] + + TEXT_ENCODING = False #: Set to True or encoding name if encoding in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + + LOGIN_ACCOUNT = False + LOGIN_PREMIUM = False + + + def prepare(self): + self.info = {} + self.links = [] + + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + + if self.LOGIN_PREMIUM and not self.premium: + self.fail(_("Required premium account not found")) + + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + + + def decrypt(self, pyfile): + self.prepare() + + self.preload() + + if self.html is None: + self.fail(_("No html retrieved")) + + self.checkInfo() + + self.links = self.getLinks() + + if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): + self.handleMultiPages() + + self.logDebug("Package has %d links" % len(self.links)) + + if self.links: + self.packages = [(self.info['name'], self.links, self.info['folder'])] + + + def checkStatus(self): + status = self.info['status'] + + if status is 1: + self.offline() + + elif status is 6: + self.tempOffline() + + + def checkNameSize(self): + name = self.info['name'] + url = self.info['url'] + + if name and name != url: + self.pyfile.name = name + else: + self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] + + folder = self.info['folder'] = name + + self.logDebug("File name: %s" % name, + "File folder: %s" % folder) + + + def getLinks(self): + """ + Returns the links extracted from self.html + You should override this only if it's impossible to extract links using only the LINK_PATTERN. 
+ """ + return re.findall(self.LINK_PATTERN, self.html) + + + def handleMultiPages(self): + try: + m = re.search(self.PAGES_PATTERN, self.html) + pages = int(m.group(1)) + except Exception: + pages = 1 + + for p in xrange(2, pages + 1): + self.html = self.loadPage(p) + self.links += self.getLinks() diff --git a/pyload/plugin/internal/SimpleHoster.py b/pyload/plugin/internal/SimpleHoster.py new file mode 100644 index 000000000..9355cfe86 --- /dev/null +++ b/pyload/plugin/internal/SimpleHoster.py @@ -0,0 +1,530 @@ +# -*- coding: utf-8 -*- + +import re + +from time import time +from urllib import unquote +from urlparse import urljoin, urlparse + +from pyload.datatype.File import statusMap as _statusMap +from pyload.network.CookieJar import CookieJar +from pyload.network.RequestFactory import getURL +from pyload.plugin.Hoster import Hoster +from pyload.utils import fixup, formatSize, parseFileSize + + +#@TODO: Adapt and move to PyFile in 0.4.10 +statusMap = dict((v, k) for k, v in _statusMap.iteritems()) + + +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + return string + + +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + domain, name, value = cookie + cj.setCookie(domain, name, value) + + +def parseHtmlTagAttrValue(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parseHtmlForm(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.S | re.I): + inputs = {} + action = parseHtmlTagAttrValue("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): + name = parseHtmlTagAttrValue("name", inputtag.group(1)) + if name: + value = parseHtmlTagAttrValue("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or '' + else: + inputs[name] = value + + if input_names: + # check input attributes + for key, val in input_names.iteritems(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] == val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break #: attibute value does not match + else: + break #: attibute name does not match + else: + return action, inputs #: passed attribute check + else: + # no attribute check + return action, inputs + + return {}, None #: no matching form found + + +#: Deprecated +def parseFileInfo(plugin, url="", html=""): + info = plugin.getInfo(url, html) + return info['name'], info['size'], info['status'], info['url'] + + +#@TODO: Remove in 0.4.10 +#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10 +def create_getInfo(plugin): + return lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)] + + +def timestamp(): + return int(time() * 1000) + + +#@TODO: Move to hoster class in 0.4.10 +def _isDirectLink(self, url, resumable=True): + header = self.load(url, ref=True, just_header=True, decode=True) + + if not 'location' in header or not header['location']: + return "" + + location = header['location'] + + resumable = False #@NOTE: Testing... + + if resumable: #: sometimes http code may be wrong... 
+ if 'location' in self.load(location, ref=True, cookies=True, just_header=True, decode=True): + return "" + else: + if not 'code' in header or header['code'] != 302: + return "" + + if urlparse(location).scheme: + link = location + else: + p = urlparse(url) + base = "%s://%s" % (p.scheme, p.netloc) + link = urljoin(base, location) + + return link + + +class SimpleHoster(Hoster): + __name = "SimpleHoster" + __type = "hoster" + __version = "0.72" + + __pattern = r'^unmatchable$' + + __description = """Simple hoster plugin""" + __license = "GPLv3" + __authors = [("zoidberg", "zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Info patterns should be defined by each hoster: + + INFO_PATTERN: (optional) Name and Size of the file + example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' + or + NAME_PATTERN: (optional) Name that will be set for the file + example: NAME_PATTERN = r'(?P<N>file_name)' + SIZE_PATTERN: (optional) Size that will be checked for the file + example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' + + HASHSUM_PATTERN: (optional) Hash code and type of the file + example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' + + OFFLINE_PATTERN: (optional) Check if the file is yet available online + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline + example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' + + + Error handling patterns are all optional: + + WAIT_PATTERN: (optional) Detect waiting time + example: WAIT_PATTERN = r'' + + PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account + example: PREMIUM_ONLY_PATTERN = r'Premium account required' + + ERROR_PATTERN: (optional) Detect any error preventing download + example: ERROR_PATTERN = r'' + + + Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download: + + LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download + example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' + + LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download + example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' + """ + + NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + SIZE_REPLACEMENTS = [] + URL_REPLACEMENTS = [] + + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account + CHECK_DIRECT_LINK = None #: Set to True to check for direct link, set to None to do it only if self.account is True + MULTI_HOSTER = False #: Set to True to leech other hoster link (according its multihoster hook if available) + + + @classmethod + def parseInfos(cls, urls): + for url in urls: + url = replace_patterns(url, cls.URL_REPLACEMENTS) + yield cls.getInfo(url) + + + @classmethod + def getInfo(cls, url="", html=""): + info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url} + + if not html: + try: + if not url: + info['error'] = "missing url" + info['status'] = 1 + raise + + try: + html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) + + if isinstance(cls.TEXT_ENCODING, basestring): + html = unicode(html, cls.TEXT_ENCODING) + + except 
BadHeader, e: + info['error'] = "%d: %s" % (e.code, e.content) + + if e.code is 404: + info['status'] = 1 + raise + + if e.code is 503: + info['status'] = 6 + raise + except Exception: + return info + + online = False + + if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): + info['status'] = 1 + + elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): + info['status'] = 6 + + else: + try: + info['pattern'] = re.match(cls.__pattern, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api'] + except Exception: + pass + + for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"): + try: + attr = getattr(cls, pattern) + dict = re.search(attr, html).groupdict() + + if all(True for k in dict if k not in info['pattern']): + info['pattern'].update(dict) + + except AttributeError: + continue + + else: + online = True + + if online: + info['status'] = 2 + + if 'N' in info['pattern']: + info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), cls.NAME_REPLACEMENTS) + + if 'S' in info['pattern']: + size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'], + cls.SIZE_REPLACEMENTS) + info['size'] = parseFileSize(size) + + elif isinstance(info['size'], basestring): + unit = info['units'] if 'units' in info else None + info['size'] = parseFileSize(info['size'], unit) + + if 'H' in info['pattern']: + hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" + info[hashtype] = info['pattern']['H'] + + return info + + + def setup(self): + self.resumeDownload = self.multiDL = self.premium + + + def prepare(self): + self.info = {} + self.link = "" #@TODO: Move to hoster class in 0.4.10 + self.directDL = False #@TODO: Move to hoster class in 0.4.10 + self.multihost = False #@TODO: Move to hoster class in 0.4.10 + + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + if (self.MULTI_HOSTER + and (self.__pattern != self.core.pluginManager.hosterPlugins[self.__name]['pattern'] + or re.match(self.__pattern, self.pyfile.url) is None)): + + self.logInfo("Multi hoster detected") + + if self.account: + self.multihost = True + return + else: + self.fail(_("Only registered or premium users can use url leech feature")) + + if self.CHECK_DIRECT_LINK is None: + self.directDL = bool(self.account) + + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) + + + def preload(self): + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + + + def process(self, pyfile): + self.prepare() + + if self.multihost: + self.logDebug("Looking for leeched download link...") + self.handleMulti() + + elif self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect() + + if not self.link: + self.preload() + + if self.html is None: + self.fail(_("No html retrieved")) + + self.checkErrors() + + premium_only = 'error' in self.info and self.info['error'] == "premium-only" + + self._updateInfo(self.getInfo(pyfile.url, self.html)) + + self.checkNameSize() + + #: Usually premium only pages doesn't show any file information + if not premium_only: + self.checkStatus() + + if self.premium and (not self.FORCE_CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + 
self.handlePremium() + + elif premium_only: + self.fail(_("Link require a premium account to be handled")) + + else: + self.logDebug("Handled as free download") + self.handleFree() + + self.downloadLink(self.link) + self.checkFile() + + + def downloadLink(self, link): + if not link: + return + + self.download(link, disposition=True) + + + def checkFile(self): + if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10 + self.fail(_("Empty file")) + + + def checkErrors(self): + if hasattr(self, 'ERROR_PATTERN'): + m = re.search(self.ERROR_PATTERN, self.html) + if m: + errmsg = self.info['error'] = m.group(1) + self.error(errmsg) + + if hasattr(self, 'PREMIUM_ONLY_PATTERN'): + m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) + if m: + self.info['error'] = "premium-only" + return + + if hasattr(self, 'WAIT_PATTERN'): + m = re.search(self.WAIT_PATTERN, self.html) + if m: + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) + self.wait(wait_time, False) + return + + self.info.pop('error', None) + + + def checkStatus(self): + status = self.info['status'] + + if status is 1: + self.offline() + + elif status is 6: + self.tempOffline() + + elif status is not 2: + self.logInfo(_("File status: %s") % statusMap[status], + _("File info: %s") % self.info) + self.error(_("No file info retrieved")) + + + def checkNameSize(self): + name = self.info['name'] + size = self.info['size'] + url = self.info['url'] + + if name and name != url: + self.pyfile.name = name + else: + self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] + + if size > 0: + self.pyfile.size = size + else: + size = "Unknown" + + self.logDebug("File name: %s" % name, + "File size: %s" % size) + + + def checkInfo(self): + self.checkErrors() + + self._updateInfo(self.getInfo(self.pyfile.url, self.html or "")) + + self.checkNameSize() + self.checkStatus() + + + #: Deprecated + def getFileInfo(self): + self.info = {} + self.checkInfo() + return self.info + + + def _updateInfo(self, info): + self.logDebug(_("File info (before update): %s") % self.info) + self.info.update(info) + self.logDebug(_("File info (after update): %s") % self.info) + + + def handleDirect(self): + link = _isDirectLink(self, self.pyfile.url, self.resumeDownload) + + if link: + self.logInfo(_("Direct download link detected")) + + self.link = link + + self._updateInfo(self.getInfo(self.pyfile.url)) + self.checkNameSize() + else: + self.logDebug(_("Direct download link not found")) + + + def handleMulti(self): #: Multi-hoster handler + pass + + + def handleFree(self): + if not hasattr(self, 'LINK_FREE_PATTERN'): + self.fail(_("Free download not implemented")) + + try: + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + + self.link = m.group(1) + + except Exception, e: + self.fail(e) + + + def handlePremium(self): + if not hasattr(self, 'LINK_PREMIUM_PATTERN'): + self.fail(_("Premium download not implemented")) + + try: + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + + self.link = m.group(1) + + except Exception, e: + self.fail(e) + + + def longWait(self, wait_time=None, max_tries=3): + if wait_time and isinstance(wait_time, (int, long, float)): + time_str = "%dh %dm" % divmod(wait_time / 60, 60) + else: + wait_time = 900 + time_str = _("(unknown time)") + max_tries = 100 + 
+ self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) + + self.setWait(wait_time, True) + self.wait() + self.retry(max_tries=max_tries, reason=_("Download limit reached")) + + + def parseHtmlForm(self, attr_str="", input_names={}): + return parseHtmlForm(attr_str, self.html, input_names) + + + def checkTrafficLeft(self): + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] + + if traffic is None: + return False + elif traffic == -1: + return True + else: + self.logInfo(_("Filesize: %s, Traffic left for user %s: %s") % (formatSize(size), self.user, formatSize(traffic))) + return self.pyfile.size <= traffic + + + def error(self, reason="", type="parse"): + return super(SimpleHoster, self).error(self, reason, type) diff --git a/pyload/plugin/internal/UnRar.py b/pyload/plugin/internal/UnRar.py new file mode 100644 index 000000000..404ee906b --- /dev/null +++ b/pyload/plugin/internal/UnRar.py @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from glob import glob +from os.path import basename, join +from string import digits +from subprocess import Popen, PIPE + +from pyload.plugin.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError +from pyload.utils import safe_join, decode + + +def renice(pid, value): + if os.name != "nt" and value: + try: + Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + except Exception: + print "Renice failed" + + +class UnRar(AbtractExtractor): + __name = "UnRar" + __version = "0.19" + + __description = """Rar extractor plugin""" + __license = "GPLv3" + __authors = [("RaNaN", "RaNaN@pyload.org")] + + + CMD = "unrar" + + # there are some more uncovered rar formats + re_version = re.compile(r'UNRAR ([\w .]+?) 
freeware') + re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) + re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I) + re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+') + re_filelist5 = re.compile(r'(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') + re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I) + + + @staticmethod + def checkDeps(): + if os.name == "nt": + UnRar.CMD = join(pypath, "UnRAR.exe") + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + else: + try: + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + except OSError: + + # fallback to rar + UnRar.CMD = "rar" + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + + return True + + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if not file.endswith(".rar"): + continue + + match = UnRar.re_splitfile.findall(file) + if match: + # only add first parts + if int(match[0][1]) == 1: + result.append((file, id)) + else: + result.append((file, id)) + + return result + + + def init(self): + self.passwordProtected = False + self.headerProtected = False #: list files will not work without password + self.smallestFile = None #: small file to test passwords + self.password = "" #: save the correct password + + + def checkArchive(self): + p = self.call_unrar("l", "-v", self.file) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + self.passwordProtected = True + self.headerProtected = True + return True + + # output only used to check if passworded files are present + if self.re_version.search(out): + for attr, size, name in self.re_filelist5.findall(out): + if attr.startswith("*"): + self.passwordProtected = True + return True + else: + for name, size, packed in self.re_filelist.findall(out): + if name.startswith("*"): + self.passwordProtected = True + return True + + self.listContent() + if not self.files: + raise ArchiveError("Empty Archive") + + return False + + + def checkPassword(self, password): + # at this point we can only verify header protected files + if self.headerProtected: + p = self.call_unrar("l", "-v", self.file, password=password) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + return False + + return True + + + def extract(self, progress, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_unrar(command, self.file, self.out, password=password) + renice(p.pid, self.renice) + + progress(0) + progressstring = "" + while True: + c = p.stdout.read(1) + # quit loop on eof + if not c: + break + # reading a percentage sign -> set progress and restart + if c == '%': + progress(int(progressstring)) + progressstring = "" + # not reading a digit -> therefore restart + elif c not in digits: + progressstring = "" + # add digit to progressstring + else: + progressstring = progressstring + c + progress(100) + + # retrieve stderr + err = p.stderr.read() + + if "CRC failed" in err and not password and not self.passwordProtected: + raise CRCError + elif "CRC failed" in err: + raise WrongPassword + if err.strip(): #: raise error if anything is on stderr + raise ArchiveError(err.strip()) + if p.returncode: + raise ArchiveError("Process terminated") + + if not self.files: + self.password = password + self.listContent() + + + def getDeleteFiles(self): + if ".part" in basename(self.file): + return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) + # get files which matches .r* and filter unsuited files out + parts = 
glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) + return filter(lambda x: self.re_partfiles.match(x), parts) + + + def listContent(self): + command = "vb" if self.fullpath else "lb" + p = self.call_unrar(command, "-v", self.file, password=self.password) + out, err = p.communicate() + + if "Cannot open" in err: + raise ArchiveError("Cannot open file") + + if err.strip(): #: only log error at this point + self.m.logError(err.strip()) + + result = set() + + for f in decode(out).splitlines(): + f = f.strip() + result.add(safe_join(self.out, f)) + + self.files = result + + + def call_unrar(self, command, *xargs, **kwargs): + args = [] + # overwrite flag + args.append("-o+") if self.overwrite else args.append("-o-") + + if self.excludefiles: + for word in self.excludefiles.split(';'): + args.append("-x%s" % word) + + # assume yes on all queries + args.append("-y") + + # set a password + if "password" in kwargs and kwargs['password']: + args.append("-p%s" % kwargs['password']) + else: + args.append("-p-") + + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + self.m.logDebug(" ".join(call)) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + + return p diff --git a/pyload/plugin/internal/UnZip.py b/pyload/plugin/internal/UnZip.py new file mode 100644 index 000000000..8cf71f659 --- /dev/null +++ b/pyload/plugin/internal/UnZip.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +import sys +import zipfile + +from pyload.plugin.internal.AbstractExtractor import AbtractExtractor + + +class UnZip(AbtractExtractor): + __name = "UnZip" + __version = "0.10" + + __description = """Zip extractor plugin""" + __license = "GPLv3" + __authors = [("RaNaN", "RaNaN@pyload.org")] + + + @staticmethod + def checkDeps(): + return sys.version_info[:2] >= (2, 6) + + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if file.endswith(".zip"): + result.append((file, id)) + + return result + + + def extract(self, progress, password=None): + z = zipfile.ZipFile(self.file) + self.files = z.namelist() + z.extractall(self.out) + + + def getDeleteFiles(self): + return [self.file] diff --git a/pyload/plugin/internal/UpdateManager.py b/pyload/plugin/internal/UpdateManager.py new file mode 100644 index 000000000..0c40b4192 --- /dev/null +++ b/pyload/plugin/internal/UpdateManager.py @@ -0,0 +1,300 @@ +# -*- coding: utf-8 -*- + +import re +import sys + +from operator import itemgetter +from os import path, remove, stat + +from pyload.network.RequestFactory import getURL +from pyload.plugin.Addon import Expose, Addon, threaded +from pyload.utils import safe_join + + +class UpdateManager(Addon): + __name = "UpdateManager" + __type = "addon" + __version = "0.40" + + __config = [("activated" , "bool" , "Activated" , True ), + ("mode" , "pyLoad + plugins;plugins only", "Check updates for" , "pyLoad + plugins"), + ("interval" , "int" , "Check interval in hours" , 8 ), + ("reloadplugins", "bool" , "Monitor plugins for code changes (debug mode only)", True ), + ("nodebugupdate", "bool" , "Don't check for updates in debug mode" , True )] + + __description = """Check for updates""" + __license = "GPLv3" + __authors = [("Walter Purcaro", "vuolter@gmail.com")] + + + # event_list = ["pluginConfigChanged"] + + SERVER_URL = "http://updatemanager.pyload.org" + MIN_INTERVAL = 6 * 60 * 60 #: 6h minimum check interval (value is in seconds) + + + def pluginConfigChanged(self, plugin, name, value): + if name == "interval": + 
interval = value * 60 * 60 + if self.MIN_INTERVAL <= interval != self.interval: + self.core.scheduler.removeJob(self.cb) + self.interval = interval + self.initPeriodical() + else: + self.logDebug("Invalid interval value, kept current") + + elif name == "reloadplugins": + if self.cb2: + self.core.scheduler.removeJob(self.cb2) + if value is True and self.core.debug: + self.periodical2() + + + def activate(self): + self.pluginConfigChanged(self.__name, "interval", self.getConfig("interval")) + x = lambda: self.pluginConfigChanged(self.__name, "reloadplugins", self.getConfig("reloadplugins")) + self.core.scheduler.addJob(10, x, threaded=False) + + + def deactivate(self): + self.pluginConfigChanged(self.__name, "reloadplugins", False) + + + def setup(self): + self.cb2 = None + self.interval = self.MIN_INTERVAL + self.updating = False + self.info = {'pyload': False, 'version': None, 'plugins': False} + self.mtimes = {} #: store modification time for each plugin + + + def periodical2(self): + if not self.updating: + self.autoreloadPlugins() + + self.cb2 = self.core.scheduler.addJob(4, self.periodical2, threaded=False) + + + @Expose + def autoreloadPlugins(self): + """ reload and reindex all modified plugins """ + modules = filter( + lambda m: m and (m.__name.startswith("pyload.plugin.") or + m.__name.startswith("userplugins.")) and + m.__name.count(".") >= 2, sys.modules.itervalues() + ) + + reloads = [] + + for m in modules: + root, type, name = m.__name.rsplit(".", 2) + id = (type, name) + if type in self.core.pluginManager.plugins: + f = m.__file__.replace(".pyc", ".py") + if not path.isfile(f): + continue + + mtime = stat(f).st_mtime + + if id not in self.mtimes: + self.mtimes[id] = mtime + elif self.mtimes[id] < mtime: + reloads.append(id) + self.mtimes[id] = mtime + + return True if self.core.pluginManager.reloadPlugins(reloads) else False + + + def periodical(self): + if self.info['pyload'] or self.getConfig("nodebugupdate") and self.core.debug: + return + + self.updateThread() + + + def server_request(self): + try: + return getURL(self.SERVER_URL, get={'v': self.core.api.getServerVersion()}).splitlines() + except Exception: + self.logWarning(_("Unable to contact server to get updates")) + + + @threaded + def updateThread(self): + self.updating = True + + status = self.update(onlyplugin=self.getConfig("mode") == "plugins only") + + if status == 2: + self.core.api.restart() + else: + self.updating = False + + + @Expose + def updatePlugins(self): + """ simple wrapper for calling plugin update quickly """ + return self.update(onlyplugin=True) + + + @Expose + def update(self, onlyplugin=False): + """ check for updates """ + data = self.server_request() + + if not data: + exitcode = 0 + + elif data[0] == "None": + self.logInfo(_("No new pyLoad version available")) + updates = data[1:] + exitcode = self._updatePlugins(updates) + + elif onlyplugin: + exitcode = 0 + + else: + newversion = data[0] + self.logInfo(_("*** New pyLoad Version %s available ***") % newversion) + self.logInfo(_("*** Get it here: https://github.com/pyload/pyload/releases ***")) + exitcode = 3 + self.info['pyload'] = True + self.info['version'] = newversion + + return exitcode #: 0 = No plugins updated; 1 = Plugins updated; 2 = Plugins updated, but restart required; 3 = No plugins updated, new pyLoad version available + + + def _updatePlugins(self, updates): + """ check for plugin updates """ + + if self.info['plugins']: + return False #: plugins were already updated + + exitcode = 0 + updated = [] + + vre = 
re.compile(r'__version.*=.*("|\')([\d.]+)') + url = updates[0] + schema = updates[1].split('|') + + if "BLACKLIST" in updates: + blacklist = updates[updates.index('BLACKLIST') + 1:] + updates = updates[2:updates.index('BLACKLIST')] + else: + blacklist = None + updates = updates[2:] + + upgradable = sorted(map(lambda x: dict(zip(schema, x.split('|'))), updates), + key=itemgetter("type", "name")) + + for plugin in upgradable: + filename = plugin['name'] + type = plugin['type'] + version = plugin['version'] + + if filename.endswith(".pyc"): + name = filename[:filename.find("_")] + else: + name = filename.replace(".py", "") + + plugins = getattr(self.core.pluginManager, "%sPlugins" % type) + + oldver = float(plugins[name]['version']) if name in plugins else None + newver = float(version) + + if not oldver: + msg = "New plugin: [%(type)s] %(name)s (v%(newver).2f)" + elif newver > oldver: + msg = "New version of plugin: [%(type)s] %(name)s (v%(oldver).2f -> v%(newver).2f)" + else: + continue + + self.logInfo(_(msg) % {'type' : type, + 'name' : name, + 'oldver': oldver, + 'newver': newver}) + try: + content = getURL(url % plugin) + m = vre.search(content) + + if m and m.group(2) == version: + f = open(safe_join("userplugins", prefix, filename), "wb") + f.write(content) + f.close() + updated.append((prefix, name)) + else: + raise Exception, _("Version mismatch") + + except Exception, e: + self.logError(_("Error updating plugin %s") % filename, e) + + if blacklist: + blacklisted = map(lambda x: (x.split('|')[0], x.split('|')[1].rsplit('.', 1)[0]), blacklist) + + # Always protect internal plugins from removing + for i, n, t in blacklisted.enumerate(): + if t == "internal": + del blacklisted[i] + + blacklisted = sorted(blacklisted) + removed = self.removePlugins(blacklisted) + for t, n in removed: + self.logInfo(_("Removed blacklisted plugin [%(type)s] %(name)s") % { + 'type': t, + 'name': n, + }) + + if updated: + reloaded = self.core.pluginManager.reloadPlugins(updated) + if reloaded: + self.logInfo(_("Plugins updated and reloaded")) + exitcode = 1 + else: + self.logInfo(_("*** Plugins have been updated, but need a pyLoad restart to be reloaded ***")) + self.info['plugins'] = True + exitcode = 2 + else: + self.logInfo(_("No plugin updates available")) + + return exitcode #: 0 = No plugins updated; 1 = Plugins updated; 2 = Plugins updated, but restart required + + + @Expose + def removePlugins(self, type_plugins): + """ delete plugins from disk """ + + if not type_plugins: + return + + self.logDebug("Requested deletion of plugins: %s" % type_plugins) + + removed = [] + + for type, name in type_plugins: + err = False + file = name + ".py" + + for root in ("userplugins", path.join(pypath, "pyload", "plugins")): + + filename = safe_join(root, type, file) + try: + remove(filename) + except Exception, e: + self.logDebug("Error deleting: %s" % path.basename(filename), e) + err = True + + filename += "c" + if path.isfile(filename): + try: + if type == "addon": + self.manager.deactivateAddon(name) + remove(filename) + except Exception, e: + self.logDebug("Error deleting: %s" % path.basename(filename), e) + err = True + + if not err: + id = (type, name) + removed.append(id) + + return removed #: return a list of the plugins successfully removed diff --git a/pyload/plugin/internal/XFSAccount.py b/pyload/plugin/internal/XFSAccount.py new file mode 100644 index 000000000..388cda26c --- /dev/null +++ b/pyload/plugin/internal/XFSAccount.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- + +import re + +from time 
import gmtime, mktime, strptime +from urlparse import urljoin + +from pyload.plugin.Account import Account +from pyload.plugin.internal.SimpleHoster import parseHtmlForm, set_cookies + + +class XFSAccount(Account): + __name = "XFSAccount" + __type = "account" + __version = "0.32" + + __description = """XFileSharing account plugin""" + __license = "GPLv3" + __authors = [("zoidberg", "zoidberg@mujmail.cz"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_URL = None + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + PREMIUM_PATTERN = r'\(Premium only\)' + + VALID_UNTIL_PATTERN = r'Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})' + + TRAFFIC_LEFT_PATTERN = r'Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' + TRAFFIC_LEFT_UNIT = "MB" #: used only if no group <U> was found + + LEECH_TRAFFIC_PATTERN = r'Leech Traffic left:<b>.*?(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>' + LEECH_TRAFFIC_UNIT = "MB" #: used only if no group <U> was found + + LOGIN_FAIL_PATTERN = r'>\s*(Incorrect Login or Password|Error<)' + + + def init(self): + # if not self.HOSTER_DOMAIN: + # self.fail(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_URL: + self.HOSTER_URL = "http://www.%s/" % self.HOSTER_DOMAIN + + + def loadAccountInfo(self, user, req): + validuntil = None + trafficleft = None + leechtraffic = None + premium = None + + html = req.load(self.HOSTER_URL, get={'op': "my_account"}, decode=True) + + premium = True if re.search(self.PREMIUM_PATTERN, html) else False + + m = re.search(self.VALID_UNTIL_PATTERN, html) + if m: + expiredate = m.group(1).strip() + self.logDebug("Expire date: " + expiredate) + + try: + validuntil = mktime(strptime(expiredate, "%d %B %Y")) + + except Exception, e: + self.logError(e) + + else: + self.logDebug("Valid until: %s" % validuntil) + + if validuntil > mktime(gmtime()): + premium = True + trafficleft = -1 + else: + premium = False + validuntil = None #: registered account type (not premium) + else: + self.logDebug("VALID_UNTIL_PATTERN not found") + + m = re.search(self.TRAFFIC_LEFT_PATTERN, html) + if m: + try: + traffic = m.groupdict() + size = traffic['S'] + + if "nlimited" in size: + trafficleft = -1 + if validuntil is None: + validuntil = -1 + else: + if 'U' in traffic: + unit = traffic['U'] + elif isinstance(self.TRAFFIC_LEFT_UNIT, basestring): + unit = self.TRAFFIC_LEFT_UNIT + else: + unit = "" + + trafficleft = self.parseTraffic(size + unit) + + except Exception, e: + self.logError(e) + else: + self.logDebug("TRAFFIC_LEFT_PATTERN not found") + + leech = [m.groupdict() for m in re.finditer(self.LEECH_TRAFFIC_PATTERN, html)] + if leech: + leechtraffic = 0 + try: + for traffic in leech: + size = traffic['S'] + + if "nlimited" in size: + leechtraffic = -1 + if validuntil is None: + validuntil = -1 + break + else: + if 'U' in traffic: + unit = traffic['U'] + elif isinstance(self.LEECH_TRAFFIC_UNIT, basestring): + unit = self.LEECH_TRAFFIC_UNIT + else: + unit = "" + + leechtraffic += self.parseTraffic(size + unit) + + except Exception, e: + self.logError(e) + else: + self.logDebug("LEECH_TRAFFIC_PATTERN not found") + + return {'validuntil': validuntil, 'trafficleft': trafficleft, 'leechtraffic': leechtraffic, 'premium': premium} + + + def login(self, user, data, req): + if isinstance(self.COOKIES, list): + set_cookies(req.cj, self.COOKIES) + + url = urljoin(self.HOSTER_URL, "login.html") + html = req.load(url, decode=True) + + action, inputs = parseHtmlForm('name="FL"', html) + if not 
inputs: + inputs = {'op': "login", + 'redirect': self.HOSTER_URL} + + inputs.update({'login': user, + 'password': data['password']}) + + html = req.load(self.HOSTER_URL, post=inputs, decode=True) + + if re.search(self.LOGIN_FAIL_PATTERN, html): + self.wrongPassword() diff --git a/pyload/plugin/internal/XFSCrypter.py b/pyload/plugin/internal/XFSCrypter.py new file mode 100644 index 000000000..6440d328a --- /dev/null +++ b/pyload/plugin/internal/XFSCrypter.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + +from pyload.plugin.internal.SimpleCrypter import SimpleCrypter + + +class XFSCrypter(SimpleCrypter): + __name = "XFSCrypter" + __type = "crypter" + __version = "0.04" + + __pattern = r'^unmatchable$' + + __description = """XFileSharing decrypter plugin""" + __license = "GPLv3" + __authors = [("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + URL_REPLACEMENTS = [(r'&?per_page=\d+', ""), (r'[?/&]+$', ""), (r'(.+/[^?]+)$', r'\1?'), (r'$', r'&per_page=10000')] + + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + + LINK_PATTERN = r'<(?:td|TD).*?>\s*<a href="(.+?)".*?>.+?(?:</a>)?\s*</(?:td|TD)>' + NAME_PATTERN = r'<[tT]itle>.*?\: (?P<N>.+) folder</[tT]itle>' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' diff --git a/pyload/plugin/internal/XFSHoster.py b/pyload/plugin/internal/XFSHoster.py new file mode 100644 index 000000000..dfc923f69 --- /dev/null +++ b/pyload/plugin/internal/XFSHoster.py @@ -0,0 +1,339 @@ +# -*- coding: utf-8 -*- + +import re + +from random import random +from time import sleep + +from pyload.plugin.hoster.UnrestrictLi import secondsToMidnight +from pyload.plugin.internal.captcha import ReCaptcha, SolveMedia +from pyload.plugin.internal.SimpleHoster import SimpleHoster, create_getInfo +from pyload.utils import html_unescape + + +class XFSHoster(SimpleHoster): + __name = "XFSHoster" + __type = "hoster" + __version = "0.27" + + __pattern = r'^unmatchable$' + + __description = """XFileSharing hoster plugin""" + __license = "GPLv3" + __authors = [("zoidberg", "zoidberg@mujmail.cz"), + ("stickell", "l.stickell@yahoo.it"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + HOSTER_DOMAIN = None + HOSTER_NAME = None + + TEXT_ENCODING = False + COOKIES = [(HOSTER_DOMAIN, "lang", "english")] + CHECK_DIRECT_LINK = None + MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... 
+ + NAME_PATTERN = r'(>Filename:</b></td><td nowrap>|name="fname" value="|<span class="name">)(?P<N>.+?)(\s*<|")' + SIZE_PATTERN = r'(>Size:</b></td><td>|>File:.*>|<span class="size">)(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' + + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' + TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' + + WAIT_PATTERN = r'<span id="countdown_str">.*?>(\d+)</span>|id="countdown" value=".*?(\d+).*?"' + PREMIUM_ONLY_PATTERN = r'>This file is available for Premium Users only' + ERROR_PATTERN = r'(?:class=["\']err["\'].*?>|<[Cc]enter><b>|>Error</td>|>\(ERROR:)(?:\s*<.+?>\s*)*(.+?)(?:["\']|<|\))' + + LEECH_LINK_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)' + LINK_PATTERN = None #: final download url pattern + + CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)' + CAPTCHA_BLOCK_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>' + RECAPTCHA_PATTERN = None + SOLVEMEDIA_PATTERN = None + + FORM_PATTERN = None + FORM_INPUTS_MAP = None #: dict passed as input_names to parseHtmlForm + + + def setup(self): + self.chunkLimit = 1 + self.resumeDownload = self.multiDL = self.premium + + + def prepare(self): + """ Initialize important variables """ + if not self.HOSTER_DOMAIN: + self.fail(_("Missing HOSTER_DOMAIN")) + + if not self.HOSTER_NAME: + self.HOSTER_NAME = "".join([str.capitalize() for str in self.HOSTER_DOMAIN.split('.')]) + + if not self.LINK_PATTERN: + pattern = r'(https?://(www\.)?([^/]*?%s|\d+\.\d+\.\d+\.\d+)(\:\d+)?(/d/|(/files)?/\d+/\w+/).+?)["\'<]' + self.LINK_PATTERN = pattern % self.HOSTER_DOMAIN.replace('.', '\.') + + self.captcha = None + self.errmsg = None + self.passwords = self.getPassword().splitlines() + + super(XFSHoster, self).prepare() + + if self.CHECK_DIRECT_LINK is None: + self.directDL = bool(self.premium) + + + def handleFree(self): + link = self.getDownloadLink() + + if link: + if self.captcha: + self.correctCaptcha() + + self.download(link, ref=True, cookies=True, disposition=True) + + elif self.errmsg: + if 'captcha' in self.errmsg: + self.fail(_("No valid captcha code entered")) + else: + self.fail(self.errmsg) + + else: + self.fail(_("Download link not found")) + + + def handlePremium(self): + return self.handleFree() + + + def getDownloadLink(self): + for i in xrange(1, 6): + self.logDebug("Getting download link: #%d" % i) + + self.checkErrors() + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + + data = self.getPostParameters() + + self.html = self.load(self.pyfile.url, post=data, ref=True, decode=True, follow_location=False) + + m = re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I) + if m and not "op=" in m.group(1): + break + + m = re.search(self.LINK_PATTERN, self.html, re.S) + if m: + break + else: + self.logError(data['op'] if 'op' in data else _("UNKNOWN")) + return "" + + self.errmsg = None + + return m.group(1) + + + def handleMulti(self): + #only tested with easybytez.com + self.html = self.load("http://www.%s/" % self.HOSTER_DOMAIN) + + action, inputs = self.parseHtmlForm() + + upload_id = "%012d" % int(random() * 10 ** 12) + action += upload_id + "&js_on=1&utype=prem&upload_type=url" + + inputs['tos'] = '1' + inputs['url_mass'] = self.pyfile.url + inputs['up1oad_type'] = 'url' + + self.logDebug(action, inputs) + + self.req.setOption("timeout", 600) #: wait for file to upload to easybytez.com + + self.html = self.load(action, post=inputs) + + self.checkErrors() + + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + 
self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + stmsg = inputs['st'] + + if stmsg == 'OK': + self.html = self.load(action, post=inputs) + + elif 'Can not leech file' in stmsg: + self.retry(20, 3 * 60, _("Can not leech file")) + + elif 'today' in stmsg: + self.retry(wait_time=secondsToMidnight(gmt=2), reason=_("You've used all Leech traffic today")) + + else: + self.fail(stmsg) + + #get easybytez.com link for uploaded file + m = re.search(self.LEECH_LINK_PATTERN, self.html) + if m is None: + self.error(_("LEECH_LINK_PATTERN not found")) + + header = self.load(m.group(1), just_header=True, decode=True) + + if 'location' in header: #: Direct download link + self.link = header['location'] + else: + self.fail(_("Download link not found")) + + + def checkErrors(self): + m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) + if m: + self.info['error'] = "premium-only" + return + + m = re.search(self.ERROR_PATTERN, self.html) + + if m is None: + self.errmsg = None + else: + self.errmsg = m.group(1).strip() + + self.logWarning(re.sub(r"<.*?>", " ", self.errmsg)) + + if 'wait' in self.errmsg: + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', self.errmsg, re.I)]) + self.wait(wait_time, True) + + elif 'country' in self.errmsg: + self.fail(_("Downloads are disabled for your country")) + + elif 'captcha' in self.errmsg: + self.invalidCaptcha() + + elif 'premium' in self.errmsg and 'require' in self.errmsg: + self.fail(_("File can be downloaded by premium users only")) + + elif 'limit' in self.errmsg: + if 'days' in self.errmsg: + delay = secondsToMidnight(gmt=2) + retries = 3 + else: + delay = 1 * 60 * 60 + retries = 24 + + self.wantReconnect = True + self.retry(retries, delay, _("Download limit exceeded")) + + elif 'countdown' in self.errmsg or 'Expired' in self.errmsg: + self.retry(reason=_("Link expired")) + + elif 'maintenance' in self.errmsg or 'maintainance' in self.errmsg: + self.tempOffline() + + elif 'download files up to' in self.errmsg: + self.fail(_("File too large for free download")) + + else: + self.wantReconnect = True + self.retry(wait_time=60, reason=self.errmsg) + + if self.errmsg: + self.info['error'] = self.errmsg + else: + self.info.pop('error', None) + + + def getPostParameters(self): + if self.FORM_PATTERN or self.FORM_INPUTS_MAP: + action, inputs = self.parseHtmlForm(self.FORM_PATTERN or "", self.FORM_INPUTS_MAP or {}) + else: + action, inputs = self.parseHtmlForm(input_names={'op': re.compile(r'^download')}) + + if not inputs: + action, inputs = self.parseHtmlForm('F1') + if not inputs: + if self.errmsg: + self.retry(reason=self.errmsg) + else: + self.error(_("TEXTAREA F1 not found")) + + self.logDebug(inputs) + + if 'op' in inputs: + if "password" in inputs: + if self.passwords: + inputs['password'] = self.passwords.pop(0) + else: + self.fail(_("Missing password")) + + if not self.premium: + m = re.search(self.WAIT_PATTERN, self.html) + if m: + wait_time = int(m.group(1)) + self.setWait(wait_time, False) + + self.captcha = self.handleCaptcha(inputs) + + self.wait() + else: + inputs['referer'] = self.pyfile.url + + if self.premium: + inputs['method_premium'] = "Premium Download" + inputs.pop('method_free', None) + else: + inputs['method_free'] = "Free Download" + inputs.pop('method_premium', None) + + return inputs + + + def handleCaptcha(self, inputs): + m = re.search(self.CAPTCHA_PATTERN, self.html) + if m: + captcha_url = 
m.group(1) + inputs['code'] = self.decryptCaptcha(captcha_url) + return 1 + + m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S) + if m: + captcha_div = m.group(1) + numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div)) + self.logDebug(captcha_div) + inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + self.logDebug("Captcha code: %s" % inputs['code'], numerals) + return 2 + + recaptcha = ReCaptcha(self) + try: + captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) + except Exception: + captcha_key = recaptcha.detect_key() + else: + self.logDebug("ReCaptcha key: %s" % captcha_key) + + if captcha_key: + inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + return 3 + + solvemedia = SolveMedia(self) + try: + captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) + except Exception: + captcha_key = solvemedia.detect_key() + else: + self.logDebug("SolveMedia key: %s" % captcha_key) + + if captcha_key: + inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) + return 4 + + return 0 diff --git a/pyload/plugin/internal/__init__.py b/pyload/plugin/internal/__init__.py new file mode 100644 index 000000000..40a96afc6 --- /dev/null +++ b/pyload/plugin/internal/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- |
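Taken together, the XFS* bases reduce support for a typical XFileSharing site to pinning down the domain. A hypothetical sketch follows; the site name and domain are invented, and in the real plugin tree the hoster and account classes live in separate files:

# -*- coding: utf-8 -*-
# Hypothetical sketch: covering an XFileSharing-based site with the XFS* bases above.

from pyload.plugin.internal.SimpleHoster import create_getInfo
from pyload.plugin.internal.XFSAccount import XFSAccount
from pyload.plugin.internal.XFSHoster import XFSHoster


class UploadZoneCom(XFSHoster):
    __name__    = "UploadZoneCom"
    __type__    = "hoster"
    __version__ = "0.01"

    __pattern__ = r'https?://(?:www\.)?uploadzone\.com/\w{12}'  #: invented site

    HOSTER_DOMAIN = "uploadzone.com"
    #: HOSTER_NAME is derived from HOSTER_DOMAIN in prepare() when left unset


class UploadZoneComAccount(XFSAccount):
    __name__    = "UploadZoneCom"
    __type__    = "account"
    __version__ = "0.01"

    HOSTER_DOMAIN = "uploadzone.com"


getInfo = create_getInfo(UploadZoneCom)

Name/size parsing, wait handling, captcha solving and the premium/free split all come from XFSHoster's generic patterns; a subclass only overrides them when a site deviates from the stock XFileSharing templates.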