diff options
Diffstat (limited to 'pyload/plugins/internal')
-rw-r--r-- | pyload/plugins/internal/AbstractExtractor.py | 101 | ||||
-rw-r--r-- | pyload/plugins/internal/CaptchaService.py | 96 | ||||
-rw-r--r-- | pyload/plugins/internal/DeadCrypter.py | 19 | ||||
-rw-r--r-- | pyload/plugins/internal/DeadHoster.py | 27 | ||||
-rw-r--r-- | pyload/plugins/internal/MultiHoster.py | 192 | ||||
-rw-r--r-- | pyload/plugins/internal/SimpleCrypter.py | 118 | ||||
-rw-r--r-- | pyload/plugins/internal/SimpleHoster.py | 292 | ||||
-rw-r--r-- | pyload/plugins/internal/UnRar.py | 212 | ||||
-rw-r--r-- | pyload/plugins/internal/UnZip.py | 38 | ||||
-rw-r--r-- | pyload/plugins/internal/XFSPAccount.py | 69 | ||||
-rw-r--r-- | pyload/plugins/internal/__init__.py | 0 |
11 files changed, 1164 insertions, 0 deletions
diff --git a/pyload/plugins/internal/AbstractExtractor.py b/pyload/plugins/internal/AbstractExtractor.py new file mode 100644 index 000000000..d1d1a09cb --- /dev/null +++ b/pyload/plugins/internal/AbstractExtractor.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +class ArchiveError(Exception): + pass + + +class CRCError(Exception): + pass + + +class WrongPassword(Exception): + pass + + +class AbtractExtractor: + __name__ = "AbtractExtractor" + __version__ = "0.1" + + __description__ = """Abtract extractor plugin""" + __author_name__ = "pyLoad Team" + __author_mail__ = "admin@pyload.org" + + + @staticmethod + def checkDeps(): + """ Check if system statisfy dependencies + :return: boolean + """ + return True + + @staticmethod + def getTargets(files_ids): + """ Filter suited targets from list of filename id tuple list + :param files_ids: List of filepathes + :return: List of targets, id tuple list + """ + raise NotImplementedError + + def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice): + """Initialize extractor for specific file + + :param m: ExtractArchive Hook plugin + :param file: Absolute filepath + :param out: Absolute path to destination directory + :param fullpath: extract to fullpath + :param overwrite: Overwrite existing archives + :param renice: Renice value + """ + self.m = m + self.file = file + self.out = out + self.fullpath = fullpath + self.overwrite = overwrite + self.excludefiles = excludefiles + self.renice = renice + self.files = [] #: Store extracted files here + + def init(self): + """ Initialize additional data structures """ + pass + + def checkArchive(self): + """Check if password if needed. Raise ArchiveError if integrity is + questionable. + + :return: boolean + :raises ArchiveError + """ + return False + + def checkPassword(self, password): + """ Check if the given password is/might be correct. + If it can not be decided at this point return true. + + :param password: + :return: boolean + """ + return True + + def extract(self, progress, password=None): + """Extract the archive. Raise specific errors in case of failure. + + :param progress: Progress function, call this to update status + :param password password to use + :raises WrongPassword + :raises CRCError + :raises ArchiveError + :return: + """ + raise NotImplementedError + + def getDeleteFiles(self): + """Return list of files to delete, do *not* delete them here. + + :return: List with paths of files to delete + """ + raise NotImplementedError + + def getExtractedFiles(self): + """Populate self.files at some point while extracting""" + return self.files diff --git a/pyload/plugins/internal/CaptchaService.py b/pyload/plugins/internal/CaptchaService.py new file mode 100644 index 000000000..b247ba654 --- /dev/null +++ b/pyload/plugins/internal/CaptchaService.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import re + +from random import random + + +class CaptchaService: + __name__ = "CaptchaService" + __version__ = "0.05" + + __description__ = """Captcha service plugin""" + __author_name__ = "pyLoad Team" + __author_mail__ = "admin@pyload.org" + + + def __init__(self, plugin): + self.plugin = plugin + + +class ReCaptcha: + RECAPTCHA_KEY_PATTERN = r"https?://(?:www\.)?google\.com/recaptcha/api/challenge\?k=(?P<key>\w+)" + RECAPTCHA_KEY_AJAX_PATTERN = r"Recaptcha\.create\s*\(\s*[\"'](?P<key>\w+)[\"']\s*," + + recaptcha_key = None + + + def __init__(self, plugin): + self.plugin = plugin + + def detect_key(self, html): + m = re.search(self.RECAPTCHA_KEY_PATTERN, html) + if m is None: + m = re.search(self.RECAPTCHA_KEY_AJAX_PATTERN, html) + if m: + self.recaptcha_key = m.group('key') + return self.recaptcha_key + else: + return None + + def challenge(self, key=None): + if key is None and self.recaptcha_key: + key = self.recaptcha_key + else: + raise TypeError("ReCaptcha key not found") + + js = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={"k": key}, cookies=True) + + try: + challenge = re.search("challenge : '(.*?)',", js).group(1) + server = re.search("server : '(.*?)',", js).group(1) + except: + self.plugin.fail("recaptcha error") + result = self.result(server, challenge) + + return challenge, result + + def result(self, server, challenge): + return self.plugin.decryptCaptcha("%simage" % server, get={"c": challenge}, + cookies=True, forceUser=True, imgtype="jpg") + + +class AdsCaptcha(CaptchaService): + + def challenge(self, src): + js = self.plugin.req.load(src, cookies=True) + + try: + challenge = re.search("challenge: '(.*?)',", js).group(1) + server = re.search("server: '(.*?)',", js).group(1) + except: + self.plugin.fail("adscaptcha error") + result = self.result(server, challenge) + + return challenge, result + + def result(self, server, challenge): + return self.plugin.decryptCaptcha("%sChallenge.aspx" % server, get={"cid": challenge, "dummy": random()}, + cookies=True, imgtype="jpg") + + +class SolveMedia(CaptchaService): + + def challenge(self, src): + html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript?k=%s" % src, cookies=True) + try: + challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="([^"]+)">', + html).group(1) + except: + self.plugin.fail("solvemedia error") + result = self.result(challenge) + + return challenge, result + + def result(self, challenge): + return self.plugin.decryptCaptcha("http://api.solvemedia.com/papi/media?c=%s" % challenge, imgtype="gif") diff --git a/pyload/plugins/internal/DeadCrypter.py b/pyload/plugins/internal/DeadCrypter.py new file mode 100644 index 000000000..ea9c414cb --- /dev/null +++ b/pyload/plugins/internal/DeadCrypter.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.Crypter import Crypter as _Crypter + + +class DeadCrypter(_Crypter): + __name__ = "DeadCrypter" + __type__ = "crypter" + __version__ = "0.01" + + __pattern__ = None + + __description__ = """Crypter is no longer available""" + __author_name__ = "stickell" + __author_mail__ = "l.stickell@yahoo.it" + + + def setup(self): + self.fail("Crypter is no longer available") diff --git a/pyload/plugins/internal/DeadHoster.py b/pyload/plugins/internal/DeadHoster.py new file mode 100644 index 000000000..0b2398020 --- /dev/null +++ b/pyload/plugins/internal/DeadHoster.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +from pyload.plugins.Hoster import Hoster as _Hoster + + +def create_getInfo(plugin): + + def getInfo(urls): + yield [('#N/A: ' + url, 0, 1, url) for url in urls] + + return getInfo + + +class DeadHoster(_Hoster): + __name__ = "DeadHoster" + __type__ = "hoster" + __version__ = "0.11" + + __pattern__ = None + + __description__ = """Hoster is no longer available""" + __author_name__ = "zoidberg" + __author_mail__ = "zoidberg@mujmail.cz" + + + def setup(self): + self.fail("Hoster is no longer available") diff --git a/pyload/plugins/internal/MultiHoster.py b/pyload/plugins/internal/MultiHoster.py new file mode 100644 index 000000000..d99ae6ff9 --- /dev/null +++ b/pyload/plugins/internal/MultiHoster.py @@ -0,0 +1,192 @@ +# -*- coding: utf-8 -*- + +import re + +from pyload.plugins.Hook import Hook +from pyload.utils import remove_chars + + +class MultiHoster(Hook): + __name__ = "MultiHoster" + __type__ = "hook" + __version__ = "0.20" + + __description__ = """Generic MultiHoster plugin""" + __author_name__ = "pyLoad Team" + __author_mail__ = "admin@pyload.org" + + replacements = [("2shared.com", "twoshared.com"), ("4shared.com", "fourshared.com"), ("cloudnator.com", "shragle.com"), + ("ifile.it", "filecloud.io"), ("easy-share.com", "crocko.com"), ("freakshare.net", "freakshare.com"), + ("hellshare.com", "hellshare.cz"), ("share-rapid.cz", "sharerapid.com"), ("sharerapid.cz", "sharerapid.com"), + ("ul.to", "uploaded.to"), ("uploaded.net", "uploaded.to"), ("1fichier.com", "onefichier.com")] + ignored = [] + interval = 24 * 60 * 60 #: reload hosters daily + + + def setup(self): + self.hosters = [] + self.supported = [] + self.new_supported = [] + + def getConfig(self, option, default=''): + """getConfig with default value - subclass may not implements all config options""" + try: + # Fixed loop due to getConf deprecation in 0.4.10 + return super(MultiHoster, self).getConfig(option) + except KeyError: + return default + + def getHosterCached(self): + if not self.hosters: + try: + hosterSet = self.toHosterSet(self.getHoster()) - set(self.ignored) + except Exception, e: + self.logError("%s" % str(e)) + return [] + + try: + configMode = self.getConfig('hosterListMode', 'all') + if configMode in ("listed", "unlisted"): + configSet = self.toHosterSet(self.getConfig('hosterList', '').replace('|', ',').replace(';', ',').split(',')) + + if configMode == "listed": + hosterSet &= configSet + else: + hosterSet -= configSet + + except Exception, e: + self.logError("%s" % str(e)) + + self.hosters = list(hosterSet) + + return self.hosters + + def toHosterSet(self, hosters): + hosters = set((str(x).strip().lower() for x in hosters)) + + for rep in self.replacements: + if rep[0] in hosters: + hosters.remove(rep[0]) + hosters.add(rep[1]) + + hosters.discard('') + return hosters + + def getHoster(self): + """Load list of supported hoster + + :return: List of domain names + """ + raise NotImplementedError + + def coreReady(self): + if self.cb: + self.core.scheduler.removeJob(self.cb) + + self.setConfig("activated", True) #: config not in sync after plugin reload + + cfg_interval = self.getConfig("interval", None) #: reload interval in hours + if cfg_interval is not None: + self.interval = cfg_interval * 60 * 60 + + if self.interval: + self._periodical() + else: + self.periodical() + + def initPeriodical(self): + pass + + def periodical(self): + """reload hoster list periodically""" + self.logInfo("Reloading supported hoster list") + + old_supported = self.supported + self.supported, self.new_supported, self.hosters = [], [], [] + + self.overridePlugins() + + old_supported = [hoster for hoster in old_supported if hoster not in self.supported] + if old_supported: + self.logDebug("UNLOAD: %s" % ", ".join(old_supported)) + for hoster in old_supported: + self.unloadHoster(hoster) + + def overridePlugins(self): + pluginMap = {} + for name in self.core.pluginManager.hosterPlugins.keys(): + pluginMap[name.lower()] = name + + accountList = [name.lower() for name, data in self.core.accountManager.accounts.items() if data] + excludedList = [] + + for hoster in self.getHosterCached(): + name = remove_chars(hoster.lower(), "-.") + + if name in accountList: + excludedList.append(hoster) + else: + if name in pluginMap: + self.supported.append(pluginMap[name]) + else: + self.new_supported.append(hoster) + + if not self.supported and not self.new_supported: + self.logError(_("No Hoster loaded")) + return + + module = self.core.pluginManager.getPlugin(self.__name__) + klass = getattr(module, self.__name__) + + # inject plugin plugin + self.logDebug("Overwritten Hosters: %s" % ", ".join(sorted(self.supported))) + for hoster in self.supported: + dict = self.core.pluginManager.hosterPlugins[hoster] + dict['new_module'] = module + dict['new_name'] = self.__name__ + + if excludedList: + self.logInfo("The following hosters were not overwritten - account exists: %s" % ", ".join(sorted(excludedList))) + + if self.new_supported: + self.logDebug("New Hosters: %s" % ", ".join(sorted(self.new_supported))) + + # create new regexp + regexp = r".*(%s).*" % "|".join([x.replace(".", "\\.") for x in self.new_supported]) + if hasattr(klass, "__pattern__") and isinstance(klass.__pattern__, basestring) and '://' in klass.__pattern__: + regexp = r"%s|%s" % (klass.__pattern__, regexp) + + self.logDebug("Regexp: %s" % regexp) + + dict = self.core.pluginManager.hosterPlugins[self.__name__] + dict['pattern'] = regexp + dict['re'] = re.compile(regexp) + + def unloadHoster(self, hoster): + dict = self.core.pluginManager.hosterPlugins[hoster] + if "module" in dict: + del dict['module'] + + if "new_module" in dict: + del dict['new_module'] + del dict['new_name'] + + def unload(self): + """Remove override for all hosters. Scheduler job is removed by hookmanager""" + for hoster in self.supported: + self.unloadHoster(hoster) + + # reset pattern + klass = getattr(self.core.pluginManager.getPlugin(self.__name__), self.__name__) + dict = self.core.pluginManager.hosterPlugins[self.__name__] + dict['pattern'] = getattr(klass, "__pattern__", r'^unmatchable$') + dict['re'] = re.compile(dict['pattern']) + + def downloadFailed(self, pyfile): + """remove plugin override if download fails but not if file is offline/temp.offline""" + if pyfile.hasStatus("failed") and self.getConfig("unloadFailing", True): + hdict = self.core.pluginManager.hosterPlugins[pyfile.pluginname] + if "new_name" in hdict and hdict['new_name'] == self.__name__: + self.logDebug("Unload MultiHoster", pyfile.pluginname, hdict) + self.unloadHoster(pyfile.pluginname) + pyfile.setStatus("queued") diff --git a/pyload/plugins/internal/SimpleCrypter.py b/pyload/plugins/internal/SimpleCrypter.py new file mode 100644 index 000000000..6e639c946 --- /dev/null +++ b/pyload/plugins/internal/SimpleCrypter.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- + +import re + +from pyload.plugins.Crypter import Crypter +from pyload.plugins.internal.SimpleHoster import PluginParseError, replace_patterns, set_cookies +from pyload.utils import html_unescape + + +class SimpleCrypter(Crypter): + __name__ = "SimpleCrypter" + __type__ = "crypter" + __version__ = "0.10" + + __pattern__ = None + + __description__ = """Simple decrypter plugin""" + __author_name__ = ("stickell", "zoidberg", "Walter Purcaro") + __author_mail__ = ("l.stickell@yahoo.it", "zoidberg@mujmail.cz", "vuolter@gmail.com") + + """ + Following patterns should be defined by each crypter: + + LINK_PATTERN: group(1) must be a download link or a regex to catch more links + example: LINK_PATTERN = r'<div class="link"><a href="(http://speedload.org/\w+)' + + TITLE_PATTERN: (optional) The group defined by 'title' should be the title + example: TITLE_PATTERN = r'<title>Files of: (?P<title>[^<]+) folder</title>' + + OFFLINE_PATTERN: (optional) Checks if the file is yet available online + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Checks if the file is temporarily offline + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + + If it's impossible to extract the links using the LINK_PATTERN only you can override the getLinks method. + + If the links are disposed on multiple pages you need to define a pattern: + + PAGES_PATTERN: The group defined by 'pages' must be the total number of pages + example: PAGES_PATTERN = r'Pages: (?P<pages>\d+)' + + and a function: + + loadPage(self, page_n): + return the html of the page number 'page_n' + """ + + URL_REPLACEMENTS = [] + + SH_COOKIES = True # or False or list of tuples [(domain, name, value)] + + + def setup(self): + if isinstance(self.SH_COOKIES, list): + set_cookies(self.req.cj, self.SH_COOKIES) + + def decrypt(self, pyfile): + pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS) + + self.html = self.load(pyfile.url, decode=True) + + self.checkOnline() + + package_name, folder_name = self.getPackageNameAndFolder() + + self.package_links = self.getLinks() + + if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): + self.handleMultiPages() + + self.logDebug('Package has %d links' % len(self.package_links)) + + if self.package_links: + self.packages = [(package_name, self.package_links, folder_name)] + else: + self.fail('Could not extract any links') + + def getLinks(self): + """ + Returns the links extracted from self.html + You should override this only if it's impossible to extract links using only the LINK_PATTERN. + """ + return re.findall(self.LINK_PATTERN, self.html) + + def checkOnline(self): + if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): + self.offline() + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): + self.tempOffline() + + def getPackageNameAndFolder(self): + if hasattr(self, 'TITLE_PATTERN'): + m = re.search(self.TITLE_PATTERN, self.html) + if m: + name = folder = html_unescape(m.group('title').strip()) + self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder)) + return name, folder + + name = self.pyfile.package().name + folder = self.pyfile.package().folder + self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder)) + return name, folder + + def handleMultiPages(self): + pages = re.search(self.PAGES_PATTERN, self.html) + if pages: + pages = int(pages.group('pages')) + else: + pages = 1 + + for p in xrange(2, pages + 1): + self.html = self.loadPage(p) + self.package_links += self.getLinks() + + def parseError(self, msg): + raise PluginParseError(msg) diff --git a/pyload/plugins/internal/SimpleHoster.py b/pyload/plugins/internal/SimpleHoster.py new file mode 100644 index 000000000..ca320732f --- /dev/null +++ b/pyload/plugins/internal/SimpleHoster.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- + +import re + +from time import time +from urlparse import urlparse + +from pyload.network.CookieJar import CookieJar +from pyload.network.RequestFactory import getURL +from pyload.plugins.Hoster import Hoster +from pyload.utils import fixup, html_unescape, parseFileSize + + +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + #self.logDebug(rf, rt, string) + return string + + +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + domain, name, value = cookie + cj.setCookie(domain, name, value) + + +def parseHtmlTagAttrValue(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parseHtmlForm(attr_str, html, input_names=None): + for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.S | re.I): + inputs = {} + action = parseHtmlTagAttrValue("action", form.group('tag')) + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I): + name = parseHtmlTagAttrValue("name", inputtag.group(1)) + if name: + value = parseHtmlTagAttrValue("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or '' + else: + inputs[name] = value + + if isinstance(input_names, dict): + # check input attributes + for key, val in input_names.items(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] == val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break # attibute value does not match + else: + break # attibute name does not match + else: + return action, inputs # passed attribute check + else: + # no attribute check + return action, inputs + + return {}, None # no matching form found + + +def parseFileInfo(self, url='', html=''): + info = {"name": url, "size": 0, "status": 3} + + if hasattr(self, "pyfile"): + url = self.pyfile.url + + if hasattr(self, "req") and self.req.http.code == '404': + info['status'] = 1 + else: + if not html and hasattr(self, "html"): + html = self.html + if isinstance(self.SH_BROKEN_ENCODING, (str, unicode)): + html = unicode(html, self.SH_BROKEN_ENCODING) + if hasattr(self, "html"): + self.html = html + + if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, html): + info['status'] = 1 + elif hasattr(self, "FILE_OFFLINE_PATTERN") and re.search(self.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10 + info['status'] = 1 + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, html): + info['status'] = 6 + else: + online = False + try: + info.update(re.match(self.__pattern__, url).groupdict()) + except: + pass + + for pattern in ("FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): + try: + info.update(re.search(getattr(self, pattern), html).groupdict()) + online = True + except AttributeError: + continue + + if online: + # File online, return name and size + info['status'] = 2 + if 'N' in info: + info['name'] = replace_patterns(info['N'], self.FILE_NAME_REPLACEMENTS) + if 'S' in info: + size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], + self.FILE_SIZE_REPLACEMENTS) + info['size'] = parseFileSize(size) + elif isinstance(info['size'], (str, unicode)): + if 'units' in info: + info['size'] += info['units'] + info['size'] = parseFileSize(info['size']) + + if hasattr(self, "file_info"): + self.file_info = info + + return info['name'], info['size'], info['status'], url + + +def create_getInfo(plugin): + + def getInfo(urls): + for url in urls: + cj = CookieJar(plugin.__name__) + if isinstance(plugin.SH_COOKIES, list): + set_cookies(cj, plugin.SH_COOKIES) + file_info = parseFileInfo(plugin, url, getURL(replace_patterns(url, plugin.FILE_URL_REPLACEMENTS), + decode=not plugin.SH_BROKEN_ENCODING, cookies=cj)) + yield file_info + + return getInfo + + +def timestamp(): + return int(time() * 1000) + + +class PluginParseError(Exception): + + def __init__(self, msg): + Exception.__init__(self) + self.value = 'Parse error (%s) - plugin may be out of date' % msg + + def __str__(self): + return repr(self.value) + + +class SimpleHoster(Hoster): + __name__ = "SimpleHoster" + __type__ = "hoster" + __version__ = "0.35" + + __pattern__ = None + + __description__ = """Simple hoster plugin""" + __author_name__ = ("zoidberg", "stickell") + __author_mail__ = ("zoidberg@mujmail.cz", "l.stickell@yahoo.it") + + """ + Following patterns should be defined by each hoster: + + FILE_INFO_PATTERN: Name and Size of the file + example: FILE_INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' + or + FILE_NAME_PATTERN: Name that will be set for the file + example: FILE_NAME_PATTERN = r'(?P<N>file_name)' + FILE_SIZE_PATTERN: Size that will be checked for the file + example: FILE_SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' + + OFFLINE_PATTERN: Checks if the file is yet available online + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: Checks if the file is temporarily offline + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + PREMIUM_ONLY_PATTERN: (optional) Checks if the file can be downloaded only with a premium account + example: PREMIUM_ONLY_PATTERN = r'Premium account required' + """ + + FILE_NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + FILE_SIZE_REPLACEMENTS = [] + FILE_URL_REPLACEMENTS = [] + + SH_BROKEN_ENCODING = False # Set to True or encoding name if encoding in http header is not correct + SH_COOKIES = True # or False or list of tuples [(domain, name, value)] + SH_CHECK_TRAFFIC = False # True = force check traffic left for a premium account + + + def init(self): + self.file_info = {} + + def setup(self): + self.resumeDownload = self.multiDL = self.premium + if isinstance(self.SH_COOKIES, list): + set_cookies(self.req.cj, self.SH_COOKIES) + + def process(self, pyfile): + pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS) + self.req.setOption("timeout", 120) + # Due to a 0.4.9 core bug self.load would keep previous cookies even if overridden by cookies parameter. + # Workaround using getURL. Can be reverted in 0.5 as the cookies bug has been fixed. + self.html = getURL(pyfile.url, decode=not self.SH_BROKEN_ENCODING, cookies=self.SH_COOKIES) + premium_only = hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html) + if not premium_only: # Usually premium only pages doesn't show the file information + self.getFileInfo() + + if self.premium and (not self.SH_CHECK_TRAFFIC or self.checkTrafficLeft()): + self.handlePremium() + elif premium_only: + self.fail("This link require a premium account") + else: + # This line is required due to the getURL workaround. Can be removed in 0.5 + self.html = self.load(pyfile.url, decode=not self.SH_BROKEN_ENCODING, cookies=self.SH_COOKIES) + self.handleFree() + + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): + if type(url) == unicode: + url = url.encode('utf8') + return Hoster.load(self, url=url, get=get, post=post, ref=ref, cookies=cookies, + just_header=just_header, decode=decode) + + def getFileInfo(self): + self.logDebug("URL: %s" % self.pyfile.url) + + name, size, status = parseFileInfo(self)[:3] + + if status == 1: + self.offline() + elif status == 6: + self.tempOffline() + elif status != 2: + self.logDebug(self.file_info) + self.parseError('File info') + + if name: + self.pyfile.name = name + else: + self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + + if size: + self.pyfile.size = size + else: + self.logError("File size not parsed") + + self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) + return self.file_info + + def handleFree(self): + self.fail("Free download not implemented") + + def handlePremium(self): + self.fail("Premium download not implemented") + + def parseError(self, msg): + raise PluginParseError(msg) + + def longWait(self, wait_time=None, max_tries=3): + if wait_time and isinstance(wait_time, (int, long, float)): + time_str = "%dh %dm" % divmod(wait_time / 60, 60) + else: + wait_time = 900 + time_str = "(unknown time)" + max_tries = 100 + + self.logInfo("Download limit reached, reconnect or wait %s" % time_str) + + self.setWait(wait_time, True) + self.wait() + self.retry(max_tries=max_tries, reason="Download limit reached") + + def parseHtmlForm(self, attr_str='', input_names=None): + return parseHtmlForm(attr_str, self.html, input_names) + + def checkTrafficLeft(self): + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] + if traffic == -1: + return True + size = self.pyfile.size / 1024 + self.logInfo("Filesize: %i KiB, Traffic left for user %s: %i KiB" % (size, self.user, traffic)) + return size <= traffic + + # TODO: Remove in 0.5 + def wait(self, seconds=False, reconnect=False): + if seconds: + self.setWait(seconds, reconnect) + super(SimpleHoster, self).wait() diff --git a/pyload/plugins/internal/UnRar.py b/pyload/plugins/internal/UnRar.py new file mode 100644 index 000000000..ed8478a3a --- /dev/null +++ b/pyload/plugins/internal/UnRar.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from glob import glob +from os.path import join +from string import digits +from subprocess import Popen, PIPE + +from pyload.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError +from pyload.utils import safe_join, decode + + +class UnRar(AbtractExtractor): + __name__ = "UnRar" + __version__ = "0.16" + + __description__ = """Rar extractor plugin""" + __author_name__ = "RaNaN" + __author_mail__ = "RaNaN@pyload.org" + + CMD = "unrar" + + # there are some more uncovered rar formats + re_version = re.compile(r"(UNRAR 5[\.\d]+(.*?)freeware)") + re_splitfile = re.compile(r"(.*)\.part(\d+)\.rar$", re.I) + re_partfiles = re.compile(r".*\.(rar|r[0-9]+)", re.I) + re_filelist = re.compile(r"(.+)\s+(\d+)\s+(\d+)\s+") + re_filelist5 = re.compile(r"(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)") + re_wrongpwd = re.compile("(Corrupt file or wrong password|password incorrect)", re.I) + + + @staticmethod + def checkDeps(): + if os.name == "nt": + UnRar.CMD = join(pypath, "UnRAR.exe") + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + else: + try: + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + except OSError: + + # fallback to rar + UnRar.CMD = "rar" + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + + return True + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if not file.endswith(".rar"): + continue + + match = UnRar.re_splitfile.findall(file) + if match: + # only add first parts + if int(match[0][1]) == 1: + result.append((file, id)) + else: + result.append((file, id)) + + return result + + def init(self): + self.passwordProtected = False + self.headerProtected = False #: list files will not work without password + self.smallestFile = None #: small file to test passwords + self.password = "" #: save the correct password + + def checkArchive(self): + p = self.call_unrar("l", "-v", self.file) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + self.passwordProtected = True + self.headerProtected = True + return True + + # output only used to check if passworded files are present + if self.re_version.search(out): + for attr, size, name in self.re_filelist5.findall(out): + if attr.startswith("*"): + self.passwordProtected = True + return True + else: + for name, size, packed in self.re_filelist.findall(out): + if name.startswith("*"): + self.passwordProtected = True + return True + + self.listContent() + if not self.files: + raise ArchiveError("Empty Archive") + + return False + + def checkPassword(self, password): + # at this point we can only verify header protected files + if self.headerProtected: + p = self.call_unrar("l", "-v", self.file, password=password) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + return False + + return True + + def extract(self, progress, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_unrar(command, self.file, self.out, password=password) + renice(p.pid, self.renice) + + progress(0) + progressstring = "" + while True: + c = p.stdout.read(1) + # quit loop on eof + if not c: + break + # reading a percentage sign -> set progress and restart + if c == '%': + progress(int(progressstring)) + progressstring = "" + # not reading a digit -> therefore restart + elif c not in digits: + progressstring = "" + # add digit to progressstring + else: + progressstring = progressstring + c + progress(100) + + # retrieve stderr + err = p.stderr.read() + + if "CRC failed" in err and not password and not self.passwordProtected: + raise CRCError + elif "CRC failed" in err: + raise WrongPassword + if err.strip(): #: raise error if anything is on stderr + raise ArchiveError(err.strip()) + if p.returncode: + raise ArchiveError("Process terminated") + + if not self.files: + self.password = password + self.listContent() + + def getDeleteFiles(self): + if ".part" in self.file: + return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.IGNORECASE)) + # get files which matches .r* and filter unsuited files out + parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.IGNORECASE)) + return filter(lambda x: self.re_partfiles.match(x), parts) + + def listContent(self): + command = "vb" if self.fullpath else "lb" + p = self.call_unrar(command, "-v", self.file, password=self.password) + out, err = p.communicate() + + if "Cannot open" in err: + raise ArchiveError("Cannot open file") + + if err.strip(): #: only log error at this point + self.m.logError(err.strip()) + + result = set() + + for f in decode(out).splitlines(): + f = f.strip() + result.add(safe_join(self.out, f)) + + self.files = result + + def call_unrar(self, command, *xargs, **kwargs): + args = [] + # overwrite flag + args.append("-o+") if self.overwrite else args.append("-o-") + + if self.excludefiles: + for word in self.excludefiles.split(';'): + args.append("-x%s" % word) + + # assume yes on all queries + args.append("-y") + + # set a password + if "password" in kwargs and kwargs['password']: + args.append("-p%s" % kwargs['password']) + else: + args.append("-p-") + + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + self.m.logDebug(" ".join(call)) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + + return p + + +def renice(pid, value): + if os.name != "nt" and value: + try: + Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + except: + print "Renice failed" diff --git a/pyload/plugins/internal/UnZip.py b/pyload/plugins/internal/UnZip.py new file mode 100644 index 000000000..65a5a82bb --- /dev/null +++ b/pyload/plugins/internal/UnZip.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +import sys +import zipfile + +from pyload.plugins.internal.AbstractExtractor import AbtractExtractor + + +class UnZip(AbtractExtractor): + __name__ = "UnZip" + __version__ = "0.1" + + __description__ = """Zip extractor plugin""" + __author_name__ = "RaNaN" + __author_mail__ = "RaNaN@pyload.org" + + + @staticmethod + def checkDeps(): + return sys.version_info[:2] >= (2, 6) + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if file.endswith(".zip"): + result.append((file, id)) + + return result + + def extract(self, progress, password=None): + z = zipfile.ZipFile(self.file) + self.files = z.namelist() + z.extractall(self.out) + + def getDeleteFiles(self): + return [self.file] diff --git a/pyload/plugins/internal/XFSPAccount.py b/pyload/plugins/internal/XFSPAccount.py new file mode 100644 index 000000000..aec9b7dbc --- /dev/null +++ b/pyload/plugins/internal/XFSPAccount.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +import re + +from time import mktime, strptime + +from pyload.plugins.Account import Account +from pyload.plugins.internal.SimpleHoster import parseHtmlForm +from pyload.utils import parseFileSize + + +class XFSPAccount(Account): + __name__ = "XFSPAccount" + __type__ = "account" + __version__ = "0.06" + + __description__ = """XFileSharingPro base account plugin""" + __author_name__ = "zoidberg" + __author_mail__ = "zoidberg@mujmail.cz" + + MAIN_PAGE = None + + VALID_UNTIL_PATTERN = r'>Premium.[Aa]ccount expire:</TD><TD><b>([^<]+)</b>' + TRAFFIC_LEFT_PATTERN = r'>Traffic available today:</TD><TD><b>([^<]+)</b>' + LOGIN_FAIL_PATTERN = r'Incorrect Login or Password|>Error<' + PREMIUM_PATTERN = r'>Renew premium<' + + + def loadAccountInfo(self, user, req): + html = req.load(self.MAIN_PAGE + "?op=my_account", decode=True) + + validuntil = trafficleft = None + premium = True if re.search(self.PREMIUM_PATTERN, html) else False + + m = re.search(self.VALID_UNTIL_PATTERN, html) + if m: + premium = True + trafficleft = -1 + try: + self.logDebug(m.group(1)) + validuntil = mktime(strptime(m.group(1), "%d %B %Y")) + except Exception, e: + self.logError(e) + else: + m = re.search(self.TRAFFIC_LEFT_PATTERN, html) + if m: + trafficleft = m.group(1) + if "Unlimited" in trafficleft: + premium = True + else: + trafficleft = parseFileSize(trafficleft) / 1024 + + return {"validuntil": validuntil, "trafficleft": trafficleft, "premium": premium} + + def login(self, user, data, req): + html = req.load('%slogin.html' % self.MAIN_PAGE, decode=True) + + action, inputs = parseHtmlForm('name="FL"', html) + if not inputs: + inputs = {"op": "login", + "redirect": self.MAIN_PAGE} + + inputs.update({"login": user, + "password": data['password']}) + + html = req.load(self.MAIN_PAGE, post=inputs, decode=True) + + if re.search(self.LOGIN_FAIL_PATTERN, html): + self.wrongPassword() diff --git a/pyload/plugins/internal/__init__.py b/pyload/plugins/internal/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pyload/plugins/internal/__init__.py |