diff options
author | Walter Purcaro <vuolter@gmail.com> | 2014-12-27 13:39:25 +0100 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2014-12-27 13:39:25 +0100 |
commit | 4f287a307dd61afbbf1029065dcd6d1b9c304a15 (patch) | |
tree | 87945030efa5e5cf67fdef3caad78e19b8e03b43 | |
parent | [NowVideoSx] Fixup (diff) | |
download | pyload-4f287a307dd61afbbf1029065dcd6d1b9c304a15.tar.xz |
New plugin: SimpleDereferer
-rw-r--r-- | module/plugins/internal/SimpleDereferer.py | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/module/plugins/internal/SimpleDereferer.py b/module/plugins/internal/SimpleDereferer.py new file mode 100644 index 000000000..0ad1098f4 --- /dev/null +++ b/module/plugins/internal/SimpleDereferer.py @@ -0,0 +1,96 @@ +# -*- coding: utf-8 -*- + +import re + +from urllib import unquote + +from module.plugins.Crypter import Crypter +from module.plugins.internal.SimpleHoster import _isDirectLink, set_cookies + + +class SimpleDereferer(Crypter): + __name__ = "SimpleDereferer" + __type__ = "crypter" + __version__ = "0.01" + + __pattern__ = r'^unmatchable$' + __config__ = [("use_subfolder", "bool", "Save package to subfolder", True), + ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + + __description__ = """Simple dereferer plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + """ + Following patterns should be defined by each crypter: + + LINK_PATTERN: Regex to catch the redirect url in group(1) + example: LINK_PATTERN = r'<div class="link"><a href="(.+?)"' + + OFFLINE_PATTERN: (optional) Checks if the page is unreachable + example: OFFLINE_PATTERN = r'File (deleted|not found)' + + TEMP_OFFLINE_PATTERN: (optional) Checks if the page is temporarily unreachable + example: TEMP_OFFLINE_PATTERN = r'Server maintainance' + + + You can override the getLinks method if you need a more sophisticated way to extract the redirect url. + """ + + LINK_PATTERN = None + + TEXT_ENCODING = False + COOKIES = True + + + def decrypt(self, pyfile): + link = _isDirectLink(pyfile.url) + + if not link: + try: + link = unquote(re.match(self.__pattern__, pyfile.url).group('LINK')) + + except Exception: + self.prepare() + self.preload() + + if self.html is None: + self.fail(_("No html retrieved")) + + self.checkStatus() + + link = self.getLink() + + if link.strip(): + self.urls = [link.strip()] #@TODO: Remove `.strip()` in 0.4.10 + + + def prepare(self): + self.req.setOption("timeout", 120) + + if isinstance(self.COOKIES, list): + set_cookies(self.req.cj, self.COOKIES) + + + def preload(self): + self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING) + + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + + + def checkStatus(self): + if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, self.html): + self.offline() + + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): + self.tempOffline() + + + def getLink(self): + try: + return re.search(self.LINK_PATTERN, self.html).group(1) + + except Exception: + pass |