diff options
author | Walter Purcaro <vuolter@gmail.com> | 2014-11-01 18:51:19 +0100 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2014-11-01 18:51:19 +0100 |
commit | 011c308fdce25df396824ea97c2bf634b976c8d8 (patch) | |
tree | c88d192d4cd1359fec6ca09b323a001a2f1e7bc4 /module/plugins/internal/SimpleHoster.py | |
parent | [SimpleCrypter] Move TEXT_ENCODING check to prepare routine (diff) | |
download | pyload-011c308fdce25df396824ea97c2bf634b976c8d8.tar.xz |
[SimpleHoster] Fix and improve ddl detection, parseFileInfo and getInfo
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 122 |
1 files changed, 80 insertions, 42 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index de21f4d3c..e601759cf 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -90,27 +90,30 @@ def parseHtmlForm(attr_str, html, input_names=None): def parseFileInfo(self, url="", html=""): - info = {"name": url, "size": 0, "status": 3} - - if hasattr(self, "pyfile"): + if not url and hasattr(self, "pyfile"): url = self.pyfile.url - if hasattr(self, "req") and self.req.http.code == '404': - info['status'] = 1 - else: - if not html and hasattr(self, "html"): - html = self.html - if isinstance(self.TEXT_ENCODING, basestring): - html = unicode(html, self.TEXT_ENCODING) - if hasattr(self, "html"): - self.html = html + info = {"name": url, "size": 0, "status": 3} + if not html: + if url: + return tuple(create_getInfo(self)([url])) + elif hasattr(self, "html"): + if hasattr(self, "req") and self.req.http.code == '404': + info['status'] = 1 + else: + html = self.html + + if html: if hasattr(self, "OFFLINE_PATTERN") and re.search(self.OFFLINE_PATTERN, html): info['status'] = 1 + elif hasattr(self, "FILE_OFFLINE_PATTERN") and re.search(self.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10 info['status'] = 1 + elif hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, html): info['status'] = 6 + else: online = False try: @@ -128,18 +131,23 @@ def parseFileInfo(self, url="", html=""): if online: # File online, return name and size info['status'] = 2 + if 'N' in info: info['name'] = replace_patterns(info['N'].strip(), self.FILE_NAME_REPLACEMENTS) + if 'S' in info: size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], self.FILE_SIZE_REPLACEMENTS) info['size'] = parseFileSize(size) + elif isinstance(info['size'], basestring): unit = info['units'] if 'units' in info else None info['size'] = parseFileSize(info['size'], unit) - if hasattr(self, "file_info"): - self.file_info = info + if not hasattr(self, "file_info"): + self.file_info = {} + + self.file_info.update(info) return info['name'], info['size'], info['status'], url @@ -148,13 +156,23 @@ def create_getInfo(plugin): def getInfo(urls): for url in urls: - cj = CookieJar(plugin.__name__) - if isinstance(plugin.COOKIES, list): + if hasattr(plugin, "COOKIES") and isinstance(plugin.COOKIES, list): + cj = CookieJar(plugin.__name__) set_cookies(cj, plugin.COOKIES) + else: + cj = None - file_info = parseFileInfo(plugin, url, getURL(replace_patterns(url, plugin.FILE_URL_REPLACEMENTS), - decode=not plugin.TEXT_ENCODING, cookies=cj)) - yield file_info + if hasattr(plugin, "FILE_URL_REPLACEMENTS"): + url = replace_patterns(url, plugin.FILE_URL_REPLACEMENTS) + + if hasattr(plugin, "TEXT_ENCODING"): + html = getURL(url, cookies=bool(cj), decode=not plugin.TEXT_ENCODING) + if isinstance(plugin.TEXT_ENCODING, basestring): + html = unicode(html, plugin.TEXT_ENCODING) + else: + html = getURL(url, cookies=bool(cj), decode=True) + + yield parseFileInfo(plugin, url, html) return getInfo @@ -166,7 +184,7 @@ def timestamp(): class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.45" + __version__ = "0.46" __pattern__ = None @@ -215,11 +233,10 @@ class SimpleHoster(Hoster): COOKIES = True #: or False or list of tuples [(domain, name, value)] FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account - CHECK_DIRECT_DOWNLOAD = True #: Search for a direct download link in the page header (premium only) - def init(self): self.file_info = {} + self.html = "" #@TODO: Remove in 0.4.10 def setup(self): @@ -234,31 +251,53 @@ class SimpleHoster(Hoster): self.pyfile.url = replace_patterns(self.pyfile.url, self.FILE_URL_REPLACEMENTS) - if self.CHECK_DIRECT_DOWNLOAD and self.premium: - header = self.load(self.pyfile.url, just_header=True, decode=True) - if 'location' in header: - self.logDebug("Direct download link detected") - self.download(header['location'], ref=True, cookies=True, disposition=True) - return + if self.premium: + direct_link = self.getDirectLink(self.pyfile.url) + if direct_link: + return direct_link self.html = self.load(self.pyfile.url, decode=not self.TEXT_ENCODING, cookies=bool(self.COOKIES)) + if isinstance(self.TEXT_ENCODING, basestring): + self.html = unicode(self.html, self.TEXT_ENCODING) + def process(self, pyfile): - self.prepare() + direct_link = self.prepare() - premium_only = hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html) - if not premium_only: #: Usually premium only pages doesn't show the file information - self.getFileInfo() + if isinstance(direct_link, basestring): + self.download(direct_link, ref=True, cookies=True, disposition=True) + + elif self.html is None: + self.fail(_("Attribute html should never be set to None")) - if self.premium and (not self.FORCE_CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handle as premium download") - self.handlePremium() - elif premium_only: - self.fail(_("This link require a premium account")) else: - self.logDebug("Handle as free download") - self.handleFree() + premium_only = hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html) + if not premium_only and 'status' not in self.file_info: #: Usually premium only pages doesn't show the file information + self.getFileInfo() + + if self.premium and (not self.FORCE_CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium() + elif premium_only: + self.fail(_("Link require a premium account to be handled")) + else: + self.logDebug("Handled as free download") + self.handleFree() + + + def getDirectLink(self, url): + self.req.http.c.setopt(FOLLOWLOCATION, 0) + + html = self.load(url, ref=True, decode=True) + + self.req.http.c.setopt(FOLLOWLOCATION, 1) + + if parseFileInfo(self, url, html)[2] != 2: + try + return re.search(r"Location\s*:\s*(.+)", self.req.http.header, re.I).group(1) + except: + pass def getFileInfo(self): @@ -271,8 +310,7 @@ class SimpleHoster(Hoster): elif status == 6: self.tempOffline() elif status != 2: - self.logDebug(self.file_info) - self.error(_("File info")) + self.error(_("File info: %s") % self.file_info) if name: self.pyfile.name = name @@ -284,7 +322,7 @@ class SimpleHoster(Hoster): else: self.logError(_("File size not parsed")) - self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) + self.logDebug("FILE NAME: %s" % self.pyfile.name, "FILE SIZE: %d" % self.pyfile.size) return self.file_info |