diff options
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 139 |
1 files changed, 97 insertions, 42 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 3f14ca711..c74e33d59 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,10 +1,11 @@ # -*- coding: utf-8 -*- +import mimetypes +import os import re from datetime import datetime, timedelta from inspect import isclass -from os.path import exists from time import time from urllib import unquote from urlparse import urljoin, urlparse @@ -107,7 +108,13 @@ def parseFileInfo(plugin, url="", html=""): info = plugin.getInfo(url, html) res = info['name'], info['size'], info['status'], info['url'] else: - res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url + url = unquote(url) + res = ((urlparse(url).path.split('/')[-1] + or urlparse(url).query.split('=', 1)[::-1][0].split('&', 1)[0] + or _("Unknown")), + 0, + 3 if url else 8, + url) return res @@ -133,37 +140,86 @@ def timestamp(): #@TODO: Move to hoster class in 0.4.10 -def directLink(self, url, resumable=False): - link = "" +def fileUrl(self, url, follow_location=None): + link = "" + redirect = 1 - for i in xrange(5 if resumable else 1): - header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) + if type(follow_location) is int: + redirect = max(follow_location, 1) + else: + redirect = 5 + + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) + + except Exception: #: Bad bad bad... + req = pyreq.getHTTPRequest() + res = req.load(url, cookies=True, just_header=True, decode=True) + + req.close() + + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value if 'content-disposition' in header: link = url - elif 'location' in header and header['location']: + elif 'location' in header and header['location'].strip(): location = header['location'] if not urlparse(location).scheme: - parsed = urlparse(url) - base = "%s://%s" % (parsed.scheme, parsed.netloc) - location = urljoin(base, location) + url_p = urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urljoin(baseurl, location) + + if 'code' in header and header['code'] == 302: + link = location - if resumable: + if follow_location: url = location - self.logDebug("Redirect #%d to: %s" % (++i, location)) continue - elif 'code' in header and header['code'] == 302: - link = location + else: + extension = os.path.splitext(urlparse(url).path.split('/')[-1])[-1] - elif 'content-type' in header and header['content-type'] and "html" not in header['content-type']: - link = url + if 'content-type' in header and header['content-type'].strip(): + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" break + else: - self.logError(_("Too many redirects")) + try: + self.logError(_("Too many redirects")) + except Exception: + pass return link @@ -189,7 +245,7 @@ def secondsToMidnight(gmt=0): class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.98" + __version__ = "1.12" __pattern__ = r'^unmatchable$' @@ -253,7 +309,7 @@ class SimpleHoster(Hoster): MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) LOGIN_ACCOUNT = False #: Set to True to require account login - directLink = directLink #@TODO: Remove in 0.4.10 + directLink = fileUrl #@TODO: Remove in 0.4.10 @classmethod @@ -277,7 +333,7 @@ class SimpleHoster(Hoster): @classmethod def getInfo(cls, url="", html=""): info = cls.apiInfo(url) - online = False + online = False if info['status'] != 2 else True try: info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here @@ -285,12 +341,12 @@ class SimpleHoster(Hoster): except Exception: info['pattern'] = {} - if not html: + if not html and not online: if not url: info['error'] = "missing url" info['status'] = 1 - elif info['status'] is 3: + elif info['status'] is 3 and not fileUrl(None, url): try: html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) @@ -368,6 +424,7 @@ class SimpleHoster(Hoster): self.pyfile.error = "" #@TODO: Remove in 0.4.10 self.info = {} + self.html = "" self.link = "" #@TODO: Move to hoster class in 0.4.10 self.directDL = False #@TODO: Move to hoster class in 0.4.10 self.multihost = False #@TODO: Move to hoster class in 0.4.10 @@ -434,10 +491,16 @@ class SimpleHoster(Hoster): self.checkFile() - def downloadLink(self, link): + def downloadLink(self, link, disposition=False): #@TODO: Set `disposition=True` in 0.4.10 if link and isinstance(link, basestring): self.correctCaptcha() - self.download(link, disposition=False) #@TODO: Set `disposition=True` in 0.4.10 + + if not urlparse(link).scheme: + url_p = urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + link = urljoin(baseurl, link) + + self.download(link, ref=False, disposition=disposition) def checkFile(self): @@ -445,7 +508,7 @@ class SimpleHoster(Hoster): self.invalidCaptcha() self.retry(10, reason=_("Wrong captcha")) - elif not self.lastDownload or not exists(fs_encode(self.lastDownload)): + elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)): self.lastDownload = "" self.error(self.pyfile.error or _("No file downloaded")) @@ -472,7 +535,7 @@ class SimpleHoster(Hoster): self.logWarning(_("No html code to check")) return - if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + if hasattr(self, 'PREMIUM_ONLY_PATTERN') and not self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): self.fail(_("Link require a premium account to be handled")) elif hasattr(self, 'ERROR_PATTERN'): @@ -577,16 +640,12 @@ class SimpleHoster(Hoster): if not hasattr(self, 'LINK_FREE_PATTERN'): self.logError(_("Free download not implemented")) - try: - m = re.search(self.LINK_FREE_PATTERN, self.html) - if m is None: - self.error(_("Free download link not found")) - + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def handlePremium(self, pyfile): if not hasattr(self, 'LINK_PREMIUM_PATTERN'): @@ -594,16 +653,12 @@ class SimpleHoster(Hoster): self.logDebug("Handled as free download") self.handleFree(pyfile) - try: - m = re.search(self.LINK_PREMIUM_PATTERN, self.html) - if m is None: - self.error(_("Premium download link not found")) - + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def longWait(self, wait_time=None, max_tries=3): if wait_time and isinstance(wait_time, (int, long, float)): |