diff options
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 123 |
1 files changed, 83 insertions, 40 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index f741cd691..736d15f3a 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -4,7 +4,7 @@ import re from time import time from urllib import unquote -from urlparse import urlparse +from urlparse import urljoin, urlparse from module.PyFile import statusMap as _statusMap from module.network.CookieJar import CookieJar @@ -33,7 +33,7 @@ def _error(self, reason, type): #@TODO: Remove in 0.4.10 def _wait(self, seconds, reconnect): if seconds: - self.setWait(seconds) + self.setWait(seconds + 1) if reconnect is not None: self.wantReconnect = reconnect @@ -114,25 +114,35 @@ def timestamp(): #@TODO: Move to hoster class in 0.4.10 -def _getDirectLink(self, url): +def _isDirectLink(self, url, resumable=True): header = self.load(url, ref=True, just_header=True, decode=True) - if not 'code' in header or header['code'] != 302: - return "" - if not 'location' in header or not header['location']: return "" - # if 'content-type' in header and "text/plain" not in header['content-type']: - # return "" + location = header['location'] + + if resumable: #: sometimes http code may be wrong... + if self.load(location, ref=True, cookies=True, just_header=True, decode=True)['location']: + return "" + else: + if not 'code' in header or header['code'] != 302: + return "" - return header['location'] + if urlparse(location).scheme: + link = location + else: + p = urlparse(url) + base = "%s://%s" % (p.scheme, p.netloc) + link = urljoin(base, location) + + return link class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.68" + __version__ = "0.69" __pattern__ = r'^unmatchable$' @@ -154,6 +164,9 @@ class SimpleHoster(Hoster): SIZE_PATTERN: (optional) Size that will be checked for the file example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' + HASHSUM_PATTERN: (optional) Hash code and type of the file + example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' + OFFLINE_PATTERN: (optional) Check if the file is yet available online example: OFFLINE_PATTERN = r'File (deleted|not found)' @@ -189,9 +202,9 @@ class SimpleHoster(Hoster): TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct COOKIES = True #: or False or list of tuples [(domain, name, value)] FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account - CHECK_DIRECT_LINK = None #: when None self-set to True if self.account else False - MULTI_HOSTER = False #: Set to True to leech other hoster link - CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value in http header + CHECK_DIRECT_LINK = None #: Set to True to check for direct link, set to None to do it only if self.account is True + MULTI_HOSTER = False #: Set to True to leech other hoster link (according its multihoster hook if available) + CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value from http header @classmethod @@ -203,14 +216,32 @@ class SimpleHoster(Hoster): @classmethod def getInfo(cls, url="", html=""): - info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""} + info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url} if not html: - if url: - html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) - if isinstance(cls.TEXT_ENCODING, basestring): - html = unicode(html, cls.TEXT_ENCODING) - else: + try: + if not url: + info['error'] = "missing url" + info['status'] = 1 + raise + + try: + html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) + + if isinstance(cls.TEXT_ENCODING, basestring): + html = unicode(html, cls.TEXT_ENCODING) + + except BadHeader, e: + info['error'] = "%d: %s" % (e.code, e.content) + + if e.code is 404: + info['status'] = 1 + raise + + if e.code is 503: + info['status'] = 6 + raise + except: return info online = False @@ -226,19 +257,20 @@ class SimpleHoster(Hoster): else: try: - info.update(re.match(cls.__pattern__, url).groupdict()) + info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api'] except: pass for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN", "FILE_NAME_PATTERN", "NAME_PATTERN", - "FILE_SIZE_PATTERN", "SIZE_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 + "FILE_SIZE_PATTERN", "SIZE_PATTERN", + "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 try: attr = getattr(cls, pattern) dict = re.search(attr, html).groupdict() - if all(True for k in dict if k not in info): - info.update(dict) + if all(True for k in dict if k not in info['pattern']): + info['pattern'].update(dict) except AttributeError: continue @@ -249,12 +281,12 @@ class SimpleHoster(Hoster): if online: info['status'] = 2 - if 'N' in info: - info['name'] = replace_patterns(unquote(info['N'].strip()), + if 'N' in info['pattern']: + info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10 - if 'S' in info: - size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], + if 'S' in info['pattern']: + size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'], cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10 info['size'] = parseFileSize(size) @@ -262,6 +294,10 @@ class SimpleHoster(Hoster): unit = info['units'] if 'units' in info else None info['size'] = parseFileSize(info['size'], unit) + if 'H' in info['pattern']: + hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" + info[hashtype] = info['pattern']['H'] + return info @@ -349,15 +385,20 @@ class SimpleHoster(Hoster): if self.link: self.download(self.link, disposition=self.CONTENT_DISPOSITION) + self.checkFile() + + + def checkFile(self) + if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10 + self.fail(_("Empty file")) + def checkErrors(self): - if hasattr(self, 'WAIT_PATTERN'): - m = re.search(self.WAIT_PATTERN, self.html) + if hasattr(self, 'ERROR_PATTERN'): + m = re.search(self.ERROR_PATTERN, self.html) if m: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) - self.wait(wait_time, False) - return + e = self.info['error'] = m.group(1) + self.error(e) if hasattr(self, 'PREMIUM_ONLY_PATTERN'): m = re.search(self.PREMIUM_ONLY_PATTERN, self.html) @@ -365,11 +406,13 @@ class SimpleHoster(Hoster): self.info['error'] = "premium-only" return - if hasattr(self, 'ERROR_PATTERN'): - m = re.search(self.ERROR_PATTERN, self.html) + if hasattr(self, 'WAIT_PATTERN'): + m = re.search(self.WAIT_PATTERN, self.html) if m: - e = self.info['error'] = m.group(1) - self.error(e) + wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)]) + self.wait(wait_time, False) + return self.info.pop('error', None) @@ -431,7 +474,7 @@ class SimpleHoster(Hoster): def handleDirect(self): - link = _getDirectLink(self, self.pyfile.url) + link = _isDirectLink(self, self.pyfile.url, self.resumeDownload) if link: self.logInfo(_("Direct download link detected")) @@ -460,7 +503,7 @@ class SimpleHoster(Hoster): self.link = m.group(1) except Exception, e: - self.fail(str(e)) + self.fail(e) def handlePremium(self): @@ -475,7 +518,7 @@ class SimpleHoster(Hoster): self.link = m.group(1) except Exception, e: - self.fail(str(e)) + self.fail(e) def longWait(self, wait_time=None, max_tries=3): |