diff options
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 649 |
1 files changed, 446 insertions, 203 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 6726726e1..1d44a6642 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -1,18 +1,20 @@ # -*- coding: utf-8 -*- +import datetime +import mimetypes +import os import re - -from os.path import exists -from time import time -from urllib import unquote -from urlparse import urljoin, urlparse +import time +import urllib +import urlparse from module.PyFile import statusMap as _statusMap from module.network.CookieJar import CookieJar +from module.network.HTTPRequest import BadHeader from module.network.RequestFactory import getURL from module.plugins.Hoster import Hoster -from module.plugins.Plugin import Fail -from module.utils import fixup, fs_encode, parseFileSize +from module.plugins.Plugin import Fail, Retry +from module.utils import fixup, fs_encode, html_unescape, parseFileSize #@TODO: Adapt and move to PyFile in 0.4.10 @@ -25,7 +27,7 @@ def _error(self, reason, type): type = "unknown" msg = _("%s error") % type.strip().capitalize() if type else _("Error") - msg += ": %s" % reason.strip() if reason else "" + msg += (": %s" % reason.strip()) if reason else "" msg += _(" | Plugin may be out of date") raise Fail(msg) @@ -72,7 +74,7 @@ def parseHtmlForm(attr_str, html, input_names={}): if name: value = parseHtmlTagAttrValue("value", inputtag.group(1)) if not value: - inputs[name] = inputtag.group(3) or '' + inputs[name] = inputtag.group(3) or "" else: inputs[name] = value @@ -98,38 +100,77 @@ def parseHtmlForm(attr_str, html, input_names={}): return {}, None #: no matching form found -#: Deprecated +#@TODO: Remove in 0.4.10 def parseFileInfo(plugin, url="", html=""): if hasattr(plugin, "getInfo"): info = plugin.getInfo(url, html) res = info['name'], info['size'], info['status'], info['url'] else: - res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url + url = urllib.unquote(url) + url_p = urlparse.urlparse(url) + res = ((url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 0, + 3 if url else 8, + url) return res #@TODO: Remove in 0.4.10 -#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10 def create_getInfo(plugin): - if hasattr(plugin, "parseInfos"): - fn = lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)] - else: - fn = lambda urls: [parseFileInfo(url) for url in urls] + def getInfo(urls): + for url in urls: + if hasattr(plugin, "URL_REPLACEMENTS"): + url = replace_patterns(url, plugin.URL_REPLACEMENTS) + yield parseFileInfo(plugin, url) - return fn + return getInfo def timestamp(): - return int(time() * 1000) + return int(time.time() * 1000) #@TODO: Move to hoster class in 0.4.10 -def _isDirectLink(self, url, resumable=False): - link = "" +def getFileURL(self, url, follow_location=None): + link = "" + redirect = 1 + + if type(follow_location) is int: + redirect = max(follow_location, 1) + else: + redirect = 10 + + for i in xrange(redirect): + try: + self.logDebug("Redirect #%d to: %s" % (i, url)) + header = self.load(url, just_header=True, decode=True) + + except Exception: #: Bad bad bad... rewrite this part in 0.4.10 + req = pyreq.getHTTPRequest() + res = req.load(url, just_header=True, decode=True) - for i in xrange(5 if resumable else 1): - header = self.load(url, ref=True, cookies=True, just_header=True, decode=True) + req.close() + + header = {"code": req.code} + for line in res.splitlines(): + line = line.strip() + if not line or ":" not in line: + continue + + key, none, value = line.partition(":") + key = key.lower().strip() + value = value.strip() + + if key in header: + if type(header[key]) == list: + header[key].append(value) + else: + header[key] = [header[key], value] + else: + header[key] = value if 'content-disposition' in header: link = url @@ -137,62 +178,100 @@ def _isDirectLink(self, url, resumable=False): elif 'location' in header and header['location']: location = header['location'] - if not urlparse(location).scheme: - p = urlparse(url) - base = "%s://%s" % (p.scheme, p.netloc) - location = urljoin(base, location) + if not urlparse.urlparse(location).scheme: + url_p = urlparse.urlparse(url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + location = urlparse.urljoin(baseurl, location) if 'code' in header and header['code'] == 302: link = location - elif resumable: + if follow_location: url = location - self.logDebug("Redirect #%d to: %s" % (++i, location)) continue + else: + extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] + + if 'content-type' in header and header['content-type']: + mimetype = header['content-type'].split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + else: + mimetype = "" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = "" + break + else: - self.logError(_("Too many redirects")) + try: + self.logError(_("Too many redirects")) + except Exception: + pass return link +def secondsToMidnight(gmt=0): + now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) + + if now.hour is 0 and now.minute < 10: + midnight = now + else: + midnight = now + datetime.timedelta(days=1) + + td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now + + if hasattr(td, 'total_seconds'): + res = td.total_seconds() + else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds + res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 + + return int(res) + + class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.79" + __version__ = "1.50" __pattern__ = r'^unmatchable$' + __config__ = [("use_premium", "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails", True)] __description__ = """Simple hoster plugin""" __license__ = "GPLv3" - __authors__ = [("zoidberg", "zoidberg@mujmail.cz"), - ("stickell", "l.stickell@yahoo.it"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] """ - Info patterns should be defined by each hoster: + Info patterns: - INFO_PATTERN: (optional) Name and Size of the file + INFO_PATTERN: (mandatory) Name and Size of the file example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)' or - NAME_PATTERN: (optional) Name that will be set for the file + NAME_PATTERN: (mandatory) Name that will be set for the file example: NAME_PATTERN = r'(?P<N>file_name)' - SIZE_PATTERN: (optional) Size that will be checked for the file + SIZE_PATTERN: (mandatory) Size that will be checked for the file example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)' HASHSUM_PATTERN: (optional) Hash code and type of the file example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)' - OFFLINE_PATTERN: (optional) Check if the file is yet available online + OFFLINE_PATTERN: (mandatory) Check if the page is unreachable example: OFFLINE_PATTERN = r'File (deleted|not found)' - TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline + TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)' - Error handling patterns are all optional: + Error patterns: WAIT_PATTERN: (optional) Detect waiting time example: WAIT_PATTERN = r'' @@ -200,11 +279,23 @@ class SimpleHoster(Hoster): PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account example: PREMIUM_ONLY_PATTERN = r'Premium account required' + HAPPY_HOUR_PATTERN: (optional) + example: HAPPY_HOUR_PATTERN = r'Happy hour' + + IP_BLOCKED_PATTERN: (optional) + example: IP_BLOCKED_PATTERN = r'in your country' + + DOWNLOAD_LIMIT_PATTERN: (optional) + example: DOWNLOAD_LIMIT_PATTERN = r'download limit' + + SIZE_LIMIT_PATTERN: (optional) + example: SIZE_LIMIT_PATTERN = r'up to' + ERROR_PATTERN: (optional) Detect any error preventing download example: ERROR_PATTERN = r'' - Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download: + Instead overriding handleFree and handlePremium methods you may define the following patterns for basic link handling: LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"' @@ -217,31 +308,46 @@ class SimpleHoster(Hoster): SIZE_REPLACEMENTS = [] URL_REPLACEMENTS = [] - TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct - COOKIES = True #: or False or list of tuples [(domain, name, value)] - CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account - DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False - MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct + COOKIES = True #: or False or list of tuples [(domain, name, value)] + CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account + DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False + MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method) + LOGIN_ACCOUNT = False #: Set to True to require account login + DISPOSITION = True #: Set to True to use any content-disposition value in http header as file name + + directLink = getFileURL #@TODO: Remove in 0.4.10 @classmethod - def parseInfos(cls, urls): - for url in urls: - url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 - yield cls.getInfo(url) + def apiInfo(cls, url): + url = urllib.unquote(url) + url_p = urlparse.urlparse(url) + return {'name' : (url_p.path.split('/')[-1] + or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] + or url_p.netloc.split('.', 1)[0]), + 'size' : 0, + 'status': 3 if url else 8, + 'url' : url} @classmethod def getInfo(cls, url="", html=""): - info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url} + info = cls.apiInfo(url) + online = True if info['status'] is 2 else False - if not html: - try: - if not url: - info['error'] = "missing url" - info['status'] = 1 - raise + try: + info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here + except Exception: + info['pattern'] = {} + + if not html and not online: + if not url: + info['error'] = "missing url" + info['status'] = 1 + + elif info['status'] is 3: try: html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING) @@ -253,61 +359,45 @@ class SimpleHoster(Hoster): if e.code is 404: info['status'] = 1 - raise - if e.code is 503: + elif e.code is 503: info['status'] = 6 - raise - except: - return info - online = False + except Exception: + pass - if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): - info['status'] = 1 + if html: + if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): + info['status'] = 1 - elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10 - info['status'] = 1 + elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): + info['status'] = 6 - elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): - info['status'] = 6 - - else: - try: - info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api'] - except: - info['pattern'] = {} - - for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN", - "FILE_NAME_PATTERN", "NAME_PATTERN", - "FILE_SIZE_PATTERN", "SIZE_PATTERN", - "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 - try: - attr = getattr(cls, pattern) - pdict = re.search(attr, html).groupdict() - - if all(True for k in pdict if k not in info['pattern']): - info['pattern'].update(pdict) + else: + for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"): + try: + attr = getattr(cls, pattern) + pdict = re.search(attr, html).groupdict() - except AttributeError: - continue + if all(True for k in pdict if k not in info['pattern']): + info['pattern'].update(pdict) - else: - online = True + except AttributeError: + continue - if not info['pattern']: - info.pop('pattern', None) + else: + online = True if online: info['status'] = 2 if 'N' in info['pattern']: - info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()), - cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10 + info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()), + cls.NAME_REPLACEMENTS) if 'S' in info['pattern']: size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], - cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10 + cls.SIZE_REPLACEMENTS) info['size'] = parseFileSize(size) elif isinstance(info['size'], basestring): @@ -318,6 +408,9 @@ class SimpleHoster(Hoster): hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash" info[hashtype] = info['pattern']['H'] + if not info['pattern']: + info.pop('pattern', None) + return info @@ -326,11 +419,20 @@ class SimpleHoster(Hoster): def prepare(self): + self.pyfile.error = "" #@TODO: Remove in 0.4.10 + self.info = {} + self.html = "" self.link = "" #@TODO: Move to hoster class in 0.4.10 self.directDL = False #@TODO: Move to hoster class in 0.4.10 self.multihost = False #@TODO: Move to hoster class in 0.4.10 + if not self.getConfig('use_premium', True): + self.retryFree() + + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + self.req.setOption("timeout", 120) if isinstance(self.COOKIES, list): @@ -347,8 +449,7 @@ class SimpleHoster(Hoster): else: self.directDL = self.DIRECT_LINK - self.pyfile.url = replace_patterns(self.pyfile.url, - self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10 + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) def preload(self): @@ -359,132 +460,265 @@ class SimpleHoster(Hoster): def process(self, pyfile): - self.prepare() - self.checkInfo() + try: + self.prepare() + self.checkInfo() - if self.directDL: - self.logDebug("Looking for direct download link...") - self.handleDirect() + if self.directDL: + self.logDebug("Looking for direct download link...") + self.handleDirect(pyfile) - if self.multihost and not self.link and not self.lastDownload: - self.logDebug("Looking for leeched download link...") - self.handleMulti() + if self.multihost and not self.link and not self.lastDownload: + self.logDebug("Looking for leeched download link...") + self.handleMulti(pyfile) + + if not self.link and not self.lastDownload: + self.MULTI_HOSTER = False + self.retry(1, reason="Multi hoster fails") if not self.link and not self.lastDownload: - self.MULTI_HOSTER = False - self.retry(1, reason="Multi hoster fails") + self.preload() + self.checkInfo() - if not self.link and not self.lastDownload: - self.preload() - self.checkInfo() + if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as premium download") + self.handlePremium(pyfile) - if self.html is None: - self.fail(_("No html retrieved")) + elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): + self.logDebug("Handled as free download") + self.handleFree(pyfile) - if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()): - self.logDebug("Handled as premium download") - self.handlePremium() + self.downloadLink(self.link, self.DISPOSITION) + self.checkFile() + + except Fail, e: #@TODO: Move to PluginThread in 0.4.10 + err = str(e) #@TODO: Recheck in 0.4.10 + + if err == _("No captcha result obtained in appropiate time by any of the plugins."): #@TODO: Fix in 0.4.10 + self.checkFile() + + elif self.getConfig('fallback', True) and self.premium: + self.logWarning(_("Premium download failed"), e) + self.retryFree() else: - self.logDebug("Handled as free download") - self.handleFree() + raise Fail(err) + - self.downloadLink(self.link) - self.checkFile() + def downloadLink(self, link, disposition=True): + if not link or not isinstance(link, basestring): + return + + self.correctCaptcha() + link = html_unescape(link.strip().decode('unicode-escape')) #@TODO: Move this check to plugin `load` method in 0.4.10 - def downloadLink(self, link): - if link and isinstance(link, basestring): - self.correctCaptcha() - self.download(link, disposition=True) + if not urlparse.urlparse(link).scheme: + url_p = urlparse.urlparse(self.pyfile.url) + baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) + link = urlparse.urljoin(baseurl, link) + self.download(link, ref=False, disposition=disposition) - def checkFile(self): + + def checkFile(self, rules={}): if self.cTask and not self.lastDownload: self.invalidCaptcha() self.retry(10, reason=_("Wrong captcha")) - elif not self.lastDownload or not exists(fs_encode(self.lastDownload)): - self.fail(_("No file downloaded")) + elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)): + self.lastDownload = "" + self.error(self.pyfile.error or _("No file downloaded")) else: - rules = {'empty file': re.compile(r"^$")} + errmsg = self.checkDownload({'Empty file': re.compile(r'\A\s*\Z'), + 'Html error': re.compile(r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)')}) - if hasattr(self, 'ERROR_PATTERN'): - rules['error'] = re.compile(self.ERROR_PATTERN) + if not errmsg: + for r, p in [('Html file' , re.compile(r'\A\s*<!DOCTYPE html') ), + ('Request error', re.compile(r'([Aa]n error occured while processing your request)'))]: + if r not in rules: + rules[r] = p - check = self.checkDownload(rules) - if check: #@TODO: Move to hoster in 0.4.10 - errmsg = check.strip().capitalize() + (" | " + self.lastCheck.strip() if self.lastCheck else "") - self.retry(10, 60, errmsg) + for r, a in [('Error' , "ERROR_PATTERN" ), + ('Premium only', "PREMIUM_ONLY_PATTERN"), + ('Wait error' , "WAIT_PATTERN" )]: + if r not in rules and hasattr(self, a): + rules[r] = getattr(self, a) + + errmsg = self.checkDownload(rules) + + if not errmsg: + return + + errmsg = errmsg.strip().capitalize() + + try: + errmsg += " | " + self.lastCheck.group(1).strip() + except Exception: + pass + + self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry") + self.retry(3, 60, errmsg) def checkErrors(self): - if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html): - self.fail(_("Link require a premium account to be handled")) + if not self.html: + self.logWarning(_("No html code to check")) + return + + if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html): + self.fail(_("Connection from your current IP address is not allowed")) + + elif not self.premium: + if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + self.fail(_("File can be downloaded by premium users only")) + + elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html): + self.fail(_("File too large for free download")) + + elif hasattr(self, 'DOWNLOAD_LIMIT_PATTERN') and re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html): + m = re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html) + try: + errmsg = m.group(1).strip() + except Exception: + errmsg = m.group(0).strip() + + self.info['error'] = re.sub(r'<.*?>', " ", errmsg) + self.logWarning(self.info['error']) + + if re.search('da(il)?y|today', errmsg, re.I): + wait_time = secondsToMidnight(gmt=2) + else: + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) + + self.wantReconnect = wait_time > 300 + self.retry(1, wait_time, _("Download limit exceeded")) + + if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html): + self.multiDL = True if hasattr(self, 'ERROR_PATTERN'): m = re.search(self.ERROR_PATTERN, self.html) if m: - errmsg = self.info['error'] = m.group(1) - self.error(errmsg) + try: + errmsg = m.group(1).strip() + except Exception: + errmsg = m.group(0).strip() + + self.info['error'] = re.sub(r'<.*?>', " ", errmsg) + self.logWarning(self.info['error']) + + if re.search('limit|wait', errmsg, re.I): + if re.search("da(il)?y|today", errmsg): + wait_time = secondsToMidnight(gmt=2) + else: + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) + + self.wantReconnect = wait_time > 300 + self.retry(1, wait_time, _("Download limit exceeded")) + + elif re.search('country|ip|region|nation', errmsg, re.I): + self.fail(_("Connection from your current IP address is not allowed")) + + elif re.search('captcha|code', errmsg, re.I): + self.invalidCaptcha() + + elif re.search('countdown|expired', errmsg, re.I): + self.retry(wait_time=60, reason=_("Link expired")) + + elif re.search('maintenance|maintainance|temp', errmsg, re.I): + self.tempOffline() - if hasattr(self, 'WAIT_PATTERN'): + elif re.search('up to', errmsg, re.I): + self.fail(_("File too large for free download")) + + elif re.search('offline|delet|remov|not (found|available)', errmsg, re.I): + self.offline() + + elif re.search('premium', errmsg, re.I): + self.fail(_("File can be downloaded by premium users only")) + + else: + self.wantReconnect = True + self.retry(wait_time=60, reason=errmsg) + + elif hasattr(self, 'WAIT_PATTERN'): m = re.search(self.WAIT_PATTERN, self.html) if m: - wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in - re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)]) + try: + waitmsg = m.group(1).strip() + except Exception: + waitmsg = m.group(0).strip() + + wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in + re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I)) self.wait(wait_time, wait_time > 300) - return self.info.pop('error', None) - def checkStatus(self): - status = self.info['status'] + def checkStatus(self, getinfo=True): + if not self.info or getinfo: + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) + self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("Current file info: %s" % self.info) - if status is 1: - self.offline() + try: + status = self.info['status'] - elif status is 6: - self.tempOffline() + if status is 1: + self.offline() - elif status is not 2: - self.logDebug("File status: %s" % statusMap[status], - "File info: %s" % self.info) + elif status is 6: + self.tempOffline() + elif status is 8: + self.fail(self.info['error'] if 'error' in self.info else _("Failed")) - def checkNameSize(self): - name = self.info['name'] - size = self.info['size'] - url = self.info['url'] + finally: + self.logDebug("File status: %s" % statusMap[status]) - if name and name != url: - self.pyfile.name = name - else: - self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1] - if size > 0: - self.pyfile.size = size - else: - size = "Unknown" + def checkNameSize(self, getinfo=True): + if not self.info or getinfo: + self.logDebug("Update file info...") + self.logDebug("Previous file info: %s" % self.info) + self.info.update(self.getInfo(self.pyfile.url, self.html)) + self.logDebug("Current file info: %s" % self.info) - self.logDebug("File name: %s" % name, - "File size: %s" % size) + try: + url = self.info['url'].strip() + name = self.info['name'].strip() + if name and name != url: + self.pyfile.name = name + except Exception: + pass + + try: + size = self.info['size'] + if size > 0: + self.pyfile.size = size + + except Exception: + pass + + self.logDebug("File name: %s" % self.pyfile.name, + "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown") - def checkInfo(self): - self.updateInfo(self.getInfo(self.pyfile.url, self.html)) + def checkInfo(self): self.checkNameSize() if self.html: self.checkErrors() + self.checkNameSize() - self.updateInfo(self.getInfo(self.pyfile.url, self.html)) - - self.checkNameSize() - self.checkStatus() + self.checkStatus(getinfo=False) #: Deprecated @@ -494,56 +728,43 @@ class SimpleHoster(Hoster): return self.info - def updateInfo(self, info): - self.logDebug(_("File info (BEFORE): %s") % self.info) - self.info.update(info) - self.logDebug(_("File info (AFTER): %s") % self.info) - - - def handleDirect(self): - link = _isDirectLink(self, self.pyfile.url, self.resumeDownload) + def handleDirect(self, pyfile): + link = self.directLink(pyfile.url, self.resumeDownload) if link: self.logInfo(_("Direct download link detected")) - self.link = link else: - self.logDebug(_("Direct download link not found")) + self.logDebug("Direct download link not found") - def handleMulti(self): #: Multi-hoster handler + def handleMulti(self, pyfile): #: Multi-hoster handler pass - def handleFree(self): + def handleFree(self, pyfile): if not hasattr(self, 'LINK_FREE_PATTERN'): - self.fail(_("Free download not implemented")) - - try: - m = re.search(self.LINK_FREE_PATTERN, self.html) - if m is None: - self.error(_("Free download link not found")) + self.logError(_("Free download not implemented")) + m = re.search(self.LINK_FREE_PATTERN, self.html) + if m is None: + self.error(_("Free download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - - def handlePremium(self): + def handlePremium(self, pyfile): if not hasattr(self, 'LINK_PREMIUM_PATTERN'): - self.fail(_("Premium download not implemented")) - - try: - m = re.search(self.LINK_PREMIUM_PATTERN, self.html) - if m is None: - self.error(_("Premium download link not found")) + self.logError(_("Premium download not implemented")) + self.logDebug("Handled as free download") + self.handleFree(pyfile) + m = re.search(self.LINK_PREMIUM_PATTERN, self.html) + if m is None: + self.error(_("Premium download link not found")) + else: self.link = m.group(1) - except Exception, e: - self.fail(e) - def longWait(self, wait_time=None, max_tries=3): if wait_time and isinstance(wait_time, (int, long, float)): @@ -555,8 +776,7 @@ class SimpleHoster(Hoster): self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str) - self.setWait(wait_time, True) - self.wait() + self.wait(wait_time, True) self.retry(max_tries=max_tries, reason=_("Download limit reached")) @@ -565,6 +785,9 @@ class SimpleHoster(Hoster): def checkTrafficLeft(self): + if not self.account: + return True + traffic = self.account.getAccountInfo(self.user, True)['trafficleft'] if traffic is None: @@ -578,6 +801,26 @@ class SimpleHoster(Hoster): #@TODO: Remove in 0.4.10 + def getConfig(self, option, default=''): + """getConfig with default value - sublass may not implements all config options""" + try: + return self.getConf(option) + + except KeyError: + return default + + + def retryFree(self): + if not self.premium: + return + self.premium = False + self.account = None + self.req = self.core.requestFactory.getRequest(self.__name__) + self.retries = -1 + raise Retry(_("Fallback to free download")) + + + #@TODO: Remove in 0.4.10 def wait(self, seconds=0, reconnect=None): return _wait(self, seconds, reconnect) |