summary | refs | log | tree | commit | diff | stats
path: root/module/plugins/internal/SimpleHoster.py
diff options
context:
space:
mode:
author: GammaC0de <GammaC0de@users.noreply.github.com> 2015-05-29 23:33:10 +0200
committer: GammaC0de <GammaC0de@users.noreply.github.com> 2015-05-29 23:33:10 +0200
commit: 844dfd92f590e531ca2f7fd86305fcbc13a03721 (patch)
tree: 5303bd07749b362dab071ada6197fe37dda85b27 /module/plugins/internal/SimpleHoster.py
parent: [BitshareCom] Code cosmetics (diff)
parent: [SimpleHoster] Fix DB error (diff)
download: pyload-844dfd92f590e531ca2f7fd86305fcbc13a03721.tar.xz
Merge pull request #1 from pyload/stable
sync stable
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- module/plugins/internal/SimpleHoster.py 649
1 files changed, 446 insertions, 203 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index 6726726e1..1d44a6642 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -1,18 +1,20 @@
# -*- coding: utf-8 -*-
+import datetime
+import mimetypes
+import os
import re
-
-from os.path import exists
-from time import time
-from urllib import unquote
-from urlparse import urljoin, urlparse
+import time
+import urllib
+import urlparse
from module.PyFile import statusMap as _statusMap
from module.network.CookieJar import CookieJar
+from module.network.HTTPRequest import BadHeader
from module.network.RequestFactory import getURL
from module.plugins.Hoster import Hoster
-from module.plugins.Plugin import Fail
-from module.utils import fixup, fs_encode, parseFileSize
+from module.plugins.Plugin import Fail, Retry
+from module.utils import fixup, fs_encode, html_unescape, parseFileSize
#@TODO: Adapt and move to PyFile in 0.4.10
@@ -25,7 +27,7 @@ def _error(self, reason, type):
type = "unknown"
msg = _("%s error") % type.strip().capitalize() if type else _("Error")
- msg += ": %s" % reason.strip() if reason else ""
+ msg += (": %s" % reason.strip()) if reason else ""
msg += _(" | Plugin may be out of date")
raise Fail(msg)
@@ -72,7 +74,7 @@ def parseHtmlForm(attr_str, html, input_names={}):
if name:
value = parseHtmlTagAttrValue("value", inputtag.group(1))
if not value:
- inputs[name] = inputtag.group(3) or ''
+ inputs[name] = inputtag.group(3) or ""
else:
inputs[name] = value
@@ -98,38 +100,77 @@ def parseHtmlForm(attr_str, html, input_names={}):
return {}, None #: no matching form found
-#: Deprecated
+#@TODO: Remove in 0.4.10
def parseFileInfo(plugin, url="", html=""):
if hasattr(plugin, "getInfo"):
info = plugin.getInfo(url, html)
res = info['name'], info['size'], info['status'], info['url']
else:
- res = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url
+ url = urllib.unquote(url)
+ url_p = urlparse.urlparse(url)
+ res = ((url_p.path.split('/')[-1]
+ or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+ or url_p.netloc.split('.', 1)[0]),
+ 0,
+ 3 if url else 8,
+ url)
return res
#@TODO: Remove in 0.4.10
-#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10
def create_getInfo(plugin):
- if hasattr(plugin, "parseInfos"):
- fn = lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)]
- else:
- fn = lambda urls: [parseFileInfo(url) for url in urls]
+ def getInfo(urls):
+ for url in urls:
+ if hasattr(plugin, "URL_REPLACEMENTS"):
+ url = replace_patterns(url, plugin.URL_REPLACEMENTS)
+ yield parseFileInfo(plugin, url)
- return fn
+ return getInfo
def timestamp():
- return int(time() * 1000)
+ return int(time.time() * 1000)
#@TODO: Move to hoster class in 0.4.10
-def _isDirectLink(self, url, resumable=False):
- link = ""
+def getFileURL(self, url, follow_location=None):
+ link = ""
+ redirect = 1
+
+ if type(follow_location) is int:
+ redirect = max(follow_location, 1)
+ else:
+ redirect = 10
+
+ for i in xrange(redirect):
+ try:
+ self.logDebug("Redirect #%d to: %s" % (i, url))
+ header = self.load(url, just_header=True, decode=True)
+
+ except Exception: #: Bad bad bad... rewrite this part in 0.4.10
+ req = pyreq.getHTTPRequest()
+ res = req.load(url, just_header=True, decode=True)
- for i in xrange(5 if resumable else 1):
- header = self.load(url, ref=True, cookies=True, just_header=True, decode=True)
+ req.close()
+
+ header = {"code": req.code}
+ for line in res.splitlines():
+ line = line.strip()
+ if not line or ":" not in line:
+ continue
+
+ key, none, value = line.partition(":")
+ key = key.lower().strip()
+ value = value.strip()
+
+ if key in header:
+ if type(header[key]) == list:
+ header[key].append(value)
+ else:
+ header[key] = [header[key], value]
+ else:
+ header[key] = value
if 'content-disposition' in header:
link = url
@@ -137,62 +178,100 @@ def _isDirectLink(self, url, resumable=False):
elif 'location' in header and header['location']:
location = header['location']
- if not urlparse(location).scheme:
- p = urlparse(url)
- base = "%s://%s" % (p.scheme, p.netloc)
- location = urljoin(base, location)
+ if not urlparse.urlparse(location).scheme:
+ url_p = urlparse.urlparse(url)
+ baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+ location = urlparse.urljoin(baseurl, location)
if 'code' in header and header['code'] == 302:
link = location
- elif resumable:
+ if follow_location:
url = location
- self.logDebug("Redirect #%d to: %s" % (++i, location))
continue
+ else:
+ extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
+
+ if 'content-type' in header and header['content-type']:
+ mimetype = header['content-type'].split(';')[0].strip()
+
+ elif extension:
+ mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+ else:
+ mimetype = ""
+
+ if mimetype and (link or 'html' not in mimetype):
+ link = url
+ else:
+ link = ""
+
break
+
else:
- self.logError(_("Too many redirects"))
+ try:
+ self.logError(_("Too many redirects"))
+ except Exception:
+ pass
return link
+def secondsToMidnight(gmt=0):
+ now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt)
+
+ if now.hour is 0 and now.minute < 10:
+ midnight = now
+ else:
+ midnight = now + datetime.timedelta(days=1)
+
+ td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now
+
+ if hasattr(td, 'total_seconds'):
+ res = td.total_seconds()
+ else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds
+ res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+ return int(res)
+
+
class SimpleHoster(Hoster):
__name__ = "SimpleHoster"
__type__ = "hoster"
- __version__ = "0.79"
+ __version__ = "1.50"
__pattern__ = r'^unmatchable$'
+ __config__ = [("use_premium", "bool", "Use premium account if available" , True),
+ ("fallback" , "bool", "Fallback to free download if premium fails", True)]
__description__ = """Simple hoster plugin"""
__license__ = "GPLv3"
- __authors__ = [("zoidberg", "zoidberg@mujmail.cz"),
- ("stickell", "l.stickell@yahoo.it"),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
"""
- Info patterns should be defined by each hoster:
+ Info patterns:
- INFO_PATTERN: (optional) Name and Size of the file
+ INFO_PATTERN: (mandatory) Name and Size of the file
example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)'
or
- NAME_PATTERN: (optional) Name that will be set for the file
+ NAME_PATTERN: (mandatory) Name that will be set for the file
example: NAME_PATTERN = r'(?P<N>file_name)'
- SIZE_PATTERN: (optional) Size that will be checked for the file
+ SIZE_PATTERN: (mandatory) Size that will be checked for the file
example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
HASHSUM_PATTERN: (optional) Hash code and type of the file
example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
- OFFLINE_PATTERN: (optional) Check if the file is yet available online
+ OFFLINE_PATTERN: (mandatory) Check if the page is unreachable
example: OFFLINE_PATTERN = r'File (deleted|not found)'
- TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline
+ TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable
example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)'
- Error handling patterns are all optional:
+ Error patterns:
WAIT_PATTERN: (optional) Detect waiting time
example: WAIT_PATTERN = r''
@@ -200,11 +279,23 @@ class SimpleHoster(Hoster):
PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account
example: PREMIUM_ONLY_PATTERN = r'Premium account required'
+ HAPPY_HOUR_PATTERN: (optional) Check if a happy hour is in effect (when matched, multiDL is enabled)
+ example: HAPPY_HOUR_PATTERN = r'Happy hour'
+
+ IP_BLOCKED_PATTERN: (optional) Check if connections from the current IP address are not allowed
+ example: IP_BLOCKED_PATTERN = r'in your country'
+
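+ DOWNLOAD_LIMIT_PATTERN: (optional) Check if the free download limit has been exceeded (the matched text is parsed for the waiting time)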
+ example: DOWNLOAD_LIMIT_PATTERN = r'download limit'
+
+ SIZE_LIMIT_PATTERN: (optional) Check if the file is too large for free download
+ example: SIZE_LIMIT_PATTERN = r'up to'
+
ERROR_PATTERN: (optional) Detect any error preventing download
example: ERROR_PATTERN = r''
- Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download:
+ Instead of overriding the handleFree and handlePremium methods, you may define the following patterns for basic link handling:
LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download
example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"'
@@ -217,31 +308,46 @@ class SimpleHoster(Hoster):
SIZE_REPLACEMENTS = []
URL_REPLACEMENTS = []
- TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct
- COOKIES = True #: or False or list of tuples [(domain, name, value)]
- CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account
- DIRECT_LINK = None #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
- MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method)
+ TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct
+ COOKIES = True #: or False or list of tuples [(domain, name, value)]
+ CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account
+ DIRECT_LINK = None #: Set to True to look for a direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
+ MULTI_HOSTER = False #: Set to True to leech other hoster link (as defined in handleMulti method)
+ LOGIN_ACCOUNT = False #: Set to True to require account login
+ DISPOSITION = True #: Set to True to use any content-disposition value in http header as file name
+
+ directLink = getFileURL #@TODO: Remove in 0.4.10
@classmethod
- def parseInfos(cls, urls):
- for url in urls:
- url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
- yield cls.getInfo(url)
+ def apiInfo(cls, url):
+ url = urllib.unquote(url)
+ url_p = urlparse.urlparse(url)
+ return {'name' : (url_p.path.split('/')[-1]
+ or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+ or url_p.netloc.split('.', 1)[0]),
+ 'size' : 0,
+ 'status': 3 if url else 8,
+ 'url' : url}
@classmethod
def getInfo(cls, url="", html=""):
- info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}
+ info = cls.apiInfo(url)
+ online = True if info['status'] is 2 else False
- if not html:
- try:
- if not url:
- info['error'] = "missing url"
- info['status'] = 1
- raise
+ try:
+ info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here
+ except Exception:
+ info['pattern'] = {}
+
+ if not html and not online:
+ if not url:
+ info['error'] = "missing url"
+ info['status'] = 1
+
+ elif info['status'] is 3:
try:
html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
@@ -253,61 +359,45 @@ class SimpleHoster(Hoster):
if e.code is 404:
info['status'] = 1
- raise
- if e.code is 503:
+ elif e.code is 503:
info['status'] = 6
- raise
- except:
- return info
- online = False
+ except Exception:
+ pass
- if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
- info['status'] = 1
+ if html:
+ if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
+ info['status'] = 1
- elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html): #@TODO: Remove in 0.4.10
- info['status'] = 1
+ elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
+ info['status'] = 6
- elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
- info['status'] = 6
-
- else:
- try:
- info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api']
- except:
- info['pattern'] = {}
-
- for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
- "FILE_NAME_PATTERN", "NAME_PATTERN",
- "FILE_SIZE_PATTERN", "SIZE_PATTERN",
- "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
- try:
- attr = getattr(cls, pattern)
- pdict = re.search(attr, html).groupdict()
-
- if all(True for k in pdict if k not in info['pattern']):
- info['pattern'].update(pdict)
+ else:
+ for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"):
+ try:
+ attr = getattr(cls, pattern)
+ pdict = re.search(attr, html).groupdict()
- except AttributeError:
- continue
+ if all(True for k in pdict if k not in info['pattern']):
+ info['pattern'].update(pdict)
- else:
- online = True
+ except AttributeError:
+ continue
- if not info['pattern']:
- info.pop('pattern', None)
+ else:
+ online = True
if online:
info['status'] = 2
if 'N' in info['pattern']:
- info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
- cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
+ info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()),
+ cls.NAME_REPLACEMENTS)
if 'S' in info['pattern']:
size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'],
- cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
+ cls.SIZE_REPLACEMENTS)
info['size'] = parseFileSize(size)
elif isinstance(info['size'], basestring):
@@ -318,6 +408,9 @@ class SimpleHoster(Hoster):
hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
info[hashtype] = info['pattern']['H']
+ if not info['pattern']:
+ info.pop('pattern', None)
+
return info
@@ -326,11 +419,20 @@ class SimpleHoster(Hoster):
def prepare(self):
+ self.pyfile.error = "" #@TODO: Remove in 0.4.10
+
self.info = {}
+ self.html = ""
self.link = "" #@TODO: Move to hoster class in 0.4.10
self.directDL = False #@TODO: Move to hoster class in 0.4.10
self.multihost = False #@TODO: Move to hoster class in 0.4.10
+ if not self.getConfig('use_premium', True):
+ self.retryFree()
+
+ if self.LOGIN_ACCOUNT and not self.account:
+ self.fail(_("Required account not found"))
+
self.req.setOption("timeout", 120)
if isinstance(self.COOKIES, list):
@@ -347,8 +449,7 @@ class SimpleHoster(Hoster):
else:
self.directDL = self.DIRECT_LINK
- self.pyfile.url = replace_patterns(self.pyfile.url,
- self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS) #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
+ self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS)
def preload(self):
@@ -359,132 +460,265 @@ class SimpleHoster(Hoster):
def process(self, pyfile):
- self.prepare()
- self.checkInfo()
+ try:
+ self.prepare()
+ self.checkInfo()
- if self.directDL:
- self.logDebug("Looking for direct download link...")
- self.handleDirect()
+ if self.directDL:
+ self.logDebug("Looking for direct download link...")
+ self.handleDirect(pyfile)
- if self.multihost and not self.link and not self.lastDownload:
- self.logDebug("Looking for leeched download link...")
- self.handleMulti()
+ if self.multihost and not self.link and not self.lastDownload:
+ self.logDebug("Looking for leeched download link...")
+ self.handleMulti(pyfile)
+
+ if not self.link and not self.lastDownload:
+ self.MULTI_HOSTER = False
+ self.retry(1, reason="Multi hoster fails")
if not self.link and not self.lastDownload:
- self.MULTI_HOSTER = False
- self.retry(1, reason="Multi hoster fails")
+ self.preload()
+ self.checkInfo()
- if not self.link and not self.lastDownload:
- self.preload()
- self.checkInfo()
+ if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+ self.logDebug("Handled as premium download")
+ self.handlePremium(pyfile)
- if self.html is None:
- self.fail(_("No html retrieved"))
+ elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+ self.logDebug("Handled as free download")
+ self.handleFree(pyfile)
- if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
- self.logDebug("Handled as premium download")
- self.handlePremium()
+ self.downloadLink(self.link, self.DISPOSITION)
+ self.checkFile()
+
+ except Fail, e: #@TODO: Move to PluginThread in 0.4.10
+ err = str(e) #@TODO: Recheck in 0.4.10
+
+ if err == _("No captcha result obtained in appropiate time by any of the plugins."): #@TODO: Fix in 0.4.10
+ self.checkFile()
+
+ elif self.getConfig('fallback', True) and self.premium:
+ self.logWarning(_("Premium download failed"), e)
+ self.retryFree()
else:
- self.logDebug("Handled as free download")
- self.handleFree()
+ raise Fail(err)
+
- self.downloadLink(self.link)
- self.checkFile()
+ def downloadLink(self, link, disposition=True):
+ if not link or not isinstance(link, basestring):
+ return
+
+ self.correctCaptcha()
+ link = html_unescape(link.strip().decode('unicode-escape')) #@TODO: Move this check to plugin `load` method in 0.4.10
- def downloadLink(self, link):
- if link and isinstance(link, basestring):
- self.correctCaptcha()
- self.download(link, disposition=True)
+ if not urlparse.urlparse(link).scheme:
+ url_p = urlparse.urlparse(self.pyfile.url)
+ baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+ link = urlparse.urljoin(baseurl, link)
+ self.download(link, ref=False, disposition=disposition)
- def checkFile(self):
+
+ def checkFile(self, rules={}):
if self.cTask and not self.lastDownload:
self.invalidCaptcha()
self.retry(10, reason=_("Wrong captcha"))
- elif not self.lastDownload or not exists(fs_encode(self.lastDownload)):
- self.fail(_("No file downloaded"))
+ elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)):
+ self.lastDownload = ""
+ self.error(self.pyfile.error or _("No file downloaded"))
else:
- rules = {'empty file': re.compile(r"^$")}
+ errmsg = self.checkDownload({'Empty file': re.compile(r'\A\s*\Z'),
+ 'Html error': re.compile(r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)')})
- if hasattr(self, 'ERROR_PATTERN'):
- rules['error'] = re.compile(self.ERROR_PATTERN)
+ if not errmsg:
+ for r, p in [('Html file' , re.compile(r'\A\s*<!DOCTYPE html') ),
+ ('Request error', re.compile(r'([Aa]n error occured while processing your request)'))]:
+ if r not in rules:
+ rules[r] = p
- check = self.checkDownload(rules)
- if check: #@TODO: Move to hoster in 0.4.10
- errmsg = check.strip().capitalize() + (" | " + self.lastCheck.strip() if self.lastCheck else "")
- self.retry(10, 60, errmsg)
+ for r, a in [('Error' , "ERROR_PATTERN" ),
+ ('Premium only', "PREMIUM_ONLY_PATTERN"),
+ ('Wait error' , "WAIT_PATTERN" )]:
+ if r not in rules and hasattr(self, a):
+ rules[r] = getattr(self, a)
+
+ errmsg = self.checkDownload(rules)
+
+ if not errmsg:
+ return
+
+ errmsg = errmsg.strip().capitalize()
+
+ try:
+ errmsg += " | " + self.lastCheck.group(1).strip()
+ except Exception:
+ pass
+
+ self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry")
+ self.retry(3, 60, errmsg)
def checkErrors(self):
- if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
- self.fail(_("Link require a premium account to be handled"))
+ if not self.html:
+ self.logWarning(_("No html code to check"))
+ return
+
+ if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html):
+ self.fail(_("Connection from your current IP address is not allowed"))
+
+ elif not self.premium:
+ if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
+ self.fail(_("File can be downloaded by premium users only"))
+
+ elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html):
+ self.fail(_("File too large for free download"))
+
+ elif hasattr(self, 'DOWNLOAD_LIMIT_PATTERN') and re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html):
+ m = re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html)
+ try:
+ errmsg = m.group(1).strip()
+ except Exception:
+ errmsg = m.group(0).strip()
+
+ self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+ self.logWarning(self.info['error'])
+
+ if re.search('da(il)?y|today', errmsg, re.I):
+ wait_time = secondsToMidnight(gmt=2)
+ else:
+ wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+ re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
+
+ self.wantReconnect = wait_time > 300
+ self.retry(1, wait_time, _("Download limit exceeded"))
+
+ if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html):
+ self.multiDL = True
if hasattr(self, 'ERROR_PATTERN'):
m = re.search(self.ERROR_PATTERN, self.html)
if m:
- errmsg = self.info['error'] = m.group(1)
- self.error(errmsg)
+ try:
+ errmsg = m.group(1).strip()
+ except Exception:
+ errmsg = m.group(0).strip()
+
+ self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+ self.logWarning(self.info['error'])
+
+ if re.search('limit|wait', errmsg, re.I):
+ if re.search("da(il)?y|today", errmsg):
+ wait_time = secondsToMidnight(gmt=2)
+ else:
+ wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+ re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
+
+ self.wantReconnect = wait_time > 300
+ self.retry(1, wait_time, _("Download limit exceeded"))
+
+ elif re.search('country|ip|region|nation', errmsg, re.I):
+ self.fail(_("Connection from your current IP address is not allowed"))
+
+ elif re.search('captcha|code', errmsg, re.I):
+ self.invalidCaptcha()
+
+ elif re.search('countdown|expired', errmsg, re.I):
+ self.retry(wait_time=60, reason=_("Link expired"))
+
+ elif re.search('maintenance|maintainance|temp', errmsg, re.I):
+ self.tempOffline()
- if hasattr(self, 'WAIT_PATTERN'):
+ elif re.search('up to', errmsg, re.I):
+ self.fail(_("File too large for free download"))
+
+ elif re.search('offline|delet|remov|not (found|available)', errmsg, re.I):
+ self.offline()
+
+ elif re.search('premium', errmsg, re.I):
+ self.fail(_("File can be downloaded by premium users only"))
+
+ else:
+ self.wantReconnect = True
+ self.retry(wait_time=60, reason=errmsg)
+
+ elif hasattr(self, 'WAIT_PATTERN'):
m = re.search(self.WAIT_PATTERN, self.html)
if m:
- wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
- re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)])
+ try:
+ waitmsg = m.group(1).strip()
+ except Exception:
+ waitmsg = m.group(0).strip()
+
+ wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+ re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I))
self.wait(wait_time, wait_time > 300)
- return
self.info.pop('error', None)
- def checkStatus(self):
- status = self.info['status']
+ def checkStatus(self, getinfo=True):
+ if not self.info or getinfo:
+ self.logDebug("Update file info...")
+ self.logDebug("Previous file info: %s" % self.info)
+ self.info.update(self.getInfo(self.pyfile.url, self.html))
+ self.logDebug("Current file info: %s" % self.info)
- if status is 1:
- self.offline()
+ try:
+ status = self.info['status']
- elif status is 6:
- self.tempOffline()
+ if status is 1:
+ self.offline()
- elif status is not 2:
- self.logDebug("File status: %s" % statusMap[status],
- "File info: %s" % self.info)
+ elif status is 6:
+ self.tempOffline()
+ elif status is 8:
+ self.fail(self.info['error'] if 'error' in self.info else _("Failed"))
- def checkNameSize(self):
- name = self.info['name']
- size = self.info['size']
- url = self.info['url']
+ finally:
+ self.logDebug("File status: %s" % statusMap[status])
- if name and name != url:
- self.pyfile.name = name
- else:
- self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1]
- if size > 0:
- self.pyfile.size = size
- else:
- size = "Unknown"
+ def checkNameSize(self, getinfo=True):
+ if not self.info or getinfo:
+ self.logDebug("Update file info...")
+ self.logDebug("Previous file info: %s" % self.info)
+ self.info.update(self.getInfo(self.pyfile.url, self.html))
+ self.logDebug("Current file info: %s" % self.info)
- self.logDebug("File name: %s" % name,
- "File size: %s" % size)
+ try:
+ url = self.info['url'].strip()
+ name = self.info['name'].strip()
+ if name and name != url:
+ self.pyfile.name = name
+ except Exception:
+ pass
+
+ try:
+ size = self.info['size']
+ if size > 0:
+ self.pyfile.size = size
+
+ except Exception:
+ pass
+
+ self.logDebug("File name: %s" % self.pyfile.name,
+ "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown")
- def checkInfo(self):
- self.updateInfo(self.getInfo(self.pyfile.url, self.html))
+ def checkInfo(self):
self.checkNameSize()
if self.html:
self.checkErrors()
+ self.checkNameSize()
- self.updateInfo(self.getInfo(self.pyfile.url, self.html))
-
- self.checkNameSize()
- self.checkStatus()
+ self.checkStatus(getinfo=False)
#: Deprecated
@@ -494,56 +728,43 @@ class SimpleHoster(Hoster):
return self.info
- def updateInfo(self, info):
- self.logDebug(_("File info (BEFORE): %s") % self.info)
- self.info.update(info)
- self.logDebug(_("File info (AFTER): %s") % self.info)
-
-
- def handleDirect(self):
- link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)
+ def handleDirect(self, pyfile):
+ link = self.directLink(pyfile.url, self.resumeDownload)
if link:
self.logInfo(_("Direct download link detected"))
-
self.link = link
else:
- self.logDebug(_("Direct download link not found"))
+ self.logDebug("Direct download link not found")
- def handleMulti(self): #: Multi-hoster handler
+ def handleMulti(self, pyfile): #: Multi-hoster handler
pass
- def handleFree(self):
+ def handleFree(self, pyfile):
if not hasattr(self, 'LINK_FREE_PATTERN'):
- self.fail(_("Free download not implemented"))
-
- try:
- m = re.search(self.LINK_FREE_PATTERN, self.html)
- if m is None:
- self.error(_("Free download link not found"))
+ self.logError(_("Free download not implemented"))
+ m = re.search(self.LINK_FREE_PATTERN, self.html)
+ if m is None:
+ self.error(_("Free download link not found"))
+ else:
self.link = m.group(1)
- except Exception, e:
- self.fail(e)
-
- def handlePremium(self):
+ def handlePremium(self, pyfile):
if not hasattr(self, 'LINK_PREMIUM_PATTERN'):
- self.fail(_("Premium download not implemented"))
-
- try:
- m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
- if m is None:
- self.error(_("Premium download link not found"))
+ self.logError(_("Premium download not implemented"))
+ self.logDebug("Handled as free download")
+ self.handleFree(pyfile)
+ m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
+ if m is None:
+ self.error(_("Premium download link not found"))
+ else:
self.link = m.group(1)
- except Exception, e:
- self.fail(e)
-
def longWait(self, wait_time=None, max_tries=3):
if wait_time and isinstance(wait_time, (int, long, float)):
@@ -555,8 +776,7 @@ class SimpleHoster(Hoster):
self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str)
- self.setWait(wait_time, True)
- self.wait()
+ self.wait(wait_time, True)
self.retry(max_tries=max_tries, reason=_("Download limit reached"))
@@ -565,6 +785,9 @@ class SimpleHoster(Hoster):
def checkTrafficLeft(self):
+ if not self.account:
+ return True
+
traffic = self.account.getAccountInfo(self.user, True)['trafficleft']
if traffic is None:
@@ -578,6 +801,26 @@ class SimpleHoster(Hoster):
#@TODO: Remove in 0.4.10
+ def getConfig(self, option, default=''):
+ """getConfig with default value - subclass may not implement all config options"""
+ try:
+ return self.getConf(option)
+
+ except KeyError:
+ return default
+
+
+ def retryFree(self):
+ if not self.premium:
+ return
+ self.premium = False
+ self.account = None
+ self.req = self.core.requestFactory.getRequest(self.__name__)
+ self.retries = -1
+ raise Retry(_("Fallback to free download"))
+
+
+ #@TODO: Remove in 0.4.10
def wait(self, seconds=0, reconnect=None):
return _wait(self, seconds, reconnect)