Merge pull request #1 from pyload/stable

sync stable
author: GammaC0de <GammaC0de@users.noreply.github.com> 2015-05-29 23:33:10 +0200
committer: GammaC0de <GammaC0de@users.noreply.github.com> 2015-05-29 23:33:10 +0200
commit: 844dfd92f590e531ca2f7fd86305fcbc13a03721 (patch)
tree: 5303bd07749b362dab071ada6197fe37dda85b27 /module/plugins/internal/SimpleHoster.py
parent: [BitshareCom] Code cosmetics (diff)
parent: [SimpleHoster] Fix DB error (diff)
download: pyload-844dfd92f590e531ca2f7fd86305fcbc13a03721.tar.xz
1 files changed, 446 insertions, 203 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index 6726726e1..1d44a6642 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -1,18 +1,20 @@
 # -*- coding: utf-8 -*-
 
+import datetime
+import mimetypes
+import os
 import re
-
-from os.path import exists
-from time import time
-from urllib import unquote
-from urlparse import urljoin, urlparse
+import time
+import urllib
+import urlparse
 
 from module.PyFile import statusMap as _statusMap
 from module.network.CookieJar import CookieJar
+from module.network.HTTPRequest import BadHeader
 from module.network.RequestFactory import getURL
 from module.plugins.Hoster import Hoster
-from module.plugins.Plugin import Fail
-from module.utils import fixup, fs_encode, parseFileSize
+from module.plugins.Plugin import Fail, Retry
+from module.utils import fixup, fs_encode, html_unescape, parseFileSize
 
 
 #@TODO: Adapt and move to PyFile in 0.4.10
@@ -25,7 +27,7 @@ def _error(self, reason, type):
             type = "unknown"
 
         msg  = _("%s error") % type.strip().capitalize() if type else _("Error")
-        msg += ": %s" % reason.strip() if reason else ""
+        msg += (": %s" % reason.strip()) if reason else ""
         msg += _(" | Plugin may be out of date")
 
         raise Fail(msg)
@@ -72,7 +74,7 @@ def parseHtmlForm(attr_str, html, input_names={}):
             if name:
                 value = parseHtmlTagAttrValue("value", inputtag.group(1))
                 if not value:
-                    inputs[name] = inputtag.group(3) or ''
+                    inputs[name] = inputtag.group(3) or ""
                 else:
                     inputs[name] = value
 
@@ -98,38 +100,77 @@ def parseHtmlForm(attr_str, html, input_names={}):
     return {}, None  #: no matching form found
 
 
-#: Deprecated
+#@TODO: Remove in 0.4.10
 def parseFileInfo(plugin, url="", html=""):
     if hasattr(plugin, "getInfo"):
         info = plugin.getInfo(url, html)
         res  = info['name'], info['size'], info['status'], info['url']
     else:
-        res  = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url
+        url   = urllib.unquote(url)
+        url_p = urlparse.urlparse(url)
+        res   = ((url_p.path.split('/')[-1]
+                  or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+                  or url_p.netloc.split('.', 1)[0]),
+                 0,
+                 3 if url else 8,
+                 url)
 
     return res
 
 
 #@TODO: Remove in 0.4.10
-#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10
 def create_getInfo(plugin):
-    if hasattr(plugin, "parseInfos"):
-        fn = lambda urls: [(info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls)]
-    else:
-        fn = lambda urls: [parseFileInfo(url) for url in urls]
+    def getInfo(urls):
+        for url in urls:
+            if hasattr(plugin, "URL_REPLACEMENTS"):
+                url = replace_patterns(url, plugin.URL_REPLACEMENTS)
+            yield parseFileInfo(plugin, url)
 
-    return fn
+    return getInfo
 
 
 def timestamp():
-    return int(time() * 1000)
+    return int(time.time() * 1000)
 
 
 #@TODO: Move to hoster class in 0.4.10
-def _isDirectLink(self, url, resumable=False):
-    link = ""
+def getFileURL(self, url, follow_location=None):
+    link     = ""
+    redirect = 1
+
+    if type(follow_location) is int:
+        redirect = max(follow_location, 1)
+    else:
+        redirect = 10
+
+    for i in xrange(redirect):
+        try:
+            self.logDebug("Redirect #%d to: %s" % (i, url))
+            header = self.load(url, just_header=True, decode=True)
+
+        except Exception:  #: Bad bad bad... rewrite this part in 0.4.10
+            req = pyreq.getHTTPRequest()
+            res = req.load(url, just_header=True, decode=True)
 
-    for i in xrange(5 if resumable else 1):
-        header = self.load(url, ref=True, cookies=True, just_header=True, decode=True)
+            req.close()
+
+            header = {"code": req.code}
+            for line in res.splitlines():
+                line = line.strip()
+                if not line or ":" not in line:
+                    continue
+
+                key, none, value = line.partition(":")
+                key              = key.lower().strip()
+                value            = value.strip()
+
+                if key in header:
+                    if type(header[key]) == list:
+                        header[key].append(value)
+                    else:
+                        header[key] = [header[key], value]
+                else:
+                    header[key] = value
 
         if 'content-disposition' in header:
             link = url
@@ -137,62 +178,100 @@ def _isDirectLink(self, url, resumable=False):
         elif 'location' in header and header['location']:
             location = header['location']
 
-            if not urlparse(location).scheme:
-                p = urlparse(url)
-                base = "%s://%s" % (p.scheme, p.netloc)
-                location = urljoin(base, location)
+            if not urlparse.urlparse(location).scheme:
+                url_p    = urlparse.urlparse(url)
+                baseurl  = "%s://%s" % (url_p.scheme, url_p.netloc)
+                location = urlparse.urljoin(baseurl, location)
 
             if 'code' in header and header['code'] == 302:
                 link = location
 
-            elif resumable:
+            if follow_location:
                 url = location
-                self.logDebug("Redirect #%d to: %s" % (++i, location))
                 continue
 
+        else:
+            extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
+
+            if 'content-type' in header and header['content-type']:
+                mimetype = header['content-type'].split(';')[0].strip()
+
+            elif extension:
+                mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+            else:
+                mimetype = ""
+
+            if mimetype and (link or 'html' not in mimetype):
+                link = url
+            else:
+                link = ""
+
         break
+
     else:
-        self.logError(_("Too many redirects"))
+        try:
+            self.logError(_("Too many redirects"))
+        except Exception:
+            pass
 
     return link
 
 
+def secondsToMidnight(gmt=0):
+    now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt)
+
+    if now.hour is 0 and now.minute < 10:
+        midnight = now
+    else:
+        midnight = now + datetime.timedelta(days=1)
+
+    td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now
+
+    if hasattr(td, 'total_seconds'):
+        res = td.total_seconds()
+    else:  #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds
+        res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6
+
+    return int(res)
+
+
 class SimpleHoster(Hoster):
     __name__    = "SimpleHoster"
     __type__    = "hoster"
-    __version__ = "0.79"
+    __version__ = "1.50"
 
     __pattern__ = r'^unmatchable$'
+    __config__  = [("use_premium", "bool", "Use premium account if available"          , True),
+                   ("fallback"   , "bool", "Fallback to free download if premium fails", True)]
 
     __description__ = """Simple hoster plugin"""
     __license__     = "GPLv3"
-    __authors__     = [("zoidberg", "zoidberg@mujmail.cz"),
-                       ("stickell", "l.stickell@yahoo.it"),
-                       ("Walter Purcaro", "vuolter@gmail.com")]
+    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")]
 
 
     """
-    Info patterns should be defined by each hoster:
+    Info patterns:
 
-      INFO_PATTERN: (optional) Name and Size of the file
+      INFO_PATTERN: (mandatory) Name and Size of the file
         example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)'
       or
-        NAME_PATTERN: (optional) Name that will be set for the file
+        NAME_PATTERN: (mandatory) Name that will be set for the file
           example: NAME_PATTERN = r'(?P<N>file_name)'
-        SIZE_PATTERN: (optional) Size that will be checked for the file
+        SIZE_PATTERN: (mandatory) Size that will be checked for the file
           example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
 
       HASHSUM_PATTERN: (optional) Hash code and type of the file
         example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
 
-      OFFLINE_PATTERN: (optional) Check if the file is yet available online
+      OFFLINE_PATTERN: (mandatory) Check if the page is unreachable
         example: OFFLINE_PATTERN = r'File (deleted|not found)'
 
-      TEMP_OFFLINE_PATTERN: (optional) Check if the file is temporarily offline
+      TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable
         example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)'
 
 
-    Error handling patterns are all optional:
+    Error patterns:
 
       WAIT_PATTERN: (optional) Detect waiting time
         example: WAIT_PATTERN = r''
@@ -200,11 +279,23 @@ class SimpleHoster(Hoster):
       PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account
         example: PREMIUM_ONLY_PATTERN = r'Premium account required'
 
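+      HAPPY_HOUR_PATTERN: (optional) Detect a happy hour notice (multiDL is then enabled)
+        example: HAPPY_HOUR_PATTERN = r'Happy hour'
+
+      IP_BLOCKED_PATTERN: (optional) Check if connections from the current IP address are not allowed
+        example: IP_BLOCKED_PATTERN = r'in your country'
+
+      DOWNLOAD_LIMIT_PATTERN: (optional) Detect an exceeded download limit for free users (wait, then retry)
+        example: DOWNLOAD_LIMIT_PATTERN = r'download limit'
+
+      SIZE_LIMIT_PATTERN: (optional) Check if the file is too large for free download
+        example: SIZE_LIMIT_PATTERN = r'up to'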
+
       ERROR_PATTERN: (optional) Detect any error preventing download
         example: ERROR_PATTERN = r''
 
 
-    Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download:
+    Instead of overriding the handleFree and handlePremium methods, you may define the following patterns for basic link handling:
 
       LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download
         example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"'
@@ -217,31 +308,46 @@ class SimpleHoster(Hoster):
     SIZE_REPLACEMENTS = []
     URL_REPLACEMENTS  = []
 
-    TEXT_ENCODING       = False  #: Set to True or encoding name if encoding value in http header is not correct
-    COOKIES             = True   #: or False or list of tuples [(domain, name, value)]
-    CHECK_TRAFFIC       = False  #: Set to True to force checking traffic left for premium account
-    DIRECT_LINK         = None   #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
-    MULTI_HOSTER        = False  #: Set to True to leech other hoster link (as defined in handleMulti method)
+    TEXT_ENCODING = False  #: Set to True or encoding name if encoding value in http header is not correct
+    COOKIES       = True   #: or False or list of tuples [(domain, name, value)]
+    CHECK_TRAFFIC = False  #: Set to True to force checking traffic left for premium account
+    DIRECT_LINK   = None   #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
+    MULTI_HOSTER  = False  #: Set to True to leech other hoster link (as defined in handleMulti method)
+    LOGIN_ACCOUNT = False  #: Set to True to require account login
+    DISPOSITION   = True   #: Set to True to use any content-disposition value in http header as file name
+
+    directLink = getFileURL  #@TODO: Remove in 0.4.10
 
 
     @classmethod
-    def parseInfos(cls, urls):
-        for url in urls:
-            url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
-            yield cls.getInfo(url)
+    def apiInfo(cls, url):
+        url   = urllib.unquote(url)
+        url_p = urlparse.urlparse(url)
+        return {'name'  : (url_p.path.split('/')[-1]
+                           or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0]
+                           or url_p.netloc.split('.', 1)[0]),
+                'size'  : 0,
+                'status': 3 if url else 8,
+                'url'   : url}
 
 
     @classmethod
     def getInfo(cls, url="", html=""):
-        info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}
+        info   = cls.apiInfo(url)
+        online = True if info['status'] is 2 else False
 
-        if not html:
-            try:
-                if not url:
-                    info['error']  = "missing url"
-                    info['status'] = 1
-                    raise
+        try:
+            info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here
 
+        except Exception:
+            info['pattern'] = {}
+
+        if not html and not online:
+            if not url:
+                info['error']  = "missing url"
+                info['status'] = 1
+
+            elif info['status'] is 3:
                 try:
                     html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
 
@@ -253,61 +359,45 @@ class SimpleHoster(Hoster):
 
                     if e.code is 404:
                         info['status'] = 1
-                        raise
 
-                    if e.code is 503:
+                    elif e.code is 503:
                         info['status'] = 6
-                        raise
-            except:
-                return info
 
-        online = False
+                except Exception:
+                    pass
 
-        if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
-            info['status'] = 1
+        if html:
+            if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
+                info['status'] = 1
 
-        elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html):  #@TODO: Remove in 0.4.10
-            info['status'] = 1
+            elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
+                info['status'] = 6
 
-        elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
-            info['status'] = 6
-
-        else:
-            try:
-                info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here, please save api stuff to info['api']
-            except:
-                info['pattern'] = {}
-
-            for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
-                            "FILE_NAME_PATTERN", "NAME_PATTERN",
-                            "FILE_SIZE_PATTERN", "SIZE_PATTERN",
-                            "HASHSUM_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
-                try:
-                    attr  = getattr(cls, pattern)
-                    pdict = re.search(attr, html).groupdict()
-
-                    if all(True for k in pdict if k not in info['pattern']):
-                        info['pattern'].update(pdict)
+            else:
+                for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", "HASHSUM_PATTERN"):
+                    try:
+                        attr  = getattr(cls, pattern)
+                        pdict = re.search(attr, html).groupdict()
 
-                except AttributeError:
-                    continue
+                        if all(True for k in pdict if k not in info['pattern']):
+                            info['pattern'].update(pdict)
 
-                else:
-                    online = True
+                    except AttributeError:
+                        continue
 
-            if not info['pattern']:
-                info.pop('pattern', None)
+                    else:
+                        online = True
 
         if online:
             info['status'] = 2
 
             if 'N' in info['pattern']:
-                info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
-                                                cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS)  #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
+                info['name'] = replace_patterns(urllib.unquote(info['pattern']['N'].strip()),
+                                                cls.NAME_REPLACEMENTS)
 
             if 'S' in info['pattern']:
                 size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'],
-                                        cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS)  #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
+                                        cls.SIZE_REPLACEMENTS)
                 info['size'] = parseFileSize(size)
 
             elif isinstance(info['size'], basestring):
@@ -318,6 +408,9 @@ class SimpleHoster(Hoster):
                 hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
                 info[hashtype] = info['pattern']['H']
 
+        if not info['pattern']:
+            info.pop('pattern', None)
+
         return info
 
 
@@ -326,11 +419,20 @@ class SimpleHoster(Hoster):
 
 
     def prepare(self):
+        self.pyfile.error = ""  #@TODO: Remove in 0.4.10
+
         self.info      = {}
+        self.html      = ""
         self.link      = ""     #@TODO: Move to hoster class in 0.4.10
         self.directDL  = False  #@TODO: Move to hoster class in 0.4.10
         self.multihost = False  #@TODO: Move to hoster class in 0.4.10
 
+        if not self.getConfig('use_premium', True):
+            self.retryFree()
+
+        if self.LOGIN_ACCOUNT and not self.account:
+            self.fail(_("Required account not found"))
+
         self.req.setOption("timeout", 120)
 
         if isinstance(self.COOKIES, list):
@@ -347,8 +449,7 @@ class SimpleHoster(Hoster):
         else:
             self.directDL = self.DIRECT_LINK
 
-        self.pyfile.url = replace_patterns(self.pyfile.url,
-                                           self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
+        self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS)
 
 
     def preload(self):
@@ -359,132 +460,265 @@ class SimpleHoster(Hoster):
 
 
     def process(self, pyfile):
-        self.prepare()
-        self.checkInfo()
+        try:
+            self.prepare()
+            self.checkInfo()
 
-        if self.directDL:
-            self.logDebug("Looking for direct download link...")
-            self.handleDirect()
+            if self.directDL:
+                self.logDebug("Looking for direct download link...")
+                self.handleDirect(pyfile)
 
-        if self.multihost and not self.link and not self.lastDownload:
-            self.logDebug("Looking for leeched download link...")
-            self.handleMulti()
+            if self.multihost and not self.link and not self.lastDownload:
+                self.logDebug("Looking for leeched download link...")
+                self.handleMulti(pyfile)
+
+                if not self.link and not self.lastDownload:
+                    self.MULTI_HOSTER = False
+                    self.retry(1, reason="Multi hoster fails")
 
             if not self.link and not self.lastDownload:
-                self.MULTI_HOSTER = False
-                self.retry(1, reason="Multi hoster fails")
+                self.preload()
+                self.checkInfo()
 
-        if not self.link and not self.lastDownload:
-            self.preload()
-            self.checkInfo()
+                if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+                    self.logDebug("Handled as premium download")
+                    self.handlePremium(pyfile)
 
-            if self.html is None:
-                self.fail(_("No html retrieved"))
+                elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
+                    self.logDebug("Handled as free download")
+                    self.handleFree(pyfile)
 
-            if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
-                self.logDebug("Handled as premium download")
-                self.handlePremium()
+            self.downloadLink(self.link, self.DISPOSITION)
+            self.checkFile()
+
+        except Fail, e:  #@TODO: Move to PluginThread in 0.4.10
+            err = str(e)  #@TODO: Recheck in 0.4.10
+
+            if err == _("No captcha result obtained in appropiate time by any of the plugins."):  #@TODO: Fix in 0.4.10
+                self.checkFile()
+
+            elif self.getConfig('fallback', True) and self.premium:
+                self.logWarning(_("Premium download failed"), e)
+                self.retryFree()
 
             else:
-                self.logDebug("Handled as free download")
-                self.handleFree()
+                raise Fail(err)
+
 
-        self.downloadLink(self.link)
-        self.checkFile()
+    def downloadLink(self, link, disposition=True):
+        if not link or not isinstance(link, basestring):
+            return
+
+        self.correctCaptcha()
 
+        link = html_unescape(link.strip().decode('unicode-escape'))  #@TODO: Move this check to plugin `load` method in 0.4.10
 
-    def downloadLink(self, link):
-        if link and isinstance(link, basestring):
-            self.correctCaptcha()
-            self.download(link, disposition=True)
+        if not urlparse.urlparse(link).scheme:
+            url_p   = urlparse.urlparse(self.pyfile.url)
+            baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+            link    = urlparse.urljoin(baseurl, link)
 
+        self.download(link, ref=False, disposition=disposition)
 
-    def checkFile(self):
+
+    def checkFile(self, rules={}):
         if self.cTask and not self.lastDownload:
             self.invalidCaptcha()
             self.retry(10, reason=_("Wrong captcha"))
 
-        elif not self.lastDownload or not exists(fs_encode(self.lastDownload)):
-            self.fail(_("No file downloaded"))
+        elif not self.lastDownload or not os.path.exists(fs_encode(self.lastDownload)):
+            self.lastDownload = ""
+            self.error(self.pyfile.error or _("No file downloaded"))
 
         else:
-            rules = {'empty file': re.compile(r"^$")}
+            errmsg = self.checkDownload({'Empty file': re.compile(r'\A\s*\Z'),
+                                         'Html error': re.compile(r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)')})
 
-            if hasattr(self, 'ERROR_PATTERN'):
-                rules['error'] = re.compile(self.ERROR_PATTERN)
+            if not errmsg:
+                for r, p in [('Html file'    , re.compile(r'\A\s*<!DOCTYPE html')                                ),
+                             ('Request error', re.compile(r'([Aa]n error occured while processing your request)'))]:
+                    if r not in rules:
+                        rules[r] = p
 
-            check = self.checkDownload(rules)
-            if check:  #@TODO: Move to hoster in 0.4.10
-                errmsg = check.strip().capitalize() + (" | " + self.lastCheck.strip() if self.lastCheck else "")
-                self.retry(10, 60, errmsg)
+                for r, a in [('Error'       , "ERROR_PATTERN"       ),
+                             ('Premium only', "PREMIUM_ONLY_PATTERN"),
+                             ('Wait error'  , "WAIT_PATTERN"        )]:
+                    if r not in rules and hasattr(self, a):
+                        rules[r] = getattr(self, a)
+
+                errmsg = self.checkDownload(rules)
+
+            if not errmsg:
+                return
+
+            errmsg = errmsg.strip().capitalize()
+
+            try:
+                errmsg += " | " + self.lastCheck.group(1).strip()
+            except Exception:
+                pass
+
+            self.logWarning("Check result: " + errmsg, "Waiting 1 minute and retry")
+            self.retry(3, 60, errmsg)
 
 
     def checkErrors(self):
-        if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
-            self.fail(_("Link require a premium account to be handled"))
+        if not self.html:
+            self.logWarning(_("No html code to check"))
+            return
+
+        if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html):
+            self.fail(_("Connection from your current IP address is not allowed"))
+
+        elif not self.premium:
+            if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
+                self.fail(_("File can be downloaded by premium users only"))
+
+            elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html):
+                self.fail(_("File too large for free download"))
+
+            elif hasattr(self, 'DOWNLOAD_LIMIT_PATTERN') and re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html):
+                m = re.search(self.DOWNLOAD_LIMIT_PATTERN, self.html)
+                try:
+                    errmsg = m.group(1).strip()
+                except Exception:
+                    errmsg = m.group(0).strip()
+
+                self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+                self.logWarning(self.info['error'])
+
+                if re.search('da(il)?y|today', errmsg, re.I):
+                    wait_time = secondsToMidnight(gmt=2)
+                else:
+                    wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
+
+                self.wantReconnect = wait_time > 300
+                self.retry(1, wait_time, _("Download limit exceeded"))
+
+        if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html):
+            self.multiDL = True
 
         if hasattr(self, 'ERROR_PATTERN'):
             m = re.search(self.ERROR_PATTERN, self.html)
             if m:
-                errmsg = self.info['error'] = m.group(1)
-                self.error(errmsg)
+                try:
+                    errmsg = m.group(1).strip()
+                except Exception:
+                    errmsg = m.group(0).strip()
+
+                self.info['error'] = re.sub(r'<.*?>', " ", errmsg)
+                self.logWarning(self.info['error'])
+
+                if re.search('limit|wait', errmsg, re.I):
+                    if re.search("da(il)?y|today", errmsg):
+                        wait_time = secondsToMidnight(gmt=2)
+                    else:
+                        wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                    re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I))
+
+                    self.wantReconnect = wait_time > 300
+                    self.retry(1, wait_time, _("Download limit exceeded"))
+
+                elif re.search('country|ip|region|nation', errmsg, re.I):
+                    self.fail(_("Connection from your current IP address is not allowed"))
+
+                elif re.search('captcha|code', errmsg, re.I):
+                    self.invalidCaptcha()
+
+                elif re.search('countdown|expired', errmsg, re.I):
+                    self.retry(wait_time=60, reason=_("Link expired"))
+
+                elif re.search('maintenance|maintainance|temp', errmsg, re.I):
+                    self.tempOffline()
 
-        if hasattr(self, 'WAIT_PATTERN'):
+                elif re.search('up to', errmsg, re.I):
+                    self.fail(_("File too large for free download"))
+
+                elif re.search('offline|delet|remov|not (found|available)', errmsg, re.I):
+                    self.offline()
+
+                elif re.search('premium', errmsg, re.I):
+                    self.fail(_("File can be downloaded by premium users only"))
+
+                else:
+                    self.wantReconnect = True
+                    self.retry(wait_time=60, reason=errmsg)
+
+        elif hasattr(self, 'WAIT_PATTERN'):
             m = re.search(self.WAIT_PATTERN, self.html)
             if m:
-                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
-                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)])
+                try:
+                    waitmsg = m.group(1).strip()
+                except Exception:
+                    waitmsg = m.group(0).strip()
+
+                wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1, "": 1}[u.lower()] for v, u in
+                                re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I))
                 self.wait(wait_time, wait_time > 300)
-                return
 
         self.info.pop('error', None)
 
 
-    def checkStatus(self):
-        status = self.info['status']
+    def checkStatus(self, getinfo=True):
+        if not self.info or getinfo:
+            self.logDebug("Update file info...")
+            self.logDebug("Previous file info: %s" % self.info)
+            self.info.update(self.getInfo(self.pyfile.url, self.html))
+            self.logDebug("Current file info: %s"  % self.info)
 
-        if status is 1:
-            self.offline()
+        try:
+            status = self.info['status']
 
-        elif status is 6:
-            self.tempOffline()
+            if status is 1:
+                self.offline()
 
-        elif status is not 2:
-            self.logDebug("File status: %s" % statusMap[status],
-                          "File info: %s"   % self.info)
+            elif status is 6:
+                self.tempOffline()
 
+            elif status is 8:
+                self.fail(self.info['error'] if 'error' in self.info else _("Failed"))
 
-    def checkNameSize(self):
-        name = self.info['name']
-        size = self.info['size']
-        url  = self.info['url']
+        finally:
+            self.logDebug("File status: %s" % statusMap[status])
 
-        if name and name != url:
-            self.pyfile.name = name
-        else:
-            self.pyfile.name = name = self.info['name'] = urlparse(name).path.split('/')[-1]
 
-        if size > 0:
-            self.pyfile.size = size
-        else:
-            size = "Unknown"
+    def checkNameSize(self, getinfo=True):
+        if not self.info or getinfo:
+            self.logDebug("Update file info...")
+            self.logDebug("Previous file info: %s" % self.info)
+            self.info.update(self.getInfo(self.pyfile.url, self.html))
+            self.logDebug("Current file info: %s"  % self.info)
 
-        self.logDebug("File name: %s" % name,
-                      "File size: %s" % size)
+        try:
+            url  = self.info['url'].strip()
+            name = self.info['name'].strip()
+            if name and name != url:
+                self.pyfile.name = name
 
+        except Exception:
+            pass
+
+        try:
+            size = self.info['size']
+            if size > 0:
+                self.pyfile.size = size
+
+        except Exception:
+            pass
+
+        self.logDebug("File name: %s" % self.pyfile.name,
+                      "File size: %s byte" % self.pyfile.size if self.pyfile.size > 0 else "File size: Unknown")
 
-    def checkInfo(self):
-        self.updateInfo(self.getInfo(self.pyfile.url, self.html))
 
+    def checkInfo(self):
         self.checkNameSize()
 
         if self.html:
             self.checkErrors()
+            self.checkNameSize()
 
-        self.updateInfo(self.getInfo(self.pyfile.url, self.html))
-
-        self.checkNameSize()
-        self.checkStatus()
+        self.checkStatus(getinfo=False)
 
 
     #: Deprecated
@@ -494,56 +728,43 @@ class SimpleHoster(Hoster):
         return self.info
 
 
-    def updateInfo(self, info):
-        self.logDebug(_("File info (BEFORE): %s") % self.info)
-        self.info.update(info)
-        self.logDebug(_("File info (AFTER): %s")  % self.info)
-
-
-    def handleDirect(self):
-        link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)
+    def handleDirect(self, pyfile):  #: Set self.link when directLink() returns a truthy value for pyfile.url
+        link = self.directLink(pyfile.url, self.resumeDownload)
 
         if link:
             self.logInfo(_("Direct download link detected"))
-
             self.link = link
         else:
-            self.logDebug(_("Direct download link not found"))
+            self.logDebug("Direct download link not found")
 
 
-    def handleMulti(self):  #: Multi-hoster handler
+    def handleMulti(self, pyfile):  #: Multi-hoster handler (does nothing in this class)
         pass
 
 
-    def handleFree(self):
+    def handleFree(self, pyfile):  #: Set self.link from group 1 of LINK_FREE_PATTERN searched in self.html
         if not hasattr(self, 'LINK_FREE_PATTERN'):
-            self.fail(_("Free download not implemented"))
-
-        try:
-            m = re.search(self.LINK_FREE_PATTERN, self.html)
-            if m is None:
-                self.error(_("Free download link not found"))
+            self.fail(_("Free download not implemented"))  #: Abort here, the search below would raise AttributeError
 
+        m = re.search(self.LINK_FREE_PATTERN, self.html)
+        if m is None:
+            self.error(_("Free download link not found"))
+        else:
             self.link = m.group(1)
 
-        except Exception, e:
-            self.fail(e)
-
 
-    def handlePremium(self):
+    def handlePremium(self, pyfile):  #: Set self.link from group 1 of LINK_PREMIUM_PATTERN searched in self.html
         if not hasattr(self, 'LINK_PREMIUM_PATTERN'):
-            self.fail(_("Premium download not implemented"))
-
-        try:
-            m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
-            if m is None:
-                self.error(_("Premium download link not found"))
+            self.logError(_("Premium download not implemented"))
+            self.logDebug("Handled as free download")
+            return self.handleFree(pyfile)  #: Stop here, LINK_PREMIUM_PATTERN does not exist for the search below
 
+        m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
+        if m is None:
+            self.error(_("Premium download link not found"))
+        else:
             self.link = m.group(1)
 
-        except Exception, e:
-            self.fail(e)
-
 
     def longWait(self, wait_time=None, max_tries=3):
         if wait_time and isinstance(wait_time, (int, long, float)):
@@ -555,8 +776,7 @@ class SimpleHoster(Hoster):
 
         self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str)
 
-        self.setWait(wait_time, True)
-        self.wait()
+        self.wait(wait_time, True)
         self.retry(max_tries=max_tries, reason=_("Download limit reached"))
 
 
@@ -565,6 +785,9 @@ class SimpleHoster(Hoster):
 
 
     def checkTrafficLeft(self):
+        if not self.account:
+            return True
+
         traffic = self.account.getAccountInfo(self.user, True)['trafficleft']
 
         if traffic is None:
@@ -578,6 +801,26 @@ class SimpleHoster(Hoster):
 
 
     #@TODO: Remove in 0.4.10
+    def getConfig(self, option, default=''):
+        """getConfig with default value - subclass may not implement all config options"""
+        try:
+            return self.getConf(option)
+
+        except KeyError:  #: getConf() raised KeyError for this option, use the fallback value
+            return default
+
+
+    def retryFree(self):  #: Drop the premium account and raise Retry ("Fallback to free download")
+        if not self.premium:  #: Nothing to do unless self.premium is set
+            return
+        self.premium = False
+        self.account = None
+        self.req     = self.core.requestFactory.getRequest(self.__name__)  #: Replace the request, no account is passed
+        self.retries = -1  #: NOTE(review): presumably keeps this fallback from counting as a retry - confirm
+        raise Retry(_("Fallback to free download"))
+
+
+    #@TODO: Remove in 0.4.10 (wrapper that only forwards to _wait())
     def wait(self, seconds=0, reconnect=None):
         return _wait(self, seconds, reconnect)
author	GammaC0de <GammaC0de@users.noreply.github.com>	2015-05-29 23:33:10 +0200
committer	GammaC0de <GammaC0de@users.noreply.github.com>	2015-05-29 23:33:10 +0200
commit	844dfd92f590e531ca2f7fd86305fcbc13a03721 (patch)
tree	5303bd07749b362dab071ada6197fe37dda85b27 /module/plugins/internal/SimpleHoster.py
parent	[BitshareCom] Code cosmetics (diff)
parent	[SimpleHoster] Fix DB error (diff)
download	pyload-844dfd92f590e531ca2f7fd86305fcbc13a03721.tar.xz