diff options
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
| -rw-r--r-- | module/plugins/internal/SimpleHoster.py | 309 | 
1 files changed, 138 insertions, 171 deletions
| diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 9c310ca27..e5526a7bd 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -2,33 +2,27 @@  from __future__ import with_statement -import mimetypes  import os  import re  import time -import urlparse -from module.PyFile import statusMap as _statusMap  from module.network.HTTPRequest import BadHeader  from module.network.RequestFactory import getURL as get_url  from module.plugins.internal.Hoster import Hoster, create_getInfo, parse_fileInfo -from module.plugins.internal.Plugin import Fail, encode, fixurl, replace_patterns, seconds_to_midnight, set_cookie, set_cookies +from module.plugins.internal.Plugin import Fail, encode, parse_name, parse_time, replace_patterns, seconds_to_midnight, set_cookie, set_cookies  from module.utils import fixup, fs_encode, parseFileSize as parse_size -#@TODO: Adapt and move to PyFile in 0.4.10 -statusMap = dict((v, k) for k, v in _statusMap.items()) - -  class SimpleHoster(Hoster):      __name__    = "SimpleHoster"      __type__    = "hoster" -    __version__ = "1.80" +    __version__ = "1.93"      __status__  = "testing"      __pattern__ = r'^unmatchable$' -    __config__  = [("use_premium", "bool", "Use premium account if available"          , True), -                   ("fallback"   , "bool", "Fallback to free download if premium fails", True)] +    __config__  = [("use_premium"     , "bool", "Use premium account if available"          , True), +                   ("fallback_premium", "bool", "Fallback to free download if premium fails", True), +                   ("chk_filesize"    , "bool", "Check file size"                           , True)]      __description__ = """Simple hoster plugin"""      __license__     = "GPLv3" @@ -91,25 +85,43 @@ class SimpleHoster(Hoster):          LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download            example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"'      """ -    NAME_REPLACEMENTS = [("&#?\w+;", fixup)] -    SIZE_REPLACEMENTS = [] -    URL_REPLACEMENTS  = [] - -    FILE_ERRORS = [('Html error'   , r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)'), -                   ('Request error', r'([Aa]n error occured while processing your request)'                   ), -                   ('Html file'    , r'\A\s*<!DOCTYPE html'                                                   )] -    CHECK_FILE    = True   #: Set to False to not check the last downloaded file with declared error patterns -    CHECK_TRAFFIC = False  #: Set to True to reload checking traffic left for premium account -    COOKIES       = True   #: or False or list of tuples [(domain, name, value)] -    DIRECT_LINK   = None   #: Set to True to looking for direct link (as defined in handle_direct method), set to None to do it if self.account is True else False -    DISPOSITION   = True   #: Set to True to use any content-disposition value in http header as file name -    LOGIN_ACCOUNT = False  #: Set to True to require account login -    LOGIN_PREMIUM = False  #: Set to True to require premium account login -    LEECH_HOSTER  = False  #: Set to True to leech other hoster link (as defined in handle_multi method) -    TEXT_ENCODING = True   #: Set to encoding name if encoding value in http header is not correct - -    LINK_PATTERN = None +    NAME_REPLACEMENTS    = [] +    SIZE_REPLACEMENTS    = [] +    URL_REPLACEMENTS     = [] + +    CHECK_FILE           = True   #: Set to False to not check the last downloaded file with declared error patterns +    CHECK_TRAFFIC        = False  #: Set to True to reload checking traffic left for premium account +    COOKIES              = True   #: or False or list of tuples [(domain, name, value)] +    DIRECT_LINK          = None   #: Set to True to looking for direct link (as defined in handle_direct method), set to None to do it if self.account is True else False +    DISPOSITION          = True   #: Set to True to use any content-disposition value in http header as file name +    LOGIN_ACCOUNT        = False  #: Set to True to require account login +    LOGIN_PREMIUM        = False  #: Set to True to require premium account login +    LEECH_HOSTER         = False  #: Set to True to leech other hoster link (as defined in handle_multi method) +    TEXT_ENCODING        = True   #: Set to encoding name if encoding value in http header is not correct + +    LINK_PATTERN         = None +    LINK_FREE_PATTERN    = None +    LINK_PREMIUM_PATTERN = None + +    INFO_PATTERN         = None +    NAME_PATTERN         = None +    SIZE_PATTERN         = None +    HASHSUM_PATTERN      = None +    OFFLINE_PATTERN      = None +    TEMP_OFFLINE_PATTERN = None + +    WAIT_PATTERN         = None +    PREMIUM_ONLY_PATTERN = None +    HAPPY_HOUR_PATTERN   = None +    IP_BLOCKED_PATTERN   = None +    DL_LIMIT_PATTERN     = None +    SIZE_LIMIT_PATTERN   = None +    ERROR_PATTERN        = None + +    FILE_ERRORS          = [('Html error'   , r'\A(?:\s*<.+>)?((?:[\w\s]*(?:[Ee]rror|ERROR)\s*\:?)?\s*\d{3})(?:\Z|\s+)'), +                            ('Request error', r'([Aa]n error occured while processing your request)'                   ), +                            ('Html file'    , r'\A\s*<!DOCTYPE html'                                                   )]      @classmethod @@ -150,10 +162,10 @@ class SimpleHoster(Hoster):                      pass          if html: -            if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html): +            if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html):                  info['status'] = 1 -            elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html): +            elif cls.TEMP_OFFLINE_PATTERN and re.search(cls.TEMP_OFFLINE_PATTERN, html):                  info['status'] = 6              else: @@ -165,7 +177,7 @@ class SimpleHoster(Hoster):                          if all(True for k in pdict if k not in info['pattern']):                              info['pattern'].update(pdict) -                    except AttributeError: +                    except Exception:                          continue                      else: @@ -175,8 +187,8 @@ class SimpleHoster(Hoster):              info['status'] = 2              if 'N' in info['pattern']: -                info['name'] = replace_patterns(fixurl(info['pattern']['N']), -                                                cls.NAME_REPLACEMENTS) +                name = replace_patterns(info['pattern']['N'], cls.NAME_REPLACEMENTS) +                info['name'] = parse_name(name)              if 'S' in info['pattern']:                  size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], @@ -202,19 +214,12 @@ class SimpleHoster(Hoster):      def prepare(self): -        self.pyfile.error  = ""  #@TODO: Remove in 0.4.10 -        self.html          = ""  #@TODO: Recheck in 0.4.10 -        self.link          = ""  #@TODO: Recheck in 0.4.10 -        self.last_download = "" -        self.direct_dl     = False -        self.leech_dl      = False - -        if not self.get_config('use_premium', True): -            self.restart(nopremium=True) +        self.link      = "" +        self.direct_dl = False +        self.leech_dl  = False          if self.LOGIN_PREMIUM and not self.premium:              self.fail(_("Required premium account not found")) -            self.LOGIN_ACCOUNT = True          if self.LOGIN_ACCOUNT and not self.account:              self.fail(_("Required account not found")) @@ -222,10 +227,10 @@ class SimpleHoster(Hoster):          self.req.setOption("timeout", 120)          if self.LINK_PATTERN: -            if not hasattr(self, 'LINK_FREE_PATTERN'): +            if self.LINK_FREE_PATTERN is None:                  self.LINK_FREE_PATTERN = self.LINK_PATTERN -            if not hasattr(self, 'LINK_PREMIUM_PATTERN'): +            if self.LINK_PREMIUM_PATTERN is None:                  self.LINK_PREMIUM_PATTERN = self.LINK_PATTERN          if (self.LEECH_HOSTER @@ -253,95 +258,72 @@ class SimpleHoster(Hoster):      def process(self, pyfile): -        try: -            self.prepare() -            self.check_info()  #@TODO: Remove in 0.4.10 - -            if self.leech_dl: -                self.log_info(_("Processing as debrid download...")) -                self.handle_multi(pyfile) - -                if not self.link and not was_downloaded(): -                    self.log_info(_("Failed to leech url")) +        self.prepare() +        self.check_info()  #@TODO: Remove in 0.4.10 -            else: -                if not self.link and self.direct_dl and not self.last_download: -                    self.log_info(_("Looking for direct download link...")) -                    self.handle_direct(pyfile) +        if self.leech_dl: +            self.log_info(_("Processing as debrid download...")) +            self.handle_multi(pyfile) -                    if self.link or self.last_download: -                        self.log_info(_("Direct download link detected")) -                    else: -                        self.log_info(_("Direct download link not found")) +            if not self.link and not was_downloaded(): +                self.log_info(_("Failed to leech url")) -                if not self.link and not self.last_download: -                    self.preload() +        else: +            if not self.link and self.direct_dl and not self.last_download: +                self.log_info(_("Looking for direct download link...")) +                self.handle_direct(pyfile) -                    if 'status' not in self.info or self.info['status'] is 3:  #@TODO: Recheck in 0.4.10 -                        self.check_info() +                if self.link or self.last_download: +                    self.log_info(_("Direct download link detected")) +                else: +                    self.log_info(_("Direct download link not found")) -                    if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic_left()): -                        self.log_info(_("Processing as premium download...")) -                        self.handle_premium(pyfile) +            if not self.link and not self.last_download: +                self.preload() -                    elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.check_traffic_left()): -                        self.log_info(_("Processing as free download...")) -                        self.handle_free(pyfile) +                if 'status' not in self.info or self.info['status'] is 3:  #@TODO: Recheck in 0.4.10 +                    self.check_info() -            if not self.last_download: -                self.log_info(_("Downloading file...")) -                self.download(self.link, disposition=self.DISPOSITION) +                if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic()): +                    self.log_info(_("Processing as premium download...")) +                    self.handle_premium(pyfile) -            self.check_file() +                elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.check_traffic()): +                    self.log_info(_("Processing as free download...")) +                    self.handle_free(pyfile) -        except Fail, e:  #@TODO: Move to PluginThread in 0.4.10 -            if self.get_config('fallback', True) and self.premium: -                self.log_warning(_("Premium download failed"), e) -                self.restart(nopremium=True) +        if not self.last_download: +            self.log_info(_("Downloading file...")) +            self.download(self.link, disposition=self.DISPOSITION) -            else: -                raise Fail(encode(e))  #@TODO: Remove `encode` in 0.4.10 +        self.check_download() -    def check_file(self): -        self.log_info(_("Checking file...")) +    def check_download(self): +        self.log_info(_("Checking downloaded file...")) +        self.log_debug("Using default check rules...") +        for r, p in self.FILE_ERRORS: +            errmsg = self.check_file({r: re.compile(p)}) +            if errmsg is not None: +                errmsg = errmsg.strip().capitalize() -        if self.captcha.task and not self.last_download: -            self.captcha.invalid() -            self.retry(10, reason=_("Wrong captcha")) +                try: +                    errmsg += " | " + self.last_check.group(1).strip() -        # 10485760 is 10MB, tolerance is used when comparing displayed size on the hoster website to real size -        # For example displayed size can be 1.46GB for example, but real size can be 1.4649853GB -        elif self.check_download({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}, -                                 file_size=self.info['size'] if 'size' in self.info else 0, -                                 size_tolerance=10485760, -                                 delete=True): -            self.error(_("Empty file")) +                except Exception: +                    pass +                self.log_warning(_("Check result: ") + errmsg, _("Waiting 1 minute and retry")) +                self.wait(60, reconnect=True) +                self.restart(errmsg, premium=True)          else: -            self.log_debug("Using default check rules...") -            for r, p in self.FILE_ERRORS: -                errmsg = self.check_download({r: re.compile(p)}) -                if errmsg is not None: -                    errmsg = errmsg.strip().capitalize() - -                    try: -                        errmsg += " | " + self.last_check.group(1).strip() -                    except Exception: -                        pass - -                    self.log_warning(_("Check result: ") + errmsg, _("Waiting 1 minute and retry")) -                    self.wantReconnect = True -                    self.retry(wait_time=60, reason=errmsg) -            else: -                if self.CHECK_FILE: -                    self.log_debug("Using custom check rules...") -                    with open(fs_encode(self.last_download), "rb") as f: -                        self.html = f.read(1048576)  #@TODO: Recheck in 0.4.10 -                    self.check_errors() +            if self.CHECK_FILE: +                self.log_debug("Using custom check rules...") +                with open(fs_encode(self.last_download), "rb") as f: +                    self.html = f.read(1048576)  #@TODO: Recheck in 0.4.10 +                self.check_errors()          self.log_info(_("No errors found")) -        self.pyfile.error = ""      def check_errors(self): @@ -349,65 +331,56 @@ class SimpleHoster(Hoster):              self.log_warning(_("No html code to check"))              return -        if hasattr(self, 'IP_BLOCKED_PATTERN') and re.search(self.IP_BLOCKED_PATTERN, self.html): +        if self.IP_BLOCKED_PATTERN and re.search(self.IP_BLOCKED_PATTERN, self.html):              self.fail(_("Connection from your current IP address is not allowed"))          elif not self.premium: -            if hasattr(self, 'PREMIUM_ONLY_PATTERN') and re.search(self.PREMIUM_ONLY_PATTERN, self.html): +            if self.PREMIUM_ONLY_PATTERN and re.search(self.PREMIUM_ONLY_PATTERN, self.html):                  self.fail(_("File can be downloaded by premium users only")) -            elif hasattr(self, 'SIZE_LIMIT_PATTERN') and re.search(self.SIZE_LIMIT_PATTERN, self.html): +            elif self.SIZE_LIMIT_PATTERN and re.search(self.SIZE_LIMIT_PATTERN, self.html):                  self.fail(_("File too large for free download")) -            elif hasattr(self, 'DL_LIMIT_PATTERN') and re.search(self.DL_LIMIT_PATTERN, self.html): +            elif self.DL_LIMIT_PATTERN and re.search(self.DL_LIMIT_PATTERN, self.html):                  m = re.search(self.DL_LIMIT_PATTERN, self.html)                  try:                      errmsg = m.group(1).strip() -                except Exception: + +                except (AttributeError, IndexError):                      errmsg = m.group(0).strip()                  self.info['error'] = re.sub(r'<.*?>', " ", errmsg)                  self.log_warning(self.info['error']) -                if re.search('da(il)?y|today', errmsg, re.I): -                    wait_time = seconds_to_midnight(gmt=2) -                else: -                    wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in -                                re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) +                wait_time = parse_time(errmsg) +                self.wait(wait_time, reconnect=wait_time > 300) +                self.restart(_("Download limit exceeded"), premium=True) -                self.wantReconnect = wait_time > 300 -                self.retry(1, wait_time, _("Download limit exceeded")) - -        if hasattr(self, 'HAPPY_HOUR_PATTERN') and re.search(self.HAPPY_HOUR_PATTERN, self.html): +        if self.HAPPY_HOUR_PATTERN and re.search(self.HAPPY_HOUR_PATTERN, self.html):              self.multiDL = True -        if hasattr(self, 'ERROR_PATTERN'): +        if self.ERROR_PATTERN:              m = re.search(self.ERROR_PATTERN, self.html) -            if m: +            if m is not None:                  try:                      errmsg = m.group(1).strip() -                except Exception: + +                except (AttributeError, IndexError):                      errmsg = m.group(0).strip()                  self.info['error'] = re.sub(r'<.*?>', " ", errmsg)                  self.log_warning(self.info['error'])                  if re.search('limit|wait|slot', errmsg, re.I): -                    if re.search("da(il)?y|today", errmsg): -                        wait_time = seconds_to_midnight(gmt=2) -                    else: -                        wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in -                                    re.findall(r'(\d+)\s*(hr|hour|min|sec|)', errmsg, re.I)) - -                    self.wantReconnect = wait_time > 300 -                    self.retry(1, wait_time, _("Download limit exceeded")) +                    wait_time = parse_time(errmsg) +                    self.wait(wait_time, reconnect=wait_time > 300) +                    self.restart(_("Download limit exceeded"), premium=True)                  elif re.search('country|ip|region|nation', errmsg, re.I):                      self.fail(_("Connection from your current IP address is not allowed"))                  elif re.search('captcha|code', errmsg, re.I): -                    self.captcha.invalid() -                    self.retry(10, reason=_("Wrong captcha")) +                    self.retry_captcha()                  elif re.search('countdown|expired', errmsg, re.I):                      self.retry(10, 60, _("Link expired")) @@ -422,28 +395,26 @@ class SimpleHoster(Hoster):                      self.offline()                  elif re.search('filename', errmsg, re.I): -                    url_p = urlparse.urlparse(self.pyfile.url) -                    self.pyfile.url = "%s://%s/%s" % (url_p.scheme, url_p.netloc, url_p.path.split('/')[0]) -                    self.retry(1, reason=_("Wrong url")) +                    self.fail(_("Invalid url"))                  elif re.search('premium', errmsg, re.I):                      self.fail(_("File can be downloaded by premium users only"))                  else: -                    self.wantReconnect = True -                    self.retry(wait_time=60, reason=errmsg) +                    self.wait(60, reconnect=True) +                    self.restart(errmsg, premium=True) -        elif hasattr(self, 'WAIT_PATTERN'): +        elif self.WAIT_PATTERN:              m = re.search(self.WAIT_PATTERN, self.html) -            if m: +            if m is not None:                  try:                      waitmsg = m.group(1).strip() -                except Exception: + +                except (AttributeError, IndexError):                      waitmsg = m.group(0).strip() -                wait_time = sum(int(v) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, "": 1}[u.lower()] for v, u in -                                re.findall(r'(\d+)\s*(hr|hour|min|sec|)', waitmsg, re.I)) -                self.wait(wait_time, wait_time > 300) +                wait_time = parse_time(waitmsg) +                self.wait(wait_time, econnect=wait_time > 300)          self.info.pop('error', None) @@ -457,22 +428,19 @@ class SimpleHoster(Hoster):              self.log_debug("Previous file info: %s" % old_info)          try: -            status = self.info['status'] or None +            status = self.info['status'] or 14 -            if status == 1: +            if status is 1:                  self.offline() -            elif status == 6: +            elif status is 6:                  self.temp_offline() -            elif status == 8: -                if 'error' in self.info: -                    self.fail(self.info['error']) -                else: -                    self.fail(_("File status: " + statusMap[status])) +            elif status is 8: +                self.fail()          finally: -            self.log_info(_("File status: ") + (statusMap[status] if status else _("Unknown"))) +            self.log_info(_("File status: ") + self.pyfile.getStatusName())      def check_name_size(self, getinfo=True): @@ -484,8 +452,8 @@ class SimpleHoster(Hoster):              self.log_debug("Previous file info: %s" % old_info)          try: -            url  = self.info['url'].strip() -            name = self.info['name'].strip() +            url  = self.info['url'] +            name = self.info['name']          except KeyError:              pass @@ -494,7 +462,7 @@ class SimpleHoster(Hoster):              if name and name is not url:                  self.pyfile.name = name -        if 'size' in self.info and self.info['size'] > 0: +        if self.info.get('size') > 0:              self.pyfile.size = int(self.info['size'])  #@TODO: Fix int conversion in 0.4.10          # self.pyfile.sync() @@ -535,7 +503,7 @@ class SimpleHoster(Hoster):      def handle_free(self, pyfile): -        if not hasattr(self, 'LINK_FREE_PATTERN'): +        if not self.LINK_FREE_PATTERN:              self.log_error(_("Free download not implemented"))          m = re.search(self.LINK_FREE_PATTERN, self.html) @@ -546,10 +514,9 @@ class SimpleHoster(Hoster):      def handle_premium(self, pyfile): -        if not hasattr(self, 'LINK_PREMIUM_PATTERN'): +        if not self.LINK_PREMIUM_PATTERN:              self.log_error(_("Premium download not implemented")) -            self.log_info(_("Processing as free download...")) -            self.handle_free(pyfile) +            self.restart()          m = re.search(self.LINK_PREMIUM_PATTERN, self.html)          if m is None: | 
