[SimpleHoster] Fix some stuff, improve others

author: Walter Purcaro <vuolter@gmail.com> 2014-12-01 18:10:59 +0100
committer: Walter Purcaro <vuolter@gmail.com> 2014-12-01 18:23:43 +0100
commit: 7ef266b8c5681236111fe3e71acbb7eaeefd543f (patch)
tree: f6d457dee9122155dad3606404abf0c27dd90730 /module
parent: [DeadCrypter][DeadHoster] Improve getInfo (diff)
download: pyload-7ef266b8c5681236111fe3e71acbb7eaeefd543f.tar.xz
1 files changed, 83 insertions, 40 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index f741cd691..736d15f3a 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -4,7 +4,7 @@ import re
 
 from time import time
 from urllib import unquote
-from urlparse import urlparse
+from urlparse import urljoin, urlparse
 
 from module.PyFile import statusMap as _statusMap
 from module.network.CookieJar import CookieJar
@@ -33,7 +33,7 @@ def _error(self, reason, type):
 #@TODO: Remove in 0.4.10
 def _wait(self, seconds, reconnect):
     if seconds:
-        self.setWait(seconds)
+        self.setWait(seconds + 1)
 
     if reconnect is not None:
         self.wantReconnect = reconnect
@@ -114,25 +114,35 @@ def timestamp():
 
 
 #@TODO: Move to hoster class in 0.4.10
-def _getDirectLink(self, url):
+def _isDirectLink(self, url, resumable=True):
     header = self.load(url, ref=True, just_header=True, decode=True)
 
-    if not 'code' in header or header['code'] != 302:
-        return ""
-
     if not 'location' in header or not header['location']:
         return ""
 
-    # if 'content-type' in header and "text/plain" not in header['content-type']:
-        # return ""
+    location = header['location']
+
+    if resumable:  #: sometimes http code may be wrong...
+        if self.load(location, ref=True, cookies=True, just_header=True, decode=True)['location']:
+            return ""
+    else:
+        if not 'code' in header or header['code'] != 302:
+            return ""
 
-    return header['location']
+    if urlparse(location).scheme:
+        link = location
+    else:
+        p = urlparse(url)
+        base = "%s://%s" % (p.scheme, p.netloc)
+        link = urljoin(base, location)
+
+    return link
 
 
 class SimpleHoster(Hoster):
     __name__    = "SimpleHoster"
     __type__    = "hoster"
-    __version__ = "0.68"
+    __version__ = "0.69"
 
     __pattern__ = r'^unmatchable$'
 
@@ -154,6 +164,9 @@ class SimpleHoster(Hoster):
         SIZE_PATTERN: (optional) Size that will be checked for the file
           example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
 
+      HASHSUM_PATTERN: (optional) Hash code and type of the file
+        example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
+
       OFFLINE_PATTERN: (optional) Check if the file is yet available online
         example: OFFLINE_PATTERN = r'File (deleted|not found)'
 
@@ -189,9 +202,9 @@ class SimpleHoster(Hoster):
     TEXT_ENCODING       = False  #: Set to True or encoding name if encoding value in http header is not correct
     COOKIES             = True   #: or False or list of tuples [(domain, name, value)]
     FORCE_CHECK_TRAFFIC = False  #: Set to True to force checking traffic left for premium account
-    CHECK_DIRECT_LINK   = None   #: when None self-set to True if self.account else False
-    MULTI_HOSTER        = False  #: Set to True to leech other hoster link
-    CONTENT_DISPOSITION = False  #: Set to True to replace file name with content-disposition value in http header
+    CHECK_DIRECT_LINK   = None   #: Set to True to check for direct link, set to None to do it only if self.account is True
+    MULTI_HOSTER        = False  #: Set to True to leech other hoster link (according its multihoster hook if available)
+    CONTENT_DISPOSITION = False  #: Set to True to replace file name with content-disposition value from http header
 
 
     @classmethod
@@ -203,14 +216,32 @@ class SimpleHoster(Hoster):
 
     @classmethod
     def getInfo(cls, url="", html=""):
-        info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""}
+        info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}
 
         if not html:
-            if url:
-                html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
-                if isinstance(cls.TEXT_ENCODING, basestring):
-                    html = unicode(html, cls.TEXT_ENCODING)
-            else:
+            try:
+                if not url:
+                    info['error']  = "missing url"
+                    info['status'] = 1
+                    raise
+
+                try:
+                    html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
+
+                    if isinstance(cls.TEXT_ENCODING, basestring):
+                        html = unicode(html, cls.TEXT_ENCODING)
+
+                except BadHeader, e:
+                    info['error'] = "%d: %s" % (e.code, e.content)
+
+                    if e.code is 404:
+                        info['status'] = 1
+                        raise
+
+                    if e.code is 503:
+                        info['status'] = 6
+                        raise
+            except:
                 return info
 
         online = False
@@ -226,19 +257,20 @@ class SimpleHoster(Hoster):
 
         else:
             try:
-                info.update(re.match(cls.__pattern__, url).groupdict())
+                info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here, please save api stuff to info['api']
             except:
                 pass
 
             for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
                             "FILE_NAME_PATTERN", "NAME_PATTERN",
-                            "FILE_SIZE_PATTERN", "SIZE_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
+                            "FILE_SIZE_PATTERN", "SIZE_PATTERN",
+                            "HASHSUM_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
                 try:
                     attr = getattr(cls, pattern)
                     dict = re.search(attr, html).groupdict()
 
-                    if all(True for k in dict if k not in info):
-                        info.update(dict)
+                    if all(True for k in dict if k not in info['pattern']):
+                        info['pattern'].update(dict)
 
                 except AttributeError:
                     continue
@@ -249,12 +281,12 @@ class SimpleHoster(Hoster):
         if online:
             info['status'] = 2
 
-            if 'N' in info:
-                info['name'] = replace_patterns(unquote(info['N'].strip()),
+            if 'N' in info['pattern']:
+                info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
                                                 cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS)  #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
 
-            if 'S' in info:
-                size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'],
+            if 'S' in info['pattern']:
+                size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'],
                                         cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS)  #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
                 info['size'] = parseFileSize(size)
 
@@ -262,6 +294,10 @@ class SimpleHoster(Hoster):
                 unit = info['units'] if 'units' in info else None
                 info['size'] = parseFileSize(info['size'], unit)
 
+            if 'H' in info['pattern']:
+                hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
+                info[hashtype] = info['pattern']['H']
+
         return info
 
 
@@ -349,15 +385,20 @@ class SimpleHoster(Hoster):
         if self.link:
             self.download(self.link, disposition=self.CONTENT_DISPOSITION)
 
+        self.checkFile()
+
+
+    def checkFile(self)
+        if self.checkDownload({'empty': re.compile(r"^$")}) is "empty":  #@TODO: Move to hoster in 0.4.10
+            self.fail(_("Empty file"))
+
 
     def checkErrors(self):
-        if hasattr(self, 'WAIT_PATTERN'):
-            m = re.search(self.WAIT_PATTERN, self.html)
+        if hasattr(self, 'ERROR_PATTERN'):
+            m = re.search(self.ERROR_PATTERN, self.html)
             if m:
-                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
-                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
-                self.wait(wait_time, False)
-                return
+                e = self.info['error'] = m.group(1)
+                self.error(e)
 
         if hasattr(self, 'PREMIUM_ONLY_PATTERN'):
             m = re.search(self.PREMIUM_ONLY_PATTERN, self.html)
@@ -365,11 +406,13 @@ class SimpleHoster(Hoster):
                 self.info['error'] = "premium-only"
                 return
 
-        if hasattr(self, 'ERROR_PATTERN'):
-            m = re.search(self.ERROR_PATTERN, self.html)
+        if hasattr(self, 'WAIT_PATTERN'):
+            m = re.search(self.WAIT_PATTERN, self.html)
             if m:
-                e = self.info['error'] = m.group(1)
-                self.error(e)
+                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
+                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
+                self.wait(wait_time, False)
+                return
 
         self.info.pop('error', None)
 
@@ -431,7 +474,7 @@ class SimpleHoster(Hoster):
 
 
     def handleDirect(self):
-        link = _getDirectLink(self, self.pyfile.url)
+        link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)
 
         if link:
             self.logInfo(_("Direct download link detected"))
@@ -460,7 +503,7 @@ class SimpleHoster(Hoster):
             self.link = m.group(1)
 
         except Exception, e:
-            self.fail(str(e))
+            self.fail(e)
 
 
     def handlePremium(self):
@@ -475,7 +518,7 @@ class SimpleHoster(Hoster):
             self.link = m.group(1)
 
         except Exception, e:
-            self.fail(str(e))
+            self.fail(e)
 
 
     def longWait(self, wait_time=None, max_tries=3):
author	Walter Purcaro <vuolter@gmail.com>	2014-12-01 18:10:59 +0100
committer	Walter Purcaro <vuolter@gmail.com>	2014-12-01 18:23:43 +0100
commit	7ef266b8c5681236111fe3e71acbb7eaeefd543f (patch)
tree	f6d457dee9122155dad3606404abf0c27dd90730 /module
parent	[DeadCrypter][DeadHoster] Improve getInfo (diff)
download	pyload-7ef266b8c5681236111fe3e71acbb7eaeefd543f.tar.xz