author    Walter Purcaro <vuolter@gmail.com>    2014-12-01 18:10:59 +0100
committer Walter Purcaro <vuolter@gmail.com>    2014-12-01 18:23:43 +0100
commit    7ef266b8c5681236111fe3e71acbb7eaeefd543f (patch)
tree      f6d457dee9122155dad3606404abf0c27dd90730 /module
parent    [DeadCrypter][DeadHoster] Improve getInfo (diff)
download  pyload-7ef266b8c5681236111fe3e71acbb7eaeefd543f.tar.xz
[SimpleHoster] Fix some stuff, improve others
Diffstat (limited to 'module')
-rw-r--r--  module/plugins/internal/SimpleHoster.py  |  123
1 file changed, 83 insertions(+), 40 deletions(-)
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index f741cd691..736d15f3a 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -4,7 +4,7 @@ import re
from time import time
from urllib import unquote
-from urlparse import urlparse
+from urlparse import urljoin, urlparse
from module.PyFile import statusMap as _statusMap
from module.network.CookieJar import CookieJar
@@ -33,7 +33,7 @@ def _error(self, reason, type):
#@TODO: Remove in 0.4.10
def _wait(self, seconds, reconnect):
if seconds:
- self.setWait(seconds)
+ self.setWait(seconds + 1)
if reconnect is not None:
self.wantReconnect = reconnect
@@ -114,25 +114,35 @@ def timestamp():
#@TODO: Move to hoster class in 0.4.10
-def _getDirectLink(self, url):
+def _isDirectLink(self, url, resumable=True):
header = self.load(url, ref=True, just_header=True, decode=True)
- if not 'code' in header or header['code'] != 302:
- return ""
-
if not 'location' in header or not header['location']:
return ""
- # if 'content-type' in header and "text/plain" not in header['content-type']:
- # return ""
+ location = header['location']
+
+ if resumable: #: sometimes http code may be wrong...
+ if self.load(location, ref=True, cookies=True, just_header=True, decode=True)['location']:
+ return ""
+ else:
+ if not 'code' in header or header['code'] != 302:
+ return ""
- return header['location']
+ if urlparse(location).scheme:
+ link = location
+ else:
+ p = urlparse(url)
+ base = "%s://%s" % (p.scheme, p.netloc)
+ link = urljoin(base, location)
+
+ return link
class SimpleHoster(Hoster):
__name__ = "SimpleHoster"
__type__ = "hoster"
- __version__ = "0.68"
+ __version__ = "0.69"
__pattern__ = r'^unmatchable$'
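
The rewritten _isDirectLink helper above now resolves relative Location headers against the requesting URL before returning them. As a rough standalone sketch (plain Python 2, not the pyLoad API; function name invented), the resolution step amounts to:

from urlparse import urljoin, urlparse

def resolve_location(request_url, location):
    # A location that carries its own scheme is already absolute.
    if urlparse(location).scheme:
        return location
    # Otherwise rebuild the base from the request URL and join.
    p = urlparse(request_url)
    base = "%s://%s" % (p.scheme, p.netloc)
    return urljoin(base, location)

# Example: a "Location: /dl/file.bin" answer to http://example.com/get/abc
# resolves to http://example.com/dl/file.bin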
@@ -154,6 +164,9 @@ class SimpleHoster(Hoster):
SIZE_PATTERN: (optional) Size that will be checked for the file
example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
+ HASHSUM_PATTERN: (optional) Hash code and type of the file
+ example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
+
OFFLINE_PATTERN: (optional) Check if the file is yet available online
example: OFFLINE_PATTERN = r'File (deleted|not found)'
@@ -189,9 +202,9 @@ class SimpleHoster(Hoster):
TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct
COOKIES = True #: or False or list of tuples [(domain, name, value)]
FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account
- CHECK_DIRECT_LINK = None #: when None self-set to True if self.account else False
- MULTI_HOSTER = False #: Set to True to leech other hoster link
- CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value in http header
+ CHECK_DIRECT_LINK = None #: Set to True to check for direct link, set to None to do it only if self.account is True
+ MULTI_HOSTER = False #: Set to True to leech other hoster links (according to its multihoster hook, if available)
+ CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value from http header
@classmethod
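
To illustrate the new HASHSUM_PATTERN hook and the clarified attribute comments above, a hoster plugin built on this SimpleHoster version could look roughly like the following sketch (hypothetical host; patterns and values invented):

class ExampleComHoster(SimpleHoster):
    __name__    = "ExampleComHoster"
    __type__    = "hoster"
    __version__ = "0.01"
    __pattern__ = r'https?://(?:www\.)?example\.com/file/\w+'

    NAME_PATTERN    = r'Filename: (?P<N>.+?)<'
    SIZE_PATTERN    = r'Size: (?P<S>[\d.,]+) (?P<U>[\w^_]+)'
    HASHSUM_PATTERN = r'(?P<T>MD5): (?P<H>[0-9a-f]{32})'
    OFFLINE_PATTERN = r'File (deleted|not found)'

    CHECK_DIRECT_LINK   = True   #: always probe the url for a direct download first
    CONTENT_DISPOSITION = True   #: take the file name from the content-disposition header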
@@ -203,14 +216,32 @@ class SimpleHoster(Hoster):
@classmethod
def getInfo(cls, url="", html=""):
- info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""}
+ info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}
if not html:
- if url:
- html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
- if isinstance(cls.TEXT_ENCODING, basestring):
- html = unicode(html, cls.TEXT_ENCODING)
- else:
+ try:
+ if not url:
+ info['error'] = "missing url"
+ info['status'] = 1
+ raise
+
+ try:
+ html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
+
+ if isinstance(cls.TEXT_ENCODING, basestring):
+ html = unicode(html, cls.TEXT_ENCODING)
+
+ except BadHeader, e:
+ info['error'] = "%d: %s" % (e.code, e.content)
+
+ if e.code is 404:
+ info['status'] = 1
+ raise
+
+ if e.code is 503:
+ info['status'] = 6
+ raise
+ except:
return info
online = False
@@ -226,19 +257,20 @@ class SimpleHoster(Hoster):
else:
try:
- info.update(re.match(cls.__pattern__, url).groupdict())
+ info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api']
except:
pass
for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
"FILE_NAME_PATTERN", "NAME_PATTERN",
- "FILE_SIZE_PATTERN", "SIZE_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
+ "FILE_SIZE_PATTERN", "SIZE_PATTERN",
+ "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
try:
attr = getattr(cls, pattern)
dict = re.search(attr, html).groupdict()
- if all(True for k in dict if k not in info):
- info.update(dict)
+ if all(True for k in dict if k not in info['pattern']):
+ info['pattern'].update(dict)
except AttributeError:
continue
@@ -249,12 +281,12 @@ class SimpleHoster(Hoster):
if online:
info['status'] = 2
- if 'N' in info:
- info['name'] = replace_patterns(unquote(info['N'].strip()),
+ if 'N' in info['pattern']:
+ info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
- if 'S' in info:
- size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'],
+ if 'S' in info['pattern']:
+ size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'],
cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
info['size'] = parseFileSize(size)
@@ -262,6 +294,10 @@ class SimpleHoster(Hoster):
unit = info['units'] if 'units' in info else None
info['size'] = parseFileSize(info['size'], unit)
+ if 'H' in info['pattern']:
+ hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
+ info[hashtype] = info['pattern']['H']
+
return info
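
With the reworked getInfo above, regex groups now land under info['pattern'] and a matched HASHSUM_PATTERN adds an entry keyed by the captured hash type (or 'hash' when no <T> group is present). A minimal usage sketch, reusing the hypothetical plugin from the earlier example (URL invented):

info = ExampleComHoster.getInfo("http://example.com/file/abc123")

print info['name'], info['size'], info['status']   # e.g. status 2 = online, 1 = offline
print info.get('pattern', {})                      # raw regex groups: N, S, U, H, T, ...
print info.get('MD5') or info.get('hash')          # hash value, if HASHSUM_PATTERN matched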
@@ -349,15 +385,20 @@ class SimpleHoster(Hoster):
if self.link:
self.download(self.link, disposition=self.CONTENT_DISPOSITION)
+ self.checkFile()
+
+
+ def checkFile(self):
+ if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10
+ self.fail(_("Empty file"))
+
def checkErrors(self):
- if hasattr(self, 'WAIT_PATTERN'):
- m = re.search(self.WAIT_PATTERN, self.html)
+ if hasattr(self, 'ERROR_PATTERN'):
+ m = re.search(self.ERROR_PATTERN, self.html)
if m:
- wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
- re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
- self.wait(wait_time, False)
- return
+ e = self.info['error'] = m.group(1)
+ self.error(e)
if hasattr(self, 'PREMIUM_ONLY_PATTERN'):
m = re.search(self.PREMIUM_ONLY_PATTERN, self.html)
@@ -365,11 +406,13 @@ class SimpleHoster(Hoster):
self.info['error'] = "premium-only"
return
- if hasattr(self, 'ERROR_PATTERN'):
- m = re.search(self.ERROR_PATTERN, self.html)
+ if hasattr(self, 'WAIT_PATTERN'):
+ m = re.search(self.WAIT_PATTERN, self.html)
if m:
- e = self.info['error'] = m.group(1)
- self.error(e)
+ wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
+ re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
+ self.wait(wait_time, False)
+ return
self.info.pop('error', None)
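
The hunk above also swaps the order of the checks: ERROR_PATTERN now runs before PREMIUM_ONLY_PATTERN and WAIT_PATTERN, so an explicit error message on the page wins over a wait countdown found on the same page. A small standalone sketch of that precedence (page text and patterns invented):

import re

html = "Download error: daily quota exceeded. Please wait 2 hours."

ERROR_PATTERN = r'Download error: (.+?)\.'
WAIT_PATTERN  = r'wait (\d+)\s*(hr|hour|min|sec)'

m = re.search(ERROR_PATTERN, html)
if m:
    print "fail early with:", m.group(1)    # reported instead of silently waiting
else:
    print "no error text, fall back to WAIT_PATTERN handling"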
@@ -431,7 +474,7 @@ class SimpleHoster(Hoster):
def handleDirect(self):
- link = _getDirectLink(self, self.pyfile.url)
+ link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)
if link:
self.logInfo(_("Direct download link detected"))
@@ -460,7 +503,7 @@ class SimpleHoster(Hoster):
self.link = m.group(1)
except Exception, e:
- self.fail(str(e))
+ self.fail(e)
def handlePremium(self):
@@ -475,7 +518,7 @@ class SimpleHoster(Hoster):
self.link = m.group(1)
except Exception, e:
- self.fail(str(e))
+ self.fail(e)
def longWait(self, wait_time=None, max_tries=3):