path: root/module/plugins/internal
author    Walter Purcaro <vuolter@gmail.com>  2014-12-02 00:08:42 +0100
committer Walter Purcaro <vuolter@gmail.com>  2014-12-02 00:08:42 +0100
commit    2a8d0af88b07e62eeb316a8dfbb7cd8d7dbfdcac (patch)
tree      4324ec7c224724156ee9897e8689135f85e2e804 /module/plugins/internal
parent    Merge pull request #920 from chris-19/stable (diff)
parent    [SimpleHoster] Force _isDirectLink to old style (diff)
download  pyload-2a8d0af88b07e62eeb316a8dfbb7cd8d7dbfdcac.tar.xz

Merge branch 'stable-next' into stable
Diffstat (limited to 'module/plugins/internal')
-rw-r--r--  module/plugins/internal/DeadCrypter.py     5
-rw-r--r--  module/plugins/internal/DeadHoster.py      5
-rw-r--r--  module/plugins/internal/SimpleHoster.py  126
-rw-r--r--  module/plugins/internal/XFSAccount.py      8
-rw-r--r--  module/plugins/internal/XFSHoster.py      20
5 files changed, 106 insertions, 58 deletions
diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py
index c721c8390..07c5c3881 100644
--- a/module/plugins/internal/DeadCrypter.py
+++ b/module/plugins/internal/DeadCrypter.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+from urllib import unquote
from urlparse import urlparse
from module.plugins.internal.SimpleCrypter import create_getInfo
@@ -9,7 +10,7 @@ from module.plugins.Crypter import Crypter as _Crypter
class DeadCrypter(_Crypter):
__name__ = "DeadCrypter"
__type__ = "crypter"
- __version__ = "0.03"
+ __version__ = "0.04"
__pattern__ = r'^unmatchable$'
@@ -20,7 +21,7 @@ class DeadCrypter(_Crypter):
@classmethod
def getInfo(cls, url="", html=""):
- return {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url or ""}
+ return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url}
def setup(self):
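As a quick illustration of the unquote change (invented URL, Python 2 stdlib only), the derived file name is now readable for percent-encoded URLs:

    from urllib import unquote
    from urlparse import urlparse

    url = "http://example.com/files/My%20File%20%281%29.zip"   # invented example URL
    name = urlparse(unquote(url)).path.split('/')[-1]
    # name == "My File (1).zip"  -- without unquote it would stay percent-encoded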
diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py
index b85aea3f9..6f3252f70 100644
--- a/module/plugins/internal/DeadHoster.py
+++ b/module/plugins/internal/DeadHoster.py
@@ -1,5 +1,6 @@
# -*- coding: utf-8 -*-
+from urllib import unquote
from urlparse import urlparse
from module.plugins.internal.SimpleHoster import create_getInfo
@@ -9,7 +10,7 @@ from module.plugins.Hoster import Hoster as _Hoster
class DeadHoster(_Hoster):
__name__ = "DeadHoster"
__type__ = "hoster"
- __version__ = "0.13"
+ __version__ = "0.14"
__pattern__ = r'^unmatchable$'
@@ -20,7 +21,7 @@ class DeadHoster(_Hoster):
@classmethod
def getInfo(cls, url="", html=""):
- return {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url or ""}
+ return {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 1, 'url': url}
def setup(self):
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py
index 24a2fa6b0..ba718950d 100644
--- a/module/plugins/internal/SimpleHoster.py
+++ b/module/plugins/internal/SimpleHoster.py
@@ -3,7 +3,8 @@
import re
from time import time
-from urlparse import urlparse
+from urllib import unquote
+from urlparse import urljoin, urlparse
from module.PyFile import statusMap as _statusMap
from module.network.CookieJar import CookieJar
@@ -32,7 +33,7 @@ def _error(self, reason, type):
#@TODO: Remove in 0.4.10
def _wait(self, seconds, reconnect):
if seconds:
- self.setWait(seconds)
+ self.setWait(seconds + 1)
if reconnect is not None:
self.wantReconnect = reconnect
@@ -113,25 +114,37 @@ def timestamp():
#@TODO: Move to hoster class in 0.4.10
-def _getDirectLink(self, url):
+def _isDirectLink(self, url, resumable=True):
header = self.load(url, ref=True, just_header=True, decode=True)
- if not 'code' in header or header['code'] != 302:
- return ""
-
if not 'location' in header or not header['location']:
return ""
- # if 'content-type' in header and "text/plain" not in header['content-type']:
- # return ""
+ location = header['location']
+
+ resumable = False #@NOTE: Testing...
+
+ if resumable: #: sometimes http code may be wrong...
+ if 'location' in self.load(location, ref=True, cookies=True, just_header=True, decode=True):
+ return ""
+ else:
+ if not 'code' in header or header['code'] != 302:
+ return ""
+
+ if urlparse(location).scheme:
+ link = location
+ else:
+ p = urlparse(url)
+ base = "%s://%s" % (p.scheme, p.netloc)
+ link = urljoin(base, location)
- return header['location']
+ return link
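Read in isolation, the redirect-resolution part of the new _isDirectLink amounts to the following sketch (Python 2, example values invented):

    from urlparse import urljoin, urlparse

    def resolve_location(url, location):
        """Return an absolute link for a (possibly relative) Location header."""
        if urlparse(location).scheme:                 # already absolute
            return location
        p = urlparse(url)
        base = "%s://%s" % (p.scheme, p.netloc)
        return urljoin(base, location)

    resolve_location("http://example.com/file/abc", "/dl/abc.bin")
    # -> "http://example.com/dl/abc.bin"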
class SimpleHoster(Hoster):
__name__ = "SimpleHoster"
__type__ = "hoster"
- __version__ = "0.67"
+ __version__ = "0.69"
__pattern__ = r'^unmatchable$'
@@ -153,6 +166,9 @@ class SimpleHoster(Hoster):
SIZE_PATTERN: (optional) Size that will be checked for the file
example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'
+ HASHSUM_PATTERN: (optional) Hash code and type of the file
+ example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'
+
OFFLINE_PATTERN: (optional) Check if the file is yet available online
example: OFFLINE_PATTERN = r'File (deleted|not found)'
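A hypothetical subclass (invented hoster and regexes, shown only to illustrate how these class-level patterns are declared) would look roughly like:

    class ExampleHoster(SimpleHoster):
        __name__    = "ExampleHoster"
        __type__    = "hoster"
        __version__ = "0.01"
        __pattern__ = r'https?://(?:www\.)?example\.com/\w{12}'

        NAME_PATTERN    = r'<h1 class="filename">(?P<N>.+?)</h1>'
        SIZE_PATTERN    = r'Size:\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)'
        HASHSUM_PATTERN = r'(?P<T>MD5) checksum:\s*(?P<H>[0-9a-f]{32})'
        OFFLINE_PATTERN = r'File (deleted|not found)'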
@@ -188,9 +204,9 @@ class SimpleHoster(Hoster):
TEXT_ENCODING = False #: Set to True or encoding name if encoding value in http header is not correct
COOKIES = True #: or False or list of tuples [(domain, name, value)]
FORCE_CHECK_TRAFFIC = False #: Set to True to force checking traffic left for premium account
- CHECK_DIRECT_LINK = None #: when None self-set to True if self.account else False
- MULTI_HOSTER = False #: Set to True to leech other hoster link
- CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value in http header
+ CHECK_DIRECT_LINK = None #: Set to True to check for direct link, set to None to do it only if self.account is True
+ MULTI_HOSTER = False #: Set to True to leech other hoster link (according its multihoster hook if available)
+ CONTENT_DISPOSITION = False #: Set to True to replace file name with content-disposition value from http header
@classmethod
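The three flags documented above are likewise plain class attributes; a plugin that leeches foreign links and trusts the server's file name might set (illustrative values only):

    CHECK_DIRECT_LINK   = True   #: probe the URL for a direct download first
    MULTI_HOSTER        = True   #: accept links belonging to other hosters
    CONTENT_DISPOSITION = True   #: name the file from the Content-Disposition header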
@@ -202,14 +218,32 @@ class SimpleHoster(Hoster):
@classmethod
def getInfo(cls, url="", html=""):
- info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""}
+ info = {'name': urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url}
if not html:
- if url:
- html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
- if isinstance(cls.TEXT_ENCODING, basestring):
- html = unicode(html, cls.TEXT_ENCODING)
- else:
+ try:
+ if not url:
+ info['error'] = "missing url"
+ info['status'] = 1
+ raise
+
+ try:
+ html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)
+
+ if isinstance(cls.TEXT_ENCODING, basestring):
+ html = unicode(html, cls.TEXT_ENCODING)
+
+ except BadHeader, e:
+ info['error'] = "%d: %s" % (e.code, e.content)
+
+ if e.code is 404:
+ info['status'] = 1
+ raise
+
+ if e.code is 503:
+ info['status'] = 6
+ raise
+ except:
return info
online = False
@@ -225,19 +259,20 @@ class SimpleHoster(Hoster):
else:
try:
- info.update(re.match(cls.__pattern__, url).groupdict())
+ info['pattern'] = re.match(cls.__pattern__, url).groupdict() #: pattern groups will be saved here, please save api stuff to info['api']
except:
pass
for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
"FILE_NAME_PATTERN", "NAME_PATTERN",
- "FILE_SIZE_PATTERN", "SIZE_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
+ "FILE_SIZE_PATTERN", "SIZE_PATTERN",
+ "HASHSUM_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
try:
attr = getattr(cls, pattern)
dict = re.search(attr, html).groupdict()
- if all(True for k in dict if k not in info):
- info.update(dict)
+ if all(True for k in dict if k not in info['pattern']):
+ info['pattern'].update(dict)
except AttributeError:
continue
@@ -248,12 +283,12 @@ class SimpleHoster(Hoster):
if online:
info['status'] = 2
- if 'N' in info:
- info['name'] = replace_patterns(info['N'].strip(),
+ if 'N' in info['pattern']:
+ info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS) #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10
- if 'S' in info:
- size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'],
+ if 'S' in info['pattern']:
+ size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info else info['pattern']['S'],
cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS) #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
info['size'] = parseFileSize(size)
@@ -261,6 +296,10 @@ class SimpleHoster(Hoster):
unit = info['units'] if 'units' in info else None
info['size'] = parseFileSize(info['size'], unit)
+ if 'H' in info['pattern']:
+ hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
+ info[hashtype] = info['pattern']['H']
+
return info
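With these changes, a successful getInfo() call yields a structure along these lines (all values invented; the hash key is named after the <T> group, falling back to "hash"):

    {'name'   : 'example_file.zip',
     'size'   : 13631488,                              # parsed from the <S>/<U> groups
     'status' : 2,                                     # file is online
     'url'    : 'http://example.com/abc123',
     'md5'    : 'd41d8cd98f00b204e9800998ecf8427e',    # from HASHSUM_PATTERN (<H>/<T>)
     'pattern': {'N': 'example_file.zip', 'S': '13', 'U': 'MB'}}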
@@ -348,15 +387,20 @@ class SimpleHoster(Hoster):
if self.link:
self.download(self.link, disposition=self.CONTENT_DISPOSITION)
+ self.checkFile()
+
+
+ def checkFile(self):
+ if self.checkDownload({'empty': re.compile(r"^$")}) is "empty": #@TODO: Move to hoster in 0.4.10
+ self.fail(_("Empty file"))
+
def checkErrors(self):
- if hasattr(self, 'WAIT_PATTERN'):
- m = re.search(self.WAIT_PATTERN, self.html)
+ if hasattr(self, 'ERROR_PATTERN'):
+ m = re.search(self.ERROR_PATTERN, self.html)
if m:
- wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
- re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
- self.wait(wait_time, False)
- return
+ e = self.info['error'] = m.group(1)
+ self.error(e)
if hasattr(self, 'PREMIUM_ONLY_PATTERN'):
m = re.search(self.PREMIUM_ONLY_PATTERN, self.html)
@@ -364,11 +408,13 @@ class SimpleHoster(Hoster):
self.info['error'] = "premium-only"
return
- if hasattr(self, 'ERROR_PATTERN'):
- m = re.search(self.ERROR_PATTERN, self.html)
+ if hasattr(self, 'WAIT_PATTERN'):
+ m = re.search(self.WAIT_PATTERN, self.html)
if m:
- e = self.info['error'] = m.group(1)
- self.error(e)
+ wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
+ re.findall(r'(\d+)\s*(hr|hour|min|sec)', m, re.I)])
+ self.wait(wait_time, False)
+ return
self.info.pop('error', None)
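For reference, the wait-time arithmetic that moved below the error check works like this as a standalone sketch (invented page text; the sketch applies the findall to plain text directly):

    import re

    text = "You have to wait 1 hour 20 min 30 sec until the next download"   # invented
    wait_time = sum(int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()]
                    for v, u in re.findall(r'(\d+)\s*(hr|hour|min|sec)', text, re.I))
    # wait_time == 4830  (1*3600 + 20*60 + 30)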
@@ -430,7 +476,7 @@ class SimpleHoster(Hoster):
def handleDirect(self):
- link = _getDirectLink(self, self.pyfile.url)
+ link = _isDirectLink(self, self.pyfile.url, self.resumeDownload)
if link:
self.logInfo(_("Direct download link detected"))
@@ -459,7 +505,7 @@ class SimpleHoster(Hoster):
self.link = m.group(1)
except Exception, e:
- self.fail(str(e))
+ self.fail(e)
def handlePremium(self):
@@ -474,7 +520,7 @@ class SimpleHoster(Hoster):
self.link = m.group(1)
except Exception, e:
- self.fail(str(e))
+ self.fail(e)
def longWait(self, wait_time=None, max_tries=3):
diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py
index 5a265c08a..2094b1480 100644
--- a/module/plugins/internal/XFSAccount.py
+++ b/module/plugins/internal/XFSAccount.py
@@ -12,7 +12,7 @@ from module.plugins.internal.SimpleHoster import parseHtmlForm, set_cookies
class XFSAccount(Account):
__name__ = "XFSAccount"
__type__ = "account"
- __version__ = "0.31"
+ __version__ = "0.32"
__description__ = """XFileSharing account plugin"""
__license__ = "GPLv3"
@@ -27,15 +27,15 @@ class XFSAccount(Account):
PREMIUM_PATTERN = r'\(Premium only\)'
- VALID_UNTIL_PATTERN = r'>Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})'
+ VALID_UNTIL_PATTERN = r'Premium.[Aa]ccount expire:.*?(\d{1,2} [\w^_]+ \d{4})'
- TRAFFIC_LEFT_PATTERN = r'>Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>'
+ TRAFFIC_LEFT_PATTERN = r'Traffic available today:.*?<b>\s*(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>'
TRAFFIC_LEFT_UNIT = "MB" #: used only if no group <U> was found
LEECH_TRAFFIC_PATTERN = r'Leech Traffic left:<b>.*?(?P<S>[\d.,]+|[Uu]nlimited)\s*(?:(?P<U>[\w^_]+)\s*)?</b>'
LEECH_TRAFFIC_UNIT = "MB" #: used only if no group <U> was found
- LOGIN_FAIL_PATTERN = r'>(Incorrect Login or Password|Error<)'
+ LOGIN_FAIL_PATTERN = r'>\s*(Incorrect Login or Password|Error<)'
def __init__(self, manager, accounts): #@TODO: remove in 0.4.10
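The loosened LOGIN_FAIL_PATTERN now tolerates whitespace or a line break right after the tag, and VALID_UNTIL_PATTERN / TRAFFIC_LEFT_PATTERN no longer require a leading '>'. A quick check against an invented fragment:

    import re

    LOGIN_FAIL_PATTERN = r'>\s*(Incorrect Login or Password|Error<)'
    html = '<div class="err">\n  Incorrect Login or Password</div>'   # invented markup
    re.search(LOGIN_FAIL_PATTERN, html)   # matches; the old pattern required the text
                                          # to follow '>' immediately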
diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py
index 2aaf18b1a..c3db3f335 100644
--- a/module/plugins/internal/XFSHoster.py
+++ b/module/plugins/internal/XFSHoster.py
@@ -16,7 +16,7 @@ from module.utils import html_unescape
class XFSHoster(SimpleHoster):
__name__ = "XFSHoster"
__type__ = "hoster"
- __version__ = "0.26"
+ __version__ = "0.27"
__pattern__ = r'^unmatchable$'
@@ -35,7 +35,6 @@ class XFSHoster(SimpleHoster):
CHECK_DIRECT_LINK = None
MULTI_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy...
- INFO_PATTERN = r'<tr><td align=right><b>Filename:</b></td><td nowrap>(?P<N>[^<]+)</td></tr>\s*.*?<small>\((?P<S>[^<]+)\)</small>'
NAME_PATTERN = r'(>Filename:</b></td><td nowrap>|name="fname" value="|<span class="name">)(?P<N>.+?)(\s*<|")'
SIZE_PATTERN = r'(>Size:</b></td><td>|>File:.*>|<span class="size">)(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)'
@@ -49,10 +48,10 @@ class XFSHoster(SimpleHoster):
LEECH_LINK_PATTERN = r'<h2>Download Link</h2>\s*<textarea[^>]*>([^<]+)'
LINK_PATTERN = None #: final download url pattern
- CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)'
- CAPTCHA_DIV_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>'
- RECAPTCHA_PATTERN = None
- SOLVEMEDIA_PATTERN = None
+ CAPTCHA_PATTERN = r'(https?://[^"\']+?/captchas?/[^"\']+)'
+ CAPTCHA_BLOCK_PATTERN = r'>Enter code.*?<div.*?>(.+?)</div>'
+ RECAPTCHA_PATTERN = None
+ SOLVEMEDIA_PATTERN = None
FORM_PATTERN = None
FORM_INPUTS_MAP = None #: dict passed as input_names to parseHtmlForm
@@ -234,10 +233,10 @@ class XFSHoster(SimpleHoster):
retries = 3
else:
delay = 1 * 60 * 60
- retries = 25
+ retries = 24
- self.wait(delay, True)
- self.retry(retries, reason=_("Download limit exceeded"))
+ self.wantReconnect = True
+ self.retry(retries, delay, _("Download limit exceeded"))
elif 'countdown' in self.errmsg or 'Expired' in self.errmsg:
self.retry(reason=_("Link expired"))
@@ -249,6 +248,7 @@ class XFSHoster(SimpleHoster):
self.fail(_("File too large for free download"))
else:
+ self.wantReconnect = True
self.retry(wait_time=60, reason=self.errmsg)
if self.errmsg:
@@ -311,7 +311,7 @@ class XFSHoster(SimpleHoster):
inputs['code'] = self.decryptCaptcha(captcha_url)
return 1
- m = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
+ m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)
if m:
captcha_div = m.group(1)
numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))
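The block-captcha handling above collects digits together with their CSS padding-left offsets; ordering by offset reconstructs the code. A standalone sketch with invented markup (the sorting step itself is not shown in this diff):

    import re

    captcha_div = ('<span style="padding-left:22px">7</span>'
                   '<span style="padding-left:4px">3</span>'
                   '<span style="padding-left:13px">9</span>')   # invented example

    numerals = re.findall(r'<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', captcha_div)
    code = "".join(d for _, d in sorted(numerals, key=lambda n: int(n[0])))
    # code == "397"  (digits ordered left to right by their offset)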