diff options
Diffstat (limited to 'module/plugins/internal/SimpleHoster.py')
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 68 |
1 files changed, 40 insertions, 28 deletions
diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 01702d423..24a2fa6b0 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -5,8 +5,6 @@ import re from time import time from urlparse import urlparse -from pycurl import FOLLOWLOCATION - from module.PyFile import statusMap as _statusMap from module.network.CookieJar import CookieJar from module.network.RequestFactory import getURL @@ -34,7 +32,11 @@ def _error(self, reason, type): #@TODO: Remove in 0.4.10 def _wait(self, seconds, reconnect): if seconds: - self.setWait(seconds, reconnect) + self.setWait(seconds) + + if reconnect is not None: + self.wantReconnect = reconnect + super(SimpleHoster, self).wait() @@ -57,12 +59,13 @@ def parseHtmlTagAttrValue(attr_name, tag): return m.group(2) if m else None -def parseHtmlForm(attr_str, html, input_names=None): - for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, +def parseHtmlForm(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, html, re.S | re.I): inputs = {} - action = parseHtmlTagAttrValue("action", form.group('tag')) - for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I): + action = parseHtmlTagAttrValue("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): name = parseHtmlTagAttrValue("name", inputtag.group(1)) if name: value = parseHtmlTagAttrValue("value", inputtag.group(1)) @@ -71,7 +74,7 @@ def parseHtmlForm(attr_str, html, input_names=None): else: inputs[name] = value - if isinstance(input_names, dict): + if input_names: # check input attributes for key, val in input_names.iteritems(): if key in inputs: @@ -111,23 +114,24 @@ def timestamp(): #@TODO: Move to hoster class in 0.4.10 def _getDirectLink(self, url): - self.req.http.c.setopt(FOLLOWLOCATION, 0) + header = self.load(url, ref=True, just_header=True, decode=True) - html = self.load(url, ref=True, decode=True) + if not 'code' in header or header['code'] != 302: + return "" - self.req.http.c.setopt(FOLLOWLOCATION, 1) + if not 'location' in header or not header['location']: + return "" - if self.getInfo(url, html)['status'] is not 2: - try: - return re.search(r'Location\s*:\s*(.+)', self.req.http.header, re.I).group(1).rstrip() #@TODO: Remove .rstrip() in 0.4.10 - except: - pass + # if 'content-type' in header and "text/plain" not in header['content-type']: + # return "" + + return header['location'] class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "0.61" + __version__ = "0.67" __pattern__ = r'^unmatchable$' @@ -198,7 +202,7 @@ class SimpleHoster(Hoster): @classmethod def getInfo(cls, url="", html=""): - info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3, 'url': url or ""} + info = {'name': urlparse(url).path.split('/')[-1] or _("Unknown"), 'size': 0, 'status': 3 if url else 1, 'url': url or ""} if not html: if url: @@ -225,13 +229,19 @@ class SimpleHoster(Hoster): except: pass - for pattern in ("INFO_PATTERN", "NAME_PATTERN", "SIZE_PATTERN", - "FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): #@TODO: Remove in 0.4.10 + for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN", + "FILE_NAME_PATTERN", "NAME_PATTERN", + "FILE_SIZE_PATTERN", "SIZE_PATTERN"): #@TODO: Remove old patterns starting with "FILE_" in 0.4.10 try: attr = getattr(cls, pattern) - info.update(re.search(attr, html).groupdict()) + dict = re.search(attr, html).groupdict() + + if all(True for k in dict if k not in info): + info.update(dict) + except AttributeError: continue + else: online = True @@ -316,8 +326,7 @@ class SimpleHoster(Hoster): premium_only = 'error' in self.info and self.info['error'] == "premium-only" - info = self.getInfo(pyfile.url, self.html) - self._updateInfo(info) + self._updateInfo(self.getInfo(pyfile.url, self.html)) self.checkNameSize() @@ -409,7 +418,9 @@ class SimpleHoster(Hoster): #: Deprecated def getFileInfo(self): - return self.checkInfo() + self.info = {} + self.checkInfo() + return self.info def _updateInfo(self, info): @@ -419,14 +430,15 @@ class SimpleHoster(Hoster): def handleDirect(self): - self.link = _getDirectLink(self, self.pyfile.url) + link = _getDirectLink(self, self.pyfile.url) - if self.link: + if link: self.logInfo(_("Direct download link detected")) + self.link = link + self._updateInfo(self.getInfo(self.pyfile.url)) self.checkNameSize() - else: self.logDebug(_("Direct download link not found")) @@ -480,7 +492,7 @@ class SimpleHoster(Hoster): self.retry(max_tries=max_tries, reason=_("Download limit reached")) - def parseHtmlForm(self, attr_str='', input_names=None): + def parseHtmlForm(self, attr_str="", input_names={}): return parseHtmlForm(attr_str, self.html, input_names) |