diff options
author | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-09-21 14:01:21 +0200 |
---|---|---|
committer | Walter Purcaro <vuolter@users.noreply.github.com> | 2015-09-21 14:01:21 +0200 |
commit | af896302d7701e2bc63b1523b9ea73f38f5201c8 (patch) | |
tree | f4a49d87bf577f3c00d34e70de807f6a456292f9 /module/plugins/internal | |
parent | Merge branch 'pr/n1736_GammaC0de' into stable (diff) | |
download | pyload-af896302d7701e2bc63b1523b9ea73f38f5201c8.tar.xz |
Fix pyfile.name processing
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/Crypter.py | 15 | ||||
-rw-r--r-- | module/plugins/internal/Hoster.py | 45 | ||||
-rw-r--r-- | module/plugins/internal/Plugin.py | 30 | ||||
-rw-r--r-- | module/plugins/internal/SimpleCrypter.py | 1 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 20 |
5 files changed, 45 insertions, 66 deletions
diff --git a/module/plugins/internal/Crypter.py b/module/plugins/internal/Crypter.py index d0e8eb1b4..2033b67df 100644 --- a/module/plugins/internal/Crypter.py +++ b/module/plugins/internal/Crypter.py @@ -1,15 +1,13 @@ # -*- coding: utf-8 -*- -import urlparse - -from module.plugins.internal.Hoster import Hoster, _fixurl +from module.plugins.internal.Hoster import Hoster, parse_name from module.utils import save_path as safe_filename class Crypter(Hoster): __name__ = "Crypter" __type__ = "crypter" - __version__ = "0.07" + __version__ = "0.08" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -78,13 +76,14 @@ class Crypter(Hoster): "%d links" % len(links), "Saved to folder: %s" % folder if folder else "Saved to download folder") - pid = self.pyload.api.addPackage(name, map(self.fixurl, links), package_queue) + links = map(self.fixurl, links) + pid = self.pyload.api.addPackage(name, links, package_queue) if package_password: self.pyload.api.setPackageData(pid, {'password': package_password}) #: Workaround to do not break API addPackage method - set_folder = lambda x: self.pyload.api.setPackageData(pid, {'folder': x or ""}) + set_folder = lambda x: self.pyload.api.setPackageData(pid, {'folder': safe_filename(x) or ""}) if use_subfolder: if not subfolder_per_package: @@ -93,9 +92,9 @@ class Crypter(Hoster): elif not folder_per_package or name is not folder: if not folder: - folder = urlparse.urlparse(_fixurl(name)).path.split("/")[-1] + folder = parse_name(name) - set_folder(safe_filename(folder)) + set_folder(folder) self.log_debug("Set package %(name)s folder to: %(folder)s" % {'name': name, 'folder': folder}) elif folder_per_package: diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 3449c8128..8457420c1 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -12,16 +12,17 @@ import urlparse from module.plugins.internal.Captcha import Captcha from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip, - chunks, encode, exists, fixurl as _fixurl, replace_patterns, - seconds_to_midnight, set_cookie, set_cookies, parse_html_form, - parse_html_tag_attr_value, timestamp) + chunks, encode, exists, parse_html_form, + parse_html_tag_attr_value, parse_name, + replace_patterns, seconds_to_midnight, + set_cookie, set_cookies, timestamp) from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename #@TODO: Remove in 0.4.10 def parse_fileInfo(klass, url="", html=""): info = klass.get_info(url, html) - return info['name'], info['size'], info['status'], info['url'] + return encode(info['name']), info['size'], info['status'], info['url'] #@TODO: Remove in 0.4.10 @@ -44,7 +45,7 @@ def create_getInfo(klass): class Hoster(Plugin): __name__ = "Hoster" __type__ = "hoster" - __version__ = "0.23" + __version__ = "0.24" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -117,13 +118,9 @@ class Hoster(Plugin): @classmethod def get_info(cls, url="", html=""): - url = _fixurl(url) - url_p = urlparse.urlparse(url) - return {'name' : (url_p.path.split('/')[-1] or - url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or - url_p.netloc.split('.', 1)[0]), + return {'name' : parse_name(url), 'size' : 0, - 'status': 3 if url else 8, + 'status': 3 if url.strip() else 8, 'url' : url} @@ -370,11 +367,12 @@ class Hoster(Plugin): raise Retry(encode(msg)) #@TODO: Remove `encode` in 0.4.10 - def fixurl(self, url): - url = _fixurl(url) + def fixurl(self, url, baseurl=None): + if not baseurl: + baseurl = self.pyfile.url if not urlparse.urlparse(url).scheme: - url_p = urlparse.urlparse(self.pyfile.url) + url_p = urlparse.urlparse(baseurl) baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) url = urlparse.urljoin(baseurl, url) @@ -401,17 +399,11 @@ class Hoster(Plugin): """ self.check_abort() - url = self.fixurl(url) - - if not url or not isinstance(url, basestring): - self.fail(_("No given url")) - if self.pyload.debug: self.log_debug("DOWNLOAD URL " + url, *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")]) - name = _fixurl(self.pyfile.name) - self.pyfile.name = urlparse.urlparse(name).path.split('/')[-1] or name + self.pyfile.name = parse_name(self.pyfile.name) #: Safe check self.captcha.correct() self.check_for_same_files() @@ -445,7 +437,7 @@ class Hoster(Plugin): #@TODO: Recheck in 0.4.10 if disposition and newname: - finalname = urlparse.urlparse(newname).path.split('/')[-1].split(' filename*=')[0] + finalname = parse_name(newname).split(' filename*=')[0] if finalname != newname: try: @@ -609,12 +601,7 @@ class Hoster(Plugin): link = url elif 'location' in header and header['location']: - location = header['location'] - - if not urlparse.urlparse(location).scheme: - url_p = urlparse.urlparse(url) - baseurl = "%s://%s" % (url_p.scheme, url_p.netloc) - location = urlparse.urljoin(baseurl, location) + location = self.fixurl(header['location'], url) if 'code' in header and header['code'] == 302: link = location @@ -624,7 +611,7 @@ class Hoster(Plugin): continue else: - extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1] + extension = os.path.splitext(parse_name(url))[-1] if 'content-type' in header and header['content-type']: mimetype = header['content-type'].split(';')[0].strip() diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py index e4c16846c..9494d34a6 100644 --- a/module/plugins/internal/Plugin.py +++ b/module/plugins/internal/Plugin.py @@ -7,8 +7,9 @@ import inspect import os import re import sys -import urllib import unicodedata +import urllib +import urlparse if os.name != "nt": import grp @@ -49,17 +50,19 @@ def exists(path): #@TODO: Move to utils in 0.4.10 -def fixurl(url): - return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip().rstrip('/') +def parse_name(url): + url = urllib.unquote(url) + url = url.decode('unicode-escape') + url = html_unescape(url) + url = urllib.quote(url) + + url_p = urlparse.urlparse(url.strip().rstrip('/')) + name = (url_p.path.split('/')[-1] or + url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or + url_p.netloc.split('.', 1)[0]) -def fixname(m): - m = unicodedata.normalize('NFKD', m) - output = '' - for c in m: - if not unicodedata.combining(c): - output += c - return output + return urllib.unquote(name) #@TODO: Move to utils in 0.4.10 @@ -177,7 +180,7 @@ def chunks(iterable, size): class Plugin(object): __name__ = "Plugin" __type__ = "plugin" - __version__ = "0.33" + __version__ = "0.34" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -342,11 +345,6 @@ class Plugin(object): :param decode: Wether to decode the output according to http header, should be True in most cases :return: Loaded content """ - url = fixurl(url) - - if not url or not isinstance(url, basestring): - self.fail(_("No given url")) - if self.pyload.debug: self.log_debug("LOAD URL " + url, *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")]) diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index 488578bc0..3771956bc 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -4,7 +4,6 @@ import re from module.plugins.internal.Crypter import Crypter from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, replace_patterns, set_cookie, set_cookies -from module.utils import fixup, html_unescape class SimpleCrypter(Crypter, SimpleHoster): diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 19263dd8f..7c47d3620 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -5,13 +5,12 @@ from __future__ import with_statement import os import re import time -import urlparse from module.PyFile import statusMap as _statusMap from module.network.HTTPRequest import BadHeader from module.network.RequestFactory import getURL as get_url from module.plugins.internal.Hoster import Hoster, create_getInfo, parse_fileInfo -from module.plugins.internal.Plugin import Fail, encode, fixurl, fixname, replace_patterns, seconds_to_midnight, set_cookie, set_cookies +from module.plugins.internal.Plugin import Fail, encode, parse_name, replace_patterns, seconds_to_midnight, set_cookie, set_cookies from module.utils import fixup, fs_encode, parseFileSize as parse_size @@ -22,7 +21,7 @@ statusMap = dict((v, k) for k, v in _statusMap.items()) class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "1.83" + __version__ = "1.84" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -91,7 +90,7 @@ class SimpleHoster(Hoster): LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"' """ - NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + NAME_REPLACEMENTS = [] SIZE_REPLACEMENTS = [] URL_REPLACEMENTS = [] @@ -175,9 +174,8 @@ class SimpleHoster(Hoster): info['status'] = 2 if 'N' in info['pattern']: - name = fixname(info['pattern']['N']) - info['name'] = replace_patterns(fixurl(name), - cls.NAME_REPLACEMENTS) + name = replace_patterns(info['pattern']['N'], cls.NAME_REPLACEMENTS) + info['name'] = parse_name(name) if 'S' in info['pattern']: size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'], @@ -420,9 +418,7 @@ class SimpleHoster(Hoster): self.offline() elif re.search('filename', errmsg, re.I): - url_p = urlparse.urlparse(self.pyfile.url) - self.pyfile.url = "%s://%s/%s" % (url_p.scheme, url_p.netloc, url_p.path.split('/')[0]) - self.retry(1, msg=_("Wrong url")) + self.fail(_("Wrong url")) elif re.search('premium', errmsg, re.I): self.fail(_("File can be downloaded by premium users only")) @@ -482,8 +478,8 @@ class SimpleHoster(Hoster): self.log_debug("Previous file info: %s" % old_info) try: - url = self.info['url'].strip() - name = self.info['name'].strip() + url = self.info['url'] + name = self.info['name'] except KeyError: pass |