summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: Walter Purcaro <vuolter@users.noreply.github.com> 2015-12-27 20:15:34 +0100
committer: Walter Purcaro <vuolter@users.noreply.github.com> 2015-12-27 22:53:31 +0100
commit: ff8c0249c13e40b9dd5c52244d94c7c6b76ce209 (patch)
tree: 4d72b84cb15d01f715a875156cd82440d2145348 /module
parent: [internal] Spare code optimizations and fixes (diff)
download: pyload-ff8c0249c13e40b9dd5c52244d94c7c6b76ce209.tar.xz
[Hoster] Rewrite some routines, improve others
Diffstat (limited to 'module')
-rw-r--r-- module/plugins/internal/Hoster.py | 346
1 file changed, 198 insertions, 148 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py
index f5ba13875..d563d426f 100644
--- a/module/plugins/internal/Hoster.py
+++ b/module/plugins/internal/Hoster.py
@@ -2,14 +2,15 @@
from __future__ import with_statement
+import hashlib
import mimetypes
import os
import re
from module.network.HTTPRequest import BadHeader
-from module.plugins.internal.Base import Base, create_getInfo, parse_fileInfo
+from module.plugins.internal.Base import Base
from module.plugins.internal.Plugin import Fail, Retry
-from module.plugins.internal.utils import encode, exists, fixurl, fs_join, parse_name
+from module.plugins.internal.misc import compute_checksum, encode, exists, fixurl, fsjoin, parse_name, safejoin
class Hoster(Base):
@@ -19,16 +20,26 @@ class Hoster(Base):
__status__ = "stable"
__pattern__ = r'^unmatchable$'
- __config__ = [("activated" , "bool", "Activated" , True),
- ("use_premium" , "bool", "Use premium account if available" , True),
- ("fallback" , "bool", "Fallback to free download if premium fails", True),
- ("chk_filesize", "bool", "Check file size" , True)]
+ __config__ = [("activated" , "bool", "Activated" , True ),
+ ("use_premium", "bool", "Use premium account if available" , True ),
+ ("fallback" , "bool", "Fallback to free download if premium fails", True )]
__description__ = """Base hoster plugin"""
__license__ = "GPLv3"
__authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
+ @property
+ def last_download(self):
+ return self._last_download if exists(self._last_download) else ""
+
+
+ @last_download.setter
+ def last_download(self, value):
+ if exists(value):
+ self._last_download = value or ""
+
+
def init_base(self):
#: Enable simultaneous processing of multiple downloads
self.limitDL = 0 #@TODO: Change to `limit_dl` in 0.4.10
@@ -40,7 +51,7 @@ class Hoster(Base):
self.resume_download = False
#: Location where the last call to download was saved
- self.last_download = None
+ self._last_download = ""
#: Re match of the last call to `checkDownload`
self.last_check = None
@@ -72,40 +83,64 @@ class Hoster(Base):
def _process(self, thread):
- self.log_debug("Plugin version: " + self.__version__)
- self.log_debug("Plugin status: " + self.__status__)
-
- if self.__status__ is "broken":
- self.fail(_("Plugin is temporarily unavailable"))
-
- elif self.__status__ is "testing":
- self.log_warning(_("Plugin may be unstable"))
-
self.thread = thread
+
+ self._initialize()
self._setup()
# self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10
- self.check_status()
+ # self.check_status()
+ self.check_duplicates()
self.pyfile.setStatus("starting")
try:
+ self.log_info(_("Processing url: ") + self.pyfile.url)
self.process(self.pyfile)
self.check_status()
- self.check_download()
+
+ self._check_download()
except Fail, e: #@TODO: Move to PluginThread in 0.4.10
- if self.get_config('fallback', True) and self.premium:
+ if self.config.get('fallback', True) and self.premium:
self.log_warning(_("Premium download failed"), e)
self.restart(premium=False)
else:
raise Fail(encode(e))
+ finally:
+ self._finalize()
+
+
+ #@TODO: Remove in 0.4.10
+ def _finalize(self):
+ pypack = self.pyfile.package()
+
+ self.pyload.hookManager.dispatchEvent("download_processed", self.pyfile)
+
+ try:
+ unfinished = any(pyfile.hasStatus('queued') for pyfile in pypack.getChildren()
+ if pyfile.id is not self.pyfile.id)
+ if unfinished:
+ return
+
+ self.pyload.hookManager.dispatchEvent("package_processed", pypack)
+
+ failed = any(pyfile.status in (1, 6, 8, 9, 14) for pyfile in pypack.getChildren())
+
+ if not failed:
+ return
+
+ self.pyload.hookManager.dispatchEvent("package_failed", pypack)
+
+ finally:
+ self.check_status()
+
def isdownload(self, url, resume=None, redirect=True):
link = False
- maxredirs = 10
+ maxredirs = 5
if resume is None:
resume = self.resume_download
@@ -114,7 +149,7 @@ class Hoster(Base):
maxredirs = max(redirect, 1)
elif redirect:
- maxredirs = self.get_config("maxredirs", default=maxredirs, plugin="UserAgentSwitcher")
+ maxredirs = self.pyload.api.getConfigValue("UserAgentSwitcher", "maxredirs", "plugin") or maxredirs
for i in xrange(maxredirs):
self.log_debug("Redirect #%d to: %s" % (i, url))
@@ -128,10 +163,10 @@ class Hoster(Base):
location = self.fixurl(header.get('location'), url)
code = header.get('code')
- if code == 302:
+ if code is 302:
link = location
- elif code == 301:
+ elif code is 301:
url = location
if redirect:
continue
@@ -176,7 +211,8 @@ class Hoster(Base):
if self.pyload.debug:
self.log_debug("DOWNLOAD URL " + url,
- *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url", "_[1]")])
+ *["%s=%s" % (key, val) for key, val in locals().items()
+ if key not in ("self", "url", "_[1]")])
dl_url = self.fixurl(url)
dl_basename = parse_name(self.pyfile.name)
@@ -184,15 +220,13 @@ class Hoster(Base):
self.pyfile.name = dl_basename
self.captcha.correct()
-
- if self.pyload.config.get("download", "skip_existing"):
- self.check_filedupe()
+ self.check_duplicates()
self.pyfile.setStatus("downloading")
dl_folder = self.pyload.config.get("general", "download_folder")
- dl_dirname = os.path.join(dl_folder, self.pyfile.package().folder)
- dl_filename = os.path.join(dl_dirname, dl_basename)
+ dl_dirname = safejoin(dl_folder, self.pyfile.package().folder)
+ dl_filename = safejoin(dl_dirname, dl_basename)
dl_dir = encode(dl_dirname)
dl_file = encode(dl_filename) #@TODO: Move safe-filename check to HTTPDownload in 0.4.10
@@ -212,7 +246,7 @@ class Hoster(Base):
dl_chunks = self.pyload.config.get("download", "chunks")
chunk_limit = chunks or self.chunk_limit or -1
- if dl_chunks is -1 or chunk_limit is -1:
+ if -1 in (dl_chunks, chunk_limit):
chunks = max(dl_chunks, chunk_limit)
else:
chunks = min(dl_chunks, chunk_limit)
@@ -233,15 +267,11 @@ class Hoster(Base):
self.pyfile.size = self.req.size
if self.req.code in (404, 410):
- bad_file = fs_join(dl_dirname, newname)
- try:
- os.remove(bad_file)
-
- except OSError, e:
- self.log_debug(_("Error removing `%s`") % bad_file, e)
-
- else:
+ bad_file = fsjoin(dl_dirname, newname)
+ if self.remove(bad_file):
return ""
+ else:
+ self.log_info(_("File saved"))
#@TODO: Recheck in 0.4.10
if disposition and newname:
@@ -249,8 +279,8 @@ class Hoster(Base):
if safename != newname:
try:
- old_file = fs_join(dl_dirname, newname)
- new_file = fs_join(dl_dirname, safename)
+ old_file = fsjoin(dl_dirname, newname)
+ new_file = fsjoin(dl_dirname, safename)
os.rename(old_file, new_file)
except OSError, e:
@@ -272,158 +302,178 @@ class Hoster(Base):
return dl_filename
- def check_filesize(self, file_size, size_tolerance=1024):
- """
- Checks the file size of the last downloaded file
-
- :param file_size: expected file size
- :param size_tolerance: size check tolerance
- """
- if not self.last_download:
- return
-
- dl_location = encode(self.last_download)
- dl_size = os.stat(dl_location).st_size
-
- if dl_size < 1:
- self.fail(_("Empty file"))
-
- elif file_size > 0:
- diff = abs(file_size - dl_size)
-
- if diff > size_tolerance:
- self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s")
- % (file_size, dl_size))
-
- elif diff != 0:
- self.log_warning(_("File size is not equal to expected size"))
-
-
- def check_file(self, rules, delete=False, read_size=1048576, file_size=0, size_tolerance=1024):
+ def scan_download(self, rules, read_size=1048576):
"""
Checks the content of the last downloaded file, re match is saved to `last_check`
:param rules: dict with names and rules to match (compiled regexp or strings)
:param delete: delete if matched
- :param file_size: expected file size
- :param size_tolerance: size check tolerance
- :param read_size: amount of bytes to read from files
:return: dictionary key of the first rule that matched
"""
- do_delete = False
- last_download = encode(self.last_download) #@TODO: Recheck in 0.4.10
+ dl_file = encode(self.last_download) #@TODO: Recheck in 0.4.10
- if not self.last_download or not exists(last_download):
- self.fail(self.pyfile.error or _("No file downloaded"))
+ if not self.last_download:
+ self.log_warning(_("No file to scan"))
+ return
- try:
- self.check_filesize(file_size, size_tolerance)
-
- with open(last_download, "rb") as f:
- content = f.read(read_size)
-
- #: Produces encoding errors, better log to other file in the future?
- # self.log_debug("Content: %s" % content)
- for name, rule in rules.items():
- if isinstance(rule, basestring):
- if rule in content:
- do_delete = True
- return name
-
- elif hasattr(rule, "search"):
- m = rule.search(content)
- if m is not None:
- do_delete = True
- self.last_check = m
- return name
- finally:
- if delete and do_delete:
- try:
- os.remove(last_download)
+ with open(dl_file, "rb") as f:
+ content = f.read(read_size)
- except OSError, e:
- self.log_warning(_("Error removing `%s`") % last_download, e)
+ #: Produces encoding errors, better log to other file in the future?
+ # self.log_debug("Content: %s" % content)
+ for name, rule in rules.items():
+ if isinstance(rule, basestring):
+ if rule in content:
+ return name
- else:
- self.log_info(_("File deleted: ") + self.last_download)
- self.last_download = "" #: Recheck in 0.4.10
+ elif hasattr(rule, "search"):
+ m = rule.search(content)
+ if m is not None:
+ self.last_check = m
+ return name
- def check_download(self):
- self.log_info(_("Checking downloaded file..."))
+ def _check_download(self):
+ self.log_info(_("Checking download..."))
+ self.pyfile.setCustomStatus(_("checking"))
- if self.captcha.task and not self.last_download:
- self.retry_captcha()
+ if not self.last_download:
+ if self.captcha.task:
+ self.retry_captcha()
+ else:
+ self.error(_("No file downloaded"))
- elif self.check_file({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')},
- delete=True):
+ elif self.scan_download({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}):
+ if self.remove(self.last_download):
+ self.last_download = ""
self.error(_("Empty file"))
- elif self.get_config('chk_filesize', False) and self.info.get('size'):
- # 10485760 is 10MB, tolerance is used when comparing displayed size on the hoster website to real size
- # For example displayed size can be 1.46GB for example, but real size can be 1.4649853GB
- self.check_filesize(self.info['size'], size_tolerance=10485760)
-
else:
- self.log_info(_("File is OK"))
+ self.pyload.hookManager.dispatchEvent("download_check", self.pyfile)
+ self.check_status()
+ self.log_info(_("File is OK"))
- def check_traffic(self):
+
+ def out_of_traffic(self):
if not self.account:
- return True
+ return
traffic = self.account.get_data('trafficleft')
if traffic is None:
- return False
+ return True
elif traffic is -1:
- return True
+ return False
else:
#@TODO: Rewrite in 0.4.10
size = self.pyfile.size / 1024
self.log_info(_("Filesize: %s KiB") % size,
_("Traffic left for user `%s`: %d KiB") % (self.account.user, traffic))
- return size <= traffic
+ return size > traffic
- def check_filedupe(self):
- """
- Checks if same file was/is downloaded within same package
+ # def check_size(self, file_size, size_tolerance=1024, delete=False):
+ # """
+ # Checks the file size of the last downloaded file
- :param starting: indicates that the current download is going to start
- :raises Skip:
- """
- pack = self.pyfile.package()
+ # :param file_size: expected file size
+ # :param size_tolerance: size check tolerance
+ # """
+ # self.log_info(_("Checking file size..."))
+
+ # if not self.last_download:
+ # self.log_warning(_("No file to check"))
+ # return
- for pyfile in self.pyload.files.cache.values():
- if pyfile is self.pyfile:
- continue
+ # dl_file = encode(self.last_download)
+ # dl_size = os.stat(dl_file).st_size
- if pyfile.name != self.pyfile.name or pyfile.package().folder != pack.folder:
- continue
+ # try:
+ # if dl_size == 0:
+ # delete = True
+ # self.fail(_("Empty file"))
- if pyfile.status in (0, 5, 7, 12): #: (finished, waiting, starting, downloading)
- self.skip(pyfile.pluginname)
+ # elif file_size > 0:
+ # diff = abs(file_size - dl_size)
- dl_folder = self.pyload.config.get("general", "download_folder")
- package_folder = pack.folder if self.pyload.config.get("general", "folder_per_package") else ""
- dl_location = fs_join(dl_folder, package_folder, self.pyfile.name)
+ # if diff > size_tolerance:
+ # self.fail(_("File size mismatch | Expected file size: %s bytes | Downloaded file size: %s bytes")
+ # % (file_size, dl_size))
- if not exists(dl_location):
+ # elif diff != 0:
+ # self.log_warning(_("File size is not equal to expected download size, but does not exceed the tolerance threshold"))
+ # self.log_debug("Expected file size: %s bytes" % file_size,
+ # "Downloaded file size: %s bytes" % dl_size,
+ # "Tolerance threshold: %s bytes" % size_tolerance)
+ # else:
+ # delete = False
+ # self.log_info(_("File size match"))
+
+ # finally:
+ # if delete:
+ # self.remove(dl_file, trash=False)
+
+
+ # def check_hash(self, type, digest, delete=False):
+ # hashtype = type.strip('-').upper()
+
+ # self.log_info(_("Checking file hashsum %s...") % hashtype)
+
+ # if not self.last_download:
+ # self.log_warning(_("No file to check"))
+ # return
+
+ # dl_file = encode(self.last_download)
+
+ # try:
+ # dl_hash = digest
+ # file_hash = compute_checksum(dl_file, hashtype)
+
+ # if not file_hash:
+ # self.fail(_("Unsupported hashing algorithm: ") + hashtype)
+
+ # elif dl_hash == file_hash:
+ # delete = False
+ # self.log_info(_("File hashsum %s match") % hashtype)
+
+ # else:
+ # self.fail(_("File hashsum %s mismatch | Expected file hashsum: %s | Downloaded file hashsum: %s")
+ # % (hashtype, dl_hash, file_hash))
+ # finally:
+ # if delete:
+ # self.remove(dl_file, trash=False)
+
+
+ def check_duplicates(self):
+ """
+ Checks if same file was downloaded within same package
+
+ :raises Skip:
+ """
+ pack_folder = self.pyfile.package().folder if self.pyload.config.get("general", "folder_per_package") else ""
+ dl_folder = self.pyload.config.get("general", "download_folder")
+ dl_file = fsjoin(dl_folder, pack_folder, self.pyfile.name)
+
+ if not exists(dl_file):
return
- pyfile = self.pyload.db.findDuplicates(self.pyfile.id, package_folder, self.pyfile.name)
- if pyfile:
- self.skip(pyfile[0])
+ if os.stat(dl_file).st_size == 0:
+ if self.remove(self.last_download):
+ self.last_download = ""
+ return
- size = os.stat(dl_location).st_size
- if size >= self.pyfile.size:
- self.skip(_("File exists"))
+ if self.pyload.config.get("download", "skip_existing"):
+ plugin = self.pyload.db.findDuplicates(self.pyfile.id, pack_folder, self.pyfile.name)
+ msg = plugin[0] if plugin else _("File exists")
+ self.skip(msg)
+ else:
+ dl_n = int(re.match(r'.+(\(\d+\)|)$', self.pyfile.name).group(1).strip("()") or 1)
+ self.pyfile.name += " (%s)" % (dl_n + 1)
- #: Deprecated method, use `check_filedupe` instead (Remove in 0.4.10)
+ #: Deprecated method (Recheck in 0.4.10)
def checkForSameFiles(self, *args, **kwargs):
- if self.pyload.config.get("download", "skip_existing"):
- return self.check_filedupe()
+ pass