diff options
author | Walter Purcaro <vuolter@gmail.com> | 2014-12-24 01:11:44 +0100 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2014-12-24 01:11:44 +0100 |
commit | 29df1397bbbe80eced4674b6fa39e16540c80901 (patch) | |
tree | 11bf4c6cbc8bb4f1efba88f9e81c5b830cefb721 | |
parent | [SkipRev] Typo (diff) | |
download | pyload-29df1397bbbe80eced4674b6fa39e16540c80901.tar.xz |
Extractor rewritten
-rw-r--r-- | module/plugins/hooks/ExtractArchive.py | 168 | ||||
-rw-r--r-- | module/plugins/internal/UnRar.py | 179 | ||||
-rw-r--r-- | module/plugins/internal/UnZip.py | 64 |
3 files changed, 272 insertions, 139 deletions
diff --git a/module/plugins/hooks/ExtractArchive.py b/module/plugins/hooks/ExtractArchive.py index ddec8319b..16942bef0 100644 --- a/module/plugins/hooks/ExtractArchive.py +++ b/module/plugins/hooks/ExtractArchive.py @@ -51,32 +51,33 @@ if os.name != "nt": from pwd import getpwnam from module.plugins.Hook import Hook, threaded, Expose -from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, WrongPassword -from module.utils import save_join, fs_encode +from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, PasswordError +from module.utils import save_join, uniqify class ExtractArchive(Hook): __name__ = "ExtractArchive" __type__ = "hook" - __version__ = "0.20" - - __config__ = [("activated", "bool", "Activated", True), - ("fullpath", "bool", "Extract full path", True), - ("overwrite", "bool", "Overwrite files", True), - ("passwordfile", "file", "password file", "archive_password.txt"), - ("deletearchive", "bool", "Delete archives when done", False), - ("subfolder", "bool", "Create subfolder for each package", False), - ("destination", "folder", "Extract files to", ""), - ("excludefiles", "str", "Exclude files from unpacking (seperated by ;)", ""), - ("recursive", "bool", "Extract archives in archvies", True), - ("queue", "bool", "Wait for all downloads to be finished", True), - ("renice", "int", "CPU Priority", 0)] + __version__ = "1.00" + + __config__ = [("activated" , "bool" , "Activated" , True ), + ("fullpath" , "bool" , "Extract full path" , True ), + ("overwrite" , "bool" , "Overwrite files" , False ), + ("keepbroken" , "bool" , "Extract broken archives" , False ), + ("repair" , "bool" , "Repair broken archives" , True ), + ("passwordfile" , "file" , "Store passwords in file" , "archive_password.txt" ), + ("delete" , "bool" , "Delete archive when successfully extracted", False ), + ("subfolder" , "bool" , "Create subfolder for each package" , False ), + ("destination" , "folder", "Extract files to" , "" ), + ("extensions" , "str" , "Extract the following extensions" , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"), + ("excludefiles" , "str" , "Don't extract the following files" , "*.nfo,*.DS_Store,index.dat,thumb.db" ), + ("recursive" , "bool" , "Extract archives in archives" , True ), + ("queue" , "bool" , "Wait for all downloads to be finished" , True ), + ("renice" , "int" , "CPU Priority" , 0 )] __description__ = """Extract different kind of archives""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "ranan@pyload.org"), - ("AndroKev", None), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] event_list = ["allDownloadsProcessed"] @@ -92,7 +93,7 @@ class ExtractArchive(Hook): self.passwords = [] names = [] - for p in ("UnRar", "UnZip"): + for p in ("UnRar", "SevenZip", "UnZip"): try: module = self.core.pluginManager.loadModule("internal", p) klass = getattr(module, p) @@ -154,13 +155,21 @@ class ExtractArchive(Hook): extracted = [] failed = [] + clearlist = lambda string: [x.lstrip('.') for x in string.replace(' ', '').replace(',', '|').replace(';', '|').split('|')] + destination = self.getConfig("destination") subfolder = self.getConfig("subfolder") fullpath = self.getConfig("fullpath") overwrite = self.getConfig("overwrite") - excludefiles = self.getConfig("excludefiles") + extensions = clearlist(self.getConfig("extensions")) + excludefiles = clearlist(self.getConfig("excludefiles")) renice = self.getConfig("renice") recursive = self.getConfig("recursive") + delete = self.getConfig("delete") + keepbroken = self.getConfig("keepbroken") + + if extensions: + self.logDebug("Extensions allowed: %s" % "|.".join(extensions)) # reload from txt file self.reloadPasswords() @@ -171,7 +180,7 @@ class ExtractArchive(Hook): #iterate packages -> plugins -> targets for pid in ids: p = self.core.files.getPackage(pid) - self.logInfo(_("Check package %s") % p.name) + self.logInfo(_("Check package: %s") % p.name) if not p: continue @@ -179,21 +188,25 @@ class ExtractArchive(Hook): out = save_join(dl, p.folder, destination, "") #: force trailing slash if subfolder: - out = save_join(out, fs_encode(p.folder)) + out = save_join(out, p.folder) if not exists(out): makedirs(out) files_ids = [(save_join(dl, p.folder, x['name']), x['id']) for x in p.getChildren().itervalues()] - matched = False - success = True + matched = False + success = True # check as long there are unseen files while files_ids: new_files_ids = [] + if extensions: + files_ids = [(file, id) for file, id in files_ids if filter(lambda ext: file.endswith(ext), extensions)] + for plugin in self.plugins: targets = plugin.getTargets(files_ids) + if targets: self.logDebug("Targets for %s: %s" % (plugin.__name__, targets)) matched = True @@ -205,19 +218,31 @@ class ExtractArchive(Hook): processed.append(target) # prevent extracting same file twice - self.logInfo(basename(target), _("Extract to %s") % out) + self.logInfo(basename(target), _("Extract to: %s") % out) try: - klass = plugin(self, target, out, fullpath, overwrite, excludefiles, renice) + klass = plugin(self, + target, + out, + p.password, + fullpath, + overwrite, + excludefiles, + renice, + delete, + keepbroken) klass.init() - new_files = self._extract(klass, fid, [p.password.strip()], thread) + new_files = self._extract(klass, fid, thread) except Exception, e: self.logError(basename(target), e) + new_files = None + + if new_files is None: success = False continue - self.logDebug("Extracted", new_files) + self.logDebug("Extracted files: %s" % new_files) self.setPermissions(new_files) for file in new_files: @@ -242,43 +267,78 @@ class ExtractArchive(Hook): return True if not failed else False - def _extract(self, plugin, fid, passwords, thread): + def _extract(self, plugin, fid, thread): pyfile = self.core.files.getFile(fid) - deletearchive = self.getConfig("deletearchive") pyfile.setCustomStatus(_("extracting")) thread.addActive(pyfile) # keep this file until everything is done try: - progress = lambda x: pyfile.setProgress(x) - success = False + progress = lambda x: pyfile.setProgress(x) + encrypted = False + passwords = self.getPasswords() + + try: + self.logInfo(basename(plugin.file), "Verifying...") + + tmp_password = plugin.password + plugin.password = "" #: Force verifying without password + + plugin.verify() + + except PasswordError: + encrypted = True + + except CRCError: + self.logWarning(basename(plugin.file), _("Archive damaged")) + + if not self.getConfig("repair"): + raise CRCError + + elif plugin.repair(): + self.logInfo(basename(plugin.file), _("Successfully repaired")) + + elif not self.getConfig("keepbroken"): + raise ArchiveError(_("Broken archive")) + + else: + self.logInfo(basename(plugin.file), _("All OK")) + + plugin.password = tmp_password + + if not encrypted: + plugin.extract(progress) - if not plugin.checkArchive(): - plugin.extract(progress, pw) - success = True else: self.logInfo(basename(plugin.file), _("Password protected")) - self.logDebug("Passwords: %s" % passwords if passwords else "No password provided") - for pw in set(passwords) | set(self.getPasswords()): + if plugin.password: + passwords.insert(0, plugin.password) + passwords = uniqify(self.passwords) + self.logDebug("Password: %s" % plugin.password) + else: + self.logDebug("No package password provided") + + for pw in passwords: try: self.logDebug("Try password: %s" % pw) - if plugin.checkPassword(pw): - plugin.extract(progress, pw) + + if plugin.setPassword(pw): + plugin.extract(progress) self.addPassword(pw) - success = True break + else: + raise PasswordError - except WrongPassword: + except PasswordError: self.logDebug("Password was wrong") - - if not success: - raise Exception(_("Wrong password")) + else: + raise PasswordError if self.core.debug: - self.logDebug("Would delete", ", ".join(plugin.getDeleteFiles())) + self.logDebug("Would delete: %s" % ", ".join(plugin.getDeleteFiles())) - if deletearchive: + if self.getConfig("delete"): files = plugin.getDeleteFiles() self.logInfo(_("Deleting %s files") % len(files)) for f in files: @@ -294,12 +354,16 @@ class ExtractArchive(Hook): return extracted_files - except ArchiveError, e: - self.logError(basename(plugin.file), _("Archive Error"), e) + except PasswordError: + self.logError(basename(plugin.file), _("Wrong password" if passwords else "No password found")) + plugin.password = "" except CRCError: self.logError(basename(plugin.file), _("CRC Mismatch")) + except ArchiveError, e: + self.logError(basename(plugin.file), _("Archive Error"), e) + except Exception, e: if self.core.debug: print_exc() @@ -307,7 +371,7 @@ class ExtractArchive(Hook): self.manager.dispatchEvent("archive_extract_failed", pyfile) - raise Exception(_("Extract failed")) + self.logError(basename(plugin.file), _("Extract failed")) @Expose @@ -337,15 +401,13 @@ class ExtractArchive(Hook): """ Adds a password to saved list""" passwordfile = self.getConfig("passwordfile") - if pw in self.passwords: - self.passwords.remove(pw) - self.passwords.insert(0, pw) + self.passwords = uniqify(self.passwords) try: with open(passwordfile, "wb") as f: for pw in self.passwords: - f.write(pw + "\n") + f.write(pw + '\n') except IOError, e: self.logError(e) diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 4bbd2042c..5633b31f7 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -4,11 +4,11 @@ import os import re from glob import glob -from os.path import basename, join +from os.path import basename, dirname, join from string import digits from subprocess import Popen, PIPE -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError from module.utils import save_join, decode @@ -22,21 +22,25 @@ def renice(pid, value): class UnRar(AbtractExtractor): __name__ = "UnRar" - __version__ = "0.21" + __version__ = "1.00" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] CMD = "unrar" + EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"] + + #@NOTE: there are some more uncovered rar formats - re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) - re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I) + re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) + re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I) + re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') - re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I) + re_wrongpwd = re.compile(r'password', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) @classmethod @@ -60,69 +64,99 @@ class UnRar(AbtractExtractor): @classmethod + def isArchive(cls, file): + f = basename(file).lower() + return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS) + + + @classmethod def getTargets(cls, files_ids): - result = [] + targets = [] for file, id in files_ids: - if not file.endswith(".rar"): + if not cls.isArchive(file): continue - match = cls.re_splitfile.findall(file) - if match: + m = cls.re_rarpart.findall(file) + if m: # only add first parts - if int(match[0][1]) == 1: - result.append((file, id)) + if int(m[0][1]) == 1: + targets.append((file, id)) else: - result.append((file, id)) + targets.append((file, id)) - return result + return targets - def init(self): - self.passwordProtected = False - self.headerProtected = False #: list files will not work without password - self.password = "" #: save the correct password + def check(self, out="", err=""): + if not out or not err: + return + if err.strip(): + if self.re_wrongpwd.search(err): + raise PasswordError - def checkArchive(self): - p = self.call_unrar("l", "-v", self.file) - out, err = p.communicate() - if self.re_wrongpwd.search(err): - self.passwordProtected = True - self.headerProtected = True - return True + elif self.re_wrongcrc.search(err): + raise CRCError + + else: #: raise error if anything is on stderr + raise ArchiveError(err.strip()) # output only used to check if passworded files are present for attr in self.re_filelist.findall(out): if attr[0].startswith("*"): - self.passwordProtected = True - return True + raise PasswordError + - self.listContent() - if not self.files: - raise ArchiveError("Empty Archive") + def verify(self): + p = self.call_cmd("l", "-v", self.file, password=self.password) + + self.check(*p.communicate()) + + if p and p.returncode: + raise ArchiveError("Process terminated") + + if not self.list(): + raise ArchiveError("Empty archive") + + + def isPassword(self, password): + if isinstance(password, basestring): + p = self.call_cmd("l", "-v", self.file, password=password) + out, err = p.communicate() + + if not self.re_wrongpwd.search(err): + return True return False - def checkPassword(self, password): - # at this point we can only verify header protected files - if self.headerProtected: - p = self.call_unrar("l", "-v", self.file, password=password) + def repair(self): + p = self.call_cmd("rc", self.file) + out, err = p.communicate() + + if p.returncode or err.strip(): + p = self.call_cmd("r", self.file) out, err = p.communicate() - if self.re_wrongpwd.search(err): + + if p.returncode or err.strip(): return False + else: + self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0)) return True - def extract(self, progress, password=""): + def extract(self, progress=lambda x: None): + self.verify() + + progress(0) + command = "x" if self.fullpath else "e" - p = self.call_unrar(command, self.file, self.out, password=password) - renice(p.pid, self.renice) + p = self.call_cmd(command, self.file, self.out, password=self.password) - progress(0) + renice(p.pid, self.renice) progressstring = "" while True: @@ -131,7 +165,7 @@ class UnRar(AbtractExtractor): if not c: break # reading a percentage sign -> set progress and restart - if c == '%': + if c is '%': progress(int(progressstring)) progressstring = "" # not reading a digit -> therefore restart @@ -139,46 +173,43 @@ class UnRar(AbtractExtractor): progressstring = "" # add digit to progressstring else: - progressstring = progressstring + c + progressstring += c progress(100) - # retrieve stderr - err = p.stderr.read() + self.files = self.list() - if "CRC failed" in err and not password and not self.passwordProtected: - raise CRCError - elif "CRC failed" in err: - raise WrongPassword + # retrieve stderr + self.check(err=p.stderr.read()) - if err.strip(): #: raise error if anything is on stderr - raise ArchiveError(err.strip()) if p.returncode: raise ArchiveError("Process terminated") - if not self.files: - self.password = password - self.listContent() - def getDeleteFiles(self): if ".part" in basename(self.file): return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) + # get files which matches .r* and filter unsuited files out parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) - return filter(lambda x: self.re_partfiles.match(x), parts) + return filter(lambda x: self.re_rarfile.match(x), parts) - def listContent(self): + + def list(self): command = "vb" if self.fullpath else "lb" - p = self.call_unrar(command, "-v", self.file, password=self.password) + + p = self.call_cmd(command, "-v", self.file, password=self.password) out, err = p.communicate() - if "Cannot open" in err: - raise ArchiveError("Cannot open file") + if err.strip(): + self.m.logError(err) + if "Cannot open" in err: + return list() - if err.strip(): #: only log error at this point - self.m.logError(err.strip()) + if p.returncode: + self.m.logError("Process terminated") + return list() result = set() @@ -186,17 +217,22 @@ class UnRar(AbtractExtractor): f = f.strip() result.add(save_join(self.out, f)) - self.files = result + return list(result) - def call_unrar(self, command, *xargs, **kwargs): + def call_cmd(self, command, *xargs, **kwargs): args = [] + # overwrite flag - args.append("-o+") if self.overwrite else args.append("-o-") + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") - if self.excludefiles: - for word in self.excludefiles.split(';'): - args.append("-x%s" % word) + for word in self.excludefiles: + args.append("-x%s" % word.strip()) # assume yes on all queries args.append("-y") @@ -207,10 +243,11 @@ class UnRar(AbtractExtractor): else: args.append("-p-") + if self.keepbroken: + args.append("-kb") + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) self.m.logDebug(" ".join(call)) - p = Popen(call, stdout=PIPE, stderr=PIPE) - - return p + return Popen(call, stdout=PIPE, stderr=PIPE) diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 81c298784..b3d54cba0 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,19 +1,23 @@ # -*- coding: utf-8 -*- +from __future__ import with_statement + import sys import zipfile -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError class UnZip(AbtractExtractor): __name__ = "UnZip" - __version__ = "0.12" + __version__ = "1.00" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = ["zip", "zip64"] @classmethod @@ -22,31 +26,61 @@ class UnZip(AbtractExtractor): @classmethod - def getTargets(cls, files_ids): - result = [] + def isArchive(cls, file): + return zipfile.is_zipfile(file) - for file, id in files_ids: - if file.endswith(".zip"): - result.append((file, id)) - return result + def verify(self): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + badcrc = z.testzip() + except (BadZipfile, LargeZipFile), e: + raise ArchiveError(e) - def extract(self, progress, password=""): + except RuntimeError, e: + if 'encrypted' in e: + raise PasswordError + else: + raise ArchiveError(e) + + else: + if badcrc: + raise CRCError + + if not self.list(): + raise ArchiveError("Empty archive") + + + def list(self): try: - z = zipfile.ZipFile(self.file) - self.files = z.namelist() - z.extractall(self.out, pwd=password) + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + return z.namelist() + except Exception: + return list() + + + def extract(self, progress=lambda x: None): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + progress(0) + z.extractall(self.out, pwd=self.password) + progress(100) except (BadZipfile, LargeZipFile), e: raise ArchiveError(e) except RuntimeError, e: if e is "Bad password for file": - raise WrongPassword + raise PasswordError else: raise ArchiveError(e) + finally: + self.files = self.list() + def getDeleteFiles(self): return [self.file] |