diff options
author | Walter Purcaro <vuolter@gmail.com> | 2015-01-27 17:48:25 +0100 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2015-01-27 17:48:25 +0100 |
commit | 8848a359a43316fb346b728d1d79d7b72d27e5a0 (patch) | |
tree | a9604ae6ddbc34713cbac0090a29e2e6e88c6556 /module | |
parent | [ClickAndLoad] Use self.manager instead hookManager (diff) | |
download | pyload-8848a359a43316fb346b728d1d79d7b72d27e5a0.tar.xz |
Update Extractor (again)
Diffstat (limited to 'module')
-rw-r--r-- | module/plugins/hooks/ExtractArchive.py | 273 | ||||
-rw-r--r-- | module/plugins/internal/Extractor.py | 76 | ||||
-rw-r--r-- | module/plugins/internal/SevenZip.py | 163 | ||||
-rw-r--r-- | module/plugins/internal/UnRar.py | 175 | ||||
-rw-r--r-- | module/plugins/internal/UnZip.py | 47 |
5 files changed, 481 insertions, 253 deletions
diff --git a/module/plugins/hooks/ExtractArchive.py b/module/plugins/hooks/ExtractArchive.py index 20e585046..11764759f 100644 --- a/module/plugins/hooks/ExtractArchive.py +++ b/module/plugins/hooks/ExtractArchive.py @@ -6,8 +6,6 @@ import os import sys from copy import copy -from os import remove, chmod, makedirs -from os.path import exists, basename, isfile, isdir from traceback import print_exc # monkey patch bug in python 2.6 and lower @@ -47,35 +45,37 @@ if sys.version_info < (2, 7) and os.name != "nt": if os.name != "nt": from grp import getgrnam - from os import chown from pwd import getpwnam from module.plugins.Hook import Hook, threaded, Expose -from module.plugins.internal.Extractor import ArchiveError, CRCError, WrongPassword -from module.utils import save_join, fs_encode +from module.plugins.internal.Extractor import ArchiveError, CRCError, PasswordError +from module.utils import fs_decode, save_join, uniqify class ExtractArchive(Hook): __name__ = "ExtractArchive" __type__ = "hook" - __version__ = "1.08" - - __config__ = [("activated", "bool", "Activated", True), - ("fullpath", "bool", "Extract full path", True), - ("overwrite", "bool", "Overwrite files", True), - ("passwordfile", "file", "password file", "archive_password.txt"), - ("deletearchive", "bool", "Delete archives when done", False), - ("subfolder", "bool", "Create subfolder for each package", False), - ("destination", "folder", "Extract files to", ""), - ("excludefiles", "str", "Exclude files from unpacking (seperated by ;)", ""), - ("recursive", "bool", "Extract archives in archvies", True), - ("queue", "bool", "Wait for all downloads to be finished", True), - ("renice", "int", "CPU Priority", 0)] + __version__ = "1.09" + + __config__ = [("activated" , "bool" , "Activated" , True ), + ("fullpath" , "bool" , "Extract full path" , True ), + ("overwrite" , "bool" , "Overwrite files" , False ), + ("keepbroken" , "bool" , "Extract broken archives" , False ), + # ("repair" , "bool" , "Repair broken archives" , True ), + ("passwordfile" , "file" , "Store passwords in file" , "archive_password.txt" ), + ("delete" , "bool" , "Delete archive when successfully extracted", False ), + ("subfolder" , "bool" , "Create subfolder for each package" , False ), + ("destination" , "folder", "Extract files to" , "" ), + ("extensions" , "str" , "Extract the following extensions" , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"), + ("excludefiles" , "str" , "Don't extract the following files" , "*.nfo,*.DS_Store,index.dat,thumb.db" ), + ("recursive" , "bool" , "Extract archives in archives" , True ), + ("queue" , "bool" , "Wait for all downloads to be finished" , False ), + ("renice" , "int" , "CPU Priority" , 0 )] __description__ = """Extract different kind of archives""" __license__ = "GPLv3" __authors__ = [("RaNaN", "ranan@pyload.org"), - ("AndroKev", None), + ("AndroKev", ""), ("Walter Purcaro", "vuolter@gmail.com")] @@ -87,18 +87,23 @@ class ExtractArchive(Hook): pass + def coreReady(self): + self.extracting = False + + def setup(self): - self.plugins = [] - self.passwords = [] - names = [] + self.interval = 300 + self.extractors = [] + self.passwords = [] - for p in ("UnRar", "UnZip"): + names = [] + for p in ("UnRar", "SevenZip", "UnZip"): try: module = self.core.pluginManager.loadModule("internal", p) klass = getattr(module, p) if klass.checkDeps(): names.append(p) - self.plugins.append(klass) + self.extractors.append(klass) except OSError, e: if e.errno == 2: @@ -118,49 +123,55 @@ class ExtractArchive(Hook): else: self.logInfo(_("No Extract plugins activated")) - # queue with package ids - self.queue = [] + + def periodical(self): + if not self.extracting: + self.extractPackage(*self.getQueue()) @Expose - def extractPackage(self, id): - """ Extract package with given id""" - self.manager.startThread(self.extract, [id]) + def extractPackage(self, *ids): + """ Extract packages with given id""" + self.manager.startThread(self.extract, ids) def packageFinished(self, pypack): - pid = pypack.id - if self.getConfig("queue"): + if self.extracting or self.getConfig("queue"): self.logInfo(_("Package %s queued for later extracting") % pypack.name) - self.queue.append(pid) + self.addToQueue(pypack.id) else: - self.manager.startThread(self.extract, [pid]) + self.extractPackage(pypack.id) @threaded - def allDownloadsProcessed(self, thread): - local = copy(self.queue) - - del self.queue[:] - - if self.extract(local, thread): #: check only if all gone fine, no failed reporting for now + def allDownloadsProcessed(self): + if self.extract(self.getQueue()): #@NOTE: check only if all gone fine, no failed reporting for now self.manager.dispatchEvent("all_archives_extracted") self.manager.dispatchEvent("all_archives_processed") - def extract(self, ids, thread=None): + def extract(self, ids): processed = [] extracted = [] failed = [] + clearList = lambda string: [x.lstrip('.') for x in string.replace(' ', '').replace(',', '|').replace(';', '|').split('|')] + destination = self.getConfig("destination") subfolder = self.getConfig("subfolder") fullpath = self.getConfig("fullpath") overwrite = self.getConfig("overwrite") + extensions = clearList(self.getConfig("extensions")) + excludefiles = clearList(self.getConfig("excludefiles")) excludefiles = self.getConfig("excludefiles") renice = self.getConfig("renice") recursive = self.getConfig("recursive") + delete = self.getConfig("delete") + keepbroken = self.getConfig("keepbroken") + + if extensions: + self.logDebug("Extensions: %s" % "|.".join(extensions)) # reload from txt file self.reloadPasswords() @@ -168,63 +179,81 @@ class ExtractArchive(Hook): # dl folder dl = self.config['general']['download_folder'] - #iterate packages -> plugins -> targets + #iterate packages -> extractors -> targets for pid in ids: - p = self.core.files.getPackage(pid) - self.logInfo(_("Check package %s") % p.name) - if not p: + pypack = self.core.files.getPackage(pid) + + self.logInfo(_("Check package: %s") % pypack.name) + + if not pypack: continue # determine output folder - out = save_join(dl, p.folder, destination, "") #: force trailing slash + out = save_join(dl, pypack.folder, destination, "") #: force trailing slash if subfolder: - out = save_join(out, fs_encode(p.folder)) + out = save_join(out, pypack.folder) - if not exists(out): - makedirs(out) + if not os.path.exists(out): + os.makedirs(out) - files_ids = [(save_join(dl, p.folder, x['name']), x['id']) for x in p.getChildren().itervalues()] - matched = False - success = True + matched = False + success = True + files_ids = [(save_join(dl, pypack.folder, x['name']), x['id']) for x in pypack.getChildren().itervalues()] # check as long there are unseen files while files_ids: new_files_ids = [] - for plugin in self.plugins: - targets = plugin.getTargets(files_ids) + if extensions: + files_ids = [(file, id) for file, id in files_ids if filter(lambda ext: file.endswith(ext), extensions)] + + for Extractor in self.extractors: + targets = Extractor.getTargets(files_ids) if targets: - self.logDebug("Targets for %s: %s" % (plugin.__name__, targets)) + self.logDebug("Targets for %s: %s" % (Extractor.__name__, targets)) matched = True - for target, fid in targets: - if target in processed: - self.logDebug(basename(target), "skipped") + for filename, fid in targets: + fname = os.path.basename(filename) + + if filename in processed: + self.logDebug(fname, "Skipped") continue - processed.append(target) # prevent extracting same file twice + processed.append(filename) # prevent extracting same file twice - self.logInfo(basename(target), _("Extract to %s") % out) + self.logInfo(fname, _("Extract to: %s") % out) try: - klass = plugin(self, target, out, fullpath, overwrite, excludefiles, renice) + self.extracting = True + + klass = Extractor(self, + filename, + out, + fullpath, + overwrite, + excludefiles, + renice, + delete, + keepbroken, + fid) klass.init() - new_files = self._extract(klass, fid, [p.password.strip()], thread) + new_files = self._extract(klass, fid, pypack.password) except Exception, e: - self.logError(basename(target), e) + self.logError(fname, e) success = False continue - self.logDebug("Extracted", new_files) + self.logDebug("Extracted files: %s" % new_files) self.setPermissions(new_files) for file in new_files: - if not exists(file): + if not os.path.exists(file): self.logDebug("New file %s does not exists" % file) continue - if recursive and isfile(file): + if recursive and os.path.isfile(file): new_files_ids.append((file, fid)) # append as new target files_ids = new_files_ids # also check extracted files @@ -232,96 +261,120 @@ class ExtractArchive(Hook): if matched: if success: extracted.append(pid) - self.manager.dispatchEvent("package_extracted", p) + self.manager.dispatchEvent("package_extracted", pypack) else: failed.append(pid) - self.manager.dispatchEvent("package_extract_failed", p) + self.manager.dispatchEvent("package_extract_failed", pypack) else: self.logInfo(_("No files found to extract")) + if not matched or not success and subfolder: + try: + os.rmdir(out) + except OSError: + pass + + self.extracting = False return True if not failed else False - def _extract(self, plugin, fid, passwords, thread): + def _extract(self, archive, fid, password): pyfile = self.core.files.getFile(fid) - deletearchive = self.getConfig("deletearchive") + fname = os.path.basename(fs_decode(archive.target)) pyfile.setCustomStatus(_("extracting")) - thread.addActive(pyfile) # keep this file until everything is done + pyfile.setProgress(0) try: - progress = lambda x: pyfile.setProgress(x) success = False - if not plugin.checkArchive(): - plugin.extract(progress, self.getPasswords()) + if not archive.checkArchive(): + archive.extract(password) success = True else: - self.logInfo(basename(plugin.file), _("Password protected")) - self.logDebug("Passwords: %s" % passwords if passwords else "No password provided") + self.logInfo(fname, _("Password protected")) + self.logDebug("Password: %s" % (password or "No provided")) - for pw in set(passwords) | set(self.getPasswords()): + for pw in set(self.getPasswords(False) + [password]): try: self.logDebug("Try password: %s" % pw) - if plugin.checkPassword(pw): - plugin.extract(progress, pw) + if archive.checkPassword(pw): + archive.extract(pw) self.addPassword(pw) success = True break - except WrongPassword: + except PasswordError: self.logDebug("Password was wrong") + else: + raise PasswordError - if not success: - raise Exception(_("Wrong password")) + pyfile.setProgress(100) + pyfile.setStatus("processing") if self.core.debug: - self.logDebug("Would delete", ", ".join(plugin.getDeleteFiles())) + self.logDebug("Would delete: %s" % ", ".join(plugin.getDeleteFiles())) - if deletearchive: - files = plugin.getDeleteFiles() + if self.getConfig("delete"): + files = archive.getDeleteFiles() self.logInfo(_("Deleting %s files") % len(files)) for f in files: - if exists(f): - remove(f) + if os.path.exists(f): + os.remove(f) else: self.logDebug("%s does not exists" % f) - self.logInfo(basename(plugin.file), _("Extracting finished")) + self.logInfo(fname, _("Extracting finished")) - extracted_files = plugin.getExtractedFiles() - self.manager.dispatchEvent("archive_extracted", pyfile, plugin.out, plugin.file, extracted_files) + extracted_files = archive.getExtractedFiles() + self.manager.dispatchEvent("archive_extracted", pyfile, archive.out, archive.file, extracted_files) return extracted_files - except ArchiveError, e: - self.logError(basename(plugin.file), _("Archive Error"), e) + except PasswordError: + self.logError(fname, _("Wrong password" if password else "No password found")) except CRCError: - self.logError(basename(plugin.file), _("CRC Mismatch")) + self.logError(fname, _("CRC Mismatch")) + + except ArchiveError, e: + self.logError(fname, _("Archive Error"), e) except Exception, e: if self.core.debug: print_exc() - self.logError(basename(plugin.file), _("Unknown Error"), e) + self.logError(fname, _("Unknown Error"), e) + + finally: + pyfile.finishIfDone() self.manager.dispatchEvent("archive_extract_failed", pyfile) raise Exception(_("Extract failed")) + def getQueue(self): + return self.getStorage("ExtractArchive", []) + + + def addToQueue(self, item): + queue = self.getQueue() + return self.setStorage("ExtractArchive", queue + [item] if item not in queue else queue) + + @Expose - def getPasswords(self): + def getPasswords(self, reload=True): """ List of saved passwords """ + if reload: + self.reloadPasswords() + return self.passwords def reloadPasswords(self): - passwordfile = self.getConfig("passwordfile") - try: passwords = [] - with open(passwordfile, "a+") as f: + with open(self.getConfig("passwordfile")) as f: for pw in f.read().splitlines(): passwords.append(pw) @@ -335,17 +388,12 @@ class ExtractArchive(Hook): @Expose def addPassword(self, pw): """ Adds a password to saved list""" - passwordfile = self.getConfig("passwordfile") - - if pw in self.passwords: - self.passwords.remove(pw) - - self.passwords.insert(0, pw) - try: - with open(passwordfile, "wb") as f: + self.passwords = uniqify([pw] + self.passwords) + + with open(self.getConfig("passwordfile"), "wb") as f: for pw in self.passwords: - f.write(pw + "\n") + f.write(pw + '\n') except IOError, e: self.logError(e) @@ -353,20 +401,21 @@ class ExtractArchive(Hook): def setPermissions(self, files): for f in files: - if not exists(f): + if not os.path.exists(f): continue try: if self.config['permission']['change_file']: - if isfile(f): - chmod(f, int(self.config['permission']['file'], 8)) - elif isdir(f): - chmod(f, int(self.config['permission']['folder'], 8)) + if os.path.isfile(f): + os.chmod(f, int(self.config['permission']['file'], 8)) + + elif os.path.isdir(f): + os.chmod(f, int(self.config['permission']['folder'], 8)) if self.config['permission']['change_dl'] and os.name != "nt": uid = getpwnam(self.config['permission']['user'])[2] gid = getgrnam(self.config['permission']['group'])[2] - chown(f, uid, gid) + os.chown(f, uid, gid) except Exception, e: self.logWarning(_("Setting User and Group failed"), e) diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 55d9b2e83..ddf0f8a85 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -1,5 +1,11 @@ # -*- coding: utf-8 -*- +import os + +from module.PyFile import PyFile +from module.utils import fs_encode + + class ArchiveError(Exception): pass @@ -8,29 +14,39 @@ class CRCError(Exception): pass -class WrongPassword(Exception): +class PasswordError(Exception): pass class Extractor: __name__ = "Extractor" - __version__ = "0.14" + __version__ = "0.15" __description__ = """Base extractor plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] + __authors__ = [("RaNaN", "ranan@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = [] - @staticmethod - def checkDeps(): + @classmethod + def isArchive(cls, filename): + name = os.path.basename(filename).lower() + return any(name.endswith(ext) for ext in cls.EXTENSIONS) + + + @classmethod + def checkDeps(cls): """ Check if system statisfy dependencies :return: boolean """ return True - @staticmethod - def getTargets(files_ids): + @classmethod + def getTargets(cls, files_ids): """ Filter suited targets from list of filename id tuple list :param files_ids: List of filepathes :return: List of targets, id tuple list @@ -38,24 +54,28 @@ class Extractor: raise NotImplementedError - def __init__(self, m, file, out, fullpath, overwrite, excludefiles, renice): - """Initialize extractor for specific file - - :param m: ExtractArchive Hook plugin - :param file: Absolute filepath - :param out: Absolute path to destination directory - :param fullpath: extract to fullpath - :param overwrite: Overwrite existing archives - :param renice: Renice value - """ - self.m = m - self.file = file - self.out = out - self.fullpath = fullpath - self.overwrite = overwrite - self.excludefiles = excludefiles - self.renice = renice - self.files = [] #: Store extracted files here + def __init__(self, manager, filename, out, + fullpath=True, + overwrite=False, + excludefiles=[], + renice=0, + delete=False, + keepbroken=False, + fid=None): + """ Initialize extractor for specific file """ + self.manager = manager + self.target = fs_encode(filename) + self.out = out + self.fullpath = fullpath + self.overwrite = overwrite + self.excludefiles = excludefiles + self.renice = renice + self.delete = delete + self.keepbroken = keepbroken + self.files = [] #: Store extracted files here + + pyfile = self.manager.core.files.getFile(fid) if fid else None + self.notifyProgress = lambda x: pyfile.setProgress(x) if pyfile else lambda x: None def init(self): @@ -83,12 +103,12 @@ class Extractor: return True - def extract(self, progress, password=None): + def extract(self, password=None): """Extract the archive. Raise specific errors in case of failure. :param progress: Progress function, call this to update status :param password password to use - :raises WrongPassword + :raises PasswordError :raises CRCError :raises ArchiveError :return: @@ -101,7 +121,7 @@ class Extractor: :return: List with paths of files to delete """ - raise NotImplementedError + return [self.target] def getExtractedFiles(self): diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py new file mode 100644 index 000000000..508cf9c8d --- /dev/null +++ b/module/plugins/internal/SevenZip.py @@ -0,0 +1,163 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from subprocess import Popen, PIPE + +from module.plugins.internal.UnRar import UnRar, renice +from module.utils import save_join + + +class SevenZip(UnRar): + __name__ = "SevenZip" + __version__ = "0.02" + + __description__ = """7-Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Michael Nowak", ""), + ("Walter Purcaro", "vuolter@gmail.com")] + + + CMD = "7z" + + EXTENSIONS = [".7z", ".xz", ".zip", ".gz", ".gzip", ".tgz", ".bz2", ".bzip2", + ".tbz2", ".tbz", ".tar", ".wim", ".swm", ".lzma", ".rar", ".cab", + ".arj", ".z", ".taz", ".cpio", ".rpm", ".deb", ".lzh", ".lha", + ".chm", ".chw", ".hxs", ".iso", ".msi", ".doc", ".xls", ".ppt", + ".dmg", ".xar", ".hfs", ".exe", ".ntfs", ".fat", ".vhd", ".mbr", + ".squashfs", ".cramfs", ".scap"] + + + #@NOTE: there are some more uncovered 7z formats + re_filelist = re.compile(r'([\d\:]+)\s+([\d\:]+)\s+([\w\.]+)\s+(\d+)\s+(\d+)\s+(.+)') + re_wrongpwd = re.compile(r'(Can not open encrypted archive|Wrong password)', re.I) + re_wrongcrc = re.compile(r'Encrypted\s+\=\s+\+', re.I) + + + @classmethod + def checkDeps(cls): + if os.name == "nt": + cls.CMD = os.path.join(pypath, "7z.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + else: + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + + return True + + + def checkArchive(self): + p = self.call_cmd("l", "-slt", self.target) + out, err = p.communicate() + + if p.returncode > 1: + raise ArchiveError("Process terminated") + + # check if output or error macthes the 'wrong password'-Regexp + if self.re_wrongpwd.search(out): + return True + + # check if output matches 'Encrypted = +' + if self.re_wrongcrc.search(out): + return True + + # check if archive is empty + self.files = self.list() + if not self.files: + raise ArchiveError("Empty Archive") + + return False + + + def checkPassword(self, password): + p = self.call_cmd("l", self.target, password=password) + p.communicate() + return p.returncode == 0 + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, '-o' + self.out, self.target, password=password) + + renice(p.pid, self.renice) + + progressstring = "" + while True: + c = p.stdout.read(1) + # quit loop on eof + if not c: + break + # reading a percentage sign -> set progress and restart + if c == '%': + self.notifyProgress(int(progressstring)) + progressstring = "" + # not reading a digit -> therefore restart + elif c not in digits: + progressstring = "" + # add digit to progressstring + else: + progressstring += c + + # retrieve stderr + err = p.stderr.read() + + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError + + elif err.strip(): #: raise error if anything is on stderr + raise ArchiveError(err.strip()) + + if p.returncode > 1: + raise ArchiveError("Process terminated") + + if not self.files: + self.files = self.list(password) + + + def list(self, password=None): + command = "l" if self.fullpath else "l" + + p = self.call_cmd(command, self.target, password=password) + out, err = p.communicate() + code = p.returncode + + if "Can not open" in err: + raise ArchiveError("Cannot open file") + + if code != 0: + raise ArchiveError("Process terminated unsuccessful") + + result = set() + for groups in self.re_filelist.findall(out): + f = groups[-1].strip() + result.add(save_join(self.out, f)) + + return list(result) + + + def call_cmd(self, command, *xargs, **kwargs): + args = [] + + #overwrite flag + if self.overwrite: + args.append("-y") + + #set a password + if "password" in kwargs and kwargs["password"]: + args.append("-p%s" % kwargs["password"]) + else: + args.append("-p-") + + #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.cmd, command] + args + list(xargs) + + self.manager.logDebug(" ".join([decode(arg) for arg in call])) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + return p diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 43592c3de..7f1b08caf 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -4,110 +4,92 @@ import os import re from glob import glob -from os.path import basename, join from string import digits from subprocess import Popen, PIPE -from module.plugins.internal.Extractor import Extractor, WrongPassword, ArchiveError, CRCError +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError from module.utils import save_join, decode def renice(pid, value): - if os.name != "nt" and value: + if value and os.name != "nt": try: Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) except Exception: - print "Renice failed" + pass class UnRar(Extractor): __name__ = "UnRar" - __version__ = "1.03" + __version__ = "1.04" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org")] + __authors__ = [("RaNaN", "RaNaN@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com")] CMD = "unrar" - # there are some more uncovered rar formats - re_version = re.compile(r'UNRAR 5[\d.]+') - re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) - re_partfiles = re.compile(r'.*\.(rar|r\d+)$', re.I) - re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+') - re_filelist5 = re.compile(r'(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') - re_wrongpwd = re.compile(r'Corrupt file or wrong password|password incorrect', re.I) + EXTENSIONS = [".rar", ".zip", ".cab", ".arj", ".lzh", ".tar", ".gz", ".bz2", + ".ace", ".uue", ".jar", ".iso", ".7z", ".xz", ".z"] + + #@NOTE: there are some more uncovered rar formats + re_rarpart1 = re.compile(r'\.part(\d+)\.rar$', re.I) + re_rarpart2 = re.compile(r'\.r(\d+)$', re.I) + + re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') + re_wrongpwd = re.compile(r'password', re.I) re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) - @staticmethod - def checkDeps(): + @classmethod + def checkDeps(cls): if os.name == "nt": - UnRar.CMD = join(pypath, "UnRAR.exe") - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + cls.CMD = os.path.join(pypath, "UnRAR.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() else: try: - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() - except OSError: - # fallback to rar - UnRar.CMD = "rar" - p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + except OSError: #: fallback to rar + cls.CMD = "rar" + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) p.communicate() return True - @staticmethod - def getTargets(files_ids): - result = [] + @classmethod + def getTargets(cls, files_ids): + targets = [] - for file, id in files_ids: - if not file.endswith(".rar"): + for filename, id in files_ids: + if not cls.isArchive(filename): continue - match = UnRar.re_splitfile.findall(file) - if match: - # only add first parts - if int(match[0][1]) == 1: - result.append((file, id)) - else: - result.append((file, id)) - - return result - + m = cls.re_rarpart1.match(filename) + if not m or int(m.group(1)) is 1: #@NOTE: only add first part file + targets.append((filename, id)) - def init(self): - self.passwordProtected = False - self.headerProtected = False #: list files will not work without password - self.smallestFile = None #: small file to test passwords - self.password = "" #: save the correct password + return targets def checkArchive(self): - p = self.call_unrar("l", "-v", self.file) + p = self.call_cmd("l", "-v", self.target) out, err = p.communicate() + if self.re_wrongpwd.search(err): - self.passwordProtected = True - self.headerProtected = True return True # output only used to check if passworded files are present - if self.re_version.search(out): - for attr, size, name in self.re_filelist5.findall(out): - if attr.startswith("*"): - self.passwordProtected = True - return True - else: - for name, size, packed in self.re_filelist.findall(out): - if name.startswith("*"): - self.passwordProtected = True - return True + for attr in self.re_filelist.findall(out): + if attr[0].startswith("*"): + return True - self.listContent() + self.files = self.list() if not self.files: raise ArchiveError("Empty Archive") @@ -116,22 +98,18 @@ class UnRar(Extractor): def checkPassword(self, password): # at this point we can only verify header protected files - if self.headerProtected: - p = self.call_unrar("l", "-v", self.file, password=password) - out, err = p.communicate() - if self.re_wrongpwd.search(err): - return False - - return True + p = self.call_cmd("l", "-v", self.target, password=password) + out, err = p.communicate() + return False if self.re_wrongpwd.search(err) else True - def extract(self, progress, password=None): + def extract(self, password=None): command = "x" if self.fullpath else "e" - p = self.call_unrar(command, self.file, self.out, password=password) + p = self.call_cmd(command, self.target, self.out, password=password) + renice(p.pid, self.renice) - progress(0) progressstring = "" while True: c = p.stdout.read(1) @@ -140,21 +118,20 @@ class UnRar(Extractor): break # reading a percentage sign -> set progress and restart if c == '%': - progress(int(progressstring)) + self.notifyProgress(int(progressstring)) progressstring = "" # not reading a digit -> therefore restart elif c not in digits: progressstring = "" # add digit to progressstring else: - progressstring = progressstring + c - progress(100) + progressstring += c # retrieve stderr err = p.stderr.read() if self.re_wrongpwd.search(err): - raise WrongPassword + raise PasswordError elif self.re_wrongcrc.search(err): raise CRCError @@ -162,50 +139,65 @@ class UnRar(Extractor): elif err.strip(): #: raise error if anything is on stderr raise ArchiveError(err.strip()) - if p.returncode: + if p.returncode != 0: raise ArchiveError("Process terminated") if not self.files: - self.password = password - self.listContent() + self.files = self.list(password) def getDeleteFiles(self): - if ".part" in basename(self.file): - return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) - # get files which matches .r* and filter unsuited files out - parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) - return filter(lambda x: self.re_partfiles.match(x), parts) + files = [] + + for i in [1, 2]: + try: + dir, name = os.path.split(self.target) + part = self.getattr(self, "re_rarpart%d" % i).match(name).group(1) + filename = os.path.join(dir, name.replace(part, '*', 1)) + files.extend(glob(filename)) + + except Exception: + continue + if self.target not in files: + files.insert(0, self.target) - def listContent(self): + return files + + + def list(self, password=None): command = "vb" if self.fullpath else "lb" - p = self.call_unrar(command, "-v", self.file, password=self.password) + + p = self.call_cmd(command, "-v", self.target, password=password) out, err = p.communicate() if "Cannot open" in err: raise ArchiveError("Cannot open file") if err.strip(): #: only log error at this point - self.m.logError(err.strip()) + self.manager.logError(err.strip()) result = set() - for f in decode(out).splitlines(): f = f.strip() result.add(save_join(self.out, f)) - self.files = result + return list(result) - def call_unrar(self, command, *xargs, **kwargs): + def call_cmd(self, command, *xargs, **kwargs): args = [] + # overwrite flag - args.append("-o+") if self.overwrite else args.append("-o-") + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") - if self.excludefiles: - for word in self.excludefiles.split(';'): - args.append("-x%s" % word) + for word in self.excludefiles: + args.append("-x%s" % word.strip()) # assume yes on all queries args.append("-y") @@ -216,10 +208,13 @@ class UnRar(Extractor): else: args.append("-p-") + if self.keepbroken: + args.append("-kb") + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) - self.m.logDebug(" ".join(call)) - p = Popen(call, stdout=PIPE, stderr=PIPE) + self.manager.logDebug(" ".join(call)) + p = Popen(call, stdout=PIPE, stderr=PIPE) return p diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 888ae7ebe..026503be5 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,51 +1,52 @@ # -*- coding: utf-8 -*- +from __future__ import with_statement + +import os import sys import zipfile -from module.plugins.internal.Extractor import Extractor, WrongPassword, ArchiveError +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError class UnZip(Extractor): __name__ = "UnZip" - __version__ = "1.02" + __version__ = "1.03" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org")] + __authors__ = [("RaNaN", "RaNaN@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com")] - @staticmethod - def checkDeps(): - return sys.version_info[:2] >= (2, 6) + EXTENSIONS = [".zip", ".zip64"] - @staticmethod - def getTargets(files_ids): - result = [] + @classmethod + def checkDeps(cls): + return sys.version_info[:2] >= (2, 6) - for file, id in files_ids: - if file.endswith(".zip"): - result.append((file, id)) - return result + @classmethod + def getTargets(cls, files_ids): + return [(filename, id) for filename, id in files_ids if cls.isArchive(filename)] - def extract(self, progress, password=None): + def extract(self, password=None): try: - z = zipfile.ZipFile(self.file) - self.files = z.namelist() - z.extractall(self.out, pwd=password) + with zipfile.ZipFile(self.target, 'r', allowZip64=True) as z: + z.setpassword(self.password) + if not z.testzip(): + z.extractall(self.out) + self.files = z.namelist() + else: + raise CRCError except (BadZipfile, LargeZipFile), e: raise ArchiveError(e) except RuntimeError, e: - if e is "Bad password for file": - raise WrongPassword + if "encrypted" in e: + raise PasswordError else: raise ArchiveError(e) - - - def getDeleteFiles(self): - return [self.file] |