From 29df1397bbbe80eced4674b6fa39e16540c80901 Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Wed, 24 Dec 2014 01:11:44 +0100 Subject: Extractor rewritten --- module/plugins/internal/UnRar.py | 179 +++++++++++++++++++++++---------------- module/plugins/internal/UnZip.py | 64 ++++++++++---- 2 files changed, 157 insertions(+), 86 deletions(-) (limited to 'module/plugins/internal') diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 4bbd2042c..5633b31f7 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -4,11 +4,11 @@ import os import re from glob import glob -from os.path import basename, join +from os.path import basename, dirname, join from string import digits from subprocess import Popen, PIPE -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError from module.utils import save_join, decode @@ -22,21 +22,25 @@ def renice(pid, value): class UnRar(AbtractExtractor): __name__ = "UnRar" - __version__ = "0.21" + __version__ = "1.00" __description__ = """Rar extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] CMD = "unrar" + EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"] + + #@NOTE: there are some more uncovered rar formats - re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) - re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I) + re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I) + re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I) + re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)') - re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I) + re_wrongpwd = re.compile(r'password', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) @classmethod @@ -59,70 +63,100 @@ class UnRar(AbtractExtractor): return True + @classmethod + def isArchive(cls, file): + f = basename(file).lower() + return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS) + + @classmethod def getTargets(cls, files_ids): - result = [] + targets = [] for file, id in files_ids: - if not file.endswith(".rar"): + if not cls.isArchive(file): continue - match = cls.re_splitfile.findall(file) - if match: + m = cls.re_rarpart.findall(file) + if m: # only add first parts - if int(match[0][1]) == 1: - result.append((file, id)) + if int(m[0][1]) == 1: + targets.append((file, id)) else: - result.append((file, id)) + targets.append((file, id)) - return result + return targets - def init(self): - self.passwordProtected = False - self.headerProtected = False #: list files will not work without password - self.password = "" #: save the correct password + def check(self, out="", err=""): + if not out or not err: + return + if err.strip(): + if self.re_wrongpwd.search(err): + raise PasswordError - def checkArchive(self): - p = self.call_unrar("l", "-v", self.file) - out, err = p.communicate() - if self.re_wrongpwd.search(err): - self.passwordProtected = True - self.headerProtected = True - return True + elif self.re_wrongcrc.search(err): + raise CRCError + + else: #: raise error if anything is on stderr + raise ArchiveError(err.strip()) # output only used to check if passworded files are present for attr in self.re_filelist.findall(out): if attr[0].startswith("*"): - self.passwordProtected = True - return True + raise PasswordError + - self.listContent() - if not self.files: - raise ArchiveError("Empty Archive") + def verify(self): + p = self.call_cmd("l", "-v", self.file, password=self.password) + + self.check(*p.communicate()) + + if p and p.returncode: + raise ArchiveError("Process terminated") + + if not self.list(): + raise ArchiveError("Empty archive") + + + def isPassword(self, password): + if isinstance(password, basestring): + p = self.call_cmd("l", "-v", self.file, password=password) + out, err = p.communicate() + + if not self.re_wrongpwd.search(err): + return True return False - def checkPassword(self, password): - # at this point we can only verify header protected files - if self.headerProtected: - p = self.call_unrar("l", "-v", self.file, password=password) + def repair(self): + p = self.call_cmd("rc", self.file) + out, err = p.communicate() + + if p.returncode or err.strip(): + p = self.call_cmd("r", self.file) out, err = p.communicate() - if self.re_wrongpwd.search(err): + + if p.returncode or err.strip(): return False + else: + self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0)) return True - def extract(self, progress, password=""): + def extract(self, progress=lambda x: None): + self.verify() + + progress(0) + command = "x" if self.fullpath else "e" - p = self.call_unrar(command, self.file, self.out, password=password) - renice(p.pid, self.renice) + p = self.call_cmd(command, self.file, self.out, password=self.password) - progress(0) + renice(p.pid, self.renice) progressstring = "" while True: @@ -131,7 +165,7 @@ class UnRar(AbtractExtractor): if not c: break # reading a percentage sign -> set progress and restart - if c == '%': + if c is '%': progress(int(progressstring)) progressstring = "" # not reading a digit -> therefore restart @@ -139,46 +173,43 @@ class UnRar(AbtractExtractor): progressstring = "" # add digit to progressstring else: - progressstring = progressstring + c + progressstring += c progress(100) - # retrieve stderr - err = p.stderr.read() + self.files = self.list() - if "CRC failed" in err and not password and not self.passwordProtected: - raise CRCError - elif "CRC failed" in err: - raise WrongPassword + # retrieve stderr + self.check(err=p.stderr.read()) - if err.strip(): #: raise error if anything is on stderr - raise ArchiveError(err.strip()) if p.returncode: raise ArchiveError("Process terminated") - if not self.files: - self.password = password - self.listContent() - def getDeleteFiles(self): if ".part" in basename(self.file): return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I)) + # get files which matches .r* and filter unsuited files out parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I)) - return filter(lambda x: self.re_partfiles.match(x), parts) + return filter(lambda x: self.re_rarfile.match(x), parts) - def listContent(self): + + def list(self): command = "vb" if self.fullpath else "lb" - p = self.call_unrar(command, "-v", self.file, password=self.password) + + p = self.call_cmd(command, "-v", self.file, password=self.password) out, err = p.communicate() - if "Cannot open" in err: - raise ArchiveError("Cannot open file") + if err.strip(): + self.m.logError(err) + if "Cannot open" in err: + return list() - if err.strip(): #: only log error at this point - self.m.logError(err.strip()) + if p.returncode: + self.m.logError("Process terminated") + return list() result = set() @@ -186,17 +217,22 @@ class UnRar(AbtractExtractor): f = f.strip() result.add(save_join(self.out, f)) - self.files = result + return list(result) - def call_unrar(self, command, *xargs, **kwargs): + def call_cmd(self, command, *xargs, **kwargs): args = [] + # overwrite flag - args.append("-o+") if self.overwrite else args.append("-o-") + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") - if self.excludefiles: - for word in self.excludefiles.split(';'): - args.append("-x%s" % word) + for word in self.excludefiles: + args.append("-x%s" % word.strip()) # assume yes on all queries args.append("-y") @@ -207,10 +243,11 @@ class UnRar(AbtractExtractor): else: args.append("-p-") + if self.keepbroken: + args.append("-kb") + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue call = [self.CMD, command] + args + list(xargs) self.m.logDebug(" ".join(call)) - p = Popen(call, stdout=PIPE, stderr=PIPE) - - return p + return Popen(call, stdout=PIPE, stderr=PIPE) diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 81c298784..b3d54cba0 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -1,19 +1,23 @@ # -*- coding: utf-8 -*- +from __future__ import with_statement + import sys import zipfile -from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError +from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError class UnZip(AbtractExtractor): __name__ = "UnZip" - __version__ = "0.12" + __version__ = "1.00" __description__ = """Zip extractor plugin""" __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = ["zip", "zip64"] @classmethod @@ -22,31 +26,61 @@ class UnZip(AbtractExtractor): @classmethod - def getTargets(cls, files_ids): - result = [] + def isArchive(cls, file): + return zipfile.is_zipfile(file) - for file, id in files_ids: - if file.endswith(".zip"): - result.append((file, id)) - return result + def verify(self): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + badcrc = z.testzip() + except (BadZipfile, LargeZipFile), e: + raise ArchiveError(e) - def extract(self, progress, password=""): + except RuntimeError, e: + if 'encrypted' in e: + raise PasswordError + else: + raise ArchiveError(e) + + else: + if badcrc: + raise CRCError + + if not self.list(): + raise ArchiveError("Empty archive") + + + def list(self): try: - z = zipfile.ZipFile(self.file) - self.files = z.namelist() - z.extractall(self.out, pwd=password) + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + z.setpassword(self.password) + return z.namelist() + except Exception: + return list() + + + def extract(self, progress=lambda x: None): + try: + with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z: + progress(0) + z.extractall(self.out, pwd=self.password) + progress(100) except (BadZipfile, LargeZipFile), e: raise ArchiveError(e) except RuntimeError, e: if e is "Bad password for file": - raise WrongPassword + raise PasswordError else: raise ArchiveError(e) + finally: + self.files = self.list() + def getDeleteFiles(self): return [self.file] -- cgit v1.2.3