diff options
Diffstat (limited to 'pyload/plugin/extractor')
-rw-r--r-- | pyload/plugin/extractor/SevenZip.py | 155 | ||||
-rw-r--r-- | pyload/plugin/extractor/UnRar.py | 248 | ||||
-rw-r--r-- | pyload/plugin/extractor/UnZip.py | 68 | ||||
-rw-r--r-- | pyload/plugin/extractor/__init__.py | 1 |
4 files changed, 472 insertions, 0 deletions
diff --git a/pyload/plugin/extractor/SevenZip.py b/pyload/plugin/extractor/SevenZip.py new file mode 100644 index 000000000..4b23c6ff2 --- /dev/null +++ b/pyload/plugin/extractor/SevenZip.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from subprocess import Popen, PIPE + +from pyload.plugin.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice +from pyload.utils import fs_encode, safe_join + + +class SevenZip(UnRar): + __name__ = "SevenZip" + __version__ = "0.08" + + __description__ = """7-Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Michael Nowak", ""), + ("Walter Purcaro", "vuolter@gmail.com")] + + + CMD = "7z" + VERSION = "" + + EXTENSIONS = [".7z", ".xz", ".zip", ".gz", ".gzip", ".tgz", ".bz2", ".bzip2", + ".tbz2", ".tbz", ".tar", ".wim", ".swm", ".lzma", ".rar", ".cab", + ".arj", ".z", ".taz", ".cpio", ".rpm", ".deb", ".lzh", ".lha", + ".chm", ".chw", ".hxs", ".iso", ".msi", ".doc", ".xls", ".ppt", + ".dmg", ".xar", ".hfs", ".exe", ".ntfs", ".fat", ".vhd", ".mbr", + ".squashfs", ".cramfs", ".scap"] + + + #@NOTE: there are some more uncovered 7z formats + re_filelist = re.compile(r'([\d\:]+)\s+([\d\:]+)\s+([\w\.]+)\s+(\d+)\s+(\d+)\s+(.+)') + re_wrongpwd = re.compile(r'(Can not open encrypted archive|Wrong password)', re.I) + re_wrongcrc = re.compile(r'Encrypted\s+\=\s+\+', re.I) + re_version = re.compile(r'7-Zip\s(?:\[64\]\s)?(\d+\.\d+)', re.I) + + + @classmethod + def isUsable(cls): + if os.name == "nt": + cls.CMD = os.path.join(pypath, "7z.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out,err = p.communicate() + else: + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + + cls.VERSION = cls.re_version.search(out).group(1) + + return True + + + def check(self): + file = fs_encode(self.filename) + + p = self.call_cmd("t", file) + out, err = p.communicate() + + if p.returncode > 1: + raise CRCError(err) + + p = self.call_cmd("l", "-slt", file) + out, err = p.communicate() + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + # check if output or error macthes the 'wrong password'-Regexp + if self.re_wrongpwd.search(out): + raise PasswordError + + # check if output matches 'Encrypted = +' + if self.re_wrongcrc.search(out): + raise CRCError(_("Header protected")) + + + def isPassword(self, password): + p = self.call_cmd("l", fs_encode(self.filename), password=password) + p.communicate() + return p.returncode == 0 + + + def repair(self): + return False + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, '-o' + self.out, fs_encode(self.filename), password=password) + + renice(p.pid, self.renice) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + self.files = self.list(password) + + + def list(self, password=None): + command = "l" if self.fullpath else "l" + + p = self.call_cmd(command, fs_encode(self.filename), password=password) + out, err = p.communicate() + + if "Can not open" in err: + raise ArchiveError(_("Cannot open file")) + + if p.returncode > 1: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + result = set() + for groups in self.re_filelist.findall(out): + f = groups[-1].strip() + result.add(safe_join(self.out, f)) + + return list(result) + + + def call_cmd(self, command, *xargs, **kwargs): + args = [] + + #overwrite flag + if self.overwrite: + args.append("-y") + + #set a password + if "password" in kwargs and kwargs["password"]: + args.append("-p'%s'" % kwargs["password"]) + else: + args.append("-p-") + + #@NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + + self.manager.logDebug(" ".join(call)) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + return p diff --git a/pyload/plugin/extractor/UnRar.py b/pyload/plugin/extractor/UnRar.py new file mode 100644 index 000000000..8a3985678 --- /dev/null +++ b/pyload/plugin/extractor/UnRar.py @@ -0,0 +1,248 @@ +# -*- coding: utf-8 -*- + +import os +import re + +from glob import glob +from string import digits +from subprocess import Popen, PIPE + +from pyload.plugin.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from pyload.utils import decode, fs_encode, safe_join + + +def renice(pid, value): + if value and os.name != "nt": + try: + Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + + except Exception: + pass + + +class UnRar(Extractor): + __name__ = "UnRar" + __version__ = "1.13" + + __description__ = """Rar extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN", "RaNaN@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("Immenz", "immenz@gmx.net"),] + + + CMD = "unrar" + VERSION = "" + + EXTENSIONS = [".rar"] + + + re_multipart = re.compile(r'\.(part|r)(\d+)(?:\.rar)?',re.I) + + re_filefixed = re.compile(r'Building (.+)') + re_filelist = re.compile(r'^(.)(\s*[\w\.\-]+)\s+(\d+\s+)+(?:\d+\%\s+)?[\d\-]{8}\s+[\d\:]{5}', re.M|re.I) + + re_wrongpwd = re.compile(r'password', re.I) + re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I) + + re_version = re.compile(r'UNRAR\s(\d+\.\d+)', re.I) + + + @classmethod + def isUsable(cls): + if os.name == "nt": + cls.CMD = os.path.join(pypath, "UnRAR.exe") + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + else: + try: + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + + except OSError: #: fallback to rar + cls.CMD = "rar" + p = Popen([cls.CMD], stdout=PIPE, stderr=PIPE) + out, err = p.communicate() + + cls.VERSION = cls.re_version.search(out).group(1) + + return True + + + @classmethod + def isMultipart(cls,filename): + multipart = cls.re_multipart.search(filename) + if multipart: + # First Multipart file (part1.rar for *.part1-9.rar format or *.rar for .r1-9 format) handled as normal Archive + return False if (multipart.group(1) == "part" and int(multipart.group(2)) == 1) else True + + return False + + + def check(self): + p = self.call_cmd("l", "-v", fs_encode(self.filename)) + out, err = p.communicate() + + if self.re_wrongpwd.search(err): + raise PasswordError + + if self.re_wrongcrc.search(err): + raise CRCError(err) + + # output only used to check if passworded files are present + for attr in self.re_filelist.findall(out): + if attr[0].startswith("*"): + raise PasswordError + + + def isPassword(self, password): + # at this point we can only verify header protected files + p = self.call_cmd("l", "-v", fs_encode(self.filename), password=password) + out, err = p.communicate() + return False if self.re_wrongpwd.search(err) else True + + + def repair(self): + p = self.call_cmd("rc", fs_encode(self.filename)) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err or p.returncode: + p = self.call_cmd("r", fs_encode(self.filename)) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err or p.returncode: + return False + else: + dir = os.path.dirname(filename) + name = re_filefixed.search(out).group(1) + + self.filename = os.path.join(dir, name) + + return True + + + def _progress(self, process): + s = "" + while True: + c = process.stdout.read(1) + # quit loop on eof + if not c: + break + # reading a percentage sign -> set progress and restart + if c == '%': + self.notifyProgress(int(s)) + s = "" + # not reading a digit -> therefore restart + elif c not in digits: + s = "" + # add digit to progressstring + else: + s += c + + + def extract(self, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_cmd(command, fs_encode(self.filename), self.out, password=password) + + renice(p.pid, self.renice) + + # communicate and retrieve stderr + self._progress(p) + err = p.stderr.read().strip() + + if err: + if self.re_wrongpwd.search(err): + raise PasswordError + + elif self.re_wrongcrc.search(err): + raise CRCError(err) + + else: #: raise error if anything is on stderr + raise ArchiveError(err) + + if p.returncode: + raise ArchiveError(_("Process return code: %d") % p.returncode) + + self.files = self.list(password) + + + def getDeleteFiles(self): + dir, name = os.path.split(self.filename) + + # actually extracted file + files = [self.filename] + + # eventually Multipart Files + files.extend(safe_join(dir, os.path.basename(file)) for file in filter(self.isMultipart, os.listdir(dir)) + if re.sub(self.re_multipart,".rar",name) == re.sub(self.re_multipart,".rar",file)) + + return files + + + def list(self, password=None): + command = "vb" if self.fullpath else "lb" + + p = self.call_cmd(command, "-v", fs_encode(self.filename), password=password) + out, err = p.communicate() + + if "Cannot open" in err: + raise ArchiveError(_("Cannot open file")) + + if err.strip(): #: only log error at this point + self.manager.logError(err.strip()) + + result = set() + if not self.fullpath and self.VERSION.startswith('5'): + # NOTE: Unrar 5 always list full path + for f in decode(out).splitlines(): + f = safe_join(self.out, os.path.basename(f.strip())) + if os.path.isfile(f): + result.add(safe_join(self.out, os.path.basename(f))) + else: + for f in decode(out).splitlines(): + f = f.strip() + result.add(safe_join(self.out, f)) + + return list(result) + + + def call_cmd(self, command, *xargs, **kwargs): + args = [] + + # overwrite flag + if self.overwrite: + args.append("-o+") + else: + args.append("-o-") + if self.delete: + args.append("-or") + + for word in self.excludefiles: + args.append("-x'%s'" % word.strip()) + + # assume yes on all queries + args.append("-y") + + # set a password + if "password" in kwargs and kwargs['password']: + args.append("-p%s" % kwargs['password']) + else: + args.append("-p-") + + if self.keepbroken: + args.append("-kb") + + # NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + + self.manager.logDebug(" ".join(call)) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + return p diff --git a/pyload/plugin/extractor/UnZip.py b/pyload/plugin/extractor/UnZip.py new file mode 100644 index 000000000..cb6621f99 --- /dev/null +++ b/pyload/plugin/extractor/UnZip.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +from __future__ import with_statement + +import os +import sys +import zipfile + +from pyload.plugin.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from pyload.utils import fs_encode + + +class UnZip(Extractor): + __name__ = "UnZip" + __version__ = "1.10" + + __description__ = """Zip extractor plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + EXTENSIONS = [".zip", ".zip64"] + VERSION ="(python %s.%s.%s)" % (sys.version_info[0], sys.version_info[1], sys.version_info[2]) + + + @classmethod + def isUsable(cls): + return sys.version_info[:2] >= (2, 6) + + + def list(self, password=None): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + return z.namelist() + + + def check(self): + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + badfile = z.testzip() + + if badfile: + raise CRCError(badfile) + else: + raise PasswordError + + + def extract(self, password=None): + try: + with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z: + z.setpassword(password) + + badfile = z.testzip() + + if badfile: + raise CRCError(badfile) + else: + z.extractall(self.out) + + except (zipfile.BadZipfile, zipfile.LargeZipFile), e: + raise ArchiveError(e) + + except RuntimeError, e: + if "encrypted" in e: + raise PasswordError + else: + raise ArchiveError(e) + else: + self.files = z.namelist() diff --git a/pyload/plugin/extractor/__init__.py b/pyload/plugin/extractor/__init__.py new file mode 100644 index 000000000..40a96afc6 --- /dev/null +++ b/pyload/plugin/extractor/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- |