summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Walter Purcaro <vuolter@gmail.com> 2014-12-24 01:11:44 +0100
committer Walter Purcaro <vuolter@gmail.com> 2014-12-24 01:11:44 +0100
commit 29df1397bbbe80eced4674b6fa39e16540c80901 (patch)
tree 11bf4c6cbc8bb4f1efba88f9e81c5b830cefb721
parent [SkipRev] Typo (diff)
download pyload-29df1397bbbe80eced4674b6fa39e16540c80901.tar.xz
Extractor rewritten
-rw-r--r-- module/plugins/hooks/ExtractArchive.py 168
-rw-r--r-- module/plugins/internal/UnRar.py 179
-rw-r--r-- module/plugins/internal/UnZip.py 64
3 files changed, 272 insertions, 139 deletions
diff --git a/module/plugins/hooks/ExtractArchive.py b/module/plugins/hooks/ExtractArchive.py
index ddec8319b..16942bef0 100644
--- a/module/plugins/hooks/ExtractArchive.py
+++ b/module/plugins/hooks/ExtractArchive.py
@@ -51,32 +51,33 @@ if os.name != "nt":
from pwd import getpwnam
from module.plugins.Hook import Hook, threaded, Expose
-from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, WrongPassword
-from module.utils import save_join, fs_encode
+from module.plugins.internal.AbstractExtractor import ArchiveError, CRCError, PasswordError
+from module.utils import save_join, uniqify
class ExtractArchive(Hook):
__name__ = "ExtractArchive"
__type__ = "hook"
- __version__ = "0.20"
-
- __config__ = [("activated", "bool", "Activated", True),
- ("fullpath", "bool", "Extract full path", True),
- ("overwrite", "bool", "Overwrite files", True),
- ("passwordfile", "file", "password file", "archive_password.txt"),
- ("deletearchive", "bool", "Delete archives when done", False),
- ("subfolder", "bool", "Create subfolder for each package", False),
- ("destination", "folder", "Extract files to", ""),
- ("excludefiles", "str", "Exclude files from unpacking (seperated by ;)", ""),
- ("recursive", "bool", "Extract archives in archvies", True),
- ("queue", "bool", "Wait for all downloads to be finished", True),
- ("renice", "int", "CPU Priority", 0)]
+ __version__ = "1.00"
+
+ __config__ = [("activated" , "bool" , "Activated" , True ),
+ ("fullpath" , "bool" , "Extract full path" , True ),
+ ("overwrite" , "bool" , "Overwrite files" , False ),
+ ("keepbroken" , "bool" , "Extract broken archives" , False ),
+ ("repair" , "bool" , "Repair broken archives" , True ),
+ ("passwordfile" , "file" , "Store passwords in file" , "archive_password.txt" ),
+ ("delete" , "bool" , "Delete archive when successfully extracted", False ),
+ ("subfolder" , "bool" , "Create subfolder for each package" , False ),
+ ("destination" , "folder", "Extract files to" , "" ),
+ ("extensions" , "str" , "Extract the following extensions" , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"),
+ ("excludefiles" , "str" , "Don't extract the following files" , "*.nfo,*.DS_Store,index.dat,thumb.db" ),
+ ("recursive" , "bool" , "Extract archives in archives" , True ),
+ ("queue" , "bool" , "Wait for all downloads to be finished" , True ),
+ ("renice" , "int" , "CPU Priority" , 0 )]
__description__ = """Extract different kind of archives"""
__license__ = "GPLv3"
- __authors__ = [("RaNaN", "ranan@pyload.org"),
- ("AndroKev", None),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
event_list = ["allDownloadsProcessed"]
@@ -92,7 +93,7 @@ class ExtractArchive(Hook):
self.passwords = []
names = []
- for p in ("UnRar", "UnZip"):
+ for p in ("UnRar", "SevenZip", "UnZip"):
try:
module = self.core.pluginManager.loadModule("internal", p)
klass = getattr(module, p)
@@ -154,13 +155,21 @@ class ExtractArchive(Hook):
extracted = []
failed = []
+ clearlist = lambda string: [x.lstrip('.') for x in string.replace(' ', '').replace(',', '|').replace(';', '|').split('|')]
+
destination = self.getConfig("destination")
subfolder = self.getConfig("subfolder")
fullpath = self.getConfig("fullpath")
overwrite = self.getConfig("overwrite")
- excludefiles = self.getConfig("excludefiles")
+ extensions = clearlist(self.getConfig("extensions"))
+ excludefiles = clearlist(self.getConfig("excludefiles"))
renice = self.getConfig("renice")
recursive = self.getConfig("recursive")
+ delete = self.getConfig("delete")
+ keepbroken = self.getConfig("keepbroken")
+
+ if extensions:
+ self.logDebug("Extensions allowed: %s" % "|.".join(extensions))
# reload from txt file
self.reloadPasswords()
@@ -171,7 +180,7 @@ class ExtractArchive(Hook):
#iterate packages -> plugins -> targets
for pid in ids:
p = self.core.files.getPackage(pid)
- self.logInfo(_("Check package %s") % p.name)
+ self.logInfo(_("Check package: %s") % p.name)
if not p:
continue
@@ -179,21 +188,25 @@ class ExtractArchive(Hook):
out = save_join(dl, p.folder, destination, "") #: force trailing slash
if subfolder:
- out = save_join(out, fs_encode(p.folder))
+ out = save_join(out, p.folder)
if not exists(out):
makedirs(out)
files_ids = [(save_join(dl, p.folder, x['name']), x['id']) for x in p.getChildren().itervalues()]
- matched = False
- success = True
+ matched = False
+ success = True
# check as long there are unseen files
while files_ids:
new_files_ids = []
+ if extensions:
+ files_ids = [(file, id) for file, id in files_ids if filter(lambda ext: file.endswith(ext), extensions)]
+
for plugin in self.plugins:
targets = plugin.getTargets(files_ids)
+
if targets:
self.logDebug("Targets for %s: %s" % (plugin.__name__, targets))
matched = True
@@ -205,19 +218,31 @@ class ExtractArchive(Hook):
processed.append(target) # prevent extracting same file twice
- self.logInfo(basename(target), _("Extract to %s") % out)
+ self.logInfo(basename(target), _("Extract to: %s") % out)
try:
- klass = plugin(self, target, out, fullpath, overwrite, excludefiles, renice)
+ klass = plugin(self,
+ target,
+ out,
+ p.password,
+ fullpath,
+ overwrite,
+ excludefiles,
+ renice,
+ delete,
+ keepbroken)
klass.init()
- new_files = self._extract(klass, fid, [p.password.strip()], thread)
+ new_files = self._extract(klass, fid, thread)
except Exception, e:
self.logError(basename(target), e)
+ new_files = None
+
+ if new_files is None:
success = False
continue
- self.logDebug("Extracted", new_files)
+ self.logDebug("Extracted files: %s" % new_files)
self.setPermissions(new_files)
for file in new_files:
@@ -242,43 +267,78 @@ class ExtractArchive(Hook):
return True if not failed else False
- def _extract(self, plugin, fid, passwords, thread):
+ def _extract(self, plugin, fid, thread):
pyfile = self.core.files.getFile(fid)
- deletearchive = self.getConfig("deletearchive")
pyfile.setCustomStatus(_("extracting"))
thread.addActive(pyfile) # keep this file until everything is done
try:
- progress = lambda x: pyfile.setProgress(x)
- success = False
+ progress = lambda x: pyfile.setProgress(x)
+ encrypted = False
+ passwords = self.getPasswords()
+
+ try:
+ self.logInfo(basename(plugin.file), "Verifying...")
+
+ tmp_password = plugin.password
+ plugin.password = "" #: Force verifying without password
+
+ plugin.verify()
+
+ except PasswordError:
+ encrypted = True
+
+ except CRCError:
+ self.logWarning(basename(plugin.file), _("Archive damaged"))
+
+ if not self.getConfig("repair"):
+ raise CRCError
+
+ elif plugin.repair():
+ self.logInfo(basename(plugin.file), _("Successfully repaired"))
+
+ elif not self.getConfig("keepbroken"):
+ raise ArchiveError(_("Broken archive"))
+
+ else:
+ self.logInfo(basename(plugin.file), _("All OK"))
+
+ plugin.password = tmp_password
+
+ if not encrypted:
+ plugin.extract(progress)
- if not plugin.checkArchive():
- plugin.extract(progress, pw)
- success = True
else:
self.logInfo(basename(plugin.file), _("Password protected"))
- self.logDebug("Passwords: %s" % passwords if passwords else "No password provided")
- for pw in set(passwords) | set(self.getPasswords()):
+ if plugin.password:
+ passwords.insert(0, plugin.password)
+ passwords = uniqify(self.passwords)
+ self.logDebug("Password: %s" % plugin.password)
+ else:
+ self.logDebug("No package password provided")
+
+ for pw in passwords:
try:
self.logDebug("Try password: %s" % pw)
- if plugin.checkPassword(pw):
- plugin.extract(progress, pw)
+
+ if plugin.setPassword(pw):
+ plugin.extract(progress)
self.addPassword(pw)
- success = True
break
+ else:
+ raise PasswordError
- except WrongPassword:
+ except PasswordError:
self.logDebug("Password was wrong")
-
- if not success:
- raise Exception(_("Wrong password"))
+ else:
+ raise PasswordError
if self.core.debug:
- self.logDebug("Would delete", ", ".join(plugin.getDeleteFiles()))
+ self.logDebug("Would delete: %s" % ", ".join(plugin.getDeleteFiles()))
- if deletearchive:
+ if self.getConfig("delete"):
files = plugin.getDeleteFiles()
self.logInfo(_("Deleting %s files") % len(files))
for f in files:
@@ -294,12 +354,16 @@ class ExtractArchive(Hook):
return extracted_files
- except ArchiveError, e:
- self.logError(basename(plugin.file), _("Archive Error"), e)
+ except PasswordError:
+ self.logError(basename(plugin.file), _("Wrong password" if passwords else "No password found"))
+ plugin.password = ""
except CRCError:
self.logError(basename(plugin.file), _("CRC Mismatch"))
+ except ArchiveError, e:
+ self.logError(basename(plugin.file), _("Archive Error"), e)
+
except Exception, e:
if self.core.debug:
print_exc()
@@ -307,7 +371,7 @@ class ExtractArchive(Hook):
self.manager.dispatchEvent("archive_extract_failed", pyfile)
- raise Exception(_("Extract failed"))
+ self.logError(basename(plugin.file), _("Extract failed"))
@Expose
@@ -337,15 +401,13 @@ class ExtractArchive(Hook):
""" Adds a password to saved list"""
passwordfile = self.getConfig("passwordfile")
- if pw in self.passwords:
- self.passwords.remove(pw)
-
self.passwords.insert(0, pw)
+ self.passwords = uniqify(self.passwords)
try:
with open(passwordfile, "wb") as f:
for pw in self.passwords:
- f.write(pw + "\n")
+ f.write(pw + '\n')
except IOError, e:
self.logError(e)
diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py
index 4bbd2042c..5633b31f7 100644
--- a/module/plugins/internal/UnRar.py
+++ b/module/plugins/internal/UnRar.py
@@ -4,11 +4,11 @@ import os
import re
from glob import glob
-from os.path import basename, join
+from os.path import basename, dirname, join
from string import digits
from subprocess import Popen, PIPE
-from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError
+from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError
from module.utils import save_join, decode
@@ -22,21 +22,25 @@ def renice(pid, value):
class UnRar(AbtractExtractor):
__name__ = "UnRar"
- __version__ = "0.21"
+ __version__ = "1.00"
__description__ = """Rar extractor plugin"""
__license__ = "GPLv3"
- __authors__ = [("RaNaN", "RaNaN@pyload.org"),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
CMD = "unrar"
+ EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"]
+
+
#@NOTE: there are some more uncovered rar formats
- re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I)
- re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I)
+ re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I)
+ re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I)
+
re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)')
- re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I)
+ re_wrongpwd = re.compile(r'password', re.I)
+ re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I)
@classmethod
@@ -60,69 +64,99 @@ class UnRar(AbtractExtractor):
@classmethod
+ def isArchive(cls, file):
+ f = basename(file).lower()
+ return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS)
+
+
+ @classmethod
def getTargets(cls, files_ids):
- result = []
+ targets = []
for file, id in files_ids:
- if not file.endswith(".rar"):
+ if not cls.isArchive(file):
continue
- match = cls.re_splitfile.findall(file)
- if match:
+ m = cls.re_rarpart.findall(file)
+ if m:
# only add first parts
- if int(match[0][1]) == 1:
- result.append((file, id))
+ if int(m[0][1]) == 1:
+ targets.append((file, id))
else:
- result.append((file, id))
+ targets.append((file, id))
- return result
+ return targets
- def init(self):
- self.passwordProtected = False
- self.headerProtected = False #: list files will not work without password
- self.password = "" #: save the correct password
+ def check(self, out="", err=""):
+ if not out or not err:
+ return
+ if err.strip():
+ if self.re_wrongpwd.search(err):
+ raise PasswordError
- def checkArchive(self):
- p = self.call_unrar("l", "-v", self.file)
- out, err = p.communicate()
- if self.re_wrongpwd.search(err):
- self.passwordProtected = True
- self.headerProtected = True
- return True
+ elif self.re_wrongcrc.search(err):
+ raise CRCError
+
+ else: #: raise error if anything is on stderr
+ raise ArchiveError(err.strip())
# output only used to check if passworded files are present
for attr in self.re_filelist.findall(out):
if attr[0].startswith("*"):
- self.passwordProtected = True
- return True
+ raise PasswordError
+
- self.listContent()
- if not self.files:
- raise ArchiveError("Empty Archive")
+ def verify(self):
+ p = self.call_cmd("l", "-v", self.file, password=self.password)
+
+ self.check(*p.communicate())
+
+ if p and p.returncode:
+ raise ArchiveError("Process terminated")
+
+ if not self.list():
+ raise ArchiveError("Empty archive")
+
+
+ def isPassword(self, password):
+ if isinstance(password, basestring):
+ p = self.call_cmd("l", "-v", self.file, password=password)
+ out, err = p.communicate()
+
+ if not self.re_wrongpwd.search(err):
+ return True
return False
- def checkPassword(self, password):
- # at this point we can only verify header protected files
- if self.headerProtected:
- p = self.call_unrar("l", "-v", self.file, password=password)
+ def repair(self):
+ p = self.call_cmd("rc", self.file)
+ out, err = p.communicate()
+
+ if p.returncode or err.strip():
+ p = self.call_cmd("r", self.file)
out, err = p.communicate()
- if self.re_wrongpwd.search(err):
+
+ if p.returncode or err.strip():
return False
+ else:
+ self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0))
return True
- def extract(self, progress, password=""):
+ def extract(self, progress=lambda x: None):
+ self.verify()
+
+ progress(0)
+
command = "x" if self.fullpath else "e"
- p = self.call_unrar(command, self.file, self.out, password=password)
- renice(p.pid, self.renice)
+ p = self.call_cmd(command, self.file, self.out, password=self.password)
- progress(0)
+ renice(p.pid, self.renice)
progressstring = ""
while True:
@@ -131,7 +165,7 @@ class UnRar(AbtractExtractor):
if not c:
break
# reading a percentage sign -> set progress and restart
- if c == '%':
+ if c is '%':
progress(int(progressstring))
progressstring = ""
# not reading a digit -> therefore restart
@@ -139,46 +173,43 @@ class UnRar(AbtractExtractor):
progressstring = ""
# add digit to progressstring
else:
- progressstring = progressstring + c
+ progressstring += c
progress(100)
- # retrieve stderr
- err = p.stderr.read()
+ self.files = self.list()
- if "CRC failed" in err and not password and not self.passwordProtected:
- raise CRCError
- elif "CRC failed" in err:
- raise WrongPassword
+ # retrieve stderr
+ self.check(err=p.stderr.read())
- if err.strip(): #: raise error if anything is on stderr
- raise ArchiveError(err.strip())
if p.returncode:
raise ArchiveError("Process terminated")
- if not self.files:
- self.password = password
- self.listContent()
-
def getDeleteFiles(self):
if ".part" in basename(self.file):
return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I))
+
# get files which matches .r* and filter unsuited files out
parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I))
- return filter(lambda x: self.re_partfiles.match(x), parts)
+ return filter(lambda x: self.re_rarfile.match(x), parts)
- def listContent(self):
+
+ def list(self):
command = "vb" if self.fullpath else "lb"
- p = self.call_unrar(command, "-v", self.file, password=self.password)
+
+ p = self.call_cmd(command, "-v", self.file, password=self.password)
out, err = p.communicate()
- if "Cannot open" in err:
- raise ArchiveError("Cannot open file")
+ if err.strip():
+ self.m.logError(err)
+ if "Cannot open" in err:
+ return list()
- if err.strip(): #: only log error at this point
- self.m.logError(err.strip())
+ if p.returncode:
+ self.m.logError("Process terminated")
+ return list()
result = set()
@@ -186,17 +217,22 @@ class UnRar(AbtractExtractor):
f = f.strip()
result.add(save_join(self.out, f))
- self.files = result
+ return list(result)
- def call_unrar(self, command, *xargs, **kwargs):
+ def call_cmd(self, command, *xargs, **kwargs):
args = []
+
# overwrite flag
- args.append("-o+") if self.overwrite else args.append("-o-")
+ if self.overwrite:
+ args.append("-o+")
+ else:
+ args.append("-o-")
+ if self.delete:
+ args.append("-or")
- if self.excludefiles:
- for word in self.excludefiles.split(';'):
- args.append("-x%s" % word)
+ for word in self.excludefiles:
+ args.append("-x%s" % word.strip())
# assume yes on all queries
args.append("-y")
@@ -207,10 +243,11 @@ class UnRar(AbtractExtractor):
else:
args.append("-p-")
+ if self.keepbroken:
+ args.append("-kb")
+
# NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue
call = [self.CMD, command] + args + list(xargs)
self.m.logDebug(" ".join(call))
- p = Popen(call, stdout=PIPE, stderr=PIPE)
-
- return p
+ return Popen(call, stdout=PIPE, stderr=PIPE)
diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py
index 81c298784..b3d54cba0 100644
--- a/module/plugins/internal/UnZip.py
+++ b/module/plugins/internal/UnZip.py
@@ -1,19 +1,23 @@
# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
import sys
import zipfile
-from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError
+from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError
class UnZip(AbtractExtractor):
__name__ = "UnZip"
- __version__ = "0.12"
+ __version__ = "1.00"
__description__ = """Zip extractor plugin"""
__license__ = "GPLv3"
- __authors__ = [("RaNaN", "RaNaN@pyload.org"),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
+
+
+ EXTENSIONS = ["zip", "zip64"]
@classmethod
@@ -22,31 +26,61 @@ class UnZip(AbtractExtractor):
@classmethod
- def getTargets(cls, files_ids):
- result = []
+ def isArchive(cls, file):
+ return zipfile.is_zipfile(file)
- for file, id in files_ids:
- if file.endswith(".zip"):
- result.append((file, id))
- return result
+ def verify(self):
+ try:
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ z.setpassword(self.password)
+ badcrc = z.testzip()
+ except (BadZipfile, LargeZipFile), e:
+ raise ArchiveError(e)
- def extract(self, progress, password=""):
+ except RuntimeError, e:
+ if 'encrypted' in e:
+ raise PasswordError
+ else:
+ raise ArchiveError(e)
+
+ else:
+ if badcrc:
+ raise CRCError
+
+ if not self.list():
+ raise ArchiveError("Empty archive")
+
+
+ def list(self):
try:
- z = zipfile.ZipFile(self.file)
- self.files = z.namelist()
- z.extractall(self.out, pwd=password)
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ z.setpassword(self.password)
+ return z.namelist()
+ except Exception:
+ return list()
+
+
+ def extract(self, progress=lambda x: None):
+ try:
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ progress(0)
+ z.extractall(self.out, pwd=self.password)
+ progress(100)
except (BadZipfile, LargeZipFile), e:
raise ArchiveError(e)
except RuntimeError, e:
if e is "Bad password for file":
- raise WrongPassword
+ raise PasswordError
else:
raise ArchiveError(e)
+ finally:
+ self.files = self.list()
+
def getDeleteFiles(self):
return [self.file]