summaryrefslogtreecommitdiffstats
path: root/module/plugins/internal
diff options
context:
space:
mode:
authorGravatar Walter Purcaro <vuolter@gmail.com> 2014-12-24 01:11:44 +0100
committerGravatar Walter Purcaro <vuolter@gmail.com> 2014-12-24 01:11:44 +0100
commit29df1397bbbe80eced4674b6fa39e16540c80901 (patch)
tree11bf4c6cbc8bb4f1efba88f9e81c5b830cefb721 /module/plugins/internal
parent[SkipRev] Typo (diff)
downloadpyload-29df1397bbbe80eced4674b6fa39e16540c80901.tar.xz
Extractor rewritten
Diffstat (limited to 'module/plugins/internal')
-rw-r--r--module/plugins/internal/UnRar.py179
-rw-r--r--module/plugins/internal/UnZip.py64
2 files changed, 157 insertions, 86 deletions
diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py
index 4bbd2042c..5633b31f7 100644
--- a/module/plugins/internal/UnRar.py
+++ b/module/plugins/internal/UnRar.py
@@ -4,11 +4,11 @@ import os
import re
from glob import glob
-from os.path import basename, join
+from os.path import basename, dirname, join
from string import digits
from subprocess import Popen, PIPE
-from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError
+from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError
from module.utils import save_join, decode
@@ -22,21 +22,25 @@ def renice(pid, value):
class UnRar(AbtractExtractor):
__name__ = "UnRar"
- __version__ = "0.21"
+ __version__ = "1.00"
__description__ = """Rar extractor plugin"""
__license__ = "GPLv3"
- __authors__ = [("RaNaN", "RaNaN@pyload.org"),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
CMD = "unrar"
+ EXTENSIONS = ["rar", "zip", "cab", "arj", "lzh", "tar", "gz", "bz2", "ace", "uue", "jar", "iso", "7z", "xz", "z"]
+
+
#@NOTE: there are some more uncovered rar formats
- re_splitfile = re.compile(r'(.*)\.part(\d+)\.rar$', re.I)
- re_partfiles = re.compile(r'.*\.(rar|r\d+)', re.I)
+ re_rarpart = re.compile(r'(.*)\.part(\d+)\.rar$', re.I)
+ re_rarfile = re.compile(r'.*\.(rar|r\d+)$', re.I)
+
re_filelist = re.compile(r'(.+)\s+(\d+)\s+(\d+)\s+|(.+)\s+(\d+)\s+\d\d-\d\d-\d\d\s+\d\d:\d\d\s+(.+)')
- re_wrongpwd = re.compile(r'(Corrupt file or wrong password|password incorrect)', re.I)
+ re_wrongpwd = re.compile(r'password', re.I)
+ re_wrongcrc = re.compile(r'encrypted|damaged|CRC failed|checksum error', re.I)
@classmethod
@@ -60,69 +64,99 @@ class UnRar(AbtractExtractor):
@classmethod
+ def isArchive(cls, file):
+ f = basename(file).lower()
+ return any(f.endswith('.%s' % ext) for ext in cls.EXTENSIONS)
+
+
+ @classmethod
def getTargets(cls, files_ids):
- result = []
+ targets = []
for file, id in files_ids:
- if not file.endswith(".rar"):
+ if not cls.isArchive(file):
continue
- match = cls.re_splitfile.findall(file)
- if match:
+ m = cls.re_rarpart.findall(file)
+ if m:
# only add first parts
- if int(match[0][1]) == 1:
- result.append((file, id))
+ if int(m[0][1]) == 1:
+ targets.append((file, id))
else:
- result.append((file, id))
+ targets.append((file, id))
- return result
+ return targets
- def init(self):
- self.passwordProtected = False
- self.headerProtected = False #: list files will not work without password
- self.password = "" #: save the correct password
+ def check(self, out="", err=""):
+ if not out or not err:
+ return
+ if err.strip():
+ if self.re_wrongpwd.search(err):
+ raise PasswordError
- def checkArchive(self):
- p = self.call_unrar("l", "-v", self.file)
- out, err = p.communicate()
- if self.re_wrongpwd.search(err):
- self.passwordProtected = True
- self.headerProtected = True
- return True
+ elif self.re_wrongcrc.search(err):
+ raise CRCError
+
+ else: #: raise error if anything is on stderr
+ raise ArchiveError(err.strip())
# output only used to check if passworded files are present
for attr in self.re_filelist.findall(out):
if attr[0].startswith("*"):
- self.passwordProtected = True
- return True
+ raise PasswordError
+
- self.listContent()
- if not self.files:
- raise ArchiveError("Empty Archive")
+ def verify(self):
+ p = self.call_cmd("l", "-v", self.file, password=self.password)
+
+ self.check(*p.communicate())
+
+ if p and p.returncode:
+ raise ArchiveError("Process terminated")
+
+ if not self.list():
+ raise ArchiveError("Empty archive")
+
+
+ def isPassword(self, password):
+ if isinstance(password, basestring):
+ p = self.call_cmd("l", "-v", self.file, password=password)
+ out, err = p.communicate()
+
+ if not self.re_wrongpwd.search(err):
+ return True
return False
- def checkPassword(self, password):
- # at this point we can only verify header protected files
- if self.headerProtected:
- p = self.call_unrar("l", "-v", self.file, password=password)
+ def repair(self):
+ p = self.call_cmd("rc", self.file)
+ out, err = p.communicate()
+
+ if p.returncode or err.strip():
+ p = self.call_cmd("r", self.file)
out, err = p.communicate()
- if self.re_wrongpwd.search(err):
+
+ if p.returncode or err.strip():
return False
+ else:
+ self.file = join(dirname(self.file), re.search(r'(fixed|rebuild)\.%s' % basename(self.file), out).group(0))
return True
- def extract(self, progress, password=""):
+ def extract(self, progress=lambda x: None):
+ self.verify()
+
+ progress(0)
+
command = "x" if self.fullpath else "e"
- p = self.call_unrar(command, self.file, self.out, password=password)
- renice(p.pid, self.renice)
+ p = self.call_cmd(command, self.file, self.out, password=self.password)
- progress(0)
+ renice(p.pid, self.renice)
progressstring = ""
while True:
@@ -131,7 +165,7 @@ class UnRar(AbtractExtractor):
if not c:
break
# reading a percentage sign -> set progress and restart
- if c == '%':
+ if c is '%':
progress(int(progressstring))
progressstring = ""
# not reading a digit -> therefore restart
@@ -139,46 +173,43 @@ class UnRar(AbtractExtractor):
progressstring = ""
# add digit to progressstring
else:
- progressstring = progressstring + c
+ progressstring += c
progress(100)
- # retrieve stderr
- err = p.stderr.read()
+ self.files = self.list()
- if "CRC failed" in err and not password and not self.passwordProtected:
- raise CRCError
- elif "CRC failed" in err:
- raise WrongPassword
+ # retrieve stderr
+ self.check(err=p.stderr.read())
- if err.strip(): #: raise error if anything is on stderr
- raise ArchiveError(err.strip())
if p.returncode:
raise ArchiveError("Process terminated")
- if not self.files:
- self.password = password
- self.listContent()
-
def getDeleteFiles(self):
if ".part" in basename(self.file):
return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.I))
+
# get files which matches .r* and filter unsuited files out
parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.I))
- return filter(lambda x: self.re_partfiles.match(x), parts)
+ return filter(lambda x: self.re_rarfile.match(x), parts)
- def listContent(self):
+
+ def list(self):
command = "vb" if self.fullpath else "lb"
- p = self.call_unrar(command, "-v", self.file, password=self.password)
+
+ p = self.call_cmd(command, "-v", self.file, password=self.password)
out, err = p.communicate()
- if "Cannot open" in err:
- raise ArchiveError("Cannot open file")
+ if err.strip():
+ self.m.logError(err)
+ if "Cannot open" in err:
+ return list()
- if err.strip(): #: only log error at this point
- self.m.logError(err.strip())
+ if p.returncode:
+ self.m.logError("Process terminated")
+ return list()
result = set()
@@ -186,17 +217,22 @@ class UnRar(AbtractExtractor):
f = f.strip()
result.add(save_join(self.out, f))
- self.files = result
+ return list(result)
- def call_unrar(self, command, *xargs, **kwargs):
+ def call_cmd(self, command, *xargs, **kwargs):
args = []
+
# overwrite flag
- args.append("-o+") if self.overwrite else args.append("-o-")
+ if self.overwrite:
+ args.append("-o+")
+ else:
+ args.append("-o-")
+ if self.delete:
+ args.append("-or")
- if self.excludefiles:
- for word in self.excludefiles.split(';'):
- args.append("-x%s" % word)
+ for word in self.excludefiles:
+ args.append("-x%s" % word.strip())
# assume yes on all queries
args.append("-y")
@@ -207,10 +243,11 @@ class UnRar(AbtractExtractor):
else:
args.append("-p-")
+ if self.keepbroken:
+ args.append("-kb")
+
# NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue
call = [self.CMD, command] + args + list(xargs)
self.m.logDebug(" ".join(call))
- p = Popen(call, stdout=PIPE, stderr=PIPE)
-
- return p
+ return Popen(call, stdout=PIPE, stderr=PIPE)
diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py
index 81c298784..b3d54cba0 100644
--- a/module/plugins/internal/UnZip.py
+++ b/module/plugins/internal/UnZip.py
@@ -1,19 +1,23 @@
# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
import sys
import zipfile
-from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError
+from module.plugins.internal.AbstractExtractor import AbtractExtractor, PasswordError, ArchiveError, CRCError
class UnZip(AbtractExtractor):
__name__ = "UnZip"
- __version__ = "0.12"
+ __version__ = "1.00"
__description__ = """Zip extractor plugin"""
__license__ = "GPLv3"
- __authors__ = [("RaNaN", "RaNaN@pyload.org"),
- ("Walter Purcaro", "vuolter@gmail.com")]
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
+
+
+ EXTENSIONS = ["zip", "zip64"]
@classmethod
@@ -22,31 +26,61 @@ class UnZip(AbtractExtractor):
@classmethod
- def getTargets(cls, files_ids):
- result = []
+ def isArchive(cls, file):
+ return zipfile.is_zipfile(file)
- for file, id in files_ids:
- if file.endswith(".zip"):
- result.append((file, id))
- return result
+ def verify(self):
+ try:
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ z.setpassword(self.password)
+ badcrc = z.testzip()
+ except (BadZipfile, LargeZipFile), e:
+ raise ArchiveError(e)
- def extract(self, progress, password=""):
+ except RuntimeError, e:
+ if 'encrypted' in e:
+ raise PasswordError
+ else:
+ raise ArchiveError(e)
+
+ else:
+ if badcrc:
+ raise CRCError
+
+ if not self.list():
+ raise ArchiveError("Empty archive")
+
+
+ def list(self):
try:
- z = zipfile.ZipFile(self.file)
- self.files = z.namelist()
- z.extractall(self.out, pwd=password)
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ z.setpassword(self.password)
+ return z.namelist()
+ except Exception:
+ return list()
+
+
+ def extract(self, progress=lambda x: None):
+ try:
+ with zipfile.ZipFile(self.file, 'r', allowZip64=True) as z:
+ progress(0)
+ z.extractall(self.out, pwd=self.password)
+ progress(100)
except (BadZipfile, LargeZipFile), e:
raise ArchiveError(e)
except RuntimeError, e:
if e is "Bad password for file":
- raise WrongPassword
+ raise PasswordError
else:
raise ArchiveError(e)
+ finally:
+ self.files = self.list()
+
def getDeleteFiles(self):
return [self.file]