diff options
| author | 2015-01-29 23:13:54 +0100 | |
|---|---|---|
| committer | 2015-01-29 23:13:54 +0100 | |
| commit | 788a06132882300a22f6db3aa7ac3a6009d4d762 (patch) | |
| tree | a6df8ca7e7edab49552847dc33cc8c18ae6d2f08 | |
| parent | [RapidgatorNet] Fix typo (diff) | |
| download | pyload-788a06132882300a22f6db3aa7ac3a6009d4d762.tar.xz | |
Update Extractor (2)
| -rw-r--r-- | module/plugins/hooks/ExtractArchive.py | 159 | ||||
| -rw-r--r-- | module/plugins/internal/Extractor.py | 17 | ||||
| -rw-r--r-- | module/plugins/internal/SevenZip.py | 24 | ||||
| -rw-r--r-- | module/plugins/internal/UnRar.py | 64 | ||||
| -rw-r--r-- | module/plugins/internal/UnZip.py | 12 | 
5 files changed, 178 insertions, 98 deletions
| diff --git a/module/plugins/hooks/ExtractArchive.py b/module/plugins/hooks/ExtractArchive.py index 11427109b..f2bc11ec2 100644 --- a/module/plugins/hooks/ExtractArchive.py +++ b/module/plugins/hooks/ExtractArchive.py @@ -12,12 +12,14 @@ from traceback import print_exc  # http://bugs.python.org/issue6122 , http://bugs.python.org/issue1236 , http://bugs.python.org/issue1731717  if sys.version_info < (2, 7) and os.name != "nt":      import errno +      from subprocess import Popen      def _eintr_retry_call(func, *args):          while True:              try:                  return func(*args) +              except OSError, e:                  if e.errno == errno.EINTR:                      continue @@ -49,28 +51,64 @@ if os.name != "nt":  from module.plugins.Hook import Hook, threaded, Expose  from module.plugins.internal.Extractor import ArchiveError, CRCError, PasswordError -from module.utils import fs_decode, fs_encode, save_join, uniqify +from module.utils import fs_encode, save_join, uniqify + + +class ArchiveQueue(object): + +    def __init__(self, plugin, storage): +        self.plugin  = plugin +        self.storage = storage + + +    def get(self): +        return self.plugin.getStorage("ExtractArchive:%s" % storage, []) + + +    def set(self, value): +        return self.plugin.setStorage("ExtractArchive:%s" % storage, value) + + +    def clean(self): +        return self.set([]) + + +    def add(self, item): +        queue = self.get() +        if item not in queue: +            return self.set(queue + [item]) +        else: +            return True + + +    def remove(self, item): +        queue = self.get() +        queue.pop(item, None) +        return self.set(queue) +  class ExtractArchive(Hook):      __name__    = "ExtractArchive"      __type__    = "hook" -    __version__ = "1.11" - -    __config__ = [("activated"    , "bool"  , "Activated"                                 , True                                                                     ), -                  ("fullpath"     , "bool"  , "Extract full path"                         , True                                                                     ), -                  ("overwrite"    , "bool"  , "Overwrite files"                           , False                                                                    ), -                  ("keepbroken"   , "bool"  , "Extract broken archives"                   , False                                                                    ), -                  ("repair"       , "bool"  , "Repair broken archives"                    , True                                                                     ), -                  ("passwordfile" , "file"  , "Store passwords in file"                   , "archive_password.txt"                                                   ), -                  ("delete"       , "bool"  , "Delete archive when successfully extracted", False                                                                    ), -                  ("subfolder"    , "bool"  , "Create subfolder for each package"         , False                                                                    ), -                  ("destination"  , "folder", "Extract files to"                          , ""                                                                       ), -                  ("extensions"   , "str"   , "Extract the following extensions"          , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"), -                  ("excludefiles" , "str"   , "Don't extract the following files"         , "*.nfo,*.DS_Store,index.dat,thumb.db"                                    ), -                  ("recursive"    , "bool"  , "Extract archives in archives"              , True                                                                     ), -                  ("queue"        , "bool"  , "Wait for all downloads to be finished"     , False                                                                    ), -                  ("renice"       , "int"   , "CPU Priority"                              , 0                                                                        )] +    __version__ = "1.12" + +    __config__ = [("activated"       , "bool"  , "Activated"                                 , True                                                                     ), +                  ("fullpath"        , "bool"  , "Extract full path"                         , True                                                                     ), +                  ("overwrite"       , "bool"  , "Overwrite files"                           , False                                                                    ), +                  ("keepbroken"      , "bool"  , "Try to extract broken archives"            , False                                                                    ), +                  ("repair"          , "bool"  , "Repair broken archives"                    , False                                                                    ), +                  ("extractempty"    , "bool"  , "Extract empty archives"                    , True                                                                     ), +                  ("usepasswordfile" , "bool"  , "Use password file"                         , True                                                                     ), +                  ("passwordfile"    , "file"  , "Password file"                             , "archive_password.txt"                                                   ), +                  ("delete"          , "bool"  , "Delete archive when successfully extracted", False                                                                    ), +                  ("subfolder"       , "bool"  , "Create subfolder for each package"         , False                                                                    ), +                  ("destination"     , "folder", "Extract files to folder"                   , ""                                                                       ), +                  ("extensions"      , "str"   , "Extract the following extensions"          , "7z,bz2,bzip2,gz,gzip,lha,lzh,lzma,rar,tar,taz,tbz,tbz2,tgz,xar,xz,z,zip"), +                  ("excludefiles"    , "str"   , "Don't extract the following files"         , "*.nfo,*.DS_Store,index.dat,thumb.db"                                    ), +                  ("recursive"       , "bool"  , "Extract archives in archives"              , True                                                                     ), +                  ("queue"           , "bool"  , "Wait for all downloads to be finished"     , False                                                                    ), +                  ("renice"          , "int"   , "CPU priority"                              , 0                                                                        )]      __description__ = """Extract different kind of archives"""      __license__     = "GPLv3" @@ -90,6 +128,9 @@ class ExtractArchive(Hook):      def setup(self): +        self.queue  = ArchiveQueue(self, "Queue") +        self.failed = ArchiveQueue(self, "Failed") +          self.interval   = 300          self.extractors = []          self.passwords  = [] @@ -124,7 +165,7 @@ class ExtractArchive(Hook):      def periodical(self):          if not self.extracting: -            self.extractPackage(*self.getQueue()) +            self.extractPackage(*self.queue.get())      @Expose @@ -136,14 +177,14 @@ class ExtractArchive(Hook):      def packageFinished(self, pypack):          if self.extracting or self.getConfig("queue"):              self.logInfo(_("Package %s queued for later extracting") % pypack.name) -            self.addToQueue(pypack.id) +            self.queue.add(pypack.id)          else:              self.extractPackage(pypack.id)      @threaded      def allDownloadsProcessed(self): -        if self.extract(self.getQueue()):  #@NOTE: check only if all gone fine, no failed reporting for now +        if self.extract(self.queue.get()):  #@NOTE: check only if all gone fine, no failed reporting for now              self.manager.dispatchEvent("all_archives_extracted")          self.manager.dispatchEvent("all_archives_processed") @@ -180,11 +221,11 @@ class ExtractArchive(Hook):          for pid in ids:              pypack = self.core.files.getPackage(pid) -            self.logInfo(_("Check package: %s") % pypack.name) -              if not pypack:                  continue +            self.logInfo(_("Check package: %s") % pypack.name) +              # determine output folder              out = save_join(dl, pypack.folder, destination, "")  #: force trailing slash @@ -224,19 +265,19 @@ class ExtractArchive(Hook):                          try:                              self.extracting = True -                            klass = Extractor(self, -                                              filename, -                                              out, -                                              fullpath, -                                              overwrite, -                                              excludefiles, -                                              renice, -                                              delete, -                                              keepbroken, -                                              fid) -                            klass.init() +                            archive = Extractor(self, +                                                filename, +                                                out, +                                                fullpath, +                                                overwrite, +                                                excludefiles, +                                                renice, +                                                delete, +                                                keepbroken, +                                                fid) +                            archive.init() -                            new_files = self._extract(klass, fid, pypack.password) +                            new_files = self._extract(archive, fid, pypack.password)                          except Exception, e:                              self.logError(fname, e) @@ -250,6 +291,7 @@ class ExtractArchive(Hook):                              if not os.path.exists(file):                                  self.logDebug("New file %s does not exists" % file)                                  continue +                              if recursive and os.path.isfile(file):                                  new_files_ids.append((file, fid))  # append as new target @@ -262,43 +304,60 @@ class ExtractArchive(Hook):                  else:                      failed.append(pid)                      self.manager.dispatchEvent("package_extract_failed", pypack) + +                    self.failed.add(pid)              else:                  self.logInfo(_("No files found to extract"))              if not matched or not success and subfolder:                  try:                      os.rmdir(out) +                  except OSError:                      pass +            self.queue.remove(pid) +          self.extracting = False          return True if not failed else False      def _extract(self, archive, fid, password):          pyfile = self.core.files.getFile(fid) -        fname  = os.path.basename(fs_decode(archive.target)) +        fname  = os.path.basename(archive.filename)          pyfile.setCustomStatus(_("extracting"))          pyfile.setProgress(0)          try: -            success = False +            try: +                archive.check() -            if not archive.checkArchive(): -                archive.extract(password) -                success = True -            else: +            except CRCError: +                self.logInfo(fname, _("Header protected")) + +                if self.getConfig("repair"): +                    self.logWarning(fname, "Repairing...") +                    archive.repair() + +            except PasswordError):                  self.logInfo(fname, _("Password protected")) -                self.logDebug("Password: %s" % (password or "No provided")) +            except ArchiveError, e: +                if e != "Empty Archive" or not self.getConfig("extractempty"): +                    raise ArchiveError(e) + +            self.logDebug("Password: %s" % (password or "No provided")) + +            if not self.getConfig("usepasswordfile"): +                archive.extract(password) +            else:                  for pw in set(self.getPasswords(False) + [password]):                      try:                          self.logDebug("Try password: %s" % pw) -                        if archive.checkPassword(pw): +                        if archive.isPassword(pw):                              archive.extract(pw)                              self.addPassword(pw) -                            success = True                              break                      except PasswordError: @@ -316,8 +375,9 @@ class ExtractArchive(Hook):                  files = archive.getDeleteFiles()                  self.logInfo(_("Deleting %s files") % len(files))                  for f in files: -                    if os.path.exists(f): -                        os.remove(f) +                    file = fs_encode(f) +                    if os.path.exists(file): +                        os.remove(file)                      else:                          self.logDebug("%s does not exists" % f) @@ -350,15 +410,6 @@ class ExtractArchive(Hook):          raise Exception(_("Extract failed")) -    def getQueue(self): -        return self.getStorage("ExtractArchive", []) - - -    def addToQueue(self, item): -        queue = self.getQueue() -        return self.setStorage("ExtractArchive", queue + [item] if item not in queue else queue) - -      @Expose      def getPasswords(self, reload=True):          """ List of saved passwords """ diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index ddf0f8a85..3ea634ec8 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -3,7 +3,6 @@  import os  from module.PyFile import PyFile -from module.utils import fs_encode  class ArchiveError(Exception): @@ -20,7 +19,7 @@ class PasswordError(Exception):  class Extractor:      __name__    = "Extractor" -    __version__ = "0.15" +    __version__ = "0.16"      __description__ = """Base extractor plugin"""      __license__     = "GPLv3" @@ -64,7 +63,7 @@ class Extractor:                   fid=None):          """ Initialize extractor for specific file """          self.manager        = manager -        self.target         = fs_encode(filename) +        self.filename       = filename          self.out            = out          self.fullpath       = fullpath          self.overwrite      = overwrite @@ -83,17 +82,17 @@ class Extractor:          pass -    def checkArchive(self): +    def check(self):          """Check if password if needed. Raise ArchiveError if integrity is          questionable.          :return: boolean          :raises ArchiveError          """ -        return False +        raise PasswordError -    def checkPassword(self, password): +    def isPassword(self, password):          """ Check if the given password is/might be correct.          If it can not be decided at this point return true. @@ -103,6 +102,10 @@ class Extractor:          return True +    def repair(self): +        return False + +      def extract(self, password=None):          """Extract the archive. Raise specific errors in case of failure. @@ -121,7 +124,7 @@ class Extractor:          :return: List with paths of files to delete          """ -        return [self.target] +        return [self.filename]      def getExtractedFiles(self): diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py index 508cf9c8d..e808e0d88 100644 --- a/module/plugins/internal/SevenZip.py +++ b/module/plugins/internal/SevenZip.py @@ -5,13 +5,13 @@ import re  from subprocess import Popen, PIPE -from module.plugins.internal.UnRar import UnRar, renice -from module.utils import save_join +from module.plugins.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice +from module.utils import fs_encode, save_join  class SevenZip(UnRar):      __name__    = "SevenZip" -    __version__ = "0.02" +    __version__ = "0.03"      __description__ = """7-Zip extractor plugin"""      __license__     = "GPLv3" @@ -48,8 +48,8 @@ class SevenZip(UnRar):          return True -    def checkArchive(self): -        p = self.call_cmd("l", "-slt", self.target) +    def check(self): +        p = self.call_cmd("l", "-slt", fs_encode(self.filename))          out, err = p.communicate()          if p.returncode > 1: @@ -57,22 +57,20 @@ class SevenZip(UnRar):          # check if output or error macthes the 'wrong password'-Regexp          if self.re_wrongpwd.search(out): -            return True +            raise PasswordError          # check if output matches 'Encrypted = +'          if self.re_wrongcrc.search(out): -            return True +            raise CRCError          # check if archive is empty          self.files = self.list()          if not self.files:              raise ArchiveError("Empty Archive") -        return False - -    def checkPassword(self, password): -        p = self.call_cmd("l", self.target, password=password) +    def isPassword(self, password): +        p = self.call_cmd("l", fs_encode(self.filename), password=password)          p.communicate()          return p.returncode == 0 @@ -80,7 +78,7 @@ class SevenZip(UnRar):      def extract(self, password=None):          command = "x" if self.fullpath else "e" -        p = self.call_cmd(command, '-o' + self.out, self.target, password=password) +        p = self.call_cmd(command, '-o' + self.out, fs_encode(self.filename), password=password)          renice(p.pid, self.renice) @@ -123,7 +121,7 @@ class SevenZip(UnRar):      def list(self, password=None):          command = "l" if self.fullpath else "l" -        p = self.call_cmd(command, self.target, password=password) +        p = self.call_cmd(command, fs_encode(self.filename), password=password)          out, err = p.communicate()          code     = p.returncode diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 7f1b08caf..b8e2c3606 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -8,20 +8,21 @@ from string import digits  from subprocess import Popen, PIPE  from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError -from module.utils import save_join, decode +from module.utils import decode, fs_encode, save_join, uniqify  def renice(pid, value):      if value and os.name != "nt":          try:              Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) +          except Exception:              pass  class UnRar(Extractor):      __name__    = "UnRar" -    __version__ = "1.04" +    __version__ = "1.05"      __description__ = """Rar extractor plugin"""      __license__     = "GPLv3" @@ -71,42 +72,65 @@ class UnRar(Extractor):                  continue              m = cls.re_rarpart1.match(filename) -            if not m or int(m.group(1)) is 1:  #@NOTE: only add first part file +            if not m or int(m.group(1)) == 1:  #@NOTE: only add first part file                  targets.append((filename, id))          return targets -    def checkArchive(self): -        p = self.call_cmd("l", "-v", self.target) +    def check(self): +        p = self.call_cmd("l", "-v", fs_encode(self.filename))          out, err = p.communicate()          if self.re_wrongpwd.search(err): -            return True +            raise PasswordError + +        if self.re_wrongcrc.search(err): +            raise CRCError          # output only used to check if passworded files are present          for attr in self.re_filelist.findall(out):              if attr[0].startswith("*"): -                return True +                raise PasswordError          self.files = self.list()          if not self.files:              raise ArchiveError("Empty Archive") -        return False - -    def checkPassword(self, password): +    def isPassword(self, password):          # at this point we can only verify header protected files -        p = self.call_cmd("l", "-v", self.target, password=password) +        p = self.call_cmd("l", "-v", fs_encode(self.filename), password=password)          out, err = p.communicate()          return False if self.re_wrongpwd.search(err) else True +    def repair(self): +        p = self.call_cmd("rc", fs_encode(self.filename)) +        out, err = p.communicate() + +        if p.returncode or err.strip(): +            p = self.call_cmd("r", fs_encode(self.filename)) +            out, err = p.communicate() + +            if p.returncode or err.strip(): +                return False +            else: +                dir, name = os.path.split(filename) + +                if 'fixed' in out: +                    self.filename = os.path.join(dir, 'fixed.' + name) + +                elif 'rebuild' in out: +                    self.filename = os.path.join(dir, 'rebuild.' + name) + +        return True + +      def extract(self, password=None):          command = "x" if self.fullpath else "e" -        p = self.call_cmd(command, self.target, self.out, password=password) +        p = self.call_cmd(command, fs_encode(self.filename), self.out, password=password)          renice(p.pid, self.renice) @@ -139,7 +163,7 @@ class UnRar(Extractor):          elif err.strip():  #: raise error if anything is on stderr              raise ArchiveError(err.strip()) -        if p.returncode != 0: +        if p.returncode:              raise ArchiveError("Process terminated")          if not self.files: @@ -149,18 +173,18 @@ class UnRar(Extractor):      def getDeleteFiles(self):          files = [] -        for i in [1, 2]: +        for i in (1, 2):              try: -                dir, name = os.path.split(self.target) +                dir, name = os.path.split(self.filename)                  part      = self.getattr(self, "re_rarpart%d" % i).match(name).group(1) -                filename  = os.path.join(dir, name.replace(part, '*', 1)) -                files.extend(glob(filename)) +                file      = fs_encode(os.path.join(dir, name.replace(part, '*', 1))) +                files.extend(glob(file))              except Exception:                  continue -        if self.target not in files: -            files.insert(0, self.target) +        if self.filename not in files: +            files.insert(0, self.filename)          return files @@ -168,7 +192,7 @@ class UnRar(Extractor):      def list(self, password=None):          command = "vb" if self.fullpath else "lb" -        p = self.call_cmd(command, "-v", self.target, password=password) +        p = self.call_cmd(command, "-v", fs_encode(self.filename), password=password)          out, err = p.communicate()          if "Cannot open" in err: diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 026503be5..4f3f1ca32 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -7,16 +7,16 @@ import sys  import zipfile  from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError +from module.utils import fs_encode  class UnZip(Extractor):      __name__    = "UnZip" -    __version__ = "1.03" +    __version__ = "1.04"      __description__ = """Zip extractor plugin"""      __license__     = "GPLv3" -    __authors__     = [("RaNaN", "RaNaN@pyload.org"), -                       ("Walter Purcaro", "vuolter@gmail.com")] +    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")]      EXTENSIONS = [".zip", ".zip64"] @@ -32,9 +32,13 @@ class UnZip(Extractor):          return [(filename, id) for filename, id in files_ids if cls.isArchive(filename)] +    def repair(self): +        return False + +      def extract(self, password=None):          try: -            with zipfile.ZipFile(self.target, 'r', allowZip64=True) as z: +            with zipfile.ZipFile(fs_encode(self.filename), 'r', allowZip64=True) as z:                  z.setpassword(self.password)                  if not z.testzip():                      z.extractall(self.out) | 
