1 files changed, 681 insertions, 23 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py
index 3fed8a7c6..9db22d1d7 100644
--- a/module/plugins/internal/Hoster.py
+++ b/module/plugins/internal/Hoster.py
@@ -1,33 +1,691 @@
 # -*- coding: utf-8 -*-
 
-"""
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License,
-    or (at your option) any later version.
+from __future__ import with_statement
 
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-    See the GNU General Public License for more details.
+import inspect
+import os
+import random
+import time
+import urlparse
 
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see <http://www.gnu.org/licenses/>.
+if os.name != "nt":
+    import grp
+    import pwd
 
-    @author: mkaay
-"""
+from module.plugins.internal.Plugin import Plugin, Abort, Fail, Reconnect, Retry, Skip
+from module.utils import fs_decode, fs_encode, save_join as fs_join
 
-from module.plugins.internal.Plugin import Plugin
 
-def getInfo(self):
-        #result = [ .. (name, size, status, url) .. ]
-        return
+def getInfo(urls):
+    #result = [ .. (name, size, status, url) .. ]
+    pass
+
 
 class Hoster(Plugin):
-    __name__ = "Hoster"
-    __version__ = "0.02"
-    __pattern__ = None
-    __type__ = "hoster"
+    __name__    = "Hoster"
+    __type__    = "hoster"
+    __version__ = "0.03"
+
+    __pattern__ = r'^unmatchable$'
+    __config__  = []  #: [("name", "type", "desc", "default")]
+
     __description__ = """Base hoster plugin"""
-    __author_name__ = ("mkaay")
-    __author_mail__ = ("mkaay@mkaay.de")
+    __license__     = "GPLv3"
+    __authors__     = [("RaNaN"         , "RaNaN@pyload.org" ),
+                       ("spoob"         , "spoob@pyload.org" ),
+                       ("mkaay"         , "mkaay@mkaay.de"   ),
+                       ("Walter Purcaro", "vuolter@gmail.com")]
+
+
+    def __init__(self, pyfile):
+        super(Hoster, self).__init__(pyfile.m.core)
+
+        #: engage wan reconnection
+        self.wantReconnect = False
+
+        #: enable simultaneous processing of multiple downloads
+        self.multiDL = True
+        self.limitDL = 0
+
+        #: chunk limit
+        self.chunkLimit = 1
+        self.resumeDownload = False
+
+        #: time.time() + wait in seconds
+        self.waitUntil = 0
+        self.waiting = False
+
+        #: captcha reader instance
+        self.ocr = None
+
+        #: account handler instance, see :py:class:`Account`
+        self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__)
+
+        #: premium status
+        self.premium = False
+
+        #: username/login
+        self.user = None
+
+        if self.account and not self.account.canUse():
+            self.account = None
+
+        if self.account:
+            self.user, data = self.account.selectAccount()
+
+            #: Browser instance, see `network.Browser`
+            self.req = self.account.getAccountRequest(self.user)
+            self.chunkLimit = -1  #: chunk limit, -1 for unlimited
+
+            #: enables resume (will be ignored if server dont accept chunks)
+            self.resumeDownload = True
+
+            #: premium status
+            self.premium = self.account.isPremium(self.user)
+        else:
+            self.req = pyfile.m.core.requestFactory.getRequest(self.__name__)
+
+        #: associated pyfile instance, see `PyFile`
+        self.pyfile = pyfile
+
+        self.thread = None  #: holds thread in future
+
+        #: location where the last call to download was saved
+        self.lastDownload = ""
+
+        #: re match of the last call to `checkDownload`
+        self.lastCheck = None
+
+        #: js engine, see `JsEngine`
+        self.js = self.core.js
+
+        #: captcha task
+        self.cTask = None
+
+        #: some plugins store html code here
+        self.html = None
+
+        #: dict of the amount of retries already made
+        self.retries = {}
+
+        self.init()
+
+
+    def init(self):
+        """
+        Initialize the plugin (in addition to `__init__`)
+        """
+        pass
+
+
+    def setup(self):
+        """
+        Setup for enviroment and other things, called before downloading (possibly more than one time)
+        """
+        pass
+
+
+    def preprocessing(self, thread):
+        """
+        Handles important things to do before starting
+        """
+        self.thread = thread
+
+        if self.account:
+            self.account.checkLogin(self.user)
+        else:
+            self.req.clearCookies()
+
+        self.setup()
+
+        self.pyfile.setStatus("starting")
+
+        return self.process(self.pyfile)
+
+
+    def process(self, pyfile):
+        """
+        The 'main' method of every plugin, you **have to** overwrite it
+        """
+        raise NotImplementedError
+
+
+    def getChunkCount(self):
+        if self.chunkLimit <= 0:
+            return self.core.config.get("download", "chunks")
+        return min(self.core.config.get("download", "chunks"), self.chunkLimit)
+
+
+    def resetAccount(self):
+        """
+        Don't use account and retry download
+        """
+        self.account = None
+        self.req = self.core.requestFactory.getRequest(self.__name__)
+        self.retry()
+
+
+    def setReconnect(self, reconnect):
+        reconnect = bool(reconnect)
+        self.logDebug("Set wantReconnect to: %s (previous: %s)" % (reconnect, self.wantReconnect))
+        self.wantReconnect = reconnect
+
+
+    def setWait(self, seconds, reconnect=None):
+        """
+        Set a specific wait time later used with `wait`
+
+        :param seconds: wait time in seconds
+        :param reconnect: True if a reconnect would avoid wait time
+        """
+        wait_time  = int(seconds) + 1
+        wait_until = time.time() + wait_time
+
+        self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.pyfile.waitUntil),
+                      "Wait: %d seconds" % wait_time)
+
+        self.pyfile.waitUntil = wait_until
+
+        if reconnect is not None:
+            self.setReconnect(reconnect)
+
+
+    def wait(self, seconds=None, reconnect=None):
+        """
+        Waits the time previously set
+        """
+        pyfile = self.pyfile
+
+        if seconds is not None:
+            self.setWait(seconds)
+
+        if reconnect is not None:
+            self.setReconnect(reconnect)
+
+        self.waiting = True
+
+        status = pyfile.status
+        pyfile.setStatus("waiting")
+
+        self.logInfo(_("Wait: %d seconds") % (pyfile.waitUntil - time.time()),
+                     _("Reconnect: %s")    % self.wantReconnect)
+
+        if self.account:
+            self.logDebug("Ignore reconnection due account logged")
+
+            while pyfile.waitUntil > time.time():
+                if pyfile.abort:
+                    self.abort()
+
+                time.sleep(1)
+        else:
+            while pyfile.waitUntil > time.time():
+                self.thread.m.reconnecting.wait(2)
+
+                if pyfile.abort:
+                    self.abort()
+
+                if self.thread.m.reconnecting.isSet():
+                    self.waiting = False
+                    self.wantReconnect = False
+                    raise Reconnect
+
+                time.sleep(1)
+
+        self.waiting = False
+
+        pyfile.status = status
+
+
+    def skip(self, reason=""):
+        """
+        Skip and give reason
+        """
+        raise Skip(fs_encode(reason))
+
+
+    def abort(self, reason=""):
+        """
+        Abort and give reason
+        """
+        if reason:
+            self.pyfile.error = fs_encode(reason)
+        raise Abort
+
+
+    def offline(self, reason=""):
+        """
+        Fail and indicate file is offline
+        """
+        if reason:
+            self.pyfile.error = fs_encode(reason)
+        raise Fail("offline")
+
+
+    def tempOffline(self, reason=""):
+        """
+        Fail and indicates file ist temporary offline, the core may take consequences
+        """
+        if reason:
+            self.pyfile.error = fs_encode(reason)
+        raise Fail("temp. offline")
+
+
+    def retry(self, max_tries=5, wait_time=1, reason=""):
+        """
+        Retries and begin again from the beginning
+
+        :param max_tries: number of maximum retries
+        :param wait_time: time to wait in seconds
+        :param reason: reason for retrying, will be passed to fail if max_tries reached
+        """
+        id = inspect.currentframe().f_back.f_lineno
+        if id not in self.retries:
+            self.retries[id] = 0
+
+        if 0 < max_tries <= self.retries[id]:
+            self.fail(reason or _("Max retries reached"), "retry")
+
+        self.wait(wait_time, False)
+
+        self.retries[id] += 1
+        raise Retry(reason)
+
+
+    def invalidCaptcha(self):
+        self.logError(_("Invalid captcha"))
+        if self.cTask:
+            self.cTask.invalid()
+
+
+    def correctCaptcha(self):
+        self.logInfo(_("Correct captcha"))
+        if self.cTask:
+            self.cTask.correct()
+
+
+    def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False,
+                       imgtype='jpg', result_type='textual'):
+        """
+        Loads a captcha and decrypts it with ocr, plugin, user input
+
+        :param url: url of captcha image
+        :param get: get part for request
+        :param post: post part for request
+        :param cookies: True if cookies should be enabled
+        :param forceUser: if True, ocr is not used
+        :param imgtype: Type of the Image
+        :param result_type: 'textual' if text is written on the captcha\
+        or 'positional' for captcha where the user have to click\
+        on a specific region on the captcha
+
+        :return: result of decrypting
+        """
+
+        img = self.load(url, get=get, post=post, cookies=cookies)
+
+        id = ("%.2f" % time.time())[-6:].replace(".", "")
+
+        with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") as tmpCaptcha:
+            tmpCaptcha.write(img)
+
+        has_plugin = self.__name__ in self.core.pluginManager.ocrPlugins
+
+        if self.core.captcha:
+            Ocr = self.core.pluginManager.loadClass("ocr", self.__name__)
+        else:
+            Ocr = None
+
+        if Ocr and not forceUser:
+            time.sleep(random.randint(3000, 5000) / 1000.0)
+            if self.pyfile.abort:
+                self.abort()
+
+            ocr = Ocr()
+            result = ocr.get_captcha(tmpCaptcha.name)
+        else:
+            captchaManager = self.core.captchaManager
+            task = captchaManager.newTask(img, imgtype, tmpCaptcha.name, result_type)
+            self.cTask = task
+            captchaManager.handleCaptcha(task)
+
+            while task.isWaiting():
+                if self.pyfile.abort:
+                    captchaManager.removeTask(task)
+                    self.abort()
+                time.sleep(1)
+
+            captchaManager.removeTask(task)
+
+            if task.error and has_plugin:  #: ignore default error message since the user could use OCR
+                self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting"))
+            elif task.error:
+                self.fail(task.error)
+            elif not task.result:
+                self.fail(_("No captcha result obtained in appropiate time by any of the plugins"))
+
+            result = task.result
+            self.logDebug("Received captcha result: %s" % result)
+
+        if not self.core.debug:
+            try:
+                os.remove(tmpCaptcha.name)
+            except Exception:
+                pass
+
+        return result
+
+
+    def fixurl(self, url):
+        url_p   = urlparse.urlparse(self.pyfile.url)
+        baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+
+        url = super(Hoster, self).fixurl(url)
+
+        if not urlparse.urlparse(url).scheme:
+            url = urlparse.urljoin(baseurl, url)
+
+        return url
+
+
+    def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False):
+        """
+        Downloads the content at url to download folder
+
+        :param url:
+        :param get:
+        :param post:
+        :param ref:
+        :param cookies:
+        :param disposition: if True and server provides content-disposition header\
+        the filename will be changed if needed
+        :return: The location where the file was saved
+        """
+        if self.pyfile.abort:
+            self.abort()
+
+        url = self.fixurl(url)
+
+        if not url or not isinstance(url, basestring):
+            self.fail(_("No url given"))
+
+        if self.core.debug:
+            self.logDebug("Download url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")])
+
+        self.correctCaptcha()
+        self.checkForSameFiles()
+
+        self.pyfile.setStatus("downloading")
+
+        if disposition:
+            self.pyfile.name = urlparse.urlparse(url).path.split('/')[-1] or self.pyfile.name
+
+        download_folder = self.core.config.get("general", "download_folder")
+
+        location = fs_join(download_folder, self.pyfile.package().folder)
+
+        if not os.path.exists(location):
+            try:
+                os.makedirs(location, int(self.core.config.get("permission", "folder"), 8))
+
+                if self.core.config.get("permission", "change_dl") and os.name != "nt":
+                    uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2]
+                    gid = grp.getgrnam(self.core.config.get("permission", "group"))[2]
+                    os.chown(location, uid, gid)
+
+            except Exception, e:
+                self.fail(e)
+
+        # convert back to unicode
+        location = fs_decode(location)
+        name = safe_filename(self.pyfile.name)
+
+        filename = os.path.join(location, name)
+
+        self.core.addonManager.dispatchEvent("download-start", self.pyfile, url, filename)
+
+        try:
+            newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies,
+                                            chunks=self.getChunkCount(), resume=self.resumeDownload,
+                                            progressNotify=self.pyfile.setProgress, disposition=disposition)
+        finally:
+            self.pyfile.size = self.req.size
+
+        if newname:
+            newname = urlparse.urlparse(newname).path.split('/')[-1]
+
+            if disposition and newname != name:
+                self.logInfo(_("%(name)s saved as %(newname)s") % {"name": name, "newname": newname})
+                self.pyfile.name = newname
+                filename = os.path.join(location, newname)
+
+        fs_filename = fs_encode(filename)
+
+        if self.core.config.get("permission", "change_file"):
+            try:
+                os.chmod(fs_filename, int(self.core.config.get("permission", "file"), 8))
+            except Exception, e:
+                self.logWarning(_("Setting file mode failed"), e)
+
+        if self.core.config.get("permission", "change_dl") and os.name != "nt":
+            try:
+                uid = pwd.getpwnam(self.core.config.get("permission", "user"))[2]
+                gid = grp.getgrnam(self.core.config.get("permission", "group"))[2]
+                os.chown(fs_filename, uid, gid)
+
+            except Exception, e:
+                self.logWarning(_("Setting User and Group failed"), e)
+
+        self.lastDownload = filename
+        return self.lastDownload
+
+
+    def checkDownload(self, rules, delete=True, file_size=None, size_tolerance=1000, read_size=100000):
+        """
+        Checks the content of the last downloaded file, re match is saved to `lastCheck`
+
+        :param rules: dict with names and rules to match (compiled regexp or strings)
+        :param delete: delete if matched
+        :param file_size: expected file size
+        :param size_tolerance: size check tolerance
+        :param read_size: amount of bytes to read from files
+        :return: dictionary key of the first rule that matched
+        """
+        lastDownload = fs_encode(self.lastDownload)
+
+        if not self.lastDownload or not os.path.exists(lastDownload):
+            self.lastDownload = ""
+            self.fail(self.pyfile.error or _("No file downloaded"))
+
+        download_size = os.stat(lastDownload).st_size
+
+        if download_size < 1 or (file_size and abs(file_size - download_size) > size_tolerance):
+            if delete:
+                os.remove(lastDownload)
+            self.fail(_("Empty file"))
+
+        self.logDebug("Download Check triggered")
+
+        with open(lastDownload, "rb") as f:
+            content = f.read(read_size)
+
+        # produces encoding errors, better log to other file in the future?
+        # self.logDebug("Content: %s" % content)
+        for name, rule in rules.iteritems():
+            if isinstance(rule, basestring):
+                if rule in content:
+                    if delete:
+                        os.remove(lastDownload)
+                    return name
+
+            elif hasattr(rule, "search"):
+                m = rule.search(content)
+                if m:
+                    if delete:
+                        os.remove(lastDownload)
+                    self.lastCheck = m
+                    return name
+
+
+    def directLink(self, url, follow_location=None):
+        link = ""
+
+        if follow_location is None:
+            redirect = 1
+
+        elif type(follow_location) is int:
+            redirect = max(follow_location, 1)
+
+        else:
+            redirect = self.getConfig("maxredirs", plugin="UserAgentSwitcher")
+
+        for i in xrange(redirect):
+            try:
+                self.logDebug("Redirect #%d to: %s" % (i, url))
+                header = self.load(url, just_header=True)
+
+            except Exception:  #: Bad bad bad... rewrite this part in 0.4.10
+                req = pyreq.getHTTPRequest()
+                res = self.load(url, just_header=True)
+
+                req.close()
+
+                header = {"code": req.code}
+                for line in res.splitlines():
+                    line = line.strip()
+                    if not line or ":" not in line:
+                        continue
+
+                    key, none, value = line.partition(":")
+                    key              = key.lower().strip()
+                    value            = value.strip()
+
+                    if key in header:
+                        if type(header[key]) == list:
+                            header[key].append(value)
+                        else:
+                            header[key] = [header[key], value]
+                    else:
+                        header[key] = value
+
+            if 'content-disposition' in header:
+                link = url
+
+            elif 'location' in header and header['location']:
+                location = header['location']
+
+                if not urlparse.urlparse(location).scheme:
+                    url_p    = urlparse.urlparse(url)
+                    baseurl  = "%s://%s" % (url_p.scheme, url_p.netloc)
+                    location = urlparse.urljoin(baseurl, location)
+
+                if 'code' in header and header['code'] == 302:
+                    link = location
+
+                if follow_location:
+                    url = location
+                    continue
+
+            else:
+                extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
+
+                if 'content-type' in header and header['content-type']:
+                    mimetype = header['content-type'].split(';')[0].strip()
+
+                elif extension:
+                    mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+                else:
+                    mimetype = ""
+
+                if mimetype and (link or 'html' not in mimetype):
+                    link = url
+                else:
+                    link = ""
+
+            break
+
+        else:
+            try:
+                self.logError(_("Too many redirects"))
+            except Exception:
+                pass
+
+        return link
+
+
+    def parseHtmlForm(self, attr_str="", input_names={}):
+        return parseHtmlForm(attr_str, self.html, input_names)
+
+
+    def checkTrafficLeft(self):
+        if not self.account:
+            return True
+
+        traffic = self.account.getAccountInfo(self.user, True)['trafficleft']
+
+        if traffic is None:
+            return False
+        elif traffic == -1:
+            return True
+        else:
+            size = self.pyfile.size / 1024
+            self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic))
+            return size <= traffic
+
+
+    def getPassword(self):
+        """
+        Get the password the user provided in the package
+        """
+        return self.pyfile.package().password or ""
+
+
+    def checkForSameFiles(self, starting=False):
+        """
+        Checks if same file was/is downloaded within same package
+
+        :param starting: indicates that the current download is going to start
+        :raises Skip:
+        """
+        pack = self.pyfile.package()
+
+        for pyfile in self.core.files.cache.values():
+            if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder:
+                if pyfile.status in (0, 12):  #: finished or downloading
+                    self.skip(pyfile.pluginname)
+                elif pyfile.status in (5, 7) and starting:  #: a download is waiting/starting and was appenrently started before
+                    self.skip(pyfile.pluginname)
+
+        download_folder = self.core.config.get("general", "download_folder")
+        location = fs_join(download_folder, pack.folder, self.pyfile.name)
+
+        if starting and self.core.config.get("download", "skip_existing") and os.path.exists(location):
+            size = os.stat(location).st_size
+            if size >= self.pyfile.size:
+                self.skip("File exists")
+
+        pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name)
+        if pyfile:
+            if os.path.exists(location):
+                self.skip(pyfile[0])
+
+            self.logDebug("File %s not skipped, because it does not exists." % self.pyfile.name)
+
+
+    def clean(self):
+        """
+        Clean everything and remove references
+        """
+        if hasattr(self, "pyfile"):
+            del self.pyfile
+
+        if hasattr(self, "req"):
+            self.req.close()
+            del self.req
+
+        if hasattr(self, "thread"):
+            del self.thread
+
+        if hasattr(self, "html"):
+            del self.html