1 files changed, 648 insertions, 0 deletions
diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py
new file mode 100644
index 000000000..a0cdb1e2e
--- /dev/null
+++ b/module/plugins/internal/Hoster.py
@@ -0,0 +1,648 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import with_statement
+
+import inspect
+import os
+import random
+import time
+import traceback
+import urlparse
+
+from module.plugins.internal.Captcha import Captcha
+from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip,
+                                            chunks, encode, exists, fixurl as _fixurl, replace_patterns,
+                                            seconds_to_midnight, set_cookie, set_cookies, parse_html_form,
+                                            parse_html_tag_attr_value, timestamp)
+from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename
+
+
+#@TODO: Remove in 0.4.10
+def parse_fileInfo(klass, url="", html=""):
+    info = klass.get_info(url, html)
+    return info['name'], info['size'], info['status'], info['url']
+
+
+#@TODO: Remove in 0.4.10
+def getInfo(urls):
+    #: result = [ .. (name, size, status, url) .. ]
+    pass
+
+
+#@TODO: Remove in 0.4.10
+def create_getInfo(klass):
+    def get_info(urls):
+        for url in urls:
+            if hasattr(klass, "URL_REPLACEMENTS"):
+                url = replace_patterns(url, klass.URL_REPLACEMENTS)
+            yield parse_fileInfo(klass, url)
+
+    return get_info
+
+
+class Hoster(Plugin):
+    __name__    = "Hoster"
+    __type__    = "hoster"
+    __version__ = "0.19"
+    __status__  = "testing"
+
+    __pattern__ = r'^unmatchable$'
+    __config__  = []  #: [("name", "type", "desc", "default")]
+
+    __description__ = """Base hoster plugin"""
+    __license__     = "GPLv3"
+    __authors__     = [("RaNaN"         , "RaNaN@pyload.org" ),
+                       ("spoob"         , "spoob@pyload.org" ),
+                       ("mkaay"         , "mkaay@mkaay.de"   ),
+                       ("Walter Purcaro", "vuolter@gmail.com")]
+
+
+    def __init__(self, pyfile):
+        self._init(pyfile.m.core)
+
+        #: Engage wan reconnection
+        self.wantReconnect = False  #@TODO: Change to `want_reconnect` in 0.4.10
+
+        #: Enable simultaneous processing of multiple downloads
+        self.multiDL = True  #@TODO: Change to `multi_dl` in 0.4.10
+        self.limitDL = 0     #@TODO: Change to `limit_dl` in 0.4.10
+
+        #: time.time() + wait in seconds
+        self.wait_until = 0
+        self.waiting    = False
+
+        #: Account handler instance, see :py:class:`Account`
+        self.account = None
+        self.user    = None
+        self.req     = None  #: Browser instance, see `network.Browser`
+
+        #: Associated pyfile instance, see `PyFile`
+        self.pyfile = pyfile
+
+        self.thread = None  #: Holds thread in future
+
+        #: Location where the last call to download was saved
+        self.last_download = ""
+
+        #: Re match of the last call to `checkDownload`
+        self.last_check = None
+
+        #: Js engine, see `JsEngine`
+        self.js = self.pyload.js
+
+        #: Captcha stuff
+        self.captcha = Captcha(self)
+
+        #: Some plugins store html code here
+        self.html = None
+
+        #: Dict of the amount of retries already made
+        self.retries    = {}
+        self.retry_free = False  #@TODO: Recheck in 0.4.10
+
+        self._setup()
+        self.init()
+
+
+    @classmethod
+    def get_info(cls, url="", html=""):
+        url   = _fixurl(url)
+        url_p = urlparse.urlparse(url)
+        return {'name'  : (url_p.path.split('/')[-1] or
+                            url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or
+                                url_p.netloc.split('.', 1)[0]),
+                'size'  : 0,
+                'status': 3 if url else 8,
+                'url'   : url}
+
+
+    def init(self):
+        """
+        Initialize the plugin (in addition to `__init__`)
+        """
+        pass
+
+
+    def setup(self):
+        """
+        Setup for enviroment and other things, called before downloading (possibly more than one time)
+        """
+        pass
+
+
+    def _setup(self):
+        if self.account:
+            self.req             = self.pyload.requestFactory.getRequest(self.__name__, self.user)
+            self.chunk_limit     = -1  #: -1 for unlimited
+            self.resume_download = True
+            self.premium         = self.account.is_premium(self.user)
+        else:
+            self.req             = self.pyload.requestFactory.getRequest(self.__name__)
+            self.chunk_limit     = 1
+            self.resume_download = False
+            self.premium         = False
+
+
+    def load_account(self):
+        if self.req:
+            self.req.close()
+
+        if not self.account:
+            self.account = self.pyload.accountManager.getAccountPlugin(self.__name__)
+
+        if self.account:
+            if not self.user:
+                self.user = self.account.select()[0]
+
+            if not self.user or not self.account.is_logged(self.user, True):
+                self.account = False
+
+
+    def preprocessing(self, thread):
+        """
+        Handles important things to do before starting
+        """
+        self.thread = thread
+
+        if self.retry_free:
+            self.account = False
+        else:
+            self.load_account()  #@TODO: Move to PluginThread in 0.4.10
+            self.retry_free = False
+
+        self._setup()
+        self.setup()
+
+        self.pyload.hookManager.downloadPreparing(self.pyfile)  #@TODO: Recheck in 0.4.10
+
+        if self.pyfile.abort:
+            self.abort()
+
+        self.pyfile.setStatus("starting")
+        self.log_debug("PROCESS URL " + self.pyfile.url, "PLUGIN VERSION %s" % self.__version__)
+
+        return self.process(self.pyfile)
+
+
+    def process(self, pyfile):
+        """
+        The 'main' method of every plugin, you **have to** overwrite it
+        """
+        raise NotImplementedError
+
+
+    def set_reconnect(self, reconnect):
+        reconnect = bool(reconnect)
+
+        self.log_info(_("RECONNECT ") + ("enabled" if reconnect else "disabled"))
+        self.log_debug("Previous wantReconnect: %s" % self.wantReconnect)
+
+        self.wantReconnect = reconnect
+
+
+    def set_wait(self, seconds, reconnect=None):
+        """
+        Set a specific wait time later used with `wait`
+
+        :param seconds: wait time in seconds
+        :param reconnect: True if a reconnect would avoid wait time
+        """
+        wait_time  = max(int(seconds), 1)
+        wait_until = time.time() + wait_time + 1
+
+        self.log_info(_("WAIT %d seconds") % wait_time)
+        self.log_debug("Previous waitUntil: %f" % self.pyfile.waitUntil)
+
+        self.pyfile.waitUntil = wait_until
+
+        if reconnect is not None:
+            self.set_reconnect(reconnect)
+
+
+    def wait(self, seconds=None, reconnect=None):
+        """
+        Waits the time previously set
+        """
+        pyfile = self.pyfile
+
+        if seconds is not None:
+            self.set_wait(seconds)
+
+        if reconnect is not None:
+            self.set_reconnect(reconnect)
+
+        self.waiting = True
+
+        status = pyfile.status  #@NOTE: Remove in 0.4.10
+        pyfile.setStatus("waiting")
+
+        if not self.wantReconnect or self.account:
+            if self.account:
+                self.log_warning("Ignore reconnection due logged account")
+
+            while pyfile.waitUntil > time.time():
+                if pyfile.abort:
+                    self.abort()
+
+                time.sleep(2)
+
+        else:
+            while pyfile.waitUntil > time.time():
+                if pyfile.abort:
+                    self.abort()
+
+                if self.thread.m.reconnecting.isSet():
+                    self.waiting = False
+                    self.wantReconnect = False
+                    raise Reconnect
+
+                self.thread.m.reconnecting.wait(2)
+                time.sleep(2)
+
+        self.waiting = False
+        pyfile.status = status  #@NOTE: Remove in 0.4.10
+
+
+    def skip(self, reason=""):
+        """
+        Skip and give reason
+        """
+        raise Skip(encode(reason))  #@TODO: Remove `encode` in 0.4.10
+
+
+    def abort(self, reason=""):
+        """
+        Abort and give reason
+        """
+        #@TODO: Remove in 0.4.10
+        if reason:
+            self.pyfile.error = encode(reason)
+
+        raise Abort
+
+
+    def offline(self, reason=""):
+        """
+        Fail and indicate file is offline
+        """
+        #@TODO: Remove in 0.4.10
+        if reason:
+            self.pyfile.error = encode(reason)
+
+        raise Fail("offline")
+
+
+    def temp_offline(self, reason=""):
+        """
+        Fail and indicates file ist temporary offline, the core may take consequences
+        """
+        #@TODO: Remove in 0.4.10
+        if reason:
+            self.pyfile.error = encode(reason)
+
+        raise Fail("temp. offline")
+
+
+    def retry(self, max_tries=5, wait_time=1, reason=""):
+        """
+        Retries and begin again from the beginning
+
+        :param max_tries: number of maximum retries
+        :param wait_time: time to wait in seconds
+        :param reason: reason for retrying, will be passed to fail if max_tries reached
+        """
+        id = inspect.currentframe().f_back.f_lineno
+        if id not in self.retries:
+            self.retries[id] = 0
+
+        if 0 < max_tries <= self.retries[id]:
+            self.fail(reason or _("Max retries reached"))
+
+        self.wait(wait_time, False)
+
+        self.retries[id] += 1
+        raise Retry(encode(reason))  #@TODO: Remove `encode` in 0.4.10
+
+
+    def restart(self, reason=None, nopremium=False):
+        if not reason:
+            reason = _("Fallback to free download") if nopremium else _("Restart")
+
+        if nopremium:
+            if self.premium:
+                self.retry_free = True
+            else:
+                self.fail("%s | %s" % (reason, _("Download was already free")))
+
+        raise Retry(encode(reason))  #@TODO: Remove `encode` in 0.4.10
+
+
+    def fixurl(self, url):
+        url = _fixurl(url)
+
+        if not urlparse.urlparse(url).scheme:
+            url_p = urlparse.urlparse(self.pyfile.url)
+            baseurl = "%s://%s" % (url_p.scheme, url_p.netloc)
+            url = urlparse.urljoin(baseurl, url)
+
+        return url
+
+
+    def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=True):
+        """
+        Downloads the content at url to download folder
+
+        :param url:
+        :param get:
+        :param post:
+        :param ref:
+        :param cookies:
+        :param disposition: if True and server provides content-disposition header\
+        the filename will be changed if needed
+        :return: The location where the file was saved
+        """
+        if self.pyfile.abort:
+            self.abort()
+
+        url = self.fixurl(url)
+
+        if not url or not isinstance(url, basestring):
+            self.fail(_("No url given"))
+
+        if self.pyload.debug:
+            self.log_debug("DOWNLOAD URL " + url,
+                           *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])
+
+        name = _fixurl(self.pyfile.name)
+        self.pyfile.name = urlparse.urlparse(name).path.split('/')[-1] or name
+
+        self.captcha.correct()
+        self.check_for_same_files()
+
+        self.pyfile.setStatus("downloading")
+
+        download_folder   = self.pyload.config.get("general", "download_folder")
+        download_location = fs_join(download_folder, self.pyfile.package().folder)
+
+        if not exists(download_location):
+            try:
+                os.makedirs(download_location)
+            except Exception, e:
+                self.fail(e)
+
+        self.set_permissions(download_location)
+
+        location = fs_decode(download_location)
+        filename = os.path.join(location, safe_filename(self.pyfile.name))  #@TODO: Move `safe_filename` check to HTTPDownload in 0.4.10
+
+        self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, url, filename)
+
+        if self.pyfile.abort:
+            self.abort()
+
+        try:
+            newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies,
+                                            chunks=self.get_chunk_count(), resume=self.resume_download,
+                                            progressNotify=self.pyfile.setProgress, disposition=disposition)
+        finally:
+            self.pyfile.size = self.req.size
+
+        #@TODO: Recheck in 0.4.10
+        if disposition and newname:
+            finalname = urlparse.urlparse(newname).path.split('/')[-1].split(' filename*=')[0]
+
+            if finalname != newname != self.pyfile.name:
+                try:
+                    os.rename(fs_join(location, newname), fs_join(location, finalname))
+
+                except OSError, e:
+                    self.log_warning(_("Error renaming `%s` to `%s`") % (newname, finalname), e)
+                    finalname = newname
+
+                self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, finalname))
+                self.pyfile.name = finalname
+                filename = os.path.join(location, finalname)
+
+        self.set_permissions(fs_encode(filename))
+
+        self.last_download = filename
+
+        return self.last_download
+
+
+    def check_download(self, rules, delete=False, file_size=0, size_tolerance=1024, read_size=1048576):
+        """
+        Checks the content of the last downloaded file, re match is saved to `lastCheck`
+
+        :param rules: dict with names and rules to match (compiled regexp or strings)
+        :param delete: delete if matched
+        :param file_size: expected file size
+        :param size_tolerance: size check tolerance
+        :param read_size: amount of bytes to read from files
+        :return: dictionary key of the first rule that matched
+        """
+        do_delete = False
+        last_download = fs_encode(self.last_download)
+
+        if not self.last_download or not exists(last_download):
+            self.last_download = ""
+            self.fail(self.pyfile.error or _("No file downloaded"))
+
+        try:
+            download_size = os.stat(last_download).st_size
+
+            if download_size < 1:
+                do_delete = True
+                self.fail(_("Empty file"))
+
+            elif file_size > 0:
+                diff = abs(file_size - download_size)
+
+                if diff > size_tolerance:
+                    do_delete = True
+                    self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s")
+                              % (file_size, download_size))
+
+                elif diff != 0:
+                    self.log_warning(_("File size is not equal to expected size"))
+
+            with open(last_download, "rb") as f:
+                content = f.read(read_size)
+
+            #: Produces encoding errors, better log to other file in the future?
+            # self.log_debug("Content: %s" % content)
+            for name, rule in rules.items():
+                if isinstance(rule, basestring):
+                    if rule in content:
+                        do_delete = True
+                        return name
+
+                elif hasattr(rule, "search"):
+                    m = rule.search(content)
+                    if m:
+                        do_delete = True
+                        self.last_check = m
+                        return name
+        finally:
+            if delete and do_delete:
+                try:
+                    os.remove(last_download)
+
+                except OSError, e:
+                    self.log_warning(_("Error removing: %s") % last_download, e)
+                    if self.pyload.debug:
+                        traceback.print_exc()
+
+                else:
+                    self.last_download = ""
+                    self.log_info(_("File deleted"))
+
+
+    def direct_link(self, url, follow_location=None):
+        link = ""
+
+        if follow_location is None:
+            redirect = 1
+
+        elif type(follow_location) is int:
+            redirect = max(follow_location, 1)
+
+        else:
+            redirect = self.get_config("maxredirs", 10, "UserAgentSwitcher")
+
+        for i in xrange(redirect):
+            try:
+                self.log_debug("Redirect #%d to: %s" % (i, url))
+                header = self.load(url, just_header=True)
+
+            except Exception:  #: Bad bad bad... rewrite this part in 0.4.10
+                res = self.load(url,
+                                just_header=True,
+                                req=self.pyload.requestFactory.getRequest())
+
+                header = {'code': req.code}
+                for line in res.splitlines():
+                    line = line.strip()
+                    if not line or ":" not in line:
+                        continue
+
+                    key, none, value = line.partition(":")
+                    key              = key.lower().strip()
+                    value            = value.strip()
+
+                    if key in header:
+                        if type(header[key]) is list:
+                            header[key].append(value)
+                        else:
+                            header[key] = [header[key], value]
+                    else:
+                        header[key] = value
+
+            if 'content-disposition' in header:
+                link = url
+
+            elif 'location' in header and header['location']:
+                location = header['location']
+
+                if not urlparse.urlparse(location).scheme:
+                    url_p    = urlparse.urlparse(url)
+                    baseurl  = "%s://%s" % (url_p.scheme, url_p.netloc)
+                    location = urlparse.urljoin(baseurl, location)
+
+                if 'code' in header and header['code'] == 302:
+                    link = location
+
+                if follow_location:
+                    url = location
+                    continue
+
+            else:
+                extension = os.path.splitext(urlparse.urlparse(url).path.split('/')[-1])[-1]
+
+                if 'content-type' in header and header['content-type']:
+                    mimetype = header['content-type'].split(';')[0].strip()
+
+                elif extension:
+                    mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream"
+
+                else:
+                    mimetype = ""
+
+                if mimetype and (link or 'html' not in mimetype):
+                    link = url
+                else:
+                    link = ""
+
+            break
+
+        else:
+            try:
+                self.log_error(_("Too many redirects"))
+            except Exception:
+                pass
+
+        return link
+
+
+    def parse_html_form(self, attr_str="", input_names={}):
+        return parse_html_form(attr_str, self.html, input_names)
+
+
+    def check_traffic_left(self):
+        if not self.account:
+            return True
+
+        traffic = self.account.get_data(self.user, True)['trafficleft']
+
+        if traffic is None:
+            return False
+        elif traffic == -1:
+            return True
+        else:
+            size = self.pyfile.size / 1024
+            self.log_info(_("Filesize: %s KiB, Traffic left for user %s: %s KiB") % (size, self.user, traffic))
+            return size <= traffic
+
+
+    def get_password(self):
+        """
+        Get the password the user provided in the package
+        """
+        return self.pyfile.package().password or ""
+
+
+    #: Deprecated method, use `check_for_same_files` instead (Remove in 0.4.10)
+    def checkForSameFiles(self, *args, **kwargs):
+        return self.check_for_same_files(*args, **kwargs)
+
+
+    def check_for_same_files(self, starting=False):
+        """
+        Checks if same file was/is downloaded within same package
+
+        :param starting: indicates that the current download is going to start
+        :raises Skip:
+        """
+        pack = self.pyfile.package()
+
+        for pyfile in self.pyload.files.cache.values():
+            if pyfile != self.pyfile and pyfile.name is self.pyfile.name and pyfile.package().folder is pack.folder:
+                if pyfile.status in (0, 12):  #: Finished or downloading
+                    self.skip(pyfile.pluginname)
+                elif pyfile.status in (5, 7) and starting:  #: A download is waiting/starting and was appenrently started before
+                    self.skip(pyfile.pluginname)
+
+        download_folder = self.pyload.config.get("general", "download_folder")
+        location = fs_join(download_folder, pack.folder, self.pyfile.name)
+
+        if starting and self.pyload.config.get("download", "skip_existing") and exists(location):
+            size = os.stat(location).st_size
+            if size >= self.pyfile.size:
+                self.skip("File exists")
+
+        pyfile = self.pyload.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name)
+        if pyfile:
+            if exists(location):
+                self.skip(pyfile[0])
+
+            self.log_debug("File %s not skipped, because it does not exists." % self.pyfile.name)