# -*- coding: utf-8 -*-

import re

from datetime import datetime, timedelta
from inspect import isclass
from os.path import exists
from time import time
from urllib import unquote
from urlparse import urljoin, urlparse

from module.PyFile import statusMap as _statusMap
from module.network.CookieJar import CookieJar
from module.network.HTTPRequest import BadHeader
from module.network.RequestFactory import getURL
from module.plugins.Hoster import Hoster
from module.plugins.Plugin import Fail
from module.utils import fixup, fs_encode, parseFileSize


#@TODO: Adapt and move to PyFile in 0.4.10
statusMap = dict((v, k) for k, v in _statusMap.iteritems())


#@TODO: Remove in 0.4.10 and redirect to self.error instead
def _error(self, reason, type):
        if not reason and not type:
            type = "unknown"

        msg  = _("%s error") % type.strip().capitalize() if type else _("Error")
        msg += ": %s" % reason.strip() if reason else ""
        msg += _(" | Plugin may be out of date")

        raise Fail(msg)


#@TODO: Remove in 0.4.10
def _wait(self, seconds, reconnect):
    if seconds:
        self.setWait(int(seconds) + 1)

    if reconnect is not None:
        self.wantReconnect = reconnect

    super(SimpleHoster, self).wait()


def replace_patterns(string, ruleslist):
    for r in ruleslist:
        rf, rt = r
        string = re.sub(rf, rt, string)
    return string


def set_cookies(cj, cookies):
    for cookie in cookies:
        if isinstance(cookie, tuple) and len(cookie) == 3:
            domain, name, value = cookie
            cj.setCookie(domain, name, value)


def parseHtmlTagAttrValue(attr_name, tag):
    m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I)
    return m.group(2) if m else None


def parseHtmlForm(attr_str, html, input_names={}):
    for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str,
                            html, re.S | re.I):
        inputs = {}
        action = parseHtmlTagAttrValue("action", form.group('TAG'))

        for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I):
            name = parseHtmlTagAttrValue("name", inputtag.group(1))
            if name:
                value = parseHtmlTagAttrValue("value", inputtag.group(1))
                if not value:
                    inputs[name] = inputtag.group(3) or ''
                else:
                    inputs[name] = value

        if input_names:
            # check input attributes
            for key, val in input_names.iteritems():
                if key in inputs:
                    if isinstance(val, basestring) and inputs[key] == val:
                        continue
                    elif isinstance(val, tuple) and inputs[key] in val:
                        continue
                    elif hasattr(val, "search") and re.match(val, inputs[key]):
                        continue
                    break  #: attibute value does not match
                else:
                    break  #: attibute name does not match
            else:
                return action, inputs  #: passed attribute check
        else:
            # no attribute check
            return action, inputs

    return {}, None  #: no matching form found


#: Deprecated
def parseFileInfo(plugin, url="", html=""):
    if hasattr(plugin, "getInfo"):
        info = plugin.getInfo(url, html)
        res  = info['name'], info['size'], info['status'], info['url']
    else:
        res  = urlparse(unquote(url)).path.split('/')[-1] or _("Unknown"), 0, 3, url

    return res


#@TODO: Remove in 0.4.10
#@NOTE: Every plugin must have own parseInfos classmethod to work with 0.4.10
def create_getInfo(plugin):

    def generator(list):
        for x in list:
            yield x

    if hasattr(plugin, "parseInfos"):
        fn = lambda urls: generator((info['name'], info['size'], info['status'], info['url']) for info in plugin.parseInfos(urls))
    else:
        fn = lambda urls: generator(parseFileInfo(url) for url in urls)

    return fn


def timestamp():
    return int(time() * 1000)


#@TODO: Move to hoster class in 0.4.10
def directLink(self, url, resumable=False):
    link = ""

    for i in xrange(5 if resumable else 1):
        header = self.load(url, ref=True, cookies=True, just_header=True, decode=True)

        if 'content-disposition' in header:
            link = url

        elif 'location' in header and header['location']:
            location = header['location']

            if not urlparse(location).scheme:
                parsed   = urlparse(url)
                base     = "%s://%s" % (parsed.scheme, parsed.netloc)
                location = urljoin(base, location)

            if resumable:
                url = location
                self.logDebug("Redirect #%d to: %s" % (++i, location))
                continue

            elif 'code' in header and header['code'] == 302:
                link = location

        elif 'content-type' in header and header['content-type'] and "html" not in header['content-type']:
            link = url

        break
    else:
        self.logError(_("Too many redirects"))

    return link


def secondsToMidnight(gmt=0):
    now = datetime.utcnow() + timedelta(hours=gmt)

    if now.hour is 0 and now.minute < 10:
        midnight = now
    else:
        midnight = now + timedelta(days=1)

    td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now

    if hasattr(td, 'total_seconds'):
        res = td.total_seconds()
    else:  #@NOTE: work-around for python 2.5 and 2.6 missing timedelta.total_seconds
        res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6

    return int(res)


class SimpleHoster(Hoster):
    __name__    = "SimpleHoster"
    __type__    = "hoster"
    __version__ = "0.98"

    __pattern__ = r'^unmatchable$'

    __description__ = """Simple hoster plugin"""
    __license__     = "GPLv3"
    __authors__     = [("zoidberg", "zoidberg@mujmail.cz"),
                       ("stickell", "l.stickell@yahoo.it"),
                       ("Walter Purcaro", "vuolter@gmail.com")]


    """
    Info patterns should be defined by each hoster:

      INFO_PATTERN: (optional) Name and Size of the file
        example: INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>size_unit)'
      or
        NAME_PATTERN: (optional) Name that will be set for the file
          example: NAME_PATTERN = r'(?P<N>file_name)'
        SIZE_PATTERN: (optional) Size that will be checked for the file
          example: SIZE_PATTERN = r'(?P<S>file_size) (?P<U>size_unit)'

      HASHSUM_PATTERN: (optional) Hash code and type of the file
        example: HASHSUM_PATTERN = r'(?P<H>hash_code) (?P<T>MD5)'

      OFFLINE_PATTERN: (optional) Check if the page is unreachable
        example: OFFLINE_PATTERN = r'File (deleted|not found)'

      TEMP_OFFLINE_PATTERN: (optional) Check if the page is temporarily unreachable
        example: TEMP_OFFLINE_PATTERN = r'Server (maintenance|maintainance)'


    Error handling patterns are all optional:

      WAIT_PATTERN: (optional) Detect waiting time
        example: WAIT_PATTERN = r''

      PREMIUM_ONLY_PATTERN: (optional) Check if the file can be downloaded only with a premium account
        example: PREMIUM_ONLY_PATTERN = r'Premium account required'

      ERROR_PATTERN: (optional) Detect any error preventing download
        example: ERROR_PATTERN = r''


    Instead overriding handleFree and handlePremium methods you can define the following patterns for direct download:

      LINK_FREE_PATTERN: (optional) group(1) should be the direct link for free download
        example: LINK_FREE_PATTERN = r'<div class="link"><a href="(.+?)"'

      LINK_PREMIUM_PATTERN: (optional) group(1) should be the direct link for premium download
        example: LINK_PREMIUM_PATTERN = r'<div class="link"><a href="(.+?)"'
    """

    NAME_REPLACEMENTS = [("&#?\w+;", fixup)]
    SIZE_REPLACEMENTS = []
    URL_REPLACEMENTS  = []

    TEXT_ENCODING = False  #: Set to True or encoding name if encoding value in http header is not correct
    COOKIES       = True   #: or False or list of tuples [(domain, name, value)]
    CHECK_TRAFFIC = False  #: Set to True to force checking traffic left for premium account
    DIRECT_LINK   = None   #: Set to True to looking for direct link (as defined in handleDirect method), set to None to do it if self.account is True else False
    MULTI_HOSTER  = False  #: Set to True to leech other hoster link (as defined in handleMulti method)
    LOGIN_ACCOUNT = False  #: Set to True to require account login

    directLink = directLink  #@TODO: Remove in 0.4.10


    @classmethod
    def parseInfos(cls, urls):  #@TODO: Built-in in 0.4.10 core, then remove from plugins
        for url in urls:
            url = replace_patterns(url, cls.FILE_URL_REPLACEMENTS if hasattr(cls, "FILE_URL_REPLACEMENTS") else cls.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10
            yield cls.getInfo(url)


    @classmethod
    def apiInfo(cls, url="", get={}, post={}):
        url = unquote(url)
        return {'name'  : (urlparse(url).path.split('/')[-1]
                           or urlparse(url).query.split('=', 1)[::-1][0].split('&', 1)[0]
                           or _("Unknown")),
                'size'  : 0,
                'status': 3 if url else 8,
                'url'   : url}


    @classmethod
    def getInfo(cls, url="", html=""):
        info   = cls.apiInfo(url)
        online = False

        try:
            info['pattern'] = re.match(cls.__pattern__, url).groupdict()  #: pattern groups will be saved here

        except Exception:
            info['pattern'] = {}

        if not html:
            if not url:
                info['error']  = "missing url"
                info['status'] = 1

            elif info['status'] is 3:
                try:
                    html = getURL(url, cookies=cls.COOKIES, decode=not cls.TEXT_ENCODING)

                    if isinstance(cls.TEXT_ENCODING, basestring):
                        html = unicode(html, cls.TEXT_ENCODING)

                except BadHeader, e:
                    info['error'] = "%d: %s" % (e.code, e.content)

                    if e.code is 404:
                        info['status'] = 1

                    elif e.code is 503:
                        info['status'] = 6

        if html:
            if hasattr(cls, "OFFLINE_PATTERN") and re.search(cls.OFFLINE_PATTERN, html):
                info['status'] = 1

            elif hasattr(cls, "FILE_OFFLINE_PATTERN") and re.search(cls.FILE_OFFLINE_PATTERN, html):  #@TODO: Remove in 0.4.10
                info['status'] = 1

            elif hasattr(cls, "TEMP_OFFLINE_PATTERN") and re.search(cls.TEMP_OFFLINE_PATTERN, html):
                info['status'] = 6

            else:
                for pattern in ("FILE_INFO_PATTERN", "INFO_PATTERN",
                                "FILE_NAME_PATTERN", "NAME_PATTERN",
                                "FILE_SIZE_PATTERN", "SIZE_PATTERN",
                                "HASHSUM_PATTERN"):  #@TODO: Remove old patterns starting with "FILE_" in 0.4.10
                    try:
                        attr  = getattr(cls, pattern)
                        pdict = re.search(attr, html).groupdict()

                        if all(True for k in pdict if k not in info['pattern']):
                            info['pattern'].update(pdict)

                    except AttributeError:
                        continue

                    else:
                        online = True

        if online:
            info['status'] = 2

            if 'N' in info['pattern']:
                info['name'] = replace_patterns(unquote(info['pattern']['N'].strip()),
                                                cls.FILE_NAME_REPLACEMENTS if hasattr(cls, "FILE_NAME_REPLACEMENTS") else cls.NAME_REPLACEMENTS)  #@TODO: Remove FILE_NAME_REPLACEMENTS check in 0.4.10

            if 'S' in info['pattern']:
                size = replace_patterns(info['pattern']['S'] + info['pattern']['U'] if 'U' in info['pattern'] else info['pattern']['S'],
                                        cls.FILE_SIZE_REPLACEMENTS if hasattr(cls, "FILE_SIZE_REPLACEMENTS") else cls.SIZE_REPLACEMENTS)  #@TODO: Remove FILE_SIZE_REPLACEMENTS check in 0.4.10
                info['size'] = parseFileSize(size)

            elif isinstance(info['size'], basestring):
                unit = info['units'] if 'units' in info else None
                info['size'] = parseFileSize(info['size'], unit)

            if 'H' in info['pattern']:
                hashtype = info['pattern']['T'] if 'T' in info['pattern'] else "hash"
                info[hashtype] = info['pattern']['H']

        if not info['pattern']:
            info.pop('pattern', None)

        return info


    def setup(self):
        self.resumeDownload = self.multiDL = self.premium


    def prepare(self):
        self.pyfile.error = ""  #@TODO: Remove in 0.4.10

        self.info      = {}
        self.link      = ""     #@TODO: Move to hoster class in 0.4.10
        self.directDL  = False  #@TODO: Move to hoster class in 0.4.10
        self.multihost = False  #@TODO: Move to hoster class in 0.4.10

        if self.LOGIN_ACCOUNT and not self.account:
            self.fail(_("Required account not found"))

        self.req.setOption("timeout", 120)

        if isinstance(self.COOKIES, list):
            set_cookies(self.req.cj, self.COOKIES)

        if (self.MULTI_HOSTER
            and (self.__pattern__ != self.core.pluginManager.hosterPlugins[self.__name__]['pattern']
                 or re.match(self.__pattern__, self.pyfile.url) is None)):
            self.multihost = True
            return

        if self.DIRECT_LINK is None:
            self.directDL = bool(self.account)
        else:
            self.directDL = self.DIRECT_LINK

        self.pyfile.url = replace_patterns(self.pyfile.url,
                                           self.FILE_URL_REPLACEMENTS if hasattr(self, "FILE_URL_REPLACEMENTS") else self.URL_REPLACEMENTS)  #@TODO: Remove FILE_URL_REPLACEMENTS check in 0.4.10


    def preload(self):
        self.html = self.load(self.pyfile.url, cookies=bool(self.COOKIES), decode=not self.TEXT_ENCODING)

        if isinstance(self.TEXT_ENCODING, basestring):
            self.html = unicode(self.html, self.TEXT_ENCODING)


    def process(self, pyfile):
        self.prepare()
        self.checkInfo()

        if self.directDL:
            self.logDebug("Looking for direct download link...")
            self.handleDirect(pyfile)

        if self.multihost and not self.link and not self.lastDownload:
            self.logDebug("Looking for leeched download link...")
            self.handleMulti(pyfile)

            if not self.link and not self.lastDownload:
                self.MULTI_HOSTER = False
                self.retry(1, reason="Multi hoster fails")

        if not self.link and not self.lastDownload:
            self.preload()
            self.checkInfo()

            if self.premium and (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
                self.logDebug("Handled as premium download")
                self.handlePremium(pyfile)

            elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.checkTrafficLeft()):
                self.logDebug("Handled as free download")
                self.handleFree(pyfile)

        self.downloadLink(self.link)
        self.checkFile()


    def downloadLink(self, link):
        if link and isinstance(link, basestring):
            self.correctCaptcha()
            self.download(link, disposition=False)  #@TODO: Set `disposition=True` in 0.4.10


    def checkFile(self):
        if self.cTask and not self.lastDownload:
            self.invalidCaptcha()
            self.retry(10, reason=_("Wrong captcha"))

        elif not self.lastDownload or not exists(fs_encode(self.lastDownload)):
            self.lastDownload = ""
            self.error(self.pyfile.error or _("No file downloaded"))

        else:
            rules = {'empty file': re.compile(r'\A\Z'),
                     'html file' : re.compile(r'\A\s*<!DOCTYPE html'),
                     'html error': re.compile(r'\A\s*(<.+>)?\d{3}(\Z|\s+)')}

            if hasattr(self, 'ERROR_PATTERN'):
                rules['error'] = re.compile(self.ERROR_PATTERN)

            check = self.checkDownload(rules)
            if check:  #@TODO: Move to hoster in 0.4.10
                errmsg = check.strip().capitalize()
                if self.lastCheck:
                    errmsg += " | " + self.lastCheck.group(0).strip()

                self.lastDownload = ""
                self.retry(10, 60, errmsg)


    def checkErrors(self):
        if not self.html:
            self.logWarning(_("No html code to check"))
            return

        if hasattr(self, 'PREMIUM_ONLY_PATTERN') and self.premium and re.search(self.PREMIUM_ONLY_PATTERN, self.html):
            self.fail(_("Link require a premium account to be handled"))

        elif hasattr(self, 'ERROR_PATTERN'):
            m = re.search(self.ERROR_PATTERN, self.html)
            if m:
                errmsg = self.info['error'] = m.group(1)
                self.error(errmsg)

        elif hasattr(self, 'WAIT_PATTERN'):
            m = re.search(self.WAIT_PATTERN, self.html)
            if m:
                wait_time = sum([int(v) * {"hr": 3600, "hour": 3600, "min": 60, "sec": 1}[u.lower()] for v, u in
                                 re.findall(r'(\d+)\s*(hr|hour|min|sec)', m.group(0), re.I)])
                self.wait(wait_time, wait_time > 300)
                return

        self.info.pop('error', None)


    def checkStatus(self, getinfo=True):
        if not self.info or getinfo:
            self.logDebug("File info (BEFORE): %s" % self.info)
            self.info.update(self.getInfo(self.pyfile.url, self.html))

        try:
            status = self.info['status']

            if status is 1:
                self.offline()

            elif status is 6:
                self.tempOffline()

            elif status is 8:
                self.fail()

        finally:
            self.logDebug("File status: %s" % statusMap[status],
                          "File info: %s"   % self.info)


    def checkNameSize(self, getinfo=True):
        if not self.info or getinfo:
            self.logDebug("File info (BEFORE): %s" % self.info)
            self.info.update(self.getInfo(self.pyfile.url, self.html))
            self.logDebug("File info (AFTER): %s"  % self.info)

        try:
            url  = self.info['url']
            name = self.info['name']
            if name and name != url:
                self.pyfile.name = name

        except Exception:
            pass

        try:
            size = self.info['size']
            if size > 0:
                self.pyfile.size = size

        except Exception:
            pass

        self.logDebug("File name: %s" % self.pyfile.name,
                      "File size: %s" % self.pyfile.size if self.pyfile.size > 0 else "Unknown")


    def checkInfo(self):
        self.checkNameSize()

        if self.html:
            self.checkErrors()
            self.checkNameSize()

        self.checkStatus(getinfo=False)


    #: Deprecated
    def getFileInfo(self):
        self.info = {}
        self.checkInfo()
        return self.info


    def handleDirect(self, pyfile):
        link = self.directLink(pyfile.url, self.resumeDownload)

        if link:
            self.logInfo(_("Direct download link detected"))

            self.link = link
        else:
            self.logDebug("Direct download link not found")


    def handleMulti(self, pyfile):  #: Multi-hoster handler
        pass


    def handleFree(self, pyfile):
        if not hasattr(self, 'LINK_FREE_PATTERN'):
            self.logError(_("Free download not implemented"))

        try:
            m = re.search(self.LINK_FREE_PATTERN, self.html)
            if m is None:
                self.error(_("Free download link not found"))

            self.link = m.group(1)

        except Exception, e:
            self.fail(e)


    def handlePremium(self, pyfile):
        if not hasattr(self, 'LINK_PREMIUM_PATTERN'):
            self.logError(_("Premium download not implemented"))
            self.logDebug("Handled as free download")
            self.handleFree(pyfile)

        try:
            m = re.search(self.LINK_PREMIUM_PATTERN, self.html)
            if m is None:
                self.error(_("Premium download link not found"))

            self.link = m.group(1)

        except Exception, e:
            self.fail(e)


    def longWait(self, wait_time=None, max_tries=3):
        if wait_time and isinstance(wait_time, (int, long, float)):
            time_str  = "%dh %dm" % divmod(wait_time / 60, 60)
        else:
            wait_time = 900
            time_str  = _("(unknown time)")
            max_tries = 100

        self.logInfo(_("Download limit reached, reconnect or wait %s") % time_str)

        self.setWait(wait_time, True)
        self.wait()
        self.retry(max_tries=max_tries, reason=_("Download limit reached"))


    def parseHtmlForm(self, attr_str="", input_names={}):
        return parseHtmlForm(attr_str, self.html, input_names)


    def checkTrafficLeft(self):
        if not self.account:
            return True

        traffic = self.account.getAccountInfo(self.user, True)['trafficleft']

        if traffic is None:
            return False
        elif traffic == -1:
            return True
        else:
            size = self.pyfile.size / 1024
            self.logInfo(_("Filesize: %i KiB, Traffic left for user %s: %i KiB") % (size, self.user, traffic))
            return size <= traffic


    #@TODO: Remove in 0.4.10
    def wait(self, seconds=0, reconnect=None):
        return _wait(self, seconds, reconnect)


    def error(self, reason="", type="parse"):
        return _error(self, reason, type)