diff options
Diffstat (limited to 'module/plugins/hoster/FilefactoryCom.py')
| -rw-r--r-- | module/plugins/hoster/FilefactoryCom.py | 174 | 
1 files changed, 91 insertions, 83 deletions
| diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py index 37b2bb7ce..135dd90a1 100644 --- a/module/plugins/hoster/FilefactoryCom.py +++ b/module/plugins/hoster/FilefactoryCom.py @@ -1,122 +1,133 @@  # -*- coding: utf-8 -*- -from __future__ import with_statement -  from module.network.RequestFactory import getURL  from module.plugins.Hoster import Hoster  from module.plugins.ReCaptcha import ReCaptcha +from module.utils import parseFileSize +from module.plugins.Plugin import chunks +from module.common.json_layer import json_loads  import re -def getInfo(urls): -    result = [] +def checkFile(plugin, urls): +    file_info = [] +    url_dict = {}      for url in urls: -         -        # Get file info html -        # @TODO: Force responses in english language so current patterns will be right -        html = getURL(url) -        if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html): -            result.append((url, 0, 1, url)) +        url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url) +    url_ids = url_dict.keys() +    urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids) -        # Name -        name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name') -        m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html) +    html = getURL("http://www.filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True)    -        # Size -        value = float(m.group('size')) -        units = m.group('units') -        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]  -        size = int(value*1024**pow) +    for m in re.finditer(plugin.LC_INFO_PATTERN, html): +        if m.group('id') in url_ids: +            url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3]) +             +    for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html): +        if m.group('id') in url_ids: +            url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3]) -        # Return info -        result.append((name, size, 2, url)) -         -    yield result +    file_info = url_dict.values() +    return file_info +     class FilefactoryCom(Hoster):      __name__ = "FilefactoryCom"      __type__ = "hoster" -    __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement -    __version__ = "0.3" +    __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement +    __version__ = "0.34"      __description__ = """Filefactory.Com File Download Hoster""" -    __author_name__ = ("paulking") +    __author_name__ = ("paulking", "zoidberg") +    LC_INFO_PATTERN = r'<h1 class="name">(?P<name>[^<]+) \((?P<size>[0-9.]+ \w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<id>\w+)/' +    LC_OFFLINE_PATTERN = r'<p>http://www.filefactory.com/file/(?P<id>\w+)/</p>\s*<p class="errorResponse">' +       FILE_OFFLINE_PATTERN = r'<title>File Not Found'      FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'      FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded' -    FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\'' -    CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",'  -    WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"' +     +    FILE_CHECK_PATTERN = r'check:\s*\'(?P<check>.*?)\'' +    CAPTCHA_KEY_PATTERN = r'Recaptcha.create\(\s*"(.*?)",'       WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"' -    FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">' -         +    FILE_URL_PATTERN = r'<p[^>]*?id="downloadLinkTarget"[^>]*>\s*<a href="(?P<url>.*?)"'  +                  def setup(self): -        self.multiDL = False +        self.multiDL = self.resumeDownloads = self.premium      def process(self, pyfile): -     -        self.pyfile = pyfile +        # Check file +        pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0]      +        if status != 2: self.offline() +        self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size))  -        # Force responses language to US English -        self.req.cj.setCookie("filefactory.com", "ff_locale","") - -        # Load main page -        self.html = self.load(self.pyfile.url, ref=False, decode=True) - -        # Check offline -        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None: -            self.offline() +        # Handle downloading +        url = self.checkDirectDownload(pyfile.url) +        if url: +            self.download(url) +        else:                 +            self.html = self.load(pyfile.url, decode = True) +                       +            if self.premium: +                self.handlePremium() +            else: +                self.handleFree() +               +    def checkDirectDownload(self, url): +        for i in range(5): +            header = self.load(url, just_header = True)            +            if 'location' in header: +                url = header['location'].strip()  +                if not url.startswith("http://"): +                    url = "http://www.filefactory.com" + url +                self.logDebug('URL: ' + url) +            elif 'content-disposition' in header: +                return url -        # File id -        self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id') -        self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id)) -            -        # File name -        self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name') - -        # Check Id -        self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check') -        self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check)) - -        # Handle free downloading -        self.handleFree() +        return False                                      def handleFree(self): -     +        if "Currently only Premium Members can download files larger than" in self.html: +            self.fail("File too large for free download") +        elif "All free download slots on this server are currently in use" in self.html: +            self.retry(50, 900, "All free slots are busy") +              +        # Check Id +        self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check') +        self.logDebug("File check code is [%s]" % self.check) +                  # Resolve captcha -        self.log.debug("%s: File is captcha protected" % self.__name__) -        id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey') +        found = re.search(self.CAPTCHA_KEY_PATTERN, self.html) +        recaptcha_key = found.group(1) if found else "6LeN8roSAAAAAPdC1zy399Qei4b1BwmSBSsBN8zm" +        recaptcha = ReCaptcha(self) +                  # Try up to 5 times -        for i in range(5): -            self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1)) -            recaptcha = ReCaptcha(self) -            challenge, code = recaptcha.challenge(id) -            response = self.load("http://www.filefactory.com/file/checkCaptcha.php", -                            post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code}) -            captchavalid = self.handleCaptchaErrors(response) -            if captchavalid: +        for i in range(5):            +            challenge, code = recaptcha.challenge(recaptcha_key) +            response = json_loads(self.load("http://www.filefactory.com/file/checkCaptcha.php", +                            post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})) +            if response['status'] == 'ok': +                self.correctCaptcha()                  break -        if not captchavalid: +            else: +                self.invalidCaptcha()                             +        else:              self.fail("No valid captcha after 5 attempts") - -        # Get wait URL -        waitpath = re.search(self.WAIT_PATH_PATTERN, response).group('path') -        waiturl = "http://www.filefactory.com" + waitpath          # This will take us to a wait screen -        self.log.debug("%s: fetching wait with url [%s]" % (self.__name__, waiturl)) +        waiturl = "http://www.filefactory.com" + response['path'] +        self.logDebug("Fetching wait with url [%s]" % waiturl)          waithtml = self.load(waiturl, decode=True)          # Find the wait value and wait               wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait')) -        self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait)) +        self.logDebug("Waiting %d seconds." % wait)          self.setWait(wait, True)          self.wait()          # Now get the real download url and retrieve the file          url = re.search(self.FILE_URL_PATTERN,waithtml).group('url')          # this may either download our file or forward us to an error page -        self.log.debug("%s: download url %s" % (self.__name__, url)) +        self.logDebug("Download URL: %s" % url)          dl = self.download(url)          check = self.checkDownload({"multiple": "You are currently downloading too many files at once.", @@ -124,17 +135,14 @@ class FilefactoryCom(Hoster):          if check == "multiple":              self.setWait(15*60) -            self.log.debug("%s: Parallel downloads detected waiting 15 minutes" % self.__name__) +            self.logDebug("Parallel downloads detected; waiting 15 minutes")              self.wait()              self.retry()          elif check == "error":              self.fail("Unknown error") - -    def handleCaptchaErrors(self, response): -        self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response)) -        if 'status:"ok"' in response: -            self.correctCaptcha() -            return True +     +    def handlePremium(self): +        self.fail('Please enable direct downloads') -        self.log.debug("%s: Wrong captcha" % self.__name__) -        self.invalidCaptcha() +def getInfo(urls): +    for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk) | 
