diff options
-rw-r--r-- | module/plugins/hoster/FilefactoryCom.py | 218 |
1 files changed, 90 insertions, 128 deletions
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py index b3eb4c865..e92c1505d 100644 --- a/module/plugins/hoster/FilefactoryCom.py +++ b/module/plugins/hoster/FilefactoryCom.py @@ -1,159 +1,121 @@ # -*- coding: utf-8 -*- -from module.network.RequestFactory import getURL -from module.plugins.Hoster import Hoster -from module.plugins.ReCaptcha import ReCaptcha -from module.utils import parseFileSize -from module.plugins.Plugin import chunks -from module.common.json_layer import json_loads -import re +############################################################################ +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU Affero General Public License as # +# published by the Free Software Foundation, either version 3 of the # +# License, or (at your option) any later version. # +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU Affero General Public License for more details. # +# # +# You should have received a copy of the GNU Affero General Public License # +# along with this program. If not, see <http://www.gnu.org/licenses/>. # +############################################################################ # Test links (random.bin): # http://www.filefactory.com/file/ymxkmdud2o3/n/random.bin -def checkFile(plugin, urls): - url_dict = {} - +import re + +from module.plugins.internal.SimpleHoster import SimpleHoster +from module.network.RequestFactory import getURL +from module.utils import parseFileSize + + +def getInfo(urls): + file_info = list() + list_ids = dict() + + # Create a dict id:url. Will be used to retrieve original url for url in urls: - url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url) - url_ids = url_dict.keys() - urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids) - - html = getURL("http://www.filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True) - - for m in re.finditer(plugin.LC_INFO_PATTERN, html): - if m.group('id') in url_ids: - url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3]) - - for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html): - if m.group('id') in url_ids: - url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3]) - - file_info = url_dict.values() - + m = re.search(FilefactoryCom.__pattern__, url) + list_ids[m.group('id')] = url + + # WARN: There could be a limit of urls for request + post_data = {'func': 'links', 'links': '\n'.join(urls)} + rep = getURL('http://www.filefactory.com/tool/links.php', post=post_data, decode=True) + + # Online links + for m in re.finditer( + r'innerText">\s*<h1 class="name">(?P<N>.+) \((?P<S>[\w.]+) (?P<U>\w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<ID>\w+).*</p>\s*<p class="hidden size">', + rep): + file_info.append((m.group('N'), parseFileSize(m.group('S'), m.group('U')), 2, list_ids[m.group('ID')])) + + # Offline links + for m in re.finditer( + r'innerText">\s*<h1>(http://www.filefactory.com/file/(?P<ID>\w+)/)</h1>\s*<p>\1</p>\s*<p class="errorResponse">Error: file not found</p>', + rep): + file_info.append((list_ids[m.group('ID')], 0, 1, list_ids[m.group('ID')])) + return file_info - -class FilefactoryCom(Hoster): + + +class FilefactoryCom(SimpleHoster): __name__ = "FilefactoryCom" __type__ = "hoster" - __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement - __version__ = "0.37" + __pattern__ = r"https?://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" + __version__ = "0.38" __description__ = """Filefactory.Com File Download Hoster""" - __author_name__ = ("paulking", "zoidberg") - - LC_INFO_PATTERN = r'<h1 class="name">(?P<name>[^<]+) \((?P<size>[0-9.]+ \w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<id>\w+)/' - LC_OFFLINE_PATTERN = r'<p>http://www.filefactory.com/file/(?P<id>\w+)/</p>\s*<p class="errorResponse">' - + __author_name__ = ("stickell") + __author_mail__ = ("l.stickell@yahoo.it") + + FILE_INFO_PATTERN = r'(?P<N>\S+)\s*</span>\s*</h1>\s*<h2>(?P<S>[\w.]+) (?P<U>\w+) file uploaded' FILE_OFFLINE_PATTERN = r'<title>File Not Found' - FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>' - FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded' - - FILE_CHECK_PATTERN = r'check:\s*\'(?P<check>.*?)\'' - CAPTCHA_KEY_PATTERN = r'Recaptcha.create\(\s*"(.*?)",' - WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"' - FILE_URL_PATTERN = r'<p[^>]*?id="downloadLinkTarget"[^>]*>\s*<a href="(?P<url>.*?)"' - - - def setup(self): - self.multiDL = self.resumeDownloads = self.premium def process(self, pyfile): - # Check file - pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0] - if status != 2: self.offline() - self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size)) - - # Handle downloading - url = self.checkDirectDownload(pyfile.url) - if url: - self.download(url) - else: - self.html = self.load(pyfile.url, decode = True) - - if self.premium: - self.handlePremium() - else: - self.handleFree() - - def checkDirectDownload(self, url): - for i in range(5): - header = self.load(url, just_header = True) - if 'location' in header: - url = header['location'].strip() - if not url.startswith("http://"): - url = "http://www.filefactory.com" + url - self.logDebug('URL: ' + url) - elif 'content-disposition' in header: - return url - - return False - + if self.premium and (not self.SH_CHECK_TRAFFIC or self.checkTrafficLeft()): + self.handlePremium() + else: + self.handleFree() + def handleFree(self): + self.html = self.load(self.pyfile.url, decode=True) if "Currently only Premium Members can download files larger than" in self.html: self.fail("File too large for free download") elif "All free download slots on this server are currently in use" in self.html: self.retry(50, 900, "All free slots are busy") - - url = re.search(r"document\.location\.host \+\s*'(.+)';", self.html).group(1) - if not url.startswith('"http://"'): - url = 'http://www.filefactory.com' + url + + url = re.search(r"document\.location\.host \+\s*'(.+)';", self.html) + if not url: + self.parseError('Unable to detect free link') + url = 'http://www.filefactory.com' + url.group(1) self.html = self.load(url, decode=True) - direct = re.search(r'data-href-direct="(.*)" class="button', self.html).group(1) - waittime = re.search(r'id="startWait" value="(\d+)"', self.html).group(1) - self.setWait(waittime) + waittime = re.search(r'id="startWait" value="(\d+)"', self.html) + if not waittime: + self.parseError('Unable to detect wait time') + self.setWait(int(waittime.group(1))) self.wait() - # # Resolve captcha - # found = re.search(self.CAPTCHA_KEY_PATTERN, self.html) - # recaptcha_key = found.group(1) if found else "6LeN8roSAAAAAPdC1zy399Qei4b1BwmSBSsBN8zm" - # recaptcha = ReCaptcha(self) - # - # # Try up to 5 times - # for i in range(5): - # challenge, code = recaptcha.challenge(recaptcha_key) - # response = json_loads(self.load("http://www.filefactory.com/file/checkCaptcha.php", - # post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})) - # if response['status'] == 'ok': - # self.correctCaptcha() - # break - # else: - # self.invalidCaptcha() - # else: - # self.fail("No valid captcha after 5 attempts") - # - # # This will take us to a wait screen - # waiturl = "http://www.filefactory.com" + response['path'] - # self.logDebug("Fetching wait with url [%s]" % waiturl) - # waithtml = self.load(waiturl, decode=True) - # found = re.search(r'<a href="(http://www.filefactory.com/dlf/.*?)"', waithtml) - # waithtml = self.load(found.group(1), decode=True) - # - # # Find the wait value and wait - # wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait')) - # self.logDebug("Waiting %d seconds." % wait) - # self.setWait(wait, True) - # self.wait() - # - # # Now get the real download url and retrieve the file - # url = re.search(self.FILE_URL_PATTERN,waithtml).group('url') - # # this may either download our file or forward us to an error page - # self.logDebug("Download URL: %s" % url) - self.download(direct) - + direct = re.search(r'data-href-direct="(.*)" class="button', self.html) + if not direct: + self.parseError('Unable to detect free direct link') + direct = direct.group(1) + self.logDebug('DIRECT LINK: ' + direct) + self.download(direct, disposition=True) + check = self.checkDownload({"multiple": "You are currently downloading too many files at once.", "error": '<div id="errorMessage">'}) if check == "multiple": - self.setWait(15*60) self.logDebug("Parallel downloads detected; waiting 15 minutes") - self.wait() - self.retry() + self.retry(wait_time=15 * 60, reason='Parallel downloads') elif check == "error": self.fail("Unknown error") - + def handlePremium(self): - self.fail('Please enable direct downloads') - -def getInfo(urls): - for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk) + header = self.load(self.pyfile.url, just_header=True) + if 'location' in header: + url = header['location'].strip() + if not url.startswith("http://"): + url = "http://www.filefactory.com" + url + elif 'content-disposition' in header: + url = self.pyfile.url + else: + self.parseError('Unable to detect premium direct link') + + self.logDebug('DIRECT PREMIUM LINK: ' + url) + self.download(url, disposition=True) |