diff options
author | zoidberg10 <zoidberg@mujmail.cz> | 2012-02-08 21:27:49 +0100 |
---|---|---|
committer | zoidberg10 <zoidberg@mujmail.cz> | 2012-02-08 21:27:49 +0100 |
commit | f226ac102cee63721fcbaffc60dcdf75c242d5e6 (patch) | |
tree | c90ebaa2563c8126d37c85cc737e2962cfe7d5ca /module | |
parent | Handle Oron TOS errors (diff) | |
download | pyload-f226ac102cee63721fcbaffc60dcdf75c242d5e6.tar.xz |
filefactory premium, uloz.to new url pattern
Diffstat (limited to 'module')
-rw-r--r-- | module/plugins/accounts/FilefactoryCom.py | 54 | ||||
-rw-r--r-- | module/plugins/hoster/FilefactoryCom.py | 115 | ||||
-rw-r--r-- | module/plugins/hoster/UlozTo.py | 82 |
3 files changed, 161 insertions, 90 deletions
diff --git a/module/plugins/accounts/FilefactoryCom.py b/module/plugins/accounts/FilefactoryCom.py new file mode 100644 index 000000000..8c04cf49b --- /dev/null +++ b/module/plugins/accounts/FilefactoryCom.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: zoidberg +""" + +from module.plugins.Account import Account +import re +from time import mktime, strptime + +class FilefactoryCom(Account): + __name__ = "FilefactoryCom" + __version__ = "0.1" + __type__ = "account" + __description__ = """filefactory.com account plugin""" + __author_name__ = ("zoidberg") + __author_mail__ = ("zoidberg@mujmail.cz") + + ACCOUNT_INFO_PATTERN = r'Your account is valid until the <strong>(.*?)</strong>' + + def loadAccountInfo(self, user, req): + premium = False + validuntil = -1 + + html = req.load("http://filefactory.com/member/") + if "You are a FileFactory Premium Member" in html: + premium = True + found = re.search(self.ACCOUNT_INFO_PATTERN, html) + if found: + validuntil = mktime(strptime(re.sub(r"(\d)[a-z]{2} ", r"\1 ", found.group(1)),"%d %B, %Y")) + + return {"premium": premium, "trafficleft": -1, "validuntil": validuntil} + + def login(self, user, data, req): + html = req.load("http://filefactory.com/member/login.php", post={ + "email": user, + "password": data["password"], + "redirect": "/"}) + + if not re.search(r'location:.*?\?login=1', req.http.header, re.I): + self.wrongPassword()
\ No newline at end of file diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py index 37b2bb7ce..17520a6c3 100644 --- a/module/plugins/hoster/FilefactoryCom.py +++ b/module/plugins/hoster/FilefactoryCom.py @@ -1,88 +1,95 @@ # -*- coding: utf-8 -*- -from __future__ import with_statement - from module.network.RequestFactory import getURL from module.plugins.Hoster import Hoster from module.plugins.ReCaptcha import ReCaptcha +from module.utils import parseFileSize +from module.plugins.Plugin import chunks import re -def getInfo(urls): - result = [] +def checkFile(plugin, urls): + file_info = [] + url_dict = {} for url in urls: - - # Get file info html - # @TODO: Force responses in english language so current patterns will be right - html = getURL(url) - if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html): - result.append((url, 0, 1, url)) + url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url) + url_ids = url_dict.keys() + urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids) - # Name - name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name') - m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html) + html = getURL("http://filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True) - # Size - value = float(m.group('size')) - units = m.group('units') - pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] - size = int(value*1024**pow) + for m in re.finditer(plugin.LC_INFO_PATTERN, html): + if m.group('id') in url_ids: + url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3]) + + for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html): + if m.group('id') in url_ids: + url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3]) - # Return info - result.append((name, size, 2, url)) - - yield result + file_info = url_dict.values() + return file_info + class FilefactoryCom(Hoster): __name__ = "FilefactoryCom" __type__ = "hoster" - __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement - __version__ = "0.3" + __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement + __version__ = "0.31" __description__ = """Filefactory.Com File Download Hoster""" - __author_name__ = ("paulking") + __author_name__ = ("paulking", "zoidberg") + + LC_INFO_PATTERN = r'<tr class="(even|odd)">\s*<td>\s*<a href="http://www.filefactory.com/file/(?P<id>\w+)[^"]*">(?P<name>[^<]+)</a>\s*.*\s*</td>\s*<td>(?P<size>[0-9.]+ \w+)</td>' + LC_OFFLINE_PATTERN = r'<li class="(even|odd)">\s*<div class="metadata">http://www.filefactory.com/file/(?P<id>\w+)/</div>' FILE_OFFLINE_PATTERN = r'<title>File Not Found' FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>' FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded' + FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\'' CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",' WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"' WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"' FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">' - + def setup(self): - self.multiDL = False + self.multiDL = self.resumeDownloads = self.premium def process(self, pyfile): - - self.pyfile = pyfile + # Check file + pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0] + if status != 2: self.offline() + self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size)) - # Force responses language to US English - self.req.cj.setCookie("filefactory.com", "ff_locale","") - - # Load main page - self.html = self.load(self.pyfile.url, ref=False, decode=True) - - # Check offline - if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None: - self.offline() + # Handle downloading + url = self.checkDirectDownload(pyfile.url) + if url: + self.download(url) + else: + self.html = self.load(pyfile.url, decode = True) + + if self.premium: + self.handlePremium() + else: + self.handleFree() + + def checkDirectDownload(self, url): + for i in range(5): + header = self.load(url, just_header = True) + if 'location' in header: + url = header['location'].strip() + if not url.startswith("http://"): + url = "http://www.filefactory.com" + url + self.logDebug('URL: ' + url) + elif 'content-disposition' in header: + return url - # File id - self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id') - self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id)) - - # File name - self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name') - + return False + + def handleFree(self): # Check Id self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check') self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check)) - - # Handle free downloading - self.handleFree() - - def handleFree(self): - + # Resolve captcha self.log.debug("%s: File is captcha protected" % self.__name__) id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey') @@ -138,3 +145,9 @@ class FilefactoryCom(Hoster): self.log.debug("%s: Wrong captcha" % self.__name__) self.invalidCaptcha() + + def handlePremium(self): + self.fail('Please enable direct downloads') + +def getInfo(urls): + for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk) diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py index 5f482e189..a67e52d4d 100644 --- a/module/plugins/hoster/UlozTo.py +++ b/module/plugins/hoster/UlozTo.py @@ -21,54 +21,57 @@ from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo def convertDecimalPrefix(m): # decimal prefixes used in filesize and traffic - return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','') + return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','') class UlozTo(SimpleHoster): __name__ = "UlozTo" __type__ = "hoster" - __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\d+/[^/?]*)" - __version__ = "0.83" + __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\w+/[^/?]*)" + __version__ = "0.84" __description__ = """uloz.to""" __author_name__ = ("zoidberg") - FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>' - FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>' - FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)] - FILE_OFFLINE_PATTERN = ur'<title>(404 - Page not found|Stránka nenalezena|Nie można wyświetlić strony)</title>' - - PASSWD_PATTERN = r'<input type="password" class="text" name="file_password" id="frmfilepasswordForm-file_password" />' - VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">' + FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>' + FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>' + FILE_INFO_PATTERN = r'<p>File <strong>(?P<N>[^<]+)</strong> is password protected</p>' + FILE_OFFLINE_PATTERN = r'<title>404 - Page not found</title>|<h1 class="h1">File was banned</h1>' + FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)] + FILE_URL_REPLACEMENTS = [(r"(?<=http://)([^/]+)", "www.ulozto.net")] + + PASSWD_PATTERN = r'<div class="passwordProtectedFile">' + VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">' FREE_URL_PATTERN = r'<div class="freeDownloadForm"><form action="([^"]+)"' PREMIUM_URL_PATTERN = r'<div class="downloadForm"><form action="([^"]+)"' CAPTCHA_PATTERN = r'<img class="captcha" src="(.*?(\d+).png)" alt="" />' - - def process(self, pyfile): - self.url = "http://www.ulozto.net/" + re.match(self.__pattern__, pyfile.url).group('id') - - self.html = self.load(self.url, decode=True) - - # password protected links - passwords = self.getPassword().splitlines() + + def setup(self): + self.multiDL = self.resumeDownload = True + + def process(self, pyfile): + pyfile.url = re.sub(r"(?<=http://)([^/]+)", "www.ulozto.net", pyfile.url) + self.html = self.load(pyfile.url, decode = True, cookies = False) + + passwords = self.getPassword().splitlines() while self.PASSWD_PATTERN in self.html: if passwords: password = passwords.pop(0) self.logInfo("Password protected link, trying " + password) - self.html = self.load(self.url, get = {"do": "filepasswordForm-submit"}, post={"file_password": password, "fpwdsend": 'Odeslat'}, cookies=True) + self.html = self.load(pyfile.url, get = {"do": "passwordProtectedForm-submit"}, + post={"password": password, "password_send": 'Send'}, cookies=True) else: self.fail("No or incorrect password") - - self.file_info = self.getFileInfo() - - # adult content + if re.search(self.VIPLINK_PATTERN, self.html): - self.html = self.load(self.url, get={"disclaimer": "1"}) - + self.html = self.load(pyfile.url, get={"disclaimer": "1"}) + + self.file_info = self.getFileInfo() + if self.premium and self.checkTrafficLeft(): self.handlePremium() - else: + else: self.handleFree() - - def handleFree(self): + + def handleFree(self): parsed_url = self.findDownloadURL(premium=False) # get and decrypt captcha @@ -82,21 +85,21 @@ class UlozTo(SimpleHoster): captcha_url, captcha_id = found.groups() captcha_text = self.decryptCaptcha(captcha_url) - + self.log.debug('CAPTCHA_URL:' + captcha_url + ' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text) - # download and check + # download and check self.download(parsed_url, post={"captcha[id]": captcha_id, "captcha[text]": captcha_text, "freeDownload": "Download"}, cookies=True) - self.doCheckDownload() - + self.doCheckDownload() + self.setStorage("captcha_id", captcha_id) self.setStorage("captcha_text", captcha_text) - + def handlePremium(self): parsed_url = self.findDownloadURL(premium=True) self.download(parsed_url, post={"download": "Download"}) self.doCheckDownload() - + def findDownloadURL(self, premium=False): msg = "%s link" % ("Premium" if premium else "Free") found = re.search(self.PREMIUM_URL_PATTERN if premium else self.FREE_URL_PATTERN, self.html) @@ -104,13 +107,13 @@ class UlozTo(SimpleHoster): parsed_url = "http://www.ulozto.net" + found.group(1) self.logDebug("%s: %s" % (msg, parsed_url)) return parsed_url - + def doCheckDownload(self): check = self.checkDownload({ "wrong_captcha": re.compile(self.CAPTCHA_PATTERN), "offline": re.compile(self.FILE_OFFLINE_PATTERN), "passwd": self.PASSWD_PATTERN, - "paralell_dl": u'<h2 class="center">Z Vašeho počítače se již stahuje</h2>' + "paralell_dl": re.compile(r'<title>Uloz.to - Ji. stahuje.</title>') }) if check == "wrong_captcha": @@ -123,8 +126,9 @@ class UlozTo(SimpleHoster): elif check == "passwd": self.fail("Wrong password") elif check == "paralell_dl": - self.setWait(600, True) + self.multiDL = False + self.setWait(300, True) self.wait() - self.retry() + self.retry() -getInfo = create_getInfo(UlozTo)
\ No newline at end of file +getInfo = create_getInfo(UlozTo)
\ No newline at end of file |