diff options
author | zoidberg10 <zoidberg@mujmail.cz> | 2012-06-14 07:19:13 +0200 |
---|---|---|
committer | zoidberg10 <zoidberg@mujmail.cz> | 2012-06-14 07:19:13 +0200 |
commit | 3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2 (patch) | |
tree | a8ba960ec7c19391598d28294271365956cdc8d0 /module/plugins | |
parent | closed #605 (diff) | |
download | pyload-3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2.tar.xz |
update uloz.to, cloudnator.com, checksum plugin
Diffstat (limited to 'module/plugins')
-rw-r--r-- | module/plugins/hooks/Checksum.py | 56 | ||||
-rw-r--r-- | module/plugins/hoster/ShragleCom.py | 149 | ||||
-rw-r--r-- | module/plugins/hoster/UlozTo.py | 35 |
3 files changed, 150 insertions, 90 deletions
diff --git a/module/plugins/hooks/Checksum.py b/module/plugins/hooks/Checksum.py index 89e8ec762..0c2751e7a 100644 --- a/module/plugins/hooks/Checksum.py +++ b/module/plugins/hooks/Checksum.py @@ -18,6 +18,7 @@ """ from __future__ import with_statement import hashlib, zlib +from os.path import getsize, isfile from module.utils import save_join, fs_encode from module.plugins.Hook import Hook @@ -48,13 +49,20 @@ def computeChecksum(local_file, algorithm): class Checksum(Hook): __name__ = "Checksum" - __version__ = "0.03" - __description__ = "Check downloaded file hash" + __version__ = "0.04" + __description__ = "Verify downloaded file size and checksum (enable in general preferences)" __config__ = [("activated", "bool", "Activated", True), ("action", "fail;retry;nothing", "What to do if check fails?", "retry"), ("max_tries", "int", "Number of retries", 2)] __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") + + def setup(self): + self.algorithms = sorted(getattr(hashlib, "algorithms", ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")), reverse = True) + self.algorithms.append(["crc32", "adler32"]) + + if not self.config['general']['checksum']: + self.logInfo("Checksum validation is disabled in general configuration") def downloadFinished(self, pyfile): """ @@ -62,25 +70,43 @@ class Checksum(Hook): pyfile.plugin.check_data should be a dictionary which can contain: a) if known, the exact filesize in bytes (e.g. "size": 123456789) b) hexadecimal hash string with algorithm name as key (e.g. "md5": "d76505d0869f9f928a17d42d66326307") - """ + """ + if hasattr(pyfile.plugin, "check_data") and (isinstance(pyfile.plugin.check_data, dict)): + data = pyfile.plugin.check_data + elif hasattr(pyfile.plugin, "api_data") and (isinstance(pyfile.plugin.api_data, dict)): + data = pyfile.plugin.api_data + else: + return + + download_folder = self.config['general']['download_folder'] + local_file = fs_encode(save_join(download_folder, pyfile.package().folder, pyfile.name)) + + if not isfile(local_file): + self.checkFailed(pyfile, "File does not exist") + + # validate file size + if "size" in data: + api_size = int(data['size']) + file_size = getsize(local_file) + if api_size != file_size: + self.logWarning("File %s has incorrect size: %d B (%d expected)" % (pyfile.name, file_size, api_size)) + self.checkFailed(pyfile, "Incorrect file size") + + # validate checksum + if self.config['general']['checksum']: + if "checksum" in data: + data['md5'] = data['checksum'] - download_folder = self.config['general']['download_folder'] - local_file = fs_encode(save_join(download_folder, pyfile.package().folder, pyfile.name)) - - for key, value in sorted(pyfile.plugin.check_data.items(), reverse = True): - if key == "size": - if value and value != pyfile.size: - self.logWarning("File %s has incorrect size: %d B (%d expected)" % (pyfile.size, value)) - self.checkFailed(pyfile, "Incorrect file size") - else: + for key in self.algorithms: + if key in data: checksum = computeChecksum(local_file, key.replace("-","").lower()) if checksum: - if checksum == value: - self.logInfo('File integrity of "%s" verified by %s checksum (%s).' % (pyfile.name, key.upper() , checksum)) + if checksum == data[key]: + self.logInfo('File integrity of "%s" verified by %s checksum (%s).' % (pyfile.name, key.upper(), checksum)) return else: - self.logWarning("%s checksum for file %s does not match (%s != %s)" % (key.upper(), pyfile.name, checksum, value)) + self.logWarning("%s checksum for file %s does not match (%s != %s)" % (key.upper(), pyfile.name, checksum, data[key])) self.checkFailed(pyfile, "Checksums do not match") else: self.logWarning("Unsupported hashing algorithm: %s" % key.upper()) diff --git a/module/plugins/hoster/ShragleCom.py b/module/plugins/hoster/ShragleCom.py index 9ebf4917b..8fe05a2b9 100644 --- a/module/plugins/hoster/ShragleCom.py +++ b/module/plugins/hoster/ShragleCom.py @@ -2,84 +2,105 @@ # -*- coding: utf-8 -*- import re -import time +from pycurl import FOLLOWLOCATION from module.plugins.Hoster import Hoster +from module.plugins.internal.SimpleHoster import parseHtmlForm +from module.plugins.ReCaptcha import ReCaptcha +from module.network.RequestFactory import getURL + +API_KEY = "078e5ca290d728fd874121030efb4a0d" + +def parseFileInfo(self, url): + file_id = re.match(self.__pattern__, url).group('ID') + + data = getURL( + "http://www.cloudnator.com/api.php?key=%s&action=getStatus&fileID=%s" % (API_KEY, file_id), + decode = True + ).split() + + if len(data) == 4: + name, size, md5, status = data + size = int(size) + + if hasattr(self, "check_data"): + self.checkdata = {"size": size, "md5": md5} + + return name, size, 2 if status == "0" else 1, url + else: + return url, 0, 1, url + +def getInfo(urls): + for url in urls: + file_info = parseFileInfo(plugin, url) + yield file_info class ShragleCom(Hoster): __name__ = "ShragleCom" __type__ = "hoster" - __pattern__ = r"http://(?:www.)?shragle.com/files/" - __version__ = "0.1" - __description__ = """Shragle Download PLugin""" - __author_name__ = ("RaNaN") - __author_mail__ = ("RaNaN@pyload.org") + __pattern__ = r"http://(?:www.)?(cloudnator|shragle).com/files/(?P<ID>.*?)/" + __version__ = "0.20" + __description__ = """Cloudnator.com (Shragle.com) Download PLugin""" + __author_name__ = ("RaNaN", "zoidberg") + __author_mail__ = ("RaNaN@pyload.org", "zoidberg@mujmail.cz") def setup(self): self.html = None self.multiDL = False + self.check_data = None def process(self, pyfile): - self.pyfile = pyfile - - if not self.file_exists(): + #get file status and info + self.pyfile.name, self.pyfile.size, status = parseFileInfo(self, pyfile.url)[:3] + if status != 2: self.offline() - - self.pyfile.name = self.get_file_name() - self.setWait(self.get_waiting_time()) - self.wait() + self.handleFree() - self.proceed(self.get_file_url()) - - def get_waiting_time(self): - if self.html is None: - self.download_html() - - timestring = re.search('\s*var\sdownloadWait\s=\s(\d*);', self.html) - if timestring: - return int(timestring.group(1)) - else: - return 10 - - def download_html(self): + def handleFree(self): self.html = self.load(self.pyfile.url) - - def get_file_url(self): - """ returns the absolute downloadable filepath - """ - if self.html is None: - self.download_html() - - self.fileID = re.search(r'name="fileID"\svalue="(.*?)"', self.html).group(1) - self.dlSession = re.search(r'name="dlSession"\svalue="(.*?)"', self.html).group(1) - self.userID = re.search(r'name="userID"\svalue="(.*?)"', self.html).group(1) - self.password = re.search(r'name="password"\svalue="(.*?)"', self.html).group(1) - self.lang = re.search(r'name="lang"\svalue="(.*?)"', self.html).group(1) - return re.search(r'id="download"\saction="(.*?)"', self.html).group(1) - - def get_file_name(self): - if self.html is None: - self.download_html() - - #file_name_pattern = r'You want to download \xc2\xbb<strong>(.*?)</strong>\xc2\xab' - file_name_pattern = r'<h2 class="colorgrey center" style="overflow:hidden;width:1000px;"> (.*)<br /><span style="font-size:12px;font-weight:normal; width:100px;"> ([\d\.]*) MB</span></h2>' - res = re.search(file_name_pattern, self.html) - if res: - return res.group(1) - else: - self.fail("filename cant be extracted") - - def file_exists(self): - """ returns True or False - """ - if self.html is None: - self.download_html() - - if re.search(r"html", self.html) is None: - return False + + #get wait time + found = re.search('\s*var\sdownloadWait\s=\s(\d+);', self.html) + self.setWait(int(found.group(1)) if found else 30) + + #parse download form + action, inputs = parseHtmlForm('id="download', self.html) + + #solve captcha + found = re.search('recaptcha/api/(?:challenge|noscript)?k=(.+?)', self.html) + captcha_key = found.group(1) if found else "6LdEFb0SAAAAAAwM70vnYo2AkiVkCx-xmfniatHz" + + recaptcha = ReCaptcha(self) + + inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + self.wait() + + #validate + self.req.http.c.setopt(FOLLOWLOCATION, 0) + self.html = self.load(action, post = inputs) + + found = re.search(r"Location\s*:\s*(\S*)", self.req.http.header, re.I) + if found: + self.correctCaptcha() + download_url = found.group(1) else: - return True - - def proceed(self, url): - self.download(url, post={'fileID': self.fileID, 'dlSession': self.dlSession, 'userID': self.userID, 'password': self.password, 'lang': self.lang}) + if "Sicherheitscode falsch" in self.html: + self.invalidCaptcha() + self.retry(max_tries = 5, reason = "Invalid captcha") + else: + self.fail("Invalid session") + + #download + self.req.http.c.setopt(FOLLOWLOCATION, 1) + self.download(download_url) + + check = self.checkDownload({ + "ip_blocked": re.compile(r'<div class="error".*IP.*loading') + }) + if check == "ip_blocked": + self.setWait(1800, True) + self.wait() + self.retry() + +
\ No newline at end of file diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py index dc9f9a733..e4d9766d7 100644 --- a/module/plugins/hoster/UlozTo.py +++ b/module/plugins/hoster/UlozTo.py @@ -27,7 +27,7 @@ class UlozTo(SimpleHoster): __name__ = "UlozTo" __type__ = "hoster" __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\w+/[^/?]*)" - __version__ = "0.87" + __version__ = "0.88" __description__ = """uloz.to""" __author_name__ = ("zoidberg") @@ -42,7 +42,6 @@ class UlozTo(SimpleHoster): VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">' FREE_URL_PATTERN = r'<div class="freeDownloadForm"><form action="([^"]+)"' PREMIUM_URL_PATTERN = r'<div class="downloadForm"><form action="([^"]+)"' - CAPTCHA_PATTERN = r'<img class="captcha" src="(.*?(\d+).png)" alt="" />' def setup(self): self.multiDL = self.premium @@ -80,24 +79,38 @@ class UlozTo(SimpleHoster): self.parseError("free download form") # get and decrypt captcha + captcha_id_field = captcha_text_field = None + captcha_id = captcha_text = None + + for key in inputs.keys(): + found = re.match("captcha.*(id|text|value)", key) + if found: + if found.group(1) == "id": + captcha_id_field = key + else: + captcha_text_field = key + + if not captcha_id_field or not captcha_text_field: + self.parseError("CAPTCHA form changed") + + """ captcha_id = self.getStorage("captcha_id") captcha_text = self.getStorage("captcha_text") - captcha_url = "DUMMY" if not captcha_id or not captcha_text: - found = re.search(self.CAPTCHA_PATTERN, self.html) - if not found: self.parseError("CAPTCHA") - captcha_url, captcha_id = found.groups() - - captcha_text = self.decryptCaptcha(captcha_url) + """ + captcha_id = inputs[captcha_id_field] + captcha_text = self.decryptCaptcha("http://img.uloz.to/captcha/%s.png" % captcha_id) - self.log.debug('CAPTCHA_URL:' + captcha_url + ' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text) + self.log.debug(' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text) + """ self.setStorage("captcha_id", captcha_id) self.setStorage("captcha_text", captcha_text) + """ self.multiDL = True - inputs.update({"captcha[id]": captcha_id, "captcha[text]": captcha_text}) + inputs.update({captcha_id_field: captcha_id, captcha_text_field: captcha_text}) self.download("http://www.ulozto.net" + action, post=inputs, cookies=True) @@ -116,7 +129,7 @@ class UlozTo(SimpleHoster): def doCheckDownload(self): check = self.checkDownload({ - "wrong_captcha": re.compile(self.CAPTCHA_PATTERN), + "wrong_captcha": re.compile(r'<ul class="error">\s*<li>Error rewriting the text.</li>'), "offline": re.compile(self.FILE_OFFLINE_PATTERN), "passwd": self.PASSWD_PATTERN, "paralell_dl": "<title>Uloz.to - Již stahuješ</title>", |