| author | Gregy <gregy@gregy.cz> | 2010-08-08 22:59:38 +0200 |
|---|---|---|
| committer | Gregy <gregy@gregy.cz> | 2010-08-08 22:59:38 +0200 |
| commit | cec729f7f196076c72310bbf4fc45cfbeaf1286b (patch) | |
| tree | e834629997e4d33c0dc84a6a4ab82469652ebfd4 /module/plugins/hoster | |
| parent | Multihome debug message (diff) | |
| download | pyload-cec729f7f196076c72310bbf4fc45cfbeaf1286b.tar.xz | |
NetloadIn partial rewrite (issues fix, implemented info prefetch)
Diffstat (limited to 'module/plugins/hoster')
-rw-r--r-- | module/plugins/hoster/NetloadIn.py | 228 |
1 files changed, 144 insertions, 84 deletions
```diff
diff --git a/module/plugins/hoster/NetloadIn.py b/module/plugins/hoster/NetloadIn.py
index 2e88b98b4..059d6a8f8 100644
--- a/module/plugins/hoster/NetloadIn.py
+++ b/module/plugins/hoster/NetloadIn.py
@@ -1,15 +1,57 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import os
 import re
-import tempfile
-from time import time
 from time import sleep
-import hashlib
 
 from module.plugins.Hoster import Hoster
-from module.plugins.Plugin import Plugin
+from module.network.Request import getURL
+
+def getInfo(urls):
+    ## returns list of tupels (name, size (in bytes), status (see FileDatabase), url)
+
+
+    apiurl = "http://api.netload.in/info.php?auth=Zf9SnQh9WiReEsb18akjvQGqT0I830e8&bz=1&md5=1&file_id="
+    id_regex = re.compile("http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index.php?id=10&file_id=)")
+    urls_per_query = 80
+
+    iterations = len(urls)/urls_per_query
+    if len(urls)%urls_per_query > 0:
+        iterations = iterations +1
+
+    for i in range(iterations):
+        ids = ""
+        for url in urls[i*urls_per_query:(i+1)*urls_per_query]:
+            match = id_regex.search(url)
+            if match:
+                ids = ids + match.group(1) +";"
+
+        api = getURL(apiurl+ids)
+
+        if api == None or len(api) < 10:
+            print "Netload prefetch: failed "
+            return
+        if api.find("unknown_auth") >= 0:
+            print "Netload prefetch: Outdated auth code "
+            return
+
+        result = []
+
+        counter = 0
+        for r in api.split():
+            try:
+                tmp = r.split(";")
+                try:
+                    size = int(tmp[2])
+                except:
+                    size = 0
+                result.append( (tmp[1], size, 2 if tmp[3] == "online" else 1, urls[(i*80)+counter]) )
+            except:
+                print "Netload prefetch: Error while processing response: "
+                print r
+            counter = counter +1
+
+        yield result
 
 class NetloadIn(Hoster):
     __name__ = "NetloadIn"
@@ -17,8 +59,9 @@ class NetloadIn(Hoster):
     __pattern__ = r"http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index.php?id=10&file_id=)"
     __version__ = "0.2"
     __description__ = """Netload.in Download Hoster"""
-    __author_name__ = ("spoob", "RaNaN")
-    __author_mail__ = ("spoob@pyload.org", "ranan@pyload.org")
+    __config__ = [ ("dumpgen", "bool", "Generate debug page dumps on stdout", "False") ]
+    __author_name__ = ("spoob", "RaNaN", "Gregy")
+    __author_mail__ = ("spoob@pyload.org", "ranan@pyload.org", "gregy@gregy.cz")
 
     def setup(self):
         self.multiDL = False
@@ -27,38 +70,33 @@ class NetloadIn(Hoster):
         self.req.canContinue = True
 
     def process(self, pyfile):
-        self.html = [None, None, None]
         self.url = pyfile.url
         self.prepare()
         self.pyfile.setStatus("downloading")
         self.proceed(self.url)
-        
+
+    def getInfo(self):
+        self.log.debug("Netload: Info prefetch")
+        self.download_api_data()
+        if self.api_data and self.api_data["filename"]:
+            self.pyfile.name = self.api_data["filename"]
+        self.pyfile.sync()
+
     def prepare(self):
         self.download_api_data()
-        if self.file_exists():
-            self.pyfile.name = self.get_file_name()
-            if self.account:
-                self.log.debug("Netload: Use Premium Account")
-                return True
-
-            for i in range(5):
-                if not self.download_html():
-                    self.setWait(5)
-                    self.log.info(_("Netload: waiting %d minutes, because the file is currently not available." % self.get_wait_time()))
-                    self.wait()
-                    continue
-
-                wait_time = self.get_wait_time()
-                self.setWait(wait_time)
-                self.log.debug(_("Netload: waiting %d seconds" % wait_time))
-                self.wait()
-
-            self.url = self.get_file_url()
-            return True
+        if self.api_data and self.api_data["filename"]:
+            self.pyfile.name = self.api_data["filename"]
+
+        if self.account:
+            self.log.debug("Netload: Use Premium Account")
+            return True
+        if self.download_html():
+            return True
         else:
-            self.offline()
+            self.fail("Failed")
+            return False
 
     def download_api_data(self):
         url = self.url
@@ -67,94 +105,116 @@ class NetloadIn(Hoster):
         if match:
             apiurl = "http://netload.in/share/fileinfos2.php"
             src = self.load(apiurl, cookies=False, get={"file_id": match.group(1)})
+            self.log.debug("Netload: APIDATA: "+src.strip())
             self.api_data = {}
             if src == "unknown_server_data":
                 self.api_data = False
-                self.html[0] = self.load(self.url, cookies=False)
             elif not src == "unknown file_data":
+                lines = src.split(";")
                 self.api_data["exists"] = True
                 self.api_data["fileid"] = lines[0]
                 self.api_data["filename"] = lines[1]
                 self.api_data["size"] = lines[2] #@TODO formatting? (ex: '2.07 KB')
                 self.api_data["status"] = lines[3]
-                self.api_data["checksum"] = lines[4].strip()
+                if self.api_data["status"] == "online":
+                    self.api_data["checksum"] = lines[4].strip()
+                else:
+                    self.offline();
             else:
                 self.api_data["exists"] = False
         else:
             self.api_data = False
            self.html[0] = self.load(self.url, cookies=False)
 
+    def final_wait(self, page):
+        wait_time = self.get_wait_time(page)
+        self.setWait(wait_time)
+        self.log.debug(_("Netload: final wait %d seconds" % wait_time))
+        self.wait()
+        self.url = self.get_file_url(page)
+
     def download_html(self):
-        self.html[0] = self.load(self.url, cookies=True)
-        url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', self.html[0]).group(1).replace("amp;", "")
-
-        m = re.search(r"countdown\((\d+),'change\(\)'\);", url_captcha_html)
-        if m:
-            wait_time = int(m.group(1))
-            self.log.debug(_("Netload: waiting %d seconds." % wait_time))
-            self.setWait(wait_time)
-            self.wait()
-
-        for i in range(6):
-            self.html[1] = self.load(url_captcha_html, cookies=True)
-            if "Please retry again in a few minutes" in self.html[1]:
-                return False
+        self.log.debug("Netload: Entering download_html")
+        page = self.load(self.url, cookies=True)
+        captchawaited = False
+        for i in range(10):
+            self.log.debug(_("Netload: try number %d " % i))
+            if self.getConf('dumpgen'):
+                print page
+
+            if re.search(r"(We will prepare your download..)", page) != None:
+                self.log.debug("Netload: We will prepare your download")
+                self.final_wait(page);
+                return True
+            if re.search(r"(We had a reqeust with the IP)", page) != None:
+                wait = self.get_wait_time(page);
+                if wait == 0:
+                    self.log.debug("Netload: Wait was 0 setting 30")
+                    wait = 30
+                self.log.info(_("Netload: waiting between downloads %d s." % wait))
+                self.setWait(wait)
+                self.wait()
+
+                link = re.search(r"You can download now your next file. <a href=\"(index.php\?id=10&.*)\" class=\"Orange_Link\">Click here for the download</a>", page)
+                if link != None:
+                    self.log.debug("Netload: Using new link found on page")
+                    page = self.load("http://netload.in/" + link.group(1).replace("amp;", ""))
+                else:
+                    self.log.debug("Netload: No new link found, using old one")
+                    page = self.load(self.url, cookies=True)
+                continue
+
+            self.log.debug("Netload: Trying to find captcha")
+
+            url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', page).group(1).replace("amp;", "")
+            page = self.load(url_captcha_html, cookies=True)
+
             try:
-                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', self.html[1]).group(1)
+                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', page).group(1)
             except:
-                open("dump.html", "w").write(self.html[1])
-                url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', self.html[1]).group(1).replace("amp;", "")
-                self.html[1] = self.load(url_captcha_html, cookies=True)
-                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', self.html[1]).group(1)
+                open("dump.html", "w").write(page)
+                self.log.debug("Netload: Could not find captcha, try again from begining")
+                continue
+
+            file_id = re.search('<input name="file_id" type="hidden" value="(.*)" />', page).group(1)
+            if not captchawaited:
+                wait = self.get_wait_time(page);
+                self.log.info(_("Netload: waiting for captcha %d s." % wait))
+                self.setWait(wait)
+                self.wait()
+                captchawaited = True
 
-            file_id = re.search('<input name="file_id" type="hidden" value="(.*)" />', self.html[1]).group(1)
-
             captcha = self.decryptCaptcha(captcha_url)
-            sleep(5)
-
-            self.html[2] = self.load("http://netload.in/index.php?id=10", post={"file_id": file_id, "captcha_check": captcha}, cookies=True)
+            sleep(4)
+            page = self.load("http://netload.in/index.php?id=10", post={"file_id": file_id, "captcha_check": captcha}, cookies=True)
 
-            if re.search(r"(We will prepare your download..|We had a reqeust with the IP)", self.html[2]) != None:
-                return True
+        return False
 
-        self.fail("Captcha not decrypted")
-
-    def get_file_url(self):
+    def get_file_url(self, page):
         try:
             file_url_pattern = r"<a class=\"Orange_Link\" href=\"(http://.+)\" >Click here"
-            return re.search(file_url_pattern, self.html[2]).group(1)
+            attempt = re.search(file_url_pattern, page)
+            if attempt != None:
+                return attempt.group(1)
+            else:
+                self.log.debug("Netload: Backup try for final link")
+                file_url_pattern = r"<a href=\"(.+)\" class=\"Orange_Link\">Click here"
+                attempt = re.search(file_url_pattern, page)
+                return "http://netload.in/"+attempt.group(1);
         except:
+            self.log.debug("Netload: Getting final link failed")
            return None
 
-    def get_wait_time(self):
-        if re.search(r"We had a reqeust with the IP", self.html[2]):
-            wait_minutes = int(re.search(r"countdown\((.+),'change\(\)'\)", self.html[2]).group(1)) / 6000
-            self.wantReconnect = True
-            return wait_minutes * 60
-
-        wait_seconds = int(re.search(r"countdown\((.+),'change\(\)'\)", self.html[2]).group(1)) / 100
+    def get_wait_time(self, page):
+        wait_seconds = int(re.search(r"countdown\((.+),'change\(\)'\)", page).group(1)) / 100
         return wait_seconds
 
-    def get_file_name(self):
-        if self.api_data and self.api_data["filename"]:
-            return self.api_data["filename"]
-        elif self.html[0]:
-            file_name_pattern = '\t\t\t(.+)<span style="color: #8d8d8d;">'
-            file_name_search = re.search(file_name_pattern, self.html[0])
-            if file_name_search:
-                return file_name_search.group(1)
-        return self.url
-
-    def file_exists(self):
-        if self.api_data and self.api_data["exists"]:
-            return self.api_data["exists"]
-        elif self.html[0] and re.search(r"The file has been deleted", self.html[0]) == None:
-            return True
-        return False
 
     def proceed(self, url):
+        self.log.debug("Netload: Downloading..")
         if self.account:
             self.req.load("http://netload.in/index.php", None, { "txtuser" : self.config['username'], "txtpass" : self.config['password'], "txtcheck" : "login", "txtlogin" : ""}, cookies=True)
 
         self.download(url, cookies=True)
```
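For context, the new module-level `getInfo(urls)` prefetch introduced here batches links in groups of 80, queries the Netload info API once per batch, and yields `(name, size, status, url)` tuples. Below is a minimal, self-contained sketch of that batching and response-parsing pattern, written as standalone Python 3 for illustration (the plugin itself is Python 2 and fetches via pyLoad's `getURL`). The helper names `batch_ids` and `parse_info`, the sample response string, and the simplified error handling are assumptions for this sketch, not part of the commit.

```python
import re

# Id-extraction pattern modeled on the one the plugin compiles in getInfo().
ID_PATTERN = re.compile(r"http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index\.php\?id=10&file_id=)")

def batch_ids(urls, per_query=80):
    """Yield (chunk_of_urls, "id1;id2;...;") pairs, mirroring the 80-links-per-request batching."""
    for start in range(0, len(urls), per_query):
        chunk = urls[start:start + per_query]
        matches = (ID_PATTERN.search(u) for u in chunk)
        ids = ";".join(m.group(1) for m in matches if m and m.group(1))
        if ids:
            ids += ";"  # the plugin appends a trailing separator after each id
        yield chunk, ids

def parse_info(api_text, chunk):
    """Parse one whitespace-separated record per file, formatted as 'id;name;size;status;md5'."""
    result = []
    for record, url in zip(api_text.split(), chunk):
        fields = record.split(";")
        try:
            size = int(fields[2])
        except (IndexError, ValueError):
            size = 0
        # Status convention from the plugin: 2 = online, anything else = 1.
        status = 2 if len(fields) > 3 and fields[3] == "online" else 1
        name = fields[1] if len(fields) > 1 else url
        result.append((name, size, status, url))
    return result

if __name__ == "__main__":
    urls = ["http://netload.in/dateiABC123.htm"]
    # A fabricated example response, used only to exercise the parser.
    sample = "ABC123;example.rar;1048576;online;d41d8cd98f00b204e9800998ecf8427e"
    for chunk, ids in batch_ids(urls):
        print(ids)                     # -> "ABC123;"
        print(parse_info(sample, chunk))
```

Separating the batching from the parsing keeps each step testable without network access; the plugin's generator interleaves both and yields one result list per API request.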