| author | Gregy <gregy@gregy.cz> | 2010-08-08 22:59:38 +0200 |
|---|---|---|
| committer | Gregy <gregy@gregy.cz> | 2010-08-08 22:59:38 +0200 |
| commit | cec729f7f196076c72310bbf4fc45cfbeaf1286b (patch) | |
| tree | e834629997e4d33c0dc84a6a4ab82469652ebfd4 /module/plugins/hoster | |
| parent | Multihome debug message (diff) | |
| download | pyload-cec729f7f196076c72310bbf4fc45cfbeaf1286b.tar.xz | |
NetloadIn partial rewrite (issues fix, implemented info prefetch)
Diffstat (limited to 'module/plugins/hoster')
-rw-r--r-- | module/plugins/hoster/NetloadIn.py | 228 |
1 files changed, 144 insertions, 84 deletions
```diff
diff --git a/module/plugins/hoster/NetloadIn.py b/module/plugins/hoster/NetloadIn.py
index 2e88b98b4..059d6a8f8 100644
--- a/module/plugins/hoster/NetloadIn.py
+++ b/module/plugins/hoster/NetloadIn.py
@@ -1,15 +1,57 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import os
 import re
-import tempfile
-from time import time
 from time import sleep
-import hashlib
 
 from module.plugins.Hoster import Hoster
-from module.plugins.Plugin import Plugin
+from module.network.Request import getURL
+
+def getInfo(urls):
+    ## returns list of tupels (name, size (in bytes), status (see FileDatabase), url)
+
+
+    apiurl = "http://api.netload.in/info.php?auth=Zf9SnQh9WiReEsb18akjvQGqT0I830e8&bz=1&md5=1&file_id="
+    id_regex = re.compile("http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index.php?id=10&file_id=)")
+    urls_per_query = 80
+
+    iterations = len(urls)/urls_per_query
+    if len(urls)%urls_per_query > 0:
+        iterations = iterations +1
+
+    for i in range(iterations):
+        ids = ""
+        for url in urls[i*urls_per_query:(i+1)*urls_per_query]:
+            match = id_regex.search(url)
+            if match:
+                ids = ids + match.group(1) +";"
+
+        api = getURL(apiurl+ids)
+
+        if api == None or len(api) < 10:
+            print "Netload prefetch: failed "
+            return
+        if api.find("unknown_auth") >= 0:
+            print "Netload prefetch: Outdated auth code "
+            return
+
+        result = []
+
+        counter = 0
+        for r in api.split():
+            try:
+                tmp = r.split(";")
+                try:
+                    size = int(tmp[2])
+                except:
+                    size = 0
+                result.append( (tmp[1], size, 2 if tmp[3] == "online" else 1, urls[(i*80)+counter]) )
+            except:
+                print "Netload prefetch: Error while processing response: "
+                print r
+            counter = counter +1
+
+        yield result
 
 class NetloadIn(Hoster):
     __name__ = "NetloadIn"
@@ -17,8 +59,9 @@ class NetloadIn(Hoster):
     __pattern__ = r"http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index.php?id=10&file_id=)"
     __version__ = "0.2"
     __description__ = """Netload.in Download Hoster"""
-    __author_name__ = ("spoob", "RaNaN")
-    __author_mail__ = ("spoob@pyload.org", "ranan@pyload.org")
+    __config__ = [ ("dumpgen", "bool", "Generate debug page dumps on stdout", "False") ]
+    __author_name__ = ("spoob", "RaNaN", "Gregy")
+    __author_mail__ = ("spoob@pyload.org", "ranan@pyload.org", "gregy@gregy.cz")
 
     def setup(self):
         self.multiDL = False
@@ -27,38 +70,33 @@ class NetloadIn(Hoster):
         self.req.canContinue = True
 
     def process(self, pyfile):
-        self.html = [None, None, None]
         self.url = pyfile.url
         self.prepare()
         self.pyfile.setStatus("downloading")
         self.proceed(self.url)
-        
+
+    def getInfo(self):
+        self.log.debug("Netload: Info prefetch")
+        self.download_api_data()
+        if self.api_data and self.api_data["filename"]:
+            self.pyfile.name = self.api_data["filename"]
+        self.pyfile.sync()
+
     def prepare(self):
         self.download_api_data()
-        if self.file_exists():
-            self.pyfile.name = self.get_file_name()
-            if self.account:
-                self.log.debug("Netload: Use Premium Account")
-                return True
-
-            for i in range(5):
-                if not self.download_html():
-                    self.setWait(5)
-                    self.log.info(_("Netload: waiting %d minutes, because the file is currently not available." % self.get_wait_time()))
-                    self.wait()
-                    continue
-
-                wait_time = self.get_wait_time()
-                self.setWait(wait_time)
-                self.log.debug(_("Netload: waiting %d seconds" % wait_time))
-                self.wait()
-
-            self.url = self.get_file_url()
-            return True
+        if self.api_data and self.api_data["filename"]:
+            self.pyfile.name = self.api_data["filename"]
+
+        if self.account:
+            self.log.debug("Netload: Use Premium Account")
+            return True
+        if self.download_html():
+            return True
         else:
-            self.offline()
+            self.fail("Failed")
+            return False
 
     def download_api_data(self):
         url = self.url
@@ -67,94 +105,116 @@ class NetloadIn(Hoster):
         if match:
             apiurl = "http://netload.in/share/fileinfos2.php"
             src = self.load(apiurl, cookies=False, get={"file_id": match.group(1)})
+            self.log.debug("Netload: APIDATA: "+src.strip())
             self.api_data = {}
             if src == "unknown_server_data":
                 self.api_data = False
-                self.html[0] = self.load(self.url, cookies=False)
             elif not src == "unknown file_data":
+                lines = src.split(";")
                 self.api_data["exists"] = True
                 self.api_data["fileid"] = lines[0]
                 self.api_data["filename"] = lines[1]
                 self.api_data["size"] = lines[2] #@TODO formatting? (ex: '2.07 KB')
                 self.api_data["status"] = lines[3]
-                self.api_data["checksum"] = lines[4].strip()
+                if self.api_data["status"] == "online":
+                    self.api_data["checksum"] = lines[4].strip()
+                else:
+                    self.offline();
             else:
                 self.api_data["exists"] = False
         else:
             self.api_data = False
            self.html[0] = self.load(self.url, cookies=False)
 
+    def final_wait(self, page):
+        wait_time = self.get_wait_time(page)
+        self.setWait(wait_time)
+        self.log.debug(_("Netload: final wait %d seconds" % wait_time))
+        self.wait()
+        self.url = self.get_file_url(page)
+
     def download_html(self):
-        self.html[0] = self.load(self.url, cookies=True)
-        url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', self.html[0]).group(1).replace("amp;", "")
-
-        m = re.search(r"countdown\((\d+),'change\(\)'\);", url_captcha_html)
-        if m:
-            wait_time = int(m.group(1))
-            self.log.debug(_("Netload: waiting %d seconds." % wait_time))
-            self.setWait(wait_time)
-            self.wait()
-
-        for i in range(6):
-            self.html[1] = self.load(url_captcha_html, cookies=True)
-            if "Please retry again in a few minutes" in self.html[1]:
-                return False
+        self.log.debug("Netload: Entering download_html")
+        page = self.load(self.url, cookies=True)
+        captchawaited = False
+        for i in range(10):
+            self.log.debug(_("Netload: try number %d " % i))
+            if self.getConf('dumpgen'):
+                print page
+
+            if re.search(r"(We will prepare your download..)", page) != None:
+                self.log.debug("Netload: We will prepare your download")
+                self.final_wait(page);
+                return True
+            if re.search(r"(We had a reqeust with the IP)", page) != None:
+                wait = self.get_wait_time(page);
+                if wait == 0:
+                    self.log.debug("Netload: Wait was 0 setting 30")
+                    wait = 30
+                self.log.info(_("Netload: waiting between downloads %d s." % wait))
+                self.setWait(wait)
+                self.wait()
+
+                link = re.search(r"You can download now your next file. <a href=\"(index.php\?id=10&.*)\" class=\"Orange_Link\">Click here for the download</a>", page)
+                if link != None:
+                    self.log.debug("Netload: Using new link found on page")
+                    page = self.load("http://netload.in/" + link.group(1).replace("amp;", ""))
+                else:
+                    self.log.debug("Netload: No new link found, using old one")
+                    page = self.load(self.url, cookies=True)
+                continue
+
+            self.log.debug("Netload: Trying to find captcha")
+
+            url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', page).group(1).replace("amp;", "")
+            page = self.load(url_captcha_html, cookies=True)
+
             try:
-                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', self.html[1]).group(1)
+                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', page).group(1)
             except:
-                open("dump.html", "w").write(self.html[1])
-                url_captcha_html = "http://netload.in/" + re.search('(index.php\?id=10&.*&captcha=1)', self.html[1]).group(1).replace("amp;", "")
-                self.html[1] = self.load(url_captcha_html, cookies=True)
-                captcha_url = "http://netload.in/" + re.search('(share/includes/captcha.php\?t=\d*)', self.html[1]).group(1)
+                open("dump.html", "w").write(page)
+                self.log.debug("Netload: Could not find captcha, try again from begining")
+                continue
+
+            file_id = re.search('<input name="file_id" type="hidden" value="(.*)" />', page).group(1)
+            if not captchawaited:
+                wait = self.get_wait_time(page);
+                self.log.info(_("Netload: waiting for captcha %d s." % wait))
+                self.setWait(wait)
+                self.wait()
+                captchawaited = True
 
-            file_id = re.search('<input name="file_id" type="hidden" value="(.*)" />', self.html[1]).group(1)
-
             captcha = self.decryptCaptcha(captcha_url)
-            sleep(5)
-
-            self.html[2] = self.load("http://netload.in/index.php?id=10", post={"file_id": file_id, "captcha_check": captcha}, cookies=True)
+            sleep(4)
+            page = self.load("http://netload.in/index.php?id=10", post={"file_id": file_id, "captcha_check": captcha}, cookies=True)
 
-            if re.search(r"(We will prepare your download..|We had a reqeust with the IP)", self.html[2]) != None:
-                return True
+        return False
 
-        self.fail("Captcha not decrypted")
-
-    def get_file_url(self):
+    def get_file_url(self, page):
         try:
             file_url_pattern = r"<a class=\"Orange_Link\" href=\"(http://.+)\" >Click here"
-            return re.search(file_url_pattern, self.html[2]).group(1)
+            attempt = re.search(file_url_pattern, page)
+            if attempt != None:
+                return attempt.group(1)
+            else:
+                self.log.debug("Netload: Backup try for final link")
+                file_url_pattern = r"<a href=\"(.+)\" class=\"Orange_Link\">Click here"
+                attempt = re.search(file_url_pattern, page)
+                return "http://netload.in/"+attempt.group(1);
         except:
+            self.log.debug("Netload: Getting final link failed")
            return None
 
-    def get_wait_time(self):
-        if re.search(r"We had a reqeust with the IP", self.html[2]):
-            wait_minutes = int(re.search(r"countdown\((.+),'change\(\)'\)", self.html[2]).group(1)) / 6000
-            self.wantReconnect = True
-            return wait_minutes * 60
-
-        wait_seconds = int(re.search(r"countdown\((.+),'change\(\)'\)", self.html[2]).group(1)) / 100
+    def get_wait_time(self, page):
+        wait_seconds = int(re.search(r"countdown\((.+),'change\(\)'\)", page).group(1)) / 100
         return wait_seconds
 
-    def get_file_name(self):
-        if self.api_data and self.api_data["filename"]:
-            return self.api_data["filename"]
-        elif self.html[0]:
-            file_name_pattern = '\t\t\t(.+)<span style="color: #8d8d8d;">'
-            file_name_search = re.search(file_name_pattern, self.html[0])
-            if file_name_search:
-                return file_name_search.group(1)
-        return self.url
-
-    def file_exists(self):
-        if self.api_data and self.api_data["exists"]:
-            return self.api_data["exists"]
-        elif self.html[0] and re.search(r"The file has been deleted", self.html[0]) == None:
-            return True
-        return False
 
     def proceed(self, url):
+        self.log.debug("Netload: Downloading..")
         if self.account:
             self.req.load("http://netload.in/index.php", None, { "txtuser" : self.config['username'], "txtpass" : self.config['password'], "txtcheck" : "login", "txtlogin" : ""}, cookies=True)
 
         self.download(url, cookies=True)
```
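For context, the new module-level `getInfo(urls)` prefetch introduced here batches links in groups of 80, queries the Netload info API once per batch, and yields `(name, size, status, url)` tuples. Below is a minimal, self-contained sketch of that batching and response-parsing pattern, written as standalone Python 3 for illustration (the plugin itself is Python 2 and fetches via pyLoad's `getURL`). The helper names `batch_ids` and `parse_info`, the sample response string, and the simplified error handling are assumptions for this sketch, not part of the commit.

```python
import re

# Id-extraction pattern modeled on the one the plugin compiles in getInfo().
ID_PATTERN = re.compile(r"http://.*netload\.in/(?:datei(.*?)(?:\.htm|/)|index\.php\?id=10&file_id=)")

def batch_ids(urls, per_query=80):
    """Yield (chunk_of_urls, "id1;id2;...;") pairs, mirroring the 80-links-per-request batching."""
    for start in range(0, len(urls), per_query):
        chunk = urls[start:start + per_query]
        matches = (ID_PATTERN.search(u) for u in chunk)
        ids = ";".join(m.group(1) for m in matches if m and m.group(1))
        if ids:
            ids += ";"  # the plugin appends a trailing separator after each id
        yield chunk, ids

def parse_info(api_text, chunk):
    """Parse one whitespace-separated record per file, formatted as 'id;name;size;status;md5'."""
    result = []
    for record, url in zip(api_text.split(), chunk):
        fields = record.split(";")
        try:
            size = int(fields[2])
        except (IndexError, ValueError):
            size = 0
        # Status convention from the plugin: 2 = online, anything else = 1.
        status = 2 if len(fields) > 3 and fields[3] == "online" else 1
        name = fields[1] if len(fields) > 1 else url
        result.append((name, size, status, url))
    return result

if __name__ == "__main__":
    urls = ["http://netload.in/dateiABC123.htm"]
    # A fabricated example response, used only to exercise the parser.
    sample = "ABC123;example.rar;1048576;online;d41d8cd98f00b204e9800998ecf8427e"
    for chunk, ids in batch_ids(urls):
        print(ids)                     # -> "ABC123;"
        print(parse_info(sample, chunk))
```

Separating the batching from the parsing keeps each step testable without network access; the plugin's generator interleaves both and yields one result list per API request.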