update uloz.to, cloudnator.com, checksum plugin

author: zoidberg10 <zoidberg@mujmail.cz> 2012-06-14 07:19:13 +0200
committer: zoidberg10 <zoidberg@mujmail.cz> 2012-06-14 07:19:13 +0200
commit: 3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2 (patch)
tree: a8ba960ec7c19391598d28294271365956cdc8d0 /module/plugins
parent: closed #605 (diff)
download: pyload-3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2.tar.xz
3 files changed, 150 insertions, 90 deletions
diff --git a/module/plugins/hooks/Checksum.py b/module/plugins/hooks/Checksum.py
index 89e8ec762..0c2751e7a 100644
--- a/module/plugins/hooks/Checksum.py
+++ b/module/plugins/hooks/Checksum.py
@@ -18,6 +18,7 @@
 """
 from __future__ import with_statement
 import hashlib, zlib
+from os.path import getsize, isfile
 
 from module.utils import save_join, fs_encode
 from module.plugins.Hook import Hook
@@ -48,13 +49,20 @@ def computeChecksum(local_file, algorithm):
 
 class Checksum(Hook):
     __name__ = "Checksum"
-    __version__ = "0.03"
-    __description__ = "Check downloaded file hash"
+    __version__ = "0.04"
+    __description__ = "Verify downloaded file size and checksum (enable in general preferences)"
     __config__ = [("activated", "bool", "Activated", True),
                   ("action", "fail;retry;nothing", "What to do if check fails?", "retry"),
                   ("max_tries", "int", "Number of retries", 2)]
     __author_name__ = ("zoidberg")
     __author_mail__ = ("zoidberg@mujmail.cz")
+    
+    def setup(self):  # builds self.algorithms, the ordered list of hash names tried by downloadFinished
+        self.algorithms = sorted(getattr(hashlib, "algorithms", ("md5", "sha1", "sha224", "sha256", "sha384", "sha512")), reverse = True)  # reverse-alphabetical; falls back to a fixed tuple when hashlib has no "algorithms" attribute
+        self.algorithms.extend(["crc32", "adler32"])  # extend, not append: a nested list element is unhashable and breaks the "key in data" lookup
+        
+        if not self.config['general']['checksum']:
+            self.logInfo("Checksum validation is disabled in general configuration")                                  
              
     def downloadFinished(self, pyfile):
         """ 
@@ -62,25 +70,43 @@ class Checksum(Hook):
         pyfile.plugin.check_data should be a dictionary which can contain:
         a) if known, the exact filesize in bytes (e.g. "size": 123456789)
         b) hexadecimal hash string with algorithm name as key (e.g. "md5": "d76505d0869f9f928a17d42d66326307")    
-        """
+        """        
+        
         if hasattr(pyfile.plugin, "check_data") and (isinstance(pyfile.plugin.check_data, dict)):
+            data = pyfile.plugin.check_data        
+        elif hasattr(pyfile.plugin, "api_data") and (isinstance(pyfile.plugin.api_data, dict)):
+            data = pyfile.plugin.api_data
+        else:
+            return                    
+        
+        download_folder = self.config['general']['download_folder']
+        local_file = fs_encode(save_join(download_folder, pyfile.package().folder, pyfile.name))
+        
+        if not isfile(local_file):
+            self.checkFailed(pyfile, "File does not exist")  
+        
+        # validate file size
+        if "size" in data:
+            api_size = int(data['size'])
+            file_size = getsize(local_file)
+            if api_size != file_size:
+                self.logWarning("File %s has incorrect size: %d B (%d expected)" % (pyfile.name, file_size, api_size))
+                self.checkFailed(pyfile, "Incorrect file size")
+                
+        # validate checksum
+        if self.config['general']['checksum']:                                                       
+            if "checksum" in data:
+                data['md5'] = data['checksum']
             
-            download_folder = self.config['general']['download_folder']
-            local_file = fs_encode(save_join(download_folder, pyfile.package().folder, pyfile.name))
-            
-            for key, value in sorted(pyfile.plugin.check_data.items(), reverse = True):                          
-                if key == "size":
-                    if value and value != pyfile.size:
-                        self.logWarning("File %s has incorrect size: %d B (%d expected)" % (pyfile.size, value))
-                        self.checkFailed(pyfile, "Incorrect file size")
-                else:
+            for key in self.algorithms:
+                if key in data: 
                     checksum = computeChecksum(local_file, key.replace("-","").lower())                    
                     if checksum:
-                        if checksum == value:
-                            self.logInfo('File integrity of "%s" verified by %s checksum (%s).' % (pyfile.name, key.upper() , checksum))
+                        if checksum == data[key]:
+                            self.logInfo('File integrity of "%s" verified by %s checksum (%s).' % (pyfile.name, key.upper(), checksum))
                             return
                         else:
-                            self.logWarning("%s checksum for file %s does not match (%s != %s)" % (key.upper(), pyfile.name, checksum, value))    
+                            self.logWarning("%s checksum for file %s does not match (%s != %s)" % (key.upper(), pyfile.name, checksum, data[key]))    
                             self.checkFailed(pyfile, "Checksums do not match")
                     else:
                         self.logWarning("Unsupported hashing algorithm: %s" % key.upper())  
diff --git a/module/plugins/hoster/ShragleCom.py b/module/plugins/hoster/ShragleCom.py
index 9ebf4917b..8fe05a2b9 100644
--- a/module/plugins/hoster/ShragleCom.py
+++ b/module/plugins/hoster/ShragleCom.py
@@ -2,84 +2,105 @@
 # -*- coding: utf-8 -*-
 
 import re
-import time
+from pycurl import FOLLOWLOCATION
 
 from module.plugins.Hoster import Hoster
+from module.plugins.internal.SimpleHoster import parseHtmlForm
+from module.plugins.ReCaptcha import ReCaptcha
+from module.network.RequestFactory import getURL
+
+API_KEY = "078e5ca290d728fd874121030efb4a0d"
+
+def parseFileInfo(self, url):  # returns (name, size, status, url); status is 2 when the API reports "0", else 1
+    file_id = re.match(self.__pattern__, url).group('ID')
+    
+    data = getURL(
+        "http://www.cloudnator.com/api.php?key=%s&action=getStatus&fileID=%s" % (API_KEY, file_id),
+        decode = True
+        ).split()
+    
+    if len(data) == 4:  # expected reply: four whitespace-separated fields (name, size, md5, status)
+        name, size, md5, status = data
+        size = int(size)
+        
+        if hasattr(self, "check_data"):
+            self.check_data = {"size": size, "md5": md5}  # must be the same attribute name tested above ("check_data", not "checkdata")
+            
+        return name, size, 2 if status == "0" else 1, url
+    else:
+        return url, 0, 1, url
+
+def getInfo(urls):  # yields one parseFileInfo() tuple per url
+    for url in urls:
+        file_info = parseFileInfo(ShragleCom, url)  # pass the plugin class (it carries __pattern__); the former name "plugin" was undefined
+        yield file_info        
 
 class ShragleCom(Hoster):
     __name__ = "ShragleCom"
     __type__ = "hoster"
-    __pattern__ = r"http://(?:www.)?shragle.com/files/"
-    __version__ = "0.1"
-    __description__ = """Shragle Download PLugin"""
-    __author_name__ = ("RaNaN")
-    __author_mail__ = ("RaNaN@pyload.org")
+    __pattern__ = r"http://(?:www.)?(cloudnator|shragle).com/files/(?P<ID>.*?)/"
+    __version__ = "0.20"
+    __description__ = """Cloudnator.com (Shragle.com) Download PLugin"""
+    __author_name__ = ("RaNaN", "zoidberg")
+    __author_mail__ = ("RaNaN@pyload.org", "zoidberg@mujmail.cz")
 
     def setup(self):
         self.html = None
         self.multiDL = False
+        self.check_data = None
         
     def process(self, pyfile):
-        self.pyfile = pyfile
-        
-        if not self.file_exists():
+        #get file status and info
+        self.pyfile.name, self.pyfile.size, status = parseFileInfo(self, pyfile.url)[:3]
+        if status != 2:     
             self.offline()
-            
-        self.pyfile.name = self.get_file_name()
         
-        self.setWait(self.get_waiting_time())
-        self.wait()
+        self.handleFree()
         
-        self.proceed(self.get_file_url())
-
-    def get_waiting_time(self):
-        if self.html is None:
-            self.download_html()
-
-        timestring = re.search('\s*var\sdownloadWait\s=\s(\d*);', self.html)
-        if timestring: 
-            return int(timestring.group(1))
-        else:
-            return 10
-
-    def download_html(self):
+    def handleFree(self):
         self.html = self.load(self.pyfile.url)
-
-    def get_file_url(self):
-        """ returns the absolute downloadable filepath
-        """
-        if self.html is None:
-            self.download_html()
-
-        self.fileID = re.search(r'name="fileID"\svalue="(.*?)"', self.html).group(1)
-        self.dlSession = re.search(r'name="dlSession"\svalue="(.*?)"', self.html).group(1)
-        self.userID = re.search(r'name="userID"\svalue="(.*?)"', self.html).group(1)
-        self.password = re.search(r'name="password"\svalue="(.*?)"', self.html).group(1)
-        self.lang = re.search(r'name="lang"\svalue="(.*?)"', self.html).group(1)
-        return re.search(r'id="download"\saction="(.*?)"', self.html).group(1)
-
-    def get_file_name(self):
-        if self.html is None:
-            self.download_html()
-
-        #file_name_pattern = r'You want to download  \xc2\xbb<strong>(.*?)</strong>\xc2\xab'
-        file_name_pattern = r'<h2 class="colorgrey center" style="overflow:hidden;width:1000px;"> (.*)<br /><span style="font-size:12px;font-weight:normal; width:100px;"> ([\d\.]*) MB</span></h2>'
-        res = re.search(file_name_pattern, self.html)
-        if res:
-            return res.group(1)
-        else:
-            self.fail("filename cant be extracted")
-
-    def file_exists(self):
-        """ returns True or False
-        """
-        if self.html is None:
-            self.download_html()
-
-        if re.search(r"html", self.html) is None:
-            return False
+        
+        #get wait time
+        found = re.search('\s*var\sdownloadWait\s=\s(\d+);', self.html)
+        self.setWait(int(found.group(1)) if found else 30)
+        
+        #parse download form
+        action, inputs = parseHtmlForm('id="download', self.html)
+        
+        #solve captcha - the "?" before "k=" is a literal and must be escaped; capture the whole key, not one character
+        found = re.search(r'recaptcha/api/(?:challenge|noscript)\?k=([\w-]+)', self.html)
+        captcha_key = found.group(1) if found else "6LdEFb0SAAAAAAwM70vnYo2AkiVkCx-xmfniatHz"
+               
+        recaptcha = ReCaptcha(self)
+        
+        inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key)
+        self.wait()
+        
+        #validate
+        self.req.http.c.setopt(FOLLOWLOCATION, 0)
+        self.html = self.load(action, post = inputs)      
+        
+        found = re.search(r"Location\s*:\s*(\S*)", self.req.http.header, re.I)
+        if found:
+            self.correctCaptcha()
+            download_url = found.group(1)
         else:
-            return True
-
-    def proceed(self, url):
-        self.download(url, post={'fileID': self.fileID, 'dlSession': self.dlSession, 'userID': self.userID, 'password': self.password, 'lang': self.lang})
+            if "Sicherheitscode falsch" in self.html:
+                self.invalidCaptcha()
+                self.retry(max_tries = 5, reason = "Invalid captcha")
+            else:
+                self.fail("Invalid session")
+            
+        #download
+        self.req.http.c.setopt(FOLLOWLOCATION, 1)
+        self.download(download_url)
+        
+        check = self.checkDownload({
+            "ip_blocked": re.compile(r'<div class="error".*IP.*loading')
+            })
+        if check == "ip_blocked":
+            self.setWait(1800, True)
+            self.wait()
+            self.retry()
+            
+            
+\ No newline at end of file
diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py
index dc9f9a733..e4d9766d7 100644
--- a/module/plugins/hoster/UlozTo.py
+++ b/module/plugins/hoster/UlozTo.py
@@ -27,7 +27,7 @@ class UlozTo(SimpleHoster):
     __name__ = "UlozTo"
     __type__ = "hoster"
     __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\w+/[^/?]*)"
-    __version__ = "0.87"
+    __version__ = "0.88"
     __description__ = """uloz.to"""
     __author_name__ = ("zoidberg")
 
@@ -42,7 +42,6 @@ class UlozTo(SimpleHoster):
     VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">'
     FREE_URL_PATTERN = r'<div class="freeDownloadForm"><form action="([^"]+)"'
     PREMIUM_URL_PATTERN = r'<div class="downloadForm"><form action="([^"]+)"'
-    CAPTCHA_PATTERN = r'<img class="captcha" src="(.*?(\d+).png)" alt="" />'
 
     def setup(self):
         self.multiDL = self.premium 
@@ -80,24 +79,38 @@ class UlozTo(SimpleHoster):
             self.parseError("free download form") 
         
         # get and decrypt captcha
+        captcha_id_field = captcha_text_field = None
+        captcha_id = captcha_text = None
+        
+        for key in inputs.keys():            
+            found = re.match("captcha.*(id|text|value)", key)
+            if found:
+                if found.group(1) == "id":
+                    captcha_id_field = key
+                else:
+                    captcha_text_field = key
+                
+        if not captcha_id_field or not captcha_text_field:
+            self.parseError("CAPTCHA form changed")    
+        
+        """
         captcha_id = self.getStorage("captcha_id")
         captcha_text = self.getStorage("captcha_text")
-        captcha_url = "DUMMY"
 
         if not captcha_id or not captcha_text:
-            found = re.search(self.CAPTCHA_PATTERN, self.html)
-            if not found: self.parseError("CAPTCHA")
-            captcha_url, captcha_id = found.groups()
-
-            captcha_text = self.decryptCaptcha(captcha_url)
+        """
+        captcha_id = inputs[captcha_id_field]
+        captcha_text = self.decryptCaptcha("http://img.uloz.to/captcha/%s.png" % captcha_id)
 
-        self.log.debug('CAPTCHA_URL:' + captcha_url + ' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text)
+        self.log.debug(' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text)
         
+        """
         self.setStorage("captcha_id", captcha_id)
         self.setStorage("captcha_text", captcha_text)
+        """
         self.multiDL = True
 
-        inputs.update({"captcha[id]": captcha_id, "captcha[text]": captcha_text})
+        inputs.update({captcha_id_field: captcha_id, captcha_text_field: captcha_text})
         
         self.download("http://www.ulozto.net" + action, post=inputs, cookies=True)
 
@@ -116,7 +129,7 @@ class UlozTo(SimpleHoster):
 
     def doCheckDownload(self):
         check = self.checkDownload({
-            "wrong_captcha": re.compile(self.CAPTCHA_PATTERN),
+            "wrong_captcha": re.compile(r'<ul class="error">\s*<li>Error rewriting the text.</li>'),
             "offline": re.compile(self.FILE_OFFLINE_PATTERN),
             "passwd": self.PASSWD_PATTERN,
             "paralell_dl": "<title>Uloz.to - Již stahuješ</title>",
author	zoidberg10 <zoidberg@mujmail.cz>	2012-06-14 07:19:13 +0200
committer	zoidberg10 <zoidberg@mujmail.cz>	2012-06-14 07:19:13 +0200
commit	3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2 (patch)
tree	a8ba960ec7c19391598d28294271365956cdc8d0 /module/plugins
parent	closed #605 (diff)
download	pyload-3c41e8ce84ee9caaa9f5c21cba875fbd1fdc14a2.tar.xz