cz hoster - update search patterns

author: zoidberg10 <zoidberg@mujmail.cz> 2011-11-20 02:57:33 +0100
committer: zoidberg10 <zoidberg@mujmail.cz> 2011-11-20 02:57:33 +0100
commit: 26c349a9ade1b28b49bdcb4ef104551c0247a12a (patch)
tree: 9c1f966145ac1be857c99b9dc7754d408fbf9d9f /module
parent: disableble thriftbackend (diff)
download: pyload-26c349a9ade1b28b49bdcb4ef104551c0247a12a.tar.xz
4 files changed, 46 insertions, 90 deletions
diff --git a/module/plugins/hoster/CzshareCom.py b/module/plugins/hoster/CzshareCom.py
index 140aa9569..3684e5f53 100644
--- a/module/plugins/hoster/CzshareCom.py
+++ b/module/plugins/hoster/CzshareCom.py
@@ -17,7 +17,7 @@
 """
 
 import re
-from module.plugins.Hoster import Hoster
+from module.plugins.internal.SimpleHoster import SimpleHoster, parseFileInfo
 from module.network.RequestFactory import getURL
 
 def toInfoPage(url):
@@ -36,93 +36,55 @@ def getInfo(urls):
     for url in urls:
         info_url = toInfoPage(url)
         if info_url:
-            html = getURL(info_url, decode=True)
-            if re.search(CzshareCom.FILE_OFFLINE_PATTERN, html):
-                # File offline
-                result.append((url, 0, 1, url))
-            else:
-                # Get file info
-                name, size = url, 0
-
-                found = re.search(CzshareCom.FILE_SIZE_PATTERN, html)
-                if found is not None:
-                    size = float(found.group(1).replace(',','.').replace(' ',''))
-                    units = found.group(2)
-                    pow = {'KiB': 1, 'MiB': 2, 'GiB': 3}[units]
-                    size = int(size * 1024 ** pow)
-
-                found = re.search(CzshareCom.FILE_NAME_PATTERN, html)
-                if found is not None:
-                    name = found.group(1)
-
-                if found or size > 0:
-                    result.append((name, size, 2, url))
+            file_info = parseFileInfo(CzshareCom, url, getURL(info_url, decode=True)) 
+            result.append(file_info)
+            
     yield result
 
-class CzshareCom(Hoster):
+class CzshareCom(SimpleHoster):
     __name__ = "CzshareCom"
     __type__ = "hoster"
     __pattern__ = r"http://(\w*\.)*czshare\.(com|cz)/(\d+/|download.php\?).*"
-    __version__ = "0.8a"
+    __version__ = "0.83"
     __description__ = """CZshare.com"""
     __author_name__ = ("zoidberg")
 
+    SIZE_REPLACEMENTS = {',': '.', ' ': ''}
     FREE_URL_PATTERN = r'<a href="([^"]+)" class="page-download">[^>]*alt="([^"]+)" /></a>'
     FREE_FORM_PATTERN = r'<form action="download.php" method="post">\s*<img src="captcha.php" id="captcha" />(.*?)</form>'
     PREMIUM_FORM_PATTERN = r'<form action="/profi_down.php" method="post">(.*?)</form>'
     FORM_INPUT_PATTERN = r'<input[^>]* name="([^"]+)" value="([^"]+)"[^>]*/>'
-    FILE_OFFLINE_PATTERN = r'<h2 class="red">[^<]*[Ss]oubor (nenalezen|expiroval|je po.kozen|byl smaz.n)[^<]*<span>&nbsp;</span></h2>'
+    #FILE_OFFLINE_PATTERN = r'<h2 class="red">[^<]*[Ss]oubor (nenalezen|expiroval|je po.kozen)[^<]*<span>&nbsp;</span></h2>'
+    FILE_OFFLINE_PATTERN = r'<div class="header clearfix">\s*<h2 class="red">'
     MULTIDL_PATTERN = r"<p><font color='red'>Z[^<]*PROFI.</font></p>"
     #FILE_NAME_PATTERN = r'<h1>([^<]+)<span>&nbsp;</span></h1>'
     FILE_NAME_PATTERN = r'<div class="tab" id="parameters">\s*<p>\s*Cel. n.zev: <a href=[^>]*>([^<]+)</a>'
-    FILE_SIZE_PATTERN = r'<div class="tab" id="category">(?:\s*<p>[^\n]*</p>)*\s*Velikost:\s*([0-9., ]+)(KiB|MiB|GiB)\s*</div>'
-    USER_CREDIT_PATTERN = r'<div class="credit">\s*kredit: <strong>([0-9., ]+)(KB|MB|GB)</strong>\s*</div><!-- .credit -->'
+    FILE_SIZE_PATTERN = r'<div class="tab" id="category">(?:\s*<p>[^\n]*</p>)*\s*Velikost:\s*([0-9., ]+)([kKMG]i?B)\s*</div>'
+    USER_CREDIT_PATTERN = r'<div class="credit">\s*kredit: <strong>([0-9., ]+)([kKMG]i?B)</strong>\s*</div><!-- .credit -->'
 
     def setup(self):
         self.resumeDownload = self.multiDL = True if self.premium else False
         self.chunkLimit = 1
 
     def process(self, pyfile):
-        self.getFileInfo(pyfile)
-
-        if not self.account or not self.handlePremium(pyfile):
-            self.handleFree(pyfile)
-        self.checkDownloadedFile()
-
-    def getFileInfo(self, pyfile):
         url = toInfoPage(pyfile.url)
         if not url:
             self.logError(e)
             self.fail("Invalid URL")
 
         self.html = self.load(url, cookies=True, decode=True)
+        self.getFileInfo()
 
-        #marks the file as "offline" when the pattern was found on the html-page
-        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
-            self.offline()
-
-        # parse the name from the site and set attribute in pyfile
-        found = re.search(self.FILE_NAME_PATTERN, self.html)
-        if found is None:
-           self.fail("Parse error (NAME)")
-        pyfile.name = found.group(1)
-        self.logDebug("NAME:" + pyfile.name)
-
-        found = re.search(self.FILE_SIZE_PATTERN, self.html)
-        if found is None:
-            self.logError("Parse error (SIZE)")
-        else:
-            size = float(found.group(1).replace(',','.').replace(' ',''))
-            pyfile.size = size * 1024 ** {'KiB': 1, 'MiB': 2, 'GiB': 3}[found.group(2)]
-
-        pyfile.url = url
+        if not self.account or not self.handlePremium():
+            self.handleFree()
+        self.checkDownloadedFile()
 
-    def handlePremium(self, pyfile):
+    def handlePremium(self):
         # check if user logged in
         found = re.search(self.USER_CREDIT_PATTERN, self.html)
         if not found:
             self.account.relogin(self.user)
-            self.html = self.load(pyfile.url, cookies=True, decode=True)
+            self.html = self.load(self.pyfile.url, cookies=True, decode=True)
             found = re.search(self.USER_CREDIT_PATTERN, self.html)
             if not found: return False
 
@@ -130,10 +92,10 @@ class CzshareCom(Hoster):
         try:
             credit = float(found.group(1).replace(',','.').replace(' ',''))
             credit = credit * 1024 ** {'KB': 0, 'MB': 1, 'GB': 2}[found.group(2)]
-            self.logInfo("Premium download for %i KiB of Credit" % (pyfile.size / 1024))
+            self.logInfo("Premium download for %i KiB of Credit" % (self.pyfile.size / 1024))
             self.logInfo("User %s has %i KiB left" % (self.user, credit))
-            if credit * 1024 < pyfile.size:
-                self.logInfo("Not enough credit to download file %s" % pyfile.name)
+            if credit * 1024 < self.pyfile.size:
+                self.logInfo("Not enough credit to download file %s" % self.pyfile.name)
                 self.resetAccount()
         except Exception, e:
             # let's continue and see what happens...
@@ -151,11 +113,11 @@ class CzshareCom(Hoster):
         self.download("http://czshare.com/profi_down.php", cookies=True, post=inputs)
         return True
 
-    def handleFree(self, pyfile):
+    def handleFree(self):
         # get free url
         found = re.search(self.FREE_URL_PATTERN, self.html)
         if found is None:
-           self.fail("Parse error (URL)")
+           raise PluginParseError('Free URL')
         parsed_url = "http://czshare.com" + found.group(1)
         self.logDebug("PARSED_URL:" + parsed_url)
 
@@ -169,10 +131,10 @@ class CzshareCom(Hoster):
         try:
             form = re.search(self.FREE_FORM_PATTERN, self.html, re.DOTALL).group(1)
             inputs = dict(re.findall(self.FORM_INPUT_PATTERN, form))
-            pyfile.size = int(inputs['size'])
+            self.pyfile.size = int(inputs['size'])
         except Exception, e:
             self.logError(e)
-            self.fail("Parse error (FORM)")
+            raise PluginParseError('Form')
 
         # get and decrypt captcha
         captcha_url = 'http://czshare.com/captcha.php'
diff --git a/module/plugins/hoster/HellshareCz.py b/module/plugins/hoster/HellshareCz.py
index d2f5c8e40..c969d3285 100644
--- a/module/plugins/hoster/HellshareCz.py
+++ b/module/plugins/hoster/HellshareCz.py
@@ -33,8 +33,8 @@ def getInfo(urls):
 class HellshareCz(SimpleHoster):
     __name__ = "HellshareCz"
     __type__ = "hoster"
-    __pattern__ = r"http://(?:.*\.)*hellshare\.(?:cz|com|sk|hu)/[^?]*/(\d+).*"
-    __version__ = "0.74"
+    __pattern__ = r"(http://(?:.*\.)*hellshare\.(?:cz|com|sk|hu)/[^?]*/\d+).*"
+    __version__ = "0.75"
     __description__ = """Hellshare.cz"""
     __author_name__ = ("zoidberg")
 
@@ -47,6 +47,7 @@ class HellshareCz(SimpleHoster):
     #FILE_CREDITS_PATTERN = r'<strong class="filesize">(\d+) MB</strong>'
     CREDIT_LEFT_PATTERN = r'<p>After downloading this file you will have (\d+) MB for future downloads.'
     DOWNLOAD_AGAIN_PATTERN = r'<p>This file you downloaded already and re-download is for free. </p>'
+    SHOW_WINDOW_PATTERN = r'<a href="([^?]+/(\d+)/\?do=(fileDownloadButton|relatedFileDownloadButton-\2)-showDownloadWindow)"'
 
     def setup(self):
         self.resumeDownload = self.multiDL = True if self.account else False
@@ -56,16 +57,15 @@ class HellshareCz(SimpleHoster):
         if self.account:
             self.account.relogin(self.user)
 
-        pyfile.url = re.search(r'([^?]*)', pyfile.url).group(1)
+        pyfile.url = re.search(self.__pattern__, pyfile.url).group(1)
         self.html = self.load(pyfile.url, decode = True)
         self.getFileInfo()
-
-        if "do=relatedFileDownloadButton" in self.html:
-            found = re.search(self.__pattern__, self.pyfile.url)
-            show_window = "relatedFileDownloadButton-%s-showDownloadWindow" % found.group(1)
-        else:
-            show_window = "fileDownloadButton-showDownloadWindow"
-        self.html = self.load(pyfile.url, get = {"do" : show_window}, decode=True)
+       
+        found = re.search(self.SHOW_WINDOW_PATTERN, self.html)
+        if not found: self.parseError('SHOW WINDOW')
+        url = found.group(1)        
+        self.logDebug("SHOW WINDOW: " + url)
+        self.html = self.load("http://download.hellshare.com" + url, decode=True)
 
         if self.account:
             self.handlePremium()
diff --git a/module/plugins/hoster/ShareRapidCom.py b/module/plugins/hoster/ShareRapidCom.py
index 17c981b61..ae8211ef7 100644
--- a/module/plugins/hoster/ShareRapidCom.py
+++ b/module/plugins/hoster/ShareRapidCom.py
@@ -27,13 +27,12 @@ class ShareRapidCom(SimpleHoster):
     __name__ = "ShareRapidCom"
     __type__ = "hoster"
     __pattern__ = r"http://(?:www\.)?((share(-?rapid\.(biz|com|cz|info|eu|net|org|pl|sk)|-(central|credit|free|net)\.cz|-ms\.net)|(s-?rapid|rapids)\.(cz|sk))|(e-stahuj|mediatack|premium-rapidshare|rapidshare-premium|qiuck)\.cz|kadzet\.com|stahuj-zdarma\.eu|strelci\.net|universal-share\.com)/(stahuj/.+)"
-    __version__ = "0.44"
+    __version__ = "0.45"
     __description__ = """Share-rapid.com plugin - premium only"""
     __author_name__ = ("MikyWoW", "zoidberg")
     __author_mail__ = ("MikyWoW@seznam.cz", "zoidberg@mujmail.cz")
 
-    FILE_NAME_PATTERN = r'<h3>([^<]+)</h3>'
-    FILE_NAME_INFO_PATTERN = r'<h1[^>]*><span[^>]*>([^<]+)</ br> </h1>'
+    FILE_NAME_PATTERN = r'(?:title="Stahnout"|<h3>)([^<]+)</(?:a|h3)>'
     FILE_SIZE_PATTERN = r'<td class="i">Velikost:</td>\s*<td class="h"><strong>\s*([0-9.]+) (kB|MB|GB)</strong></td>'
     DOWNLOAD_URL_PATTERN = r'<a href="([^"]+)" title="Stahnout">([^<]+)</a>'
     ERR_LOGIN_PATTERN = ur'<div class="error_div"><strong>Stahování je přístupné pouze přihlášeným uživatelům'
diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py
index ffb09a655..f1a08b3e9 100644
--- a/module/plugins/hoster/UlozTo.py
+++ b/module/plugins/hoster/UlozTo.py
@@ -17,24 +17,14 @@
 """
 
 import re
-from module.plugins.internal.SimpleHoster import SimpleHoster, parseFileInfo
+from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo
 from module.network.RequestFactory import getURL
 
-def getInfo(urls):
-    result = []
-
-    for url in urls:
-        file_info = parseFileInfo(UlozTo, url, getURL(url, decode=True))
-        print file_info 
-        result.append(file_info)
-
-    yield result
-
 class UlozTo(SimpleHoster):
     __name__ = "UlozTo"
     __type__ = "hoster"
     __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/.*"
-    __version__ = "0.73"
+    __version__ = "0.74"
     __description__ = """uloz.to"""
     __config__ = [("reuseCaptcha", "bool", "Reuse captcha", "True"),
         ("captchaUser", "str", "captcha_user", ""),
@@ -45,7 +35,7 @@ class UlozTo(SimpleHoster):
     FILE_NAME_PATTERN = r'<h2 class="nadpis" style="margin-left:196px;"><a href="[^"]+">([^<]+)</a></h2>'
     CAPTCHA_PATTERN = r'<img style=".*src="([^"]+)" alt="Captcha" class="captcha"'
     CAPTCHA_NB_PATTERN = r'<input class="captcha_nb" type="hidden" name="captcha_nb" value="([0-9]+)" >'
-    FILE_OFFLINE_PATTERN = r'href="http://www.ulozto.net/(neexistujici|smazano)/\?lg=en&amp;'
+    FILE_OFFLINE_PATTERN = r'http://www.uloz.to/(neexistujici|smazano|nenalezeno)'
     PASSWD_PATTERN = r'<input type="password" class="text" name="file_password" id="frmfilepasswordForm-file_password" />'
     LIVE_URL_PATTERN = r'<div id="flashplayer"[^>]*>\s*<a href="([^"]+)"'
     LIVE_NAME_PATTERN = r'<a share_url="[^&]*&amp;t=([^"]+)"'
@@ -57,8 +47,10 @@ class UlozTo(SimpleHoster):
 
     def process(self, pyfile):
         header = self.load(pyfile.url, just_header=True)
-        if "location" in header and "utm_source=old" in header['location']:
-            self.offline()
+        if "location" in header:
+            self.logDebug('LOCATION: ' + header['location'])
+            if "utm_source=old" in header['location'] or re.search(self.FILE_OFFLINE_PATTERN, header['location']):
+                self.offline()
     
         self.html = self.load(pyfile.url, decode=True)
         
@@ -140,4 +132,7 @@ class UlozTo(SimpleHoster):
             if reuse_captcha:
                 self.setConfig("captchaUser", captcha)
                 self.setConfig("captchaNb", captcha_nb)
+
+getInfo = create_getInfo(UlozTo)
+                
         
 \ No newline at end of file
author	zoidberg10 <zoidberg@mujmail.cz>	2011-11-20 02:57:33 +0100
committer	zoidberg10 <zoidberg@mujmail.cz>	2011-11-20 02:57:33 +0100
commit	26c349a9ade1b28b49bdcb4ef104551c0247a12a (patch)
tree	9c1f966145ac1be857c99b9dc7754d408fbf9d9f /module
parent	disableble thriftbackend (diff)
download	pyload-26c349a9ade1b28b49bdcb4ef104551c0247a12a.tar.xz