diff options
author | zoidberg10 <zoidberg@mujmail.cz> | 2011-12-05 16:50:50 +0100 |
---|---|---|
committer | zoidberg10 <zoidberg@mujmail.cz> | 2011-12-05 16:50:50 +0100 |
commit | 81b2595cdbbca825f0ce553395acb88deffd4e99 (patch) | |
tree | 7d80d443244609ed80cf83a5426498747d5e735e | |
parent | fix filesystem encoding issues on windows and synology nas (diff) | |
download | pyload-81b2595cdbbca825f0ce553395acb88deffd4e99.tar.xz |
simplehoster - use parseFileSize
-rw-r--r-- | module/plugins/hoster/CrockoCom.py | 4 | ||||
-rw-r--r-- | module/plugins/hoster/CzshareCom.py | 13 | ||||
-rw-r--r-- | module/plugins/hoster/FourSharedCom.py | 6 | ||||
-rw-r--r-- | module/plugins/hoster/HellspyCz.py | 4 | ||||
-rw-r--r-- | module/plugins/hoster/IfolderRu.py | 7 | ||||
-rw-r--r-- | module/plugins/hoster/UploadboxCom.py | 4 | ||||
-rw-r--r-- | module/plugins/hoster/UploadedTo.py | 6 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 81 |
8 files changed, 64 insertions, 61 deletions
diff --git a/module/plugins/hoster/CrockoCom.py b/module/plugins/hoster/CrockoCom.py index 7eafa67ed..9598025ec 100644 --- a/module/plugins/hoster/CrockoCom.py +++ b/module/plugins/hoster/CrockoCom.py @@ -9,7 +9,7 @@ class CrockoCom(SimpleHoster): __name__ = "CrockoCom" __type__ = "hoster" __pattern__ = r"http://(www\.)?(crocko|easy-share).com/.*" - __version__ = "0.10" + __version__ = "0.11" __description__ = """Crocko Download Hoster""" __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") @@ -23,7 +23,7 @@ class CrockoCom(SimpleHoster): FORM_PATTERN = r'<form method="post" action="([^"]+)">(.*?)</form>' FORM_INPUT_PATTERN = r'<input[^>]* name="?([^" ]+)"? value="?([^" ]+)"?[^>]*>' - NAME_REPLACEMENTS = [(r'<[^>]*>', '')] + FILE_NAME_REPLACEMENTS = [(r'<[^>]*>', '')] def handleFree(self): if "You need Premium membership to download this file." in self.html: diff --git a/module/plugins/hoster/CzshareCom.py b/module/plugins/hoster/CzshareCom.py index 158fb0d1d..0ef9c267c 100644 --- a/module/plugins/hoster/CzshareCom.py +++ b/module/plugins/hoster/CzshareCom.py @@ -45,21 +45,20 @@ class CzshareCom(SimpleHoster): __name__ = "CzshareCom" __type__ = "hoster" __pattern__ = r"http://(\w*\.)*czshare\.(com|cz)/(\d+/|download.php\?).*" - __version__ = "0.85" + __version__ = "0.86" __description__ = """CZshare.com""" __author_name__ = ("zoidberg") - SIZE_REPLACEMENTS = [(',', '.'), (' ', '')] + FILE_NAME_PATTERN = r'<div class="tab" id="parameters">\s*<p>\s*Cel. n.zev: <a href=[^>]*>(?P<N>[^<]+)</a>' + FILE_SIZE_PATTERN = r'<div class="tab" id="category">(?:\s*<p>[^\n]*</p>)*\s*Velikost:\s*(?P<S>[0-9., ]+)(?P<U>[kKMG])i?B\s*</div>' + FILE_OFFLINE_PATTERN = r'<div class="header clearfix">\s*<h2 class="red">' + FILE_SIZE_REPLACEMENTS = [(' ', '')] + FREE_URL_PATTERN = r'<a href="([^"]+)" class="page-download">[^>]*alt="([^"]+)" /></a>' FREE_FORM_PATTERN = r'<form action="download.php" method="post">\s*<img src="captcha.php" id="captcha" />(.*?)</form>' PREMIUM_FORM_PATTERN = r'<form action="/profi_down.php" method="post">(.*?)</form>' FORM_INPUT_PATTERN = r'<input[^>]* name="([^"]+)" value="([^"]+)"[^>]*/>' - #FILE_OFFLINE_PATTERN = r'<h2 class="red">[^<]*[Ss]oubor (nenalezen|expiroval|je po.kozen)[^<]*<span> </span></h2>' - FILE_OFFLINE_PATTERN = r'<div class="header clearfix">\s*<h2 class="red">' MULTIDL_PATTERN = r"<p><font color='red'>Z[^<]*PROFI.</font></p>" - #FILE_NAME_PATTERN = r'<h1>([^<]+)<span> </span></h1>' - FILE_NAME_PATTERN = r'<div class="tab" id="parameters">\s*<p>\s*Cel. n.zev: <a href=[^>]*>(?P<N>[^<]+)</a>' - FILE_SIZE_PATTERN = r'<div class="tab" id="category">(?:\s*<p>[^\n]*</p>)*\s*Velikost:\s*(?P<S>[0-9., ]+)(?P<U>[kKMG])i?B\s*</div>' USER_CREDIT_PATTERN = r'<div class="credit">\s*kredit: <strong>([0-9., ]+)([kKMG]i?B)</strong>\s*</div><!-- .credit -->' def setup(self): diff --git a/module/plugins/hoster/FourSharedCom.py b/module/plugins/hoster/FourSharedCom.py index 5d10204a7..b1cc252e2 100644 --- a/module/plugins/hoster/FourSharedCom.py +++ b/module/plugins/hoster/FourSharedCom.py @@ -8,7 +8,7 @@ class FourSharedCom(SimpleHoster): __name__ = "FourSharedCom" __type__ = "hoster" __pattern__ = r"http://[\w\.]*?4shared(-china)?\.com/(account/)?(download|get|file|document|photo|video|audio)/.+?/.*" - __version__ = "0.23" + __version__ = "0.24" __description__ = """4Shared Download Hoster""" __author_name__ = ("jeix", "zoidberg") __author_mail__ = ("jeix@hasnomail.de", "zoidberg@mujmail.cz") @@ -16,10 +16,10 @@ class FourSharedCom(SimpleHoster): FILE_NAME_PATTERN = '<meta name="title" content="(?P<N>[^"]+)" />' FILE_SIZE_PATTERN = '<span title="Size: (?P<S>[0-9,.]+) (?P<U>[kKMG])i?B">' FILE_OFFLINE_PATTERN = 'The file link that you requested is not valid\.|This file was deleted.' + FILE_NAME_REPLACEMENTS = [(r"&#(\d+).", lambda m: unichr(int(m.group(1))))] + DOWNLOAD_BUTTON_PATTERN = '<a href="([^"]+)"\s*class="dbtn' DOWNLOAD_URL_PATTERN = "<div class=\"(?:dl|xxlarge bold)\">\s*<a href='([^']+)'" - - NAME_REPLACEMENTS = [(r"&#(\d+).", lambda m: unichr(int(m.group(1))))] def handleFree(self): found = re.search(self.DOWNLOAD_BUTTON_PATTERN, self.html) diff --git a/module/plugins/hoster/HellspyCz.py b/module/plugins/hoster/HellspyCz.py index 9a8817c54..a03e2bf21 100644 --- a/module/plugins/hoster/HellspyCz.py +++ b/module/plugins/hoster/HellspyCz.py @@ -23,14 +23,14 @@ class HellspyCz(SimpleHoster): __name__ = "HellspyCz" __type__ = "hoster" __pattern__ = r"http://(?:\w*\.)*hellspy\.(?:cz|com|sk|hu)(/\S+/\d+)/?.*" - __version__ = "0.23" + __version__ = "0.24" __description__ = """HellSpy.cz""" __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") FILE_INFO_PATTERN = '<span class="filesize right">(?P<S>[0-9.]+) <span>(?P<U>[kKMG])i?B</span></span>\s*<h1>(?P<N>[^<]+)</h1>' FILE_OFFLINE_PATTERN = r'<h2>(404 - Page|File) not found</h2>' - URL_REPLACEMENTS = [(r"http://(?:\w*\.)*hellspy\.(?:cz|com|sk|hu)(/\S+/\d+)/?.*", r"http://www.hellspy.com\1")] + FILE_URL_REPLACEMENTS = [(r"http://(?:\w*\.)*hellspy\.(?:cz|com|sk|hu)(/\S+/\d+)/?.*", r"http://www.hellspy.com\1")] CREDIT_LEFT_PATTERN = r'<strong>Credits: </strong>\s*(\d+)' DOWNLOAD_AGAIN_PATTERN = r'<a id="button-download-start"[^>]*title="You can download the file without deducting your credit.">' diff --git a/module/plugins/hoster/IfolderRu.py b/module/plugins/hoster/IfolderRu.py index 3177271c4..83b98ecc9 100644 --- a/module/plugins/hoster/IfolderRu.py +++ b/module/plugins/hoster/IfolderRu.py @@ -30,9 +30,11 @@ class IfolderRu(SimpleHoster): __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") - SIZE_UNITS = {u'Кб': 1, u'Мб': 2, u'Гб': 3} + FILE_SIZE_REPLACEMENTS = [(u'Кб', 'KB'), (u'Мб', 'MB'), (u'Гб', 'GB')] FILE_NAME_PATTERN = ur'(?:<div><span>)?Название:(?:</span>)? <b>(?P<N>[^<]+)</b><(?:/div|br)>' - FILE_SIZE_PATTERN = ur'(?:<div><span>)?Размер:(?:</span>)? <b>(?P<S>[0-9.]+) (?P<U>[^<]+)</b><(?:/div|br)>' + FILE_SIZE_PATTERN = ur'(?:<div><span>)?Размер:(?:</span>)? <b>(?P<S>[^<]+)</b><(?:/div|br)>' + FILE_OFFLINE_PATTERN = ur'<p>Файл номер <b>[^<]*</b> не найден !!!</p>' + SESSION_ID_PATTERN = r'<a href=(http://ints.ifolder.ru/ints/sponsor/\?bi=\d*&session=([^&]+)&u=[^>]+)>' FORM1_PATTERN = r'<form method=post name="form1" ID="Form1" style="margin-bottom:200px">(.*?)</form>' FORM_INPUT_PATTERN = r'<input[^>]* name="?([^" ]+)"? value="?([^" ]+)"?[^>]*>' @@ -40,7 +42,6 @@ class IfolderRu(SimpleHoster): HIDDEN_INPUT_PATTERN = r"var v = .*?name='([^']+)' value='1'" DOWNLOAD_LINK_PATTERN = r'<a id="download_file_href" href="([^"]+)"' WRONG_CAPTCHA_PATTERN = ur'<font color=Red>неверный код,<br>введите еще раз</font><br>' - FILE_OFFLINE_PATTERN = ur'<p>Файл номер <b>[^<]*</b> не найден !!!</p>' def setup(self): self.resumeDownload = self.multiDL = True if self.account else False diff --git a/module/plugins/hoster/UploadboxCom.py b/module/plugins/hoster/UploadboxCom.py index 584c64e77..0eb023cb2 100644 --- a/module/plugins/hoster/UploadboxCom.py +++ b/module/plugins/hoster/UploadboxCom.py @@ -31,7 +31,7 @@ class UploadboxCom(SimpleHoster): __name__ = "Uploadbox" __type__ = "hoster" __pattern__ = r"http://(?:www\.)?uploadbox\.com/files/([^/]+).*" - __version__ = "0.03" + __version__ = "0.04" __description__ = """UploadBox.com plugin - free only""" __author_name__ = ("zoidberg") __author_mail__ = ("zoidberg@mujmail.cz") @@ -39,7 +39,7 @@ class UploadboxCom(SimpleHoster): FILE_NAME_PATTERN = r'<p><span>File name:</span>\s*(?P<N>[^<]+)</p>' FILE_SIZE_PATTERN = r'<span>Size:</span>\s*(?P<S>[0-9.]+) (?P<U>[kKMG])i?B <span>' FILE_OFFLINE_PATTERN = r'<strong>File deleted from service</strong>' - NAME_REPLACEMENTS = [(r"(.*)", lambda m: unicode(m.group(1), 'koi8_r'))] + FILE_NAME_REPLACEMENTS = [(r"(.*)", lambda m: unicode(m.group(1), 'koi8_r'))] FREE_FORM_PATTERN = r'<form action="([^"]+)" method="post" id="free" name="free">(.*?)</form>' FORM_INPUT_PATTERN = r'<input[^>]* name="([^"]+)" value="([^"]+)" />' diff --git a/module/plugins/hoster/UploadedTo.py b/module/plugins/hoster/UploadedTo.py index 174c386a8..39483cf86 100644 --- a/module/plugins/hoster/UploadedTo.py +++ b/module/plugins/hoster/UploadedTo.py @@ -59,7 +59,7 @@ def parseFileInfo(self, url = '', html = ''): found = re.search(self.FILE_INFO_PATTERN, html) if found: name, fileid = html_unescape(found.group('N')), found.group('ID') - size = parseFileSize(found.group('S'), found.group('U')) + size = parseFileSize(found.group('S')) status = 2 return name, size, status, fileid @@ -84,12 +84,12 @@ class UploadedTo(Hoster): __name__ = "UploadedTo" __type__ = "hoster" __pattern__ = r"(http://[\w\.-]*?uploaded\.to/.*?(file/|\?id=|&id=)[\w]+/?)|(http://[\w\.]*?ul\.to/(\?id=|&id=)?[\w\-]+/.+)|(http://[\w\.]*?ul\.to/(\?id=|&id=)?[\w\-]+/?)" - __version__ = "0.52" + __version__ = "0.53" __description__ = """Uploaded.to Download Hoster""" __author_name__ = ("spoob", "mkaay") __author_mail__ = ("spoob@pyload.org", "mkaay@mkaay.de") - FILE_INFO_PATTERN = r'<a href="file/(?P<ID>\w+)" id="filename">(?P<N>[^<]+)</a> \s*<small[^>]*>(?P<S>[0-9,]+) (?P<U>[KMG])B</small>' + FILE_INFO_PATTERN = r'<a href="file/(?P<ID>\w+)" id="filename">(?P<N>[^<]+)</a> \s*<small[^>]*>(?P<S>[^<]+)</small>' FILE_OFFLINE_PATTERN = r'<small class="cL">Error: 404</small>' def setup(self): diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index c101cbf6d..4a03ec60a 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -20,7 +20,7 @@ from urlparse import urlparse from re import search, sub from module.plugins.Hoster import Hoster -from module.utils import html_unescape +from module.utils import html_unescape, parseFileSize from module.network.RequestFactory import getURL def reSub(string, ruleslist): @@ -31,40 +31,37 @@ def reSub(string, ruleslist): def parseFileInfo(self, url = '', html = ''): if not html and hasattr(self, "html"): html = self.html - name, size, status, found = '', 0, 3, 0 + info = {"name" : url, "size" : 0, "status" : 3} if hasattr(self, "FILE_OFFLINE_PATTERN") and search(self.FILE_OFFLINE_PATTERN, html): # File offline - status = 1 - elif hasattr(self, "FILE_INFO_PATTERN"): - found = search(self.FILE_INFO_PATTERN, html) - if found: - name, size, units = found.group('N'), found.group('S'), found.group('U') + info['status'] = 1 else: - if hasattr(self, "FILE_NAME_PATTERN"): - found = search(self.FILE_NAME_PATTERN, html) - if found: - name = found.group('N') - - if hasattr(self, "FILE_SIZE_PATTERN"): - found = search(self.FILE_SIZE_PATTERN, html) - if found: - size, units = found.group('S'), found.group('U') + for pattern in ("FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): + try: + info = dict(info, **search(getattr(self, pattern), html).groupdict()) + except AttributeError: + continue - if size: + if len(info) > 3: # File online, return name and size - size = float(reSub(size, self.SIZE_REPLACEMENTS)) * 1024 ** self.SIZE_UNITS[units] - status = 2 + info['status'] = 2 + if 'N' in info: info['name'] = reSub(info['N'], self.FILE_NAME_REPLACEMENTS) + if 'S' in info: + size = info['S'] + info['U'] if 'U' in info else info['S'] + print repr(size) + size = parseFileSize(reSub(size, self.FILE_SIZE_REPLACEMENTS)) + print repr(self.FILE_SIZE_REPLACEMENTS), repr(size) + info['size'] = size - name = reSub(name, self.NAME_REPLACEMENTS) if name else url - - return name, size, status, url + print info + return info def create_getInfo(plugin): def getInfo(urls): for url in urls: - file_info = parseFileInfo(plugin, url, getURL(reSub(url, plugin.URL_REPLACEMENTS), decode=True)) - yield file_info + file_info = parseFileInfo(plugin, url, getURL(reSub(url, plugin.FILE_URL_REPLACEMENTS), decode=True)) + yield file_info['name'], file_info['size'], file_info['status'], url return getInfo class PluginParseError(Exception): @@ -90,20 +87,18 @@ class SimpleHoster(Hoster): FILE_OFFLINE_PATTERN = r'File (deleted|not found)' TEMP_OFFLINE_PATTERN = r'Server maintainance' """ - #TODO: could be replaced when using utils.parseFileSize ? - #some plugins need to override these - SIZE_UNITS = {'k': 1, 'K': 1, 'M': 2, 'G': 3} - SIZE_REPLACEMENTS = [(',', ''), (' ', '')] - NAME_REPLACEMENTS = [] - URL_REPLACEMENTS = [] + + FILE_SIZE_REPLACEMENTS = [] + FILE_NAME_REPLACEMENTS = [] + FILE_URL_REPLACEMENTS = [] def setup(self): self.resumeDownload = self.multiDL = True if self.account else False def process(self, pyfile): - pyfile.url = reSub(pyfile.url, self.URL_REPLACEMENTS) + pyfile.url = reSub(pyfile.url, self.FILE_URL_REPLACEMENTS) self.html = self.load(pyfile.url, decode = True) - self.getFileInfo() + self.file_info = self.getFileInfo() if self.account: self.handlePremium() else: @@ -114,17 +109,25 @@ class SimpleHoster(Hoster): if hasattr(self, "TEMP_OFFLINE_PATTERN") and search(self.TEMP_OFFLINE_PATTERN, html): self.tempOffline() - name, size, status, url = parseFileInfo(self) - if status == 1: + file_info = parseFileInfo(self) + if file_info['status'] == 1: self.offline() - elif status != 2: + elif file_info['status'] != 2: + self.logDebug(file_info) self.parseError('File info') - if not name: - name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + if file_info['name']: + self.pyfile.name = file_info['name'] + else: + self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + + if file_info['size']: + self.pyfile.size = file_info['size'] + else: + self.logError("File size not parsed") - self.logDebug("FILE NAME: %s FILE SIZE: %s" % (name, size)) - self.pyfile.name, self.pyfile.size = name, size + self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) + return file_info def handleFree(self): self.fail("Free download not implemented") |