diff options
Diffstat (limited to 'pyload/plugins/internal')
-rw-r--r-- | pyload/plugins/internal/AbstractExtractor.py | 93 | ||||
-rw-r--r-- | pyload/plugins/internal/CaptchaService.py | 77 | ||||
-rw-r--r-- | pyload/plugins/internal/DeadHoster.py | 18 | ||||
-rw-r--r-- | pyload/plugins/internal/NetloadInOCR.py | 27 | ||||
-rw-r--r-- | pyload/plugins/internal/OCR.py | 314 | ||||
-rw-r--r-- | pyload/plugins/internal/ShareonlineBizOCR.py | 53 | ||||
-rw-r--r-- | pyload/plugins/internal/SimpleCrypter.py | 68 | ||||
-rw-r--r-- | pyload/plugins/internal/SimpleHoster.py | 251 | ||||
-rw-r--r-- | pyload/plugins/internal/UnRar.py | 212 | ||||
-rw-r--r-- | pyload/plugins/internal/UnZip.py | 49 | ||||
-rw-r--r-- | pyload/plugins/internal/XFSPAccount.py | 79 | ||||
-rw-r--r-- | pyload/plugins/internal/__init__.py | 0 |
12 files changed, 1241 insertions, 0 deletions
diff --git a/pyload/plugins/internal/AbstractExtractor.py b/pyload/plugins/internal/AbstractExtractor.py new file mode 100644 index 000000000..3cd635eff --- /dev/null +++ b/pyload/plugins/internal/AbstractExtractor.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +class ArchiveError(Exception): + pass + +class CRCError(Exception): + pass + +class WrongPassword(Exception): + pass + +class AbtractExtractor: + @staticmethod + def checkDeps(): + """ Check if system satisfies dependencies + :return: boolean + """ + return True + + @staticmethod + def getTargets(files_ids): + """ Filter suited targets from list of filename id tuple list + :param files_ids: List of file paths + :return: List of targets, id tuple list + """ + raise NotImplementedError + + + def __init__(self, m, file, out, fullpath, overwrite, renice): + """Initialize extractor for specific file + + :param m: ExtractArchive addon plugin + :param file: Absolute file path + :param out: Absolute path to destination directory + :param fullpath: Extract to fullpath + :param overwrite: Overwrite existing archives + :param renice: Renice value + """ + self.m = m + self.file = file + self.out = out + self.fullpath = fullpath + self.overwrite = overwrite + self.renice = renice + self.files = [] # Store extracted files here + + + def init(self): + """ Initialize additional data structures """ + pass + + + def checkArchive(self): + """Check if password is needed. Raise ArchiveError if integrity is + questionable. + + :return: boolean + :raises ArchiveError + """ + return False + + def checkPassword(self, password): + """ Check if the given password is/might be correct. + If it can not be decided at this point return true. + + :param password: + :return: boolean + """ + return True + + def extract(self, progress, password=None): + """Extract the archive. Raise specific errors in case of failure. + + :param progress: Progress function, call this to update status + :param password password to use + :raises WrongPassword + :raises CRCError + :raises ArchiveError + :return: + """ + raise NotImplementedError + + def getDeleteFiles(self): + """Return list of files to delete, do *not* delete them here. + + :return: List with paths of files to delete + """ + raise NotImplementedError + + def getExtractedFiles(self): + """Populate self.files at some point while extracting""" + return self.files
\ No newline at end of file diff --git a/pyload/plugins/internal/CaptchaService.py b/pyload/plugins/internal/CaptchaService.py new file mode 100644 index 000000000..b912436a7 --- /dev/null +++ b/pyload/plugins/internal/CaptchaService.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: zoidberg +""" + +import re + +class CaptchaService(): + __version__ = "0.02" + + def __init__(self, plugin): + self.plugin = plugin + +class ReCaptcha(): + def __init__(self, plugin): + self.plugin = plugin + + def challenge(self, id): + js = self.plugin.req.load("http://www.google.com/recaptcha/api/challenge", get={"k":id}, cookies=True) + + try: + challenge = re.search("challenge : '(.*?)',", js).group(1) + server = re.search("server : '(.*?)',", js).group(1) + except: + self.plugin.fail("recaptcha error") + result = self.result(server,challenge) + + return challenge, result + + def result(self, server, challenge): + return self.plugin.decryptCaptcha("%simage"%server, get={"c":challenge}, cookies=True, forceUser=True, imgtype="jpg") + +class AdsCaptcha(CaptchaService): + def challenge(self, src): + js = self.plugin.req.load(src, cookies=True) + + try: + challenge = re.search("challenge: '(.*?)',", js).group(1) + server = re.search("server: '(.*?)',", js).group(1) + except: + self.plugin.fail("adscaptcha error") + result = self.result(server,challenge) + + return challenge, result + + def result(self, server, challenge): + return self.plugin.decryptCaptcha("%sChallenge.aspx" % server, get={"cid": challenge, "dummy": random()}, cookies=True, imgtype="jpg") + +class SolveMedia(CaptchaService): + def __init__(self,plugin): + self.plugin = plugin + + def challenge(self, src): + html = self.plugin.req.load("http://api.solvemedia.com/papi/challenge.noscript?k=%s" % src, cookies=True) + try: + challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="([^"]+)">', html).group(1) + except: + self.plugin.fail("solvmedia error") + result = self.result(challenge) + + return challenge, result + + def result(self,challenge): + return self.plugin.decryptCaptcha("http://api.solvemedia.com/papi/media?c=%s" % challenge,imgtype="gif")
\ No newline at end of file diff --git a/pyload/plugins/internal/DeadHoster.py b/pyload/plugins/internal/DeadHoster.py new file mode 100644 index 000000000..e180e2384 --- /dev/null +++ b/pyload/plugins/internal/DeadHoster.py @@ -0,0 +1,18 @@ +from module.plugins.Hoster import Hoster as _Hoster + +def create_getInfo(plugin): + def getInfo(urls): + yield [('#N/A: ' + url, 0, 1, url) for url in urls] + return getInfo + +class DeadHoster(_Hoster): + __name__ = "DeadHoster" + __type__ = "hoster" + __pattern__ = r"" + __version__ = "0.11" + __description__ = """Hoster is no longer available""" + __author_name__ = ("zoidberg") + __author_mail__ = ("zoidberg@mujmail.cz") + + def setup(self): + self.fail("Hoster is no longer available")
\ No newline at end of file diff --git a/pyload/plugins/internal/NetloadInOCR.py b/pyload/plugins/internal/NetloadInOCR.py new file mode 100644 index 000000000..e50978701 --- /dev/null +++ b/pyload/plugins/internal/NetloadInOCR.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +from OCR import OCR + +class NetloadInOCR(OCR): + __version__ = 0.1 + + def __init__(self): + OCR.__init__(self) + + def get_captcha(self, image): + self.load_image(image) + self.to_greyscale() + self.clean(3) + self.clean(3) + self.run_tesser(True, True, False, False) + + self.result_captcha = self.result_captcha.replace(" ", "")[:4] # cut to 4 numbers + + return self.result_captcha + +if __name__ == '__main__': + import urllib + ocr = NetloadInOCR() + urllib.urlretrieve("http://netload.in/share/includes/captcha.php", "captcha.png") + + print ocr.get_captcha('captcha.png') diff --git a/pyload/plugins/internal/OCR.py b/pyload/plugins/internal/OCR.py new file mode 100644 index 000000000..9f8b7ef8c --- /dev/null +++ b/pyload/plugins/internal/OCR.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +#Copyright (C) 2009 kingzero, RaNaN +# +#This program is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, +#or (at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +#See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +### +from __future__ import with_statement +import os +from os.path import join +from os.path import abspath +import logging +import subprocess +#import tempfile + +import Image +import TiffImagePlugin +import PngImagePlugin +import GifImagePlugin +import JpegImagePlugin + + +class OCR(object): + __version__ = 0.1 + + def __init__(self): + self.logger = logging.getLogger("log") + + def load_image(self, image): + self.image = Image.open(image) + self.pixels = self.image.load() + self.result_captcha = '' + + def unload(self): + """delete all tmp images""" + pass + + def threshold(self, value): + self.image = self.image.point(lambda a: a * value + 10) + + def run(self, command): + """Run a command""" + + popen = subprocess.Popen(command, bufsize = -1, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + popen.wait() + output = popen.stdout.read() +" | "+ popen.stderr.read() + popen.stdout.close() + popen.stderr.close() + self.logger.debug("Tesseract ReturnCode %s Output: %s" % (popen.returncode, output)) + + def run_tesser(self, subset=False, digits=True, lowercase=True, uppercase=True): + #self.logger.debug("create tmp tif") + + + #tmp = tempfile.NamedTemporaryFile(suffix=".tif") + tmp = open(join("tmp", "tmpTif_%s.tif" % self.__name__), "wb") + tmp.close() + #self.logger.debug("create tmp txt") + #tmpTxt = tempfile.NamedTemporaryFile(suffix=".txt") + tmpTxt = open(join("tmp", "tmpTxt_%s.txt" % self.__name__), "wb") + tmpTxt.close() + + self.logger.debug("save tiff") + self.image.save(tmp.name, 'TIFF') + + if os.name == "nt": + tessparams = [join(pypath,"tesseract","tesseract.exe")] + else: + tessparams = ['tesseract'] + + tessparams.extend( [abspath(tmp.name), abspath(tmpTxt.name).replace(".txt", "")] ) + + if subset and (digits or lowercase or uppercase): + #self.logger.debug("create temp subset config") + #tmpSub = tempfile.NamedTemporaryFile(suffix=".subset") + tmpSub = open(join("tmp", "tmpSub_%s.subset" % self.__name__), "wb") + tmpSub.write("tessedit_char_whitelist ") + if digits: + tmpSub.write("0123456789") + if lowercase: + tmpSub.write("abcdefghijklmnopqrstuvwxyz") + if uppercase: + tmpSub.write("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + tmpSub.write("\n") + tessparams.append("nobatch") + tessparams.append(abspath(tmpSub.name)) + tmpSub.close() + + self.logger.debug("run tesseract") + self.run(tessparams) + self.logger.debug("read txt") + + try: + with open(tmpTxt.name, 'r') as f: + self.result_captcha = f.read().replace("\n", "") + except: + self.result_captcha = "" + + self.logger.debug(self.result_captcha) + try: + os.remove(tmp.name) + os.remove(tmpTxt.name) + if subset and (digits or lowercase or uppercase): + os.remove(tmpSub.name) + except: + pass + + def get_captcha(self, name): + raise NotImplementedError + + def to_greyscale(self): + if self.image.mode != 'L': + self.image = self.image.convert('L') + + self.pixels = self.image.load() + + def eval_black_white(self, limit): + self.pixels = self.image.load() + w, h = self.image.size + for x in xrange(w): + for y in xrange(h): + if self.pixels[x, y] > limit: + self.pixels[x, y] = 255 + else: + self.pixels[x, y] = 0 + + def clean(self, allowed): + pixels = self.pixels + + w, h = self.image.size + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 255: continue + # no point in processing white pixels since we only want to remove black pixel + count = 0 + + try: + if pixels[x-1, y-1] != 255: count += 1 + if pixels[x-1, y] != 255: count += 1 + if pixels[x-1, y + 1] != 255: count += 1 + if pixels[x, y + 1] != 255: count += 1 + if pixels[x + 1, y + 1] != 255: count += 1 + if pixels[x + 1, y] != 255: count += 1 + if pixels[x + 1, y-1] != 255: count += 1 + if pixels[x, y-1] != 255: count += 1 + except: + pass + + # not enough neighbors are dark pixels so mark this pixel + # to be changed to white + if count < allowed: + pixels[x, y] = 1 + + # second pass: this time set all 1's to 255 (white) + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 1: pixels[x, y] = 255 + + self.pixels = pixels + + def derotate_by_average(self): + """rotate by checking each angle and guess most suitable""" + + w, h = self.image.size + pixels = self.pixels + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 155 + + highest = {} + counts = {} + + for angle in range(-45, 45): + + tmpimage = self.image.rotate(angle) + + pixels = tmpimage.load() + + w, h = self.image.size + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 255 + + + count = {} + + for x in xrange(w): + count[x] = 0 + for y in xrange(h): + if pixels[x, y] == 155: + count[x] += 1 + + sum = 0 + cnt = 0 + + for x in count.values(): + if x != 0: + sum += x + cnt += 1 + + avg = sum / cnt + counts[angle] = cnt + highest[angle] = 0 + for x in count.values(): + if x > highest[angle]: + highest[angle] = x + + highest[angle] = highest[angle] - avg + + hkey = 0 + hvalue = 0 + + for key, value in highest.iteritems(): + if value > hvalue: + hkey = key + hvalue = value + + self.image = self.image.rotate(hkey) + pixels = self.image.load() + + for x in xrange(w): + for y in xrange(h): + if pixels[x, y] == 0: + pixels[x, y] = 255 + + if pixels[x, y] == 155: + pixels[x, y] = 0 + + self.pixels = pixels + + def split_captcha_letters(self): + captcha = self.image + started = False + letters = [] + width, height = captcha.size + bottomY, topY = 0, height + pixels = captcha.load() + + for x in xrange(width): + black_pixel_in_col = False + for y in xrange(height): + if pixels[x, y] != 255: + if not started: + started = True + firstX = x + lastX = x + + if y > bottomY: bottomY = y + if y < topY: topY = y + if x > lastX: lastX = x + + black_pixel_in_col = True + + if black_pixel_in_col == False and started == True: + rect = (firstX, topY, lastX, bottomY) + new_captcha = captcha.crop(rect) + + w, h = new_captcha.size + if w > 5 and h > 5: + letters.append(new_captcha) + + started = False + bottomY, topY = 0, height + + return letters + + def correct(self, values, var=None): + + if var: + result = var + else: + result = self.result_captcha + + for key, item in values.iteritems(): + + if key.__class__ == str: + result = result.replace(key, item) + else: + for expr in key: + result = result.replace(expr, item) + + if var: + return result + else: + self.result_captcha = result + + +if __name__ == '__main__': + ocr = OCR() + ocr.load_image("B.jpg") + ocr.to_greyscale() + ocr.eval_black_white(140) + ocr.derotate_by_average() + ocr.run_tesser() + print "Tesseract", ocr.result_captcha + ocr.image.save("derotated.jpg") + diff --git a/pyload/plugins/internal/ShareonlineBizOCR.py b/pyload/plugins/internal/ShareonlineBizOCR.py new file mode 100644 index 000000000..c5c2e92e8 --- /dev/null +++ b/pyload/plugins/internal/ShareonlineBizOCR.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +#Copyright (C) 2009 kingzero, RaNaN +# +#This program is free software; you can redistribute it and/or modify +#it under the terms of the GNU General Public License as published by +#the Free Software Foundation; either version 3 of the License, +#or (at your option) any later version. +# +#This program is distributed in the hope that it will be useful, +#but WITHOUT ANY WARRANTY; without even the implied warranty of +#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +#See the GNU General Public License for more details. +# +#You should have received a copy of the GNU General Public License +# along with this program; if not, see <http://www.gnu.org/licenses/>. +# +### +from OCR import OCR + +class ShareonlineBizOCR(OCR): + __version__ = 0.1 + + def __init__(self): + OCR.__init__(self) + + def get_captcha(self, image): + self.load_image(image) + self.to_greyscale() + self.image = self.image.resize((160, 50)) + self.pixels = self.image.load() + self.threshold(1.85) + #self.eval_black_white(240) + #self.derotate_by_average() + + letters = self.split_captcha_letters() + + final = "" + for letter in letters: + self.image = letter + self.run_tesser(True, True, False, False) + final += self.result_captcha + + return final + + #tesseract at 60% + +if __name__ == '__main__': + import urllib + ocr = ShareonlineBizOCR() + urllib.urlretrieve("http://www.share-online.biz/captcha.php", "captcha.jpeg") + print ocr.get_captcha('captcha.jpeg') diff --git a/pyload/plugins/internal/SimpleCrypter.py b/pyload/plugins/internal/SimpleCrypter.py new file mode 100644 index 000000000..d935bf1da --- /dev/null +++ b/pyload/plugins/internal/SimpleCrypter.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: zoidberg +""" + +import re + +from module.plugins.Crypter import Crypter +from module.utils import html_unescape + + +class SimpleCrypter(Crypter): + __name__ = "SimpleCrypter" + __version__ = "0.04" + __pattern__ = None + __type__ = "crypter" + __description__ = """Base crypter plugin""" + __author_name__ = ("stickell", "zoidberg") + __author_mail__ = ("l.stickell@yahoo.it", "zoidberg@mujmail.cz") + """ + These patterns should be defined by each crypter: + + LINK_PATTERN: group(1) must be a download link + example: <div class="link"><a href="(http://speedload.org/\w+) + + TITLE_PATTERN: (optional) the group defined by 'title' should be the title + example: <title>Files of: (?P<title>[^<]+) folder</title> + """ + + def decrypt(self, pyfile): + self.html = self.load(pyfile.url, decode=True) + + package_name, folder_name = self.getPackageNameAndFolder() + + package_links = re.findall(self.LINK_PATTERN, self.html) + self.logDebug('Package has %d links' % len(package_links)) + + if package_links: + self.packages = [(package_name, package_links, folder_name)] + else: + self.fail('Could not extract any links') + + def getPackageNameAndFolder(self): + if hasattr(self, 'TITLE_PATTERN'): + m = re.search(self.TITLE_PATTERN, self.html) + if m: + name = folder = html_unescape(m.group('title').strip()) + self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder)) + return name, folder + + name = self.pyfile.package().name + folder = self.pyfile.package().folder + self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder)) + return name, folder diff --git a/pyload/plugins/internal/SimpleHoster.py b/pyload/plugins/internal/SimpleHoster.py new file mode 100644 index 000000000..7b1d7323a --- /dev/null +++ b/pyload/plugins/internal/SimpleHoster.py @@ -0,0 +1,251 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: zoidberg +""" +from urlparse import urlparse +import re +from time import time + +from module.plugins.Hoster import Hoster +from module.utils import html_unescape, fixup, parseFileSize +from module.network.RequestFactory import getURL +from module.network.CookieJar import CookieJar + +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + #self.logDebug(rf, rt, string) + return string + +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + domain, name, value = cookie + cj.setCookie(domain, name, value) + +def parseHtmlTagAttrValue(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + +def parseHtmlForm(attr_str, html, input_names=None): + for form in re.finditer(r"(?P<tag><form[^>]*%s[^>]*>)(?P<content>.*?)</?(form|body|html)[^>]*>" % attr_str, html, re.S | re.I): + inputs = {} + action = parseHtmlTagAttrValue("action", form.group('tag')) + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('content'), re.S | re.I): + name = parseHtmlTagAttrValue("name", inputtag.group(1)) + if name: + value = parseHtmlTagAttrValue("value", inputtag.group(1)) + if value is None: + inputs[name] = inputtag.group(3) or '' + else: + inputs[name] = value + + if isinstance(input_names, dict): + # check input attributes + for key, val in input_names.items(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] == val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break # attibute value does not match + else: + break # attibute name does not match + else: + return action, inputs # passed attribute check + else: + # no attribute check + return action, inputs + + return {}, None # no matching form found + +def parseFileInfo(self, url = '', html = ''): + info = {"name" : url, "size" : 0, "status" : 3} + + if hasattr(self, "pyfile"): + url = self.pyfile.url + + if hasattr(self, "req") and self.req.http.code == '404': + info['status'] = 1 + else: + if not html and hasattr(self, "html"): html = self.html + if isinstance(self.SH_BROKEN_ENCODING, (str, unicode)): + html = unicode(html, self.SH_BROKEN_ENCODING) + if hasattr(self, "html"): self.html = html + + if hasattr(self, "FILE_OFFLINE_PATTERN") and re.search(self.FILE_OFFLINE_PATTERN, html): + # File offline + info['status'] = 1 + else: + online = False + try: + info.update(re.match(self.__pattern__, url).groupdict()) + except: + pass + + for pattern in ("FILE_INFO_PATTERN", "FILE_NAME_PATTERN", "FILE_SIZE_PATTERN"): + try: + info.update(re.search(getattr(self, pattern), html).groupdict()) + online = True + except AttributeError: + continue + + if online: + # File online, return name and size + info['status'] = 2 + if 'N' in info: + info['name'] = replace_patterns(info['N'], self.FILE_NAME_REPLACEMENTS) + if 'S' in info: + size = replace_patterns(info['S'] + info['U'] if 'U' in info else info['S'], self.FILE_SIZE_REPLACEMENTS) + info['size'] = parseFileSize(size) + elif isinstance(info['size'], (str, unicode)): + if 'units' in info: info['size'] += info['units'] + info['size'] = parseFileSize(info['size']) + + if hasattr(self, "file_info"): + self.file_info = info + + return info['name'], info['size'], info['status'], url + +def create_getInfo(plugin): + def getInfo(urls): + for url in urls: + cj = CookieJar(plugin.__name__) + if isinstance(plugin.SH_COOKIES, list): set_cookies(cj, plugin.SH_COOKIES) + file_info = parseFileInfo(plugin, url, getURL(replace_patterns(url, plugin.FILE_URL_REPLACEMENTS), \ + decode = not plugin.SH_BROKEN_ENCODING, cookies = cj)) + yield file_info + return getInfo + +def timestamp(): + return int(time()*1000) + +class PluginParseError(Exception): + def __init__(self, msg): + Exception.__init__(self) + self.value = 'Parse error (%s) - plugin may be out of date' % msg + def __str__(self): + return repr(self.value) + +class SimpleHoster(Hoster): + __name__ = "SimpleHoster" + __version__ = "0.28" + __pattern__ = None + __type__ = "hoster" + __description__ = """Base hoster plugin""" + __author_name__ = ("zoidberg") + __author_mail__ = ("zoidberg@mujmail.cz") + """ + These patterns should be defined by each hoster: + FILE_INFO_PATTERN = r'(?P<N>file_name) (?P<S>file_size) (?P<U>units)' + or FILE_NAME_PATTERN = r'(?P<N>file_name)' + and FILE_SIZE_PATTERN = r'(?P<S>file_size) (?P<U>units)' + FILE_OFFLINE_PATTERN = r'File (deleted|not found)' + TEMP_OFFLINE_PATTERN = r'Server maintenance' + """ + + FILE_SIZE_REPLACEMENTS = [] + FILE_NAME_REPLACEMENTS = [("&#?\w+;", fixup)] + FILE_URL_REPLACEMENTS = [] + + SH_BROKEN_ENCODING = False # Set to True or encoding name if encoding in http header is not correct + SH_COOKIES = True # or False or list of tuples [(domain, name, value)] + SH_CHECK_TRAFFIC = False # True = force check traffic left for a premium account + + def init(self): + self.file_info = {} + + def setup(self): + self.resumeDownload = self.multiDL = True if self.premium else False + if isinstance(self.SH_COOKIES, list): set_cookies(self.req.cj, self.SH_COOKIES) + + def process(self, pyfile): + pyfile.url = replace_patterns(pyfile.url, self.FILE_URL_REPLACEMENTS) + self.req.setOption("timeout", 120) + self.html = self.load(pyfile.url, decode = not self.SH_BROKEN_ENCODING, cookies = self.SH_COOKIES) + self.getFileInfo() + if self.premium and (not self.SH_CHECK_TRAFFIC or self.checkTrafficLeft()): + self.handlePremium() + else: + self.handleFree() + + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): + if type(url) == unicode: url = url.encode('utf8') + return Hoster.load(self, url=url, get=get, post=post, ref=ref, cookies=cookies, just_header=just_header, decode=decode) + + def getFileInfo(self): + self.logDebug("URL: %s" % self.pyfile.url) + if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html): + self.tempOffline() + + name, size, status = parseFileInfo(self)[:3] + + if status == 1: + self.offline() + elif status != 2: + self.logDebug(self.file_info) + self.parseError('File info') + + if name: + self.pyfile.name = name + else: + self.pyfile.name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1]) + + if size: + self.pyfile.size = size + else: + self.logError("File size not parsed") + + self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size)) + return self.file_info + + def handleFree(self): + self.fail("Free download not implemented") + + def handlePremium(self): + self.fail("Premium download not implemented") + + def parseError(self, msg): + raise PluginParseError(msg) + + def longWait(self, wait_time = None, max_tries = 3): + if wait_time and isinstance(wait_time, (int, long, float)): + time_str = "%dh %dm" % divmod(wait_time / 60, 60) + else: + wait_time = 900 + time_str = "(unknown time)" + max_tries = 100 + + self.logInfo("Download limit reached, reconnect or wait %s" % time_str) + + self.setWait(wait_time, True) + self.wait() + self.retry(max_tries = max_tries, reason="Download limit reached") + + def parseHtmlForm(self, attr_str='', input_names=None): + return parseHtmlForm(attr_str, self.html, input_names) + + def checkTrafficLeft(self): + traffic = self.account.getAccountInfo(self.user, True)["trafficleft"] + if traffic == -1: + return True + size = self.pyfile.size / 1024 + self.logInfo("Filesize: %i KiB, Traffic left for user %s: %i KiB" % (size, self.user, traffic)) + return size <= traffic
\ No newline at end of file diff --git a/pyload/plugins/internal/UnRar.py b/pyload/plugins/internal/UnRar.py new file mode 100644 index 000000000..7becd663c --- /dev/null +++ b/pyload/plugins/internal/UnRar.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +import os +import re +from glob import glob +from subprocess import Popen, PIPE +from string import digits + +from module.utils.fs import save_join, decode, fs_encode +from module.plugins.internal.AbstractExtractor import AbtractExtractor, WrongPassword, ArchiveError, CRCError + +class UnRar(AbtractExtractor): + __name__ = "UnRar" + __version__ = "0.13" + + # there are some more uncovered rar formats + re_splitfile = re.compile(r"(.*)\.part(\d+)\.rar$", re.I) + re_partfiles = re.compile(r".*\.(rar|r[0-9]+)", re.I) + re_filelist = re.compile(r"(.+)\s+(\d+)\s+(\d+)\s+") + re_wrongpwd = re.compile("(Corrupt file or wrong password|password incorrect)", re.I) + CMD = "unrar" + + @staticmethod + def checkDeps(): + if os.name == "nt": + UnRar.CMD = save_join(pypath, "UnRAR.exe") + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + else: + try: + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + except OSError: + + #fallback to rar + UnRar.CMD = "rar" + p = Popen([UnRar.CMD], stdout=PIPE, stderr=PIPE) + p.communicate() + + return True + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if not file.endswith(".rar"): continue + + match = UnRar.re_splitfile.findall(file) + if match: + #only add first parts + if int(match[0][1]) == 1: + result.append((file, id)) + else: + result.append((file, id)) + + return result + + + def init(self): + self.passwordProtected = False + self.headerProtected = False #list files will not work without password + self.smallestFile = None #small file to test passwords + self.password = "" #save the correct password + + def checkArchive(self): + p = self.call_unrar("l", "-v", fs_encode(self.file)) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + self.passwordProtected = True + self.headerProtected = True + return True + + # output only used to check if passworded files are present + for name, size, packed in self.re_filelist.findall(out): + if name.startswith("*"): + self.passwordProtected = True + return True + + self.listContent() + if not self.files: + raise ArchiveError("Empty Archive") + + return False + + def checkPassword(self, password): + #at this point we can only verify header protected files + if self.headerProtected: + p = self.call_unrar("l", "-v", fs_encode(self.file), password=password) + out, err = p.communicate() + if self.re_wrongpwd.search(err): + return False + + return True + + + def extract(self, progress, password=None): + command = "x" if self.fullpath else "e" + + p = self.call_unrar(command, fs_encode(self.file), self.out, password=password) + renice(p.pid, self.renice) + + progress(0) + progressstring = "" + while True: + c = p.stdout.read(1) + # quit loop on eof + if not c: + break + # reading a percentage sign -> set progress and restart + if c == '%': + progress(int(progressstring)) + progressstring = "" + # not reading a digit -> therefore restart + elif c not in digits: + progressstring = "" + # add digit to progressstring + else: + progressstring = progressstring + c + progress(100) + + # retrieve stderr + err = p.stderr.read() + + if "CRC failed" in err and not password and not self.passwordProtected: + raise CRCError + elif "CRC failed" in err: + raise WrongPassword + if err.strip(): #raise error if anything is on stderr + raise ArchiveError(err.strip()) + if p.returncode: + raise ArchiveError("Process terminated") + + if not self.files: + self.password = password + self.listContent() + + + def getDeleteFiles(self): + if ".part" in self.file: + return glob(re.sub("(?<=\.part)([01]+)", "*", self.file, re.IGNORECASE)) + # get files which matches .r* and filter unsuited files out + parts = glob(re.sub(r"(?<=\.r)ar$", "*", self.file, re.IGNORECASE)) + return filter(lambda x: self.re_partfiles.match(x), parts) + + def listContent(self): + command = "vb" if self.fullpath else "lb" + p = self.call_unrar(command, "-v", fs_encode(self.file), password=self.password) + out, err = p.communicate() + + if "Cannot open" in err: + raise ArchiveError("Cannot open file") + + if err.strip(): # only log error at this point + self.m.logError(err.strip()) + + result = set() + + for f in decode(out).splitlines(): + f = f.strip() + result.add(save_join(self.out, f)) + + self.files = result + + + def call_unrar(self, command, *xargs, **kwargs): + args = [] + #overwrite flag + args.append("-o+") if self.overwrite else args.append("-o-") + + # assume yes on all queries + args.append("-y") + + #set a password + if "password" in kwargs and kwargs["password"]: + args.append("-p%s" % kwargs["password"]) + else: + args.append("-p-") + + + #NOTE: return codes are not reliable, some kind of threading, cleanup whatever issue + call = [self.CMD, command] + args + list(xargs) + self.m.logDebug(" ".join([decode(arg) for arg in call])) + + p = Popen(call, stdout=PIPE, stderr=PIPE) + + return p + + +def renice(pid, value): + if os.name != "nt" and value: + try: + Popen(["renice", str(value), str(pid)], stdout=PIPE, stderr=PIPE, bufsize=-1) + except: + print "Renice failed" diff --git a/pyload/plugins/internal/UnZip.py b/pyload/plugins/internal/UnZip.py new file mode 100644 index 000000000..9aa9ac75c --- /dev/null +++ b/pyload/plugins/internal/UnZip.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +import zipfile +import sys + +from module.plugins.internal.AbstractExtractor import AbtractExtractor + +class UnZip(AbtractExtractor): + __name__ = "UnZip" + __version__ = "0.1" + + @staticmethod + def checkDeps(): + return sys.version_info[:2] >= (2, 6) + + @staticmethod + def getTargets(files_ids): + result = [] + + for file, id in files_ids: + if file.endswith(".zip"): + result.append((file, id)) + + return result + + def extract(self, progress, password=None): + z = zipfile.ZipFile(self.file) + self.files = z.namelist() + z.extractall(self.out) + + def getDeleteFiles(self): + return [self.file]
\ No newline at end of file diff --git a/pyload/plugins/internal/XFSPAccount.py b/pyload/plugins/internal/XFSPAccount.py new file mode 100644 index 000000000..8333c7265 --- /dev/null +++ b/pyload/plugins/internal/XFSPAccount.py @@ -0,0 +1,79 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: zoidberg +""" + +import re +from time import mktime, strptime +from module.plugins.Account import Account +from module.plugins.internal.SimpleHoster import parseHtmlForm +from module.utils import parseFileSize + +class XFSPAccount(Account): + __name__ = "XFSPAccount" + __version__ = "0.05" + __type__ = "account" + __description__ = """XFileSharingPro account base""" + __author_name__ = ("zoidberg") + __author_mail__ = ("zoidberg@mujmail.cz") + + MAIN_PAGE = None + + VALID_UNTIL_PATTERN = r'>Premium.[Aa]ccount expire:</TD><TD><b>([^<]+)</b>' + TRAFFIC_LEFT_PATTERN = r'>Traffic available today:</TD><TD><b>([^<]+)</b>' + + def loadAccountInfo(self, user, req): + html = req.load(self.MAIN_PAGE + "?op=my_account", decode = True) + + validuntil = trafficleft = None + premium = True if '>Renew premium<' in html else False + + found = re.search(self.VALID_UNTIL_PATTERN, html) + if found: + premium = True + trafficleft = -1 + try: + self.logDebug(found.group(1)) + validuntil = mktime(strptime(found.group(1), "%d %B %Y")) + except Exception, e: + self.logError(e) + else: + found = re.search(self.TRAFFIC_LEFT_PATTERN, html) + if found: + trafficleft = found.group(1) + if "Unlimited" in trafficleft: + premium = True + else: + trafficleft = parseFileSize(trafficleft) / 1024 + + return ({"validuntil": validuntil, "trafficleft": trafficleft, "premium": premium}) + + def login(self, user, data, req): + html = req.load('%slogin.html' % self.MAIN_PAGE, decode = True) + + action, inputs = parseHtmlForm('name="FL"', html) + if not inputs: + inputs = {"op": "login", + "redirect": self.MAIN_PAGE} + + inputs.update({"login": user, + "password": data['password']}) + + html = req.load(self.MAIN_PAGE, post = inputs, decode = True) + + if 'Incorrect Login or Password' in html or '>Error<' in html: + self.wrongPassword()
\ No newline at end of file diff --git a/pyload/plugins/internal/__init__.py b/pyload/plugins/internal/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/pyload/plugins/internal/__init__.py |