diff options
Diffstat (limited to 'module/plugins/internal/Plugin.py')
-rw-r--r-- | module/plugins/internal/Plugin.py | 608 |
1 files changed, 126 insertions, 482 deletions
diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py index da597ef42..0d4c3b165 100644 --- a/module/plugins/internal/Plugin.py +++ b/module/plugins/internal/Plugin.py @@ -1,65 +1,108 @@ # -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. +from __future__ import with_statement - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. +import os +import re +import urllib - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. +from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 +from module.utils import fs_encode, fs_decode, html_unescape, save_join as fs_join - @author: RaNaN, spoob, mkaay -""" -from time import time, sleep -from random import randint +def replace_patterns(string, ruleslist): + for r in ruleslist: + rf, rt = r + string = re.sub(rf, rt, string) + return string -import os -from os import remove, makedirs, chmod, stat -from os.path import exists, join -if os.name != "nt": - from os import chown - from pwd import getpwnam - from grp import getgrnam +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + domain, name, value = cookie + cj.setCookie(domain, name, value) -from itertools import islice -from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 -from module.utils import save_join as fs_join, save_path as safe_filename, fs_encode, fs_decode +def parseHtmlTagAttrValue(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parseHtmlForm(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.S | re.I): + inputs = {} + action = parseHtmlTagAttrValue("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.S | re.I): + name = parseHtmlTagAttrValue("name", inputtag.group(1)) + if name: + value = parseHtmlTagAttrValue("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or "" + else: + inputs[name] = value + + if input_names: + # check input attributes + for key, val in input_names.iteritems(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] == val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + break #: attibute value does not match + else: + break #: attibute name does not match + else: + return action, inputs #: passed attribute check + else: + # no attribute check + return action, inputs + + return {}, None #: no matching form found + def chunks(iterable, size): - it = iter(iterable) + it = iter(iterable) item = list(islice(it, size)) while item: yield item item = list(islice(it, size)) -class Base(object): - """ - A Base class with log/config/db methods *all* plugin types can use - """ +class Plugin(object): + __name__ = "Plugin" + __type__ = "hoster" + __version__ = "0.11" + + __pattern__ = r'^unmatchable$' + __config__ = [] #: [("name", "type", "desc", "default")] + + __description__ = """Base plugin""" + __license__ = "GPLv3" + __authors__ = [("RaNaN" , "RaNaN@pyload.org" ), + ("spoob" , "spoob@pyload.org" ), + ("mkaay" , "mkaay@mkaay.de" ), + ("Walter Purcaro", "vuolter@gmail.com")] + def __init__(self, core): - #: Core instance self.core = core + #: Provide information in dict here + self.info = {} + - #: Log functions def _log(self, level, args): log = getattr(self.core.log, level) - msg = " | ".join((fs_encode(a) if isinstance(a, unicode) else #@NOTE: `fs_encode` -> `encode` in 0.4.10 - str(a)).strip() for a in args if a) + msg = fs_encode(" | ".join((a if isinstance(a, basestring) else str(a)).strip() for a in args if a)) #@NOTE: `fs_encode` -> `encode` in 0.4.10 log("%(plugin)s%(id)s: %(msg)s" % {'plugin': self.__name__, - 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", - 'msg' : msg or _(level.upper() + " MARK")}) + 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", + 'msg' : msg or _(level.upper() + " MARK")}) def logDebug(self, *args): @@ -100,7 +143,7 @@ class Base(object): return self.setConfig(*args, **kwargs) - def getConfig(self, option, default=""): + def getConfig(self, option, default="", plugin=None): """ Returns config value for current plugin @@ -108,9 +151,10 @@ class Base(object): :return: """ try: - return self.core.config.getPlugin(self.__name__, option) + return self.core.config.getPlugin(plugin or self.__name__, option) except KeyError: + self.logWarning(_("Config option or plugin not found")) return default @@ -149,11 +193,11 @@ class Base(object): def fail(self, reason): """Fail and give reason""" - raise Fail(reason) + raise Fail(fs_encode(reason)) def error(self, reason="", type=_("Parse")): - if not reason and not type: + if not reason: type = _("Unknown") msg = _("%s error") % type.strip().capitalize() if type else _("Error") @@ -163,313 +207,73 @@ class Base(object): raise Fail(msg) -class Plugin(Base): - __name__ = "Plugin" - __type__ = "hoster" - __version__ = "0.10" + def fixurl(self, url): + return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip() - __pattern__ = r'^unmatchable$' - __config__ = [] #: [("name", "type", "desc", "default")] - __description__ = """Base plugin""" - __license__ = "GPLv3" - __authors__ = [("RaNaN", "RaNaN@pyload.org"), - ("spoob", "spoob@pyload.org"), - ("mkaay", "mkaay@mkaay.de" )] - - - def __init__(self, pyfile): - super(Plugin, self).__init__(pyfile.m.core) - - self.wantReconnect = False - #: enables simultaneous processing of multiple downloads - self.multiDL = True - self.limitDL = 0 - #: chunk limit - self.chunkLimit = 1 - self.resumeDownload = False - - #: time() + wait in seconds - self.waitUntil = 0 - self.waiting = False - - self.ocr = None #captcha reader instance - #: account handler instance, see :py:class:`Account` - self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) - - #: premium status - self.premium = False - #: username/login - self.user = None - - if self.account and not self.account.canUse(): self.account = None - if self.account: - self.user, data = self.account.selectAccount() - #: Browser instance, see `network.Browser` - self.req = self.account.getAccountRequest(self.user) - self.chunkLimit = -1 # chunk limit, -1 for unlimited - #: enables resume (will be ignored if server dont accept chunks) - self.resumeDownload = True - self.multiDL = True #every hoster with account should provide multiple downloads - #: premium status - self.premium = self.account.isPremium(self.user) - else: - self.req = pyfile.m.core.requestFactory.getRequest(self.__name__) - - #: associated pyfile instance, see `PyFile` - self.pyfile = pyfile - self.thread = None # holds thread in future - - #: location where the last call to download was saved - self.lastDownload = "" - #: re match of the last call to `checkDownload` - self.lastCheck = None - #: js engine, see `JsEngine` - self.js = self.core.js - self.cTask = None #captcha task - - self.retries = 0 # amount of retries already made - self.html = None # some plugins store html code here - - self.init() - - def getChunkCount(self): - if self.chunkLimit <= 0: - return self.core.config["download"]["chunks"] - return min(self.core.config["download"]["chunks"], self.chunkLimit) - - def __call__(self): - return self.__name__ - - def init(self): - """initialize the plugin (in addition to `__init__`)""" - pass - - def setup(self): - """ setup for enviroment and other things, called before downloading (possibly more than one time)""" - pass - - def preprocessing(self, thread): - """ handles important things to do before starting """ - self.thread = thread - - if self.account: - self.account.checkLogin(self.user) - else: - self.req.clearCookies() - - self.setup() - - self.pyfile.setStatus("starting") - - return self.process(self.pyfile) - - - def process(self, pyfile): - """the 'main' method of every plugin, you **have to** overwrite it""" - raise NotImplementedError - - def resetAccount(self): - """ dont use account and retry download """ - self.account = None - self.req = self.core.requestFactory.getRequest(self.__name__) - self.retry() - - def checksum(self, local_file=None): - """ - return codes: - 0 - checksum ok - 1 - checksum wrong - 5 - can't get checksum - 10 - not implemented - 20 - unknown error - """ - #@TODO checksum check hook - - return True, 10 - - - def setWait(self, seconds, reconnect=False): - """Set a specific wait time later used with `wait` - - :param seconds: wait time in seconds - :param reconnect: True if a reconnect would avoid wait time + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, req=None): """ - if reconnect: - self.wantReconnect = True - self.pyfile.waitUntil = time() + int(seconds) - - def wait(self): - """ waits the time previously set """ - self.waiting = True - self.pyfile.setStatus("waiting") - - while self.pyfile.waitUntil > time(): - self.thread.m.reconnecting.wait(2) - - if self.pyfile.abort: raise Abort - if self.thread.m.reconnecting.isSet(): - self.waiting = False - self.wantReconnect = False - raise Reconnect - - self.waiting = False - self.pyfile.setStatus("starting") - - def offline(self): - """ fail and indicate file is offline """ - raise Fail("offline") - - def tempOffline(self): - """ fail and indicates file ist temporary offline, the core may take consequences """ - raise Fail("temp. offline") - - def skip(self, reason): - raise Skip(reason) - - def retry(self, max_tries=3, wait_time=1, reason=""): - """Retries and begin again from the beginning - - :param max_tries: number of maximum retries - :param wait_time: time to wait in seconds - :param reason: reason for retrying, will be passed to fail if max_tries reached - """ - if 0 < max_tries <= self.retries: - if not reason: reason = "Max retries reached" - raise Fail(reason) - - self.wantReconnect = False - self.setWait(wait_time) - self.wait() - - self.retries += 1 - raise Retry(reason) - - def invalidCaptcha(self): - if self.cTask: - self.cTask.invalid() - - def correctCaptcha(self): - if self.cTask: - self.cTask.correct() - - def decryptCaptcha(self, url, get={}, post={}, cookies=False, forceUser=False, imgtype='jpg', - result_type='textual'): - """ Loads a captcha and decrypts it with ocr, plugin, user input - - :param url: url of captcha image - :param get: get part for request - :param post: post part for request - :param cookies: True if cookies should be enabled - :param forceUser: if True, ocr is not used - :param imgtype: Type of the Image - :param result_type: 'textual' if text is written on the captcha\ - or 'positional' for captcha where the user have to click\ - on a specific region on the captcha - - :return: result of decrypting - """ - - img = self.load(url, get=get, post=post, cookies=cookies) - - id = ("%.2f" % time())[-6:].replace(".", "") - temp_file = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") - temp_file.write(img) - temp_file.close() - - has_plugin = self.__name__ in self.core.pluginManager.captchaPlugins - - if self.core.captcha: - Ocr = self.core.pluginManager.loadClass("captcha", self.__name__) - else: - Ocr = None - - if Ocr and not forceUser: - sleep(randint(3000, 5000) / 1000.0) - if self.pyfile.abort: raise Abort - - ocr = Ocr() - result = ocr.get_captcha(temp_file.name) - else: - captchaManager = self.core.captchaManager - task = captchaManager.newTask(img, imgtype, temp_file.name, result_type) - self.cTask = task - captchaManager.handleCaptcha(task) - - while task.isWaiting(): - if self.pyfile.abort: - captchaManager.removeTask(task) - raise Abort - sleep(1) - - captchaManager.removeTask(task) - - if task.error and has_plugin: #ignore default error message since the user could use OCR - self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) - elif task.error: - self.fail(task.error) - elif not task.result: - self.fail(_("No captcha result obtained in appropiate time by any of the plugins.")) - - result = task.result - self.core.log.debug("Received captcha result: %s" % str(result)) - - if not self.core.debug: - try: - remove(temp_file.name) - except: - pass - - return result - - - def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): - """Load content at url and returns it + Load content at url and returns it :param url: :param get: :param post: :param ref: :param cookies: - :param just_header: if True only the header will be retrieved and returned as dict + :param just_header: If True only the header will be retrieved and returned as dict :param decode: Wether to decode the output according to http header, should be True in most cases :return: Loaded content """ - if self.pyfile.abort: raise Abort - #utf8 vs decode -> please use decode attribute in all future plugins - if type(url) == unicode: url = str(url) + if hasattr(self, 'pyfile') and self.pyfile.abort: + self.abort() + + url = self.fixurl(url) - res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode) + if not url or not isinstance(url, basestring): + self.fail(_("No url given")) if self.core.debug: - from inspect import currentframe + self.logDebug("Load url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) + + if req is None: + if hasattr(self, "req"): + req = self.req + else: + req = self.core.requestFactory.getRequest(self.__name__) - frame = currentframe() - if not exists(join("tmp", self.__name__)): - makedirs(join("tmp", self.__name__)) + res = req.load(url, get, post, ref, cookies, just_header, True, bool(decode)) - f = open( - join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) - , "wb") - del frame # delete the frame or it wont be cleaned + if decode: + res = html_unescape(res) + if isinstance(decode, basestring): + res = res.decode(decode) + + if self.core.debug: + import inspect + + frame = inspect.currentframe() + framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) try: - tmp = res.encode("utf8") - except: - tmp = res + if not os.path.exists(os.path.join("tmp", self.__name__)): + os.makedirs(os.path.join("tmp", self.__name__)) - f.write(tmp) - f.close() + with open(framefile, "wb") as f: + del frame #: delete the frame or it wont be cleaned + f.write(res.encode('utf8')) + except IOError, e: + self.logError(e) if just_header: - #parse header - header = {"code": self.req.code} + # parse header + header = {"code": req.code} for line in res.splitlines(): line = line.strip() - if not line or ":" not in line: continue + if not line or ":" not in line: + continue key, none, value = line.partition(":") - key = key.lower().strip() + key = key.strip().lower() value = value.strip() if key in header: @@ -481,164 +285,4 @@ class Plugin(Base): header[key] = value res = header - return res - - def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=False): - """Downloads the content at url to download folder - - :param url: - :param get: - :param post: - :param ref: - :param cookies: - :param disposition: if True and server provides content-disposition header\ - the filename will be changed if needed - :return: The location where the file was saved - """ - - self.checkForSameFiles() - - self.pyfile.setStatus("downloading") - - download_folder = self.core.config['general']['download_folder'] - - location = fs_join(download_folder, self.pyfile.package().folder) - - if not exists(location): - makedirs(location, int(self.core.config["permission"]["folder"], 8)) - - if self.core.config["permission"]["change_dl"] and os.name != "nt": - try: - uid = getpwnam(self.core.config["permission"]["user"])[2] - gid = getgrnam(self.core.config["permission"]["group"])[2] - - chown(location, uid, gid) - except Exception, e: - self.core.log.warning(_("Setting User and Group failed: %s") % str(e)) - - # convert back to unicode - location = fs_decode(location) - name = safe_filename(self.pyfile.name) - - filename = join(location, name) - - self.core.hookManager.dispatchEvent("downloadStarts", self.pyfile, url, filename) - - try: - newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, - chunks=self.getChunkCount(), resume=self.resumeDownload, - progressNotify=self.pyfile.setProgress, disposition=disposition) - finally: - self.pyfile.size = self.req.size - - if disposition and newname and newname != name: #triple check, just to be sure - self.core.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname}) - self.pyfile.name = newname - filename = join(location, newname) - - fs_filename = fs_encode(filename) - - if self.core.config["permission"]["change_file"]: - chmod(fs_filename, int(self.core.config["permission"]["file"], 8)) - - if self.core.config["permission"]["change_dl"] and os.name != "nt": - try: - uid = getpwnam(self.core.config["permission"]["user"])[2] - gid = getgrnam(self.core.config["permission"]["group"])[2] - - chown(fs_filename, uid, gid) - except Exception, e: - self.core.log.warning(_("Setting User and Group failed: %s") % str(e)) - - self.lastDownload = filename - return self.lastDownload - - def checkDownload(self, rules, api_size=0, max_size=50000, delete=True, read_size=0): - """ checks the content of the last downloaded file, re match is saved to `lastCheck` - - :param rules: dict with names and rules to match (compiled regexp or strings) - :param api_size: expected file size - :param max_size: if the file is larger then it wont be checked - :param delete: delete if matched - :param read_size: amount of bytes to read from files larger then max_size - :return: dictionary key of the first rule that matched - """ - lastDownload = fs_encode(self.lastDownload) - if not exists(lastDownload): return None - - size = stat(lastDownload) - size = size.st_size - - if api_size and api_size <= size: return None - elif size > max_size and not read_size: return None - self.core.log.debug("Download Check triggered") - f = open(lastDownload, "rb") - content = f.read(read_size if read_size else -1) - f.close() - #produces encoding errors, better log to other file in the future? - #self.core.log.debug("Content: %s" % content) - for name, rule in rules.iteritems(): - if type(rule) in (str, unicode): - if rule in content: - if delete: - remove(lastDownload) - return name - elif hasattr(rule, "search"): - m = rule.search(content) - if m: - if delete: - remove(lastDownload) - self.lastCheck = m - return name - - - def getPassword(self): - """ get the password the user provided in the package""" - password = self.pyfile.package().password - if not password: return "" - return password - - - def checkForSameFiles(self, starting=False): - """ checks if same file was/is downloaded within same package - - :param starting: indicates that the current download is going to start - :raises Skip: - """ - - pack = self.pyfile.package() - - for pyfile in self.core.files.cache.values(): - if pyfile != self.pyfile and pyfile.name == self.pyfile.name and pyfile.package().folder == pack.folder: - if pyfile.status in (0, 12): #finished or downloading - self.skip(pyfile.pluginname) - elif pyfile.status in ( - 5, 7) and starting: #a download is waiting/starting and was appenrently started before - self.skip(pyfile.pluginname) - - download_folder = self.core.config['general']['download_folder'] - location = fs_join(download_folder, pack.folder, self.pyfile.name) - - if starting and self.core.config['download']['skip_existing'] and exists(location): - size = os.stat(location).st_size - if size >= self.pyfile.size: - self.skip("File exists") - - pyfile = self.core.db.findDuplicates(self.pyfile.id, self.pyfile.package().folder, self.pyfile.name) - if pyfile: - if exists(location): - self.skip(pyfile[0]) - - self.core.log.debug("File %s not skipped, because it does not exists" % self.pyfile.name) - - def clean(self): - """ clean everything and remove references """ - if hasattr(self, "pyfile"): - del self.pyfile - if hasattr(self, "req"): - self.req.close() - del self.req - if hasattr(self, "thread"): - del self.thread - if hasattr(self, "html"): - del self.html + return res
\ No newline at end of file |