diff options
Diffstat (limited to 'module/plugins/Plugin.py')
-rw-r--r-- | module/plugins/Plugin.py | 133 |
1 files changed, 88 insertions, 45 deletions
diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py index 426a406a9..a8eca5003 100644 --- a/module/plugins/Plugin.py +++ b/module/plugins/Plugin.py @@ -14,8 +14,9 @@ if os.name != "nt": from itertools import islice from traceback import print_exc +from urlparse import urlparse -from module.utils import save_join, save_path, fs_encode, fs_decode +from module.utils import fs_decode, fs_encode, html_unescape, save_join, save_path def chunks(iterable, size): @@ -67,7 +68,8 @@ class Base(object): def logDebug(self, *args): - return self._log("debug", args) + if self.core.debug: + return self._log("debug", args) def logInfo(self, *args): @@ -149,7 +151,7 @@ class Plugin(Base): """ __name__ = "Plugin" __type__ = "hoster" - __version__ = "0.05" + __version__ = "0.06" __pattern__ = None __config__ = [] #: [("name", "type", "desc", "default")] @@ -216,6 +218,7 @@ class Plugin(Base): self.js = self.core.js self.cTask = None #captcha task + self.html = None #@TODO: Move to hoster class self.retries = 0 # amount of retries already made self.init() @@ -289,33 +292,56 @@ class Plugin(Base): :param seconds: wait time in seconds :param reconnect: True if a reconnect would avoid wait time """ + wait_time = int(seconds) + 1 + wait_until = time() + wait_time + + self.logDebug("Set waitUntil to: %f (previous: %f)" % (wait_until, self.self.pyfile.waitUntil), + "Wait: %d seconds" % wait_time) + + self.pyfile.waitUntil = wait_until + if reconnect is not None: - self.wantReconnect = reconnect and not self.account - self.pyfile.waitUntil = time() + int(seconds) + 1 + self.logDebug("Set wantReconnect to: %s (previous: %s)" % (reconnect, self.wantReconnect)) + self.wantReconnect = reconnect def wait(self, seconds=0, reconnect=None): """ waits the time previously set """ - if seconds: - self.setWait(seconds, reconnect) + pyfile = self.pyfile + + self.setWait(seconds, reconnect) self.waiting = True - self.pyfile.setStatus("waiting") - while self.pyfile.waitUntil > time(): - self.thread.m.reconnecting.wait(2) + status = pyfile.status + pyfile.setStatus("waiting") - if self.pyfile.abort: - raise Abort + self.logDebug("WAIT: %d seconds" % wait_time, + "WAITUNTIL: %f" % pyfile.waitUntil, + "RECONNECT: %s" % self.wantReconnect) + + if not account: + self.logDebug("Ignore reconnection due account logged") - if self.thread.m.reconnecting.isSet(): - self.waiting = False - self.wantReconnect = False - raise Reconnect + while pyfile.waitUntil > time(): + self.thread.m.reconnecting.wait(2) + + if pyfile.abort: + self.abort() + + if self.thread.m.reconnecting.isSet(): + self.waiting = False + self.wantReconnect = False + raise Reconnect + else: + while pyfile.waitUntil > time(): + if pyfile.abort: + self.abort() self.waiting = False - self.pyfile.setStatus("starting") + + pyfile.status = status def fail(self, reason): @@ -323,6 +349,11 @@ class Plugin(Base): raise Fail(reason) + def abort(self, reason=""): + """ abort and give reason """ + raise Abort + + def error(self, reason="", type=""): if not reason and not type: type = "unknown" @@ -392,9 +423,9 @@ class Plugin(Base): img = self.load(url, get=get, post=post, cookies=cookies) id = ("%.2f" % time())[-6:].replace(".", "") - tmpCaptcha = open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") - tmpCaptcha.write(img) - tmpCaptcha.close() + + with open(join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") as tmpCaptcha: + tmpCaptcha.write(img) has_plugin = self.__name__ in self.core.pluginManager.captchaPlugins @@ -406,7 +437,7 @@ class Plugin(Base): if Ocr and not forceUser: sleep(randint(3000, 5000) / 1000.0) if self.pyfile.abort: - raise Abort + self.abort() ocr = Ocr() result = ocr.get_captcha(tmpCaptcha.name) @@ -419,7 +450,7 @@ class Plugin(Base): while task.isWaiting(): if self.pyfile.abort: captchaManager.removeTask(task) - raise Abort + self.abort() sleep(1) captchaManager.removeTask(task) @@ -443,7 +474,7 @@ class Plugin(Base): return result - def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False): + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=False, follow_location=True, save_cookies=True): """Load content at url and returns it :param url: @@ -451,22 +482,27 @@ class Plugin(Base): :param post: :param ref: :param cookies: - :param just_header: if True only the header will be retrieved and returned as dict + :param just_header: If True only the header will be retrieved and returned as dict :param decode: Wether to decode the output according to http header, should be True in most cases + :param follow_location: If True follow location else not + :param save_cookies: If True saves received cookies else discard them :return: Loaded content """ if self.pyfile.abort: - raise Abort + self.abort() - url = url.strip() + if not url: + self.fail(_"No url given")) if type(url) == unicode: # utf8 vs decode -> please use decode attribute in all future plugins url = str(url) #: encode('utf8') + url = url.strip() + if self.core.debug: self.logDebug("Load url: " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) - res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode) + res = self.req.load(url, get, post, ref, cookies, just_header, decode=decode, follow_location=follow_location, save_cookies=save_cookies) if self.core.debug: from inspect import currentframe @@ -475,18 +511,13 @@ class Plugin(Base): if not exists(join("tmp", self.__name__)): makedirs(join("tmp", self.__name__)) - f = open( - join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) - , "wb") - del frame # delete the frame or it wont be cleaned - + framefile = save_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) try: - tmp = res.encode("utf8") - except: - tmp = res - - f.write(tmp) - f.close() + with open(framefile, "wb") as f: + del frame #: delete the frame or it wont be cleaned + f.write(fs_encode(res)) + except IOError, e: + self.logError(str(e)) if just_header: #parse header @@ -524,7 +555,13 @@ class Plugin(Base): :return: The location where the file was saved """ if self.pyfile.abort: - raise Abort + self.abort() + + if not url: + self.fail(_"No url given")) + + if type(url) == unicode: + url = str(url) url = url.strip() @@ -563,6 +600,7 @@ class Plugin(Base): newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, cookies=cookies, chunks=self.getChunkCount(), resume=self.resumeDownload, progressNotify=self.pyfile.setProgress, disposition=disposition) + newname = urlparse(html_unescape(newname)).path.split("/")[-1] finally: self.pyfile.size = self.req.size @@ -600,7 +638,8 @@ class Plugin(Base): :return: dictionary key of the first rule that matched """ lastDownload = fs_encode(self.lastDownload) - if not exists(lastDownload): return None + if not exists(lastDownload): + return None size = stat(lastDownload) size = size.st_size @@ -608,9 +647,10 @@ class Plugin(Base): if api_size and api_size <= size: return None elif size > max_size and not read_size: return None self.logDebug("Download Check triggered") - f = open(lastDownload, "rb") - content = f.read(read_size if read_size else -1) - f.close() + + with open(lastDownload, "rb") as f: + content = f.read(read_size if read_size else -1) + #produces encoding errors, better log to other file in the future? #self.logDebug("Content: %s" % content) for name, rule in rules.iteritems(): @@ -672,10 +712,13 @@ class Plugin(Base): """ clean everything and remove references """ if hasattr(self, "pyfile"): del self.pyfile + if hasattr(self, "req"): self.req.close() del self.req + if hasattr(self, "thread"): del self.thread - # if hasattr(self, "html"): - # del self.html + + if hasattr(self, "html"): + del self.html |