diff options
Diffstat (limited to 'module/plugins/internal/Captcha.py')
-rw-r--r-- | module/plugins/internal/Captcha.py | 149 |
1 files changed, 117 insertions, 32 deletions
diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index b4af46493..c08050ee8 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -1,56 +1,141 @@ # -*- coding: utf-8 -*- -from module.plugins.Plugin import Base +from __future__ import with_statement +import os +import time +import traceback -#@TODO: Extend (new) Plugin class; remove all `html` args -class Captcha(Base): +from module.plugins.internal.Plugin import Plugin + + +class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" - __version__ = "0.29" + __version__ = "0.42" + __status__ = "testing" - __description__ = """Base captcha service plugin""" + __description__ = """Base anti-captcha plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team", "admin@pyload.org")] - + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - key = None #: last key detected + def __init__(self, plugin): #@TODO: Pass pyfile instead plugin, so store plugin's html in its associated pyfile as data + self._init(plugin.pyload) - def __init__(self, plugin): self.plugin = plugin - super(Captcha, self).__init__(plugin.core) + self.task = None #: captchaManager task + + self.init() + + + def init(self): + """ + Initialize additional data structures + """ + pass + + + def _log(self, level, plugintype, pluginname, messages): + return self.plugin._log(level, + plugintype, + self.plugin.__name__, + (self.__name__,) + messages) + + + def recognize(self, image): + """ + Extend to build your custom anti-captcha ocr + """ + pass + + + def decrypt(self, url, get={}, post={}, ref=False, cookies=False, decode=False, + input_type='jpg', output_type='textual', ocr=True, timeout=120): + img = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode) + return self._decrypt(img, input_type, output_type, ocr, timeout) + + + #@TODO: Definitely choose a better name for this method! + def _decrypt(self, raw, input_type='jpg', output_type='textual', ocr=False, timeout=120): + """ + Loads a captcha and decrypts it with ocr, plugin, user input + + :param raw: image raw data + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param input_type: Type of the Image + :param output_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + :param ocr: if True, ocr is not used + + :return: result of decrypting + """ + result = "" + time_ref = ("%.2f" % time.time())[-6:].replace(".", "") + + with open(os.path.join("tmp", "captcha_image_%s_%s.%s" % (self.plugin.__name__, time_ref, input_type)), "wb") as tmp_img: + tmp_img.write(raw) + + if ocr: + if isinstance(ocr, basestring): + OCR = self.pyload.pluginManager.loadClass("captcha", ocr) #: Rename `captcha` to `ocr` in 0.4.10 + result = OCR(self.plugin).recognize(tmp_img.name) + else: + result = self.recognize(tmp_img.name) + + if not result: + captchaManager = self.pyload.captchaManager + + try: + self.task = captchaManager.newTask(raw, input_type, tmp_img.name, output_type) + + captchaManager.handleCaptcha(self.task) + + self.task.setWaiting(max(timeout, 50)) #@TODO: Move to `CaptchaManager` in 0.4.10 + while self.task.isWaiting(): + if self.plugin.pyfile.abort: + self.plugin.abort() + time.sleep(1) + + finally: + captchaManager.removeTask(self.task) + + if self.task.error: + self.fail(self.task.error) + elif not self.task.result: + self.invalid() + self.plugin.retry(reason=_("No captcha result obtained in appropiate time")) - #@TODO: Recheck in 0.4.10 - def fail(self, reason): - self.plugin.fail(reason) - raise AttributeError(reason) + result = self.task.result + if not self.pyload.debug: + try: + os.remove(tmp_img.name) - #@TODO: Recheck in 0.4.10 - def retrieve_key(self, html): - if self.detect_key(html): - return self.key - else: - self.fail(_("%s key not found") % self.__name__) + except OSError, e: + self.log_warning(_("Error removing: %s") % tmp_img.name, e) + traceback.print_exc() + self.log_info(_("Captcha result: ") + result) #@TODO: Remove from here? - #@TODO: Recheck in 0.4.10 - def retrieve_html(self): - if hasattr(self.plugin, "html") and self.plugin.html: - return self.plugin.html - else: - self.fail(_("%s html not found") % self.__name__) + return result - def detect_key(self, html=None): - raise NotImplementedError + def invalid(self): + if not self.task: + return + self.log_error(_("Invalid captcha")) + self.task.invalid() - def challenge(self, key=None, html=None): - raise NotImplementedError + def correct(self): + if not self.task: + return - def result(self, server, challenge): - raise NotImplementedError + self.log_info(_("Correct captcha")) + self.task.correct() |