summary refs log tree commit diff stats
path: root/module/plugins/internal/Captcha.py
diff options
context:
space:
mode:
Diffstat (limited to 'module/plugins/internal/Captcha.py')
-rw-r--r-- module/plugins/internal/Captcha.py 149
1 files changed, 117 insertions, 32 deletions
diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py
index b4af46493..c08050ee8 100644
--- a/module/plugins/internal/Captcha.py
+++ b/module/plugins/internal/Captcha.py
@@ -1,56 +1,141 @@
# -*- coding: utf-8 -*-
-from module.plugins.Plugin import Base
+from __future__ import with_statement
+import os
+import time
+import traceback
-#@TODO: Extend (new) Plugin class; remove all `html` args
-class Captcha(Base):
+from module.plugins.internal.Plugin import Plugin
+
+
+class Captcha(Plugin):
__name__ = "Captcha"
__type__ = "captcha"
- __version__ = "0.29"
+ __version__ = "0.42"
+ __status__ = "testing"
- __description__ = """Base captcha service plugin"""
+ __description__ = """Base anti-captcha plugin"""
__license__ = "GPLv3"
- __authors__ = [("pyLoad Team", "admin@pyload.org")]
-
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
- key = None #: last key detected
+ def __init__(self, plugin): #@TODO: Pass pyfile instead plugin, so store plugin's html in its associated pyfile as data
+ self._init(plugin.pyload)
- def __init__(self, plugin):
self.plugin = plugin
- super(Captcha, self).__init__(plugin.core)
+ self.task = None #: captchaManager task
+
+ self.init()
+
+
+ def init(self):
+ """
+ Initialize additional data structures
+ """
+ pass
+
+
+ def _log(self, level, plugintype, pluginname, messages):
+ return self.plugin._log(level,
+ plugintype,
+ self.plugin.__name__,
+ (self.__name__,) + messages)
+
+
+ def recognize(self, image):
+ """
+ Extend to build your custom anti-captcha ocr
+ """
+ pass
+
+
+ def decrypt(self, url, get={}, post={}, ref=False, cookies=False, decode=False,
+ input_type='jpg', output_type='textual', ocr=True, timeout=120):
+ img = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode)
+ return self._decrypt(img, input_type, output_type, ocr, timeout)
+
+
+ #@TODO: Definitely choose a better name for this method!
+ def _decrypt(self, raw, input_type='jpg', output_type='textual', ocr=False, timeout=120):
+ """
+ Loads a captcha and decrypts it with ocr, plugin, user input
+
+ :param raw: image raw data
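+ :param get: get part for request (parameter of `decrypt`, not accepted here)
+ :param post: post part for request (parameter of `decrypt`, not accepted here)
+ :param cookies: True if cookies should be enabled (parameter of `decrypt`, not accepted here)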
+ :param input_type: Type of the Image
+ :param output_type: 'textual' if text is written on the captcha\
+ or 'positional' for captcha where the user has to click\
+ on a specific region on the captcha
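+ :param ocr: if True, this plugin's `recognize` is used; if a string, the ocr plugin with that name is loaded and used; if False, ocr is skipped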
+
+ :return: result of decrypting
+ """
+ result = ""
+ time_ref = ("%.2f" % time.time())[-6:].replace(".", "")
+
+ with open(os.path.join("tmp", "captcha_image_%s_%s.%s" % (self.plugin.__name__, time_ref, input_type)), "wb") as tmp_img:
+ tmp_img.write(raw)
+
+ if ocr:
+ if isinstance(ocr, basestring):
+ OCR = self.pyload.pluginManager.loadClass("captcha", ocr) #: Rename `captcha` to `ocr` in 0.4.10
+ result = OCR(self.plugin).recognize(tmp_img.name)
+ else:
+ result = self.recognize(tmp_img.name)
+
+ if not result:
+ captchaManager = self.pyload.captchaManager
+
+ try:
+ self.task = captchaManager.newTask(raw, input_type, tmp_img.name, output_type)
+
+ captchaManager.handleCaptcha(self.task)
+
+ self.task.setWaiting(max(timeout, 50)) #@TODO: Move to `CaptchaManager` in 0.4.10
+ while self.task.isWaiting():
+ if self.plugin.pyfile.abort:
+ self.plugin.abort()
+ time.sleep(1)
+
+ finally:
+ captchaManager.removeTask(self.task)
+
+ if self.task.error:
+ self.fail(self.task.error)
+ elif not self.task.result:
+ self.invalid()
+ self.plugin.retry(reason=_("No captcha result obtained in appropiate time"))
- #@TODO: Recheck in 0.4.10
- def fail(self, reason):
- self.plugin.fail(reason)
- raise AttributeError(reason)
+ result = self.task.result
+ if not self.pyload.debug:
+ try:
+ os.remove(tmp_img.name)
- #@TODO: Recheck in 0.4.10
- def retrieve_key(self, html):
- if self.detect_key(html):
- return self.key
- else:
- self.fail(_("%s key not found") % self.__name__)
+ except OSError, e:
+ self.log_warning(_("Error removing: %s") % tmp_img.name, e)
+ traceback.print_exc()
+ self.log_info(_("Captcha result: ") + result) #@TODO: Remove from here?
- #@TODO: Recheck in 0.4.10
- def retrieve_html(self):
- if hasattr(self.plugin, "html") and self.plugin.html:
- return self.plugin.html
- else:
- self.fail(_("%s html not found") % self.__name__)
+ return result
- def detect_key(self, html=None):
- raise NotImplementedError
+ def invalid(self):
+ if not self.task:
+ return
+ self.log_error(_("Invalid captcha"))
+ self.task.invalid()
- def challenge(self, key=None, html=None):
- raise NotImplementedError
+ def correct(self):
+ if not self.task:
+ return
- def result(self, server, challenge):
- raise NotImplementedError
+ self.log_info(_("Correct captcha"))
+ self.task.correct()