diff options
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- | module/plugins/internal/AdYouLike.py | 12 | ||||
-rw-r--r-- | module/plugins/internal/AdsCaptcha.py | 20 | ||||
-rw-r--r-- | module/plugins/internal/Captcha.py | 102 | ||||
-rw-r--r-- | module/plugins/internal/CaptchaService.py | 46 | ||||
-rw-r--r-- | module/plugins/internal/Hoster.py | 92 | ||||
-rw-r--r-- | module/plugins/internal/ReCaptcha.py | 32 | ||||
-rw-r--r-- | module/plugins/internal/SimpleHoster.py | 6 | ||||
-rw-r--r-- | module/plugins/internal/SolveMedia.py | 20 | ||||
-rw-r--r-- | module/plugins/internal/XFSHoster.py | 2 |
9 files changed, 179 insertions, 153 deletions
diff --git a/module/plugins/internal/AdYouLike.py b/module/plugins/internal/AdYouLike.py index fcb9b8372..d14babb51 100644 --- a/module/plugins/internal/AdYouLike.py +++ b/module/plugins/internal/AdYouLike.py @@ -3,10 +3,10 @@ import re from module.common.json_layer import json_loads -from module.plugins.internal.Captcha import Captcha +from module.plugins.internal.CaptchaService import CaptchaService -class AdYouLike(Captcha): +class AdYouLike(CaptchaService): __name__ = "AdYouLike" __type__ = "captcha" __version__ = "0.07" @@ -21,8 +21,8 @@ class AdYouLike(Captcha): CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' - def detect_key(self, html=None): - html = html or self.retrieve_html() + def detect_key(self, data=None): + html = data or self.retrieve_data() m = re.search(self.AYL_PATTERN, html) n = re.search(self.CALLBACK_PATTERN, html) @@ -35,8 +35,8 @@ class AdYouLike(Captcha): return None - def challenge(self, key=None, html=None): - ayl, callback = key or self.retrieve_key(html) + def challenge(self, key=None, data=None): + ayl, callback = key or self.retrieve_key(data) #: {'adyoulike':{'key':"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, #: 'all':{'element_id':"ayl_private_cap_92300",'lang':"fr",'env':"prod"}} diff --git a/module/plugins/internal/AdsCaptcha.py b/module/plugins/internal/AdsCaptcha.py index b99697a84..f487042e2 100644 --- a/module/plugins/internal/AdsCaptcha.py +++ b/module/plugins/internal/AdsCaptcha.py @@ -3,10 +3,10 @@ import random import re -from module.plugins.internal.Captcha import Captcha +from module.plugins.internal.CaptchaService import CaptchaService -class AdsCaptcha(Captcha): +class AdsCaptcha(CaptchaService): __name__ = "AdsCaptcha" __type__ = "captcha" __version__ = "0.10" @@ -21,8 +21,8 @@ class AdsCaptcha(Captcha): PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)' - def detect_key(self, html=None): - html = html or self.retrieve_html() + def detect_key(self, data=None): + html = data or self.retrieve_data() m = re.search(self.PUBLICKEY_PATTERN, html) n = re.search(self.CAPTCHAID_PATTERN, html) @@ -35,8 +35,8 @@ class AdsCaptcha(Captcha): return None - def challenge(self, key=None, html=None): - PublicKey, CaptchaId = key or self.retrieve_key(html) + def challenge(self, key=None, data=None): + PublicKey, CaptchaId = key or self.retrieve_key(data) html = self.plugin.load("http://api.adscaptcha.com/Get.aspx", get={'CaptchaId': CaptchaId, @@ -54,10 +54,10 @@ class AdsCaptcha(Captcha): def result(self, server, challenge): - result = self.plugin.decryptCaptcha("%sChallenge.aspx" % server, - get={'cid': challenge, 'dummy': random.random()}, - cookies=True, - imgtype="jpg") + result = self.decrypt_image("%sChallenge.aspx" % server, + get={'cid': challenge, 'dummy': random.random()}, + cookies=True, + input_type="jpg") self.log_debug("Result: %s" % result) diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index 814c36756..af7f66ed5 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -6,24 +6,28 @@ from module.plugins.internal.Plugin import Plugin class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" - __version__ = "0.31" + __version__ = "0.01" __status__ = "stable" - __description__ = """Base captcha service plugin""" + __description__ = """Base anti-captcha plugin""" __license__ = "GPLv3" __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - def __init__(self, plugin): + def __init__(self, plugin): #@TODO: pass pyfile instead plugin, so store plugin's html in its associated pyfile as data self.pyload = plugin.core self.info = {} #: Provide information in dict here self.plugin = plugin - self.key = None #: Last key detected + self.task = None #: captchaManager task self.init() + def _log(self, type, args): + return super(Captcha, self)._log(type, (self.plugin.__name__,) + args) + + def init(self): """ Initialize additional data structures @@ -31,29 +35,87 @@ class Captcha(Plugin): pass - #@TODO: Recheck in 0.4.10 - def retrieve_key(self, html): - if self.detect_key(html): - return self.key + def decrypt_image(self, url, get={}, post={}, ref=False, cookies=False, decode=False, + input_type='png', output_type='textual', try_ocr=True): + image = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode) + return self.decrypt(image, input_type, output_type, try_ocr) + + + def decrypt(self, data, input_type='png', output_type='textual', try_ocr=True): + """ + Loads a captcha and decrypts it with ocr, plugin, user input + + :param url: url of captcha image + :param get: get part for request + :param post: post part for request + :param cookies: True if cookies should be enabled + :param input_type: Type of the Image + :param output_type: 'textual' if text is written on the captcha\ + or 'positional' for captcha where the user have to click\ + on a specific region on the captcha + :param try_ocr: if True, ocr is not used + + :return: result of decrypting + """ + id = ("%.2f" % time.time())[-6:].replace(".", "") + + with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.plugin.__name__, id, input_type)), "wb") as tmpCaptcha: + tmpCaptcha.write(img) + + has_plugin = self.plugin.__name__ in self.pyload.pluginManager.ocrPlugins + + if self.pyload.captcha: + Ocr = self.pyload.pluginManager.loadClass("ocr", self.plugin.__name__) else: - self.fail(_("%s key not found") % self.__name__) + Ocr = None + if Ocr and try_ocr: + time.sleep(random.randint(3000, 5000) / 1000.0) + if self.pyfile.abort: + self.abort() - #@TODO: Recheck in 0.4.10 - def retrieve_html(self): - if hasattr(self.plugin, "html") and self.plugin.html: - return self.plugin.html + ocr = Ocr(self.pyfile) + result = ocr.get_captcha(tmpCaptcha.name) else: - self.fail(_("%s html not found") % self.__name__) + captchaManager = self.pyload.captchaManager + task = captchaManager.newTask(img, input_type, tmpCaptcha.name, output_type) + self.task = task + captchaManager.handleCaptcha(task) + + while task.isWaiting(): + if self.pyfile.abort: + captchaManager.removeTask(task) + self.abort() + time.sleep(1) + + captchaManager.removeTask(task) + + if task.error and has_plugin: #: Ignore default error message since the user could use try_ocr + self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) + elif task.error: + self.fail(task.error) + elif not task.result: + self.fail(_("No captcha result obtained in appropiate time by any of the plugins")) + + result = task.result + self.log_debug("Received captcha result: %s" % result) + if not self.pyload.debug: + try: + os.remove(tmpCaptcha.name) + except Exception: + pass - def detect_key(self, html=None): - raise NotImplementedError + return result - def challenge(self, key=None, html=None): - raise NotImplementedError + def invalid(self): + self.log_error(_("Invalid captcha")) + if self.task: + self.task.invalid() - def result(self, server, challenge): - raise NotImplementedError + def correct(self): + self.log_info(_("Correct captcha")) + if self.task: + self.task.correct() diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py new file mode 100644 index 000000000..05af8ccec --- /dev/null +++ b/module/plugins/internal/CaptchaService.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- + +from module.plugins.internal.Plugin import Plugin + + +class Captcha(Plugin): + __name__ = "Captcha" + __type__ = "captcha" + __version__ = "0.31" + __status__ = "stable" + + __description__ = """Base anti-captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + def init(self): + self.key = None #: Last key detected + + + #@TODO: Recheck in 0.4.10 + def retrieve_key(self, data): + if self.detect_key(data): + return self.key + else: + self.fail(_("%s key not found") % self.__name__) + + + #@TODO: Recheck in 0.4.10, html is now pyfile.data + def retrieve_data(self): + if hasattr(self.plugin, "html") and self.plugin.html: + return self.plugin.html + else: + self.fail(_("%s data not found") % self.__name__) + + + def detect_key(self, data=None): + raise NotImplementedError + + + def challenge(self, key=None, data=None): + raise NotImplementedError + + + def result(self, server, challenge): + raise NotImplementedError diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index 35e6ef23e..af3e80acf 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -12,6 +12,7 @@ if os.name != "nt": import grp import pwd +from module.plugins.internal import Captcha from module.plugins.internal.Plugin import (Plugin, Abort, Fail, Reconnect, Retry, Skip chunks, fixurl as _fixurl, replace_patterns, seconds_to_midnight, set_cookies, parse_html_form, parse_html_tag_attr_value, @@ -122,8 +123,8 @@ class Hoster(Plugin): #: Js engine, see `JsEngine` self.js = self.pyload.js - #: Captcha task - self.c_task = None + #: Captcha stuff + self.captcha = Captcha(self) #: Some plugins store html code here self.html = None @@ -257,7 +258,7 @@ class Hoster(Plugin): time.sleep(1) else: while pyfile.waitUntil > time.time(): - self.thread.m.reconnecting.wait(2) + self.thread.m.reconnecting.wait(1) if pyfile.abort: self.abort() @@ -329,89 +330,6 @@ class Hoster(Plugin): raise Retry(reason) - def invalid_captcha(self): - self.log_error(_("Invalid captcha")) - if self.c_task: - self.c_task.invalid() - - - def correct_captcha(self): - self.log_info(_("Correct captcha")) - if self.c_task: - self.c_task.correct() - - - def decrypt_captcha(self, url, get={}, post={}, cookies=False, forceUser=False, - imgtype='jpg', result_type='textual'): - """ - Loads a captcha and decrypts it with ocr, plugin, user input - - :param url: url of captcha image - :param get: get part for request - :param post: post part for request - :param cookies: True if cookies should be enabled - :param forceUser: if True, ocr is not used - :param imgtype: Type of the Image - :param result_type: 'textual' if text is written on the captcha\ - or 'positional' for captcha where the user have to click\ - on a specific region on the captcha - - :return: result of decrypting - """ - img = self.load(url, get=get, post=post, cookies=cookies) - - id = ("%.2f" % time.time())[-6:].replace(".", "") - - with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.__name__, id, imgtype)), "wb") as tmpCaptcha: - tmpCaptcha.write(img) - - has_plugin = self.__name__ in self.pyload.pluginManager.ocrPlugins - - if self.pyload.captcha: - Ocr = self.pyload.pluginManager.loadClass("ocr", self.__name__) - else: - Ocr = None - - if Ocr and not forceUser: - time.sleep(random.randint(3000, 5000) / 1000.0) - if self.pyfile.abort: - self.abort() - - ocr = Ocr(self.pyfile) - result = ocr.get_captcha(tmpCaptcha.name) - else: - captchaManager = self.pyload.captchaManager - task = captchaManager.newTask(img, imgtype, tmpCaptcha.name, result_type) - self.c_task = task - captchaManager.handleCaptcha(task) - - while task.isWaiting(): - if self.pyfile.abort: - captchaManager.removeTask(task) - self.abort() - time.sleep(1) - - captchaManager.removeTask(task) - - if task.error and has_plugin: #: Ignore default error message since the user could use OCR - self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting")) - elif task.error: - self.fail(task.error) - elif not task.result: - self.fail(_("No captcha result obtained in appropiate time by any of the plugins")) - - result = task.result - self.log_debug("Received captcha result: %s" % result) - - if not self.pyload.debug: - try: - os.remove(tmpCaptcha.name) - except Exception: - pass - - return result - - def fixurl(self, url): url = _fixurl(url) @@ -447,7 +365,7 @@ class Hoster(Plugin): if self.pyload.debug: self.log_debug("DOWNLOAD URL " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) - self.correct_captcha() + self.captcha.correct() self.check_for_same_files() self.pyfile.setStatus("downloading") diff --git a/module/plugins/internal/ReCaptcha.py b/module/plugins/internal/ReCaptcha.py index 4d04c07db..b4f9ef1eb 100644 --- a/module/plugins/internal/ReCaptcha.py +++ b/module/plugins/internal/ReCaptcha.py @@ -7,10 +7,10 @@ import urlparse from base64 import b64encode -from module.plugins.internal.Captcha import Captcha +from module.plugins.internal.CaptchaService import CaptchaService -class ReCaptcha(Captcha): +class ReCaptcha(CaptchaService): __name__ = "ReCaptcha" __type__ = "captcha" __version__ = "0.18" @@ -27,8 +27,8 @@ class ReCaptcha(Captcha): KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)' - def detect_key(self, html=None): - html = html or self.retrieve_html() + def detect_key(self, data=None): + html = data or self.retrieve_data() m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) if m: @@ -40,15 +40,15 @@ class ReCaptcha(Captcha): return None - def challenge(self, key=None, html=None, version=None): - key = key or self.retrieve_key(html) + def challenge(self, key=None, data=None, version=None): + key = key or self.retrieve_key(data) if version in (1, 2): return getattr(self, "_challenge_v%s" % version)(key) else: return self.challenge(key, - version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_html()) else 1) + version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_data()) else 1) def _challenge_v1(self, key): @@ -81,11 +81,11 @@ class ReCaptcha(Captcha): self.fail(_("ReCaptcha second challenge pattern not found")) self.log_debug("Second challenge: %s" % challenge) - result = self.plugin.decryptCaptcha("%simage" % server, - get={'c': challenge}, - cookies=True, - forceUser=True, - imgtype="jpg") + result = self.decrypt("%simage" % server, + get={'c': challenge}, + cookies=True, + input_type="jpg", + try_ocr=False) self.log_debug("Result: %s" % result) @@ -170,10 +170,10 @@ class ReCaptcha(Captcha): self.log_debug("Token #3: %s" % token3.group(1)) millis_captcha_loading = int(round(time.time() * 1000)) - captcha_response = self.plugin.decryptCaptcha("https://www.google.com/recaptcha/api2/payload", - get={'c':token3.group(1), 'k':key}, - cookies=True, - forceUser=True) + captcha_response = self.decrypt_image("https://www.google.com/recaptcha/api2/payload", + get={'c':token3.group(1), 'k':key}, + cookies=True, + try_ocr=False) response = b64encode('{"response":"%s"}' % captcha_response) self.log_debug("Result: %s" % response) diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 9d1ebc0bf..5960794cc 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -301,8 +301,8 @@ class SimpleHoster(Hoster): def check_file(self): lastDownload = fs_encode(self.last_download) - if self.c_task and not self.last_download: - self.invalid_captcha() + if self.captcha.task and not self.last_download: + self.captcha.invalid() self.retry(10, reason=_("Wrong captcha")) elif self.check_download({'Empty file': re.compile(r'\A((.|)(\2|\s)*)\Z')}, @@ -396,7 +396,7 @@ class SimpleHoster(Hoster): self.fail(_("Connection from your current IP address is not allowed")) elif re.search('captcha|code', errmsg, re.I): - self.invalid_captcha() + self.captcha.invalid() self.retry(10, reason=_("Wrong captcha")) elif re.search('countdown|expired', errmsg, re.I): diff --git a/module/plugins/internal/SolveMedia.py b/module/plugins/internal/SolveMedia.py index 5d701a5a2..ce4ebb007 100644 --- a/module/plugins/internal/SolveMedia.py +++ b/module/plugins/internal/SolveMedia.py @@ -3,10 +3,10 @@ import re from module.plugins.internal.Plugin import Fail -from module.plugins.internal.Captcha import Captcha +from module.plugins.internal.CaptchaService import CaptchaService -class SolveMedia(Captcha): +class SolveMedia(CaptchaService): __name__ = "SolveMedia" __type__ = "captcha" __version__ = "0.15" @@ -20,8 +20,8 @@ class SolveMedia(Captcha): KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' - def detect_key(self, html=None): - html = html or self.retrieve_html() + def detect_key(self, data=None): + html = data or self.retrieve_data() m = re.search(self.KEY_PATTERN, html) if m: @@ -33,8 +33,8 @@ class SolveMedia(Captcha): return None - def challenge(self, key=None, html=None): - key = key or self.retrieve_key(html) + def challenge(self, key=None, data=None): + key = key or self.retrieve_key(data) html = self.plugin.load("http://api.solvemedia.com/papi/challenge.noscript", get={'k': key}) @@ -95,10 +95,10 @@ class SolveMedia(Captcha): def result(self, server, challenge): - result = self.plugin.decryptCaptcha(server, - get={'c': challenge}, - cookies=True, - imgtype="gif") + result = self.decrypt_image(server, + get={'c': challenge}, + cookies=True, + input_type="gif") self.log_debug("Result: %s" % result) diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index 09799aa28..ec9a18a48 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -221,7 +221,7 @@ class XFSHoster(SimpleHoster): m = re.search(self.CAPTCHA_PATTERN, self.html) if m: captcha_url = m.group(1) - inputs['code'] = self.decrypt_captcha(captcha_url) + inputs['code'] = self.captcha.decrypt_image(captcha_url) return m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S) |