diff options
author | 2015-07-23 23:44:45 +0200 | |
---|---|---|
committer | 2015-07-23 23:44:45 +0200 | |
commit | 6af9b38a8d5d49355b85aef6ddd003605d6bba05 (patch) | |
tree | cbfb5b2212cab406ba75b3acd553879311e9153f /module/plugins/captcha | |
parent | Code cosmetics (diff) | |
download | pyload-6af9b38a8d5d49355b85aef6ddd003605d6bba05.tar.xz |
Improve Captcha
Diffstat (limited to 'module/plugins/captcha')
-rw-r--r-- | module/plugins/captcha/AdYouLike.py | 92 | ||||
-rw-r--r-- | module/plugins/captcha/AdsCaptcha.py | 64 | ||||
-rw-r--r-- | module/plugins/captcha/GigasizeCom.py | 2 | ||||
-rw-r--r-- | module/plugins/captcha/LinksaveIn.py | 2 | ||||
-rw-r--r-- | module/plugins/captcha/NetloadIn.py | 2 | ||||
-rw-r--r-- | module/plugins/captcha/ReCaptcha.py | 197 | ||||
-rw-r--r-- | module/plugins/captcha/ShareonlineBiz.py | 2 | ||||
-rw-r--r-- | module/plugins/captcha/SolveMedia.py | 105 |
8 files changed, 462 insertions, 4 deletions
diff --git a/module/plugins/captcha/AdYouLike.py b/module/plugins/captcha/AdYouLike.py new file mode 100644 index 000000000..d14babb51 --- /dev/null +++ b/module/plugins/captcha/AdYouLike.py @@ -0,0 +1,92 @@ +# -*- coding: utf-8 -*- + +import re + +from module.common.json_layer import json_loads +from module.plugins.internal.CaptchaService import CaptchaService + + +class AdYouLike(CaptchaService): + __name__ = "AdYouLike" + __type__ = "captcha" + __version__ = "0.07" + __status__ = "stable" + + __description__ = """AdYouLike captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + + AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)' + CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)' + + + def detect_key(self, data=None): + html = data or self.retrieve_data() + + m = re.search(self.AYL_PATTERN, html) + n = re.search(self.CALLBACK_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) + self.log_debug("Ayl: %s | Callback: %s" % self.key) + return self.key #: Key is the tuple(ayl, callback) + else: + self.log_warning(_("Ayl or callback pattern not found")) + return None + + + def challenge(self, key=None, data=None): + ayl, callback = key or self.retrieve_key(data) + + #: {'adyoulike':{'key':"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"}, + #: 'all':{'element_id':"ayl_private_cap_92300",'lang':"fr",'env':"prod"}} + ayl = json_loads(ayl) + + html = self.plugin.load("http://api-ayl.appspot.com/challenge", + get={'key' : ayl['adyoulike']['key'], + 'env' : ayl['all']['env'], + 'callback': callback}) + try: + challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1)) + + except AttributeError: + self.fail(_("AdYouLike challenge pattern not found")) + + self.log_debug("Challenge: %s" % challenge) + + return self.result(ayl, challenge), challenge + + + def result(self, server, challenge): + #: Adyoulike.g._jsonp_5579316662423138 + #: ({'translations':{'fr':{'instructions_visual':"Recopiez « Soonnight » ci-dessous :"}}, + #: 'site_under':true,'clickable':true,'pixels':{'VIDEO_050':[],'DISPLAY':[],'VIDEO_000':[],'VIDEO_100':[], + #: 'VIDEO_025':[],'VIDEO_075':[]},'medium_type':"image/adyoulike", + #: 'iframes':{'big':"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\" + #: height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},'shares':{},'id':256, + #: 'token':"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1",'formats':{'small':{'y':300,'x':0,'w':300,'h':60}, + #: 'big':{'y':0,'x':0,'w':300,'h':250},'hover':{'y':440,'x':0,'w':300,'h':60}}, + #: 'tid':"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"}) + + if isinstance(server, basestring): + server = json_loads(server) + + if isinstance(challenge, basestring): + challenge = json_loads(challenge) + + try: + instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual'] + result = re.search(u'«(.+?)»', instructions_visual).group(1).strip() + + except AttributeError: + self.fail(_("AdYouLike result not found")) + + result = {'_ayl_captcha_engine' : "adyoulike", + '_ayl_env' : server['all']['env'], + '_ayl_tid' : challenge['tid'], + '_ayl_token_challenge': challenge['token'], + '_ayl_response' : response} + + self.log_debug("Result: %s" % result) + + return result diff --git a/module/plugins/captcha/AdsCaptcha.py b/module/plugins/captcha/AdsCaptcha.py new file mode 100644 index 000000000..da0c531be --- /dev/null +++ b/module/plugins/captcha/AdsCaptcha.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- + +import random +import re + +from module.plugins.internal.CaptchaService import CaptchaService + + +class AdsCaptcha(CaptchaService): + __name__ = "AdsCaptcha" + __type__ = "captcha" + __version__ = "0.10" + __status__ = "stable" + + __description__ = """AdsCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?CaptchaId=(\d+)' + PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)' + + + def detect_key(self, data=None): + html = data or self.retrieve_data() + + m = re.search(self.PUBLICKEY_PATTERN, html) + n = re.search(self.CAPTCHAID_PATTERN, html) + if m and n: + self.key = (m.group(1).strip(), n.group(1).strip()) #: Key is the tuple(PublicKey, CaptchaId) + self.log_debug("Key: %s | ID: %s" % self.key) + return self.key + else: + self.log_warning(_("Key or id pattern not found")) + return None + + + def challenge(self, key=None, data=None): + PublicKey, CaptchaId = key or self.retrieve_key(data) + + html = self.plugin.load("http://api.adscaptcha.com/Get.aspx", + get={'CaptchaId': CaptchaId, + 'PublicKey': PublicKey}) + try: + challenge = re.search("challenge: '(.+?)',", html).group(1) + server = re.search("server: '(.+?)',", html).group(1) + + except AttributeError: + self.fail(_("AdsCaptcha challenge pattern not found")) + + self.log_debug("Challenge: %s" % challenge) + + return self.result(server, challenge), challenge + + + def result(self, server, challenge): + result = self.decrypt("%sChallenge.aspx" % server, + get={'cid': challenge, 'dummy': random.random()}, + cookies=True, + input_type="jpg") + + self.log_debug("Result: %s" % result) + + return result diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py index 19ad9d680..f71266b23 100644 --- a/module/plugins/captcha/GigasizeCom.py +++ b/module/plugins/captcha/GigasizeCom.py @@ -14,7 +14,7 @@ class GigasizeCom(OCR): __authors__ = [("pyLoad Team", "admin@pyload.org")] - def get_captcha(self, image): + def recognize(self, image): self.load_image(image) self.threshold(2.8) self.run_tesser(True, False, False, True) diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index 68704f21d..0a4731375 100644 --- a/module/plugins/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py @@ -137,7 +137,7 @@ class LinksaveIn(OCR): self.pixels = self.image.load() - def get_captcha(self, image): + def recognize(self, image): self.load_image(image) bg = self.get_bg() self.substract_bg(bg) diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index b6ba2b6e9..56b7c9196 100644 --- a/module/plugins/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py @@ -14,7 +14,7 @@ class NetloadIn(OCR): __authors__ = [("pyLoad Team", "admin@pyload.org")] - def get_captcha(self, image): + def recognize(self, image): self.load_image(image) self.to_greyscale() self.clean(3) diff --git a/module/plugins/captcha/ReCaptcha.py b/module/plugins/captcha/ReCaptcha.py new file mode 100644 index 000000000..8f9755961 --- /dev/null +++ b/module/plugins/captcha/ReCaptcha.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +import random +import re +import time +import urlparse + +from base64 import b64encode + +from module.plugins.internal.CaptchaService import CaptchaService + + +class ReCaptcha(CaptchaService): + __name__ = "ReCaptcha" + __type__ = "captcha" + __version__ = "0.18" + __status__ = "stable" + + __description__ = """ReCaptcha captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org"), + ("Walter Purcaro", "vuolter@gmail.com"), + ("zapp-brannigan", "fuerst.reinje@web.de")] + + + KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)' + KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)' + + + def detect_key(self, data=None): + html = data or self.retrieve_data() + + m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.log_debug("Key: %s" % self.key) + return self.key + else: + self.log_warning(_("Key pattern not found")) + return None + + + def challenge(self, key=None, data=None, version=None): + key = key or self.retrieve_key(data) + + if version in (1, 2): + return getattr(self, "_challenge_v%s" % version)(key) + + else: + return self.challenge(key, + version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_data()) else 1) + + + def _challenge_v1(self, key): + html = self.plugin.load("http://www.google.com/recaptcha/api/challenge", + get={'k': key}) + try: + challenge = re.search("challenge : '(.+?)',", html).group(1) + server = re.search("server : '(.+?)',", html).group(1) + + except AttributeError: + self.fail(_("ReCaptcha challenge pattern not found")) + + self.log_debug("Challenge: %s" % challenge) + + return self.result(server, challenge, key) + + + def result(self, server, challenge, key): + self.plugin.load("http://www.google.com/recaptcha/api/js/recaptcha.js") + html = self.plugin.load("http://www.google.com/recaptcha/api/reload", + get={'c' : challenge, + 'k' : key, + 'reason': "i", + 'type' : "image"}) + + try: + challenge = re.search('\(\'(.+?)\',',html).group(1) + + except AttributeError: + self.fail(_("ReCaptcha second challenge pattern not found")) + + self.log_debug("Second challenge: %s" % challenge) + result = self.decrypt("%simage" % server, + get={'c': challenge}, + cookies=True, + input_type="jpg", + ocr=False) + + self.log_debug("Result: %s" % result) + + return result, challenge + + + def _collect_api_info(self): + html = self.plugin.load("http://www.google.com/recaptcha/api.js") + a = re.search(r'po.src = \'(.*?)\';', html).group(1) + vers = a.split("/")[5] + + self.log_debug("API version: %s" % vers) + + language = a.split("__")[1].split(".")[0] + + self.log_debug("API language: %s" % language) + + html = self.plugin.load("https://apis.google.com/js/api.js") + b = re.search(r'"h":"(.*?)","', html).group(1) + jsh = b.decode('unicode-escape') + + self.log_debug("API jsh-string: %s" % jsh) + + return vers, language, jsh + + + def _prepare_time_and_rpc(self): + self.plugin.load("http://www.google.com/recaptcha/api2/demo") + + millis = int(round(time.time() * 1000)) + + self.log_debug("Time: %s" % millis) + + rand = random.randint(1, 99999999) + a = "0.%s" % str(rand * 2147483647) + rpc = int(100000000 * float(a)) + + self.log_debug("Rpc-token: %s" % rpc) + + return millis, rpc + + + def _challenge_v2(self, key, parent=None): + if parent is None: + try: + parent = urlparse.urljoin("http://", urlparse.urlparse(self.plugin.pyfile.url).netloc) + + except Exception: + parent = "" + + botguardstring = "!A" + vers, language, jsh = self._collect_api_info() + millis, rpc = self._prepare_time_and_rpc() + + html = self.plugin.load("https://www.google.com/recaptcha/api2/anchor", + get={'k' : key, + 'hl' : language, + 'v' : vers, + 'usegapi' : "1", + 'jsh' : "%s#id=IO_%s" % (jsh, millis), + 'parent' : parent, + 'pfname' : "", + 'rpctoken': rpc}) + + token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html) + self.log_debug("Token #1: %s" % token1.group(1)) + + html = self.plugin.load("https://www.google.com/recaptcha/api2/frame", + get={'c' : token1.group(1), + 'hl' : language, + 'v' : vers, + 'bg' : botguardstring, + 'k' : key, + 'usegapi': "1", + 'jsh' : jsh}, + decode="unicode-escape") + + token2 = re.search(r'"finput","(.*?)",', html) + self.log_debug("Token #2: %s" % token2.group(1)) + + token3 = re.search(r'"rresp","(.*?)",', html) + self.log_debug("Token #3: %s" % token3.group(1)) + + millis_captcha_loading = int(round(time.time() * 1000)) + captcha_response = self.decrypt("https://www.google.com/recaptcha/api2/payload", + get={'c':token3.group(1), 'k':key}, + cookies=True, + ocr=False) + response = b64encode('{"response":"%s"}' % captcha_response) + + self.log_debug("Result: %s" % response) + + timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading + timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500) + + html = self.plugin.load("https://www.google.com/recaptcha/api2/userverify", + post={'k' : key, + 'c' : token3.group(1), + 'response': response, + 't' : timeToSolve, + 'ct' : timeToSolveMore, + 'bg' : botguardstring}) + + token4 = re.search(r'"uvresp","(.*?)",', html) + self.log_debug("Token #4: %s" % token4.group(1)) + + result = token4.group(1) + + return result, None diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index 7f25f164d..98994b121 100644 --- a/module/plugins/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py @@ -14,7 +14,7 @@ class ShareonlineBiz(OCR): __authors__ = [("RaNaN", "RaNaN@pyload.org")] - def get_captcha(self, image): + def recognize(self, image): self.load_image(image) self.to_greyscale() self.image = self.image.resize((160, 50)) diff --git a/module/plugins/captcha/SolveMedia.py b/module/plugins/captcha/SolveMedia.py new file mode 100644 index 000000000..cbac2dec2 --- /dev/null +++ b/module/plugins/captcha/SolveMedia.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- + +import re + +from module.plugins.internal.Plugin import Fail +from module.plugins.internal.CaptchaService import CaptchaService + + +class SolveMedia(CaptchaService): + __name__ = "SolveMedia" + __type__ = "captcha" + __version__ = "0.15" + __status__ = "stable" + + __description__ = """SolveMedia captcha service plugin""" + __license__ = "GPLv3" + __authors__ = [("pyLoad Team", "admin@pyload.org")] + + + KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']' + + + def detect_key(self, data=None): + html = data or self.retrieve_data() + + m = re.search(self.KEY_PATTERN, html) + if m: + self.key = m.group(1).strip() + self.log_debug("Key: %s" % self.key) + return self.key + else: + self.log_warning(_("Key pattern not found") + return None + + + def challenge(self, key=None, data=None): + key = key or self.retrieve_key(data) + + html = self.plugin.load("http://api.solvemedia.com/papi/challenge.noscript", + get={'k': key}) + + for i in xrange(1, 11): + try: + magic = re.search(r'name="magic" value="(.+?)"', html).group(1) + + except AttributeError: + self.log_warning(_("Magic pattern not found") + magic = None + + try: + challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="(.+?)">', + html).group(1) + + except AttributeError: + self.fail(_("SolveMedia challenge pattern not found")) + + else: + self.log_debug("Challenge: %s" % challenge) + + try: + result = self.result("http://api.solvemedia.com/papi/media", challenge) + + except Fail, e: + self.log_warning(e) + self.plugin.invalidCaptcha() + result = None + + html = self.plugin.load("http://api.solvemedia.com/papi/verify.noscript", + post={'adcopy_response' : result, + 'k' : key, + 'l' : "en", + 't' : "img", + 's' : "standard", + 'magic' : magic, + 'adcopy_challenge': challenge, + 'ref' : self.plugin.pyfile.url}) + try: + redirect = re.search(r'URL=(.+?)">', html).group(1) + + except AttributeError: + self.fail(_("SolveMedia verify pattern not found")) + + else: + if "error" in html: + self.log_warning(_("Captcha code was invalid")) + self.log_debug("Retry #%d" % i) + html = self.plugin.load(redirect) + else: + break + + else: + self.fail(_("SolveMedia max retries exceeded")) + + return result, challenge + + + def result(self, server, challenge): + result = self.decrypt(server, + get={'c': challenge}, + cookies=True, + input_type="gif") + + self.log_debug("Result: %s" % result) + + return result |