summary | refs | log | tree | commit | diff | stats
path: root/module/plugins/captcha
diff options
context:
space:
mode:
author: Walter Purcaro <vuolter@users.noreply.github.com> 2015-07-23 23:44:45 +0200
committer: Walter Purcaro <vuolter@users.noreply.github.com> 2015-07-23 23:44:45 +0200
commit: 6af9b38a8d5d49355b85aef6ddd003605d6bba05 (patch)
tree: cbfb5b2212cab406ba75b3acd553879311e9153f /module/plugins/captcha
parent: Code cosmetics (diff)
download: pyload-6af9b38a8d5d49355b85aef6ddd003605d6bba05.tar.xz
Improve Captcha
Diffstat (limited to 'module/plugins/captcha')
-rw-r--r-- module/plugins/captcha/AdYouLike.py | 92
-rw-r--r-- module/plugins/captcha/AdsCaptcha.py | 64
-rw-r--r-- module/plugins/captcha/GigasizeCom.py | 2
-rw-r--r-- module/plugins/captcha/LinksaveIn.py | 2
-rw-r--r-- module/plugins/captcha/NetloadIn.py | 2
-rw-r--r-- module/plugins/captcha/ReCaptcha.py | 197
-rw-r--r-- module/plugins/captcha/ShareonlineBiz.py | 2
-rw-r--r-- module/plugins/captcha/SolveMedia.py | 105
8 files changed, 462 insertions, 4 deletions
diff --git a/module/plugins/captcha/AdYouLike.py b/module/plugins/captcha/AdYouLike.py
new file mode 100644
index 000000000..d14babb51
--- /dev/null
+++ b/module/plugins/captcha/AdYouLike.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from module.common.json_layer import json_loads
+from module.plugins.internal.CaptchaService import CaptchaService
+
+
+class AdYouLike(CaptchaService):
+ __name__ = "AdYouLike"
+ __type__ = "captcha"
+ __version__ = "0.07"
+ __status__ = "stable"
+
+ __description__ = """AdYouLike captcha service plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
+
+
+ AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)'
+ CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)'
+
+
+ def detect_key(self, data=None): #: Look up the AdYouLike settings and the JSONP callback name in the page html
+ html = data or self.retrieve_data() #: Falls back to self.retrieve_data() when data is None or empty
+
+ m = re.search(self.AYL_PATTERN, html)
+ n = re.search(self.CALLBACK_PATTERN, html)
+ if m and n: #: Both patterns must match, otherwise no key is stored
+ self.key = (m.group(1).strip(), n.group(1).strip())
+ self.log_debug("Ayl: %s | Callback: %s" % self.key)
+ return self.key #: Key is the tuple(ayl, callback)
+ else:
+ self.log_warning(_("Ayl or callback pattern not found"))
+ return None
+
+
+ def challenge(self, key=None, data=None):
+ ayl, callback = key or self.retrieve_key(data)
+
+ #: {'adyoulike':{'key':"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"},
+ #: 'all':{'element_id':"ayl_private_cap_92300",'lang':"fr",'env':"prod"}}
+ ayl = json_loads(ayl)
+
+ html = self.plugin.load("http://api-ayl.appspot.com/challenge",
+ get={'key' : ayl['adyoulike']['key'],
+ 'env' : ayl['all']['env'],
+ 'callback': callback})
+ try:
+ challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1))
+
+ except AttributeError:
+ self.fail(_("AdYouLike challenge pattern not found"))
+
+ self.log_debug("Challenge: %s" % challenge)
+
+ return self.result(ayl, challenge), challenge
+
+
+ def result(self, server, challenge):
+ #: Adyoulike.g._jsonp_5579316662423138
+ #: ({'translations':{'fr':{'instructions_visual':"Recopiez « Soonnight » ci-dessous :"}},
+ #: 'site_under':true,'clickable':true,'pixels':{'VIDEO_050':[],'DISPLAY':[],'VIDEO_000':[],'VIDEO_100':[],
+ #: 'VIDEO_025':[],'VIDEO_075':[]},'medium_type':"image/adyoulike",
+ #: 'iframes':{'big':"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\"
+ #: height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},'shares':{},'id':256,
+ #: 'token':"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1",'formats':{'small':{'y':300,'x':0,'w':300,'h':60},
+ #: 'big':{'y':0,'x':0,'w':300,'h':250},'hover':{'y':440,'x':0,'w':300,'h':60}},
+ #: 'tid':"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"})
+
+ if isinstance(server, basestring):
+ server = json_loads(server)
+
+ if isinstance(challenge, basestring):
+ challenge = json_loads(challenge)
+
+ try:
+ instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual']
+ result = re.search(u'«(.+?)»', instructions_visual).group(1).strip()
+
+ except AttributeError:
+ self.fail(_("AdYouLike result not found"))
+
+ result = {'_ayl_captcha_engine' : "adyoulike",
+ '_ayl_env' : server['all']['env'],
+ '_ayl_tid' : challenge['tid'],
+ '_ayl_token_challenge': challenge['token'],
+ '_ayl_response' : response}
+
+ self.log_debug("Result: %s" % result)
+
+ return result
diff --git a/module/plugins/captcha/AdsCaptcha.py b/module/plugins/captcha/AdsCaptcha.py
new file mode 100644
index 000000000..da0c531be
--- /dev/null
+++ b/module/plugins/captcha/AdsCaptcha.py
@@ -0,0 +1,64 @@
+# -*- coding: utf-8 -*-
+
+import random
+import re
+
+from module.plugins.internal.CaptchaService import CaptchaService
+
+
+class AdsCaptcha(CaptchaService):
+ __name__ = "AdsCaptcha"
+ __type__ = "captcha"
+ __version__ = "0.10"
+ __status__ = "stable"
+
+ __description__ = """AdsCaptcha captcha service plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
+
+
+ CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?CaptchaId=(\d+)'
+ PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)'
+
+
+ def detect_key(self, data=None): #: Look up the AdsCaptcha PublicKey and CaptchaId in the page html
+ html = data or self.retrieve_data() #: Falls back to self.retrieve_data() when data is None or empty
+
+ m = re.search(self.PUBLICKEY_PATTERN, html)
+ n = re.search(self.CAPTCHAID_PATTERN, html)
+ if m and n: #: Both values are required to build the key
+ self.key = (m.group(1).strip(), n.group(1).strip()) #: Key is the tuple(PublicKey, CaptchaId)
+ self.log_debug("Key: %s | ID: %s" % self.key)
+ return self.key
+ else:
+ self.log_warning(_("Key or id pattern not found"))
+ return None
+
+
+ def challenge(self, key=None, data=None): #: Fetch Get.aspx for the key and solve the captcha; returns the tuple(result, challenge)
+ PublicKey, CaptchaId = key or self.retrieve_key(data)
+
+ html = self.plugin.load("http://api.adscaptcha.com/Get.aspx",
+ get={'CaptchaId': CaptchaId,
+ 'PublicKey': PublicKey})
+ try:
+ challenge = re.search("challenge: '(.+?)',", html).group(1)
+ server = re.search("server: '(.+?)',", html).group(1)
+
+ except AttributeError: #: re.search returned None, i.e. one of the two fields is missing
+ self.fail(_("AdsCaptcha challenge pattern not found"))
+
+ self.log_debug("Challenge: %s" % challenge)
+
+ return self.result(server, challenge), challenge
+
+
+ def result(self, server, challenge): #: Pass the jpg at <server>Challenge.aspx to self.decrypt and return its answer
+ result = self.decrypt("%sChallenge.aspx" % server,
+ get={'cid': challenge, 'dummy': random.random()},
+ cookies=True,
+ input_type="jpg")
+
+ self.log_debug("Result: %s" % result)
+
+ return result
diff --git a/module/plugins/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py
index 19ad9d680..f71266b23 100644
--- a/module/plugins/captcha/GigasizeCom.py
+++ b/module/plugins/captcha/GigasizeCom.py
@@ -14,7 +14,7 @@ class GigasizeCom(OCR):
__authors__ = [("pyLoad Team", "admin@pyload.org")]
- def get_captcha(self, image):
+ def recognize(self, image):
self.load_image(image)
self.threshold(2.8)
self.run_tesser(True, False, False, True)
diff --git a/module/plugins/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py
index 68704f21d..0a4731375 100644
--- a/module/plugins/captcha/LinksaveIn.py
+++ b/module/plugins/captcha/LinksaveIn.py
@@ -137,7 +137,7 @@ class LinksaveIn(OCR):
self.pixels = self.image.load()
- def get_captcha(self, image):
+ def recognize(self, image):
self.load_image(image)
bg = self.get_bg()
self.substract_bg(bg)
diff --git a/module/plugins/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py
index b6ba2b6e9..56b7c9196 100644
--- a/module/plugins/captcha/NetloadIn.py
+++ b/module/plugins/captcha/NetloadIn.py
@@ -14,7 +14,7 @@ class NetloadIn(OCR):
__authors__ = [("pyLoad Team", "admin@pyload.org")]
- def get_captcha(self, image):
+ def recognize(self, image):
self.load_image(image)
self.to_greyscale()
self.clean(3)
diff --git a/module/plugins/captcha/ReCaptcha.py b/module/plugins/captcha/ReCaptcha.py
new file mode 100644
index 000000000..8f9755961
--- /dev/null
+++ b/module/plugins/captcha/ReCaptcha.py
@@ -0,0 +1,197 @@
+# -*- coding: utf-8 -*-
+
+import random
+import re
+import time
+import urlparse
+
+from base64 import b64encode
+
+from module.plugins.internal.CaptchaService import CaptchaService
+
+
+class ReCaptcha(CaptchaService):
+ __name__ = "ReCaptcha"
+ __type__ = "captcha"
+ __version__ = "0.18"
+ __status__ = "stable"
+
+ __description__ = """ReCaptcha captcha service plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org"),
+ ("Walter Purcaro", "vuolter@gmail.com"),
+ ("zapp-brannigan", "fuerst.reinje@web.de")]
+
+
+ KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)'
+ KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)'
+
+
+ def detect_key(self, data=None): #: Look up the ReCaptcha site key in the page html
+ html = data or self.retrieve_data() #: Falls back to self.retrieve_data() when data is None or empty
+
+ m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html) #: The v2 pattern is tried first
+ if m:
+ self.key = m.group(1).strip()
+ self.log_debug("Key: %s" % self.key)
+ return self.key
+ else:
+ self.log_warning(_("Key pattern not found"))
+ return None
+
+
+ def challenge(self, key=None, data=None, version=None):
+ key = key or self.retrieve_key(data)
+
+ if version in (1, 2):
+ return getattr(self, "_challenge_v%s" % version)(key)
+
+ else:
+ return self.challenge(key,
+ version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_data()) else 1)
+
+
+ def _challenge_v1(self, key): #: ReCaptcha v1: fetch challenge and server for the key, then return whatever self.result returns
+ html = self.plugin.load("http://www.google.com/recaptcha/api/challenge",
+ get={'k': key})
+ try:
+ challenge = re.search("challenge : '(.+?)',", html).group(1)
+ server = re.search("server : '(.+?)',", html).group(1)
+
+ except AttributeError: #: re.search returned None, i.e. one of the two fields is missing
+ self.fail(_("ReCaptcha challenge pattern not found"))
+
+ self.log_debug("Challenge: %s" % challenge)
+
+ return self.result(server, challenge, key)
+
+
+ def result(self, server, challenge, key): #: Reload the v1 challenge as an image and have it solved; returns the tuple(result, challenge)
+ self.plugin.load("http://www.google.com/recaptcha/api/js/recaptcha.js")
+ html = self.plugin.load("http://www.google.com/recaptcha/api/reload",
+ get={'c' : challenge,
+ 'k' : key,
+ 'reason': "i",
+ 'type' : "image"})
+
+ try:
+ challenge = re.search('\(\'(.+?)\',',html).group(1) #: Replaces the first challenge with the one from the reload response
+
+ except AttributeError:
+ self.fail(_("ReCaptcha second challenge pattern not found"))
+
+ self.log_debug("Second challenge: %s" % challenge)
+ result = self.decrypt("%simage" % server,
+ get={'c': challenge},
+ cookies=True,
+ input_type="jpg",
+ ocr=False)
+
+ self.log_debug("Result: %s" % result)
+
+ return result, challenge #: The returned challenge is the second (reloaded) one
+
+
+ def _collect_api_info(self): #: Scrape api version, language and jsh string from Google's js loaders; returns the tuple(vers, language, jsh)
+ html = self.plugin.load("http://www.google.com/recaptcha/api.js")
+ a = re.search(r'po.src = \'(.*?)\';', html).group(1) #: NOTE(review): raises AttributeError if the pattern is not found
+ vers = a.split("/")[5] #: Sixth "/"-separated part of the script url
+
+ self.log_debug("API version: %s" % vers)
+
+ language = a.split("__")[1].split(".")[0] #: Text between the first "__" and the following "."
+
+ self.log_debug("API language: %s" % language)
+
+ html = self.plugin.load("https://apis.google.com/js/api.js")
+ b = re.search(r'"h":"(.*?)","', html).group(1) #: NOTE(review): raises AttributeError if the pattern is not found
+ jsh = b.decode('unicode-escape')
+
+ self.log_debug("API jsh-string: %s" % jsh)
+
+ return vers, language, jsh
+
+
+ def _prepare_time_and_rpc(self): #: Load the api2 demo page, then build a timestamp and a random rpc token; returns the tuple(millis, rpc)
+ self.plugin.load("http://www.google.com/recaptcha/api2/demo")
+
+ millis = int(round(time.time() * 1000)) #: Current time in milliseconds
+
+ self.log_debug("Time: %s" % millis)
+
+ rand = random.randint(1, 99999999)
+ a = "0.%s" % str(rand * 2147483647) #: Digits of the product read as a decimal fraction
+ rpc = int(100000000 * float(a)) #: Roughly the leading eight digits of that product
+
+ self.log_debug("Rpc-token: %s" % rpc)
+
+ return millis, rpc
+
+
+ def _challenge_v2(self, key, parent=None): #: ReCaptcha v2 flow: anchor -> frame -> payload image -> userverify; returns the tuple(result, None)
+ if parent is None:
+ try:
+ parent = urlparse.urljoin("http://", urlparse.urlparse(self.plugin.pyfile.url).netloc) #: Built from the netloc of the file url
+
+ except Exception:
+ parent = "" #: Any failure while building it leaves parent empty
+
+ botguardstring = "!A"
+ vers, language, jsh = self._collect_api_info()
+ millis, rpc = self._prepare_time_and_rpc()
+
+ html = self.plugin.load("https://www.google.com/recaptcha/api2/anchor",
+ get={'k' : key,
+ 'hl' : language,
+ 'v' : vers,
+ 'usegapi' : "1",
+ 'jsh' : "%s#id=IO_%s" % (jsh, millis),
+ 'parent' : parent,
+ 'pfname' : "",
+ 'rpctoken': rpc})
+
+ token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html)
+ self.log_debug("Token #1: %s" % token1.group(1)) #: NOTE(review): token1..token4 are not checked for None, a missing token raises AttributeError
+
+ html = self.plugin.load("https://www.google.com/recaptcha/api2/frame",
+ get={'c' : token1.group(1),
+ 'hl' : language,
+ 'v' : vers,
+ 'bg' : botguardstring,
+ 'k' : key,
+ 'usegapi': "1",
+ 'jsh' : jsh},
+ decode="unicode-escape")
+
+ token2 = re.search(r'"finput","(.*?)",', html)
+ self.log_debug("Token #2: %s" % token2.group(1)) #: token2 is only logged, it is not used further in this method
+
+ token3 = re.search(r'"rresp","(.*?)",', html)
+ self.log_debug("Token #3: %s" % token3.group(1))
+
+ millis_captcha_loading = int(round(time.time() * 1000)) #: Start of the solving time measurement, in milliseconds
+ captcha_response = self.decrypt("https://www.google.com/recaptcha/api2/payload",
+ get={'c':token3.group(1), 'k':key},
+ cookies=True,
+ ocr=False)
+ response = b64encode('{"response":"%s"}' % captcha_response) #: The answer is sent as a base64 encoded json object
+
+ self.log_debug("Result: %s" % response)
+
+ timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading
+ timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500) #: timeToSolve plus a random extra below 500 ms
+
+ html = self.plugin.load("https://www.google.com/recaptcha/api2/userverify",
+ post={'k' : key,
+ 'c' : token3.group(1),
+ 'response': response,
+ 't' : timeToSolve,
+ 'ct' : timeToSolveMore,
+ 'bg' : botguardstring})
+
+ token4 = re.search(r'"uvresp","(.*?)",', html)
+ self.log_debug("Token #4: %s" % token4.group(1))
+
+ result = token4.group(1) #: The uvresp token is returned as the result
+
+ return result, None
diff --git a/module/plugins/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py
index 7f25f164d..98994b121 100644
--- a/module/plugins/captcha/ShareonlineBiz.py
+++ b/module/plugins/captcha/ShareonlineBiz.py
@@ -14,7 +14,7 @@ class ShareonlineBiz(OCR):
__authors__ = [("RaNaN", "RaNaN@pyload.org")]
- def get_captcha(self, image):
+ def recognize(self, image):
self.load_image(image)
self.to_greyscale()
self.image = self.image.resize((160, 50))
diff --git a/module/plugins/captcha/SolveMedia.py b/module/plugins/captcha/SolveMedia.py
new file mode 100644
index 000000000..cbac2dec2
--- /dev/null
+++ b/module/plugins/captcha/SolveMedia.py
@@ -0,0 +1,105 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from module.plugins.internal.Plugin import Fail
+from module.plugins.internal.CaptchaService import CaptchaService
+
+
+class SolveMedia(CaptchaService):
+ __name__ = "SolveMedia"
+ __type__ = "captcha"
+ __version__ = "0.15"
+ __status__ = "stable"
+
+ __description__ = """SolveMedia captcha service plugin"""
+ __license__ = "GPLv3"
+ __authors__ = [("pyLoad Team", "admin@pyload.org")]
+
+
+ KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']'
+
+
+ def detect_key(self, data=None):
+ html = data or self.retrieve_data()
+
+ m = re.search(self.KEY_PATTERN, html)
+ if m:
+ self.key = m.group(1).strip()
+ self.log_debug("Key: %s" % self.key)
+ return self.key
+ else:
+ self.log_warning(_("Key pattern not found")
+ return None
+
+
+ def challenge(self, key=None, data=None):
+ key = key or self.retrieve_key(data)
+
+ html = self.plugin.load("http://api.solvemedia.com/papi/challenge.noscript",
+ get={'k': key})
+
+ for i in xrange(1, 11):
+ try:
+ magic = re.search(r'name="magic" value="(.+?)"', html).group(1)
+
+ except AttributeError:
+ self.log_warning(_("Magic pattern not found")
+ magic = None
+
+ try:
+ challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="(.+?)">',
+ html).group(1)
+
+ except AttributeError:
+ self.fail(_("SolveMedia challenge pattern not found"))
+
+ else:
+ self.log_debug("Challenge: %s" % challenge)
+
+ try:
+ result = self.result("http://api.solvemedia.com/papi/media", challenge)
+
+ except Fail, e:
+ self.log_warning(e)
+ self.plugin.invalidCaptcha()
+ result = None
+
+ html = self.plugin.load("http://api.solvemedia.com/papi/verify.noscript",
+ post={'adcopy_response' : result,
+ 'k' : key,
+ 'l' : "en",
+ 't' : "img",
+ 's' : "standard",
+ 'magic' : magic,
+ 'adcopy_challenge': challenge,
+ 'ref' : self.plugin.pyfile.url})
+ try:
+ redirect = re.search(r'URL=(.+?)">', html).group(1)
+
+ except AttributeError:
+ self.fail(_("SolveMedia verify pattern not found"))
+
+ else:
+ if "error" in html:
+ self.log_warning(_("Captcha code was invalid"))
+ self.log_debug("Retry #%d" % i)
+ html = self.plugin.load(redirect)
+ else:
+ break
+
+ else:
+ self.fail(_("SolveMedia max retries exceeded"))
+
+ return result, challenge
+
+
+ def result(self, server, challenge): #: Pass the gif served by server for the challenge to self.decrypt and return its answer
+ result = self.decrypt(server,
+ get={'c': challenge},
+ cookies=True,
+ input_type="gif")
+
+ self.log_debug("Result: %s" % result)
+
+ return result