summary | refs | log | tree | commit | diff | stats
path: root/module/plugins/internal
diff options
context:
space:
mode:
author: Walter Purcaro <vuolter@users.noreply.github.com> 2015-07-23 23:44:45 +0200
committer: Walter Purcaro <vuolter@users.noreply.github.com> 2015-07-23 23:44:45 +0200
commit: 6af9b38a8d5d49355b85aef6ddd003605d6bba05 (patch)
tree: cbfb5b2212cab406ba75b3acd553879311e9153f /module/plugins/internal
parent: Code cosmetics (diff)
download: pyload-6af9b38a8d5d49355b85aef6ddd003605d6bba05.tar.xz
Improve Captcha
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- module/plugins/internal/AdYouLike.py | 92
-rw-r--r-- module/plugins/internal/AdsCaptcha.py | 64
-rw-r--r-- module/plugins/internal/Captcha.py | 89
-rw-r--r-- module/plugins/internal/OCR.py | 4
-rw-r--r-- module/plugins/internal/ReCaptcha.py | 197
-rw-r--r-- module/plugins/internal/SolveMedia.py | 105
-rw-r--r-- module/plugins/internal/XFSHoster.py | 6
7 files changed, 53 insertions, 504 deletions
diff --git a/module/plugins/internal/AdYouLike.py b/module/plugins/internal/AdYouLike.py
deleted file mode 100644
index d14babb51..000000000
--- a/module/plugins/internal/AdYouLike.py
+++ /dev/null
@@ -1,92 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import re
-
-from module.common.json_layer import json_loads
-from module.plugins.internal.CaptchaService import CaptchaService
-
-
-class AdYouLike(CaptchaService):
- __name__ = "AdYouLike"
- __type__ = "captcha"
- __version__ = "0.07"
- __status__ = "stable"
-
- __description__ = """AdYouLike captcha service plugin"""
- __license__ = "GPLv3"
- __authors__ = [("Walter Purcaro", "vuolter@gmail.com")]
-
-
- AYL_PATTERN = r'Adyoulike\.create\s*\((.+?)\)'
- CALLBACK_PATTERN = r'(Adyoulike\.g\._jsonp_\d+)'
-
-
- def detect_key(self, data=None):
- html = data or self.retrieve_data()
-
- m = re.search(self.AYL_PATTERN, html)
- n = re.search(self.CALLBACK_PATTERN, html)
- if m and n:
- self.key = (m.group(1).strip(), n.group(1).strip())
- self.log_debug("Ayl: %s | Callback: %s" % self.key)
- return self.key #: Key is the tuple(ayl, callback)
- else:
- self.log_warning(_("Ayl or callback pattern not found"))
- return None
-
-
- def challenge(self, key=None, data=None):
- ayl, callback = key or self.retrieve_key(data)
-
- #: {'adyoulike':{'key':"P~zQ~O0zV0WTiAzC-iw0navWQpCLoYEP"},
- #: 'all':{'element_id':"ayl_private_cap_92300",'lang':"fr",'env':"prod"}}
- ayl = json_loads(ayl)
-
- html = self.plugin.load("http://api-ayl.appspot.com/challenge",
- get={'key' : ayl['adyoulike']['key'],
- 'env' : ayl['all']['env'],
- 'callback': callback})
- try:
- challenge = json_loads(re.search(callback + r'\s*\((.+?)\)', html).group(1))
-
- except AttributeError:
- self.fail(_("AdYouLike challenge pattern not found"))
-
- self.log_debug("Challenge: %s" % challenge)
-
- return self.result(ayl, challenge), challenge
-
-
- def result(self, server, challenge):
- #: Adyoulike.g._jsonp_5579316662423138
- #: ({'translations':{'fr':{'instructions_visual':"Recopiez « Soonnight » ci-dessous :"}},
- #: 'site_under':true,'clickable':true,'pixels':{'VIDEO_050':[],'DISPLAY':[],'VIDEO_000':[],'VIDEO_100':[],
- #: 'VIDEO_025':[],'VIDEO_075':[]},'medium_type':"image/adyoulike",
- #: 'iframes':{'big':"<iframe src=\"http://www.soonnight.com/campagn.html\" scrolling=\"no\"
- #: height=\"250\" width=\"300\" frameborder=\"0\"></iframe>"},'shares':{},'id':256,
- #: 'token':"e6QuI4aRSnbIZJg02IsV6cp4JQ9~MjA1",'formats':{'small':{'y':300,'x':0,'w':300,'h':60},
- #: 'big':{'y':0,'x':0,'w':300,'h':250},'hover':{'y':440,'x':0,'w':300,'h':60}},
- #: 'tid':"SqwuAdxT1EZoi4B5q0T63LN2AkiCJBg5"})
-
- if isinstance(server, basestring):
- server = json_loads(server)
-
- if isinstance(challenge, basestring):
- challenge = json_loads(challenge)
-
- try:
- instructions_visual = challenge['translations'][server['all']['lang']]['instructions_visual']
- result = re.search(u'«(.+?)»', instructions_visual).group(1).strip()
-
- except AttributeError:
- self.fail(_("AdYouLike result not found"))
-
- result = {'_ayl_captcha_engine' : "adyoulike",
- '_ayl_env' : server['all']['env'],
- '_ayl_tid' : challenge['tid'],
- '_ayl_token_challenge': challenge['token'],
- '_ayl_response' : response}
-
- self.log_debug("Result: %s" % result)
-
- return result
diff --git a/module/plugins/internal/AdsCaptcha.py b/module/plugins/internal/AdsCaptcha.py
deleted file mode 100644
index f487042e2..000000000
--- a/module/plugins/internal/AdsCaptcha.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import random
-import re
-
-from module.plugins.internal.CaptchaService import CaptchaService
-
-
-class AdsCaptcha(CaptchaService):
- __name__ = "AdsCaptcha"
- __type__ = "captcha"
- __version__ = "0.10"
- __status__ = "stable"
-
- __description__ = """AdsCaptcha captcha service plugin"""
- __license__ = "GPLv3"
- __authors__ = [("pyLoad Team", "admin@pyload.org")]
-
-
- CAPTCHAID_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?CaptchaId=(\d+)'
- PUBLICKEY_PATTERN = r'api\.adscaptcha\.com/Get\.aspx\?.*?PublicKey=([\w-]+)'
-
-
- def detect_key(self, data=None):
- html = data or self.retrieve_data()
-
- m = re.search(self.PUBLICKEY_PATTERN, html)
- n = re.search(self.CAPTCHAID_PATTERN, html)
- if m and n:
- self.key = (m.group(1).strip(), n.group(1).strip()) #: Key is the tuple(PublicKey, CaptchaId)
- self.log_debug("Key: %s | ID: %s" % self.key)
- return self.key
- else:
- self.log_warning(_("Key or id pattern not found"))
- return None
-
-
- def challenge(self, key=None, data=None):
- PublicKey, CaptchaId = key or self.retrieve_key(data)
-
- html = self.plugin.load("http://api.adscaptcha.com/Get.aspx",
- get={'CaptchaId': CaptchaId,
- 'PublicKey': PublicKey})
- try:
- challenge = re.search("challenge: '(.+?)',", html).group(1)
- server = re.search("server: '(.+?)',", html).group(1)
-
- except AttributeError:
- self.fail(_("AdsCaptcha challenge pattern not found"))
-
- self.log_debug("Challenge: %s" % challenge)
-
- return self.result(server, challenge), challenge
-
-
- def result(self, server, challenge):
- result = self.decrypt_image("%sChallenge.aspx" % server,
- get={'cid': challenge, 'dummy': random.random()},
- cookies=True,
- input_type="jpg")
-
- self.log_debug("Result: %s" % result)
-
- return result
diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py
index af7f66ed5..942021f26 100644
--- a/module/plugins/internal/Captcha.py
+++ b/module/plugins/internal/Captcha.py
@@ -1,12 +1,14 @@
# -*- coding: utf-8 -*-
+import time
+
from module.plugins.internal.Plugin import Plugin
class Captcha(Plugin):
__name__ = "Captcha"
__type__ = "captcha"
- __version__ = "0.01"
+ __version__ = "0.02"
__status__ = "stable"
__description__ = """Base anti-captcha plugin"""
@@ -35,13 +37,21 @@ class Captcha(Plugin):
pass
- def decrypt_image(self, url, get={}, post={}, ref=False, cookies=False, decode=False,
- input_type='png', output_type='textual', try_ocr=True):
- image = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode)
- return self.decrypt(image, input_type, output_type, try_ocr)
+ def recognize(self, image):
+ """
+ Extend to build your custom anti-captcha ocr
+ """
+ pass
+
+
+ def decrypt(self, url, get={}, post={}, ref=False, cookies=False, decode=False,
+ input_type='png', output_type='textual', ocr=True):
+ img = self.load(url, get=get, post=post, ref=ref, cookies=cookies, decode=decode)
+ return self._decrypt(img, input_type, output_type, ocr)
- def decrypt(self, data, input_type='png', output_type='textual', try_ocr=True):
+ #@TODO: Definitely dhoose a better name for this method!
+ def _decrypt(self, raw, input_type='png', output_type='textual', ocr=None):
"""
Loads a captcha and decrypts it with ocr, plugin, user input
@@ -53,56 +63,53 @@ class Captcha(Plugin):
:param output_type: 'textual' if text is written on the captcha\
or 'positional' for captcha where the user have to click\
on a specific region on the captcha
- :param try_ocr: if True, ocr is not used
+ :param ocr: if True, ocr is not used
:return: result of decrypting
"""
- id = ("%.2f" % time.time())[-6:].replace(".", "")
+ time_ref = ("%.2f" % time.time())[-6:].replace(".", "")
+
+ with open(os.path.join("tmp", "captcha_image_%s_%s.%s" % (self.plugin.__name__, time_ref, input_type)), "wb") as tmp_img:
+ tmp_img.write(raw)
+
+ if ocr is not False:
+ if isinstance(ocr, basestring):
+ OCR = self.pyload.pluginManager.loadClass("captcha", ocr) #: Rename `captcha` to `ocr` in 0.4.10
- with open(os.path.join("tmp", "tmpCaptcha_%s_%s.%s" % (self.plugin.__name__, id, input_type)), "wb") as tmpCaptcha:
- tmpCaptcha.write(img)
+ if self.plugin.pyfile.abort:
+ self.abort()
- has_plugin = self.plugin.__name__ in self.pyload.pluginManager.ocrPlugins
+ result = OCR(self.plugin.pyfile).recognize(tmp_img.name)
- if self.pyload.captcha:
- Ocr = self.pyload.pluginManager.loadClass("ocr", self.plugin.__name__)
- else:
- Ocr = None
+ else:
+ result = self.recognize(tmp_img.name)
- if Ocr and try_ocr:
- time.sleep(random.randint(3000, 5000) / 1000.0)
- if self.pyfile.abort:
- self.abort()
+ else:
+ captchaManager = self.pyload.captchaManager
- ocr = Ocr(self.pyfile)
- result = ocr.get_captcha(tmpCaptcha.name)
- else:
- captchaManager = self.pyload.captchaManager
- task = captchaManager.newTask(img, input_type, tmpCaptcha.name, output_type)
- self.task = task
- captchaManager.handleCaptcha(task)
+ try:
+ self.task = captchaManager.newTask(img, input_type, tmp_img.name, output_type)
+ captchaManager.handleCaptcha(self.task)
- while task.isWaiting():
- if self.pyfile.abort:
- captchaManager.removeTask(task)
- self.abort()
- time.sleep(1)
+ while self.task.isWaiting():
+ if self.plugin.pyfile.abort:
+ self.abort()
+ time.sleep(1)
+ finally:
+ captchaManager.removeTask(self.task)
- captchaManager.removeTask(task)
+ if self.task.error:
+ self.fail(task.error)
- if task.error and has_plugin: #: Ignore default error message since the user could use try_ocr
- self.fail(_("Pil and tesseract not installed and no Client connected for captcha decrypting"))
- elif task.error:
- self.fail(task.error)
- elif not task.result:
- self.fail(_("No captcha result obtained in appropiate time by any of the plugins"))
+ elif not self.task.result:
+ self.fail(_("No captcha result obtained in appropiate time by any of the plugins"))
- result = task.result
- self.log_debug("Received captcha result: %s" % result)
+ result = task.result
+ self.log_debug("Received captcha result: %s" % result) #@TODO: Remove from here?
if not self.pyload.debug:
try:
- os.remove(tmpCaptcha.name)
+ os.remove(tmp_img.name)
except Exception:
pass
diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py
index ee5571f77..5c22866c8 100644
--- a/module/plugins/internal/OCR.py
+++ b/module/plugins/internal/OCR.py
@@ -20,7 +20,7 @@ from module.utils import save_join as fs_join
class OCR(Plugin):
__name__ = "OCR"
__type__ = "ocr"
- __version__ = "0.12"
+ __version__ = "0.13"
__status__ = "stable"
__description__ = """OCR base plugin"""
@@ -133,7 +133,7 @@ class OCR(Plugin):
pass
- def get_captcha(self, name):
+ def recognize(self, name):
raise NotImplementedError
diff --git a/module/plugins/internal/ReCaptcha.py b/module/plugins/internal/ReCaptcha.py
deleted file mode 100644
index b4f9ef1eb..000000000
--- a/module/plugins/internal/ReCaptcha.py
+++ /dev/null
@@ -1,197 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import random
-import re
-import time
-import urlparse
-
-from base64 import b64encode
-
-from module.plugins.internal.CaptchaService import CaptchaService
-
-
-class ReCaptcha(CaptchaService):
- __name__ = "ReCaptcha"
- __type__ = "captcha"
- __version__ = "0.18"
- __status__ = "stable"
-
- __description__ = """ReCaptcha captcha service plugin"""
- __license__ = "GPLv3"
- __authors__ = [("pyLoad Team", "admin@pyload.org"),
- ("Walter Purcaro", "vuolter@gmail.com"),
- ("zapp-brannigan", "fuerst.reinje@web.de")]
-
-
- KEY_V1_PATTERN = r'(?:recaptcha(?:/api|\.net)/(?:challenge|noscript)\?k=|Recaptcha\.create\s*\(\s*["\'])([\w-]+)'
- KEY_V2_PATTERN = r'(?:data-sitekey=["\']|["\']sitekey["\']:\s*["\'])([\w-]+)'
-
-
- def detect_key(self, data=None):
- html = data or self.retrieve_data()
-
- m = re.search(self.KEY_V2_PATTERN, html) or re.search(self.KEY_V1_PATTERN, html)
- if m:
- self.key = m.group(1).strip()
- self.log_debug("Key: %s" % self.key)
- return self.key
- else:
- self.log_warning(_("Key pattern not found"))
- return None
-
-
- def challenge(self, key=None, data=None, version=None):
- key = key or self.retrieve_key(data)
-
- if version in (1, 2):
- return getattr(self, "_challenge_v%s" % version)(key)
-
- else:
- return self.challenge(key,
- version=2 if re.search(self.KEY_V2_PATTERN, html or self.retrieve_data()) else 1)
-
-
- def _challenge_v1(self, key):
- html = self.plugin.load("http://www.google.com/recaptcha/api/challenge",
- get={'k': key})
- try:
- challenge = re.search("challenge : '(.+?)',", html).group(1)
- server = re.search("server : '(.+?)',", html).group(1)
-
- except AttributeError:
- self.fail(_("ReCaptcha challenge pattern not found"))
-
- self.log_debug("Challenge: %s" % challenge)
-
- return self.result(server, challenge, key)
-
-
- def result(self, server, challenge, key):
- self.plugin.load("http://www.google.com/recaptcha/api/js/recaptcha.js")
- html = self.plugin.load("http://www.google.com/recaptcha/api/reload",
- get={'c' : challenge,
- 'k' : key,
- 'reason': "i",
- 'type' : "image"})
-
- try:
- challenge = re.search('\(\'(.+?)\',',html).group(1)
-
- except AttributeError:
- self.fail(_("ReCaptcha second challenge pattern not found"))
-
- self.log_debug("Second challenge: %s" % challenge)
- result = self.decrypt("%simage" % server,
- get={'c': challenge},
- cookies=True,
- input_type="jpg",
- try_ocr=False)
-
- self.log_debug("Result: %s" % result)
-
- return result, challenge
-
-
- def _collect_api_info(self):
- html = self.plugin.load("http://www.google.com/recaptcha/api.js")
- a = re.search(r'po.src = \'(.*?)\';', html).group(1)
- vers = a.split("/")[5]
-
- self.log_debug("API version: %s" % vers)
-
- language = a.split("__")[1].split(".")[0]
-
- self.log_debug("API language: %s" % language)
-
- html = self.plugin.load("https://apis.google.com/js/api.js")
- b = re.search(r'"h":"(.*?)","', html).group(1)
- jsh = b.decode('unicode-escape')
-
- self.log_debug("API jsh-string: %s" % jsh)
-
- return vers, language, jsh
-
-
- def _prepare_time_and_rpc(self):
- self.plugin.load("http://www.google.com/recaptcha/api2/demo")
-
- millis = int(round(time.time() * 1000))
-
- self.log_debug("Time: %s" % millis)
-
- rand = random.randint(1, 99999999)
- a = "0.%s" % str(rand * 2147483647)
- rpc = int(100000000 * float(a))
-
- self.log_debug("Rpc-token: %s" % rpc)
-
- return millis, rpc
-
-
- def _challenge_v2(self, key, parent=None):
- if parent is None:
- try:
- parent = urlparse.urljoin("http://", urlparse.urlparse(self.plugin.pyfile.url).netloc)
-
- except Exception:
- parent = ""
-
- botguardstring = "!A"
- vers, language, jsh = self._collect_api_info()
- millis, rpc = self._prepare_time_and_rpc()
-
- html = self.plugin.load("https://www.google.com/recaptcha/api2/anchor",
- get={'k' : key,
- 'hl' : language,
- 'v' : vers,
- 'usegapi' : "1",
- 'jsh' : "%s#id=IO_%s" % (jsh, millis),
- 'parent' : parent,
- 'pfname' : "",
- 'rpctoken': rpc})
-
- token1 = re.search(r'id="recaptcha-token" value="(.*?)">', html)
- self.log_debug("Token #1: %s" % token1.group(1))
-
- html = self.plugin.load("https://www.google.com/recaptcha/api2/frame",
- get={'c' : token1.group(1),
- 'hl' : language,
- 'v' : vers,
- 'bg' : botguardstring,
- 'k' : key,
- 'usegapi': "1",
- 'jsh' : jsh},
- decode="unicode-escape")
-
- token2 = re.search(r'"finput","(.*?)",', html)
- self.log_debug("Token #2: %s" % token2.group(1))
-
- token3 = re.search(r'"rresp","(.*?)",', html)
- self.log_debug("Token #3: %s" % token3.group(1))
-
- millis_captcha_loading = int(round(time.time() * 1000))
- captcha_response = self.decrypt_image("https://www.google.com/recaptcha/api2/payload",
- get={'c':token3.group(1), 'k':key},
- cookies=True,
- try_ocr=False)
- response = b64encode('{"response":"%s"}' % captcha_response)
-
- self.log_debug("Result: %s" % response)
-
- timeToSolve = int(round(time.time() * 1000)) - millis_captcha_loading
- timeToSolveMore = timeToSolve + int(float("0." + str(random.randint(1, 99999999))) * 500)
-
- html = self.plugin.load("https://www.google.com/recaptcha/api2/userverify",
- post={'k' : key,
- 'c' : token3.group(1),
- 'response': response,
- 't' : timeToSolve,
- 'ct' : timeToSolveMore,
- 'bg' : botguardstring})
-
- token4 = re.search(r'"uvresp","(.*?)",', html)
- self.log_debug("Token #4: %s" % token4.group(1))
-
- result = token4.group(1)
-
- return result, None
diff --git a/module/plugins/internal/SolveMedia.py b/module/plugins/internal/SolveMedia.py
deleted file mode 100644
index ce4ebb007..000000000
--- a/module/plugins/internal/SolveMedia.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import re
-
-from module.plugins.internal.Plugin import Fail
-from module.plugins.internal.CaptchaService import CaptchaService
-
-
-class SolveMedia(CaptchaService):
- __name__ = "SolveMedia"
- __type__ = "captcha"
- __version__ = "0.15"
- __status__ = "stable"
-
- __description__ = """SolveMedia captcha service plugin"""
- __license__ = "GPLv3"
- __authors__ = [("pyLoad Team", "admin@pyload.org")]
-
-
- KEY_PATTERN = r'api\.solvemedia\.com/papi/challenge\.(?:no)?script\?k=(.+?)["\']'
-
-
- def detect_key(self, data=None):
- html = data or self.retrieve_data()
-
- m = re.search(self.KEY_PATTERN, html)
- if m:
- self.key = m.group(1).strip()
- self.log_debug("Key: %s" % self.key)
- return self.key
- else:
- self.log_warning(_("Key pattern not found")
- return None
-
-
- def challenge(self, key=None, data=None):
- key = key or self.retrieve_key(data)
-
- html = self.plugin.load("http://api.solvemedia.com/papi/challenge.noscript",
- get={'k': key})
-
- for i in xrange(1, 11):
- try:
- magic = re.search(r'name="magic" value="(.+?)"', html).group(1)
-
- except AttributeError:
- self.log_warning(_("Magic pattern not found")
- magic = None
-
- try:
- challenge = re.search(r'<input type=hidden name="adcopy_challenge" id="adcopy_challenge" value="(.+?)">',
- html).group(1)
-
- except AttributeError:
- self.fail(_("SolveMedia challenge pattern not found"))
-
- else:
- self.log_debug("Challenge: %s" % challenge)
-
- try:
- result = self.result("http://api.solvemedia.com/papi/media", challenge)
-
- except Fail, e:
- self.log_warning(e)
- self.plugin.invalidCaptcha()
- result = None
-
- html = self.plugin.load("http://api.solvemedia.com/papi/verify.noscript",
- post={'adcopy_response' : result,
- 'k' : key,
- 'l' : "en",
- 't' : "img",
- 's' : "standard",
- 'magic' : magic,
- 'adcopy_challenge': challenge,
- 'ref' : self.plugin.pyfile.url})
- try:
- redirect = re.search(r'URL=(.+?)">', html).group(1)
-
- except AttributeError:
- self.fail(_("SolveMedia verify pattern not found"))
-
- else:
- if "error" in html:
- self.log_warning(_("Captcha code was invalid"))
- self.log_debug("Retry #%d" % i)
- html = self.plugin.load(redirect)
- else:
- break
-
- else:
- self.fail(_("SolveMedia max retries exceeded"))
-
- return result, challenge
-
-
- def result(self, server, challenge):
- result = self.decrypt_image(server,
- get={'c': challenge},
- cookies=True,
- input_type="gif")
-
- self.log_debug("Result: %s" % result)
-
- return result
diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py
index ec9a18a48..18a50a6b0 100644
--- a/module/plugins/internal/XFSHoster.py
+++ b/module/plugins/internal/XFSHoster.py
@@ -4,8 +4,8 @@ import pycurl
import random
import re
-from module.plugins.internal.ReCaptcha import ReCaptcha
-from module.plugins.internal.SolveMedia import SolveMedia
+from module.plugins.captcha.ReCaptcha import ReCaptcha
+from module.plugins.captcha.SolveMedia import SolveMedia
from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, seconds_to_midnight
from module.utils import html_unescape
@@ -221,7 +221,7 @@ class XFSHoster(SimpleHoster):
m = re.search(self.CAPTCHA_PATTERN, self.html)
if m:
captcha_url = m.group(1)
- inputs['code'] = self.captcha.decrypt_image(captcha_url)
+ inputs['code'] = self.captcha.decrypt(captcha_url)
return
m = re.search(self.CAPTCHA_BLOCK_PATTERN, self.html, re.S)