From e4042057fcc010f4e3bcb60bb4a27b42efacf48b Mon Sep 17 00:00:00 2001 From: Walter Purcaro Date: Fri, 3 Oct 2014 08:35:26 +0200 Subject: [XFileSharingPro] Captcha refactoring + pattern improvement --- module/plugins/hoster/XFileSharingPro.py | 73 +++++++++++++++++--------------- 1 file changed, 40 insertions(+), 33 deletions(-) (limited to 'module') diff --git a/module/plugins/hoster/XFileSharingPro.py b/module/plugins/hoster/XFileSharingPro.py index 233a1fed3..8edfd8988 100644 --- a/module/plugins/hoster/XFileSharingPro.py +++ b/module/plugins/hoster/XFileSharingPro.py @@ -21,7 +21,7 @@ class XFileSharingPro(SimpleHoster): """ __name__ = "XFileSharingPro" __type__ = "hoster" - __version__ = "0.36" + __version__ = "0.37" __pattern__ = r'^unmatchable$' @@ -40,7 +40,7 @@ class XFileSharingPro(SimpleHoster): FILE_NAME_PATTERN = r'[\d\.\,]+) ?(?P\w+)?\)' - OFFLINE_PATTERN = r'>\w+ (Not Found|file (was|has been) removed)' + OFFLINE_PATTERN = r'>\s*\w+ (Not Found|file (was|has been) removed)' WAIT_PATTERN = r'.*?>(\d+)' @@ -48,11 +48,11 @@ class XFileSharingPro(SimpleHoster): LINK_PATTERN = None #: final download url pattern CAPTCHA_URL_PATTERN = r'(http://[^"\']+?/captchas?/[^"\']+)' - RECAPTCHA_URL_PATTERN = r'http://[^"\']+?recaptcha[^"\']+?\?k=([^"\']+)"' - CAPTCHA_DIV_PATTERN = r'>Enter code.*?(.*?)' - SOLVEMEDIA_PATTERN = r'http:\/\/api\.solvemedia\.com\/papi\/challenge\.script\?k=(.*?)"' + CAPTCHA_DIV_PATTERN = r'>Enter code.*?(.+?)' + RECAPTCHA_PATTERN = None + SOLVEMEDIA_PATTERN = None - ERROR_PATTERN = r'class=["\']err["\'][^>]*>(.*?)]*>(.+?)(\d)', html_unescape(captcha_div)) - inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) - self.logDebug("CAPTCHA", inputs['code'], numerals) - return 3 - else: - m = re.search(self.SOLVEMEDIA_PATTERN, self.html) - if m: - captcha_key = m.group(1) - captcha = SolveMedia(self) - inputs['adcopy_challenge'], inputs['adcopy_response'] = captcha.challenge(captcha_key) - return 4 + + m = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.DOTALL) + if m: + captcha_div = m.group(1) + self.logDebug(captcha_div) + numerals = re.findall(r'(\d)', html_unescape(captcha_div)) + inputs['code'] = "".join([a[1] for a in sorted(numerals, key=lambda num: int(num[0]))]) + self.logDebug("CAPTCHA", inputs['code'], numerals) + return 2 + + recaptcha = ReCaptcha(self) + try: + captcha_key = re.search(self.RECAPTCHA_PATTERN, self.html).group(1) + except: + captcha_key = recaptcha.detect_key() + + if captcha_key: + self.logDebug("RECAPTCHA KEY: %s" % captcha_key) + inputs['recaptcha_challenge_field'], inputs['recaptcha_response_field'] = recaptcha.challenge(captcha_key) + return 3 + + solvemedia = SolveMedia(self) + try: + captcha_key = re.search(self.SOLVEMEDIA_PATTERN, self.html).group(1) + except: + captcha_key = solvemedia.detect_key() + + if captcha_key: + inputs['adcopy_challenge'], inputs['adcopy_response'] = solvemedia.challenge(captcha_key) + return 4 + return 0 -- cgit v1.2.3