diff options
author | Paul King <devnull@localhost> | 2011-05-04 21:13:52 +0200 |
---|---|---|
committer | Paul King <devnull@localhost> | 2011-05-04 21:13:52 +0200 |
commit | d138ad887aac7d3617e6f626259a492eecc4bc82 (patch) | |
tree | 4115cf87ed45aeafecca807a14f35451b5cc1056 | |
parent | new file states (temp. offline, skipped) (diff) | |
download | pyload-d138ad887aac7d3617e6f626259a492eecc4bc82.tar.xz |
FilesonicCom API fix, New hoster FilefactoryCom
-rw-r--r-- | module/plugins/hoster/FilefactoryCom.py | 140 | ||||
-rw-r--r-- | module/plugins/hoster/FilesonicCom.py | 6 |
2 files changed, 144 insertions, 2 deletions
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py new file mode 100644 index 000000000..55bdeb5bf --- /dev/null +++ b/module/plugins/hoster/FilefactoryCom.py @@ -0,0 +1,140 @@ +# -*- coding: utf-8 -*- +from __future__ import with_statement + +from module.network.RequestFactory import getURL +from module.plugins.Hoster import Hoster +from module.plugins.ReCaptcha import ReCaptcha + +import re + +def getInfo(urls): + result = [] + + for url in urls: + + # Get file info html + # @TODO: Force responses in english language so current patterns will be right + html = getURL(url) + if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html): + result.append((url, 0, 1, url)) + + # Name + name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name') + m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html) + + # Size + value = float(m.group('size')) + units = m.group('units') + pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] + size = int(value*1024**pow) + + # Return info + result.append((name, size, 2, url)) + + yield result + +class FilefactoryCom(Hoster): + __name__ = "FilefactoryCom" + __type__ = "hoster" + __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement + __version__ = "0.3" + __description__ = """Filefactory.Com File Download Hoster""" + __author_name__ = ("paulking") + + FILE_OFFLINE_PATTERN = r'<title>File Not Found' + FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>' + FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded' + FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\'' + CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",' + WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"' + WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"' + FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">' + + def setup(self): + self.multiDL = False + + def process(self, pyfile): + + self.pyfile = pyfile + + # Force responses language to US English + self.req.cj.setCookie("filefactory.com", "ff_locale","") + + # Load main page + self.html = self.load(self.pyfile.url, ref=False, utf8=True, cookies=True) + + # Check offline + if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None: + self.offline() + + # File id + self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id') + self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id)) + + # File name + self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name') + + # Check Id + self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check') + self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check)) + + # Handle free downloading + self.handleFree() + + def handleFree(self): + + # Resolve captcha + self.log.debug("%s: File is captcha protected" % self.__name__) + id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey') + # Try up to 5 times + for i in range(5): + self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1)) + recaptcha = ReCaptcha(self) + challenge, code = recaptcha.challenge(id) + response = self.load("http://www.filefactory.com/file/checkCaptcha.php", + post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code}) + captchavalid = self.handleCaptchaErrors(response) + if captchavalid: + break + if not captchavalid: + self.fail("No valid captcha after 5 attempts") + + # Get wait URL + waitpath = re.search(self.WAIT_PATH_PATTERN, response).group('path') + waiturl = "http://www.filefactory.com" + waitpath + + # This will take us to a wait screen + self.log.debug("%s: fetching wait with url [%s]" % (self.__name__, waiturl)) + waithtml = self.load(waiturl, ref=True, utf8=True, cookies=True) + + # Find the wait value and wait + wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait')) + self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait)) + self.setWait(wait, True) + self.wait() + + # Now get the real download url and retrieve the file + url = re.search(self.FILE_URL_PATTERN,waithtml).group('url') + # this may either download our file or forward us to an error page + self.log.debug("%s: download url %s" % (self.__name__, url)) + dl = self.download(url) + + check = self.checkDownload({"multiple": "You are currently downloading too many files at once.", + "error": '<div id="errorMessage">'}) + + if check == "multiple": + self.setWait(15*60) + self.log.debug("%s: Parallel downloads detected waiting 15 minutes" % self.__name__) + self.wait() + self.retry() + elif check == "error": + self.fail("Unknown error") + + def handleCaptchaErrors(self, response): + self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response)) + if 'status:"ok"' in response: + self.correctCaptcha() + return True + + self.log.debug("%s: Wrong captcha" % self.__name__) + self.invalidCaptcha() diff --git a/module/plugins/hoster/FilesonicCom.py b/module/plugins/hoster/FilesonicCom.py index c3e3febb8..03bba3a64 100644 --- a/module/plugins/hoster/FilesonicCom.py +++ b/module/plugins/hoster/FilesonicCom.py @@ -30,8 +30,8 @@ def getInfo(urls): yield result
def getDomain():
- html = decode(getURL("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp"))
- return re.search(r"response>.*?filesonic(\..*?)</resp", html).group(1)
+ html = decode(getURL("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp&format=xml"))
+ return re.search(r"response>.*?(filesonic\..*?)</resp", html).group(1)
class FilesonicCom(Hoster):
__name__ = "FilesonicCom"
@@ -140,6 +140,8 @@ class FilesonicCom(Hoster): if chall:
self.invalidCaptcha()
+ else:
+ self.correctCaptcha()
re_url = re.search(realLinkRegexp, self.html)
if re_url:
|