summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGravatar Paul King <devnull@localhost> 2011-05-04 21:13:52 +0200
committerGravatar Paul King <devnull@localhost> 2011-05-04 21:13:52 +0200
commitd138ad887aac7d3617e6f626259a492eecc4bc82 (patch)
tree4115cf87ed45aeafecca807a14f35451b5cc1056
parentnew file states (temp. offline, skipped) (diff)
downloadpyload-d138ad887aac7d3617e6f626259a492eecc4bc82.tar.xz
FilesonicCom API fix, New hoster FilefactoryCom
-rw-r--r--module/plugins/hoster/FilefactoryCom.py140
-rw-r--r--module/plugins/hoster/FilesonicCom.py6
2 files changed, 144 insertions, 2 deletions
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py
new file mode 100644
index 000000000..55bdeb5bf
--- /dev/null
+++ b/module/plugins/hoster/FilefactoryCom.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+from __future__ import with_statement
+
+from module.network.RequestFactory import getURL
+from module.plugins.Hoster import Hoster
+from module.plugins.ReCaptcha import ReCaptcha
+
+import re
+
+def getInfo(urls):
+ result = []
+
+ for url in urls:
+
+ # Get file info html
+ # @TODO: Force responses in english language so current patterns will be right
+ html = getURL(url)
+ if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html):
+ result.append((url, 0, 1, url))
+
+ # Name
+ name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name')
+ m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html)
+
+ # Size
+ value = float(m.group('size'))
+ units = m.group('units')
+ pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]
+ size = int(value*1024**pow)
+
+ # Return info
+ result.append((name, size, 2, url))
+
+ yield result
+
+class FilefactoryCom(Hoster):
+ __name__ = "FilefactoryCom"
+ __type__ = "hoster"
+ __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement
+ __version__ = "0.3"
+ __description__ = """Filefactory.Com File Download Hoster"""
+ __author_name__ = ("paulking")
+
+ FILE_OFFLINE_PATTERN = r'<title>File Not Found'
+ FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'
+ FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded'
+ FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\''
+ CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",'
+ WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"'
+ WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"'
+ FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">'
+
+ def setup(self):
+ self.multiDL = False
+
+ def process(self, pyfile):
+
+ self.pyfile = pyfile
+
+ # Force responses language to US English
+ self.req.cj.setCookie("filefactory.com", "ff_locale","")
+
+ # Load main page
+ self.html = self.load(self.pyfile.url, ref=False, utf8=True, cookies=True)
+
+ # Check offline
+ if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
+ self.offline()
+
+ # File id
+ self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id')
+ self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))
+
+ # File name
+ self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name')
+
+ # Check Id
+ self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check')
+ self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check))
+
+ # Handle free downloading
+ self.handleFree()
+
+ def handleFree(self):
+
+ # Resolve captcha
+ self.log.debug("%s: File is captcha protected" % self.__name__)
+ id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey')
+ # Try up to 5 times
+ for i in range(5):
+ self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1))
+ recaptcha = ReCaptcha(self)
+ challenge, code = recaptcha.challenge(id)
+ response = self.load("http://www.filefactory.com/file/checkCaptcha.php",
+ post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})
+ captchavalid = self.handleCaptchaErrors(response)
+ if captchavalid:
+ break
+ if not captchavalid:
+ self.fail("No valid captcha after 5 attempts")
+
+ # Get wait URL
+ waitpath = re.search(self.WAIT_PATH_PATTERN, response).group('path')
+ waiturl = "http://www.filefactory.com" + waitpath
+
+ # This will take us to a wait screen
+ self.log.debug("%s: fetching wait with url [%s]" % (self.__name__, waiturl))
+ waithtml = self.load(waiturl, ref=True, utf8=True, cookies=True)
+
+ # Find the wait value and wait
+ wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait'))
+ self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
+ self.setWait(wait, True)
+ self.wait()
+
+ # Now get the real download url and retrieve the file
+ url = re.search(self.FILE_URL_PATTERN,waithtml).group('url')
+ # this may either download our file or forward us to an error page
+ self.log.debug("%s: download url %s" % (self.__name__, url))
+ dl = self.download(url)
+
+ check = self.checkDownload({"multiple": "You are currently downloading too many files at once.",
+ "error": '<div id="errorMessage">'})
+
+ if check == "multiple":
+ self.setWait(15*60)
+ self.log.debug("%s: Parallel downloads detected waiting 15 minutes" % self.__name__)
+ self.wait()
+ self.retry()
+ elif check == "error":
+ self.fail("Unknown error")
+
+ def handleCaptchaErrors(self, response):
+ self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
+ if 'status:"ok"' in response:
+ self.correctCaptcha()
+ return True
+
+ self.log.debug("%s: Wrong captcha" % self.__name__)
+ self.invalidCaptcha()
diff --git a/module/plugins/hoster/FilesonicCom.py b/module/plugins/hoster/FilesonicCom.py
index c3e3febb8..03bba3a64 100644
--- a/module/plugins/hoster/FilesonicCom.py
+++ b/module/plugins/hoster/FilesonicCom.py
@@ -30,8 +30,8 @@ def getInfo(urls):
yield result
def getDomain():
- html = decode(getURL("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp"))
- return re.search(r"response>.*?filesonic(\..*?)</resp", html).group(1)
+ html = decode(getURL("http://api.filesonic.com/utility?method=getFilesonicDomainForCurrentIp&format=xml"))
+ return re.search(r"response>.*?(filesonic\..*?)</resp", html).group(1)
class FilesonicCom(Hoster):
__name__ = "FilesonicCom"
@@ -140,6 +140,8 @@ class FilesonicCom(Hoster):
if chall:
self.invalidCaptcha()
+ else:
+ self.correctCaptcha()
re_url = re.search(realLinkRegexp, self.html)
if re_url: