1 files changed, 91 insertions, 83 deletions
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py
index 37b2bb7ce..135dd90a1 100644
--- a/module/plugins/hoster/FilefactoryCom.py
+++ b/module/plugins/hoster/FilefactoryCom.py
@@ -1,122 +1,133 @@
 # -*- coding: utf-8 -*-
-from __future__ import with_statement
-
 from module.network.RequestFactory import getURL
 from module.plugins.Hoster import Hoster
 from module.plugins.ReCaptcha import ReCaptcha
+from module.utils import parseFileSize
+from module.plugins.Plugin import chunks
+from module.common.json_layer import json_loads
 
 import re
 
-def getInfo(urls):
-    result = []
+def checkFile(plugin, urls):
+    file_info = []
+    url_dict = {}
     
     for url in urls:
-        
-        # Get file info html
-        # @TODO: Force responses in english language so current patterns will be right
-        html = getURL(url)
-        if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html):
-            result.append((url, 0, 1, url))
+        url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url)
+    url_ids = url_dict.keys()
+    urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids)
 
-        # Name
-        name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name')
-        m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html)
+    html = getURL("http://www.filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True)   
         
-        # Size
-        value = float(m.group('size'))
-        units = m.group('units')
-        pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units] 
-        size = int(value*1024**pow)
+    for m in re.finditer(plugin.LC_INFO_PATTERN, html):
+        if m.group('id') in url_ids:
+            url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3])
+            
+    for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html):
+        if m.group('id') in url_ids:
+            url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3])
     
-        # Return info
-        result.append((name, size, 2, url))
-        
-    yield result
+    file_info = url_dict.values()
     
+    return file_info
+   
 class FilefactoryCom(Hoster):
     __name__ = "FilefactoryCom"
     __type__ = "hoster"
-    __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement
-    __version__ = "0.3"
+    __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement
+    __version__ = "0.34"
     __description__ = """Filefactory.Com File Download Hoster"""
-    __author_name__ = ("paulking")
+    __author_name__ = ("paulking", "zoidberg")
     
+    LC_INFO_PATTERN = r'<h1 class="name">(?P<name>[^<]+) \((?P<size>[0-9.]+ \w+)\)</h1>\s*<p>http://www.filefactory.com/file/(?P<id>\w+)/'
+    LC_OFFLINE_PATTERN = r'<p>http://www.filefactory.com/file/(?P<id>\w+)/</p>\s*<p class="errorResponse">'
+ 
     FILE_OFFLINE_PATTERN = r'<title>File Not Found'
     FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'
     FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded'
-    FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\''
-    CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",' 
-    WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"'
+    
+    FILE_CHECK_PATTERN = r'check:\s*\'(?P<check>.*?)\''
+    CAPTCHA_KEY_PATTERN = r'Recaptcha.create\(\s*"(.*?)",' 
     WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"'
-    FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">'
-        
+    FILE_URL_PATTERN = r'<p[^>]*?id="downloadLinkTarget"[^>]*>\s*<a href="(?P<url>.*?)"' 
+            
     def setup(self):
-        self.multiDL = False
+        self.multiDL = self.resumeDownloads = self.premium
 
     def process(self, pyfile):
-    
-        self.pyfile = pyfile
+        # Check file
+        pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0]     
+        if status != 2: self.offline()
+        self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size)) 
         
-        # Force responses language to US English
-        self.req.cj.setCookie("filefactory.com", "ff_locale","")
-
-        # Load main page
-        self.html = self.load(self.pyfile.url, ref=False, decode=True)
-
-        # Check offline
-        if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
-            self.offline()
+        # Handle downloading
+        url = self.checkDirectDownload(pyfile.url)
+        if url:
+            self.download(url)
+        else:                
+            self.html = self.load(pyfile.url, decode = True)
+                      
+            if self.premium:
+                self.handlePremium()
+            else:
+                self.handleFree()
+              
+    def checkDirectDownload(self, url):
+        for i in range(5):
+            header = self.load(url, just_header = True)           
+            if 'location' in header:
+                url = header['location'].strip() 
+                if not url.startswith("http://"):
+                    url = "http://www.filefactory.com" + url
+                self.logDebug('URL: ' + url)
+            elif 'content-disposition' in header:
+                return url
         
-        # File id
-        self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id')
-        self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))
-           
-        # File name
-        self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name')
-
-        # Check Id
-        self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check')
-        self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check))
-
-        # Handle free downloading
-        self.handleFree()
+        return False                                
     
     def handleFree(self):
-    
+        if "Currently only Premium Members can download files larger than" in self.html:
+            self.fail("File too large for free download")
+        elif "All free download slots on this server are currently in use" in self.html:
+            self.retry(50, 900, "All free slots are busy")
+             
+        # Check Id
+        self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check')
+        self.logDebug("File check code is [%s]" % self.check)
+        
         # Resolve captcha
-        self.log.debug("%s: File is captcha protected" % self.__name__)
-        id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey')
+        found = re.search(self.CAPTCHA_KEY_PATTERN, self.html)
+        recaptcha_key = found.group(1) if found else "6LeN8roSAAAAAPdC1zy399Qei4b1BwmSBSsBN8zm"
+        recaptcha = ReCaptcha(self)
+        
         # Try up to 5 times
-        for i in range(5):
-            self.log.debug("%s: Resolving ReCaptcha with key [%s], round %d" % (self.__name__, id, i+1))
-            recaptcha = ReCaptcha(self)
-            challenge, code = recaptcha.challenge(id)
-            response = self.load("http://www.filefactory.com/file/checkCaptcha.php",
-                            post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code})
-            captchavalid = self.handleCaptchaErrors(response)
-            if captchavalid:
+        for i in range(5):           
+            challenge, code = recaptcha.challenge(recaptcha_key)
+            response = json_loads(self.load("http://www.filefactory.com/file/checkCaptcha.php",
+                            post={"check" : self.check, "recaptcha_challenge_field" : challenge, "recaptcha_response_field" : code}))
+            if response['status'] == 'ok':
+                self.correctCaptcha()
                 break
-        if not captchavalid:
+            else:
+                self.invalidCaptcha()                            
+        else:
             self.fail("No valid captcha after 5 attempts")
-
-        # Get wait URL
-        waitpath = re.search(self.WAIT_PATH_PATTERN, response).group('path')
-        waiturl = "http://www.filefactory.com" + waitpath
         
         # This will take us to a wait screen
-        self.log.debug("%s: fetching wait with url [%s]" % (self.__name__, waiturl))
+        waiturl = "http://www.filefactory.com" + response['path']
+        self.logDebug("Fetching wait with url [%s]" % waiturl)
         waithtml = self.load(waiturl, decode=True)
 
         # Find the wait value and wait     
         wait = int(re.search(self.WAIT_PATTERN, waithtml).group('wait'))
-        self.log.debug("%s: Waiting %d seconds." % (self.__name__, wait))
+        self.logDebug("Waiting %d seconds." % wait)
         self.setWait(wait, True)
         self.wait()
 
         # Now get the real download url and retrieve the file
         url = re.search(self.FILE_URL_PATTERN,waithtml).group('url')
         # this may either download our file or forward us to an error page
-        self.log.debug("%s: download url %s" % (self.__name__, url))
+        self.logDebug("Download URL: %s" % url)
         dl = self.download(url)
         
         check = self.checkDownload({"multiple": "You are currently downloading too many files at once.",
@@ -124,17 +135,14 @@ class FilefactoryCom(Hoster):
 
         if check == "multiple":
             self.setWait(15*60)
-            self.log.debug("%s: Parallel downloads detected waiting 15 minutes" % self.__name__)
+            self.logDebug("Parallel downloads detected; waiting 15 minutes")
             self.wait()
             self.retry()
         elif check == "error":
             self.fail("Unknown error")
-
-    def handleCaptchaErrors(self, response):
-        self.log.debug("%s: Result of captcha resolving [%s]" % (self.__name__, response))
-        if 'status:"ok"' in response:
-            self.correctCaptcha()
-            return True
+    
+    def handlePremium(self):
+        self.fail('Please enable direct downloads')
         
-        self.log.debug("%s: Wrong captcha" % self.__name__)
-        self.invalidCaptcha()
+def getInfo(urls):
+    for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk)