summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: zoidberg10 <zoidberg@mujmail.cz> 2012-02-08 21:27:49 +0100
committer: zoidberg10 <zoidberg@mujmail.cz> 2012-02-08 21:27:49 +0100
commit: f226ac102cee63721fcbaffc60dcdf75c242d5e6 (patch)
tree: c90ebaa2563c8126d37c85cc737e2962cfe7d5ca /module
parent: Handle Oron TOS errors (diff)
download: pyload-f226ac102cee63721fcbaffc60dcdf75c242d5e6.tar.xz
filefactory premium, uloz.to new url pattern
Diffstat (limited to 'module')
-rw-r--r-- module/plugins/accounts/FilefactoryCom.py | 54
-rw-r--r-- module/plugins/hoster/FilefactoryCom.py | 115
-rw-r--r-- module/plugins/hoster/UlozTo.py | 82
3 files changed, 161 insertions, 90 deletions
diff --git a/module/plugins/accounts/FilefactoryCom.py b/module/plugins/accounts/FilefactoryCom.py
new file mode 100644
index 000000000..8c04cf49b
--- /dev/null
+++ b/module/plugins/accounts/FilefactoryCom.py
@@ -0,0 +1,54 @@
+# -*- coding: utf-8 -*-
+
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: zoidberg
+"""
+
+from module.plugins.Account import Account
+import re
+from time import mktime, strptime
+
+class FilefactoryCom(Account):
+ __name__ = "FilefactoryCom"
+ __version__ = "0.1"
+ __type__ = "account"
+ __description__ = """filefactory.com account plugin"""
+ __author_name__ = ("zoidberg")
+ __author_mail__ = ("zoidberg@mujmail.cz")
+
+ ACCOUNT_INFO_PATTERN = r'Your account is valid until the <strong>(.*?)</strong>'
+
+ def loadAccountInfo(self, user, req):
+ premium = False
+ validuntil = -1
+
+ html = req.load("http://filefactory.com/member/")
+ if "You are a FileFactory Premium Member" in html:
+ premium = True
+ found = re.search(self.ACCOUNT_INFO_PATTERN, html)
+ if found:
+ validuntil = mktime(strptime(re.sub(r"(\d)[a-z]{2} ", r"\1 ", found.group(1)),"%d %B, %Y"))
+
+ return {"premium": premium, "trafficleft": -1, "validuntil": validuntil}
+
+ def login(self, user, data, req):
+ html = req.load("http://filefactory.com/member/login.php", post={
+ "email": user,
+ "password": data["password"],
+ "redirect": "/"})
+
+ if not re.search(r'location:.*?\?login=1', req.http.header, re.I):
+ self.wrongPassword()
\ No newline at end of file
diff --git a/module/plugins/hoster/FilefactoryCom.py b/module/plugins/hoster/FilefactoryCom.py
index 37b2bb7ce..17520a6c3 100644
--- a/module/plugins/hoster/FilefactoryCom.py
+++ b/module/plugins/hoster/FilefactoryCom.py
@@ -1,88 +1,95 @@
# -*- coding: utf-8 -*-
-from __future__ import with_statement
-
from module.network.RequestFactory import getURL
from module.plugins.Hoster import Hoster
from module.plugins.ReCaptcha import ReCaptcha
+from module.utils import parseFileSize
+from module.plugins.Plugin import chunks
import re
-def getInfo(urls):
- result = []
+def checkFile(plugin, urls):
+ file_info = []
+ url_dict = {}
for url in urls:
-
- # Get file info html
- # @TODO: Force responses in english language so current patterns will be right
- html = getURL(url)
- if re.search(FilefactoryCom.FILE_OFFLINE_PATTERN, html):
- result.append((url, 0, 1, url))
+ url_dict[re.search(plugin.__pattern__, url).group('id')] = (url, 0, 0, url)
+ url_ids = url_dict.keys()
+ urls = map(lambda url_id: 'http://www.filefactory.com/file/' + url_id, url_ids)
- # Name
- name = re.search(FilefactoryCom.FILE_NAME_PATTERN, html).group('name')
- m = re.search(FilefactoryCom.FILE_INFO_PATTERN, html)
+ html = getURL("http://filefactory.com/tool/links.php", post = {"func": "links", "links": "\n".join(urls)}, decode=True)
- # Size
- value = float(m.group('size'))
- units = m.group('units')
- pow = {'KB' : 1, 'MB' : 2, 'GB' : 3}[units]
- size = int(value*1024**pow)
+ for m in re.finditer(plugin.LC_INFO_PATTERN, html):
+ if m.group('id') in url_ids:
+ url_dict[m.group('id')] = (m.group('name'), parseFileSize(m.group('size')), 2, url_dict[m.group('id')][3])
+
+ for m in re.finditer(plugin.LC_OFFLINE_PATTERN, html):
+ if m.group('id') in url_ids:
+ url_dict[m.group('id')] = (url_dict[m.group('id')][0], 0, 1, url_dict[m.group('id')][3])
- # Return info
- result.append((name, size, 2, url))
-
- yield result
+ file_info = url_dict.values()
+ return file_info
+
class FilefactoryCom(Hoster):
__name__ = "FilefactoryCom"
__type__ = "hoster"
- __pattern__ = r"http://(www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+)" # URLs given out are often longer but this is the requirement
- __version__ = "0.3"
+ __pattern__ = r"http://(?:www\.)?filefactory\.com/file/(?P<id>[a-zA-Z0-9]+).*" # URLs given out are often longer but this is the requirement
+ __version__ = "0.31"
__description__ = """Filefactory.Com File Download Hoster"""
- __author_name__ = ("paulking")
+ __author_name__ = ("paulking", "zoidberg")
+
+ LC_INFO_PATTERN = r'<tr class="(even|odd)">\s*<td>\s*<a href="http://www.filefactory.com/file/(?P<id>\w+)[^"]*">(?P<name>[^<]+)</a>\s*.*\s*</td>\s*<td>(?P<size>[0-9.]+ \w+)</td>'
+ LC_OFFLINE_PATTERN = r'<li class="(even|odd)">\s*<div class="metadata">http://www.filefactory.com/file/(?P<id>\w+)/</div>'
FILE_OFFLINE_PATTERN = r'<title>File Not Found'
FILE_NAME_PATTERN = r'<span class="last">(?P<name>.*?)</span>'
FILE_INFO_PATTERN = r'<span>(?P<size>\d(\d|\.)*) (?P<units>..) file uploaded'
+
FILE_CHECK_PATTERN = r'check:\'(?P<check>.*?)\''
CAPTCHA_KEY_PATTERN = r'Recaptcha.create\("(?P<recaptchakey>.*?)",'
WAIT_PATH_PATTERN = r'path:"(?P<path>.*?)"'
WAIT_PATTERN = r'id="startWait" value="(?P<wait>\d+)"'
FILE_URL_PATTERN = r'<a href="(?P<url>.*?)" id="downloadLinkTarget">'
-
+
def setup(self):
- self.multiDL = False
+ self.multiDL = self.resumeDownloads = self.premium
def process(self, pyfile):
-
- self.pyfile = pyfile
+ # Check file
+ pyfile.name, pyfile.size, status, self.url = checkFile(self, [pyfile.url])[0]
+ if status != 2: self.offline()
+ self.logDebug("File Name: %s Size: %d" % (pyfile.name, pyfile.size))
- # Force responses language to US English
- self.req.cj.setCookie("filefactory.com", "ff_locale","")
-
- # Load main page
- self.html = self.load(self.pyfile.url, ref=False, decode=True)
-
- # Check offline
- if re.search(self.FILE_OFFLINE_PATTERN, self.html) is not None:
- self.offline()
+ # Handle downloading
+ url = self.checkDirectDownload(pyfile.url)
+ if url:
+ self.download(url)
+ else:
+ self.html = self.load(pyfile.url, decode = True)
+
+ if self.premium:
+ self.handlePremium()
+ else:
+ self.handleFree()
+
+ def checkDirectDownload(self, url):
+ for i in range(5):
+ header = self.load(url, just_header = True)
+ if 'location' in header:
+ url = header['location'].strip()
+ if not url.startswith("http://"):
+ url = "http://www.filefactory.com" + url
+ self.logDebug('URL: ' + url)
+ elif 'content-disposition' in header:
+ return url
- # File id
- self.file_id = re.match(self.__pattern__, self.pyfile.url).group('id')
- self.log.debug("%s: File id is [%s]" % (self.__name__, self.file_id))
-
- # File name
- self.pyfile.name = re.search(self.FILE_NAME_PATTERN, self.html).group('name')
-
+ return False
+
+ def handleFree(self):
# Check Id
self.check = re.search(self.FILE_CHECK_PATTERN, self.html).group('check')
self.log.debug("%s: File check code is [%s]" % (self.__name__, self.check))
-
- # Handle free downloading
- self.handleFree()
-
- def handleFree(self):
-
+
# Resolve captcha
self.log.debug("%s: File is captcha protected" % self.__name__)
id = re.search(self.CAPTCHA_KEY_PATTERN, self.html).group('recaptchakey')
@@ -138,3 +145,9 @@ class FilefactoryCom(Hoster):
self.log.debug("%s: Wrong captcha" % self.__name__)
self.invalidCaptcha()
+
+ def handlePremium(self):
+ self.fail('Please enable direct downloads')
+
+def getInfo(urls):
+ for chunk in chunks(urls, 100): yield checkFile(FilefactoryCom, chunk)
diff --git a/module/plugins/hoster/UlozTo.py b/module/plugins/hoster/UlozTo.py
index 5f482e189..a67e52d4d 100644
--- a/module/plugins/hoster/UlozTo.py
+++ b/module/plugins/hoster/UlozTo.py
@@ -21,54 +21,57 @@ from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo
def convertDecimalPrefix(m):
# decimal prefixes used in filesize and traffic
- return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','')
+ return ("%%.%df" % {'k':3,'M':6,'G':9}[m.group(2)] % float(m.group(1))).replace('.','')
class UlozTo(SimpleHoster):
__name__ = "UlozTo"
__type__ = "hoster"
- __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\d+/[^/?]*)"
- __version__ = "0.83"
+ __pattern__ = r"http://(\w*\.)?(uloz\.to|ulozto\.(cz|sk|net)|bagruj.cz|zachowajto.pl)/(?:live/)?(?P<id>\w+/[^/?]*)"
+ __version__ = "0.84"
__description__ = """uloz.to"""
__author_name__ = ("zoidberg")
- FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>'
- FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>'
- FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)]
- FILE_OFFLINE_PATTERN = ur'<title>(404 - Page not found|Stránka nenalezena|Nie można wyświetlić strony)</title>'
-
- PASSWD_PATTERN = r'<input type="password" class="text" name="file_password" id="frmfilepasswordForm-file_password" />'
- VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">'
+ FILE_NAME_PATTERN = r'<a href="#download" class="jsShowDownload">(?P<N>[^<]+)</a>'
+ FILE_SIZE_PATTERN = r'<span id="fileSize">(?P<S>[^<]+)</span>'
+ FILE_INFO_PATTERN = r'<p>File <strong>(?P<N>[^<]+)</strong> is password protected</p>'
+ FILE_OFFLINE_PATTERN = r'<title>404 - Page not found</title>|<h1 class="h1">File was banned</h1>'
+ FILE_SIZE_REPLACEMENTS = [('([0-9.]+)\s([kMG])B', convertDecimalPrefix)]
+ FILE_URL_REPLACEMENTS = [(r"(?<=http://)([^/]+)", "www.ulozto.net")]
+
+ PASSWD_PATTERN = r'<div class="passwordProtectedFile">'
+ VIPLINK_PATTERN = r'<a href="[^"]*\?disclaimer=1" class="linkVip">'
FREE_URL_PATTERN = r'<div class="freeDownloadForm"><form action="([^"]+)"'
PREMIUM_URL_PATTERN = r'<div class="downloadForm"><form action="([^"]+)"'
CAPTCHA_PATTERN = r'<img class="captcha" src="(.*?(\d+).png)" alt="" />'
-
- def process(self, pyfile):
- self.url = "http://www.ulozto.net/" + re.match(self.__pattern__, pyfile.url).group('id')
-
- self.html = self.load(self.url, decode=True)
-
- # password protected links
- passwords = self.getPassword().splitlines()
+
+ def setup(self):
+ self.multiDL = self.resumeDownload = True
+
+ def process(self, pyfile):
+ pyfile.url = re.sub(r"(?<=http://)([^/]+)", "www.ulozto.net", pyfile.url)
+ self.html = self.load(pyfile.url, decode = True, cookies = False)
+
+ passwords = self.getPassword().splitlines()
while self.PASSWD_PATTERN in self.html:
if passwords:
password = passwords.pop(0)
self.logInfo("Password protected link, trying " + password)
- self.html = self.load(self.url, get = {"do": "filepasswordForm-submit"}, post={"file_password": password, "fpwdsend": 'Odeslat'}, cookies=True)
+ self.html = self.load(pyfile.url, get = {"do": "passwordProtectedForm-submit"},
+ post={"password": password, "password_send": 'Send'}, cookies=True)
else:
self.fail("No or incorrect password")
-
- self.file_info = self.getFileInfo()
-
- # adult content
+
if re.search(self.VIPLINK_PATTERN, self.html):
- self.html = self.load(self.url, get={"disclaimer": "1"})
-
+ self.html = self.load(pyfile.url, get={"disclaimer": "1"})
+
+ self.file_info = self.getFileInfo()
+
if self.premium and self.checkTrafficLeft():
self.handlePremium()
- else:
+ else:
self.handleFree()
-
- def handleFree(self):
+
+ def handleFree(self):
parsed_url = self.findDownloadURL(premium=False)
# get and decrypt captcha
@@ -82,21 +85,21 @@ class UlozTo(SimpleHoster):
captcha_url, captcha_id = found.groups()
captcha_text = self.decryptCaptcha(captcha_url)
-
+
self.log.debug('CAPTCHA_URL:' + captcha_url + ' CAPTCHA ID:' + captcha_id + ' CAPTCHA TEXT:' + captcha_text)
- # download and check
+ # download and check
self.download(parsed_url, post={"captcha[id]": captcha_id, "captcha[text]": captcha_text, "freeDownload": "Download"}, cookies=True)
- self.doCheckDownload()
-
+ self.doCheckDownload()
+
self.setStorage("captcha_id", captcha_id)
self.setStorage("captcha_text", captcha_text)
-
+
def handlePremium(self):
parsed_url = self.findDownloadURL(premium=True)
self.download(parsed_url, post={"download": "Download"})
self.doCheckDownload()
-
+
def findDownloadURL(self, premium=False):
msg = "%s link" % ("Premium" if premium else "Free")
found = re.search(self.PREMIUM_URL_PATTERN if premium else self.FREE_URL_PATTERN, self.html)
@@ -104,13 +107,13 @@ class UlozTo(SimpleHoster):
parsed_url = "http://www.ulozto.net" + found.group(1)
self.logDebug("%s: %s" % (msg, parsed_url))
return parsed_url
-
+
def doCheckDownload(self):
check = self.checkDownload({
"wrong_captcha": re.compile(self.CAPTCHA_PATTERN),
"offline": re.compile(self.FILE_OFFLINE_PATTERN),
"passwd": self.PASSWD_PATTERN,
- "paralell_dl": u'<h2 class="center">Z Vašeho počítače se již stahuje</h2>'
+ "paralell_dl": re.compile(r'<title>Uloz.to - Ji. stahuje.</title>')
})
if check == "wrong_captcha":
@@ -123,8 +126,9 @@ class UlozTo(SimpleHoster):
elif check == "passwd":
self.fail("Wrong password")
elif check == "paralell_dl":
- self.setWait(600, True)
+ self.multiDL = False
+ self.setWait(300, True)
self.wait()
- self.retry()
+ self.retry()
-getInfo = create_getInfo(UlozTo)
\ No newline at end of file
+getInfo = create_getInfo(UlozTo)
\ No newline at end of file