summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: RaNaN <Mast3rRaNaN@hotmail.de> 2010-12-28 22:51:10 +0100
committer: RaNaN <Mast3rRaNaN@hotmail.de> 2010-12-28 22:51:10 +0100
commit: 52ba83f29d9221c05434857e79f12c44752a321e (patch)
tree: 42369d33d8b0116524e26c429c649ec40cec516a /module
parent: working speedlimit + proxy support, closed #197 (diff)
download: pyload-52ba83f29d9221c05434857e79f12c44752a321e.tar.xz
more fixes and chunk+resume debug
Diffstat (limited to 'module')
-rw-r--r-- module/config/default.conf 2
-rw-r--r-- module/network/Browser.py 10
-rw-r--r-- module/network/Bucket.py 2
-rw-r--r-- module/network/HTTPChunk.py 18
-rw-r--r-- module/network/HTTPDownload.py 33
-rw-r--r-- module/network/HTTPRequest.py 31
-rw-r--r-- module/plugins/Plugin.py 13
-rw-r--r-- module/plugins/hooks/MultiHome.py 2
-rw-r--r-- module/plugins/hoster/Ftp.py 33
9 files changed, 87 insertions, 57 deletions
diff --git a/module/config/default.conf b/module/config/default.conf
index 143e2520f..b6ecce755 100644
--- a/module/config/default.conf
+++ b/module/config/default.conf
@@ -30,7 +30,7 @@ general - "General":
bool skip_existing : "Skip already existing files" = False
int renice : "CPU Priority" = 0
download - "Download":
- int chunks : "Max connections for one download" = 4
+ int chunks : "Max connections for one download" = 3
int max_downloads : "Max Parallel Downloads" = 3
int max_speed : "Max Download Speed kb/s" = -1
bool limit_speed : "Limit Download Speed" = False
diff --git a/module/network/Browser.py b/module/network/Browser.py
index 0a45c1ef4..19b6aca66 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -81,15 +81,9 @@ class Browser(object):
return self.httpDownload(url, join(folder, file_name), get, post, ref, cookies)
-
- def load(self, url, get={}, post={}, ref=True, cookies=True):
- self.log.warning("Browser: deprecated call 'load'")
-
- return self.getPage(url, get, post, ref, cookies)
-
- def getPage(self, url, get={}, post={}, ref=True, cookies=True):
+ def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False):
""" retrieves page """
- return self.http.load(url, get, post, ref, cookies)
+ return self.http.load(url, get, post, ref, cookies, just_header)
def clean(self):
""" cleanup """
diff --git a/module/network/Bucket.py b/module/network/Bucket.py
index 60d8a757a..1a2d77409 100644
--- a/module/network/Bucket.py
+++ b/module/network/Bucket.py
@@ -34,7 +34,7 @@ class Bucket:
def consumed(self, amount):
""" return time the process have to sleep, after consumed specified amount """
- if self.rate < 0: return 0
+ if self.rate < 10240: return 0 #min. 10kb, may become unresponsive otherwise
self.lock.acquire()
self.calc_tokens()
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 0c184db94..5cb1d9178 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -19,6 +19,7 @@
from os import remove
from os.path import exists
from time import sleep
+from re import search
import pycurl
@@ -143,15 +144,15 @@ class HTTPChunk(HTTPRequest):
self.arrived = self.fp.tell()
if self.range:
- #print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1])
+ print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1])
self.c.setopt(pycurl.RANGE, "%i-%i" % (self.arrived+self.range[0], self.range[1]))
else:
- #print "Resume File from %i" % self.arrived
+ print "Resume File from %i" % self.arrived
self.c.setopt(pycurl.RESUME_FROM, self.arrived)
else:
if self.range:
- #print "Chunked with range %i-%i" % self.range
+ print "Chunked with range %i-%i" % self.range
self.c.setopt(pycurl.RANGE, "%i-%i" % self.range)
self.fp = open(self.p.info.getChunkName(self.id), "wb")
@@ -162,8 +163,15 @@ class HTTPChunk(HTTPRequest):
self.header += buf
#@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers
# as first chunk, we will parse the headers
- if self.header.endswith("\r\n\r\n") and not self.range:
+ if not self.range and self.header.endswith("\r\n\r\n"):
self.parseHeader()
+ elif not self.range and buf.startswith("150") and "data connection" in buf: #ftp file size parsing
+ size = search(r"(\d+) bytes", buf)
+ if size:
+ self.p.size = int(size.group(1))
+ self.p.chunkSupport = True
+
+ self.headerParsed = True
def writeBody(self, buf):
size = len(buf)
@@ -187,7 +195,7 @@ class HTTPChunk(HTTPRequest):
if not self.resume and line.startswith("content-length"):
self.p.size = int(line.split(":")[1])
-
+
self.headerParsed = True
def close(self):
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index e3ac09e84..5ee33608b 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -23,12 +23,13 @@ from shutil import move
import pycurl
-from HTTPRequest import HTTPRequest
from HTTPChunk import ChunkInfo, HTTPChunk
+from HTTPRequest import BadHeader
from module.plugins.Plugin import Abort
-class HTTPDownload(HTTPRequest):
+class HTTPDownload():
+ """ loads a url http + ftp """
def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
interface=None, proxies={}):
self.url = url
@@ -48,10 +49,13 @@ class HTTPDownload(HTTPRequest):
self.chunks = []
self.chunksDone = 0
+ self.infoSaved = False # needed for 1 chunk resume
+
try:
self.info = ChunkInfo.load(filename)
self.info.resume = True #resume is only possible with valid info file
self.size = self.info.size
+ self.infoSaved = True
except IOError:
self.info = ChunkInfo(filename)
@@ -94,6 +98,8 @@ class HTTPDownload(HTTPRequest):
break
fo.write(data)
fi.close()
+ if fo.tell() < self.info.getChunkName(i)[2]:
+ raise Exception("Downloaded content was smaller than expected")
remove(fname) #remove chunk
fo.close()
@@ -112,7 +118,7 @@ class HTTPDownload(HTTPRequest):
def _download(self, chunks, resume):
if not resume:
- self.info.addChunk("%s.chunk0" % self.filename, (0, 0))
+ self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #set a range so the header is not parsed
init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)
@@ -120,6 +126,12 @@ class HTTPDownload(HTTPRequest):
self.m.add_handle(init.getHandle())
while 1:
+ if (chunks == 1) and self.chunkSupport and self.size and not self.infoSaved:
+ self.info.setSize(self.size)
+ self.info.createChunks(1)
+ self.info.save()
+ self.infoSaved = True
+
#need to create chunks
if len(self.chunks) < chunks and self.chunkSupport and self.size: #will be set later by first chunk
@@ -184,20 +196,29 @@ class HTTPDownload(HTTPRequest):
if self.abort:
raise Abort()
- sleep(0.001) #supress busy waiting - limits dl speed to (1 / x) * buffersize
+ sleep(0.005) #supress busy waiting - limits dl speed to (1 / x) * buffersize
self.m.select(1)
+ failed = False
for chunk in self.chunks:
+ try:
+ chunk.verifyHeader()
+ except BadHeader, e:
+ failed = e.code
+ remove(self.info.getChunkName(chunk.id))
+
chunk.fp.close()
self.m.remove_handle(chunk.c)
+ if failed: raise BadHeader(failed)
+
self._copyChunks()
def clean(self):
""" cleanup """
for chunk in self.chunks:
- chunk.close()
- self.m.remove_handle(chunk.c)
+ chunk.close()
+ self.m.remove_handle(chunk.c)
self.m.close()
self.chunks = []
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index 805305f80..b4bb0857a 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -22,9 +22,17 @@ import pycurl
from urllib import quote, urlencode
from cStringIO import StringIO
+from module.plugins.Plugin import Abort
+
def myquote(url):
return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+class BadHeader(Exception):
+ def __init__(self, code):
+ Exception.__init__(self, "Bad server response: %s"% code)
+ self.code = code
+
+
class HTTPRequest():
def __init__(self, cookies=None, interface=None, proxies=None):
self.c = pycurl.Curl()
@@ -35,6 +43,7 @@ class HTTPRequest():
self.lastURL = None
self.lastEffectiveURL = None
self.abort = False
+ self.code = 0 # last http code
self.header = ""
@@ -118,7 +127,7 @@ class HTTPRequest():
self.getCookies()
- def load(self, url, get={}, post={}, referer=True, cookies=True):
+ def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False):
""" load and returns a given page """
self.setRequestContext(url, get, post, referer, cookies)
@@ -126,15 +135,29 @@ class HTTPRequest():
self.header = ""
self.c.setopt(pycurl.WRITEFUNCTION, self.write)
self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
- #@TODO header_only, raw_cookies and some things in old backend, which are apperently not needed
+ #@TODO raw_cookies and some things in old backend, which are apperently not needed
+
+ if just_header:
+ self.c.setopt(pycurl.NOBODY, 1)
+ self.c.perform()
+ rep = self.header
+ else:
+ self.c.perform()
+ rep = self.getResponse()
- self.c.perform()
+ self.code = self.verifyHeader()
self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
self.addCookies()
- return self.getResponse()
+ return rep
+ def verifyHeader(self):
+ """ raise an exceptions on bad headers """
+ code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+ if code in range(400,404) or code in range(405,418) or code in range(500,506):
+ raise BadHeader(code) #404 will NOT raise an exception
+ return code
def getResponse(self):
""" retrieve response from string io """
diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py
index 3da6e5116..a7ee72fd2 100644
--- a/module/plugins/Plugin.py
+++ b/module/plugins/Plugin.py
@@ -78,7 +78,6 @@ class Plugin(object):
__author_name__ = ("RaNaN", "spoob", "mkaay")
__author_mail__ = ("RaNaN@pyload.org", "spoob@pyload.org", "mkaay@mkaay.de")
-
def __init__(self, pyfile):
self.config = pyfile.m.core.config
self.core = pyfile.m.core
@@ -91,8 +90,6 @@ class Plugin(object):
self.waitUntil = 0 # time() + wait in seconds
self.waiting = False
-
- self.premium = False
self.ocr = None # captcha reader instance
self.account = pyfile.m.core.accountManager.getAccountPlugin(self.__name__) # account handler instance
@@ -100,7 +97,8 @@ class Plugin(object):
if self.account:
self.user, data = self.account.selectAccount()
self.req = self.account.getAccountRequest(self.user)
- #self.req.canContinue = True
+ self.chunkLimit = -1 #enable chunks for all premium plugins
+ self.resumeDownload = True #also enable resume (both will be ignored if server dont accept chunks)
else:
self.req = pyfile.m.core.requestFactory.getRequest(self.__name__)
@@ -286,7 +284,10 @@ class Plugin(object):
""" returns the content loaded """
if self.pyfile.abort: raise Abort
- res = self.req.getPage(url, get, post, ref, cookies)
+ if raw_cookies: self.log.warning("Deprecated argument raw cookies: %s" % raw_cookies)
+ if no_post_encode: self.log.warning("Deprecated argument no_post_encode: %s" % no_post_encode)
+
+ res = self.req.load(url, get, post, ref, cookies, just_header)
if self.core.debug:
from inspect import currentframe
frame = currentframe()
@@ -329,7 +330,7 @@ class Plugin(object):
name = self.pyfile.name.encode(sys.getfilesystemencoding(), "replace")
filename = join(location, name)
- self.req.httpDownload(url, filename, get=get, post=post, chunks=self.getChunkCount(), resume=self.resumeDownload)
+ self.req.httpDownload(url, filename, get=get, post=post, ref=ref, chunks=self.getChunkCount(), resume=self.resumeDownload)
newname = basename(filename)
diff --git a/module/plugins/hooks/MultiHome.py b/module/plugins/hooks/MultiHome.py
index 88491f6f4..4678412bf 100644
--- a/module/plugins/hooks/MultiHome.py
+++ b/module/plugins/hooks/MultiHome.py
@@ -34,7 +34,7 @@ class MultiHome(Hook):
self.interfaces = []
self.parseInterfaces(self.getConfig("interfaces").split(";"))
if not self.interfaces:
- self.parseInterfaces([self.config["general"]["download_interface"]])
+ self.parseInterfaces([self.config["download"]["interface"]])
self.setConfig("interfaces", self.toConfig())
def toConfig(self):
diff --git a/module/plugins/hoster/Ftp.py b/module/plugins/hoster/Ftp.py
index 9303b00c8..ca0689c62 100644
--- a/module/plugins/hoster/Ftp.py
+++ b/module/plugins/hoster/Ftp.py
@@ -14,17 +14,10 @@
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
- @author: jeix
+ @author: jeix
@author: mkaay
"""
-import logging
-from os.path import exists
-from os.path import join
-from os.path import exists
-from os import makedirs
-import sys
-
from module.plugins.Hoster import Hoster
@@ -38,21 +31,11 @@ class Ftp(Hoster):
__author_mail__ = ("jeix@hasnomail.com", "mkaay@mkaay.de")
def process(self, pyfile):
- self.req = pyfile.m.core.requestFactory.getRequest(self.__name__, type="FTP")
+ self.req = pyfile.m.core.requestFactory.getRequest(self.__name__)
pyfile.name = self.pyfile.url.rpartition('/')[2]
-
- self.doDownload(pyfile.url, pyfile.name)
-
- def doDownload(self, url, filename):
- self.pyfile.setStatus("downloading")
-
- download_folder = self.core.config['general']['download_folder']
- location = join(download_folder, self.pyfile.package().folder.decode(sys.getfilesystemencoding()))
- if not exists(location):
- makedirs(location)
-
- newname = self.req.download(str(url), join(location, filename.decode(sys.getfilesystemencoding())))
- self.pyfile.size = self.req.dl_size
-
- if newname:
- self.pyfile.name = newname
+
+ self.chunkLimit = -1
+ self.resumeDownload = True
+
+ self.download(pyfile.url)
+