summaryrefslogtreecommitdiffstats
path: root/module/network
diff options
context:
space:
mode:
authorGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2011-12-04 13:39:42 +0100
committerGravatar RaNaN <Mast3rRaNaN@hotmail.de> 2011-12-04 13:39:42 +0100
commitd2e3afceb738af20aeb8e41f9aad12150cf1e8a7 (patch)
tree91a1ce5bc7fb51be6c3d188aed11552662d6f4bf /module/network
parentclosed #440 (diff)
downloadpyload-d2e3afceb738af20aeb8e41f9aad12150cf1e8a7.tar.xz
Better download connection handling: Detect server error earlier, fallback to single connection if possible
Diffstat (limited to 'module/network')
-rw-r--r--module/network/HTTPChunk.py22
-rw-r--r--module/network/HTTPDownload.py91
-rw-r--r--module/network/HTTPRequest.py7
3 files changed, 92 insertions, 28 deletions
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 69eedb19c..582067aa8 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -16,7 +16,7 @@
@author: RaNaN
"""
-from os import remove, stat
+from os import remove, stat, fsync
from os.path import exists
from time import sleep
from re import search
@@ -146,6 +146,9 @@ class HTTPChunk(HTTPRequest):
self.sleep = 0.000
self.lastSize = 0
+ def __repr__(self):
+ return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
@property
def cj(self):
return self.p.cj
@@ -157,7 +160,7 @@ class HTTPChunk(HTTPRequest):
self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
- # request one byte more, since some servers in russia seems to have a defect arihmetic unit
+ # request all bytes, since some servers in russia seem to have a defective arithmetic unit
if self.resume:
self.fp = open(self.p.info.getChunkName(self.id), "ab")
@@ -259,10 +262,25 @@ class HTTPChunk(HTTPRequest):
self.headerParsed = True
+ def stop(self):
+ """The download will not proceed after next call of writeBody"""
+ self.range = [0,0]
+ self.size = 0
+
+ def resetRange(self):
+ """ Reset the range, so the download will load all data available """
+ self.range = None
+
def setRange(self, range):
self.range = range
self.size = range[1] - range[0]
+ def flushFile(self):
+ """ flush and close file """
+ self.fp.flush()
+ fsync(self.fp.fileno()) #make sure everything was written to disk
+ self.fp.close() #needs to be closed, or merging chunks will fail
+
def close(self):
""" closes everything, unusable after this """
if self.fp: self.fp.close()
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index 1a2886332..13c674833 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -140,7 +140,7 @@ class HTTPDownload():
return self._download(chunks, False)
else:
- raise e
+ raise
finally:
self.close()
@@ -161,7 +161,7 @@ class HTTPDownload():
lastFinishCheck = 0
lastTimeCheck = 0
- chunksDone = set()
+ chunksDone = set() # set of curl handles that are finished
chunksCreated = False
done = False
if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can
@@ -202,32 +202,76 @@ class HTTPDownload():
t = time()
# reduce these calls
- while lastFinishCheck + 1 < t:
+ while lastFinishCheck + 0.5 < t:
+ # list of failed curl handles
+ failed = []
+ ex = None # save only last exception, we can only raise one anyway
+
num_q, ok_list, err_list = self.m.info_read()
for c in ok_list:
- chunksDone.add(c)
+ chunk = self.findChunk(c)
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except BadHeader, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(c)
+
for c in err_list:
curl, errno, msg = c
- #test if chunk was finished, otherwise raise the exception
+ chunk = self.findChunk(curl)
+ #test if chunk was finished
if errno != 23 or "0 !=" not in msg:
- raise pycurl.error(errno, msg)
-
- #@TODO KeyBoardInterrupts are seen as finished chunks,
- #but normally not handled to this process, only in the testcase
+ failed.append(chunk)
+ ex = pycurl.error(errno, msg)
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+ continue
+
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except BadHeader, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(curl)
+ if not num_q: # no more infos to get
+
+ # if the init chunk has neither failed nor finished, fall back to a single connection
+ # note that the other chunks are closed and removed; init downloads their data as well
+ if failed and init not in failed and init.c not in chunksDone:
+ self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+ #list of chunks to clean and remove
+ to_clean = filter(lambda x: x is not init, self.chunks)
+ for chunk in to_clean:
+ self.closeChunk(chunk)
+ self.chunks.remove(chunk)
+ remove(self.info.getChunkName(chunk.id))
+
+ #let first chunk load the rest and update the info file
+ init.resetRange()
+ self.info.clear()
+ self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+ self.info.save()
+ elif failed:
+ raise ex
- chunksDone.add(curl)
- if not num_q:
lastFinishCheck = t
- if len(chunksDone) == len(self.chunks):
- done = True #all chunks loaded
+ if len(chunksDone) >= len(self.chunks):
+ if len(chunksDone) > len(self.chunks):
+ self.log.warning("Finished download chunks size incorrect, please report bug.")
+ done = True #all chunks loaded
break
if done:
break #all chunks loaded
- # calc speed once per second
+ # calc speed once per second, averaging over 3 seconds
if lastTimeCheck + 1 < t:
diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
enumerate(self.chunks)]
@@ -247,15 +291,7 @@ class HTTPDownload():
failed = False
for chunk in self.chunks:
- try:
- chunk.verifyHeader()
- except BadHeader, e:
- failed = e.code
- remove(self.info.getChunkName(chunk.id))
-
- chunk.fp.flush()
- fsync(chunk.fp.fileno()) #make sure everything was written to disk
- chunk.fp.close() #needs to be closed, or merging chunks will fail
+ chunk.flushFile() #make sure downloads are written to disk
if failed: raise BadHeader(failed)
@@ -265,11 +301,16 @@ class HTTPDownload():
if self.progressNotify:
self.progressNotify(self.percent)
+ def findChunk(self, handle):
+ """ linear search to find the chunk owning a curl handle (should be ok since the number of chunks is usually low) """
+ for chunk in self.chunks:
+ if chunk.c == handle: return chunk
+
def closeChunk(self, chunk):
try:
self.m.remove_handle(chunk.c)
- except pycurl.error:
- self.log.debug("Error removing chunk")
+ except pycurl.error, e:
+ self.log.debug("Error removing chunk: %s" % str(e))
finally:
chunk.close()
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index bd8cdd72e..e58fd114e 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -30,6 +30,7 @@ from module.plugins.Plugin import Abort
def myquote(url):
return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+bad_headers = range(400, 404) + range(405, 418) + range(500, 506)
class BadHeader(Exception):
def __init__(self, code, content=""):
@@ -211,11 +212,15 @@ class HTTPRequest():
def verifyHeader(self):
""" raise an exceptions on bad headers """
code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
- if code in range(400, 404) or code in range(405, 418) or code in range(500, 506):
+ if code in bad_headers:
#404 will NOT raise an exception
raise BadHeader(code, self.getResponse())
return code
+ def checkHeader(self):
+ """ check the header: True if the response code is not in bad_headers, False if it indicates failure """
+ return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers
+
def getResponse(self):
""" retrieve response from string io """
if self.rep is None: return ""