author | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-12-04 13:39:42 +0100
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-12-04 13:39:42 +0100
commit | d2e3afceb738af20aeb8e41f9aad12150cf1e8a7 (patch)
tree | 91a1ce5bc7fb51be6c3d188aed11552662d6f4bf /module/network
parent | closed #440 (diff)
download | pyload-d2e3afceb738af20aeb8e41f9aad12150cf1e8a7.tar.xz
Better download connection handling: detect server errors earlier and fall back to a single connection when possible
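In short: every finished curl handle now has its HTTP status verified as soon as pycurl reports it, and if extra chunk connections fail while the initial one is still healthy, the download degrades to a single connection instead of aborting. A condensed sketch of that fallback decision (hypothetical helper name; the real logic lives in HTTPDownload._download in the diff below):

    def recover_or_raise(chunks, init, failed, last_error):
        # init is the first chunk; it owns the target file and is never dropped
        if failed and init not in failed:
            # extra connections died but the first one is healthy:
            # discard the failed chunks and let init fetch everything
            for chunk in [c for c in chunks if c is not init]:
                chunk.close()
                chunks.remove(chunk)
            init.resetRange()  # range = None -> chunk loads all remaining data
        elif failed:
            # the initial connection itself failed, nothing to fall back to
            raise last_error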
Diffstat (limited to 'module/network')
-rw-r--r-- | module/network/HTTPChunk.py | 22
-rw-r--r-- | module/network/HTTPDownload.py | 91
-rw-r--r-- | module/network/HTTPRequest.py | 7
3 files changed, 92 insertions, 28 deletions
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 69eedb19c..582067aa8 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -16,7 +16,7 @@
     @author: RaNaN
 """
-from os import remove, stat
+from os import remove, stat, fsync
 from os.path import exists
 from time import sleep
 from re import search
@@ -146,6 +146,9 @@ class HTTPChunk(HTTPRequest):
         self.sleep = 0.000
         self.lastSize = 0
 
+    def __repr__(self):
+        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
     @property
     def cj(self):
         return self.p.cj
@@ -157,7 +160,7 @@ class HTTPChunk(HTTPRequest):
         self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
         self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
 
-        # request one byte more, since some servers in russia seems to have a defect arihmetic unit
+        # request all bytes, since some servers in russia seems to have a defect arihmetic unit
         if self.resume:
             self.fp = open(self.p.info.getChunkName(self.id), "ab")
@@ -259,10 +262,25 @@ class HTTPChunk(HTTPRequest):
 
         self.headerParsed = True
 
+    def stop(self):
+        """The download will not proceed after next call of writeBody"""
+        self.range = [0,0]
+        self.size = 0
+
+    def resetRange(self):
+        """ Reset the range, so the download will load all data available """
+        self.range = None
+
     def setRange(self, range):
         self.range = range
         self.size = range[1] - range[0]
 
+    def flushFile(self):
+        """ flush and close file """
+        self.fp.flush()
+        fsync(self.fp.fileno()) #make sure everything was written to disk
+        self.fp.close() #needs to be closed, or merging chunks will fail
+
     def close(self):
         """ closes everything, unusable after this """
         if self.fp: self.fp.close()
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index 1a2886332..13c674833 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -140,7 +140,7 @@ class HTTPDownload():
                 return self._download(chunks, False)
             else:
-                raise e
+                raise
         finally:
             self.close()
@@ -161,7 +161,7 @@ class HTTPDownload():
 
         lastFinishCheck = 0
         lastTimeCheck = 0
-        chunksDone = set()
+        chunksDone = set() # list of curl handles that are finished
         chunksCreated = False
         done = False
         if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can
@@ -202,32 +202,76 @@
             t = time()
 
             # reduce these calls
-            while lastFinishCheck + 1 < t:
+            while lastFinishCheck + 0.5 < t:
+                # list of failed curl handles
+                failed = []
+                ex = None # save only last exception, we can only raise one anyway
+
                 num_q, ok_list, err_list = self.m.info_read()
                 for c in ok_list:
-                    chunksDone.add(c)
+                    chunk = self.findChunk(c)
+                    try: # check if the header implies success, else add it to failed list
+                        chunk.verifyHeader()
+                    except BadHeader, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(c)
+
                 for c in err_list:
                     curl, errno, msg = c
-                    #test if chunk was finished, otherwise raise the exception
+                    chunk = self.findChunk(curl)
+                    #test if chunk was finished
                     if errno != 23 or "0 !=" not in msg:
-                        raise pycurl.error(errno, msg)
-
-                    #@TODO KeyBoardInterrupts are seen as finished chunks,
-                    #but normally not handled to this process, only in the testcase
+                        failed.append(chunk)
+                        ex = pycurl.error(errno, msg)
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+                        continue
+
+                    try: # check if the header implies success, else add it to failed list
+                        chunk.verifyHeader()
+                    except BadHeader, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(curl)
+
+                if not num_q: # no more infos to get
+
+                    # check if init is not finished so we reset download connections
+                    # note that other chunks are closed and downloaded with init too
+                    if failed and init not in failed and init.c not in chunksDone:
+                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+                        #list of chunks to clean and remove
+                        to_clean = filter(lambda x: x is not init, self.chunks)
+                        for chunk in to_clean:
+                            self.closeChunk(chunk)
+                            self.chunks.remove(chunk)
+                            remove(self.info.getChunkName(chunk.id))
+
+                        #let first chunk load the rest and update the info file
+                        init.resetRange()
+                        self.info.clear()
+                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+                        self.info.save()
+                    elif failed:
+                        raise ex
 
-                    chunksDone.add(curl)
-                if not num_q:
                     lastFinishCheck = t
 
-                    if len(chunksDone) == len(self.chunks):
-                        done = True #all chunks loaded
+                    if len(chunksDone) >= len(self.chunks):
+                        if len(chunksDone) > len(self.chunks):
+                            self.log.warning("Finished download chunks size incorrect, please report bug.")
+                        done = True #all chunks loaded
 
                     break
 
             if done:
                 break #all chunks loaded
 
-            # calc speed once per second
+            # calc speed once per second, averaging over 3 seconds
             if lastTimeCheck + 1 < t:
                 diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in enumerate(self.chunks)]
@@ -247,15 +291,7 @@ class HTTPDownload():
         failed = False
 
         for chunk in self.chunks:
-            try:
-                chunk.verifyHeader()
-            except BadHeader, e:
-                failed = e.code
-                remove(self.info.getChunkName(chunk.id))
-
-            chunk.fp.flush()
-            fsync(chunk.fp.fileno()) #make sure everything was written to disk
-            chunk.fp.close() #needs to be closed, or merging chunks will fail
+            chunk.flushFile() #make sure downloads are written to disk
 
         if failed: raise BadHeader(failed)
@@ -265,11 +301,16 @@ class HTTPDownload():
         if self.progressNotify:
             self.progressNotify(self.percent)
 
+    def findChunk(self, handle):
+        """ linear search to find a chunk (should be ok since chunk size is usually low) """
+        for chunk in self.chunks:
+            if chunk.c == handle: return chunk
+
     def closeChunk(self, chunk):
         try:
             self.m.remove_handle(chunk.c)
-        except pycurl.error:
-            self.log.debug("Error removing chunk")
+        except pycurl.error, e:
+            self.log.debug("Error removing chunk: %s" % str(e))
         finally:
             chunk.close()
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index bd8cdd72e..e58fd114e 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -30,6 +30,7 @@ from module.plugins.Plugin import Abort
 def myquote(url):
     return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
 
+bad_headers = range(400, 404) + range(405, 418) + range(500, 506)
 
 class BadHeader(Exception):
     def __init__(self, code, content=""):
@@ -211,11 +212,15 @@ class HTTPRequest():
     def verifyHeader(self):
         """ raise an exceptions on bad headers """
         code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
-        if code in range(400, 404) or code in range(405, 418) or code in range(500, 506):
+        if code in bad_headers:
             #404 will NOT raise an exception
             raise BadHeader(code, self.getResponse())
         return code
 
+    def checkHeader(self):
+        """ check if header indicates failure"""
+        return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers
+
     def getResponse(self):
         """ retrieve response from string io """
         if self.rep is None: return ""
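The new HTTPChunk.flushFile() replaces the inline flush/fsync/close sequence that previously lived in HTTPDownload. A standalone illustration of the pattern: file.flush() only empties Python's userspace buffer, os.fsync() asks the kernel to commit the data to disk, and the handle must be closed because merging reopens the chunk files.

    from os import fsync

    def flush_to_disk(fp):
        fp.flush()          # drain Python's internal write buffer to the OS
        fsync(fp.fileno())  # force the OS to push its buffers to disk
        fp.close()          # must be closed, or merging the chunks will fail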
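The rewritten loop in HTTPDownload._download() splits the CurlMulti.info_read() results into done and failed sets instead of raising on the first error. A condensed sketch of that classification step, with logging and exception bookkeeping omitted (checkHeader() is the new method added to HTTPRequest in this commit):

    def classify(multi, find_chunk, done, failed):
        num_q, ok_list, err_list = multi.info_read()
        for handle in ok_list:
            # finished without a curl error, but the HTTP status may still be bad
            chunk = find_chunk(handle)
            if chunk.checkHeader():
                done.add(handle)
            else:
                failed.append(chunk)
        for handle, errno, msg in err_list:
            chunk = find_chunk(handle)
            # curl error 23 with "0 !=" in the message is the benign write
            # abort used to stop a chunk deliberately; anything else failed
            if errno != 23 or "0 !=" not in msg:
                failed.append(chunk)
            elif chunk.checkHeader():
                done.add(handle)
            else:
                failed.append(chunk)
        return num_q  # 0 means no more results are queued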
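Note that bad_headers relies on Python 2 semantics: range() returns a list there, so the three ranges concatenate with +. 404 falls in the first gap and is deliberately not treated as a failure, per the inline comment. A version-neutral equivalent using a set (hypothetical, for illustration only):

    bad_headers = set(range(400, 404)) | set(range(405, 418)) | set(range(500, 506))

    def is_bad(code):
        return code in bad_headers

    assert is_bad(403) and is_bad(500)
    assert not is_bad(404) and not is_bad(200)  # 404 will NOT raise an exception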
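One small fix worth noting: the error handler in HTTPDownload now uses a bare raise instead of raise e. In Python 2, re-raising a caught exception by name discards the original traceback, while a bare raise inside an except block propagates the active exception with its traceback intact. A tiny demonstration with hypothetical stand-ins:

    def transfer():
        raise IOError("connection lost")  # stand-in for a failing _download()

    def close_handles():
        pass  # stand-in for the cleanup done in the finally block

    try:
        transfer()
    except IOError:
        close_handles()
        raise  # bare raise keeps the original traceback, unlike "raise e"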