diff options
Diffstat (limited to 'module/network')
-rw-r--r-- | module/network/Browser.py | 10 | ||||
-rw-r--r-- | module/network/Bucket.py | 2 | ||||
-rw-r--r-- | module/network/HTTPChunk.py | 18 | ||||
-rw-r--r-- | module/network/HTTPDownload.py | 33 | ||||
-rw-r--r-- | module/network/HTTPRequest.py | 31 |
5 files changed, 70 insertions, 24 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py index 0a45c1ef4..19b6aca66 100644 --- a/module/network/Browser.py +++ b/module/network/Browser.py @@ -81,15 +81,9 @@ class Browser(object): return self.httpDownload(url, join(folder, file_name), get, post, ref, cookies) - - def load(self, url, get={}, post={}, ref=True, cookies=True): - self.log.warning("Browser: deprecated call 'load'") - - return self.getPage(url, get, post, ref, cookies) - - def getPage(self, url, get={}, post={}, ref=True, cookies=True): + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False): """ retrieves page """ - return self.http.load(url, get, post, ref, cookies) + return self.http.load(url, get, post, ref, cookies, just_header) def clean(self): """ cleanup """ diff --git a/module/network/Bucket.py b/module/network/Bucket.py index 60d8a757a..1a2d77409 100644 --- a/module/network/Bucket.py +++ b/module/network/Bucket.py @@ -34,7 +34,7 @@ class Bucket: def consumed(self, amount): """ return time the process have to sleep, after consumed specified amount """ - if self.rate < 0: return 0 + if self.rate < 10240: return 0 #min. 10kb, may become unresponsive otherwise self.lock.acquire() self.calc_tokens() diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py index 0c184db94..5cb1d9178 100644 --- a/module/network/HTTPChunk.py +++ b/module/network/HTTPChunk.py @@ -19,6 +19,7 @@ from os import remove from os.path import exists from time import sleep +from re import search import pycurl @@ -143,15 +144,15 @@ class HTTPChunk(HTTPRequest): self.arrived = self.fp.tell() if self.range: - #print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1]) + print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1]) self.c.setopt(pycurl.RANGE, "%i-%i" % (self.arrived+self.range[0], self.range[1])) else: - #print "Resume File from %i" % self.arrived + print "Resume File from %i" % self.arrived self.c.setopt(pycurl.RESUME_FROM, self.arrived) else: if self.range: - #print "Chunked with range %i-%i" % self.range + print "Chunked with range %i-%i" % self.range self.c.setopt(pycurl.RANGE, "%i-%i" % self.range) self.fp = open(self.p.info.getChunkName(self.id), "wb") @@ -162,8 +163,15 @@ class HTTPChunk(HTTPRequest): self.header += buf #@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers # as first chunk, we will parse the headers - if self.header.endswith("\r\n\r\n") and not self.range: + if not self.range and self.header.endswith("\r\n\r\n"): self.parseHeader() + elif not self.range and buf.startswith("150") and "data connection" in buf: #ftp file size parsing + size = search(r"(\d+) bytes", buf) + if size: + self.p.size = int(size.group(1)) + self.p.chunkSupport = True + + self.headerParsed = True def writeBody(self, buf): size = len(buf) @@ -187,7 +195,7 @@ class HTTPChunk(HTTPRequest): if not self.resume and line.startswith("content-length"): self.p.size = int(line.split(":")[1]) - + self.headerParsed = True def close(self): diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py index e3ac09e84..5ee33608b 100644 --- a/module/network/HTTPDownload.py +++ b/module/network/HTTPDownload.py @@ -23,12 +23,13 @@ from shutil import move import pycurl -from HTTPRequest import HTTPRequest from HTTPChunk import ChunkInfo, HTTPChunk +from HTTPRequest import BadHeader from module.plugins.Plugin import Abort -class HTTPDownload(HTTPRequest): +class HTTPDownload(): + """ loads a url http + ftp """ def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None, interface=None, proxies={}): self.url = url @@ -48,10 +49,13 @@ class HTTPDownload(HTTPRequest): self.chunks = [] self.chunksDone = 0 + self.infoSaved = False # needed for 1 chunk resume + try: self.info = ChunkInfo.load(filename) self.info.resume = True #resume is only possible with valid info file self.size = self.info.size + self.infoSaved = True except IOError: self.info = ChunkInfo(filename) @@ -94,6 +98,8 @@ class HTTPDownload(HTTPRequest): break fo.write(data) fi.close() + if fo.tell() < self.info.getChunkName(i)[2]: + raise Exception("Downloaded content was smaller than expected") remove(fname) #remove chunk fo.close() @@ -112,7 +118,7 @@ class HTTPDownload(HTTPRequest): def _download(self, chunks, resume): if not resume: - self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) + self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #set a range so the header is not parsed init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed) @@ -120,6 +126,12 @@ class HTTPDownload(HTTPRequest): self.m.add_handle(init.getHandle()) while 1: + if (chunks == 1) and self.chunkSupport and self.size and not self.infoSaved: + self.info.setSize(self.size) + self.info.createChunks(1) + self.info.save() + self.infoSaved = True + #need to create chunks if len(self.chunks) < chunks and self.chunkSupport and self.size: #will be set later by first chunk @@ -184,20 +196,29 @@ class HTTPDownload(HTTPRequest): if self.abort: raise Abort() - sleep(0.001) #supress busy waiting - limits dl speed to (1 / x) * buffersize + sleep(0.005) #supress busy waiting - limits dl speed to (1 / x) * buffersize self.m.select(1) + failed = False for chunk in self.chunks: + try: + chunk.verifyHeader() + except BadHeader, e: + failed = e.code + remove(self.info.getChunkName(chunk.id)) + chunk.fp.close() self.m.remove_handle(chunk.c) + if failed: raise BadHeader(failed) + self._copyChunks() def clean(self): """ cleanup """ for chunk in self.chunks: - chunk.close() - self.m.remove_handle(chunk.c) + chunk.close() + self.m.remove_handle(chunk.c) self.m.close() self.chunks = [] diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index 805305f80..b4bb0857a 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -22,9 +22,17 @@ import pycurl from urllib import quote, urlencode from cStringIO import StringIO +from module.plugins.Plugin import Abort + def myquote(url): return quote(url, safe="%/:=&?~#+!$,;'@()*[]") +class BadHeader(Exception): + def __init__(self, code): + Exception.__init__(self, "Bad server response: %s"% code) + self.code = code + + class HTTPRequest(): def __init__(self, cookies=None, interface=None, proxies=None): self.c = pycurl.Curl() @@ -35,6 +43,7 @@ class HTTPRequest(): self.lastURL = None self.lastEffectiveURL = None self.abort = False + self.code = 0 # last http code self.header = "" @@ -118,7 +127,7 @@ class HTTPRequest(): self.getCookies() - def load(self, url, get={}, post={}, referer=True, cookies=True): + def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False): """ load and returns a given page """ self.setRequestContext(url, get, post, referer, cookies) @@ -126,15 +135,29 @@ class HTTPRequest(): self.header = "" self.c.setopt(pycurl.WRITEFUNCTION, self.write) self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - #@TODO header_only, raw_cookies and some things in old backend, which are apperently not needed + #@TODO raw_cookies and some things in old backend, which are apperently not needed + + if just_header: + self.c.setopt(pycurl.NOBODY, 1) + self.c.perform() + rep = self.header + else: + self.c.perform() + rep = self.getResponse() - self.c.perform() + self.code = self.verifyHeader() self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) self.addCookies() - return self.getResponse() + return rep + def verifyHeader(self): + """ raise an exceptions on bad headers """ + code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) + if code in range(400,404) or code in range(405,418) or code in range(500,506): + raise BadHeader(code) #404 will NOT raise an exception + return code def getResponse(self): """ retrieve response from string io """ |