#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License,
    or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, see http://www.gnu.org/licenses/.

    @author: RaNaN
"""

from os import remove
from time import sleep, time
from shutil import move

import pycurl

from HTTPChunk import ChunkInfo, HTTPChunk
from HTTPRequest import BadHeader

from module.plugins.Plugin import Abort


class HTTPDownload():
    """ loads a url http + ftp

    The download is driven through a single ``pycurl.CurlMulti`` handle.
    Every chunk is written to its own ``<filename>.chunk<N>`` file and the
    chunk files are merged into ``filename`` once all transfers finished.
    """

    def __init__(self, url, filename, get=None, post=None, referer=None, cj=None, bucket=None,
                 interface=None, proxies=None):
        """ store the request parameters and load or create the chunk info

        :param url: address to download
        :param filename: complete file destination, not only name
        :param get: dict stored as ``self.get``; a new empty dict when omitted
        :param post: dict stored as ``self.post``; a new empty dict when omitted
        :param referer: stored as ``self.referer``
        :param cj: cookiejar if cookies are needed
        :param bucket: stored as ``self.bucket``
        :param interface: stored as ``self.interface``
        :param proxies: dict stored as ``self.proxies``; a new empty dict when omitted
        """
        self.url = url
        self.filename = filename  # complete file destination, not only name
        # Fresh dicts per instance: a ``{}`` default in the signature would be
        # one object shared by every download that relies on the default.
        self.get = {} if get is None else get
        self.post = {} if post is None else post
        self.referer = referer
        self.cj = cj  # cookiejar if cookies are needed
        self.bucket = bucket
        self.interface = interface
        self.proxies = {} if proxies is None else proxies
        # all arguments

        self.abort = False
        self.size = 0

        self.chunks = []
        self.chunksDone = 0

        self.infoSaved = False  # needed for 1 chunk resume

        try:
            self.info = ChunkInfo.load(filename)
            self.info.resume = True  # resume is only possible with valid info file
            self.size = self.info.size
            self.infoSaved = True
        except IOError:
            # ChunkInfo.load raised IOError, start with a fresh info object
            self.info = ChunkInfo(filename)

        self.chunkSupport = None  # not assigned anywhere else in this class
        self.m = pycurl.CurlMulti()

        # needed for speed calculation
        self.lastChecked = 0
        self.lastArrived = []
        self.speeds = []

    @property
    def speed(self):
        """ sum of the per chunk speeds from the last measurement """
        return sum(self.speeds)

    @property
    def arrived(self):
        """ sum of the ``arrived`` counters of all chunks """
        return sum(c.arrived for c in self.chunks)

    @property
    def percent(self):
        """ progress in percent, 0 as long as the size is unknown """
        if not self.size:
            return 0
        return (self.arrived * 100) / self.size

    def _copyChunks(self):
        """ merge all chunk files into the first one and move it to ``self.filename``

        Chunk files that were appended are removed, as is the info file.

        :raises Exception: if the merged file is shorter than the end of a chunk range
        """
        init = self.info.getChunkName(0)  # initial chunk name

        if len(self.chunks) > 1:
            buf = 512 * 1024  # copy in chunks, consumes less memory

            # The with blocks close both handles on every path, including the
            # size check failure below and errors while reading or writing.
            with open(init, "rb+") as fo:  # first chunkfile
                for i in range(1, self.info.getCount()):
                    # seek to beginning of chunk, to get rid of overlapping chunks
                    fo.seek(self.info.getChunkRange(i - 1)[1] + 1)

                    fname = "%s.chunk%d" % (self.filename, i)
                    with open(fname, "rb") as fi:  # input file
                        while True:
                            data = fi.read(buf)
                            if not data:
                                break
                            fo.write(data)

                    if fo.tell() < self.info.getChunkRange(i)[1]:
                        raise Exception("Downloaded content was smaller than expected")

                    remove(fname)  # remove chunk

        move(init, self.filename)
        self.info.remove()  # remove info file

    def download(self, chunks=1, resume=False):
        """ run the download and always clean up afterwards

        :param chunks: requested number of chunks, values below 1 are treated as 1
        :param resume: resume is only attempted if ``self.info.resume`` is set as well
        """
        chunks = max(1, chunks)
        resume = self.info.resume and resume
        self.chunks = []

        try:
            self._download(chunks, resume)
        finally:
            self.clean()

    def _download(self, chunks, resume):
        """ drive the curl multi handle until every chunk is done, then merge the chunks

        :param chunks: number of chunks to use, replaced by ``self.info.getCount()``
                       once the chunks were created
        :param resume: passed to every HTTPChunk; when false new chunk info is created
        :raises pycurl.error: for a failed transfer that does not look like a finished chunk
        :raises Abort: when ``self.abort`` was set while downloading
        :raises BadHeader: when ``verifyHeader`` of a chunk failed
        """
        if not resume:
            self.info.addChunk("%s.chunk0" % self.filename, (0, 0))  # set a range so the header is not parsed

        init = HTTPChunk(0, self, None, resume)  # initial chunk that will load complete file (if needed)

        self.chunks.append(init)
        self.m.add_handle(init.getHandle())

        while 1:
            if (chunks == 1) and self.chunkSupport and self.size and not self.infoSaved:
                self.info.setSize(self.size)
                self.info.createChunks(1)
                self.info.save()
                self.infoSaved = True

            # need to create chunks
            if len(self.chunks) < chunks and self.chunkSupport and self.size:  # will be set later by first chunk

                if not resume:
                    self.info.setSize(self.size)
                    self.info.createChunks(chunks)
                    self.info.save()

                chunks = self.info.getCount()

                init.range = self.info.getChunkRange(0)

                for i in range(1, chunks):
                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
                    self.chunks.append(c)
                    self.m.add_handle(c.getHandle())

            # perform until curl no longer asks to be called again immediately
            while 1:
                ret, num_handles = self.m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # collect the results of finished transfers
            while 1:
                num_q, ok_list, err_list = self.m.info_read()
                self.chunksDone += len(ok_list)

                for _curl, errno, msg in err_list:
                    # test if chunk was finished, otherwise raise the exception
                    # NOTE(review): 23 is CURLE_WRITE_ERROR in libcurl, presumably
                    # caused by the chunk's write callback ending the transfer at
                    # the end of its range - confirm in HTTPChunk
                    if errno != 23 or "0 !=" not in msg:
                        raise pycurl.error(errno, msg)

                    #@TODO KeyBoardInterrupts are seen as finished chunks,
                    #but normally not handled to this process, only in the testcase

                    self.chunksDone += 1

                if not num_q:
                    break

            if self.chunksDone == len(self.chunks):
                break  # all chunks loaded

            # calc speed once per second
            t = time()
            if self.lastChecked + 1 < t:
                # chunks that have no previous measurement yet count from 0
                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0)
                        for i, c in enumerate(self.chunks)]

                self.speeds = [float(a) / (t - self.lastChecked) for a in diff]
                self.lastArrived = [c.arrived for c in self.chunks]
                self.lastChecked = t

            if self.abort:
                raise Abort()

            sleep(0.005)  # suppress busy waiting - limits dl speed to (1 / x) * buffersize
            self.m.select(1)

        failed = False
        for chunk in self.chunks:
            try:
                chunk.verifyHeader()
            except BadHeader as e:
                # remember the code, the remaining chunks are still closed below
                failed = e.code
                remove(self.info.getChunkName(chunk.id))

            chunk.fp.close()
            self.m.remove_handle(chunk.c)

        if failed:
            raise BadHeader(failed)

        self._copyChunks()

    def clean(self):
        """ cleanup

        Closes all chunks and the multi handle and deletes the ``cj`` and
        ``info`` attributes, they are not available afterwards.
        """
        for chunk in self.chunks:
            chunk.close()
            self.m.remove_handle(chunk.c)

        self.m.close()
        self.chunks = []
        if hasattr(self, "cj"):
            del self.cj
        if hasattr(self, "info"):
            del self.info


if __name__ == "__main__":
    url = "http://speedtest.netcologne.de/test_10mb.bin"

    from Bucket import Bucket

    bucket = Bucket()
    bucket.setRate(200 * 1024)
    bucket = None  # the configured bucket is discarded, the download gets bucket=None

    print("starting")

    dwnld = HTTPDownload(url, "test_10mb.bin", bucket=bucket)
    dwnld.download(chunks=3, resume=True)