Diffstat (limited to 'pyload/network/HTTPChunk.py')
-rw-r--r--  pyload/network/HTTPChunk.py  294
1 file changed, 0 insertions, 294 deletions
diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py
deleted file mode 100644
index 41752b940..000000000
--- a/pyload/network/HTTPChunk.py
+++ /dev/null
@@ -1,294 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-from os import remove, stat, fsync
-from os.path import exists
-from time import sleep
-from re import search
-from pyload.utils import fs_encode
-import codecs
-import pycurl
-import urllib
-
-from pyload.network.HTTPRequest import HTTPRequest
-
-class WrongFormat(Exception):
-    pass
-
-
-class ChunkInfo(object):
-    def __init__(self, name):
-        self.name = unicode(name)
-        self.size = 0
-        self.resume = False
-        self.chunks = []
-
-    def __repr__(self):
-        ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
-        for i, c in enumerate(self.chunks):
-            ret += "%s# %s\n" % (i, c[1])
-
-        return ret
-
-    def setSize(self, size):
-        self.size = int(size)
-
-    def addChunk(self, name, range):
-        self.chunks.append((name, range))
-
-    def clear(self):
-        self.chunks = []
-
-    def createChunks(self, chunks):
-        self.clear()
-        chunk_size = self.size / chunks
-
-        current = 0
-        for i in range(chunks):
-            end = self.size - 1 if (i == chunks - 1) else current + chunk_size
-            self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
-            current += chunk_size + 1
-
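-    # Worked example (illustrative, not part of the original module): with
-    # size=1000 and chunks=3, createChunks() computes chunk_size = 333 and
-    # yields the ranges (0, 333), (334, 667) and (668, 999); the last chunk
-    # is clamped to size - 1, so the ranges cover every byte exactly once.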
-
-    def save(self):
-        fs_name = fs_encode("%s.chunks" % self.name)
-        fh = codecs.open(fs_name, "w", "utf_8")
-        fh.write("name:%s\n" % self.name)
-        fh.write("size:%s\n" % self.size)
-        for i, c in enumerate(self.chunks):
-            fh.write("#%d:\n" % i)
-            fh.write("\tname:%s\n" % c[0])
-            fh.write("\trange:%i-%i\n" % c[1])
-        fh.close()
-
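-    # Sketch of the on-disk layout produced by save() above, with hypothetical
-    # values (a download named "file.bin", size 2000, split into two chunks;
-    # "\t" marks a literal tab character):
-    #
-    #   name:file.bin
-    #   size:2000
-    #   #0:
-    #   \tname:file.bin.chunk0
-    #   \trange:0-1000
-    #   #1:
-    #   \tname:file.bin.chunk1
-    #   \trange:1001-1999
-    #
-    # load() below parses exactly this layout and raises WrongFormat on any
-    # deviation.
-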
-    @staticmethod
-    def load(name):
-        fs_name = fs_encode("%s.chunks" % name)
-        if not exists(fs_name):
-            raise IOError()
-        fh = codecs.open(fs_name, "r", "utf_8")
-        name = fh.readline()[:-1]
-        size = fh.readline()[:-1]
-        if name.startswith("name:") and size.startswith("size:"):
-            name = name[5:]
-            size = size[5:]
-        else:
-            fh.close()
-            raise WrongFormat()
-        ci = ChunkInfo(name)
-        ci.loaded = True
-        ci.setSize(size)
-        while True:
-            if not fh.readline():  # skip the "#i:" marker line; EOF ends the loop
-                break
-            name = fh.readline()[1:-1]
-            range = fh.readline()[1:-1]
-            if name.startswith("name:") and range.startswith("range:"):
-                name = name[5:]
-                range = range[6:].split("-")
-            else:
-                fh.close()
-                raise WrongFormat()
-
-            ci.addChunk(name, (long(range[0]), long(range[1])))
-        fh.close()
-        return ci
-
-    def remove(self):
-        fs_name = fs_encode("%s.chunks" % self.name)
-        if exists(fs_name): remove(fs_name)
-
-    def getCount(self):
-        return len(self.chunks)
-
-    def getChunkName(self, index):
-        return self.chunks[index][0]
-
-    def getChunkRange(self, index):
-        return self.chunks[index][1]
-
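-
-# Usage sketch for ChunkInfo (illustrative; "file.bin" is a hypothetical
-# download name, not something defined in this module):
-#
-#   ci = ChunkInfo("file.bin")
-#   ci.setSize(2000)
-#   ci.createChunks(2)
-#   ci.save()                      # writes "file.bin.chunks" next to the data
-#   ci = ChunkInfo.load("file.bin")
-#   ci.getChunkRange(1)            # -> (1001L, 1999L)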
-
-class HTTPChunk(HTTPRequest):
-    def __init__(self, id, parent, range=None, resume=False):
-        self.id = id
-        self.p = parent  # HTTPDownload instance
-        self.range = range  # tuple (start, end)
-        self.resume = resume
-        self.log = parent.log
-
-        self.size = range[1] - range[0] if range else -1
-        self.arrived = 0
-        self.lastURL = self.p.referer
-
-        self.c = pycurl.Curl()
-
-        self.header = ""
-        self.headerParsed = False  # indicates if the header has been processed
-
-        self.fp = None  # file handle
-
-        self.initHandle()
-        self.setInterface(self.p.options)
-
-        self.BOMChecked = False  # check and remove byte order mark
-
-        self.rep = None
-
-        self.sleep = 0.000
-        self.lastSize = 0
-
-    def __repr__(self):
-        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
-
-    @property
-    def cj(self):
-        return self.p.cj
-
-    def getHandle(self):
-        """ returns a Curl handle ready to use for perform/multiperform """
-
-        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
-        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
-        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
-
-        # request all bytes, since some servers in Russia seem to have a defective arithmetic unit
-
-        fs_name = fs_encode(self.p.info.getChunkName(self.id))
-        if self.resume:
-            self.fp = open(fs_name, "ab")
-            self.arrived = self.fp.tell()
-            if not self.arrived:
-                self.arrived = stat(fs_name).st_size
-
-            if self.range:
-                # do nothing if the chunk is already finished
-                if self.arrived + self.range[0] >= self.range[1]: return None
-
-                if self.id == len(self.p.info.chunks) - 1:  # as the last chunk, don't set an end range, so we get everything
-                    range = "%i-" % (self.arrived + self.range[0])
-                else:
-                    range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
-
-                self.log.debug("Chunked resume with range %s" % range)
-                self.c.setopt(pycurl.RANGE, range)
-            else:
-                self.log.debug("Resume File from %i" % self.arrived)
-                self.c.setopt(pycurl.RESUME_FROM, self.arrived)
-
-        else:
-            if self.range:
-                if self.id == len(self.p.info.chunks) - 1:  # see above
-                    range = "%i-" % self.range[0]
-                else:
-                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
-
-                self.log.debug("Chunked with range %s" % range)
-                self.c.setopt(pycurl.RANGE, range)
-
-            self.fp = open(fs_name, "wb")
-
-        return self.c
-
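-    # Illustration (values are hypothetical): for a fresh download with
-    # p.size = 3000 split into the ranges (0, 1000), (1001, 2000) and
-    # (2001, 2999), getHandle() sets pycurl.RANGE to "0-1001", "1001-2001"
-    # and "2001-" respectively. One extra byte is requested on purpose (see
-    # the "request all bytes" note above) and the last chunk is left
-    # open-ended; writeBody() below returns 0 to cut a chunk off once it
-    # has arrived in full.
-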
-    def writeHeader(self, buf):
-        self.header += buf
-        # @TODO: forward headers? This is possibly unneeded when we just parse valid 200 headers
-        # as the first chunk, we will parse the headers
-        if not self.range and self.header.endswith("\r\n\r\n"):
-            self.parseHeader()
-        elif not self.range and buf.startswith("150") and "data connection" in buf.lower():  #: ftp file size parsing
-            size = search(r"(\d+) bytes", buf)
-            if size:
-                self.p.size = int(size.group(1))
-                self.p.chunkSupport = True
-
-        self.headerParsed = True
-
-    def writeBody(self, buf):
-        # ignore a leading UTF-8 BOM, it confuses unrar
-        if not self.BOMChecked:
-            if [ord(b) for b in buf[:3]] == [239, 187, 191]:
-                buf = buf[3:]
-            self.BOMChecked = True
-
-        size = len(buf)
-
-        self.arrived += size
-
-        self.fp.write(buf)
-
-        if self.p.bucket:
-            sleep(self.p.bucket.consumed(size))
-        else:
-            # Avoid small buffers: increase the sleep time slowly if the buffer size
-            # gets smaller, otherwise reduce it proportionally (values are based on tests),
-            # so in general CPU time is saved without reducing bandwidth too much
-
-            if size < self.lastSize:
-                self.sleep += 0.002
-            else:
-                self.sleep *= 0.7
-
-            self.lastSize = size
-
-            sleep(self.sleep)
-
-        if self.range and self.arrived > self.size:
-            return 0  # returning 0 tells curl to abort the transfer; we have enough data
-
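-    # Throttle sketch (illustrative numbers): without a bucket, a shrinking
-    # buffer (e.g. 16 KiB followed by 4 KiB) grows the delay by 2 ms per
-    # call, while steady or growing buffers decay it geometrically
-    # (2 ms -> 1.4 ms -> 0.98 ms -> ...), so the callback stops spinning on
-    # tiny buffers without noticeably capping bandwidth.
-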
-
-    def parseHeader(self):
-        """parse data from the received header"""
-        for orgline in self.decodeResponse(self.header).splitlines():
-            line = orgline.strip().lower()
-
-            if line.startswith("accept-ranges") and "bytes" in line:
-                self.p.chunkSupport = True
-
-            if line.startswith("content-disposition") and ("filename=" in line or "filename*=" in line):
-                if "filename*=" in line:
-                    # extended version according to RFC 6266 and RFC 5987
-                    encoding = line.partition("*=")[2].partition("''")[0]
-                    name = orgline.partition("''")[2]
-                    name = urllib.unquote(str(name)).decode(charEnc(encoding))
-                else:
-                    # basic version, US-ASCII only
-                    name = orgline.partition("filename=")[2]
-
-                name = name.replace('"', "").replace("'", "").replace(";", "").replace("/", "_").strip()
-                self.p.nameDisposition = name
-
-                self.log.debug("Content-Disposition: %s" % name)
-
-            if not self.resume and line.startswith("content-length"):
-                self.p.size = int(line.split(":")[1])
-
-        self.headerParsed = True
-
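-    # Headers parseHeader() understands, with hypothetical values:
-    #
-    #   Content-Disposition: attachment; filename="report.pdf"
-    #       -> nameDisposition = "report.pdf" (basic form, US-ASCII only)
-    #   Content-Disposition: attachment; filename*=utf-8''n%C3%A4me.txt
-    #       -> the "utf-8" tag is mapped through charEnc() below, then the
-    #          percent-encoded bytes are unquoted and decoded to u"n\xe4me.txt"
-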
-    def stop(self):
-        """The download will not proceed after the next call of writeBody"""
-        self.range = [0, 0]
-        self.size = 0
-
-    def resetRange(self):
-        """ Reset the range, so the download will load all available data """
-        self.range = None
-
-    def setRange(self, range):
-        self.range = range
-        self.size = range[1] - range[0]
-
-    def flushFile(self):
-        """ flush and close the file """
-        self.fp.flush()
-        fsync(self.fp.fileno())  # make sure everything was written to disk
-        self.fp.close()  # needs to be closed, or merging chunks will fail
-
-    def close(self):
-        """ closes everything, unusable after this """
-        if self.fp: self.fp.close()
-        self.c.close()
-        if hasattr(self, "p"): del self.p
-
-
-def charEnc(enc):
-    # map common HTTP charset tags to Python codec names; unknown tags fall
-    # through unchanged so the codecs module can still resolve any alias it knows
-    return {'utf-8'     : "utf_8",
-            'iso-8859-1': "latin_1"}.get(enc, enc)
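-
-# Usage sketch (illustrative values):
-#
-#   charEnc("utf-8")       # -> "utf_8"
-#   charEnc("iso-8859-1")  # -> "latin_1"
-#   charEnc("utf-16")      # -> "utf-16" (passed through; a codec Python knows)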