diff options
-rw-r--r-- | pyload/network/Browser.py | 153 | ||||
-rw-r--r-- | pyload/network/HTTPChunk.py | 298 | ||||
-rw-r--r-- | pyload/network/HTTPDownload.py | 338 | ||||
-rw-r--r-- | pyload/network/HTTPRequest.py | 324 | ||||
-rw-r--r-- | pyload/threads/DownloadThread.py | 4 |
5 files changed, 2 insertions, 1115 deletions
diff --git a/pyload/network/Browser.py b/pyload/network/Browser.py deleted file mode 100644 index 262adaebd..000000000 --- a/pyload/network/Browser.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from logging import getLogger - -from HTTPRequest import HTTPRequest -from HTTPDownload import HTTPDownload - -# @ Deprecated -class Browser(object): - __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl") - - def __init__(self, bucket=None, options={}): - self.log = getLogger("log") - - self.options = options #holds pycurl options - self.bucket = bucket - - self.cj = None # needs to be set later - self._size = 0 - - self.renewHTTPRequest() - self.dl = None - - - def renewHTTPRequest(self): - if hasattr(self, "http"): self.http.close() - self.http = HTTPRequest(self.cj, self.options) - - def setLastURL(self, val): - self.http.lastURL = val - - # tunnel some attributes from HTTP Request to Browser - lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL) - lastURL = property(lambda self: self.http.lastURL, setLastURL) - code = property(lambda self: self.http.code) - cookieJar = property(lambda self: self.cj) - - def setCookieJar(self, cj): - self.cj = cj - self.http.cj = cj - - @property - def speed(self): - if self.dl: - return self.dl.speed - return 0 - - @property - def size(self): - if self._size: - return self._size - if self.dl: - return self.dl.size - return 0 - - @property - def name(self): - if self.dl: - return self.dl.name - else: - return "" - - @property - def arrived(self): - if self.dl: - return self.dl.arrived - return 0 - - @property - def percent(self): - if not self.size: return 0 - return (self.arrived * 100) / self.size - - def clearCookies(self): - if self.cj: - self.cj.clear() - self.http.clearCookies() - - def clearReferer(self): - self.http.lastURL = None - - def abortDownloads(self): - self.http.abort = True - if self.dl: - self._size = self.dl.size - self.dl.abort = True - - def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False, - disposition=False): - """ this can also download ftp """ - self._size = 0 - self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None, - self.cj if cookies else None, self.bucket, self.options, disposition) - name = self.dl.download(chunks, resume) - self._size = self.dl.size - - self.dl = None - - return name - - def load(self, *args, **kwargs): - """ retrieves page """ - return self.http.load(*args, **kwargs) - - def putHeader(self, name, value): - """ add a header to the request """ - self.http.putHeader(name, value) - - def addAuth(self, pwd): - """Adds user and pw for http auth - - :param pwd: string, user:password - """ - self.options["auth"] = pwd - self.renewHTTPRequest() #we need a new request - - def removeAuth(self): - if "auth" in self.options: del self.options["auth"] - self.renewHTTPRequest() - - def setOption(self, name, value): - """Adds an option to the request, see HTTPRequest for existing ones""" - self.options[name] = value - - def deleteOption(self, name): - if name in self.options: del self.options[name] - - def clearHeaders(self): - self.http.clearHeaders() - - def close(self): - """ cleanup """ - if hasattr(self, "http"): - self.http.close() - del self.http - if hasattr(self, "dl"): - del self.dl - if hasattr(self, "cj"): - del self.cj - -if __name__ == "__main__": - browser = Browser()#proxies={"socks5": "localhost:5000"}) - ip = "http://www.whatismyip.com/automation/n09230945.asp" - #browser.getPage("http://google.com/search?q=bar") - #browser.getPage("https://encrypted.google.com/") - #print browser.getPage(ip) - #print browser.getRedirectLocation("http://google.com/") - #browser.getPage("https://encrypted.google.com/") - #browser.getPage("http://google.com/search?q=bar") - - browser.httpDownload("http://speedtest.netcologne.de/test_10mb.bin", "test_10mb.bin") - diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py deleted file mode 100644 index 4389aef28..000000000 --- a/pyload/network/HTTPChunk.py +++ /dev/null @@ -1,298 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" -from os import remove, stat, fsync -from os.path import exists -from time import sleep -from re import search - -import codecs -import pycurl - -from pyload.utils import remove_chars -from pyload.utils.fs import fs_encode - -from HTTPRequest import HTTPRequest - -class WrongFormat(Exception): - pass - - -class ChunkInfo(): - def __init__(self, name): - self.name = unicode(name) - self.size = 0 - self.resume = False - self.chunks = [] - - def __repr__(self): - ret = "ChunkInfo: %s, %s\n" % (self.name, self.size) - for i, c in enumerate(self.chunks): - ret += "%s# %s\n" % (i, c[1]) - - return ret - - def setSize(self, size): - self.size = int(size) - - def addChunk(self, name, range): - self.chunks.append((name, range)) - - def clear(self): - self.chunks = [] - - def createChunks(self, chunks): - self.clear() - chunk_size = self.size / chunks - - current = 0 - for i in range(chunks): - end = self.size - 1 if (i == chunks - 1) else current + chunk_size - self.addChunk("%s.chunk%s" % (self.name, i), (current, end)) - current += chunk_size + 1 - - - def save(self): - fs_name = fs_encode("%s.chunks" % self.name) - fh = codecs.open(fs_name, "w", "utf_8") - fh.write("name:%s\n" % self.name) - fh.write("size:%s\n" % self.size) - for i, c in enumerate(self.chunks): - fh.write("#%d:\n" % i) - fh.write("\tname:%s\n" % c[0]) - fh.write("\trange:%i-%i\n" % c[1]) - fh.close() - - @staticmethod - def load(name): - fs_name = fs_encode("%s.chunks" % name) - if not exists(fs_name): - raise IOError() - fh = codecs.open(fs_name, "r", "utf_8") - name = fh.readline()[:-1] - size = fh.readline()[:-1] - if name.startswith("name:") and size.startswith("size:"): - name = name[5:] - size = size[5:] - else: - fh.close() - raise WrongFormat() - ci = ChunkInfo(name) - ci.loaded = True - ci.setSize(size) - while True: - if not fh.readline(): #skip line - break - name = fh.readline()[1:-1] - range = fh.readline()[1:-1] - if name.startswith("name:") and range.startswith("range:"): - name = name[5:] - range = range[6:].split("-") - else: - raise WrongFormat() - - ci.addChunk(name, (long(range[0]), long(range[1]))) - fh.close() - return ci - - def remove(self): - fs_name = fs_encode("%s.chunks" % self.name) - if exists(fs_name): remove(fs_name) - - def getCount(self): - return len(self.chunks) - - def getChunkName(self, index): - return self.chunks[index][0] - - def getChunkRange(self, index): - return self.chunks[index][1] - - -class HTTPChunk(HTTPRequest): - def __init__(self, id, parent, range=None, resume=False): - self.id = id - self.p = parent # HTTPDownload instance - self.range = range # tuple (start, end) - self.resume = resume - self.log = parent.log - - self.size = range[1] - range[0] if range else -1 - self.arrived = 0 - self.lastURL = self.p.referer - - self.c = pycurl.Curl() - - self.header = "" - self.headerParsed = False #indicates if the header has been processed - - self.fp = None #file handle - - self.initHandle() - self.setInterface(self.p.options) - - self.BOMChecked = False # check and remove byte order mark - - self.rep = None - - self.sleep = 0.000 - self.lastSize = 0 - - def __repr__(self): - return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) - - @property - def cj(self): - return self.p.cj - - def getHandle(self): - """ returns a Curl handle ready to use for perform/multiperform """ - - self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj) - self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - # request all bytes, since some servers in russia seems to have a defect arihmetic unit - - fs_name = fs_encode(self.p.info.getChunkName(self.id)) - if self.resume: - self.fp = open(fs_name, "ab") - self.arrived = self.fp.tell() - if not self.arrived: - self.arrived = stat(fs_name).st_size - - if self.range: - #do nothing if chunk already finished - if self.arrived + self.range[0] >= self.range[1]: return None - - if self.id == len(self.p.info.chunks) - 1: #as last chunk dont set end range, so we get everything - range = "%i-" % (self.arrived + self.range[0]) - else: - range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1)) - - self.log.debug("Chunked resume with range %s" % range) - self.c.setopt(pycurl.RANGE, range) - else: - self.log.debug("Resume File from %i" % self.arrived) - self.c.setopt(pycurl.RESUME_FROM, self.arrived) - - else: - if self.range: - if self.id == len(self.p.info.chunks) - 1: # see above - range = "%i-" % self.range[0] - else: - range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1)) - - self.log.debug("Chunked with range %s" % range) - self.c.setopt(pycurl.RANGE, range) - - self.fp = open(fs_name, "wb") - - return self.c - - def writeHeader(self, buf): - self.header += buf - #@TODO forward headers?, this is possibly unneeded, when we just parse valid 200 headers - # as first chunk, we will parse the headers - if not self.range and self.header.endswith("\r\n\r\n"): - self.parseHeader() - elif not self.range and buf.startswith("150") and "data connection" in buf: #ftp file size parsing - size = search(r"(\d+) bytes", buf) - if size: - self.p.size = int(size.group(1)) - self.p.chunkSupport = True - - self.headerParsed = True - - def writeBody(self, buf): - #ignore BOM, it confuses unrar - if not self.BOMChecked: - if [ord(b) for b in buf[:3]] == [239, 187, 191]: - buf = buf[3:] - self.BOMChecked = True - - size = len(buf) - - self.arrived += size - - self.fp.write(buf) - - if self.p.bucket: - sleep(self.p.bucket.consumed(size)) - else: - # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller - # otherwise reduce sleep time percentile (values are based on tests) - # So in general cpu time is saved without reducing bandwidth too much - - if size < self.lastSize: - self.sleep += 0.002 - else: - self.sleep *= 0.7 - - self.lastSize = size - - sleep(self.sleep) - - if self.range and self.arrived > self.size: - return 0 #close if we have enough data - - - def parseHeader(self): - """parse data from received header""" - for orgline in self.decodeResponse(self.header).splitlines(): - line = orgline.strip().lower() - if line.startswith("accept-ranges") and "bytes" in line: - self.p.chunkSupport = True - - if "content-disposition" in line: - - m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", line) - if m: - name = remove_chars(m.groupdict()['name'], "\"';/").strip() - self.p._name = name - self.log.debug("Content-Disposition: %s" % name) - - if not self.resume and line.startswith("content-length"): - self.p.size = int(line.split(":")[1]) - - self.headerParsed = True - - def stop(self): - """The download will not proceed after next call of writeBody""" - self.range = [0,0] - self.size = 0 - - def resetRange(self): - """ Reset the range, so the download will load all data available """ - self.range = None - - def setRange(self, range): - self.range = range - self.size = range[1] - range[0] - - def flushFile(self): - """ flush and close file """ - self.fp.flush() - fsync(self.fp.fileno()) #make sure everything was written to disk - self.fp.close() #needs to be closed, or merging chunks will fail - - def close(self): - """ closes everything, unusable after this """ - if self.fp: self.fp.close() - self.c.close() - if hasattr(self, "p"): del self.p diff --git a/pyload/network/HTTPDownload.py b/pyload/network/HTTPDownload.py deleted file mode 100644 index 04bf2363a..000000000 --- a/pyload/network/HTTPDownload.py +++ /dev/null @@ -1,338 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -from os import remove -from os.path import dirname -from time import time -from shutil import move -from logging import getLogger - -import pycurl - -from HTTPChunk import ChunkInfo, HTTPChunk -from HTTPRequest import BadHeader - -from pyload.plugins.Base import Abort -from pyload.utils.fs import save_join, fs_encode - -# TODO: save content-disposition for resuming - -class HTTPDownload(): - """ loads an url, http + ftp supported """ - - def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None, - options={}, disposition=False): - self.url = url - self.filename = filename #complete file destination, not only name - self.get = get - self.post = post - self.referer = referer - self.cj = cj #cookiejar if cookies are needed - self.bucket = bucket - self.options = options - self.disposition = disposition - # all arguments - - self.abort = False - self.size = 0 - self._name = ""# will be parsed from content disposition - - self.chunks = [] - - self.log = getLogger("log") - - try: - self.info = ChunkInfo.load(filename) - self.info.resume = True #resume is only possible with valid info file - self.size = self.info.size - self.infoSaved = True - except IOError: - self.info = ChunkInfo(filename) - - self.chunkSupport = None - self.m = pycurl.CurlMulti() - - #needed for speed calculation - self.lastArrived = [] - self.speeds = [] - self.lastSpeeds = [0, 0] - - @property - def speed(self): - last = [sum(x) for x in self.lastSpeeds if x] - return (sum(self.speeds) + sum(last)) / (1 + len(last)) - - @property - def arrived(self): - return sum([c.arrived for c in self.chunks]) - - @property - def percent(self): - if not self.size: return 0 - return (self.arrived * 100) / self.size - - @property - def name(self): - return self._name if self.disposition else "" - - def _copyChunks(self): - init = fs_encode(self.info.getChunkName(0)) #initial chunk name - - if self.info.getCount() > 1: - fo = open(init, "rb+") #first chunkfile - for i in range(1, self.info.getCount()): - #input file - fo.seek( - self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks - fname = fs_encode("%s.chunk%d" % (self.filename, i)) - fi = open(fname, "rb") - buf = 32 * 1024 - while True: #copy in chunks, consumes less memory - data = fi.read(buf) - if not data: - break - fo.write(data) - fi.close() - if fo.tell() < self.info.getChunkRange(i)[1]: - fo.close() - remove(init) - self.info.remove() #there are probably invalid chunks - raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.") - remove(fname) #remove chunk - fo.close() - - if self.name: - self.filename = save_join(dirname(self.filename), self.name) - - move(init, fs_encode(self.filename)) - self.info.remove() #remove info file - - def download(self, chunks=1, resume=False): - """ returns new filename or None """ - - chunks = max(1, chunks) - resume = self.info.resume and resume - - try: - self._download(chunks, resume) - except pycurl.error, e: - #code 33 - no resume - code = e.args[0] - if code == 33: - # try again without resume - self.log.debug("Errno 33 -> Restart without resume") - - #remove old handles - for chunk in self.chunks: - self.closeChunk(chunk) - - return self._download(chunks, False) - else: - raise - finally: - self.close() - - return self.name - - def _download(self, chunks, resume): - if not resume: - self.info.clear() - self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry - - self.chunks = [] - - init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed) - - self.chunks.append(init) - self.m.add_handle(init.getHandle()) - - lastFinishCheck = 0 - lastTimeCheck = 0 - chunksDone = set() # list of curl handles that are finished - chunksCreated = False - done = False - if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can - self.chunkSupport = True - - while 1: - #need to create chunks - if not chunksCreated and self.chunkSupport and self.size: #will be set later by first chunk - - if not resume: - self.info.setSize(self.size) - self.info.createChunks(chunks) - self.info.save() - - chunks = self.info.getCount() - - init.setRange(self.info.getChunkRange(0)) - - for i in range(1, chunks): - c = HTTPChunk(i, self, self.info.getChunkRange(i), resume) - - handle = c.getHandle() - if handle: - self.chunks.append(c) - self.m.add_handle(handle) - else: - #close immediately - self.log.debug("Invalid curl handle -> closed") - c.close() - - chunksCreated = True - - while 1: - ret, num_handles = self.m.perform() - if ret != pycurl.E_CALL_MULTI_PERFORM: - break - - t = time() - - # reduce these calls - # when num_q is 0, the loop is exited - while lastFinishCheck + 0.5 < t: - # list of failed curl handles - failed = [] - ex = None # save only last exception, we can only raise one anyway - - num_q, ok_list, err_list = self.m.info_read() - for c in ok_list: - chunk = self.findChunk(c) - try: # check if the header implies success, else add it to failed list - chunk.verifyHeader() - except BadHeader, e: - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) - failed.append(chunk) - ex = e - else: - chunksDone.add(c) - - for c in err_list: - curl, errno, msg = c - chunk = self.findChunk(curl) - #test if chunk was finished - if errno != 23 or "0 !=" not in msg: - failed.append(chunk) - ex = pycurl.error(errno, msg) - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) - continue - - try: # check if the header implies success, else add it to failed list - chunk.verifyHeader() - except BadHeader, e: - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) - failed.append(chunk) - ex = e - else: - chunksDone.add(curl) - if not num_q: # no more info to get - - # check if init is not finished so we reset download connections - # note that other chunks are closed and everything downloaded with initial connection - if failed and init not in failed and init.c not in chunksDone: - self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) - - #list of chunks to clean and remove - to_clean = filter(lambda x: x is not init, self.chunks) - for chunk in to_clean: - self.closeChunk(chunk) - self.chunks.remove(chunk) - remove(fs_encode(self.info.getChunkName(chunk.id))) - - #let first chunk load the rest and update the info file - init.resetRange() - self.info.clear() - self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) - self.info.save() - elif failed: - raise ex - - lastFinishCheck = t - - if len(chunksDone) >= len(self.chunks): - if len(chunksDone) > len(self.chunks): - self.log.warning("Finished download chunks size incorrect, please report bug.") - done = True #all chunks loaded - - break - - if done: - break #all chunks loaded - - # calc speed once per second, averaging over 3 seconds - if lastTimeCheck + 1 < t: - diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in - enumerate(self.chunks)] - - self.lastSpeeds[1] = self.lastSpeeds[0] - self.lastSpeeds[0] = self.speeds - self.speeds = [float(a) / (t - lastTimeCheck) for a in diff] - self.lastArrived = [c.arrived for c in self.chunks] - lastTimeCheck = t - - if self.abort: - raise Abort() - - self.m.select(1) - - for chunk in self.chunks: - chunk.flushFile() #make sure downloads are written to disk - - self._copyChunks() - - def findChunk(self, handle): - """ linear search to find a chunk (should be ok since chunk size is usually low) """ - for chunk in self.chunks: - if chunk.c == handle: return chunk - - def closeChunk(self, chunk): - try: - self.m.remove_handle(chunk.c) - except pycurl.error, e: - self.log.debug("Error removing chunk: %s" % str(e)) - finally: - chunk.close() - - def close(self): - """ cleanup """ - for chunk in self.chunks: - self.closeChunk(chunk) - - self.chunks = [] - if hasattr(self, "m"): - self.m.close() - del self.m - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "info"): - del self.info - -if __name__ == "__main__": - url = "http://speedtest.netcologne.de/test_100mb.bin" - - from Bucket import Bucket - - bucket = Bucket() - bucket.setRate(200 * 1024) - bucket = None - - print "starting" - - dwnld = HTTPDownload(url, "test_100mb.bin", bucket=bucket) - dwnld.download(chunks=3, resume=True) diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py deleted file mode 100644 index ebf2c3132..000000000 --- a/pyload/network/HTTPRequest.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -import pycurl - -from codecs import getincrementaldecoder, lookup, BOM_UTF8 -from urllib import quote, urlencode -from httplib import responses -from logging import getLogger -from cStringIO import StringIO - -from pyload.plugins.Base import Abort - -def myquote(url): - return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]") - -def myurlencode(data): - data = dict(data) - return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \ - y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in data.iteritems())) - -bad_headers = range(400, 404) + range(405, 418) + range(500, 506) - -class BadHeader(Exception): - def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s" % (code, responses.get(int(code), "Unknown Header"))) - self.code = code - self.content = content - - -class HTTPRequest(): - def __init__(self, cookies=None, options=None): - self.c = pycurl.Curl() - self.rep = StringIO() - - self.cj = cookies #cookiejar - - self.lastURL = None - self.lastEffectiveURL = None - self.abort = False - self.code = 0 # last http code - - self.header = "" - - self.headers = [] #temporary request header - - self.initHandle() - self.setInterface(options) - self.setOptions(options) - - self.c.setopt(pycurl.WRITEFUNCTION, self.write) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - self.log = getLogger("log") - - - def initHandle(self): - """ sets common options to curl handle """ - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.MAXREDIRS, 5) - self.c.setopt(pycurl.CONNECTTIMEOUT, 30) - self.c.setopt(pycurl.NOSIGNAL, 1) - self.c.setopt(pycurl.NOPROGRESS, 1) - if hasattr(pycurl, "AUTOREFERER"): - self.c.setopt(pycurl.AUTOREFERER, 1) - self.c.setopt(pycurl.SSL_VERIFYPEER, 0) - # Interval for low speed, detects connection loss, but can abort dl if hoster stalls the download - self.c.setopt(pycurl.LOW_SPEED_TIME, 45) - self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) - - #self.c.setopt(pycurl.VERBOSE, 1) - - self.c.setopt(pycurl.USERAGENT, - "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") - if pycurl.version_info()[7]: - self.c.setopt(pycurl.ENCODING, "gzip, deflate") - self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US,en", - "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300", - "Expect:"]) - - def setInterface(self, options): - - interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - - if interface and interface.lower() != "none": - self.c.setopt(pycurl.INTERFACE, str(interface)) - - if proxy: - if proxy["type"] == "socks4": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4) - elif proxy["type"] == "socks5": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) - else: - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - - self.c.setopt(pycurl.PROXY, str(proxy["address"])) - self.c.setopt(pycurl.PROXYPORT, proxy["port"]) - - if proxy["username"]: - self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"]))) - - if ipv6: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) - else: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) - - if "auth" in options: - self.c.setopt(pycurl.USERPWD, str(options["auth"])) - - if "timeout" in options: - self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"]) - - def setOptions(self, options): - """ Sets same options as available in pycurl """ - for k, v in options.iteritems(): - if hasattr(pycurl, k): - self.c.setopt(getattr(pycurl, k), v) - - def addCookies(self): - """ put cookies from curl handle to cj """ - if self.cj: - self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) - - def getCookies(self): - """ add cookies from cj to curl handle """ - if self.cj: - for c in self.cj.getCookies(): - self.c.setopt(pycurl.COOKIELIST, c) - return - - def clearCookies(self): - self.c.setopt(pycurl.COOKIELIST, "") - - def setRequestContext(self, url, get, post, referer, cookies, multipart=False): - """ sets everything needed for the request """ - - url = myquote(url) - - if get: - get = urlencode(get) - url = "%s?%s" % (url, get) - - self.c.setopt(pycurl.URL, url) - self.lastURL = url - - if post: - self.c.setopt(pycurl.POST, 1) - if not multipart: - if type(post) == unicode: - post = str(post) #unicode not allowed - elif type(post) == str: - pass - else: - post = myurlencode(post) - - self.c.setopt(pycurl.POSTFIELDS, post) - else: - post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()] - self.c.setopt(pycurl.HTTPPOST, post) - else: - self.c.setopt(pycurl.POST, 0) - - if referer and self.lastURL: - self.c.setopt(pycurl.REFERER, str(self.lastURL)) - - if cookies: - self.c.setopt(pycurl.COOKIEFILE, "") - self.c.setopt(pycurl.COOKIEJAR, "") - self.getCookies() - - - def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False): - """ load and returns a given page """ - - self.setRequestContext(url, get, post, referer, cookies, multipart) - - # TODO: use http/rfc message instead - self.header = "" - - self.c.setopt(pycurl.HTTPHEADER, self.headers) - - if just_header: - self.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.c.setopt(pycurl.NOBODY, 1) #TODO: nobody= no post? - - # overwrite HEAD request, we want a common request type - if post: - self.c.setopt(pycurl.CUSTOMREQUEST, "POST") - else: - self.c.setopt(pycurl.CUSTOMREQUEST, "GET") - - try: - self.c.perform() - rep = self.header - finally: - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.NOBODY, 0) - self.c.unsetopt(pycurl.CUSTOMREQUEST) - - else: - self.c.perform() - rep = self.getResponse() - - self.c.setopt(pycurl.POSTFIELDS, "") - self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) - self.code = self.verifyHeader() - - self.addCookies() - - if decode: - rep = self.decodeResponse(rep) - - return rep - - def verifyHeader(self): - """ raise an exceptions on bad headers """ - code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - # TODO: raise anyway to be consistent, also rename exception - if code in bad_headers: - #404 will NOT raise an exception - raise BadHeader(code, self.getResponse()) - return code - - def checkHeader(self): - """ check if header indicates failure""" - return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers - - def getResponse(self): - """ retrieve response from string io """ - if self.rep is None: return "" - value = self.rep.getvalue() - self.rep.close() - self.rep = StringIO() - return value - - def decodeResponse(self, rep): - """ decode with correct encoding, relies on header """ - header = self.header.splitlines() - encoding = "utf8" # default encoding - - for line in header: - line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or\ - ("text" not in line and "application" not in line): - continue - - none, delemiter, charset = line.rpartition("charset=") - if delemiter: - charset = charset.split(";") - if charset: - encoding = charset[0] - - try: - #self.log.debug("Decoded %s" % encoding ) - if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): - encoding = 'utf-8-sig' - - decoder = getincrementaldecoder(encoding)("replace") - rep = decoder.decode(rep, True) - - #TODO: html_unescape as default - - except LookupError: - self.log.debug("No Decoder found for %s" % encoding) - except Exception: - self.log.debug("Error when decoding string from %s." % encoding) - - return rep - - def write(self, buf): - """ writes response """ - if self.rep.tell() > 1000000 or self.abort: - rep = self.getResponse() - if self.abort: raise Abort() - f = open("response.dump", "wb") - f.write(rep) - f.close() - raise Exception("Loaded Url exceeded limit") - - self.rep.write(buf) - - def writeHeader(self, buf): - """ writes header """ - self.header += buf - - def putHeader(self, name, value): - self.headers.append("%s: %s" % (name, value)) - - def clearHeaders(self): - self.headers = [] - - def close(self): - """ cleanup, unusable after this """ - self.rep.close() - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "c"): - self.c.close() - del self.c - -if __name__ == "__main__": - url = "http://pyload.org" - c = HTTPRequest() - print c.load(url) - diff --git a/pyload/threads/DownloadThread.py b/pyload/threads/DownloadThread.py index b5a45185f..4c4ee597c 100644 --- a/pyload/threads/DownloadThread.py +++ b/pyload/threads/DownloadThread.py @@ -26,7 +26,7 @@ from pycurl import error from pyload.plugins.Base import Fail, Retry, Abort from pyload.plugins.Hoster import Reconnect, SkipDownload -from pyload.network.HTTPRequest import BadHeader +from pyload.plugins.Request import ResponseException from BaseThread import BaseThread @@ -182,7 +182,7 @@ class DownloadThread(BaseThread): except Exception, e: - if isinstance(e, BadHeader) and e.code == 500: + if isinstance(e, ResponseException) and e.code == 500: pyfile.setStatus("temp. offline") self.log.warning(_("Download is temporary offline: %s") % pyfile.name) pyfile.error = _("Internal Server Error") |