Diffstat (limited to 'pyload/plugins/network')
-rw-r--r--  pyload/plugins/network/CurlChunk.py      | 299
-rw-r--r--  pyload/plugins/network/CurlDownload.py   | 323
-rw-r--r--  pyload/plugins/network/CurlRequest.py    | 314
-rw-r--r--  pyload/plugins/network/DefaultRequest.py |   9
-rw-r--r--  pyload/plugins/network/__init__.py       |   1
5 files changed, 946 insertions(+), 0 deletions(-)
diff --git a/pyload/plugins/network/CurlChunk.py b/pyload/plugins/network/CurlChunk.py
new file mode 100644
index 000000000..4250db2ce
--- /dev/null
+++ b/pyload/plugins/network/CurlChunk.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+from os import remove, stat, fsync
+from os.path import exists
+from time import sleep
+from re import search
+
+import codecs
+import pycurl
+
+from pyload.utils import remove_chars
+from pyload.utils.fs import fs_encode
+
+from CurlRequest import CurlRequest
+
+
+class WrongFormat(Exception):
+    pass
+
+
+class ChunkInfo():
+    def __init__(self, name):
+        self.name = unicode(name)
+        self.size = 0
+        self.resume = False
+        self.chunks = []
+
+    def __repr__(self):
+        ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
+        for i, c in enumerate(self.chunks):
+            ret += "%s# %s\n" % (i, c[1])
+
+        return ret
+
+    def setSize(self, size):
+        self.size = int(size)
+
+    def addChunk(self, name, range):
+        self.chunks.append((name, range))
+
+    def clear(self):
+        self.chunks = []
+
+    def createChunks(self, chunks):
+        self.clear()
+        chunk_size = self.size / chunks
+
+        current = 0
+        for i in range(chunks):
+            end = self.size - 1 if (i == chunks - 1) else current + chunk_size
+            self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+            current += chunk_size + 1
+
+    def save(self):
+        fs_name = fs_encode("%s.chunks" % self.name)
+        fh = codecs.open(fs_name, "w", "utf_8")
+        fh.write("name:%s\n" % self.name)
+        fh.write("size:%s\n" % self.size)
+        for i, c in enumerate(self.chunks):
+            fh.write("#%d:\n" % i)
+            fh.write("\tname:%s\n" % c[0])
+            fh.write("\trange:%i-%i\n" % c[1])
+        fh.close()
+
+    @staticmethod
+    def load(name):
+        fs_name = fs_encode("%s.chunks" % name)
+        if not exists(fs_name):
+            raise IOError()
+        fh = codecs.open(fs_name, "r", "utf_8")
+        name = fh.readline()[:-1]
+        size = fh.readline()[:-1]
+        if name.startswith("name:") and size.startswith("size:"):
+            name = name[5:]
+            size = size[5:]
+        else:
+            fh.close()
+            raise WrongFormat()
+        ci = ChunkInfo(name)
+        ci.loaded = True
+        ci.setSize(size)
+        while True:
+            if not fh.readline():  # skip line
+                break
+            name = fh.readline()[1:-1]
+            range = fh.readline()[1:-1]
+            if name.startswith("name:") and range.startswith("range:"):
+                name = name[5:]
+                range = range[6:].split("-")
+            else:
+                raise WrongFormat()
+
+            ci.addChunk(name, (long(range[0]), long(range[1])))
+        fh.close()
+        return ci
+
+    def remove(self):
+        fs_name = fs_encode("%s.chunks" % self.name)
+        if exists(fs_name): remove(fs_name)
+
+    def getCount(self):
+        return len(self.chunks)
+
+    def getChunkName(self, index):
+        return self.chunks[index][0]
+
+    def getChunkRange(self, index):
+        return self.chunks[index][1]
+
+
+class CurlChunk(CurlRequest):
+    def __init__(self, id, parent, range=None, resume=False):
+        self.setContext(*parent.getContext())
+
+        self.id = id
+        self.p = parent  # CurlDownload instance
+        self.range = range  # tuple (start, end)
+        self.resume = resume
+        self.log = parent.log
+
+        self.size = range[1] - range[0] if range else -1
+        self.arrived = 0
+        self.lastURL = self.p.referer
+
+        self.c = pycurl.Curl()
+
+        self.header = ""
+        self.headerParsed = False  # indicates if the header has been processed
+
+        self.fp = None  # file handle
+
+        self.initContext()
+
+        self.BOMChecked = False  # check and remove byte order mark
+
+        self.rep = None
+
+        self.sleep = 0.000
+        self.lastSize = 0
+
+    def __repr__(self):
+        return "<CurlChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
+    @property
+    def cj(self):
+        return self.p.context
+
+    def getHandle(self):
+        """ returns a Curl handle ready to use for perform/multiperform """
+
+        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.cj)
+        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+        # request all bytes, since some servers in russia seem to have a defective arithmetic unit
+
+        fs_name = fs_encode(self.p.info.getChunkName(self.id))
+        if self.resume:
+            self.fp = open(fs_name, "ab")
+            self.arrived = self.fp.tell()
+            if not self.arrived:
+                self.arrived = stat(fs_name).st_size
+
+            if self.range:
+                # do nothing if chunk already finished
+                if self.arrived + self.range[0] >= self.range[1]: return None
+
+                if self.id == len(self.p.info.chunks) - 1:  # as last chunk, don't set an end range, so we get everything
+                    range = "%i-" % (self.arrived + self.range[0])
+                else:
+                    range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+                self.log.debug("Chunked resume with range %s" % range)
+                self.c.setopt(pycurl.RANGE, range)
+            else:
+                self.log.debug("Resume File from %i" % self.arrived)
+                self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+        else:
+            if self.range:
+                if self.id == len(self.p.info.chunks) - 1:  # see above
+                    range = "%i-" % self.range[0]
+                else:
+                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+                self.log.debug("Chunked with range %s" % range)
+                self.c.setopt(pycurl.RANGE, range)
+
+            self.fp = open(fs_name, "wb")
+
+        return self.c
+
+    def writeHeader(self, buf):
+        self.header += buf
+        # @TODO forward headers? This is possibly unneeded when we just parse valid 200 headers
+        # as first chunk, we will parse the headers
+        if not self.range and self.header.endswith("\r\n\r\n"):
+            self.parseHeader()
+        elif not self.range and buf.startswith("150") and "data connection" in buf:  # ftp file size parsing
+            size = search(r"(\d+) bytes", buf)
+            if size:
+                self.p._size = int(size.group(1))
+                self.p.chunkSupport = True
+
+            self.headerParsed = True
+
+    def writeBody(self, buf):
+        # ignore BOM, it confuses unrar
+        if not self.BOMChecked:
+            if [ord(b) for b in buf[:3]] == [239, 187, 191]:
+                buf = buf[3:]
+            self.BOMChecked = True
+
+        size = len(buf)
+
+        self.arrived += size
+
+        self.fp.write(buf)
+
+        if self.p.bucket:
+            sleep(self.p.bucket.consumed(size))
+        else:
+            # Avoid small buffers: increase sleep time slowly if the buffer size gets smaller,
+            # otherwise reduce sleep time by a percentage (values are based on tests).
+            # So in general cpu time is saved without reducing bandwidth too much
+
+            if size < self.lastSize:
+                self.sleep += 0.002
+            else:
+                self.sleep *= 0.7
+
+            self.lastSize = size
+
+            sleep(self.sleep)
+
+        if self.range and self.arrived > self.size:
+            return 0  # close if we have enough data
+
+    def parseHeader(self):
+        """parse data from received header"""
+        for orgline in self.decodeResponse(self.header).splitlines():
+            line = orgline.strip().lower()
+            if line.startswith("accept-ranges") and "bytes" in line:
+                self.p.chunkSupport = True
+
+            if "content-disposition" in line:
+
+                m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", line)
+                if m:
+                    name = remove_chars(m.groupdict()['name'], "\"';/").strip()
+                    self.p._name = name
+                    self.log.debug("Content-Disposition: %s" % name)
+
+            if not self.resume and line.startswith("content-length"):
+                self.p._size = int(line.split(":")[1])
+
+        self.headerParsed = True
+
+    def stop(self):
+        """The download will not proceed after the next call of writeBody"""
+        self.range = [0, 0]
+        self.size = 0
+
+    def resetRange(self):
+        """ Reset the range, so the download will load all data available """
+        self.range = None
+
+    def setRange(self, range):
+        self.range = range
+        self.size = range[1] - range[0]
+
+    def flushFile(self):
+        """ flush and close file """
+        self.fp.flush()
+        fsync(self.fp.fileno())  # make sure everything was written to disk
+        self.fp.close()  # needs to be closed, or merging chunks will fail
+
+    def close(self):
+        """ closes everything, unusable after this """
+        if self.fp: self.fp.close()
+        self.c.close()
+        if hasattr(self, "p"): del self.p
diff --git a/pyload/plugins/network/CurlDownload.py b/pyload/plugins/network/CurlDownload.py
new file mode 100644
index 000000000..5de83ec7b
--- /dev/null
+++ b/pyload/plugins/network/CurlDownload.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+from os import remove
+from os.path import dirname
+from time import time
+from shutil import move
+
+import pycurl
+
+from pyload.plugins.Base import Abort
+from pyload.utils.fs import save_join, fs_encode
+
+from ..Download import Download
+from CurlChunk import ChunkInfo, CurlChunk
+from CurlRequest import ResponseException
+
+# TODO: save content-disposition for resuming
+
+
+class CurlDownload(Download):
+    """ loads a url, http + ftp supported """
+
+    # def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+    #              options={}, disposition=False):
+
+    def __init__(self, *args, **kwargs):
+        Download.__init__(self, *args, **kwargs)
+
+        self.path = None
+        self.disposition = False
+
+        self.chunks = []
+        self.chunkSupport = None
+
+        self.m = pycurl.CurlMulti()
+
+        # needed for speed calculation
+        self.lastArrived = []
+        self.speeds = []
+        self.lastSpeeds = [0, 0]
+
+    @property
+    def speed(self):
+        last = [sum(x) for x in self.lastSpeeds if x]
+        return (sum(self.speeds) + sum(last)) / (1 + len(last))
+
+    @property
+    def arrived(self):
+        return sum(c.arrived for c in self.chunks) if self.chunks else self._size
+
+    @property
+    def name(self):
+        return self._name if self.disposition else None
+
+    def _copyChunks(self):
+        init = fs_encode(self.info.getChunkName(0))  # initial chunk name
+
+        if self.info.getCount() > 1:
+            fo = open(init, "rb+")  # first chunkfile
+            for i in range(1, self.info.getCount()):
+                # input file
+                fo.seek(
+                    self.info.getChunkRange(i - 1)[1] + 1)  # seek to beginning of chunk, to get rid of overlapping chunks
+                fname = fs_encode("%s.chunk%d" % (self.path, i))
+                fi = open(fname, "rb")
+                buf = 32 * 1024
+                while True:  # copy in chunks, consumes less memory
+                    data = fi.read(buf)
+                    if not data:
+                        break
+                    fo.write(data)
+                fi.close()
+                if fo.tell() < self.info.getChunkRange(i)[1]:
+                    fo.close()
+                    remove(init)
+                    self.info.remove()  # there are probably invalid chunks
+                    raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
+                remove(fname)  # remove chunk
+            fo.close()
+
+        if self.name:
+            self.filename = save_join(dirname(self.path), self.name)
+
+        move(init, fs_encode(self.path))
+        self.info.remove()  # remove info file
+
+    def checkResume(self):
+        try:
+            self.info = ChunkInfo.load(self.path)
+            self.info.resume = True  # resume is only possible with valid info file
+            self._size = self.info.size
+            self.infoSaved = True
+        except IOError:
+            self.info = ChunkInfo(self.path)
+
+    def download(self, uri, path, get={}, post={}, referer=True, disposition=False, chunks=1, resume=False):
+        """ returns new filename or None """
+        self.url = uri
+        self.path = path
+        self.disposition = disposition
+        self.get = get
+        self.post = post
+        self.referer = referer
+
+        self.checkResume()
+        chunks = max(1, chunks)
+        resume = self.info.resume and resume
+
+        try:
+            self._download(chunks, resume)
+        except pycurl.error, e:
+            # code 33 - no resume
+            code = e.args[0]
+            if code == 33:
+                # try again without resume
+                self.log.debug("Errno 33 -> Restart without resume")
+
+                # remove old handles
+                for chunk in self.chunks:
+                    self.closeChunk(chunk)
+
+                return self._download(chunks, False)
+            else:
+                raise
+        finally:
+            self.close()
+
+        return self.name
+
+    def _download(self, chunks, resume):
+        if not resume:
+            self.info.clear()
+            self.info.addChunk("%s.chunk0" % self.path, (0, 0))  # create an initial entry
+
+        self.chunks = []
+
+        init = CurlChunk(0, self, None, resume)  # initial chunk that will load the complete file (if needed)
+
+        self.chunks.append(init)
+        self.m.add_handle(init.getHandle())
+
+        lastFinishCheck = 0
+        lastTimeCheck = 0
+        chunksDone = set()  # list of curl handles that are finished
+        chunksCreated = False
+        done = False
+        if self.info.getCount() > 1:  # this is a resume; if we were chunked originally, assume we still can be
+            self.chunkSupport = True
+
+        while 1:
+            # need to create chunks
+            if not chunksCreated and self.chunkSupport and self.size:  # will be set later by first chunk
+
+                if not resume:
+                    self.info.setSize(self.size)
+                    self.info.createChunks(chunks)
+                    self.info.save()
+
+                chunks = self.info.getCount()
+
+                init.setRange(self.info.getChunkRange(0))
+
+                for i in range(1, chunks):
+                    c = CurlChunk(i, self, self.info.getChunkRange(i), resume)
+
+                    handle = c.getHandle()
+                    if handle:
+                        self.chunks.append(c)
+                        self.m.add_handle(handle)
+                    else:
+                        # close immediately
+                        self.log.debug("Invalid curl handle -> closed")
+                        c.close()
+
+                chunksCreated = True
+
+            while 1:
+                ret, num_handles = self.m.perform()
+                if ret != pycurl.E_CALL_MULTI_PERFORM:
+                    break
+
+            t = time()
+
+            # reduce these calls
+            # when num_q is 0, the loop is exited
+            while lastFinishCheck + 0.5 < t:
+                # list of failed curl handles
+                failed = []
+                ex = None  # save only the last exception, we can only raise one anyway
+
+                num_q, ok_list, err_list = self.m.info_read()
+                for c in ok_list:
+                    chunk = self.findChunk(c)
+                    try:  # check if the header implies success, else add it to failed list
+                        chunk.verifyHeader()
+                    except ResponseException, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(c)
+
+                for c in err_list:
+                    curl, errno, msg = c
+                    chunk = self.findChunk(curl)
+                    # test if chunk was finished
+                    if errno != 23 or "0 !=" not in msg:
+                        failed.append(chunk)
+                        ex = pycurl.error(errno, msg)
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+                        continue
+
+                    try:  # check if the header implies success, else add it to failed list
+                        chunk.verifyHeader()
+                    except ResponseException, e:
+                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+                        failed.append(chunk)
+                        ex = e
+                    else:
+                        chunksDone.add(curl)
+                if not num_q:  # no more info to get
+
+                    # check if init is not finished so we reset download connections
+                    # note that other chunks are closed and everything is downloaded with the initial connection
+                    if failed and init not in failed and init.c not in chunksDone:
+                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+                        # list of chunks to clean and remove
+                        to_clean = filter(lambda x: x is not init, self.chunks)
+                        for chunk in to_clean:
+                            self.closeChunk(chunk)
+                            self.chunks.remove(chunk)
+                            remove(fs_encode(self.info.getChunkName(chunk.id)))
+
+                        # let the first chunk load the rest and update the info file
+                        init.resetRange()
+                        self.info.clear()
+                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+                        self.info.save()
+                    elif failed:
+                        raise ex
+
+                    lastFinishCheck = t
+
+                    if len(chunksDone) >= len(self.chunks):
+                        if len(chunksDone) > len(self.chunks):
+                            self.log.warning("Finished download chunks size incorrect, please report bug.")
+                        done = True  # all chunks loaded
+
+                    break
+
+            if done:
+                break  # all chunks loaded
+
+            # calc speed once per second, averaging over 3 seconds
+            if lastTimeCheck + 1 < t:
+                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+                        enumerate(self.chunks)]
+
+                self.lastSpeeds[1] = self.lastSpeeds[0]
+                self.lastSpeeds[0] = self.speeds
+                self.speeds = [float(a) / (t - lastTimeCheck) for a in diff]
+                self.lastArrived = [c.arrived for c in self.chunks]
+                lastTimeCheck = t
+
+            if self.doAbort:
+                raise Abort()
+
+            self.m.select(1)
+
+        for chunk in self.chunks:
+            chunk.flushFile()  # make sure downloads are written to disk
+
+        self._copyChunks()
+
+    def findChunk(self, handle):
+        """ linear search to find a chunk (should be ok since the chunk count is usually low) """
+        for chunk in self.chunks:
+            if chunk.c == handle: return chunk
+
+    def closeChunk(self, chunk):
+        try:
+            self.m.remove_handle(chunk.c)
+        except pycurl.error, e:
+            self.log.debug("Error removing chunk: %s" % str(e))
+        finally:
+            chunk.close()
+
+    def close(self):
+        """ cleanup """
+        for chunk in self.chunks:
+            self.closeChunk(chunk)
+        else:
+            # Workaround: pycurl segfaults when closing a multi handle that never had any curl handles
+            if hasattr(self, "m"):
+                c = pycurl.Curl()
+                self.m.add_handle(c)
+                self.m.remove_handle(c)
+                c.close()
+
+        self.chunks = []
+        if hasattr(self, "m"):
+            self.m.close()
+            del self.m
+        if hasattr(self, "cj"):
+            del self.cj
+        if hasattr(self, "info"):
+            del self.info
\ No newline at end of file
diff --git a/pyload/plugins/network/CurlRequest.py b/pyload/plugins/network/CurlRequest.py
new file mode 100644
index 000000000..4630403df
--- /dev/null
+++ b/pyload/plugins/network/CurlRequest.py
@@ -0,0 +1,314 @@
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+import pycurl
+
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
+from urllib import quote, urlencode
+from httplib import responses
+from cStringIO import StringIO
+
+from pyload.plugins.Base import Abort
+from pyload.network.CookieJar import CookieJar
+
+from ..Request import Request, ResponseException
+
+
+def myquote(url):
+    return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")
+
+
+def myurlencode(data):
+    data = dict(data)
+    return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \
+                           y.encode('utf8') if isinstance(y, unicode) else y) for x, y in data.iteritems()))
+
+
+bad_headers = range(400, 418) + range(500, 506)
+
+
+class CurlRequest(Request):
+    """ Request class based on libcurl """
+
+    __version__ = "0.1"
+
+    CONTEXT_CLASS = CookieJar
+
+    def __init__(self, *args, **kwargs):
+        self.c = pycurl.Curl()
+        Request.__init__(self, *args, **kwargs)
+
+        self.rep = StringIO()
+        self.lastURL = None
+        self.lastEffectiveURL = None
+
+        # cookiejar defines the context
+        self.cj = self.context
+
+        self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+    # TODO: addAuth, addHeader
+
+    def initContext(self):
+        self.initHandle()
+
+        if self.config:
+            self.setInterface(self.config)
+            self.initOptions(self.config)
+
+    def initHandle(self):
+        """ sets common options on the curl handle """
+
+        self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+        self.c.setopt(pycurl.MAXREDIRS, 5)
+        self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+        self.c.setopt(pycurl.NOSIGNAL, 1)
+        self.c.setopt(pycurl.NOPROGRESS, 1)
+        if hasattr(pycurl, "AUTOREFERER"):
+            self.c.setopt(pycurl.AUTOREFERER, 1)
+        self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+        # Interval for low speed, detects connection loss, but can abort dl if hoster stalls the download
+        self.c.setopt(pycurl.LOW_SPEED_TIME, 45)
+        self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
+
+        # don't save the cookies
+        self.c.setopt(pycurl.COOKIEFILE, "")
+        self.c.setopt(pycurl.COOKIEJAR, "")
+
+        # self.c.setopt(pycurl.VERBOSE, 1)
+
+        self.c.setopt(pycurl.USERAGENT,
+                      "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+        if pycurl.version_info()[7]:
+            self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+        self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+                                          "Accept-Language: en-US,en",
+                                          "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+                                          "Connection: keep-alive",
+                                          "Keep-Alive: 300",
+                                          "Expect:"])
+
+    def setInterface(self, options):
+
+        interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
+
+        if interface and interface.lower() != "none":
+            self.c.setopt(pycurl.INTERFACE, str(interface))
+
+        if proxy:
+            if proxy["type"] == "socks4":
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
+            elif proxy["type"] == "socks5":
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
+            else:
+                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
+
+            self.c.setopt(pycurl.PROXY, str(proxy["address"]))
+            self.c.setopt(pycurl.PROXYPORT, proxy["port"])
+
+            if proxy["username"]:
+                self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
+
+        if ipv6:
+            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
+        else:
+            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+
+        if "timeout" in options:
+            self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
+
+    def initOptions(self, options):
+        """ Sets the same options as available in pycurl """
+        for k, v in options.iteritems():
+            if hasattr(pycurl, k):
+                self.c.setopt(getattr(pycurl, k), v)
+
+    def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
+        """ sets everything needed for the request """
+        url = myquote(url)
+
+        if get:
+            get = urlencode(get)
+            url = "%s?%s" % (url, get)
+
+        self.c.setopt(pycurl.URL, url)
+        self.lastURL = url
+
+        if post:
+            self.c.setopt(pycurl.POST, 1)
+            if not multipart:
+                if type(post) == unicode:
+                    post = str(post)  # unicode not allowed
+                elif type(post) == str:
+                    pass
+                else:
+                    post = myurlencode(post)
+
+                self.c.setopt(pycurl.POSTFIELDS, post)
+            else:
+                post = [(x, y.encode('utf8') if type(y) == unicode else y) for x, y in post.iteritems()]
+                self.c.setopt(pycurl.HTTPPOST, post)
+        else:
+            self.c.setopt(pycurl.POST, 0)
+
+        if referer and self.lastURL:
+            self.c.setopt(pycurl.REFERER, str(self.lastURL))
+        else:
+            self.c.setopt(pycurl.REFERER, "")
+
+        if cookies:
+            self.c.setopt(pycurl.COOKIELIST, self.cj.output())
+        else:
+            # Magic string that erases all cookies
+            self.c.setopt(pycurl.COOKIELIST, "ALL")
+
+        # TODO: remove auth again
+        if "auth" in self.options:
+            self.c.setopt(pycurl.USERPWD, str(self.options["auth"]))
+
+    def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
+        """ load and returns a given page """
+
+        self.setRequestContext(url, get, post, referer, cookies, multipart)
+
+        # TODO: use http/rfc message instead
+        self.header = ""
+
+        if "header" in self.options:
+            self.c.setopt(pycurl.HTTPHEADER, self.options["header"])
+
+        if just_header:
+            self.c.setopt(pycurl.FOLLOWLOCATION, 0)
+            self.c.setopt(pycurl.NOBODY, 1)  # TODO: nobody= no post?
+
+            # overwrite HEAD request, we want a common request type
+            if post:
+                self.c.setopt(pycurl.CUSTOMREQUEST, "POST")
+            else:
+                self.c.setopt(pycurl.CUSTOMREQUEST, "GET")
+
+            try:
+                self.c.perform()
+                rep = self.header
+            finally:
+                self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+                self.c.setopt(pycurl.NOBODY, 0)
+                self.c.unsetopt(pycurl.CUSTOMREQUEST)
+
+        else:
+            self.c.perform()
+            rep = self.getResponse()
+
+        self.c.setopt(pycurl.POSTFIELDS, "")
+        self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+        self.code = self.verifyHeader()
+
+        if cookies:
+            self.parseCookies()
+
+        if decode:
+            rep = self.decodeResponse(rep)
+
+        return rep
+
+    def parseCookies(self):
+        for c in self.c.getinfo(pycurl.INFO_COOKIELIST):
+            # http://xiix.wordpress.com/2006/03/23/mozillafirefox-cookie-format
+            domain, flag, path, secure, expires, name, value = c.split("\t")
+            # http only was added in py 2.6
+            domain = domain.replace("#HttpOnly_", "")
+            self.cj.setCookie(domain, name, value, path, expires, secure)
+
+    def verifyHeader(self):
+        """ raise an exception on bad headers """
+        code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+        if code in bad_headers:
+            raise ResponseException(code, responses.get(code, "Unknown statuscode"))
+        return code
+
+    def getResponse(self):
+        """ retrieve response from string io """
+        if self.rep is None: return ""
+        value = self.rep.getvalue()
+        self.rep.close()
+        self.rep = StringIO()
+        return value
+
+    def decodeResponse(self, rep):
+        """ decode with correct encoding, relies on header """
+        header = self.header.splitlines()
+        encoding = "utf8"  # default encoding
+
+        for line in header:
+            line = line.lower().replace(" ", "")
+            if not line.startswith("content-type:") or \
+                    ("text" not in line and "application" not in line):
+                continue
+
+            none, delimiter, charset = line.rpartition("charset=")
+            if delimiter:
+                charset = charset.split(";")
+                if charset:
+                    encoding = charset[0]
+
+        try:
+            # self.log.debug("Decoded %s" % encoding)
+            if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+                encoding = 'utf-8-sig'
+
+            decoder = getincrementaldecoder(encoding)("replace")
+            rep = decoder.decode(rep, True)
+
+            # TODO: html_unescape as default
+
+        except LookupError:
+            self.log.debug("No Decoder found for %s" % encoding)
+        except Exception:
+            self.log.debug("Error when decoding string from %s." % encoding)
+
+        return rep
+
+    def write(self, buf):
+        """ writes response """
+        if self.rep.tell() > 1000000 or self.doAbort:
+            rep = self.getResponse()
+            if self.doAbort: raise Abort()
+            f = open("response.dump", "wb")
+            f.write(rep)
+            f.close()
+            raise Exception("Loaded Url exceeded limit")
+
+        self.rep.write(buf)
+
+    def writeHeader(self, buf):
+        """ writes header """
+        self.header += buf
+
+    def reset(self):
+        self.cj.clear()
+        self.options.clear()
+
+    def close(self):
+        """ cleanup, unusable after this """
+        self.rep.close()
+        if hasattr(self, "cj"):
+            del self.cj
+        if hasattr(self, "c"):
+            self.c.close()
+            del self.c
\ No newline at end of file
diff --git a/pyload/plugins/network/DefaultRequest.py b/pyload/plugins/network/DefaultRequest.py
new file mode 100644
index 000000000..dce486ea5
--- /dev/null
+++ b/pyload/plugins/network/DefaultRequest.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from CurlRequest import CurlRequest
+from CurlDownload import CurlDownload
+
+__version__ = "0.1"
+
+DefaultRequest = CurlRequest
+DefaultDownload = CurlDownload
\ No newline at end of file
diff --git a/pyload/plugins/network/__init__.py b/pyload/plugins/network/__init__.py
new file mode 100644
index 000000000..4b31e848b
--- /dev/null
+++ b/pyload/plugins/network/__init__.py
@@ -0,0 +1 @@
+__author__ = 'christian'