diff options
Diffstat (limited to 'pyload/network')
-rw-r--r-- | pyload/network/Browser.py | 132 | ||||
-rw-r--r-- | pyload/network/Bucket.py | 59 | ||||
-rw-r--r-- | pyload/network/CookieJar.py | 50 | ||||
-rw-r--r-- | pyload/network/HTTPChunk.py | 292 | ||||
-rw-r--r-- | pyload/network/HTTPDownload.py | 325 | ||||
-rw-r--r-- | pyload/network/HTTPRequest.py | 303 | ||||
-rw-r--r-- | pyload/network/RequestFactory.py | 126 | ||||
-rw-r--r-- | pyload/network/XDCCRequest.py | 159 | ||||
-rw-r--r-- | pyload/network/__init__.py | 1 |
9 files changed, 1447 insertions, 0 deletions
diff --git a/pyload/network/Browser.py b/pyload/network/Browser.py new file mode 100644 index 000000000..e78d24688 --- /dev/null +++ b/pyload/network/Browser.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- + +from logging import getLogger + +from HTTPRequest import HTTPRequest +from HTTPDownload import HTTPDownload + + +class Browser(object): + __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl") + + def __init__(self, bucket=None, options={}): + self.log = getLogger("log") + + self.options = options #holds pycurl options + self.bucket = bucket + + self.cj = None # needs to be setted later + self._size = 0 + + self.renewHTTPRequest() + self.dl = None + + + def renewHTTPRequest(self): + if hasattr(self, "http"): self.http.close() + self.http = HTTPRequest(self.cj, self.options) + + def setLastURL(self, val): + self.http.lastURL = val + + # tunnel some attributes from HTTP Request to Browser + lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL) + lastURL = property(lambda self: self.http.lastURL, setLastURL) + code = property(lambda self: self.http.code) + cookieJar = property(lambda self: self.cj) + + def setCookieJar(self, cj): + self.cj = cj + self.http.cj = cj + + @property + def speed(self): + if self.dl: + return self.dl.speed + return 0 + + @property + def size(self): + if self._size: + return self._size + if self.dl: + return self.dl.size + return 0 + + @property + def arrived(self): + if self.dl: + return self.dl.arrived + return 0 + + @property + def percent(self): + if not self.size: return 0 + return (self.arrived * 100) / self.size + + def clearCookies(self): + if self.cj: + self.cj.clear() + self.http.clearCookies() + + def clearReferer(self): + self.http.lastURL = None + + def abortDownloads(self): + self.http.abort = True + if self.dl: + self._size = self.dl.size + self.dl.abort = True + + def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False, + progressNotify=None, disposition=False): + """ this can also download ftp """ + self._size = 0 + self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None, + self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition) + name = self.dl.download(chunks, resume) + self._size = self.dl.size + + self.dl = None + + return name + + def load(self, *args, **kwargs): + """ retrieves page """ + return self.http.load(*args, **kwargs) + + def putHeader(self, name, value): + """ add a header to the request """ + self.http.putHeader(name, value) + + def addAuth(self, pwd): + """Adds user and pw for http auth + + :param pwd: string, user:password + """ + self.options["auth"] = pwd + self.renewHTTPRequest() #we need a new request + + def removeAuth(self): + if "auth" in self.options: del self.options["auth"] + self.renewHTTPRequest() + + def setOption(self, name, value): + """Adds an option to the request, see HTTPRequest for existing ones""" + self.options[name] = value + + def deleteOption(self, name): + if name in self.options: del self.options[name] + + def clearHeaders(self): + self.http.clearHeaders() + + def close(self): + """ cleanup """ + if hasattr(self, "http"): + self.http.close() + del self.http + if hasattr(self, "dl"): + del self.dl + if hasattr(self, "cj"): + del self.cj diff --git a/pyload/network/Bucket.py b/pyload/network/Bucket.py new file mode 100644 index 000000000..a096d644a --- /dev/null +++ b/pyload/network/Bucket.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +from time import time +from threading import Lock + +class Bucket: + def __init__(self): + self.rate = 0 + self.tokens = 0 + self.timestamp = time() + self.lock = Lock() + + def __nonzero__(self): + return False if self.rate < 10240 else True + + def setRate(self, rate): + self.lock.acquire() + self.rate = int(rate) + self.lock.release() + + def consumed(self, amount): + """ return time the process have to sleep, after consumed specified amount """ + if self.rate < 10240: return 0 #min. 10kb, may become unresponsive otherwise + self.lock.acquire() + + self.calc_tokens() + self.tokens -= amount + + if self.tokens < 0: + time = -self.tokens/float(self.rate) + else: + time = 0 + + + self.lock.release() + return time + + def calc_tokens(self): + if self.tokens < self.rate: + now = time() + delta = self.rate * (now - self.timestamp) + self.tokens = min(self.rate, self.tokens + delta) + self.timestamp = now diff --git a/pyload/network/CookieJar.py b/pyload/network/CookieJar.py new file mode 100644 index 000000000..a6ae090bc --- /dev/null +++ b/pyload/network/CookieJar.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay, RaNaN +""" + +from time import time + +class CookieJar: + def __init__(self, pluginname, account=None): + self.cookies = {} + self.plugin = pluginname + self.account = account + + def addCookies(self, clist): + for c in clist: + name = c.split("\t")[5] + self.cookies[name] = c + + def getCookies(self): + return self.cookies.values() + + def parseCookie(self, name): + if name in self.cookies: + return self.cookies[name].split("\t")[6] + else: + return None + + def getCookie(self, name): + return self.parseCookie(name) + + def setCookie(self, domain, name, value, path="/", exp=time()+3600*24*180): + s = ".%s TRUE %s FALSE %s %s %s" % (domain, path, exp, name, value) + self.cookies[name] = s + + def clear(self): + self.cookies = {} diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py new file mode 100644 index 000000000..b9d2a5379 --- /dev/null +++ b/pyload/network/HTTPChunk.py @@ -0,0 +1,292 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" +from os import remove, stat, fsync +from os.path import exists +from time import sleep +from re import search +from pyload.utils import fs_encode +import codecs +import pycurl + +from HTTPRequest import HTTPRequest + +class WrongFormat(Exception): + pass + + +class ChunkInfo: + def __init__(self, name): + self.name = unicode(name) + self.size = 0 + self.resume = False + self.chunks = [] + + def __repr__(self): + ret = "ChunkInfo: %s, %s\n" % (self.name, self.size) + for i, c in enumerate(self.chunks): + ret += "%s# %s\n" % (i, c[1]) + + return ret + + def setSize(self, size): + self.size = int(size) + + def addChunk(self, name, range): + self.chunks.append((name, range)) + + def clear(self): + self.chunks = [] + + def createChunks(self, chunks): + self.clear() + chunk_size = self.size / chunks + + current = 0 + for i in range(chunks): + end = self.size - 1 if (i == chunks - 1) else current + chunk_size + self.addChunk("%s.chunk%s" % (self.name, i), (current, end)) + current += chunk_size + 1 + + + def save(self): + fs_name = fs_encode("%s.chunks" % self.name) + fh = codecs.open(fs_name, "w", "utf_8") + fh.write("name:%s\n" % self.name) + fh.write("size:%s\n" % self.size) + for i, c in enumerate(self.chunks): + fh.write("#%d:\n" % i) + fh.write("\tname:%s\n" % c[0]) + fh.write("\trange:%i-%i\n" % c[1]) + fh.close() + + @staticmethod + def load(name): + fs_name = fs_encode("%s.chunks" % name) + if not exists(fs_name): + raise IOError() + fh = codecs.open(fs_name, "r", "utf_8") + name = fh.readline()[:-1] + size = fh.readline()[:-1] + if name.startswith("name:") and size.startswith("size:"): + name = name[5:] + size = size[5:] + else: + fh.close() + raise WrongFormat() + ci = ChunkInfo(name) + ci.loaded = True + ci.setSize(size) + while True: + if not fh.readline(): #skip line + break + name = fh.readline()[1:-1] + range = fh.readline()[1:-1] + if name.startswith("name:") and range.startswith("range:"): + name = name[5:] + range = range[6:].split("-") + else: + raise WrongFormat() + + ci.addChunk(name, (long(range[0]), long(range[1]))) + fh.close() + return ci + + def remove(self): + fs_name = fs_encode("%s.chunks" % self.name) + if exists(fs_name): remove(fs_name) + + def getCount(self): + return len(self.chunks) + + def getChunkName(self, index): + return self.chunks[index][0] + + def getChunkRange(self, index): + return self.chunks[index][1] + + +class HTTPChunk(HTTPRequest): + def __init__(self, id, parent, range=None, resume=False): + self.id = id + self.p = parent # HTTPDownload instance + self.range = range # tuple (start, end) + self.resume = resume + self.log = parent.log + + self.size = range[1] - range[0] if range else -1 + self.arrived = 0 + self.lastURL = self.p.referer + + self.c = pycurl.Curl() + + self.header = "" + self.headerParsed = False #indicates if the header has been processed + + self.fp = None #file handle + + self.initHandle() + self.setInterface(self.p.options) + + self.BOMChecked = False # check and remove byte order mark + + self.rep = None + + self.sleep = 0.000 + self.lastSize = 0 + + def __repr__(self): + return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) + + @property + def cj(self): + return self.p.cj + + def getHandle(self): + """ returns a Curl handle ready to use for perform/multiperform """ + + self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj) + self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody) + self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) + + # request all bytes, since some servers in russia seems to have a defect arihmetic unit + + fs_name = fs_encode(self.p.info.getChunkName(self.id)) + if self.resume: + self.fp = open(fs_name, "ab") + self.arrived = self.fp.tell() + if not self.arrived: + self.arrived = stat(fs_name).st_size + + if self.range: + #do nothing if chunk already finished + if self.arrived + self.range[0] >= self.range[1]: return None + + if self.id == len(self.p.info.chunks) - 1: #as last chunk dont set end range, so we get everything + range = "%i-" % (self.arrived + self.range[0]) + else: + range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1)) + + self.log.debug("Chunked resume with range %s" % range) + self.c.setopt(pycurl.RANGE, range) + else: + self.log.debug("Resume File from %i" % self.arrived) + self.c.setopt(pycurl.RESUME_FROM, self.arrived) + + else: + if self.range: + if self.id == len(self.p.info.chunks) - 1: # see above + range = "%i-" % self.range[0] + else: + range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1)) + + self.log.debug("Chunked with range %s" % range) + self.c.setopt(pycurl.RANGE, range) + + self.fp = open(fs_name, "wb") + + return self.c + + def writeHeader(self, buf): + self.header += buf + #@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers + # as first chunk, we will parse the headers + if not self.range and self.header.endswith("\r\n\r\n"): + self.parseHeader() + elif not self.range and buf.startswith("150") and "data connection" in buf.lower(): #: ftp file size parsing + size = search(r"(\d+) bytes", buf) + if size: + self.p.size = int(size.group(1)) + self.p.chunkSupport = True + + self.headerParsed = True + + def writeBody(self, buf): + #ignore BOM, it confuses unrar + if not self.BOMChecked: + if [ord(b) for b in buf[:3]] == [239, 187, 191]: + buf = buf[3:] + self.BOMChecked = True + + size = len(buf) + + self.arrived += size + + self.fp.write(buf) + + if self.p.bucket: + sleep(self.p.bucket.consumed(size)) + else: + # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller + # otherwise reduce sleep time percentual (values are based on tests) + # So in general cpu time is saved without reducing bandwith too much + + if size < self.lastSize: + self.sleep += 0.002 + else: + self.sleep *= 0.7 + + self.lastSize = size + + sleep(self.sleep) + + if self.range and self.arrived > self.size: + return 0 #close if we have enough data + + + def parseHeader(self): + """parse data from recieved header""" + for orgline in self.decodeResponse(self.header).splitlines(): + line = orgline.strip().lower() + if line.startswith("accept-ranges") and "bytes" in line: + self.p.chunkSupport = True + + if line.startswith("content-disposition") and "filename=" in line: + name = orgline.partition("filename=")[2] + name = name.replace('"', "").replace("'", "").replace(";", "").strip() + self.p.nameDisposition = name + self.log.debug("Content-Disposition: %s" % name) + + if not self.resume and line.startswith("content-length"): + self.p.size = int(line.split(":")[1]) + + self.headerParsed = True + + def stop(self): + """The download will not proceed after next call of writeBody""" + self.range = [0, 0] + self.size = 0 + + def resetRange(self): + """ Reset the range, so the download will load all data available """ + self.range = None + + def setRange(self, range): + self.range = range + self.size = range[1] - range[0] + + def flushFile(self): + """ flush and close file """ + self.fp.flush() + fsync(self.fp.fileno()) #make sure everything was written to disk + self.fp.close() #needs to be closed, or merging chunks will fail + + def close(self): + """ closes everything, unusable after this """ + if self.fp: self.fp.close() + self.c.close() + if hasattr(self, "p"): del self.p diff --git a/pyload/network/HTTPDownload.py b/pyload/network/HTTPDownload.py new file mode 100644 index 000000000..50c6b4bdf --- /dev/null +++ b/pyload/network/HTTPDownload.py @@ -0,0 +1,325 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +from os import remove, fsync +from os.path import dirname +from time import sleep, time +from shutil import move +from logging import getLogger + +import pycurl + +from HTTPChunk import ChunkInfo, HTTPChunk +from HTTPRequest import BadHeader + +from pyload.plugins.Plugin import Abort +from pyload.utils import safe_join, fs_encode + +class HTTPDownload: + """ loads a url http + ftp """ + + def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None, + options={}, progressNotify=None, disposition=False): + self.url = url + self.filename = filename #complete file destination, not only name + self.get = get + self.post = post + self.referer = referer + self.cj = cj #cookiejar if cookies are needed + self.bucket = bucket + self.options = options + self.disposition = disposition + # all arguments + + self.abort = False + self.size = 0 + self.nameDisposition = None #will be parsed from content disposition + + self.chunks = [] + + self.log = getLogger("log") + + try: + self.info = ChunkInfo.load(filename) + self.info.resume = True #resume is only possible with valid info file + self.size = self.info.size + self.infoSaved = True + except IOError: + self.info = ChunkInfo(filename) + + self.chunkSupport = None + self.m = pycurl.CurlMulti() + + #needed for speed calculation + self.lastArrived = [] + self.speeds = [] + self.lastSpeeds = [0, 0] + + self.progressNotify = progressNotify + + @property + def speed(self): + last = [sum(x) for x in self.lastSpeeds if x] + return (sum(self.speeds) + sum(last)) / (1 + len(last)) + + @property + def arrived(self): + return sum([c.arrived for c in self.chunks]) + + @property + def percent(self): + if not self.size: return 0 + return (self.arrived * 100) / self.size + + def _copyChunks(self): + init = fs_encode(self.info.getChunkName(0)) #initial chunk name + + if self.info.getCount() > 1: + fo = open(init, "rb+") #first chunkfile + for i in range(1, self.info.getCount()): + #input file + fo.seek( + self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks + fname = fs_encode("%s.chunk%d" % (self.filename, i)) + fi = open(fname, "rb") + buf = 32 * 1024 + while True: #copy in chunks, consumes less memory + data = fi.read(buf) + if not data: + break + fo.write(data) + fi.close() + if fo.tell() < self.info.getChunkRange(i)[1]: + fo.close() + remove(init) + self.info.remove() #there are probably invalid chunks + raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.") + remove(fname) #remove chunk + fo.close() + + if self.nameDisposition and self.disposition: + self.filename = safe_join(dirname(self.filename), self.nameDisposition) + + move(init, fs_encode(self.filename)) + self.info.remove() #remove info file + + def download(self, chunks=1, resume=False): + """ returns new filename or None """ + + chunks = max(1, chunks) + resume = self.info.resume and resume + + try: + self._download(chunks, resume) + except pycurl.error, e: + #code 33 - no resume + code = e.args[0] + if code == 33: + # try again without resume + self.log.debug("Errno 33 -> Restart without resume") + + #remove old handles + for chunk in self.chunks: + self.closeChunk(chunk) + + return self._download(chunks, False) + else: + raise + finally: + self.close() + + if self.nameDisposition and self.disposition: return self.nameDisposition + return None + + def _download(self, chunks, resume): + if not resume: + self.info.clear() + self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry + + self.chunks = [] + + init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed) + + self.chunks.append(init) + self.m.add_handle(init.getHandle()) + + lastFinishCheck = 0 + lastTimeCheck = 0 + chunksDone = set() # list of curl handles that are finished + chunksCreated = False + done = False + if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can + self.chunkSupport = True + + while 1: + #need to create chunks + if not chunksCreated and self.chunkSupport and self.size: #will be setted later by first chunk + + if not resume: + self.info.setSize(self.size) + self.info.createChunks(chunks) + self.info.save() + + chunks = self.info.getCount() + + init.setRange(self.info.getChunkRange(0)) + + for i in range(1, chunks): + c = HTTPChunk(i, self, self.info.getChunkRange(i), resume) + + handle = c.getHandle() + if handle: + self.chunks.append(c) + self.m.add_handle(handle) + else: + #close immediatly + self.log.debug("Invalid curl handle -> closed") + c.close() + + chunksCreated = True + + while 1: + ret, num_handles = self.m.perform() + if ret != pycurl.E_CALL_MULTI_PERFORM: + break + + t = time() + + # reduce these calls + while lastFinishCheck + 0.5 < t: + # list of failed curl handles + failed = [] + ex = None # save only last exception, we can only raise one anyway + + num_q, ok_list, err_list = self.m.info_read() + for c in ok_list: + chunk = self.findChunk(c) + try: # check if the header implies success, else add it to failed list + chunk.verifyHeader() + except BadHeader, e: + self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) + failed.append(chunk) + ex = e + else: + chunksDone.add(c) + + for c in err_list: + curl, errno, msg = c + chunk = self.findChunk(curl) + #test if chunk was finished + if errno != 23 or "0 !=" not in msg: + failed.append(chunk) + ex = pycurl.error(errno, msg) + self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) + continue + + try: # check if the header implies success, else add it to failed list + chunk.verifyHeader() + except BadHeader, e: + self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) + failed.append(chunk) + ex = e + else: + chunksDone.add(curl) + if not num_q: # no more infos to get + + # check if init is not finished so we reset download connections + # note that other chunks are closed and downloaded with init too + if failed and init not in failed and init.c not in chunksDone: + self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) + + #list of chunks to clean and remove + to_clean = filter(lambda x: x is not init, self.chunks) + for chunk in to_clean: + self.closeChunk(chunk) + self.chunks.remove(chunk) + remove(fs_encode(self.info.getChunkName(chunk.id))) + + #let first chunk load the rest and update the info file + init.resetRange() + self.info.clear() + self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) + self.info.save() + elif failed: + raise ex + + lastFinishCheck = t + + if len(chunksDone) >= len(self.chunks): + if len(chunksDone) > len(self.chunks): + self.log.warning("Finished download chunks size incorrect, please report bug.") + done = True #all chunks loaded + + break + + if done: + break #all chunks loaded + + # calc speed once per second, averaging over 3 seconds + if lastTimeCheck + 1 < t: + diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in + enumerate(self.chunks)] + + self.lastSpeeds[1] = self.lastSpeeds[0] + self.lastSpeeds[0] = self.speeds + self.speeds = [float(a) / (t - lastTimeCheck) for a in diff] + self.lastArrived = [c.arrived for c in self.chunks] + lastTimeCheck = t + self.updateProgress() + + if self.abort: + raise Abort() + + #sleep(0.003) #supress busy waiting - limits dl speed to (1 / x) * buffersize + self.m.select(1) + + for chunk in self.chunks: + chunk.flushFile() #make sure downloads are written to disk + + self._copyChunks() + + def updateProgress(self): + if self.progressNotify: + self.progressNotify(self.percent) + + def findChunk(self, handle): + """ linear search to find a chunk (should be ok since chunk size is usually low) """ + for chunk in self.chunks: + if chunk.c == handle: return chunk + + def closeChunk(self, chunk): + try: + self.m.remove_handle(chunk.c) + except pycurl.error, e: + self.log.debug("Error removing chunk: %s" % str(e)) + finally: + chunk.close() + + def close(self): + """ cleanup """ + for chunk in self.chunks: + self.closeChunk(chunk) + + self.chunks = [] + if hasattr(self, "m"): + self.m.close() + del self.m + if hasattr(self, "cj"): + del self.cj + if hasattr(self, "info"): + del self.info diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py new file mode 100644 index 000000000..66e355b77 --- /dev/null +++ b/pyload/network/HTTPRequest.py @@ -0,0 +1,303 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +import pycurl + +from codecs import getincrementaldecoder, lookup, BOM_UTF8 +from urllib import quote, urlencode +from httplib import responses +from logging import getLogger +from cStringIO import StringIO + +from pyload.plugins.Plugin import Abort + +def myquote(url): + return quote(url.encode('utf_8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]") + +def myurlencode(data): + data = dict(data) + return urlencode(dict((x.encode('utf_8') if isinstance(x, unicode) else x, \ + y.encode('utf_8') if isinstance(y, unicode) else y ) for x, y in data.iteritems())) + +bad_headers = range(400, 404) + range(405, 418) + range(500, 506) + +class BadHeader(Exception): + def __init__(self, code, content=""): + Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)])) + self.code = code + self.content = content + + +class HTTPRequest: + def __init__(self, cookies=None, options=None): + self.c = pycurl.Curl() + self.rep = StringIO() + + self.cj = cookies #cookiejar + + self.lastURL = None + self.lastEffectiveURL = None + self.abort = False + self.code = 0 # last http code + + self.header = "" + + self.headers = [] #temporary request header + + self.initHandle() + self.setInterface(options) + + self.c.setopt(pycurl.WRITEFUNCTION, self.write) + self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) + + self.log = getLogger("log") + + + def initHandle(self): + """ sets common options to curl handle """ + self.c.setopt(pycurl.FOLLOWLOCATION, 1) + self.c.setopt(pycurl.MAXREDIRS, 5) + self.c.setopt(pycurl.CONNECTTIMEOUT, 30) + self.c.setopt(pycurl.NOSIGNAL, 1) + self.c.setopt(pycurl.NOPROGRESS, 1) + if hasattr(pycurl, "AUTOREFERER"): + self.c.setopt(pycurl.AUTOREFERER, 1) + self.c.setopt(pycurl.SSL_VERIFYPEER, 0) + self.c.setopt(pycurl.LOW_SPEED_TIME, 30) + self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) + + #self.c.setopt(pycurl.VERBOSE, 1) + + self.c.setopt(pycurl.USERAGENT, + "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") + if pycurl.version_info()[7]: + self.c.setopt(pycurl.ENCODING, "gzip, deflate") + self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", + "Accept-Language: en-US, en", + "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7", + "Connection: keep-alive", + "Keep-Alive: 300", + "Expect:"]) + + def setInterface(self, options): + + interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] + + if interface and interface.lower() != "none": + self.c.setopt(pycurl.INTERFACE, str(interface)) + + if proxy: + if proxy["type"] == "socks4": + self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4) + elif proxy["type"] == "socks5": + self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) + else: + self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) + + self.c.setopt(pycurl.PROXY, str(proxy["address"])) + self.c.setopt(pycurl.PROXYPORT, proxy["port"]) + + if proxy["username"]: + self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"]))) + + if ipv6: + self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) + else: + self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) + + if "auth" in options: + self.c.setopt(pycurl.USERPWD, str(options["auth"])) + + if "timeout" in options: + self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"]) + + + def addCookies(self): + """ put cookies from curl handle to cj """ + if self.cj: + self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) + + def getCookies(self): + """ add cookies from cj to curl handle """ + if self.cj: + for c in self.cj.getCookies(): + self.c.setopt(pycurl.COOKIELIST, c) + return + + def clearCookies(self): + self.c.setopt(pycurl.COOKIELIST, "") + + def setRequestContext(self, url, get, post, referer, cookies, multipart=False): + """ sets everything needed for the request """ + + url = myquote(url) + + if get: + get = urlencode(get) + url = "%s?%s" % (url, get) + + self.c.setopt(pycurl.URL, url) + self.c.lastUrl = url + + if post: + self.c.setopt(pycurl.POST, 1) + if not multipart: + if type(post) == unicode: + post = str(post) #unicode not allowed + elif type(post) == str: + pass + else: + post = myurlencode(post) + + self.c.setopt(pycurl.POSTFIELDS, post) + else: + post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()] + self.c.setopt(pycurl.HTTPPOST, post) + else: + self.c.setopt(pycurl.POST, 0) + + if referer and self.lastURL: + self.c.setopt(pycurl.REFERER, str(self.lastURL)) + + if cookies: + self.c.setopt(pycurl.COOKIEFILE, "") + self.c.setopt(pycurl.COOKIEJAR, "") + self.getCookies() + + + def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False): + """ load and returns a given page """ + + self.setRequestContext(url, get, post, referer, cookies, multipart) + + self.header = "" + + self.c.setopt(pycurl.HTTPHEADER, self.headers) + + if just_header: + self.c.setopt(pycurl.FOLLOWLOCATION, 0) + self.c.setopt(pycurl.NOBODY, 1) + if post: + self.c.setopt(pycurl.POST, 1) + else: + self.c.setopt(pycurl.HTTPGET, 1) + self.c.perform() + rep = self.header + + self.c.setopt(pycurl.FOLLOWLOCATION, 1) + self.c.setopt(pycurl.NOBODY, 0) + + else: + self.c.perform() + rep = self.getResponse() + + self.c.setopt(pycurl.POSTFIELDS, "") + self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) + self.code = self.verifyHeader() + + self.addCookies() + + if decode: + rep = self.decodeResponse(rep) + + return rep + + def verifyHeader(self): + """ raise an exceptions on bad headers """ + code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) + if code in bad_headers: + #404 will NOT raise an exception + raise BadHeader(code, self.getResponse()) + return code + + def checkHeader(self): + """ check if header indicates failure""" + return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers + + def getResponse(self): + """ retrieve response from string io """ + if self.rep is None: return "" + value = self.rep.getvalue() + self.rep.close() + self.rep = StringIO() + return value + + def decodeResponse(self, rep): + """ decode with correct encoding, relies on header """ + header = self.header.splitlines() + encoding = "utf8" # default encoding + + for line in header: + line = line.lower().replace(" ", "") + if not line.startswith("content-type:") or\ + ("text" not in line and "application" not in line): + continue + + none, delemiter, charset = line.rpartition("charset=") + if delemiter: + charset = charset.split(";") + if charset: + encoding = charset[0] + + try: + #self.log.debug("Decoded %s" % encoding ) + if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): + encoding = 'utf-8-sig' + + decoder = getincrementaldecoder(encoding)("replace") + rep = decoder.decode(rep, True) + + #TODO: html_unescape as default + + except LookupError: + self.log.debug("No Decoder foung for %s" % encoding) + except Exception: + self.log.debug("Error when decoding string from %s." % encoding) + + return rep + + def write(self, buf): + """ writes response """ + if self.rep.tell() > 1000000 or self.abort: + rep = self.getResponse() + if self.abort: raise Abort() + f = open("response.dump", "wb") + f.write(rep) + f.close() + raise Exception("Loaded Url exceeded limit") + + self.rep.write(buf) + + def writeHeader(self, buf): + """ writes header """ + self.header += buf + + def putHeader(self, name, value): + self.headers.append("%s: %s" % (name, value)) + + def clearHeaders(self): + self.headers = [] + + def close(self): + """ cleanup, unusable after this """ + self.rep.close() + if hasattr(self, "cj"): + del self.cj + if hasattr(self, "c"): + self.c.close() + del self.c diff --git a/pyload/network/RequestFactory.py b/pyload/network/RequestFactory.py new file mode 100644 index 000000000..6811b11d8 --- /dev/null +++ b/pyload/network/RequestFactory.py @@ -0,0 +1,126 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay, RaNaN +""" + +from threading import Lock + +from Browser import Browser +from Bucket import Bucket +from HTTPRequest import HTTPRequest +from CookieJar import CookieJar + +from XDCCRequest import XDCCRequest + +class RequestFactory: + def __init__(self, core): + self.lock = Lock() + self.core = core + self.bucket = Bucket() + self.updateBucket() + self.cookiejars = {} + + def iface(self): + return self.core.config["download"]["interface"] + + def getRequest(self, pluginName, account=None, type="HTTP"): + self.lock.acquire() + + if type == "XDCC": + return XDCCRequest(proxies=self.getProxies()) + + req = Browser(self.bucket, self.getOptions()) + + if account: + cj = self.getCookieJar(pluginName, account) + req.setCookieJar(cj) + else: + req.setCookieJar(CookieJar(pluginName)) + + self.lock.release() + return req + + def getHTTPRequest(self, **kwargs): + """ returns a http request, dont forget to close it ! """ + options = self.getOptions() + options.update(kwargs) # submit kwargs as additional options + return HTTPRequest(CookieJar(None), options) + + def getURL(self, *args, **kwargs): + """ see HTTPRequest for argument list """ + h = HTTPRequest(None, self.getOptions()) + try: + rep = h.load(*args, **kwargs) + finally: + h.close() + + return rep + + def getCookieJar(self, pluginName, account=None): + if (pluginName, account) in self.cookiejars: + return self.cookiejars[(pluginName, account)] + + cj = CookieJar(pluginName, account) + self.cookiejars[(pluginName, account)] = cj + return cj + + def getProxies(self): + """ returns a proxy list for the request classes """ + if not self.core.config["proxy"]["proxy"]: + return {} + else: + type = "http" + setting = self.core.config["proxy"]["type"].lower() + if setting == "socks4": type = "socks4" + elif setting == "socks5": type = "socks5" + + username = None + if self.core.config["proxy"]["username"] and self.core.config["proxy"]["username"].lower() != "none": + username = self.core.config["proxy"]["username"] + + pw = None + if self.core.config["proxy"]["password"] and self.core.config["proxy"]["password"].lower() != "none": + pw = self.core.config["proxy"]["password"] + + return { + "type": type, + "address": self.core.config["proxy"]["address"], + "port": self.core.config["proxy"]["port"], + "username": username, + "password": pw, + } + + def getOptions(self): + """returns options needed for pycurl""" + return {"interface": self.iface(), + "proxies": self.getProxies(), + "ipv6": self.core.config["download"]["ipv6"]} + + def updateBucket(self): + """ set values in the bucket according to settings""" + if not self.core.config["download"]["limit_speed"]: + self.bucket.setRate(-1) + else: + self.bucket.setRate(self.core.config["download"]["max_speed"] * 1024) + +# needs pyreq in global namespace +def getURL(*args, **kwargs): + return pyreq.getURL(*args, **kwargs) + + +def getRequest(*args, **kwargs): + return pyreq.getHTTPRequest() diff --git a/pyload/network/XDCCRequest.py b/pyload/network/XDCCRequest.py new file mode 100644 index 000000000..9ae52f72b --- /dev/null +++ b/pyload/network/XDCCRequest.py @@ -0,0 +1,159 @@ +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: jeix +""" + +import socket +import re + +from os import remove +from os.path import exists + +from time import time + +import struct +from select import select + +from pyload.plugins.Plugin import Abort + + +class XDCCRequest: + def __init__(self, timeout=30, proxies={}): + + self.proxies = proxies + self.timeout = timeout + + self.filesize = 0 + self.recv = 0 + self.speed = 0 + + self.abort = False + + def createSocket(self): + # proxytype = None + # proxy = None + # if self.proxies.has_key("socks5"): + # proxytype = socks.PROXY_TYPE_SOCKS5 + # proxy = self.proxies["socks5"] + # elif self.proxies.has_key("socks4"): + # proxytype = socks.PROXY_TYPE_SOCKS4 + # proxy = self.proxies["socks4"] + # if proxytype: + # sock = socks.socksocket() + # t = _parse_proxy(proxy) + # sock.setproxy(proxytype, addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2]) + # else: + # sock = socket.socket() + # return sock + + return socket.socket() + + def download(self, ip, port, filename, irc, progressNotify=None): + + ircbuffer = "" + lastUpdate = time() + cumRecvLen = 0 + + dccsock = self.createSocket() + + dccsock.settimeout(self.timeout) + dccsock.connect((ip, port)) + + if exists(filename): + i = 0 + nameParts = filename.rpartition(".") + while True: + newfilename = "%s-%d%s%s" % (nameParts[0], i, nameParts[1], nameParts[2]) + i += 1 + + if not exists(newfilename): + filename = newfilename + break + + fh = open(filename, "wb") + + # recv loop for dcc socket + while True: + if self.abort: + dccsock.close() + fh.close() + remove(filename) + raise Abort() + + self._keepAlive(irc, ircbuffer) + + data = dccsock.recv(4096) + dataLen = len(data) + self.recv += dataLen + + cumRecvLen += dataLen + + now = time() + timespan = now - lastUpdate + if timespan > 1: + self.speed = cumRecvLen / timespan + cumRecvLen = 0 + lastUpdate = now + + if progressNotify: + progressNotify(self.percent) + + if not data: + break + + fh.write(data) + + # acknowledge data by sending number of recceived bytes + dccsock.send(struct.pack('!I', self.recv)) + + dccsock.close() + fh.close() + + return filename + + def _keepAlive(self, sock, readbuffer): + fdset = select([sock], [], [], 0) + if sock not in fdset[0]: + return + + readbuffer += sock.recv(1024) + temp = readbuffer.split("\n") + readbuffer = temp.pop() + + for line in temp: + line = line.rstrip() + first = line.split() + if first[0] == "PING": + sock.send("PONG %s\r\n" % first[1]) + + def abortDownloads(self): + self.abort = True + + @property + def size(self): + return self.filesize + + @property + def arrived(self): + return self.recv + + @property + def percent(self): + if not self.filesize: return 0 + return (self.recv * 100) / self.filesize + + def close(self): + pass diff --git a/pyload/network/__init__.py b/pyload/network/__init__.py new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/pyload/network/__init__.py @@ -0,0 +1 @@ + |