diff options
author | Walter Purcaro <vuolter@gmail.com> | 2014-09-08 00:29:57 +0200 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2014-09-14 11:02:23 +0200 |
commit | 68d662e689cd42687341c550fb6ebb74e6968d21 (patch) | |
tree | 486cef41bd928b8db704894233b2cef94a6e346f /module/network | |
parent | save_join -> safe_join & save_path -> safe_filename (diff) | |
download | pyload-68d662e689cd42687341c550fb6ebb74e6968d21.tar.xz |
module -> pyload
Diffstat (limited to 'module/network')
-rw-r--r-- | module/network/Browser.py | 132 | ||||
-rw-r--r-- | module/network/Bucket.py | 59 | ||||
-rw-r--r-- | module/network/CookieJar.py | 50 | ||||
-rw-r--r-- | module/network/HTTPChunk.py | 292 | ||||
-rw-r--r-- | module/network/HTTPDownload.py | 325 | ||||
-rw-r--r-- | module/network/HTTPRequest.py | 303 | ||||
-rw-r--r-- | module/network/RequestFactory.py | 126 | ||||
-rw-r--r-- | module/network/XDCCRequest.py | 159 | ||||
-rw-r--r-- | module/network/__init__.py | 1 |
9 files changed, 0 insertions, 1447 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py deleted file mode 100644 index e78d24688..000000000 --- a/module/network/Browser.py +++ /dev/null @@ -1,132 +0,0 @@ -# -*- coding: utf-8 -*- - -from logging import getLogger - -from HTTPRequest import HTTPRequest -from HTTPDownload import HTTPDownload - - -class Browser(object): - __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl") - - def __init__(self, bucket=None, options={}): - self.log = getLogger("log") - - self.options = options #holds pycurl options - self.bucket = bucket - - self.cj = None # needs to be setted later - self._size = 0 - - self.renewHTTPRequest() - self.dl = None - - - def renewHTTPRequest(self): - if hasattr(self, "http"): self.http.close() - self.http = HTTPRequest(self.cj, self.options) - - def setLastURL(self, val): - self.http.lastURL = val - - # tunnel some attributes from HTTP Request to Browser - lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL) - lastURL = property(lambda self: self.http.lastURL, setLastURL) - code = property(lambda self: self.http.code) - cookieJar = property(lambda self: self.cj) - - def setCookieJar(self, cj): - self.cj = cj - self.http.cj = cj - - @property - def speed(self): - if self.dl: - return self.dl.speed - return 0 - - @property - def size(self): - if self._size: - return self._size - if self.dl: - return self.dl.size - return 0 - - @property - def arrived(self): - if self.dl: - return self.dl.arrived - return 0 - - @property - def percent(self): - if not self.size: return 0 - return (self.arrived * 100) / self.size - - def clearCookies(self): - if self.cj: - self.cj.clear() - self.http.clearCookies() - - def clearReferer(self): - self.http.lastURL = None - - def abortDownloads(self): - self.http.abort = True - if self.dl: - self._size = self.dl.size - self.dl.abort = True - - def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False, - progressNotify=None, disposition=False): - """ this can also download ftp """ - self._size = 0 - self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None, - self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition) - name = self.dl.download(chunks, resume) - self._size = self.dl.size - - self.dl = None - - return name - - def load(self, *args, **kwargs): - """ retrieves page """ - return self.http.load(*args, **kwargs) - - def putHeader(self, name, value): - """ add a header to the request """ - self.http.putHeader(name, value) - - def addAuth(self, pwd): - """Adds user and pw for http auth - - :param pwd: string, user:password - """ - self.options["auth"] = pwd - self.renewHTTPRequest() #we need a new request - - def removeAuth(self): - if "auth" in self.options: del self.options["auth"] - self.renewHTTPRequest() - - def setOption(self, name, value): - """Adds an option to the request, see HTTPRequest for existing ones""" - self.options[name] = value - - def deleteOption(self, name): - if name in self.options: del self.options[name] - - def clearHeaders(self): - self.http.clearHeaders() - - def close(self): - """ cleanup """ - if hasattr(self, "http"): - self.http.close() - del self.http - if hasattr(self, "dl"): - del self.dl - if hasattr(self, "cj"): - del self.cj diff --git a/module/network/Bucket.py b/module/network/Bucket.py deleted file mode 100644 index a096d644a..000000000 --- a/module/network/Bucket.py +++ /dev/null @@ -1,59 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -from time import time -from threading import Lock - -class Bucket: - def __init__(self): - self.rate = 0 - self.tokens = 0 - self.timestamp = time() - self.lock = Lock() - - def __nonzero__(self): - return False if self.rate < 10240 else True - - def setRate(self, rate): - self.lock.acquire() - self.rate = int(rate) - self.lock.release() - - def consumed(self, amount): - """ return time the process have to sleep, after consumed specified amount """ - if self.rate < 10240: return 0 #min. 10kb, may become unresponsive otherwise - self.lock.acquire() - - self.calc_tokens() - self.tokens -= amount - - if self.tokens < 0: - time = -self.tokens/float(self.rate) - else: - time = 0 - - - self.lock.release() - return time - - def calc_tokens(self): - if self.tokens < self.rate: - now = time() - delta = self.rate * (now - self.timestamp) - self.tokens = min(self.rate, self.tokens + delta) - self.timestamp = now diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py deleted file mode 100644 index a6ae090bc..000000000 --- a/module/network/CookieJar.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay, RaNaN -""" - -from time import time - -class CookieJar: - def __init__(self, pluginname, account=None): - self.cookies = {} - self.plugin = pluginname - self.account = account - - def addCookies(self, clist): - for c in clist: - name = c.split("\t")[5] - self.cookies[name] = c - - def getCookies(self): - return self.cookies.values() - - def parseCookie(self, name): - if name in self.cookies: - return self.cookies[name].split("\t")[6] - else: - return None - - def getCookie(self, name): - return self.parseCookie(name) - - def setCookie(self, domain, name, value, path="/", exp=time()+3600*24*180): - s = ".%s TRUE %s FALSE %s %s %s" % (domain, path, exp, name, value) - self.cookies[name] = s - - def clear(self): - self.cookies = {} diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py deleted file mode 100644 index 719c3ed0b..000000000 --- a/module/network/HTTPChunk.py +++ /dev/null @@ -1,292 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" -from os import remove, stat, fsync -from os.path import exists -from time import sleep -from re import search -from module.utils import fs_encode -import codecs -import pycurl - -from HTTPRequest import HTTPRequest - -class WrongFormat(Exception): - pass - - -class ChunkInfo: - def __init__(self, name): - self.name = unicode(name) - self.size = 0 - self.resume = False - self.chunks = [] - - def __repr__(self): - ret = "ChunkInfo: %s, %s\n" % (self.name, self.size) - for i, c in enumerate(self.chunks): - ret += "%s# %s\n" % (i, c[1]) - - return ret - - def setSize(self, size): - self.size = int(size) - - def addChunk(self, name, range): - self.chunks.append((name, range)) - - def clear(self): - self.chunks = [] - - def createChunks(self, chunks): - self.clear() - chunk_size = self.size / chunks - - current = 0 - for i in range(chunks): - end = self.size - 1 if (i == chunks - 1) else current + chunk_size - self.addChunk("%s.chunk%s" % (self.name, i), (current, end)) - current += chunk_size + 1 - - - def save(self): - fs_name = fs_encode("%s.chunks" % self.name) - fh = codecs.open(fs_name, "w", "utf_8") - fh.write("name:%s\n" % self.name) - fh.write("size:%s\n" % self.size) - for i, c in enumerate(self.chunks): - fh.write("#%d:\n" % i) - fh.write("\tname:%s\n" % c[0]) - fh.write("\trange:%i-%i\n" % c[1]) - fh.close() - - @staticmethod - def load(name): - fs_name = fs_encode("%s.chunks" % name) - if not exists(fs_name): - raise IOError() - fh = codecs.open(fs_name, "r", "utf_8") - name = fh.readline()[:-1] - size = fh.readline()[:-1] - if name.startswith("name:") and size.startswith("size:"): - name = name[5:] - size = size[5:] - else: - fh.close() - raise WrongFormat() - ci = ChunkInfo(name) - ci.loaded = True - ci.setSize(size) - while True: - if not fh.readline(): #skip line - break - name = fh.readline()[1:-1] - range = fh.readline()[1:-1] - if name.startswith("name:") and range.startswith("range:"): - name = name[5:] - range = range[6:].split("-") - else: - raise WrongFormat() - - ci.addChunk(name, (long(range[0]), long(range[1]))) - fh.close() - return ci - - def remove(self): - fs_name = fs_encode("%s.chunks" % self.name) - if exists(fs_name): remove(fs_name) - - def getCount(self): - return len(self.chunks) - - def getChunkName(self, index): - return self.chunks[index][0] - - def getChunkRange(self, index): - return self.chunks[index][1] - - -class HTTPChunk(HTTPRequest): - def __init__(self, id, parent, range=None, resume=False): - self.id = id - self.p = parent # HTTPDownload instance - self.range = range # tuple (start, end) - self.resume = resume - self.log = parent.log - - self.size = range[1] - range[0] if range else -1 - self.arrived = 0 - self.lastURL = self.p.referer - - self.c = pycurl.Curl() - - self.header = "" - self.headerParsed = False #indicates if the header has been processed - - self.fp = None #file handle - - self.initHandle() - self.setInterface(self.p.options) - - self.BOMChecked = False # check and remove byte order mark - - self.rep = None - - self.sleep = 0.000 - self.lastSize = 0 - - def __repr__(self): - return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived) - - @property - def cj(self): - return self.p.cj - - def getHandle(self): - """ returns a Curl handle ready to use for perform/multiperform """ - - self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj) - self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - # request all bytes, since some servers in russia seems to have a defect arihmetic unit - - fs_name = fs_encode(self.p.info.getChunkName(self.id)) - if self.resume: - self.fp = open(fs_name, "ab") - self.arrived = self.fp.tell() - if not self.arrived: - self.arrived = stat(fs_name).st_size - - if self.range: - #do nothing if chunk already finished - if self.arrived + self.range[0] >= self.range[1]: return None - - if self.id == len(self.p.info.chunks) - 1: #as last chunk dont set end range, so we get everything - range = "%i-" % (self.arrived + self.range[0]) - else: - range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1)) - - self.log.debug("Chunked resume with range %s" % range) - self.c.setopt(pycurl.RANGE, range) - else: - self.log.debug("Resume File from %i" % self.arrived) - self.c.setopt(pycurl.RESUME_FROM, self.arrived) - - else: - if self.range: - if self.id == len(self.p.info.chunks) - 1: # see above - range = "%i-" % self.range[0] - else: - range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1)) - - self.log.debug("Chunked with range %s" % range) - self.c.setopt(pycurl.RANGE, range) - - self.fp = open(fs_name, "wb") - - return self.c - - def writeHeader(self, buf): - self.header += buf - #@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers - # as first chunk, we will parse the headers - if not self.range and self.header.endswith("\r\n\r\n"): - self.parseHeader() - elif not self.range and buf.startswith("150") and "data connection" in buf.lower(): #: ftp file size parsing - size = search(r"(\d+) bytes", buf) - if size: - self.p.size = int(size.group(1)) - self.p.chunkSupport = True - - self.headerParsed = True - - def writeBody(self, buf): - #ignore BOM, it confuses unrar - if not self.BOMChecked: - if [ord(b) for b in buf[:3]] == [239, 187, 191]: - buf = buf[3:] - self.BOMChecked = True - - size = len(buf) - - self.arrived += size - - self.fp.write(buf) - - if self.p.bucket: - sleep(self.p.bucket.consumed(size)) - else: - # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller - # otherwise reduce sleep time percentual (values are based on tests) - # So in general cpu time is saved without reducing bandwith too much - - if size < self.lastSize: - self.sleep += 0.002 - else: - self.sleep *= 0.7 - - self.lastSize = size - - sleep(self.sleep) - - if self.range and self.arrived > self.size: - return 0 #close if we have enough data - - - def parseHeader(self): - """parse data from recieved header""" - for orgline in self.decodeResponse(self.header).splitlines(): - line = orgline.strip().lower() - if line.startswith("accept-ranges") and "bytes" in line: - self.p.chunkSupport = True - - if line.startswith("content-disposition") and "filename=" in line: - name = orgline.partition("filename=")[2] - name = name.replace('"', "").replace("'", "").replace(";", "").strip() - self.p.nameDisposition = name - self.log.debug("Content-Disposition: %s" % name) - - if not self.resume and line.startswith("content-length"): - self.p.size = int(line.split(":")[1]) - - self.headerParsed = True - - def stop(self): - """The download will not proceed after next call of writeBody""" - self.range = [0, 0] - self.size = 0 - - def resetRange(self): - """ Reset the range, so the download will load all data available """ - self.range = None - - def setRange(self, range): - self.range = range - self.size = range[1] - range[0] - - def flushFile(self): - """ flush and close file """ - self.fp.flush() - fsync(self.fp.fileno()) #make sure everything was written to disk - self.fp.close() #needs to be closed, or merging chunks will fail - - def close(self): - """ closes everything, unusable after this """ - if self.fp: self.fp.close() - self.c.close() - if hasattr(self, "p"): del self.p diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py deleted file mode 100644 index 20ac00e48..000000000 --- a/module/network/HTTPDownload.py +++ /dev/null @@ -1,325 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -from os import remove, fsync -from os.path import dirname -from time import sleep, time -from shutil import move -from logging import getLogger - -import pycurl - -from HTTPChunk import ChunkInfo, HTTPChunk -from HTTPRequest import BadHeader - -from module.plugins.Plugin import Abort -from module.utils import safe_join, fs_encode - -class HTTPDownload: - """ loads a url http + ftp """ - - def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None, - options={}, progressNotify=None, disposition=False): - self.url = url - self.filename = filename #complete file destination, not only name - self.get = get - self.post = post - self.referer = referer - self.cj = cj #cookiejar if cookies are needed - self.bucket = bucket - self.options = options - self.disposition = disposition - # all arguments - - self.abort = False - self.size = 0 - self.nameDisposition = None #will be parsed from content disposition - - self.chunks = [] - - self.log = getLogger("log") - - try: - self.info = ChunkInfo.load(filename) - self.info.resume = True #resume is only possible with valid info file - self.size = self.info.size - self.infoSaved = True - except IOError: - self.info = ChunkInfo(filename) - - self.chunkSupport = None - self.m = pycurl.CurlMulti() - - #needed for speed calculation - self.lastArrived = [] - self.speeds = [] - self.lastSpeeds = [0, 0] - - self.progressNotify = progressNotify - - @property - def speed(self): - last = [sum(x) for x in self.lastSpeeds if x] - return (sum(self.speeds) + sum(last)) / (1 + len(last)) - - @property - def arrived(self): - return sum([c.arrived for c in self.chunks]) - - @property - def percent(self): - if not self.size: return 0 - return (self.arrived * 100) / self.size - - def _copyChunks(self): - init = fs_encode(self.info.getChunkName(0)) #initial chunk name - - if self.info.getCount() > 1: - fo = open(init, "rb+") #first chunkfile - for i in range(1, self.info.getCount()): - #input file - fo.seek( - self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks - fname = fs_encode("%s.chunk%d" % (self.filename, i)) - fi = open(fname, "rb") - buf = 32 * 1024 - while True: #copy in chunks, consumes less memory - data = fi.read(buf) - if not data: - break - fo.write(data) - fi.close() - if fo.tell() < self.info.getChunkRange(i)[1]: - fo.close() - remove(init) - self.info.remove() #there are probably invalid chunks - raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.") - remove(fname) #remove chunk - fo.close() - - if self.nameDisposition and self.disposition: - self.filename = safe_join(dirname(self.filename), self.nameDisposition) - - move(init, fs_encode(self.filename)) - self.info.remove() #remove info file - - def download(self, chunks=1, resume=False): - """ returns new filename or None """ - - chunks = max(1, chunks) - resume = self.info.resume and resume - - try: - self._download(chunks, resume) - except pycurl.error, e: - #code 33 - no resume - code = e.args[0] - if code == 33: - # try again without resume - self.log.debug("Errno 33 -> Restart without resume") - - #remove old handles - for chunk in self.chunks: - self.closeChunk(chunk) - - return self._download(chunks, False) - else: - raise - finally: - self.close() - - if self.nameDisposition and self.disposition: return self.nameDisposition - return None - - def _download(self, chunks, resume): - if not resume: - self.info.clear() - self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry - - self.chunks = [] - - init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed) - - self.chunks.append(init) - self.m.add_handle(init.getHandle()) - - lastFinishCheck = 0 - lastTimeCheck = 0 - chunksDone = set() # list of curl handles that are finished - chunksCreated = False - done = False - if self.info.getCount() > 1: # This is a resume, if we were chunked originally assume still can - self.chunkSupport = True - - while 1: - #need to create chunks - if not chunksCreated and self.chunkSupport and self.size: #will be setted later by first chunk - - if not resume: - self.info.setSize(self.size) - self.info.createChunks(chunks) - self.info.save() - - chunks = self.info.getCount() - - init.setRange(self.info.getChunkRange(0)) - - for i in range(1, chunks): - c = HTTPChunk(i, self, self.info.getChunkRange(i), resume) - - handle = c.getHandle() - if handle: - self.chunks.append(c) - self.m.add_handle(handle) - else: - #close immediatly - self.log.debug("Invalid curl handle -> closed") - c.close() - - chunksCreated = True - - while 1: - ret, num_handles = self.m.perform() - if ret != pycurl.E_CALL_MULTI_PERFORM: - break - - t = time() - - # reduce these calls - while lastFinishCheck + 0.5 < t: - # list of failed curl handles - failed = [] - ex = None # save only last exception, we can only raise one anyway - - num_q, ok_list, err_list = self.m.info_read() - for c in ok_list: - chunk = self.findChunk(c) - try: # check if the header implies success, else add it to failed list - chunk.verifyHeader() - except BadHeader, e: - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) - failed.append(chunk) - ex = e - else: - chunksDone.add(c) - - for c in err_list: - curl, errno, msg = c - chunk = self.findChunk(curl) - #test if chunk was finished - if errno != 23 or "0 !=" not in msg: - failed.append(chunk) - ex = pycurl.error(errno, msg) - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex))) - continue - - try: # check if the header implies success, else add it to failed list - chunk.verifyHeader() - except BadHeader, e: - self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e))) - failed.append(chunk) - ex = e - else: - chunksDone.add(curl) - if not num_q: # no more infos to get - - # check if init is not finished so we reset download connections - # note that other chunks are closed and downloaded with init too - if failed and init not in failed and init.c not in chunksDone: - self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex)))) - - #list of chunks to clean and remove - to_clean = filter(lambda x: x is not init, self.chunks) - for chunk in to_clean: - self.closeChunk(chunk) - self.chunks.remove(chunk) - remove(fs_encode(self.info.getChunkName(chunk.id))) - - #let first chunk load the rest and update the info file - init.resetRange() - self.info.clear() - self.info.addChunk("%s.chunk0" % self.filename, (0, self.size)) - self.info.save() - elif failed: - raise ex - - lastFinishCheck = t - - if len(chunksDone) >= len(self.chunks): - if len(chunksDone) > len(self.chunks): - self.log.warning("Finished download chunks size incorrect, please report bug.") - done = True #all chunks loaded - - break - - if done: - break #all chunks loaded - - # calc speed once per second, averaging over 3 seconds - if lastTimeCheck + 1 < t: - diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in - enumerate(self.chunks)] - - self.lastSpeeds[1] = self.lastSpeeds[0] - self.lastSpeeds[0] = self.speeds - self.speeds = [float(a) / (t - lastTimeCheck) for a in diff] - self.lastArrived = [c.arrived for c in self.chunks] - lastTimeCheck = t - self.updateProgress() - - if self.abort: - raise Abort() - - #sleep(0.003) #supress busy waiting - limits dl speed to (1 / x) * buffersize - self.m.select(1) - - for chunk in self.chunks: - chunk.flushFile() #make sure downloads are written to disk - - self._copyChunks() - - def updateProgress(self): - if self.progressNotify: - self.progressNotify(self.percent) - - def findChunk(self, handle): - """ linear search to find a chunk (should be ok since chunk size is usually low) """ - for chunk in self.chunks: - if chunk.c == handle: return chunk - - def closeChunk(self, chunk): - try: - self.m.remove_handle(chunk.c) - except pycurl.error, e: - self.log.debug("Error removing chunk: %s" % str(e)) - finally: - chunk.close() - - def close(self): - """ cleanup """ - for chunk in self.chunks: - self.closeChunk(chunk) - - self.chunks = [] - if hasattr(self, "m"): - self.m.close() - del self.m - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "info"): - del self.info diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py deleted file mode 100644 index 67635f944..000000000 --- a/module/network/HTTPRequest.py +++ /dev/null @@ -1,303 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -import pycurl - -from codecs import getincrementaldecoder, lookup, BOM_UTF8 -from urllib import quote, urlencode -from httplib import responses -from logging import getLogger -from cStringIO import StringIO - -from module.plugins.Plugin import Abort - -def myquote(url): - return quote(url.encode('utf_8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]") - -def myurlencode(data): - data = dict(data) - return urlencode(dict((x.encode('utf_8') if isinstance(x, unicode) else x, \ - y.encode('utf_8') if isinstance(y, unicode) else y ) for x, y in data.iteritems())) - -bad_headers = range(400, 404) + range(405, 418) + range(500, 506) - -class BadHeader(Exception): - def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)])) - self.code = code - self.content = content - - -class HTTPRequest: - def __init__(self, cookies=None, options=None): - self.c = pycurl.Curl() - self.rep = StringIO() - - self.cj = cookies #cookiejar - - self.lastURL = None - self.lastEffectiveURL = None - self.abort = False - self.code = 0 # last http code - - self.header = "" - - self.headers = [] #temporary request header - - self.initHandle() - self.setInterface(options) - - self.c.setopt(pycurl.WRITEFUNCTION, self.write) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - self.log = getLogger("log") - - - def initHandle(self): - """ sets common options to curl handle """ - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.MAXREDIRS, 5) - self.c.setopt(pycurl.CONNECTTIMEOUT, 30) - self.c.setopt(pycurl.NOSIGNAL, 1) - self.c.setopt(pycurl.NOPROGRESS, 1) - if hasattr(pycurl, "AUTOREFERER"): - self.c.setopt(pycurl.AUTOREFERER, 1) - self.c.setopt(pycurl.SSL_VERIFYPEER, 0) - self.c.setopt(pycurl.LOW_SPEED_TIME, 30) - self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) - - #self.c.setopt(pycurl.VERBOSE, 1) - - self.c.setopt(pycurl.USERAGENT, - "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") - if pycurl.version_info()[7]: - self.c.setopt(pycurl.ENCODING, "gzip, deflate") - self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US, en", - "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300", - "Expect:"]) - - def setInterface(self, options): - - interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - - if interface and interface.lower() != "none": - self.c.setopt(pycurl.INTERFACE, str(interface)) - - if proxy: - if proxy["type"] == "socks4": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4) - elif proxy["type"] == "socks5": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) - else: - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - - self.c.setopt(pycurl.PROXY, str(proxy["address"])) - self.c.setopt(pycurl.PROXYPORT, proxy["port"]) - - if proxy["username"]: - self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"]))) - - if ipv6: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) - else: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) - - if "auth" in options: - self.c.setopt(pycurl.USERPWD, str(options["auth"])) - - if "timeout" in options: - self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"]) - - - def addCookies(self): - """ put cookies from curl handle to cj """ - if self.cj: - self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) - - def getCookies(self): - """ add cookies from cj to curl handle """ - if self.cj: - for c in self.cj.getCookies(): - self.c.setopt(pycurl.COOKIELIST, c) - return - - def clearCookies(self): - self.c.setopt(pycurl.COOKIELIST, "") - - def setRequestContext(self, url, get, post, referer, cookies, multipart=False): - """ sets everything needed for the request """ - - url = myquote(url) - - if get: - get = urlencode(get) - url = "%s?%s" % (url, get) - - self.c.setopt(pycurl.URL, url) - self.c.lastUrl = url - - if post: - self.c.setopt(pycurl.POST, 1) - if not multipart: - if type(post) == unicode: - post = str(post) #unicode not allowed - elif type(post) == str: - pass - else: - post = myurlencode(post) - - self.c.setopt(pycurl.POSTFIELDS, post) - else: - post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()] - self.c.setopt(pycurl.HTTPPOST, post) - else: - self.c.setopt(pycurl.POST, 0) - - if referer and self.lastURL: - self.c.setopt(pycurl.REFERER, str(self.lastURL)) - - if cookies: - self.c.setopt(pycurl.COOKIEFILE, "") - self.c.setopt(pycurl.COOKIEJAR, "") - self.getCookies() - - - def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False): - """ load and returns a given page """ - - self.setRequestContext(url, get, post, referer, cookies, multipart) - - self.header = "" - - self.c.setopt(pycurl.HTTPHEADER, self.headers) - - if just_header: - self.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.c.setopt(pycurl.NOBODY, 1) - if post: - self.c.setopt(pycurl.POST, 1) - else: - self.c.setopt(pycurl.HTTPGET, 1) - self.c.perform() - rep = self.header - - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.NOBODY, 0) - - else: - self.c.perform() - rep = self.getResponse() - - self.c.setopt(pycurl.POSTFIELDS, "") - self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) - self.code = self.verifyHeader() - - self.addCookies() - - if decode: - rep = self.decodeResponse(rep) - - return rep - - def verifyHeader(self): - """ raise an exceptions on bad headers """ - code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - if code in bad_headers: - #404 will NOT raise an exception - raise BadHeader(code, self.getResponse()) - return code - - def checkHeader(self): - """ check if header indicates failure""" - return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers - - def getResponse(self): - """ retrieve response from string io """ - if self.rep is None: return "" - value = self.rep.getvalue() - self.rep.close() - self.rep = StringIO() - return value - - def decodeResponse(self, rep): - """ decode with correct encoding, relies on header """ - header = self.header.splitlines() - encoding = "utf8" # default encoding - - for line in header: - line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or\ - ("text" not in line and "application" not in line): - continue - - none, delemiter, charset = line.rpartition("charset=") - if delemiter: - charset = charset.split(";") - if charset: - encoding = charset[0] - - try: - #self.log.debug("Decoded %s" % encoding ) - if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): - encoding = 'utf-8-sig' - - decoder = getincrementaldecoder(encoding)("replace") - rep = decoder.decode(rep, True) - - #TODO: html_unescape as default - - except LookupError: - self.log.debug("No Decoder foung for %s" % encoding) - except Exception: - self.log.debug("Error when decoding string from %s." % encoding) - - return rep - - def write(self, buf): - """ writes response """ - if self.rep.tell() > 1000000 or self.abort: - rep = self.getResponse() - if self.abort: raise Abort() - f = open("response.dump", "wb") - f.write(rep) - f.close() - raise Exception("Loaded Url exceeded limit") - - self.rep.write(buf) - - def writeHeader(self, buf): - """ writes header """ - self.header += buf - - def putHeader(self, name, value): - self.headers.append("%s: %s" % (name, value)) - - def clearHeaders(self): - self.headers = [] - - def close(self): - """ cleanup, unusable after this """ - self.rep.close() - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "c"): - self.c.close() - del self.c diff --git a/module/network/RequestFactory.py b/module/network/RequestFactory.py deleted file mode 100644 index 6811b11d8..000000000 --- a/module/network/RequestFactory.py +++ /dev/null @@ -1,126 +0,0 @@ -# -*- coding: utf-8 -*- - -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: mkaay, RaNaN -""" - -from threading import Lock - -from Browser import Browser -from Bucket import Bucket -from HTTPRequest import HTTPRequest -from CookieJar import CookieJar - -from XDCCRequest import XDCCRequest - -class RequestFactory: - def __init__(self, core): - self.lock = Lock() - self.core = core - self.bucket = Bucket() - self.updateBucket() - self.cookiejars = {} - - def iface(self): - return self.core.config["download"]["interface"] - - def getRequest(self, pluginName, account=None, type="HTTP"): - self.lock.acquire() - - if type == "XDCC": - return XDCCRequest(proxies=self.getProxies()) - - req = Browser(self.bucket, self.getOptions()) - - if account: - cj = self.getCookieJar(pluginName, account) - req.setCookieJar(cj) - else: - req.setCookieJar(CookieJar(pluginName)) - - self.lock.release() - return req - - def getHTTPRequest(self, **kwargs): - """ returns a http request, dont forget to close it ! """ - options = self.getOptions() - options.update(kwargs) # submit kwargs as additional options - return HTTPRequest(CookieJar(None), options) - - def getURL(self, *args, **kwargs): - """ see HTTPRequest for argument list """ - h = HTTPRequest(None, self.getOptions()) - try: - rep = h.load(*args, **kwargs) - finally: - h.close() - - return rep - - def getCookieJar(self, pluginName, account=None): - if (pluginName, account) in self.cookiejars: - return self.cookiejars[(pluginName, account)] - - cj = CookieJar(pluginName, account) - self.cookiejars[(pluginName, account)] = cj - return cj - - def getProxies(self): - """ returns a proxy list for the request classes """ - if not self.core.config["proxy"]["proxy"]: - return {} - else: - type = "http" - setting = self.core.config["proxy"]["type"].lower() - if setting == "socks4": type = "socks4" - elif setting == "socks5": type = "socks5" - - username = None - if self.core.config["proxy"]["username"] and self.core.config["proxy"]["username"].lower() != "none": - username = self.core.config["proxy"]["username"] - - pw = None - if self.core.config["proxy"]["password"] and self.core.config["proxy"]["password"].lower() != "none": - pw = self.core.config["proxy"]["password"] - - return { - "type": type, - "address": self.core.config["proxy"]["address"], - "port": self.core.config["proxy"]["port"], - "username": username, - "password": pw, - } - - def getOptions(self): - """returns options needed for pycurl""" - return {"interface": self.iface(), - "proxies": self.getProxies(), - "ipv6": self.core.config["download"]["ipv6"]} - - def updateBucket(self): - """ set values in the bucket according to settings""" - if not self.core.config["download"]["limit_speed"]: - self.bucket.setRate(-1) - else: - self.bucket.setRate(self.core.config["download"]["max_speed"] * 1024) - -# needs pyreq in global namespace -def getURL(*args, **kwargs): - return pyreq.getURL(*args, **kwargs) - - -def getRequest(*args, **kwargs): - return pyreq.getHTTPRequest() diff --git a/module/network/XDCCRequest.py b/module/network/XDCCRequest.py deleted file mode 100644 index 74769da96..000000000 --- a/module/network/XDCCRequest.py +++ /dev/null @@ -1,159 +0,0 @@ -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: jeix -""" - -import socket -import re - -from os import remove -from os.path import exists - -from time import time - -import struct -from select import select - -from module.plugins.Plugin import Abort - - -class XDCCRequest: - def __init__(self, timeout=30, proxies={}): - - self.proxies = proxies - self.timeout = timeout - - self.filesize = 0 - self.recv = 0 - self.speed = 0 - - self.abort = False - - def createSocket(self): - # proxytype = None - # proxy = None - # if self.proxies.has_key("socks5"): - # proxytype = socks.PROXY_TYPE_SOCKS5 - # proxy = self.proxies["socks5"] - # elif self.proxies.has_key("socks4"): - # proxytype = socks.PROXY_TYPE_SOCKS4 - # proxy = self.proxies["socks4"] - # if proxytype: - # sock = socks.socksocket() - # t = _parse_proxy(proxy) - # sock.setproxy(proxytype, addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2]) - # else: - # sock = socket.socket() - # return sock - - return socket.socket() - - def download(self, ip, port, filename, irc, progressNotify=None): - - ircbuffer = "" - lastUpdate = time() - cumRecvLen = 0 - - dccsock = self.createSocket() - - dccsock.settimeout(self.timeout) - dccsock.connect((ip, port)) - - if exists(filename): - i = 0 - nameParts = filename.rpartition(".") - while True: - newfilename = "%s-%d%s%s" % (nameParts[0], i, nameParts[1], nameParts[2]) - i += 1 - - if not exists(newfilename): - filename = newfilename - break - - fh = open(filename, "wb") - - # recv loop for dcc socket - while True: - if self.abort: - dccsock.close() - fh.close() - remove(filename) - raise Abort() - - self._keepAlive(irc, ircbuffer) - - data = dccsock.recv(4096) - dataLen = len(data) - self.recv += dataLen - - cumRecvLen += dataLen - - now = time() - timespan = now - lastUpdate - if timespan > 1: - self.speed = cumRecvLen / timespan - cumRecvLen = 0 - lastUpdate = now - - if progressNotify: - progressNotify(self.percent) - - if not data: - break - - fh.write(data) - - # acknowledge data by sending number of recceived bytes - dccsock.send(struct.pack('!I', self.recv)) - - dccsock.close() - fh.close() - - return filename - - def _keepAlive(self, sock, readbuffer): - fdset = select([sock], [], [], 0) - if sock not in fdset[0]: - return - - readbuffer += sock.recv(1024) - temp = readbuffer.split("\n") - readbuffer = temp.pop() - - for line in temp: - line = line.rstrip() - first = line.split() - if first[0] == "PING": - sock.send("PONG %s\r\n" % first[1]) - - def abortDownloads(self): - self.abort = True - - @property - def size(self): - return self.filesize - - @property - def arrived(self): - return self.recv - - @property - def percent(self): - if not self.filesize: return 0 - return (self.recv * 100) / self.filesize - - def close(self): - pass diff --git a/module/network/__init__.py b/module/network/__init__.py deleted file mode 100644 index 8b1378917..000000000 --- a/module/network/__init__.py +++ /dev/null @@ -1 +0,0 @@ - |