Diffstat (limited to 'pyload/network')
-rw-r--r-- | pyload/network/Browser.py        | 132
-rw-r--r-- | pyload/network/Bucket.py         |  52
-rw-r--r-- | pyload/network/CookieJar.py      |  42
-rw-r--r-- | pyload/network/HTTPChunk.py      | 294
-rw-r--r-- | pyload/network/HTTPDownload.py   | 312
-rw-r--r-- | pyload/network/HTTPRequest.py    | 302
-rw-r--r-- | pyload/network/JsEngine.py       | 256
-rw-r--r-- | pyload/network/RequestFactory.py | 121
-rw-r--r-- | pyload/network/XDCCRequest.py    | 144
-rw-r--r-- | pyload/network/__init__.py       |   1
10 files changed, 0 insertions, 1656 deletions
diff --git a/pyload/network/Browser.py b/pyload/network/Browser.py
deleted file mode 100644
index 89341ff25..000000000
--- a/pyload/network/Browser.py
+++ /dev/null
@@ -1,132 +0,0 @@
-# -*- coding: utf-8 -*-
-
-from logging import getLogger
-
-from pyload.network.HTTPRequest import HTTPRequest
-from pyload.network.HTTPDownload import HTTPDownload
-
-
-class Browser(object):
-    __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl")
-
-    def __init__(self, bucket=None, options={}):
-        self.log = getLogger("log")
-
-        self.options = options  #holds pycurl options
-        self.bucket = bucket
-
-        self.cj = None  # needs to be setted later
-        self._size = 0
-
-        self.renewHTTPRequest()
-        self.dl = None
-
-
-    def renewHTTPRequest(self):
-        if hasattr(self, "http"): self.http.close()
-        self.http = HTTPRequest(self.cj, self.options)
-
-    def setLastURL(self, val):
-        self.http.lastURL = val
-
-    # tunnel some attributes from HTTP Request to Browser
-    lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL)
-    lastURL = property(lambda self: self.http.lastURL, setLastURL)
-    code = property(lambda self: self.http.code)
-    cookieJar = property(lambda self: self.cj)
-
-    def setCookieJar(self, cj):
-        self.cj = cj
-        self.http.cj = cj
-
-    @property
-    def speed(self):
-        if self.dl:
-            return self.dl.speed
-        return 0
-
-    @property
-    def size(self):
-        if self._size:
-            return self._size
-        if self.dl:
-            return self.dl.size
-        return 0
-
-    @property
-    def arrived(self):
-        if self.dl:
-            return self.dl.arrived
-        return 0
-
-    @property
-    def percent(self):
-        if not self.size: return 0
-        return (self.arrived * 100) / self.size
-
-    def clearCookies(self):
-        if self.cj:
-            self.cj.clear()
-        self.http.clearCookies()
-
-    def clearReferer(self):
-        self.http.lastURL = None
-
-    def abortDownloads(self):
-        self.http.abort = True
-        if self.dl:
-            self._size = self.dl.size
-            self.dl.abort = True
-
-    def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False,
-                     progressNotify=None, disposition=False):
-        """ this can also download ftp """
-        self._size = 0
-        self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
-                               self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition)
-        name = self.dl.download(chunks, resume)
-        self._size = self.dl.size
-
-        self.dl = None
-
-        return name
-
-    def load(self, *args, **kwargs):
-        """ retrieves page """
-        return self.http.load(*args, **kwargs)
-
-    def putHeader(self, name, value):
-        """ add a header to the request """
-        self.http.putHeader(name, value)
-
-    def addAuth(self, pwd):
-        """Adds user and pw for http auth
-
-        :param pwd: string, user:password
-        """
-        self.options["auth"] = pwd
-        self.renewHTTPRequest()  #we need a new request
-
-    def removeAuth(self):
-        if "auth" in self.options: del self.options["auth"]
-        self.renewHTTPRequest()
-
-    def setOption(self, name, value):
-        """Adds an option to the request, see HTTPRequest for existing ones"""
-        self.options[name] = value
-
-    def deleteOption(self, name):
-        if name in self.options: del self.options[name]
-
-    def clearHeaders(self):
-        self.http.clearHeaders()
-
-    def close(self):
-        """ cleanup """
-        if hasattr(self, "http"):
-            self.http.close()
-            del self.http
-        if hasattr(self, "dl"):
-            del self.dl
-        if hasattr(self, "cj"):
-            del self.cj
diff --git a/pyload/network/Bucket.py b/pyload/network/Bucket.py
deleted file mode 100644
index 408a1e240..000000000
--- a/pyload/network/Bucket.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-from time import time
-from threading import Lock
-
-MIN_RATE = 10240  #: 10kb minimum rate
-
-
-class Bucket(object):
-
-    def __init__(self):
-        self.rate = 0  # bytes per second, maximum targeted throughput
-        self.tokens = 0
-        self.timestamp = time()
-        self.lock = Lock()
-
-
-    def __nonzero__(self):
-        return False if self.rate < MIN_RATE else True
-
-
-    def setRate(self, rate):
-        self.lock.acquire()
-        self.rate = int(rate)
-        self.lock.release()
-
-
-    def consumed(self, amount):
-        """ return the time the process has to sleep, after it consumed a specified amount """
-        if self.rate < MIN_RATE:
-            return 0  #: May become unresponsive otherwise
-
-        self.lock.acquire()
-        self.calc_tokens()
-        self.tokens -= amount
-
-        if self.tokens < 0:
-            time = -self.tokens / float(self.rate)
-        else:
-            time = 0
-
-        self.lock.release()
-        return time
-
-
-    def calc_tokens(self):
-        if self.tokens < self.rate:
-            now = time()
-            delta = self.rate * (now - self.timestamp)
-            self.tokens = min(self.rate, self.tokens + delta)
-            self.timestamp = now
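For readers tracing this code: the deleted Bucket is a classic token-bucket throttle. consumed() returns how long the caller should sleep after writing a block so that sustained throughput converges on the value passed to setRate(). A minimal usage sketch (illustrative only, not part of the commit; the 100 KiB/s rate and the synthetic read loop are assumptions):

```python
from time import sleep, time
from pyload.network.Bucket import Bucket  # module as it existed before this deletion

bucket = Bucket()
bucket.setRate(100 * 1024)  # target ~100 KiB/s; anything below MIN_RATE disables throttling

start, total = time(), 0
for _ in range(50):
    chunk = b"x" * 4096                  # stand-in for bytes read from a socket
    total += len(chunk)
    sleep(bucket.consumed(len(chunk)))   # pause long enough to hold the configured rate

print("%.1f KiB/s" % (total / 1024.0 / (time() - start)))
```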
diff --git a/pyload/network/CookieJar.py b/pyload/network/CookieJar.py
deleted file mode 100644
index a2b302776..000000000
--- a/pyload/network/CookieJar.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-import Cookie
-
-from datetime import datetime, timedelta
-from time import time
-
-
-# monkey patch for 32 bit systems
-def _getdate(future=0, weekdayname=Cookie._weekdayname, monthname=Cookie._monthname):
-    dt = datetime.now() + timedelta(seconds=int(future))
-    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % \
-           (weekdayname[dt.weekday()], dt.day, monthname[dt.month], dt.year, dt.hour, dt.minute, dt.second)
-
-Cookie._getdate = _getdate
-
-
-class CookieJar(Cookie.SimpleCookie):
-
-    def getCookie(self, name):
-        return self[name].value
-
-    def setCookie(self, domain, name, value, path="/", exp=None, secure="FALSE"):
-        self[name] = value
-        self[name]["domain"] = domain
-        self[name]["path"] = path
-
-        # Value of expires should be integer if possible
-        # otherwise the cookie won't be used
-        if not exp:
-            expires = time() + 3600 * 24 * 180
-        else:
-            try:
-                expires = int(exp)
-            except ValueError:
-                expires = exp
-
-        self[name]["expires"] = expires
-
-        if secure == "TRUE":
-            self[name]["secure"] = secure
diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py
deleted file mode 100644
index 41752b940..000000000
--- a/pyload/network/HTTPChunk.py
+++ /dev/null
@@ -1,294 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-from os import remove, stat, fsync
-from os.path import exists
-from time import sleep
-from re import search
-from pyload.utils import fs_encode
-import codecs
-import pycurl
-import urllib
-
-from pyload.network.HTTPRequest import HTTPRequest
-
-class WrongFormat(Exception):
-    pass
-
-
-class ChunkInfo(object):
-    def __init__(self, name):
-        self.name = unicode(name)
-        self.size = 0
-        self.resume = False
-        self.chunks = []
-
-    def __repr__(self):
-        ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
-        for i, c in enumerate(self.chunks):
-            ret += "%s# %s\n" % (i, c[1])
-
-        return ret
-
-    def setSize(self, size):
-        self.size = int(size)
-
-    def addChunk(self, name, range):
-        self.chunks.append((name, range))
-
-    def clear(self):
-        self.chunks = []
-
-    def createChunks(self, chunks):
-        self.clear()
-        chunk_size = self.size / chunks
-
-        current = 0
-        for i in range(chunks):
-            end = self.size - 1 if (i == chunks - 1) else current + chunk_size
-            self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
-            current += chunk_size + 1
-
-
-    def save(self):
-        fs_name = fs_encode("%s.chunks" % self.name)
-        fh = codecs.open(fs_name, "w", "utf_8")
-        fh.write("name:%s\n" % self.name)
-        fh.write("size:%s\n" % self.size)
-        for i, c in enumerate(self.chunks):
-            fh.write("#%d:\n" % i)
-            fh.write("\tname:%s\n" % c[0])
-            fh.write("\trange:%i-%i\n" % c[1])
-        fh.close()
-
-    @staticmethod
-    def load(name):
-        fs_name = fs_encode("%s.chunks" % name)
-        if not exists(fs_name):
-            raise IOError()
-        fh = codecs.open(fs_name, "r", "utf_8")
-        name = fh.readline()[:-1]
-        size = fh.readline()[:-1]
-        if name.startswith("name:") and size.startswith("size:"):
-            name = name[5:]
-            size = size[5:]
-        else:
-            fh.close()
-            raise WrongFormat()
-        ci = ChunkInfo(name)
-        ci.loaded = True
-        ci.setSize(size)
-        while True:
-            if not fh.readline():  #skip line
-                break
-            name = fh.readline()[1:-1]
-            range = fh.readline()[1:-1]
-            if name.startswith("name:") and range.startswith("range:"):
-                name = name[5:]
-                range = range[6:].split("-")
-            else:
-                raise WrongFormat()
-
-            ci.addChunk(name, (long(range[0]), long(range[1])))
-        fh.close()
-        return ci
-
-    def remove(self):
-        fs_name = fs_encode("%s.chunks" % self.name)
-        if exists(fs_name): remove(fs_name)
-
-    def getCount(self):
-        return len(self.chunks)
-
-    def getChunkName(self, index):
-        return self.chunks[index][0]
-
-    def getChunkRange(self, index):
-        return self.chunks[index][1]
-
-
-class HTTPChunk(HTTPRequest):
-    def __init__(self, id, parent, range=None, resume=False):
-        self.id = id
-        self.p = parent  # HTTPDownload instance
-        self.range = range  # tuple (start, end)
-        self.resume = resume
-        self.log = parent.log
-
-        self.size = range[1] - range[0] if range else -1
-        self.arrived = 0
-        self.lastURL = self.p.referer
-
-        self.c = pycurl.Curl()
-
-        self.header = ""
-        self.headerParsed = False  #indicates if the header has been processed
-
-        self.fp = None  #file handle
-
-        self.initHandle()
-        self.setInterface(self.p.options)
-
-        self.BOMChecked = False  # check and remove byte order mark
-
-        self.rep = None
-
-        self.sleep = 0.000
-        self.lastSize = 0
-
-    def __repr__(self):
-        return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
-
-    @property
-    def cj(self):
-        return self.p.cj
-
-    def getHandle(self):
-        """ returns a Curl handle ready to use for perform/multiperform """
-
-        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
-        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
-        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
-
-        # request all bytes, since some servers in russia seems to have a defect arihmetic unit
-
-        fs_name = fs_encode(self.p.info.getChunkName(self.id))
-        if self.resume:
-            self.fp = open(fs_name, "ab")
-            self.arrived = self.fp.tell()
-            if not self.arrived:
-                self.arrived = stat(fs_name).st_size
-
-            if self.range:
-                #do nothing if chunk already finished
-                if self.arrived + self.range[0] >= self.range[1]: return None
-
-                if self.id == len(self.p.info.chunks) - 1:  #as last chunk dont set end range, so we get everything
-                    range = "%i-" % (self.arrived + self.range[0])
-                else:
-                    range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
-
-                self.log.debug("Chunked resume with range %s" % range)
-                self.c.setopt(pycurl.RANGE, range)
-            else:
-                self.log.debug("Resume File from %i" % self.arrived)
-                self.c.setopt(pycurl.RESUME_FROM, self.arrived)
-
-        else:
-            if self.range:
-                if self.id == len(self.p.info.chunks) - 1:  # see above
-                    range = "%i-" % self.range[0]
-                else:
-                    range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
-
-                self.log.debug("Chunked with range %s" % range)
-                self.c.setopt(pycurl.RANGE, range)
-
-            self.fp = open(fs_name, "wb")
-
-        return self.c
-
-    def writeHeader(self, buf):
-        self.header += buf
-        #@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers
-        # as first chunk, we will parse the headers
-        if not self.range and self.header.endswith("\r\n\r\n"):
-            self.parseHeader()
-        elif not self.range and buf.startswith("150") and "data connection" in buf.lower():  #: ftp file size parsing
-            size = search(r"(\d+) bytes", buf)
-            if size:
-                self.p.size = int(size.group(1))
-                self.p.chunkSupport = True
-
-            self.headerParsed = True
-
-    def writeBody(self, buf):
-        #ignore BOM, it confuses unrar
-        if not self.BOMChecked:
-            if [ord(b) for b in buf[:3]] == [239, 187, 191]:
-                buf = buf[3:]
-            self.BOMChecked = True
-
-        size = len(buf)
-
-        self.arrived += size
-
-        self.fp.write(buf)
-
-        if self.p.bucket:
-            sleep(self.p.bucket.consumed(size))
-        else:
-            # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller
-            # otherwise reduce sleep time percentual (values are based on tests)
-            # So in general cpu time is saved without reducing bandwith too much
-
-            if size < self.lastSize:
-                self.sleep += 0.002
-            else:
-                self.sleep *= 0.7
-
-            self.lastSize = size
-
-            sleep(self.sleep)
-
-        if self.range and self.arrived > self.size:
-            return 0  #close if we have enough data
-
-
-    def parseHeader(self):
-        """parse data from recieved header"""
-        for orgline in self.decodeResponse(self.header).splitlines():
-            line = orgline.strip().lower()
-
-            if line.startswith("accept-ranges") and "bytes" in line:
-                self.p.chunkSupport = True
-
-            if line.startswith("content-disposition") and ("filename=" in line or "filename*=" in line):
-                if "filename*=" in line:
-                    # extended version according to RFC 6266 and RFC 5987.
-                    encoding = line.partition("*=")[2].partition("''")[0]
-                    name = orgline.partition("''")[2]
-                    name = urllib.unquote(str(name)).decode(charEnc(encoding))
-                else:
-                    # basic version, US-ASCII only
-                    name = orgline.partition("filename=")[2]
-
-                name = name.replace('"', "").replace("'", "").replace(";", "").replace("/", "_").strip()
-                self.p.nameDisposition = name
-
-                self.log.debug("Content-Disposition: %s" % name)
-
-            if not self.resume and line.startswith("content-length"):
-                self.p.size = int(line.split(":")[1])
-
-        self.headerParsed = True
-
-    def stop(self):
-        """The download will not proceed after next call of writeBody"""
-        self.range = [0, 0]
-        self.size = 0
-
-    def resetRange(self):
-        """ Reset the range, so the download will load all data available """
-        self.range = None
-
-    def setRange(self, range):
-        self.range = range
-        self.size = range[1] - range[0]
-
-    def flushFile(self):
-        """ flush and close file """
-        self.fp.flush()
-        fsync(self.fp.fileno())  #make sure everything was written to disk
-        self.fp.close()  #needs to be closed, or merging chunks will fail
-
-    def close(self):
-        """ closes everything, unusable after this """
-        if self.fp: self.fp.close()
-        self.c.close()
-        if hasattr(self, "p"): del self.p
-
-
-def charEnc(enc):
-    return {'utf-8' : "utf_8",
-            'iso-8859-1': "latin_1"}.get(enc, "unknown")
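The chunk arithmetic above drives ordinary HTTP Range requests: ChunkInfo.createChunks splits the file into inclusive byte ranges, and HTTPChunk.getHandle turns them into pycurl.RANGE strings, leaving the last chunk open-ended. The same arithmetic in isolation (a sketch, not part of the commit; the 1000-byte size and 3-way split are examples):

```python
def split_ranges(size, chunks):
    # mirrors ChunkInfo.createChunks: inclusive (start, end) pairs,
    # with the last chunk absorbing the remainder of the file
    chunk_size = size // chunks
    current, out = 0, []
    for i in range(chunks):
        end = size - 1 if i == chunks - 1 else current + chunk_size
        out.append((current, end))
        current += chunk_size + 1
    return out

for start, end in split_ranges(1000, 3):
    print("Range: bytes=%d-%d" % (start, end))
```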
diff --git a/pyload/network/HTTPDownload.py b/pyload/network/HTTPDownload.py
deleted file mode 100644
index 3b2bf26ca..000000000
--- a/pyload/network/HTTPDownload.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-from os import remove, fsync
-from os.path import dirname
-from time import sleep, time
-from shutil import move
-from logging import getLogger
-
-import pycurl
-
-from pyload.network.HTTPChunk import ChunkInfo, HTTPChunk
-from pyload.network.HTTPRequest import BadHeader
-
-from pyload.plugin.Plugin import Abort
-from pyload.utils import safe_join, fs_encode
-
-
-class HTTPDownload(object):
-    """ loads a url http + ftp """
-
-    def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
-                 options={}, progress=None, disposition=False):
-        self.url = url
-        self.filename = filename  #complete file destination, not only name
-        self.get = get
-        self.post = post
-        self.referer = referer
-        self.cj = cj  #cookiejar if cookies are needed
-        self.bucket = bucket
-        self.options = options
-        self.disposition = disposition
-        # all arguments
-
-        self.abort = False
-        self.size = 0
-        self.nameDisposition = None  #will be parsed from content disposition
-
-        self.chunks = []
-
-        self.log = getLogger("log")
-
-        try:
-            self.info = ChunkInfo.load(filename)
-            self.info.resume = True  #resume is only possible with valid info file
-            self.size = self.info.size
-            self.infoSaved = True
-        except IOError:
-            self.info = ChunkInfo(filename)
-
-        self.chunkSupport = True
-        self.m = pycurl.CurlMulti()
-
-        #needed for speed calculation
-        self.lastArrived = []
-        self.speeds = []
-        self.lastSpeeds = [0, 0]
-
-        self.progress = progress
-
-    @property
-    def speed(self):
-        last = [sum(x) for x in self.lastSpeeds if x]
-        return (sum(self.speeds) + sum(last)) / (1 + len(last))
-
-    @property
-    def arrived(self):
-        return sum([c.arrived for c in self.chunks])
-
-    @property
-    def percent(self):
-        if not self.size: return 0
-        return (self.arrived * 100) / self.size
-
-    def _copyChunks(self):
-        init = fs_encode(self.info.getChunkName(0))  #initial chunk name
-
-        if self.info.getCount() > 1:
-            fo = open(init, "rb+")  #first chunkfile
-            for i in range(1, self.info.getCount()):
-                #input file
-                fo.seek(
-                    self.info.getChunkRange(i - 1)[1] + 1)  #seek to beginning of chunk, to get rid of overlapping chunks
-                fname = fs_encode("%s.chunk%d" % (self.filename, i))
-                fi = open(fname, "rb")
-                buf = 32 * 1024
-                while True:  #copy in chunks, consumes less memory
-                    data = fi.read(buf)
-                    if not data:
-                        break
-                    fo.write(data)
-                fi.close()
-                if fo.tell() < self.info.getChunkRange(i)[1]:
-                    fo.close()
-                    remove(init)
-                    self.info.remove()  #there are probably invalid chunks
-                    raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
-                remove(fname)  #remove chunk
-            fo.close()
-
-        if self.nameDisposition and self.disposition:
-            self.filename = safe_join(dirname(self.filename), self.nameDisposition)
-
-        move(init, fs_encode(self.filename))
-        self.info.remove()  #remove info file
-
-    def download(self, chunks=1, resume=False):
-        """ returns new filename or None """
-
-        chunks = max(1, chunks)
-        resume = self.info.resume and resume
-
-        try:
-            self._download(chunks, resume)
-        except pycurl.error, e:
-            #code 33 - no resume
-            code = e.args[0]
-            if resume is True and code == 33:
-                # try again without resume
-                self.log.debug("Errno 33 -> Restart without resume")
-
-                #remove old handles
-                for chunk in self.chunks:
-                    self.closeChunk(chunk)
-
-                return self._download(chunks, False)
-            else:
-                raise
-        finally:
-            self.close()
-
-        if self.nameDisposition and self.disposition: return self.nameDisposition
-        return None
-
-    def _download(self, chunks, resume):
-        if not resume:
-            self.info.clear()
-            self.info.addChunk("%s.chunk0" % self.filename, (0, 0))  #create an initial entry
-            self.info.save()
-
-        self.chunks = []
-
-        init = HTTPChunk(0, self, None, resume)  #initial chunk that will load complete file (if needed)
-
-        self.chunks.append(init)
-        self.m.add_handle(init.getHandle())
-
-        lastFinishCheck = 0
-        lastTimeCheck = 0
-        chunksDone = set()  # list of curl handles that are finished
-        chunksCreated = False
-        done = False
-        if self.info.getCount() is 0:  # This is a resume, if we were chunked originally assume still can
-            self.chunkSupport = False
-
-        while 1:
-            #need to create chunks
-            if not chunksCreated and self.chunkSupport and self.size:  #will be setted later by first chunk
-
-                if not resume:
-                    self.info.setSize(self.size)
-                    self.info.createChunks(chunks)
-                    self.info.save()
-
-                chunks = self.info.getCount()
-
-                init.setRange(self.info.getChunkRange(0))
-
-                for i in range(1, chunks):
-                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
-
-                    handle = c.getHandle()
-                    if handle:
-                        self.chunks.append(c)
-                        self.m.add_handle(handle)
-                    else:
-                        #close immediatly
-                        self.log.debug("Invalid curl handle -> closed")
-                        c.close()
-
-                chunksCreated = True
-
-            while 1:
-                ret, num_handles = self.m.perform()
-                if ret != pycurl.E_CALL_MULTI_PERFORM:
-                    break
-
-            t = time()
-
-            # reduce these calls
-            while lastFinishCheck + 0.5 < t:
-                # list of failed curl handles
-                failed = []
-                ex = None  # save only last exception, we can only raise one anyway
-
-                num_q, ok_list, err_list = self.m.info_read()
-                for c in ok_list:
-                    chunk = self.findChunk(c)
-                    try:  # check if the header implies success, else add it to failed list
-                        chunk.verifyHeader()
-                    except BadHeader, e:
-                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
-                        failed.append(chunk)
-                        ex = e
-                    else:
-                        chunksDone.add(c)
-
-                for c in err_list:
-                    curl, errno, msg = c
-                    chunk = self.findChunk(curl)
-                    #test if chunk was finished
-                    if errno != 23 or "0 !=" not in msg:
-                        failed.append(chunk)
-                        ex = pycurl.error(errno, msg)
-                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
-                        continue
-
-                    try:  # check if the header implies success, else add it to failed list
-                        chunk.verifyHeader()
-                    except BadHeader, e:
-                        self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
-                        failed.append(chunk)
-                        ex = e
-                    else:
-                        chunksDone.add(curl)
-                if not num_q:  # no more infos to get
-
-                    # check if init is not finished so we reset download connections
-                    # note that other chunks are closed and downloaded with init too
-                    if failed and init not in failed and init.c not in chunksDone:
-                        self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
-
-                        #list of chunks to clean and remove
-                        to_clean = filter(lambda x: x is not init, self.chunks)
-                        for chunk in to_clean:
-                            self.closeChunk(chunk)
-                            self.chunks.remove(chunk)
-                            remove(fs_encode(self.info.getChunkName(chunk.id)))
-
-                        #let first chunk load the rest and update the info file
-                        init.resetRange()
-                        self.info.clear()
-                        self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
-                        self.info.save()
-                    elif failed:
-                        raise ex
-
-                    lastFinishCheck = t
-
-                    if len(chunksDone) >= len(self.chunks):
-                        if len(chunksDone) > len(self.chunks):
-                            self.log.warning("Finished download chunks size incorrect, please report bug.")
-                        done = True  #all chunks loaded
-
-                    break
-
-            if done:
-                break  #all chunks loaded
-
-            # calc speed once per second, averaging over 3 seconds
-            if lastTimeCheck + 1 < t:
-                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
-                        enumerate(self.chunks)]
-
-                self.lastSpeeds[1] = self.lastSpeeds[0]
-                self.lastSpeeds[0] = self.speeds
-                self.speeds = [float(a) / (t - lastTimeCheck) for a in diff]
-                self.lastArrived = [c.arrived for c in self.chunks]
-                lastTimeCheck = t
-                self.updateProgress()
-
-            if self.abort:
-                raise Abort
-
-            #sleep(0.003)  #supress busy waiting - limits dl speed to (1 / x) * buffersize
-            self.m.select(1)
-
-        for chunk in self.chunks:
-            chunk.flushFile()  #make sure downloads are written to disk
-
-        self._copyChunks()
-
-    def updateProgress(self):
-        if self.progress:
-            self.progress(self.percent)
-
-    def findChunk(self, handle):
-        """ linear search to find a chunk (should be ok since chunk size is usually low) """
-        for chunk in self.chunks:
-            if chunk.c == handle: return chunk
-
-    def closeChunk(self, chunk):
-        try:
-            self.m.remove_handle(chunk.c)
-        except pycurl.error, e:
-            self.log.debug("Error removing chunk: %s" % str(e))
-        finally:
-            chunk.close()
-
-    def close(self):
-        """ cleanup """
-        for chunk in self.chunks:
-            self.closeChunk(chunk)
-
-        self.chunks = []
-        if hasattr(self, "m"):
-            self.m.close()
            del self.m
-        if hasattr(self, "cj"):
-            del self.cj
-        if hasattr(self, "info"):
-            del self.info
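HTTPDownload._download above is built around pycurl's multi interface: pump perform() until it stops asking to be called again, drain info_read() for finished handles, then block in select() instead of busy-waiting. Stripped of the chunk bookkeeping and fallback logic, the driving cycle looks roughly like this (a condensed sketch under the assumption that `handles` is a list of already-prepared pycurl.Curl objects; the original's error recovery is omitted):

```python
import pycurl

def run_multi(handles):
    """Drive several curl handles to completion, HTTPDownload-style."""
    m = pycurl.CurlMulti()
    for h in handles:
        m.add_handle(h)

    done = set()
    while len(done) < len(handles):
        while True:  # transfer as much as possible right now
            ret, _ = m.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        # collect handles that finished, successfully or not
        _, ok_list, err_list = m.info_read()
        done.update(ok_list)
        done.update(c for c, errno, msg in err_list)
        m.select(1)  # wait for socket activity instead of spinning
    m.close()
```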
diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py
deleted file mode 100644
index eac03a365..000000000
--- a/pyload/network/HTTPRequest.py
+++ /dev/null
@@ -1,302 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN
-
-from __future__ import with_statement
-
-import pycurl
-
-from codecs import getincrementaldecoder, lookup, BOM_UTF8
-from urllib import quote, urlencode
-from httplib import responses
-from logging import getLogger
-from cStringIO import StringIO
-
-from pyload.plugin.Plugin import Abort, Fail
-
-from pyload.utils import encode
-
-
-def myquote(url):
-    return quote(encode(url), safe="%/:=&?~#+!$,;'@()*[]")
-
-def myurlencode(data):
-    data = dict(data)
-    return urlencode(dict((encode(x), encode(y)) for x, y in data.iteritems()))
-
-bad_headers = range(400, 404) + range(405, 418) + range(500, 506)
-
-class BadHeader(Exception):
-    def __init__(self, code, content=""):
-        Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)]))
-        self.code = code
-        self.content = content
-
-
-class HTTPRequest(object):
-    def __init__(self, cookies=None, options=None):
-        self.c = pycurl.Curl()
-        self.rep = StringIO()
-
-        self.cj = cookies  #cookiejar
-
-        self.lastURL = None
-        self.lastEffectiveURL = None
-        self.abort = False
-        self.code = 0  # last http code
-
-        self.header = ""
-
-        self.headers = []  #temporary request header
-
-        self.initHandle()
-        self.setInterface(options)
-
-        self.c.setopt(pycurl.WRITEFUNCTION, self.write)
-        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
-
-        self.log = getLogger("log")
-
-
-    def initHandle(self):
-        """ sets common options to curl handle """
-        self.c.setopt(pycurl.FOLLOWLOCATION, 1)
-        self.c.setopt(pycurl.MAXREDIRS, 5)
-        self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
-        self.c.setopt(pycurl.NOSIGNAL, 1)
-        self.c.setopt(pycurl.NOPROGRESS, 1)
-        if hasattr(pycurl, "AUTOREFERER"):
-            self.c.setopt(pycurl.AUTOREFERER, 1)
-        self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
-        self.c.setopt(pycurl.LOW_SPEED_TIME, 60)
-        self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
-        self.c.setopt(pycurl.USE_SSL, pycurl.CURLUSESSL_TRY)
-
-        #self.c.setopt(pycurl.VERBOSE, 1)
-
-        self.c.setopt(pycurl.USERAGENT,
-                      "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
-        if pycurl.version_info()[7]:
-            self.c.setopt(pycurl.ENCODING, "gzip, deflate")
-        self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
-                                          "Accept-Language: en-US, en",
-                                          "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7",
-                                          "Connection: keep-alive",
-                                          "Keep-Alive: 300",
-                                          "Expect:"])
-
-    def setInterface(self, options):
-
-        interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
-
-        if interface and interface.lower() != "none":
-            self.c.setopt(pycurl.INTERFACE, str(interface))
-
-        if proxy:
-            if proxy["type"] == "socks4":
-                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
-            elif proxy["type"] == "socks5":
-                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
-            else:
-                self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
-
-            self.c.setopt(pycurl.PROXY, str(proxy["address"]))
-            self.c.setopt(pycurl.PROXYPORT, proxy["port"])
-
-            if proxy["username"]:
-                self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
-
-        if ipv6:
-            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
-        else:
-            self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
-
-        if "auth" in options:
-            self.c.setopt(pycurl.USERPWD, str(options["auth"]))
-
-        if "timeout" in options:
-            self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
-
-
-    def addCookies(self):
-        """ put cookies from curl handle to cj """
-        if self.cj:
-            self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST))
-
-    def getCookies(self):
-        """ add cookies from cj to curl handle """
-        if self.cj:
-            for c in self.cj.getCookies():
-                self.c.setopt(pycurl.COOKIELIST, c)
-        return
-
-    def clearCookies(self):
-        self.c.setopt(pycurl.COOKIELIST, "")
-
-    def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
-        """ sets everything needed for the request """
-
-        url = myquote(url)
-
-        if get:
-            get = urlencode(get)
-            url = "%s?%s" % (url, get)
-
-        self.c.setopt(pycurl.URL, url)
-        self.c.lastUrl = url
-
-        if post:
-            self.c.setopt(pycurl.POST, 1)
-            if not multipart:
-                if type(post) == unicode:
-                    post = str(post)  #unicode not allowed
-                elif type(post) == str:
-                    pass
-                else:
-                    post = myurlencode(post)
-
-                self.c.setopt(pycurl.POSTFIELDS, post)
-            else:
-                post = [(x, encode(y)) for x, y in post.iteritems()]
-                self.c.setopt(pycurl.HTTPPOST, post)
-        else:
-            self.c.setopt(pycurl.POST, 0)
-
-        if referer and self.lastURL:
-            self.c.setopt(pycurl.REFERER, str(self.lastURL))
-
-        if cookies:
-            self.c.setopt(pycurl.COOKIEFILE, "")
-            self.c.setopt(pycurl.COOKIEJAR, "")
-            self.getCookies()
-
-
-    def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False, follow_location=True, save_cookies=True):
-        """ load and returns a given page """
-
-        self.setRequestContext(url, get, post, referer, cookies, multipart)
-
-        self.header = ""
-
-        self.c.setopt(pycurl.HTTPHEADER, self.headers)
-
-        if post:
-            self.c.setopt(pycurl.POST, 1)
-        else:
-            self.c.setopt(pycurl.HTTPGET, 1)
-
-        if not follow_location:
-            self.c.setopt(pycurl.FOLLOWLOCATION, 0)
-
-        if just_header:
-            self.c.setopt(pycurl.NOBODY, 1)
-
-        self.c.perform()
-        rep = self.header if just_header else self.getResponse()
-
-        if not follow_location:
-            self.c.setopt(pycurl.FOLLOWLOCATION, 1)
-
-        if just_header:
-            self.c.setopt(pycurl.NOBODY, 0)
-
-        self.c.setopt(pycurl.POSTFIELDS, "")
-        self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
-        self.code = self.verifyHeader()
-
-        if save_cookies:
-            self.addCookies()
-
-        if decode:
-            rep = self.decodeResponse(rep)
-
-        return rep
-
-    def verifyHeader(self):
-        """ raise an exceptions on bad headers """
-        code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
-        if code in bad_headers:
-            #404 will NOT raise an exception
-            raise BadHeader(code, self.getResponse())
-        return code
-
-    def checkHeader(self):
-        """ check if header indicates failure"""
-        return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers
-
-    def getResponse(self):
-        """ retrieve response from string io """
-        if self.rep is None:
-            return ""
-        else:
-            value = self.rep.getvalue()
-            self.rep.close()
-            self.rep = StringIO()
-            return value
-
-    def decodeResponse(self, rep):
-        """ decode with correct encoding, relies on header """
-        header = self.header.splitlines()
-        encoding = "utf8"  # default encoding
-
-        for line in header:
-            line = line.lower().replace(" ", "")
-            if not line.startswith("content-type:") or\
-               ("text" not in line and "application" not in line):
-                continue
-
-            none, delemiter, charset = line.rpartition("charset=")
-            if delemiter:
-                charset = charset.split(";")
-                if charset:
-                    encoding = charset[0]
-
-        try:
-            #self.log.debug("Decoded %s" % encoding )
-            if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
-                encoding = 'utf-8-sig'
-
-            decoder = getincrementaldecoder(encoding)("replace")
-            rep = decoder.decode(rep, True)
-
-            #TODO: html_unescape as default
-
-        except LookupError:
-            self.log.debug("No Decoder foung for %s" % encoding)
-
-        except Exception:
-            self.log.debug("Error when decoding string from %s." % encoding)
-
-        return rep
-
-    def write(self, buf):
-        """ writes response """
-        if self.rep.tell() > 1000000 or self.abort:
-            rep = self.getResponse()
-
-            if self.abort:
-                raise Abort
-
-            with open("response.dump", "wb") as f:
-                f.write(rep)
-            raise Fail("Loaded url exceeded size limit")
-        else:
-            self.rep.write(buf)
-
-    def writeHeader(self, buf):
-        """ writes header """
-        self.header += buf
-
-    def putHeader(self, name, value):
-        self.headers.append("%s: %s" % (name, value))
-
-    def clearHeaders(self):
-        self.headers = []
-
-    def close(self):
-        """ cleanup, unusable after this """
-        self.rep.close()
-        if hasattr(self, "cj"):
-            del self.cj
-        if hasattr(self, "c"):
-            self.c.close()
-            del self.c
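HTTPRequest's core pattern is wiring pycurl's WRITEFUNCTION and HEADERFUNCTION callbacks to in-memory buffers instead of files. The same pattern standalone (a sketch, not part of the commit; the URL is a placeholder):

```python
import pycurl
from io import BytesIO

body, hdr = BytesIO(), BytesIO()

c = pycurl.Curl()
c.setopt(pycurl.URL, "http://example.com/")     # placeholder URL
c.setopt(pycurl.FOLLOWLOCATION, 1)
c.setopt(pycurl.WRITEFUNCTION, body.write)      # body callback, like HTTPRequest.write
c.setopt(pycurl.HEADERFUNCTION, hdr.write)      # header callback, like writeHeader
c.perform()

print(c.getinfo(pycurl.RESPONSE_CODE))          # status code, as verifyHeader reads it
c.close()
```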
diff --git a/pyload/network/JsEngine.py b/pyload/network/JsEngine.py
deleted file mode 100644
index 2e98fa37d..000000000
--- a/pyload/network/JsEngine.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: vuolter
-
-import subprocess
-import sys
-
-from os import path
-from urllib import quote
-
-from pyload.utils import encode, decode, uniqify
-
-
-class JsEngine(object):
-    """ JS Engine superclass """
-
-    def __init__(self, core, engine=None):
-        self.core = core
-        self.engine = None  #: Engine Instance
-
-        if not engine:
-            engine = self.core.config.get("general", "jsengine")
-
-        if engine != "auto" and self.set(engine) is False:
-            engine = "auto"
-            self.core.log.warning("JS Engine set to \"auto\" for safely")
-
-        if engine == "auto":
-            for E in self.find():
-                if self.set(E) is True:
-                    break
-            else:
-                self.core.log.error("No JS Engine available")
-
-
-    @classmethod
-    def find(cls):
-        """ Check if there is any engine available """
-        return [E for E in ENGINES if E.find()]
-
-
-    def get(self, engine=None):
-        """ Convert engine name (string) to relative JSE class (AbstractEngine extended) """
-        if not engine:
-            JSE = self.engine
-
-        elif isinstance(engine, basestring):
-            engine_name = engine.lower()
-            for E in ENGINES:
-                if E.__name == engine_name:  #: doesn't check if E(NGINE) is available, just convert string to class
-                    JSE = E
-                    break
-            else:
-                raise ValueError("JSE")
-
-        elif issubclass(engine, AbstractEngine):
-            JSE = engine
-
-        else:
-            raise TypeError("engine")
-
-        return JSE
-
-
-    def set(self, engine):
-        """ Set engine name (string) or JSE class (AbstractEngine extended) as default engine """
-        if isinstance(engine, basestring):
-            return self.set(self.get(engine))
-
-        elif issubclass(engine, AbstractEngine) and engine.find():
-            self.engine = engine
-            return True
-
-        else:
-            return False
-
-
-    def eval(self, script, engine=None):  #: engine can be a jse name """string""" or an AbstractEngine """class"""
-        JSE = self.get(engine)
-
-        if not JSE:
-            raise TypeError("engine")
-
-        script = encode(script)
-
-        out, err = JSE.eval(script)
-
-        results = [out]
-
-        if self.core.config.get("general", "debug"):
-            if err:
-                self.core.log.debug(JSE.__name + ":", err)
-
-            engines = self.find()
-            engines.remove(JSE)
-            for E in engines:
-                out, err = E.eval(script)
-                res = err or out
-                self.core.log.debug(E.__name + ":", res)
-                results.append(res)
-
-            if len(results) > 1 and len(uniqify(results)) > 1:
-                self.core.log.warning("JS output of two or more engines mismatch")
-
-        return results[0]
-
-
-class AbstractEngine(object):
-    """ JSE base class """
-
-    __name = ""
-
-
-    def __init__(self):
-        self.setup()
-        self.available = self.find()
-
-
-    def setup(self):
-        pass
-
-
-    @classmethod
-    def find(cls):
-        """ Check if the engine is available """
-        try:
-            __import__(cls.__name)
-        except Exception:
-            try:
-                out, err = cls().eval("print(23+19)")
-            except Exception:
-                res = False
-            else:
-                res = out == "42"
-        else:
-            res = True
-
-        return res
-
-
-    def _eval(args):
-        if not self.available:
-            return None, "JS Engine \"%s\" not found" % self.__name
-
-        try:
-            p = subprocess.Popen(args,
-                                 stdout=subprocess.PIPE,
-                                 stderr=subprocess.PIPE,
-                                 bufsize=-1)
-            return map(lambda x: x.strip(), p.communicate())
-        except Exception, e:
-            return None, e
-
-
-    def eval(script):
-        raise NotImplementedError
-
-
-class Pyv8Engine(AbstractEngine):
-
-    __name = "pyv8"
-
-
-    def eval(self, script):
-        if not self.available:
-            return None, "JS Engine \"%s\" not found" % self.__name
-
-        try:
-            rt = PyV8.JSContext()
-            rt.enter()
-            res = rt.eval(script), None  #@TODO: parse stderr
-        except Exception, e:
-            res = None, e
-
-        return res
-
-
-class CommonEngine(AbstractEngine):
-
-    __name = "js"
-
-
-    def setup(self):
-        subprocess.Popen(["js", "-v"], bufsize=-1).communicate()
-
-
-    def eval(self, script):
-        script = "print(eval(unescape('%s')))" % quote(script)
-        args = ["js", "-e", script]
-        return self._eval(args)
-
-
-class NodeEngine(AbstractEngine):
-
-    __name = "nodejs"
-
-
-    def setup(self):
-        subprocess.Popen(["node", "-v"], bufsize=-1).communicate()
-
-
-    def eval(self, script):
-        script = "console.log(eval(unescape('%s')))" % quote(script)
-        args = ["node", "-e", script]
-        return self._eval(args)
-
-
-class RhinoEngine(AbstractEngine):
-
-    __name = "rhino"
-
-
-    def setup(self):
-        jspath = [
-            "/usr/share/java*/js.jar",
-            "js.jar",
-            path.join(pypath, "js.jar")
-        ]
-        for p in jspath:
-            if path.exists(p):
-                self.path = p
-                break
-        else:
-            self.path = ""
-
-
-    def eval(self, script):
-        script = "print(eval(unescape('%s')))" % quote(script)
-        args = ["java", "-cp", self.path, "org.mozilla.javascript.tools.shell.Main", "-e", script]
-        res = decode(self._eval(args))
-        try:
-            return res.encode("ISO-8859-1")
-        finally:
-            return res
-
-
-class JscEngine(AbstractEngine):
-
-    __name = "javascriptcore"
-
-
-    def setup(self):
-        jspath = "/System/Library/Frameworks/JavaScriptCore.framework/Resources/jsc"
-        self.path = jspath if path.exists(jspath) else ""
-
-
-    def eval(self, script):
-        script = "print(eval(unescape('%s')))" % quote(script)
-        args = [self.path, "-e", script]
-        return self._eval(args)
-
-
-#@NOTE: Priority ordered
-ENGINES = [CommonEngine, Pyv8Engine, NodeEngine, RhinoEngine]
-
-if sys.platform == "darwin":
-    ENGINES.insert(JscEngine)
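AbstractEngine.find probes each external interpreter by evaluating 23+19 and checking the output for "42". The equivalent probe in isolation (a sketch, not part of the commit; it assumes a `node` binary on PATH):

```python
import subprocess

def probe(cmd):
    """Return True if the interpreter runs and prints the expected result,
    mirroring the fallback branch of AbstractEngine.find."""
    try:
        out = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    except (OSError, subprocess.CalledProcessError):
        return False  # binary missing or exited non-zero
    return out.strip() == b"42"

print(probe(["node", "-e", "console.log(23+19)"]))
```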
diff --git a/pyload/network/RequestFactory.py b/pyload/network/RequestFactory.py
deleted file mode 100644
index 579eafea7..000000000
--- a/pyload/network/RequestFactory.py
+++ /dev/null
@@ -1,121 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: RaNaN, mkaay
-
-from threading import Lock
-
-from pyload.network.Browser import Browser
-from pyload.network.Bucket import Bucket
-from pyload.network.HTTPRequest import HTTPRequest
-from pyload.network.CookieJar import CookieJar
-
-from pyload.network.XDCCRequest import XDCCRequest
-
-class RequestFactory(object):
-    def __init__(self, core):
-        self.lock = Lock()
-        self.core = core
-        self.bucket = Bucket()
-        self.updateBucket()
-        self.cookiejars = {}
-
-    def iface(self):
-        return self.core.config["download"]["interface"]
-
-    def getRequest(self, pluginName, account=None, type="HTTP"):
-        self.lock.acquire()
-
-        if type == "XDCC":
-            return XDCCRequest(proxies=self.getProxies())
-
-        req = Browser(self.bucket, self.getOptions())
-
-        if account:
-            cj = self.getCookieJar(pluginName, account)
-            req.setCookieJar(cj)
-        else:
-            req.setCookieJar(CookieJar(pluginName))
-
-        self.lock.release()
-        return req
-
-    def getHTTPRequest(self, **kwargs):
-        """ returns a http request, dont forget to close it ! """
-        options = self.getOptions()
-        options.update(kwargs)  # submit kwargs as additional options
-        return HTTPRequest(CookieJar(None), options)
-
-    def getURL(self, *args, **kwargs):
-        """ see HTTPRequest for argument list """
-        cj = None
-
-        if 'cookies' in kwargs:
-            if isinstance(kwargs['cookies'], CookieJar):
-                cj = kwargs['cookies']
-            elif isinstance(kwargs['cookies'], list):
-                cj = CookieJar(None)
-                for cookie in kwargs['cookies']:
-                    if isinstance(cookie, tuple) and len(cookie) == 3:
-                        cj.setCookie(*cookie)
-
-        h = HTTPRequest(cj, self.getOptions())
-        try:
-            rep = h.load(*args, **kwargs)
-        finally:
-            h.close()
-
-        return rep
-
-    def getCookieJar(self, pluginName, account=None):
-        if (pluginName, account) in self.cookiejars:
-            return self.cookiejars[(pluginName, account)]
-
-        cj = CookieJar(pluginName, account)
-        self.cookiejars[(pluginName, account)] = cj
-        return cj
-
-    def getProxies(self):
-        """ returns a proxy list for the request classes """
-        if not self.core.config["proxy"]["proxy"]:
-            return {}
-        else:
-            type = "http"
-            setting = self.core.config["proxy"]["type"].lower()
-            if setting == "socks4": type = "socks4"
-            elif setting == "socks5": type = "socks5"
-
-            username = None
-            if self.core.config["proxy"]["username"] and self.core.config["proxy"]["username"].lower() != "none":
-                username = self.core.config["proxy"]["username"]
-
-            pw = None
-            if self.core.config["proxy"]["password"] and self.core.config["proxy"]["password"].lower() != "none":
-                pw = self.core.config["proxy"]["password"]
-
-            return {
-                "type": type,
-                "address": self.core.config["proxy"]["address"],
-                "port": self.core.config["proxy"]["port"],
-                "username": username,
-                "password": pw,
-            }
-
-    def getOptions(self):
-        """returns options needed for pycurl"""
-        return {"interface": self.iface(),
-                "proxies": self.getProxies(),
-                "ipv6": self.core.config["download"]["ipv6"]}
-
-    def updateBucket(self):
-        """ set values in the bucket according to settings"""
-        if not self.core.config["download"]["limit_speed"]:
-            self.bucket.setRate(-1)
-        else:
-            self.bucket.setRate(self.core.config["download"]["max_speed"] * 1024)
-
-# needs pyreq in global namespace
-def getURL(*args, **kwargs):
-    return pyreq.getURL(*args, **kwargs)
-
-
-def getRequest(*args, **kwargs):
-    return pyreq.getHTTPRequest()
diff --git a/pyload/network/XDCCRequest.py b/pyload/network/XDCCRequest.py
deleted file mode 100644
index c49f418c4..000000000
--- a/pyload/network/XDCCRequest.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# -*- coding: utf-8 -*-
-# @author: jeix
-
-import socket
-import re
-
-from os import remove
-from os.path import exists
-
-from time import time
-
-import struct
-from select import select
-
-from pyload.plugin.Plugin import Abort
-
-
-class XDCCRequest(object):
-    def __init__(self, timeout=30, proxies={}):
-
-        self.proxies = proxies
-        self.timeout = timeout
-
-        self.filesize = 0
-        self.recv = 0
-        self.speed = 0
-
-        self.abort = False
-
-    def createSocket(self):
-        # proxytype = None
-        # proxy = None
-        # if self.proxies.has_key("socks5"):
-        #     proxytype = socks.PROXY_TYPE_SOCKS5
-        #     proxy = self.proxies["socks5"]
-        # elif self.proxies.has_key("socks4"):
-        #     proxytype = socks.PROXY_TYPE_SOCKS4
-        #     proxy = self.proxies["socks4"]
-        # if proxytype:
-        #     sock = socks.socksocket()
-        #     t = _parse_proxy(proxy)
-        #     sock.setproxy(proxytype, addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
-        # else:
-        #     sock = socket.socket()
-        # return sock
-
-        return socket.socket()
-
-    def download(self, ip, port, filename, irc, progress=None):
-
-        ircbuffer = ""
-        lastUpdate = time()
-        cumRecvLen = 0
-
-        dccsock = self.createSocket()
-
-        dccsock.settimeout(self.timeout)
-        dccsock.connect((ip, port))
-
-        if exists(filename):
-            i = 0
-            nameParts = filename.rpartition(".")
-            while True:
-                newfilename = "%s-%d%s%s" % (nameParts[0], i, nameParts[1], nameParts[2])
-                i += 1
-
-                if not exists(newfilename):
-                    filename = newfilename
-                    break
-
-        fh = open(filename, "wb")
-
-        # recv loop for dcc socket
-        while True:
-            if self.abort:
-                dccsock.close()
-                fh.close()
-                remove(filename)
-                raise Abort
-
-            self._keepAlive(irc, ircbuffer)
-
-            data = dccsock.recv(4096)
-            dataLen = len(data)
-            self.recv += dataLen
-
-            cumRecvLen += dataLen
-
-            now = time()
-            timespan = now - lastUpdate
-            if timespan > 1:
-                self.speed = cumRecvLen / timespan
-                cumRecvLen = 0
-                lastUpdate = now
-
-                if progress:
-                    progress(self.percent)
-
-            if not data:
-                break
-
-            fh.write(data)
-
-            # acknowledge data by sending number of recceived bytes
-            dccsock.send(struct.pack('!I', self.recv))
-
-        dccsock.close()
-        fh.close()
-
-        return filename
-
-    def _keepAlive(self, sock, *readbuffer):
-        fdset = select([sock], [], [], 0)
-        if sock not in fdset[0]:
-            return
-
-        readbuffer += sock.recv(1024)
-        temp = readbuffer.split("\n")
-        readbuffer = temp.pop()
-
-        for line in temp:
-            line = line.rstrip()
-            first = line.split()
-            if first[0] == "PING":
-                sock.send("PONG %s\r\n" % first[1])
-
-    def abortDownloads(self):
-        self.abort = True
-
-    @property
-    def size(self):
-        return self.filesize
-
-    @property
-    def arrived(self):
-        return self.recv
-
-    @property
-    def percent(self):
-        if not self.filesize: return 0
-        return (self.recv * 100) / self.filesize
-
-    def close(self):
-        pass
diff --git a/pyload/network/__init__.py b/pyload/network/__init__.py
deleted file mode 100644
index 40a96afc6..000000000
--- a/pyload/network/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# -*- coding: utf-8 -*-
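For reference, the per-block acknowledgement sent in XDCCRequest.download above follows the DCC SEND convention: after each recv(), the client reports its running byte total as a 32-bit big-endian unsigned integer. A self-contained illustration of that packing:

```python
import struct

received = 123456                      # running total of bytes received so far
ack = struct.pack('!I', received)      # '!' = network (big-endian), 'I' = uint32
assert struct.unpack('!I', ack)[0] == received
```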