Diffstat (limited to 'pyload/network')
-rw-r--r--  pyload/network/Browser.py         | 132
-rw-r--r--  pyload/network/Bucket.py          |  59
-rw-r--r--  pyload/network/CookieJar.py       |  50
-rw-r--r--  pyload/network/HTTPChunk.py       | 292
-rw-r--r--  pyload/network/HTTPDownload.py    | 325
-rw-r--r--  pyload/network/HTTPRequest.py     | 303
-rw-r--r--  pyload/network/RequestFactory.py  | 126
-rw-r--r--  pyload/network/XDCCRequest.py     | 159
-rw-r--r--  pyload/network/__init__.py        |   1
9 files changed, 1447 insertions, 0 deletions
diff --git a/pyload/network/Browser.py b/pyload/network/Browser.py
new file mode 100644
index 000000000..e78d24688
--- /dev/null
+++ b/pyload/network/Browser.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+
+from logging import getLogger
+
+from HTTPRequest import HTTPRequest
+from HTTPDownload import HTTPDownload
+
+
+class Browser(object):
+ __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl")
+
+ def __init__(self, bucket=None, options=None):
+ self.log = getLogger("log")
+
+ self.options = options if options is not None else {} # holds pycurl options; avoid a shared mutable default
+ self.bucket = bucket
+
+ self.cj = None # needs to be set later
+ self._size = 0
+
+ self.renewHTTPRequest()
+ self.dl = None
+
+
+ def renewHTTPRequest(self):
+ if hasattr(self, "http"): self.http.close()
+ self.http = HTTPRequest(self.cj, self.options)
+
+ def setLastURL(self, val):
+ self.http.lastURL = val
+
+ # tunnel some attributes from HTTP Request to Browser
+ lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL)
+ lastURL = property(lambda self: self.http.lastURL, setLastURL)
+ code = property(lambda self: self.http.code)
+ cookieJar = property(lambda self: self.cj)
+
+ def setCookieJar(self, cj):
+ self.cj = cj
+ self.http.cj = cj
+
+ @property
+ def speed(self):
+ if self.dl:
+ return self.dl.speed
+ return 0
+
+ @property
+ def size(self):
+ if self._size:
+ return self._size
+ if self.dl:
+ return self.dl.size
+ return 0
+
+ @property
+ def arrived(self):
+ if self.dl:
+ return self.dl.arrived
+ return 0
+
+ @property
+ def percent(self):
+ if not self.size: return 0
+ return (self.arrived * 100) / self.size
+
+ def clearCookies(self):
+ if self.cj:
+ self.cj.clear()
+ self.http.clearCookies()
+
+ def clearReferer(self):
+ self.http.lastURL = None
+
+ def abortDownloads(self):
+ self.http.abort = True
+ if self.dl:
+ self._size = self.dl.size
+ self.dl.abort = True
+
+ def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False,
+ progressNotify=None, disposition=False):
+ """ this can also download ftp """
+ self._size = 0
+ self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
+ self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition)
+ name = self.dl.download(chunks, resume)
+ self._size = self.dl.size
+
+ self.dl = None
+
+ return name
+
+ def load(self, *args, **kwargs):
+ """ retrieves page """
+ return self.http.load(*args, **kwargs)
+
+ def putHeader(self, name, value):
+ """ add a header to the request """
+ self.http.putHeader(name, value)
+
+ def addAuth(self, pwd):
+ """Adds user and password for HTTP auth
+
+ :param pwd: string, user:password
+ """
+ self.options["auth"] = pwd
+ self.renewHTTPRequest() # we need a new request
+
+ def removeAuth(self):
+ if "auth" in self.options: del self.options["auth"]
+ self.renewHTTPRequest()
+
+ def setOption(self, name, value):
+ """Adds an option to the request, see HTTPRequest for existing ones"""
+ self.options[name] = value
+
+ def deleteOption(self, name):
+ if name in self.options: del self.options[name]
+
+ def clearHeaders(self):
+ self.http.clearHeaders()
+
+ def close(self):
+ """ cleanup """
+ if hasattr(self, "http"):
+ self.http.close()
+ del self.http
+ if hasattr(self, "dl"):
+ del self.dl
+ if hasattr(self, "cj"):
+ del self.cj
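+
+# Illustrative usage sketch (URL, paths and option values are assumptions;
+# the options dict needs the keys HTTPRequest.setInterface() reads):
+#
+#   from pyload.network.Bucket import Bucket
+#   bucket = Bucket()
+#   bucket.setRate(100 * 1024) # cap downloads at ~100 KiB/s
+#   browser = Browser(bucket, {"interface": None, "proxies": {}, "ipv6": False})
+#   name = browser.httpDownload("http://example.com/f.zip", "/tmp/f.zip", chunks=4, resume=True)
+#   browser.close()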
diff --git a/pyload/network/Bucket.py b/pyload/network/Bucket.py
new file mode 100644
index 000000000..a096d644a
--- /dev/null
+++ b/pyload/network/Bucket.py
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: RaNaN
+"""
+
+from time import time
+from threading import Lock
+
+class Bucket:
+ def __init__(self):
+ self.rate = 0
+ self.tokens = 0
+ self.timestamp = time()
+ self.lock = Lock()
+
+ def __nonzero__(self):
+ return self.rate >= 10240
+
+ def setRate(self, rate):
+ self.lock.acquire()
+ self.rate = int(rate)
+ self.lock.release()
+
+ def consumed(self, amount):
+ """ Returns the time the process has to sleep after it consumed the specified amount """
+ if self.rate < 10240: return 0 # min. 10 KiB/s, may become unresponsive otherwise
+ self.lock.acquire()
+
+ self.calc_tokens()
+ self.tokens -= amount
+
+ if self.tokens < 0:
+ seconds = -self.tokens/float(self.rate) # token deficit divided by rate = wait time
+ else:
+ seconds = 0
+
+ self.lock.release()
+ return seconds
+
+ def calc_tokens(self):
+ if self.tokens < self.rate:
+ now = time()
+ delta = self.rate * (now - self.timestamp)
+ self.tokens = min(self.rate, self.tokens + delta)
+ self.timestamp = now
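+
+# Illustrative usage sketch of the token bucket: consumed() returns how long
+# the caller should sleep after writing a block, and rates below 10 KiB/s
+# disable limiting entirely (see __nonzero__). src/dst are assumed handles.
+#
+#   from time import sleep
+#   bucket = Bucket()
+#   bucket.setRate(100 * 1024) # 100 KiB/s
+#   while True:
+#       block = src.read(8192)
+#       if not block: break
+#       dst.write(block)
+#       sleep(bucket.consumed(len(block)))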
diff --git a/pyload/network/CookieJar.py b/pyload/network/CookieJar.py
new file mode 100644
index 000000000..a6ae090bc
--- /dev/null
+++ b/pyload/network/CookieJar.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: mkaay, RaNaN
+"""
+
+from time import time
+
+class CookieJar:
+ def __init__(self, pluginname, account=None):
+ self.cookies = {}
+ self.plugin = pluginname
+ self.account = account
+
+ def addCookies(self, clist):
+ for c in clist:
+ name = c.split("\t")[5]
+ self.cookies[name] = c
+
+ def getCookies(self):
+ return self.cookies.values()
+
+ def parseCookie(self, name):
+ if name in self.cookies:
+ return self.cookies[name].split("\t")[6]
+ else:
+ return None
+
+ def getCookie(self, name):
+ return self.parseCookie(name)
+
+ def setCookie(self, domain, name, value, path="/", exp=None):
+ if exp is None: exp = time() + 3600*24*180 # compute expiry per call, not once at import time
+ s = ".%s\tTRUE\t%s\tFALSE\t%s\t%s\t%s" % (domain, path, exp, name, value) # tab-separated, so addCookies/parseCookie can split on "\t"
+ self.cookies[name] = s
+
+ def clear(self):
+ self.cookies = {}
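+
+# Cookies are stored as raw Netscape-format lines whose tab-separated fields
+# are: domain, subdomain flag, path, secure flag, expiry, name, value --
+# hence the split("\t")[5] / [6] lookups above. Illustrative sketch
+# (plugin name is hypothetical):
+#
+#   cj = CookieJar("SomeHosterPlugin")
+#   cj.setCookie("example.com", "session", "abc123")
+#   assert cj.getCookie("session") == "abc123"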
diff --git a/pyload/network/HTTPChunk.py b/pyload/network/HTTPChunk.py
new file mode 100644
index 000000000..b9d2a5379
--- /dev/null
+++ b/pyload/network/HTTPChunk.py
@@ -0,0 +1,292 @@
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: RaNaN
+"""
+from os import remove, stat, fsync
+from os.path import exists
+from time import sleep
+from re import search
+from pyload.utils import fs_encode
+import codecs
+import pycurl
+
+from HTTPRequest import HTTPRequest
+
+class WrongFormat(Exception):
+ pass
+
+
+class ChunkInfo:
+ def __init__(self, name):
+ self.name = unicode(name)
+ self.size = 0
+ self.resume = False
+ self.chunks = []
+
+ def __repr__(self):
+ ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
+ for i, c in enumerate(self.chunks):
+ ret += "%s# %s\n" % (i, c[1])
+
+ return ret
+
+ def setSize(self, size):
+ self.size = int(size)
+
+ def addChunk(self, name, range):
+ self.chunks.append((name, range))
+
+ def clear(self):
+ self.chunks = []
+
+ def createChunks(self, chunks):
+ self.clear()
+ chunk_size = self.size / chunks
+
+ current = 0
+ for i in range(chunks):
+ end = self.size - 1 if (i == chunks - 1) else current + chunk_size
+ self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+ current += chunk_size + 1
+
+
+ def save(self):
+ fs_name = fs_encode("%s.chunks" % self.name)
+ fh = codecs.open(fs_name, "w", "utf_8")
+ fh.write("name:%s\n" % self.name)
+ fh.write("size:%s\n" % self.size)
+ for i, c in enumerate(self.chunks):
+ fh.write("#%d:\n" % i)
+ fh.write("\tname:%s\n" % c[0])
+ fh.write("\trange:%i-%i\n" % c[1])
+ fh.close()
+
+ @staticmethod
+ def load(name):
+ fs_name = fs_encode("%s.chunks" % name)
+ if not exists(fs_name):
+ raise IOError()
+ fh = codecs.open(fs_name, "r", "utf_8")
+ name = fh.readline()[:-1]
+ size = fh.readline()[:-1]
+ if name.startswith("name:") and size.startswith("size:"):
+ name = name[5:]
+ size = size[5:]
+ else:
+ fh.close()
+ raise WrongFormat()
+ ci = ChunkInfo(name)
+ ci.loaded = True
+ ci.setSize(size)
+ while True:
+ if not fh.readline(): #skip line
+ break
+ name = fh.readline()[1:-1]
+ range = fh.readline()[1:-1]
+ if name.startswith("name:") and range.startswith("range:"):
+ name = name[5:]
+ range = range[6:].split("-")
+ else:
+ raise WrongFormat()
+
+ ci.addChunk(name, (long(range[0]), long(range[1])))
+ fh.close()
+ return ci
+
+ def remove(self):
+ fs_name = fs_encode("%s.chunks" % self.name)
+ if exists(fs_name): remove(fs_name)
+
+ def getCount(self):
+ return len(self.chunks)
+
+ def getChunkName(self, index):
+ return self.chunks[index][0]
+
+ def getChunkRange(self, index):
+ return self.chunks[index][1]
+
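+# For reference, save() writes a "<name>.chunks" sidecar that load() parses
+# back. For a 1000-byte download split into two chunks it would contain
+# (chunk lines are tab-indented; the path below is hypothetical):
+#
+#   name:/downloads/file.bin
+#   size:1000
+#   #0:
+#       name:/downloads/file.bin.chunk0
+#       range:0-500
+#   #1:
+#       name:/downloads/file.bin.chunk1
+#       range:501-999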
+
+class HTTPChunk(HTTPRequest):
+ def __init__(self, id, parent, range=None, resume=False):
+ self.id = id
+ self.p = parent # HTTPDownload instance
+ self.range = range # tuple (start, end)
+ self.resume = resume
+ self.log = parent.log
+
+ self.size = range[1] - range[0] if range else -1
+ self.arrived = 0
+ self.lastURL = self.p.referer
+
+ self.c = pycurl.Curl()
+
+ self.header = ""
+ self.headerParsed = False #indicates if the header has been processed
+
+ self.fp = None #file handle
+
+ self.initHandle()
+ self.setInterface(self.p.options)
+
+ self.BOMChecked = False # check and remove byte order mark
+
+ self.rep = None
+
+ self.sleep = 0.000
+ self.lastSize = 0
+
+ def __repr__(self):
+ return "<HTTPChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
+ @property
+ def cj(self):
+ return self.p.cj
+
+ def getHandle(self):
+ """ returns a Curl handle ready to use for perform/multiperform """
+
+ self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
+ self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ # request all bytes, since some servers in Russia seem to have a defective arithmetic unit
+
+ fs_name = fs_encode(self.p.info.getChunkName(self.id))
+ if self.resume:
+ self.fp = open(fs_name, "ab")
+ self.arrived = self.fp.tell()
+ if not self.arrived:
+ self.arrived = stat(fs_name).st_size
+
+ if self.range:
+ #do nothing if chunk already finished
+ if self.arrived + self.range[0] >= self.range[1]: return None
+
+ if self.id == len(self.p.info.chunks) - 1: # as the last chunk, don't set an end range, so we get everything
+ range = "%i-" % (self.arrived + self.range[0])
+ else:
+ range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+ self.log.debug("Chunked resume with range %s" % range)
+ self.c.setopt(pycurl.RANGE, range)
+ else:
+ self.log.debug("Resume File from %i" % self.arrived)
+ self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+ else:
+ if self.range:
+ if self.id == len(self.p.info.chunks) - 1: # see above
+ range = "%i-" % self.range[0]
+ else:
+ range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+ self.log.debug("Chunked with range %s" % range)
+ self.c.setopt(pycurl.RANGE, range)
+
+ self.fp = open(fs_name, "wb")
+
+ return self.c
+
+ def writeHeader(self, buf):
+ self.header += buf
+ #@TODO forward headers? This is possibly unneeded when we just parse valid 200 headers
+ # as first chunk, we will parse the headers
+ if not self.range and self.header.endswith("\r\n\r\n"):
+ self.parseHeader()
+ elif not self.range and buf.startswith("150") and "data connection" in buf.lower(): #: ftp file size parsing
+ size = search(r"(\d+) bytes", buf)
+ if size:
+ self.p.size = int(size.group(1))
+ self.p.chunkSupport = True
+
+ self.headerParsed = True
+
+ def writeBody(self, buf):
+ #ignore BOM, it confuses unrar
+ if not self.BOMChecked:
+ if [ord(b) for b in buf[:3]] == [239, 187, 191]:
+ buf = buf[3:]
+ self.BOMChecked = True
+
+ size = len(buf)
+
+ self.arrived += size
+
+ self.fp.write(buf)
+
+ if self.p.bucket:
+ sleep(self.p.bucket.consumed(size))
+ else:
+ # Avoid small buffers, increasing sleep time slowly if buffer size gets smaller,
+ # otherwise reduce sleep time proportionally (values are based on tests),
+ # so in general CPU time is saved without reducing bandwidth too much
+
+ if size < self.lastSize:
+ self.sleep += 0.002
+ else:
+ self.sleep *= 0.7
+
+ self.lastSize = size
+
+ sleep(self.sleep)
+
+ if self.range and self.arrived > self.size:
+ return 0 #close if we have enough data
+
+
+ def parseHeader(self):
+ """parse data from recieved header"""
+ for orgline in self.decodeResponse(self.header).splitlines():
+ line = orgline.strip().lower()
+ if line.startswith("accept-ranges") and "bytes" in line:
+ self.p.chunkSupport = True
+
+ if line.startswith("content-disposition") and "filename=" in line:
+ name = orgline.partition("filename=")[2]
+ name = name.replace('"', "").replace("'", "").replace(";", "").strip()
+ self.p.nameDisposition = name
+ self.log.debug("Content-Disposition: %s" % name)
+
+ if not self.resume and line.startswith("content-length"):
+ self.p.size = int(line.split(":")[1])
+
+ self.headerParsed = True
+
+ def stop(self):
+ """The download will not proceed after next call of writeBody"""
+ self.range = [0, 0]
+ self.size = 0
+
+ def resetRange(self):
+ """ Reset the range, so the download will load all data available """
+ self.range = None
+
+ def setRange(self, range):
+ self.range = range
+ self.size = range[1] - range[0]
+
+ def flushFile(self):
+ """ flush and close file """
+ self.fp.flush()
+ fsync(self.fp.fileno()) #make sure everything was written to disk
+ self.fp.close() #needs to be closed, or merging chunks will fail
+
+ def close(self):
+ """ closes everything, unusable after this """
+ if self.fp: self.fp.close()
+ self.c.close()
+ if hasattr(self, "p"): del self.p
diff --git a/pyload/network/HTTPDownload.py b/pyload/network/HTTPDownload.py
new file mode 100644
index 000000000..50c6b4bdf
--- /dev/null
+++ b/pyload/network/HTTPDownload.py
@@ -0,0 +1,325 @@
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: RaNaN
+"""
+
+from os import remove, fsync
+from os.path import dirname
+from time import sleep, time
+from shutil import move
+from logging import getLogger
+
+import pycurl
+
+from HTTPChunk import ChunkInfo, HTTPChunk
+from HTTPRequest import BadHeader
+
+from pyload.plugins.Plugin import Abort
+from pyload.utils import safe_join, fs_encode
+
+class HTTPDownload:
+ """ loads a url http + ftp """
+
+ def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+ options={}, progressNotify=None, disposition=False):
+ self.url = url
+ self.filename = filename #complete file destination, not only name
+ self.get = get
+ self.post = post
+ self.referer = referer
+ self.cj = cj #cookiejar if cookies are needed
+ self.bucket = bucket
+ self.options = options
+ self.disposition = disposition
+ # all arguments
+
+ self.abort = False
+ self.size = 0
+ self.nameDisposition = None #will be parsed from content disposition
+
+ self.chunks = []
+
+ self.log = getLogger("log")
+
+ try:
+ self.info = ChunkInfo.load(filename)
+ self.info.resume = True #resume is only possible with valid info file
+ self.size = self.info.size
+ self.infoSaved = True
+ except IOError:
+ self.info = ChunkInfo(filename)
+
+ self.chunkSupport = None
+ self.m = pycurl.CurlMulti()
+
+ #needed for speed calculation
+ self.lastArrived = []
+ self.speeds = []
+ self.lastSpeeds = [0, 0]
+
+ self.progressNotify = progressNotify
+
+ @property
+ def speed(self):
+ last = [sum(x) for x in self.lastSpeeds if x]
+ return (sum(self.speeds) + sum(last)) / (1 + len(last))
+
+ @property
+ def arrived(self):
+ return sum([c.arrived for c in self.chunks])
+
+ @property
+ def percent(self):
+ if not self.size: return 0
+ return (self.arrived * 100) / self.size
+
+ def _copyChunks(self):
+ init = fs_encode(self.info.getChunkName(0)) #initial chunk name
+
+ if self.info.getCount() > 1:
+ fo = open(init, "rb+") #first chunkfile
+ for i in range(1, self.info.getCount()):
+ #input file
+ fo.seek(
+ self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks
+ fname = fs_encode("%s.chunk%d" % (self.filename, i))
+ fi = open(fname, "rb")
+ buf = 32 * 1024
+ while True: #copy in chunks, consumes less memory
+ data = fi.read(buf)
+ if not data:
+ break
+ fo.write(data)
+ fi.close()
+ if fo.tell() < self.info.getChunkRange(i)[1]:
+ fo.close()
+ remove(init)
+ self.info.remove() #there are probably invalid chunks
+ raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
+ remove(fname) #remove chunk
+ fo.close()
+
+ if self.nameDisposition and self.disposition:
+ self.filename = safe_join(dirname(self.filename), self.nameDisposition)
+
+ move(init, fs_encode(self.filename))
+ self.info.remove() #remove info file
+
+ def download(self, chunks=1, resume=False):
+ """ returns new filename or None """
+
+ chunks = max(1, chunks)
+ resume = self.info.resume and resume
+
+ try:
+ self._download(chunks, resume)
+ except pycurl.error, e:
+ #code 33 - no resume
+ code = e.args[0]
+ if code == 33:
+ # try again without resume
+ self.log.debug("Errno 33 -> Restart without resume")
+
+ #remove old handles
+ for chunk in self.chunks:
+ self.closeChunk(chunk)
+
+ return self._download(chunks, False)
+ else:
+ raise
+ finally:
+ self.close()
+
+ if self.nameDisposition and self.disposition: return self.nameDisposition
+ return None
+
+ def _download(self, chunks, resume):
+ if not resume:
+ self.info.clear()
+ self.info.addChunk("%s.chunk0" % self.filename, (0, 0)) #create an initial entry
+
+ self.chunks = []
+
+ init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)
+
+ self.chunks.append(init)
+ self.m.add_handle(init.getHandle())
+
+ lastFinishCheck = 0
+ lastTimeCheck = 0
+ chunksDone = set() # set of curl handles that are finished
+ chunksCreated = False
+ done = False
+ if self.info.getCount() > 1: # this is a resume; if we were chunked originally, assume we still can be
+ self.chunkSupport = True
+
+ while 1:
+ #need to create chunks
+ if not chunksCreated and self.chunkSupport and self.size: # will be set later by the first chunk
+
+ if not resume:
+ self.info.setSize(self.size)
+ self.info.createChunks(chunks)
+ self.info.save()
+
+ chunks = self.info.getCount()
+
+ init.setRange(self.info.getChunkRange(0))
+
+ for i in range(1, chunks):
+ c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
+
+ handle = c.getHandle()
+ if handle:
+ self.chunks.append(c)
+ self.m.add_handle(handle)
+ else:
+ # close immediately
+ self.log.debug("Invalid curl handle -> closed")
+ c.close()
+
+ chunksCreated = True
+
+ while 1:
+ ret, num_handles = self.m.perform()
+ if ret != pycurl.E_CALL_MULTI_PERFORM:
+ break
+
+ t = time()
+
+ # reduce these calls
+ while lastFinishCheck + 0.5 < t:
+ # list of failed curl handles
+ failed = []
+ ex = None # save only last exception, we can only raise one anyway
+
+ num_q, ok_list, err_list = self.m.info_read()
+ for c in ok_list:
+ chunk = self.findChunk(c)
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except BadHeader, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(c)
+
+ for c in err_list:
+ curl, errno, msg = c
+ chunk = self.findChunk(curl)
+ #test if chunk was finished
+ if errno != 23 or "0 !=" not in msg:
+ failed.append(chunk)
+ ex = pycurl.error(errno, msg)
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+ continue
+
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except BadHeader, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(curl)
+ if not num_q: # no more infos to get
+
+ # check if init is not finished so we reset download connections
+ # note that other chunks are closed and downloaded with init too
+ if failed and init not in failed and init.c not in chunksDone:
+ self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+ #list of chunks to clean and remove
+ to_clean = filter(lambda x: x is not init, self.chunks)
+ for chunk in to_clean:
+ self.closeChunk(chunk)
+ self.chunks.remove(chunk)
+ remove(fs_encode(self.info.getChunkName(chunk.id)))
+
+ #let first chunk load the rest and update the info file
+ init.resetRange()
+ self.info.clear()
+ self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+ self.info.save()
+ elif failed:
+ raise ex
+
+ lastFinishCheck = t
+
+ if len(chunksDone) >= len(self.chunks):
+ if len(chunksDone) > len(self.chunks):
+ self.log.warning("Finished download chunks size incorrect, please report bug.")
+ done = True #all chunks loaded
+
+ break
+
+ if done:
+ break #all chunks loaded
+
+ # calc speed once per second, averaging over 3 seconds
+ if lastTimeCheck + 1 < t:
+ diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+ enumerate(self.chunks)]
+
+ self.lastSpeeds[1] = self.lastSpeeds[0]
+ self.lastSpeeds[0] = self.speeds
+ self.speeds = [float(a) / (t - lastTimeCheck) for a in diff]
+ self.lastArrived = [c.arrived for c in self.chunks]
+ lastTimeCheck = t
+ self.updateProgress()
+
+ if self.abort:
+ raise Abort()
+
+ #sleep(0.003) # suppress busy waiting - limits dl speed to (1 / x) * buffersize
+ self.m.select(1)
+
+ for chunk in self.chunks:
+ chunk.flushFile() #make sure downloads are written to disk
+
+ self._copyChunks()
+
+ def updateProgress(self):
+ if self.progressNotify:
+ self.progressNotify(self.percent)
+
+ def findChunk(self, handle):
+ """ linear search to find a chunk (should be ok since chunk size is usually low) """
+ for chunk in self.chunks:
+ if chunk.c == handle: return chunk
+
+ def closeChunk(self, chunk):
+ try:
+ self.m.remove_handle(chunk.c)
+ except pycurl.error, e:
+ self.log.debug("Error removing chunk: %s" % str(e))
+ finally:
+ chunk.close()
+
+ def close(self):
+ """ cleanup """
+ for chunk in self.chunks:
+ self.closeChunk(chunk)
+
+ self.chunks = []
+ if hasattr(self, "m"):
+ self.m.close()
+ del self.m
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "info"):
+ del self.info
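+
+# Illustrative usage sketch (URL and paths are assumptions): the initial chunk
+# probes the server, and once the first response reveals size and range
+# support, the remaining chunks join the same CurlMulti loop and the parts
+# are merged by _copyChunks() afterwards.
+#
+#   dl = HTTPDownload("http://example.com/big.iso", "/tmp/big.iso",
+#                     options={"interface": None, "proxies": {}, "ipv6": False})
+#   name = dl.download(chunks=4, resume=True) # disposition name or None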
diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py
new file mode 100644
index 000000000..66e355b77
--- /dev/null
+++ b/pyload/network/HTTPRequest.py
@@ -0,0 +1,303 @@
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: RaNaN
+"""
+
+import pycurl
+
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
+from urllib import quote, urlencode
+from httplib import responses
+from logging import getLogger
+from cStringIO import StringIO
+
+from pyload.plugins.Plugin import Abort
+
+def myquote(url):
+ return quote(url.encode('utf_8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")
+
+def myurlencode(data):
+ data = dict(data)
+ return urlencode(dict((x.encode('utf_8') if isinstance(x, unicode) else x, \
+ y.encode('utf_8') if isinstance(y, unicode) else y ) for x, y in data.iteritems()))
+
+bad_headers = range(400, 404) + range(405, 418) + range(500, 506)
+
+class BadHeader(Exception):
+ def __init__(self, code, content=""):
+ Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)]))
+ self.code = code
+ self.content = content
+
+
+class HTTPRequest:
+ def __init__(self, cookies=None, options=None):
+ self.c = pycurl.Curl()
+ self.rep = StringIO()
+
+ self.cj = cookies #cookiejar
+
+ self.lastURL = None
+ self.lastEffectiveURL = None
+ self.abort = False
+ self.code = 0 # last http code
+
+ self.header = ""
+
+ self.headers = [] #temporary request header
+
+ self.initHandle()
+ self.setInterface(options)
+
+ self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ self.log = getLogger("log")
+
+
+ def initHandle(self):
+ """ sets common options to curl handle """
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.MAXREDIRS, 5)
+ self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+ self.c.setopt(pycurl.NOSIGNAL, 1)
+ self.c.setopt(pycurl.NOPROGRESS, 1)
+ if hasattr(pycurl, "AUTOREFERER"):
+ self.c.setopt(pycurl.AUTOREFERER, 1)
+ self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+ self.c.setopt(pycurl.LOW_SPEED_TIME, 30)
+ self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
+
+ #self.c.setopt(pycurl.VERBOSE, 1)
+
+ self.c.setopt(pycurl.USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+ if pycurl.version_info()[7]:
+ self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+ self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+ "Accept-Language: en-US, en",
+ "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7",
+ "Connection: keep-alive",
+ "Keep-Alive: 300",
+ "Expect:"])
+
+ def setInterface(self, options):
+
+ interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
+
+ if interface and interface.lower() != "none":
+ self.c.setopt(pycurl.INTERFACE, str(interface))
+
+ if proxy:
+ if proxy["type"] == "socks4":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
+ elif proxy["type"] == "socks5":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
+ else:
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
+
+ self.c.setopt(pycurl.PROXY, str(proxy["address"]))
+ self.c.setopt(pycurl.PROXYPORT, proxy["port"])
+
+ if proxy["username"]:
+ self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
+
+ if ipv6:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
+ else:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+
+ if "auth" in options:
+ self.c.setopt(pycurl.USERPWD, str(options["auth"]))
+
+ if "timeout" in options:
+ self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
+
+
+ def addCookies(self):
+ """ put cookies from curl handle to cj """
+ if self.cj:
+ self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST))
+
+ def getCookies(self):
+ """ add cookies from cj to curl handle """
+ if self.cj:
+ for c in self.cj.getCookies():
+ self.c.setopt(pycurl.COOKIELIST, c)
+ return
+
+ def clearCookies(self):
+ self.c.setopt(pycurl.COOKIELIST, "")
+
+ def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
+ """ sets everything needed for the request """
+
+ url = myquote(url)
+
+ if get:
+ get = urlencode(get)
+ url = "%s?%s" % (url, get)
+
+ self.c.setopt(pycurl.URL, url)
+ self.c.lastUrl = url
+
+ if post:
+ self.c.setopt(pycurl.POST, 1)
+ if not multipart:
+ if type(post) == unicode:
+ post = str(post) #unicode not allowed
+ elif type(post) == str:
+ pass
+ else:
+ post = myurlencode(post)
+
+ self.c.setopt(pycurl.POSTFIELDS, post)
+ else:
+ post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()]
+ self.c.setopt(pycurl.HTTPPOST, post)
+ else:
+ self.c.setopt(pycurl.POST, 0)
+
+ if referer and self.lastURL:
+ self.c.setopt(pycurl.REFERER, str(self.lastURL))
+
+ if cookies:
+ self.c.setopt(pycurl.COOKIEFILE, "")
+ self.c.setopt(pycurl.COOKIEJAR, "")
+ self.getCookies()
+
+
+ def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
+ """ load and returns a given page """
+
+ self.setRequestContext(url, get, post, referer, cookies, multipart)
+
+ self.header = ""
+
+ self.c.setopt(pycurl.HTTPHEADER, self.headers)
+
+ if just_header:
+ self.c.setopt(pycurl.FOLLOWLOCATION, 0)
+ self.c.setopt(pycurl.NOBODY, 1)
+ if post:
+ self.c.setopt(pycurl.POST, 1)
+ else:
+ self.c.setopt(pycurl.HTTPGET, 1)
+ self.c.perform()
+ rep = self.header
+
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.NOBODY, 0)
+
+ else:
+ self.c.perform()
+ rep = self.getResponse()
+
+ self.c.setopt(pycurl.POSTFIELDS, "")
+ self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+ self.code = self.verifyHeader()
+
+ self.addCookies()
+
+ if decode:
+ rep = self.decodeResponse(rep)
+
+ return rep
+
+ def verifyHeader(self):
+ """ raise an exceptions on bad headers """
+ code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+ if code in bad_headers:
+ #404 will NOT raise an exception
+ raise BadHeader(code, self.getResponse())
+ return code
+
+ def checkHeader(self):
+ """ check if header indicates failure"""
+ return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers
+
+ def getResponse(self):
+ """ retrieve response from string io """
+ if self.rep is None: return ""
+ value = self.rep.getvalue()
+ self.rep.close()
+ self.rep = StringIO()
+ return value
+
+ def decodeResponse(self, rep):
+ """ decode with correct encoding, relies on header """
+ header = self.header.splitlines()
+ encoding = "utf8" # default encoding
+
+ for line in header:
+ line = line.lower().replace(" ", "")
+ if not line.startswith("content-type:") or\
+ ("text" not in line and "application" not in line):
+ continue
+
+ none, delimiter, charset = line.rpartition("charset=")
+ if delimiter:
+ charset = charset.split(";")
+ if charset:
+ encoding = charset[0]
+
+ try:
+ #self.log.debug("Decoded %s" % encoding )
+ if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+ encoding = 'utf-8-sig'
+
+ decoder = getincrementaldecoder(encoding)("replace")
+ rep = decoder.decode(rep, True)
+
+ #TODO: html_unescape as default
+
+ except LookupError:
+ self.log.debug("No Decoder foung for %s" % encoding)
+ except Exception:
+ self.log.debug("Error when decoding string from %s." % encoding)
+
+ return rep
+
+ def write(self, buf):
+ """ writes response """
+ if self.rep.tell() > 1000000 or self.abort:
+ rep = self.getResponse()
+ if self.abort: raise Abort()
+ f = open("response.dump", "wb")
+ f.write(rep)
+ f.close()
+ raise Exception("Loaded Url exceeded limit")
+
+ self.rep.write(buf)
+
+ def writeHeader(self, buf):
+ """ writes header """
+ self.header += buf
+
+ def putHeader(self, name, value):
+ self.headers.append("%s: %s" % (name, value))
+
+ def clearHeaders(self):
+ self.headers = []
+
+ def close(self):
+ """ cleanup, unusable after this """
+ self.rep.close()
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "c"):
+ self.c.close()
+ del self.c
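+
+# Illustrative usage sketch (URL is an assumption); the options dict must
+# provide the keys that setInterface() reads ("interface", "proxies", "ipv6"):
+#
+#   req = HTTPRequest(options={"interface": None, "proxies": {}, "ipv6": False})
+#   try:
+#       html = req.load("http://example.com/", get={"page": 1}, decode=True)
+#   finally:
+#       req.close()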
diff --git a/pyload/network/RequestFactory.py b/pyload/network/RequestFactory.py
new file mode 100644
index 000000000..6811b11d8
--- /dev/null
+++ b/pyload/network/RequestFactory.py
@@ -0,0 +1,126 @@
+# -*- coding: utf-8 -*-
+
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: mkaay, RaNaN
+"""
+
+from threading import Lock
+
+from Browser import Browser
+from Bucket import Bucket
+from HTTPRequest import HTTPRequest
+from CookieJar import CookieJar
+
+from XDCCRequest import XDCCRequest
+
+class RequestFactory:
+ def __init__(self, core):
+ self.lock = Lock()
+ self.core = core
+ self.bucket = Bucket()
+ self.updateBucket()
+ self.cookiejars = {}
+
+ def iface(self):
+ return self.core.config["download"]["interface"]
+
+ def getRequest(self, pluginName, account=None, type="HTTP"):
+ self.lock.acquire()
+ try:
+ if type == "XDCC":
+ return XDCCRequest(proxies=self.getProxies())
+
+ req = Browser(self.bucket, self.getOptions())
+
+ if account:
+ cj = self.getCookieJar(pluginName, account)
+ req.setCookieJar(cj)
+ else:
+ req.setCookieJar(CookieJar(pluginName))
+
+ return req
+ finally:
+ self.lock.release() # release on every return path, including XDCC
+
+ def getHTTPRequest(self, **kwargs):
+ """ returns a http request, dont forget to close it ! """
+ options = self.getOptions()
+ options.update(kwargs) # submit kwargs as additional options
+ return HTTPRequest(CookieJar(None), options)
+
+ def getURL(self, *args, **kwargs):
+ """ see HTTPRequest for argument list """
+ h = HTTPRequest(None, self.getOptions())
+ try:
+ rep = h.load(*args, **kwargs)
+ finally:
+ h.close()
+
+ return rep
+
+ def getCookieJar(self, pluginName, account=None):
+ if (pluginName, account) in self.cookiejars:
+ return self.cookiejars[(pluginName, account)]
+
+ cj = CookieJar(pluginName, account)
+ self.cookiejars[(pluginName, account)] = cj
+ return cj
+
+ def getProxies(self):
+ """ returns a proxy list for the request classes """
+ if not self.core.config["proxy"]["proxy"]:
+ return {}
+ else:
+ type = "http"
+ setting = self.core.config["proxy"]["type"].lower()
+ if setting == "socks4": type = "socks4"
+ elif setting == "socks5": type = "socks5"
+
+ username = None
+ if self.core.config["proxy"]["username"] and self.core.config["proxy"]["username"].lower() != "none":
+ username = self.core.config["proxy"]["username"]
+
+ pw = None
+ if self.core.config["proxy"]["password"] and self.core.config["proxy"]["password"].lower() != "none":
+ pw = self.core.config["proxy"]["password"]
+
+ return {
+ "type": type,
+ "address": self.core.config["proxy"]["address"],
+ "port": self.core.config["proxy"]["port"],
+ "username": username,
+ "password": pw,
+ }
+
+ def getOptions(self):
+ """returns options needed for pycurl"""
+ return {"interface": self.iface(),
+ "proxies": self.getProxies(),
+ "ipv6": self.core.config["download"]["ipv6"]}
+
+ def updateBucket(self):
+ """ set values in the bucket according to settings"""
+ if not self.core.config["download"]["limit_speed"]:
+ self.bucket.setRate(-1)
+ else:
+ self.bucket.setRate(self.core.config["download"]["max_speed"] * 1024)
+
+# needs pyreq in global namespace
+def getURL(*args, **kwargs):
+ return pyreq.getURL(*args, **kwargs)
+
+
+def getRequest(*args, **kwargs):
+ return pyreq.getHTTPRequest()
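+
+# Illustrative wiring sketch (core is the assumed pyLoad core object whose
+# config keys are read above):
+#
+#   pyreq = RequestFactory(core) # the module-level helpers expect this global
+#   browser = pyreq.getRequest("SomeHosterPlugin")
+#   html = getURL("http://example.com/", decode=True)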
diff --git a/pyload/network/XDCCRequest.py b/pyload/network/XDCCRequest.py
new file mode 100644
index 000000000..9ae52f72b
--- /dev/null
+++ b/pyload/network/XDCCRequest.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: jeix
+"""
+
+import socket
+import re
+
+from os import remove
+from os.path import exists
+
+from time import time
+
+import struct
+from select import select
+
+from pyload.plugins.Plugin import Abort
+
+
+class XDCCRequest:
+ def __init__(self, timeout=30, proxies={}):
+
+ self.proxies = proxies
+ self.timeout = timeout
+
+ self.filesize = 0
+ self.recv = 0
+ self.speed = 0
+
+ self.abort = False
+
+ def createSocket(self):
+ # proxytype = None
+ # proxy = None
+ # if self.proxies.has_key("socks5"):
+ # proxytype = socks.PROXY_TYPE_SOCKS5
+ # proxy = self.proxies["socks5"]
+ # elif self.proxies.has_key("socks4"):
+ # proxytype = socks.PROXY_TYPE_SOCKS4
+ # proxy = self.proxies["socks4"]
+ # if proxytype:
+ # sock = socks.socksocket()
+ # t = _parse_proxy(proxy)
+ # sock.setproxy(proxytype, addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
+ # else:
+ # sock = socket.socket()
+ # return sock
+
+ return socket.socket()
+
+ def download(self, ip, port, filename, irc, progressNotify=None):
+
+ ircbuffer = ""
+ lastUpdate = time()
+ cumRecvLen = 0
+
+ dccsock = self.createSocket()
+
+ dccsock.settimeout(self.timeout)
+ dccsock.connect((ip, port))
+
+ if exists(filename):
+ i = 0
+ nameParts = filename.rpartition(".")
+ while True:
+ newfilename = "%s-%d%s%s" % (nameParts[0], i, nameParts[1], nameParts[2])
+ i += 1
+
+ if not exists(newfilename):
+ filename = newfilename
+ break
+
+ fh = open(filename, "wb")
+
+ # recv loop for dcc socket
+ while True:
+ if self.abort:
+ dccsock.close()
+ fh.close()
+ remove(filename)
+ raise Abort()
+
+ ircbuffer = self._keepAlive(irc, ircbuffer) # keep the unconsumed remainder
+
+ data = dccsock.recv(4096)
+ dataLen = len(data)
+ self.recv += dataLen
+
+ cumRecvLen += dataLen
+
+ now = time()
+ timespan = now - lastUpdate
+ if timespan > 1:
+ self.speed = cumRecvLen / timespan
+ cumRecvLen = 0
+ lastUpdate = now
+
+ if progressNotify:
+ progressNotify(self.percent)
+
+ if not data:
+ break
+
+ fh.write(data)
+
+ # acknowledge data by sending number of received bytes
+ dccsock.send(struct.pack('!I', self.recv))
+
+ dccsock.close()
+ fh.close()
+
+ return filename
+
+ def _keepAlive(self, sock, readbuffer):
+ """ answers server pings; returns the unconsumed buffer remainder """
+ fdset = select([sock], [], [], 0)
+ if sock not in fdset[0]:
+ return readbuffer
+
+ readbuffer += sock.recv(1024)
+ temp = readbuffer.split("\n")
+ readbuffer = temp.pop() # strings are immutable, so hand the remainder back to the caller
+
+ for line in temp:
+ line = line.rstrip()
+ first = line.split()
+ if first and first[0] == "PING": # guard against empty lines
+ sock.send("PONG %s\r\n" % first[1])
+
+ return readbuffer
+
+ def abortDownloads(self):
+ self.abort = True
+
+ @property
+ def size(self):
+ return self.filesize
+
+ @property
+ def arrived(self):
+ return self.recv
+
+ @property
+ def percent(self):
+ if not self.filesize: return 0
+ return (self.recv * 100) / self.filesize
+
+ def close(self):
+ pass
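+
+# Protocol note: DCC SEND expects the receiver to acknowledge data by sending
+# the running byte total as a 32-bit network-order integer, which is the
+# struct.pack("!I", self.recv) call above. Illustrative sketch (host, port
+# and irc socket are assumptions):
+#
+#   xdcc = XDCCRequest(timeout=30)
+#   path = xdcc.download("198.51.100.7", 2000, "/tmp/pack.rar", ircsock)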
diff --git a/pyload/network/__init__.py b/pyload/network/__init__.py
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/pyload/network/__init__.py
@@ -0,0 +1 @@
+