Diffstat (limited to 'pyload/plugins/network')
-rw-r--r--  pyload/plugins/network/CurlChunk.py       299
-rw-r--r--  pyload/plugins/network/CurlDownload.py    323
-rw-r--r--  pyload/plugins/network/CurlRequest.py     314
-rw-r--r--  pyload/plugins/network/DefaultRequest.py    9
-rw-r--r--  pyload/plugins/network/__init__.py          1
5 files changed, 946 insertions, 0 deletions
diff --git a/pyload/plugins/network/CurlChunk.py b/pyload/plugins/network/CurlChunk.py
new file mode 100644
index 000000000..4250db2ce
--- /dev/null
+++ b/pyload/plugins/network/CurlChunk.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+from os import remove, stat, fsync
+from os.path import exists
+from time import sleep
+from re import search
+
+import codecs
+import pycurl
+
+from pyload.utils import remove_chars
+from pyload.utils.fs import fs_encode
+
+from CurlRequest import CurlRequest
+
+class WrongFormat(Exception):
+ pass
+
+
+class ChunkInfo():
+ def __init__(self, name):
+ self.name = unicode(name)
+ self.size = 0
+ self.resume = False
+ self.chunks = []
+
+ def __repr__(self):
+ ret = "ChunkInfo: %s, %s\n" % (self.name, self.size)
+ for i, c in enumerate(self.chunks):
+ ret += "%s# %s\n" % (i, c[1])
+
+ return ret
+
+ def setSize(self, size):
+ self.size = int(size)
+
+ def addChunk(self, name, range):
+ self.chunks.append((name, range))
+
+ def clear(self):
+ self.chunks = []
+
+ def createChunks(self, chunks):
+ self.clear()
+ chunk_size = self.size / chunks
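+ # note: floor division; the last chunk is extended up to self.size - 1 below,
+ # e.g. size=100, chunks=3 -> ranges (0, 33), (34, 67), (68, 99)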
+
+ current = 0
+ for i in range(chunks):
+ end = self.size - 1 if (i == chunks - 1) else current + chunk_size
+ self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+ current += chunk_size + 1
+
+
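+ # save() persists the layout to "<name>.chunks" as plain text: a "name:" line,
+ # a "size:" line, then per chunk a "#<i>:" line followed by tab-indented
+ # "name:" and "range:<start>-<end>" lines (read back by load() below)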
+ def save(self):
+ fs_name = fs_encode("%s.chunks" % self.name)
+ fh = codecs.open(fs_name, "w", "utf_8")
+ fh.write("name:%s\n" % self.name)
+ fh.write("size:%s\n" % self.size)
+ for i, c in enumerate(self.chunks):
+ fh.write("#%d:\n" % i)
+ fh.write("\tname:%s\n" % c[0])
+ fh.write("\trange:%i-%i\n" % c[1])
+ fh.close()
+
+ @staticmethod
+ def load(name):
+ fs_name = fs_encode("%s.chunks" % name)
+ if not exists(fs_name):
+ raise IOError()
+ fh = codecs.open(fs_name, "r", "utf_8")
+ name = fh.readline()[:-1]
+ size = fh.readline()[:-1]
+ if name.startswith("name:") and size.startswith("size:"):
+ name = name[5:]
+ size = size[5:]
+ else:
+ fh.close()
+ raise WrongFormat()
+ ci = ChunkInfo(name)
+ ci.loaded = True
+ ci.setSize(size)
+ while True:
+ if not fh.readline(): #skip line
+ break
+ name = fh.readline()[1:-1]
+ range = fh.readline()[1:-1]
+ if name.startswith("name:") and range.startswith("range:"):
+ name = name[5:]
+ range = range[6:].split("-")
+ else:
+ raise WrongFormat()
+
+ ci.addChunk(name, (long(range[0]), long(range[1])))
+ fh.close()
+ return ci
+
+ def remove(self):
+ fs_name = fs_encode("%s.chunks" % self.name)
+ if exists(fs_name): remove(fs_name)
+
+ def getCount(self):
+ return len(self.chunks)
+
+ def getChunkName(self, index):
+ return self.chunks[index][0]
+
+ def getChunkRange(self, index):
+ return self.chunks[index][1]
+
+
+class CurlChunk(CurlRequest):
+ def __init__(self, id, parent, range=None, resume=False):
+ self.setContext(*parent.getContext())
+
+ self.id = id
+ self.p = parent # CurlDownload instance
+ self.range = range # tuple (start, end)
+ self.resume = resume
+ self.log = parent.log
+
+ self.size = range[1] - range[0] if range else -1
+ self.arrived = 0
+ self.lastURL = self.p.referer
+
+ self.c = pycurl.Curl()
+
+ self.header = ""
+ self.headerParsed = False #indicates if the header has been processed
+
+ self.fp = None #file handle
+
+ self.initContext()
+
+ self.BOMChecked = False # check and remove byte order mark
+
+ self.rep = None
+
+ self.sleep = 0.000
+ self.lastSize = 0
+
+ def __repr__(self):
+ return "<CurlChunk id=%d, size=%d, arrived=%d>" % (self.id, self.size, self.arrived)
+
+ @property
+ def cj(self):
+ return self.p.context
+
+ def getHandle(self):
+ """ returns a Curl handle ready to use for perform/multiperform """
+
+ self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.cj)
+ self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ # request all bytes, since some servers in Russia seem to have a defective arithmetic unit
+
+ fs_name = fs_encode(self.p.info.getChunkName(self.id))
+ if self.resume:
+ self.fp = open(fs_name, "ab")
+ self.arrived = self.fp.tell()
+ if not self.arrived:
+ self.arrived = stat(fs_name).st_size
+
+ if self.range:
+ #do nothing if chunk already finished
+ if self.arrived + self.range[0] >= self.range[1]: return None
+
+ if self.id == len(self.p.info.chunks) - 1: # as the last chunk, don't set an end range, so we get everything
+ range = "%i-" % (self.arrived + self.range[0])
+ else:
+ range = "%i-%i" % (self.arrived + self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+ self.log.debug("Chunked resume with range %s" % range)
+ self.c.setopt(pycurl.RANGE, range)
+ else:
+ self.log.debug("Resume File from %i" % self.arrived)
+ self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+ else:
+ if self.range:
+ if self.id == len(self.p.info.chunks) - 1: # see above
+ range = "%i-" % self.range[0]
+ else:
+ range = "%i-%i" % (self.range[0], min(self.range[1] + 1, self.p.size - 1))
+
+ self.log.debug("Chunked with range %s" % range)
+ self.c.setopt(pycurl.RANGE, range)
+
+ self.fp = open(fs_name, "wb")
+
+ return self.c
+
+ def writeHeader(self, buf):
+ self.header += buf
+ # @TODO: forward headers? Possibly unneeded when we only parse valid 200 headers
+ # as first chunk, we will parse the headers
+ if not self.range and self.header.endswith("\r\n\r\n"):
+ self.parseHeader()
+ elif not self.range and buf.startswith("150") and "data connection" in buf: #ftp file size parsing
+ size = search(r"(\d+) bytes", buf)
+ if size:
+ self.p._size = int(size.group(1))
+ self.p.chunkSupport = True
+
+ self.headerParsed = True
+
+ def writeBody(self, buf):
+ #ignore BOM, it confuses unrar
+ if not self.BOMChecked:
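+ # 239, 187, 191 == 0xEF 0xBB 0xBF, the UTF-8 byte order mark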
+ if [ord(b) for b in buf[:3]] == [239, 187, 191]:
+ buf = buf[3:]
+ self.BOMChecked = True
+
+ size = len(buf)
+
+ self.arrived += size
+
+ self.fp.write(buf)
+
+ if self.p.bucket:
+ sleep(self.p.bucket.consumed(size))
+ else:
+ # Avoid small buffers: increase the sleep time slowly when the buffer size shrinks,
+ # otherwise reduce the sleep time proportionally (values are based on tests),
+ # so CPU time is saved without reducing bandwidth too much
+
+ if size < self.lastSize:
+ self.sleep += 0.002
+ else:
+ self.sleep *= 0.7
+
+ self.lastSize = size
+
+ sleep(self.sleep)
+
+ if self.range and self.arrived > self.size:
+ return 0 #close if we have enough data
+
+
+ def parseHeader(self):
+ """parse data from received header"""
+ for orgline in self.decodeResponse(self.header).splitlines():
+ line = orgline.strip().lower()
+ if line.startswith("accept-ranges") and "bytes" in line:
+ self.p.chunkSupport = True
+
+ if "content-disposition" in line:
+
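+ # matches both the plain form (filename=...) and the RFC 5987 encoded form
+ # (filename*=<encoding>''...); the header line was lowercased above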
+ m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", line)
+ if m:
+ name = remove_chars(m.groupdict()['name'], "\"';/").strip()
+ self.p._name = name
+ self.log.debug("Content-Disposition: %s" % name)
+
+ if not self.resume and line.startswith("content-length"):
+ self.p._size = int(line.split(":")[1])
+
+ self.headerParsed = True
+
+ def stop(self):
+ """The download will not proceed after next call of writeBody"""
+ self.range = [0,0]
+ self.size = 0
+
+ def resetRange(self):
+ """ Reset the range, so the download will load all data available """
+ self.range = None
+
+ def setRange(self, range):
+ self.range = range
+ self.size = range[1] - range[0]
+
+ def flushFile(self):
+ """ flush and close file """
+ self.fp.flush()
+ fsync(self.fp.fileno()) #make sure everything was written to disk
+ self.fp.close() #needs to be closed, or merging chunks will fail
+
+ def close(self):
+ """ closes everything, unusable after this """
+ if self.fp: self.fp.close()
+ self.c.close()
+ if hasattr(self, "p"): del self.p
diff --git a/pyload/plugins/network/CurlDownload.py b/pyload/plugins/network/CurlDownload.py
new file mode 100644
index 000000000..5de83ec7b
--- /dev/null
+++ b/pyload/plugins/network/CurlDownload.py
@@ -0,0 +1,323 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+from os import remove
+from os.path import dirname
+from time import time
+from shutil import move
+
+import pycurl
+
+from pyload.plugins.Base import Abort
+from pyload.utils.fs import save_join, fs_encode
+
+from ..Download import Download
+from CurlChunk import ChunkInfo, CurlChunk
+from CurlRequest import ResponseException
+
+# TODO: save content-disposition for resuming
+
+class CurlDownload(Download):
+ """ loads an url, http + ftp supported """
+
+ # def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+ # options={}, disposition=False):
+
+ def __init__(self, *args, **kwargs):
+ Download.__init__(self, *args, **kwargs)
+
+ self.path = None
+ self.disposition = False
+
+ self.chunks = []
+ self.chunkSupport = None
+
+ self.m = pycurl.CurlMulti()
+
+ #needed for speed calculation
+ self.lastArrived = []
+ self.speeds = []
+ self.lastSpeeds = [0, 0]
+
+ @property
+ def speed(self):
+ last = [sum(x) for x in self.lastSpeeds if x]
+ return (sum(self.speeds) + sum(last)) / (1 + len(last))
+
+ @property
+ def arrived(self):
+ return sum(c.arrived for c in self.chunks) if self.chunks else self._size
+
+ @property
+ def name(self):
+ return self._name if self.disposition else None
+
+ def _copyChunks(self):
+ init = fs_encode(self.info.getChunkName(0)) #initial chunk name
+
+ if self.info.getCount() > 1:
+ fo = open(init, "rb+") #first chunkfile
+ for i in range(1, self.info.getCount()):
+ #input file
+ fo.seek(
+ self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks
+ fname = fs_encode("%s.chunk%d" % (self.path, i))
+ fi = open(fname, "rb")
+ buf = 32 * 1024
+ while True: #copy in chunks, consumes less memory
+ data = fi.read(buf)
+ if not data:
+ break
+ fo.write(data)
+ fi.close()
+ if fo.tell() < self.info.getChunkRange(i)[1]:
+ fo.close()
+ remove(init)
+ self.info.remove() #there are probably invalid chunks
+ raise Exception("Downloaded content was smaller than expected. Try to reduce download connections.")
+ remove(fname) #remove chunk
+ fo.close()
+
+ if self.name:
+ self.filename = save_join(dirname(self.path), self.name)
+
+ move(init, fs_encode(self.path))
+ self.info.remove() #remove info file
+
+ def checkResume(self):
+ try:
+ self.info = ChunkInfo.load(self.path)
+ self.info.resume = True #resume is only possible with valid info file
+ self._size = self.info.size
+ self.infoSaved = True
+ except IOError:
+ self.info = ChunkInfo(self.path)
+
+ def download(self, uri, path, get={}, post={}, referer=True, disposition=False, chunks=1, resume=False):
+ """ returns new filename or None """
+ self.url = uri
+ self.path = path
+ self.disposition = disposition
+ self.get = get
+ self.post = post
+ self.referer = referer
+
+ self.checkResume()
+ chunks = max(1, chunks)
+ resume = self.info.resume and resume
+
+ try:
+ self._download(chunks, resume)
+ except pycurl.error, e:
+ #code 33 - no resume
+ code = e.args[0]
+ if code == 33:
+ # try again without resume
+ self.log.debug("Errno 33 -> Restart without resume")
+
+ #remove old handles
+ for chunk in self.chunks:
+ self.closeChunk(chunk)
+
+ return self._download(chunks, False)
+ else:
+ raise
+ finally:
+ self.close()
+
+ return self.name
+
+ def _download(self, chunks, resume):
+ if not resume:
+ self.info.clear()
+ self.info.addChunk("%s.chunk0" % self.path, (0, 0)) #create an initial entry
+
+ self.chunks = []
+
+ init = CurlChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)
+
+ self.chunks.append(init)
+ self.m.add_handle(init.getHandle())
+
+ lastFinishCheck = 0
+ lastTimeCheck = 0
+ chunksDone = set() # list of curl handles that are finished
+ chunksCreated = False
+ done = False
+ if self.info.getCount() > 1: # this is a resume; if the download was chunked originally, assume it still can be
+ self.chunkSupport = True
+
+ while 1:
+ #need to create chunks
+ if not chunksCreated and self.chunkSupport and self.size: #will be set later by first chunk
+
+ if not resume:
+ self.info.setSize(self.size)
+ self.info.createChunks(chunks)
+ self.info.save()
+
+ chunks = self.info.getCount()
+
+ init.setRange(self.info.getChunkRange(0))
+
+ for i in range(1, chunks):
+ c = CurlChunk(i, self, self.info.getChunkRange(i), resume)
+
+ handle = c.getHandle()
+ if handle:
+ self.chunks.append(c)
+ self.m.add_handle(handle)
+ else:
+ #close immediately
+ self.log.debug("Invalid curl handle -> closed")
+ c.close()
+
+ chunksCreated = True
+
+ while 1:
+ ret, num_handles = self.m.perform()
+ if ret != pycurl.E_CALL_MULTI_PERFORM:
+ break
+
+ t = time()
+
+ # reduce these calls
+ # when num_q is 0, the loop is exited
+ while lastFinishCheck + 0.5 < t:
+ # list of failed curl handles
+ failed = []
+ ex = None # save only last exception, we can only raise one anyway
+
+ num_q, ok_list, err_list = self.m.info_read()
+ for c in ok_list:
+ chunk = self.findChunk(c)
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except ResponseException, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(c)
+
+ for c in err_list:
+ curl, errno, msg = c
+ chunk = self.findChunk(curl)
+ #test if chunk was finished
+ if errno != 23 or "0 !=" not in msg:
+ failed.append(chunk)
+ ex = pycurl.error(errno, msg)
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(ex)))
+ continue
+
+ try: # check if the header implies success, else add it to failed list
+ chunk.verifyHeader()
+ except ResponseException, e:
+ self.log.debug("Chunk %d failed: %s" % (chunk.id + 1, str(e)))
+ failed.append(chunk)
+ ex = e
+ else:
+ chunksDone.add(curl)
+ if not num_q: # no more info to get
+
+ # if chunks failed but the initial chunk did not fail and is not finished yet,
+ # fall back: the other chunks are closed and everything is downloaded over the initial connection
+ if failed and init not in failed and init.c not in chunksDone:
+ self.log.error(_("Download chunks failed, fallback to single connection | %s" % (str(ex))))
+
+ #list of chunks to clean and remove
+ to_clean = filter(lambda x: x is not init, self.chunks)
+ for chunk in to_clean:
+ self.closeChunk(chunk)
+ self.chunks.remove(chunk)
+ remove(fs_encode(self.info.getChunkName(chunk.id)))
+
+ #let first chunk load the rest and update the info file
+ init.resetRange()
+ self.info.clear()
+ self.info.addChunk("%s.chunk0" % self.filename, (0, self.size))
+ self.info.save()
+ elif failed:
+ raise ex
+
+ lastFinishCheck = t
+
+ if len(chunksDone) >= len(self.chunks):
+ if len(chunksDone) > len(self.chunks):
+ self.log.warning("Finished download chunks size incorrect, please report bug.")
+ done = True #all chunks loaded
+
+ break
+
+ if done:
+ break #all chunks loaded
+
+ # calc speed once per second, averaging over 3 seconds
+ if lastTimeCheck + 1 < t:
+ diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+ enumerate(self.chunks)]
+
+ self.lastSpeeds[1] = self.lastSpeeds[0]
+ self.lastSpeeds[0] = self.speeds
+ self.speeds = [float(a) / (t - lastTimeCheck) for a in diff]
+ self.lastArrived = [c.arrived for c in self.chunks]
+ lastTimeCheck = t
+
+ if self.doAbort:
+ raise Abort()
+
+ self.m.select(1)
+
+ for chunk in self.chunks:
+ chunk.flushFile() #make sure downloads are written to disk
+
+ self._copyChunks()
+
+ def findChunk(self, handle):
+ """ linear search to find a chunk (should be ok since chunk size is usually low) """
+ for chunk in self.chunks:
+ if chunk.c == handle: return chunk
+
+ def closeChunk(self, chunk):
+ try:
+ self.m.remove_handle(chunk.c)
+ except pycurl.error, e:
+ self.log.debug("Error removing chunk: %s" % str(e))
+ finally:
+ chunk.close()
+
+ def close(self):
+ """ cleanup """
+ for chunk in self.chunks:
+ self.closeChunk(chunk)
+ else:
+ # Workaround: pycurl segfaults when closing a multi handle that never had any curl handles
+ if hasattr(self, "m"):
+ c = pycurl.Curl()
+ self.m.add_handle(c)
+ self.m.remove_handle(c)
+ c.close()
+
+ self.chunks = []
+ if hasattr(self, "m"):
+ self.m.close()
+ del self.m
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "info"):
+ del self.info
\ No newline at end of file
diff --git a/pyload/plugins/network/CurlRequest.py b/pyload/plugins/network/CurlRequest.py
new file mode 100644
index 000000000..4630403df
--- /dev/null
+++ b/pyload/plugins/network/CurlRequest.py
@@ -0,0 +1,314 @@
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subjected to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+import pycurl
+
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
+from urllib import quote, urlencode
+from httplib import responses
+from cStringIO import StringIO
+
+from pyload.plugins.Base import Abort
+from pyload.network.CookieJar import CookieJar
+
+from ..Request import Request, ResponseException
+
+
+def myquote(url):
+ return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")
+
+
+def myurlencode(data):
+ data = dict(data)
+ return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \
+ y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in data.iteritems()))
+
+
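+# HTTP status codes treated as failures by verifyHeader(): 400-417 and 500-505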
+bad_headers = range(400, 418) + range(500, 506)
+
+
+class CurlRequest(Request):
+ """ Request class based on libcurl """
+
+ __version__ = "0.1"
+
+ CONTEXT_CLASS = CookieJar
+
+ def __init__(self, *args, **kwargs):
+ self.c = pycurl.Curl()
+ Request.__init__(self, *args, **kwargs)
+
+ self.rep = StringIO()
+ self.lastURL = None
+ self.lastEffectiveURL = None
+
+ # cookiejar defines the context
+ self.cj = self.context
+
+ self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ # TODO: addAuth, addHeader
+
+ def initContext(self):
+ self.initHandle()
+
+ if self.config:
+ self.setInterface(self.config)
+ self.initOptions(self.config)
+
+ def initHandle(self):
+ """ sets common options to curl handle """
+
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.MAXREDIRS, 5)
+ self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+ self.c.setopt(pycurl.NOSIGNAL, 1)
+ self.c.setopt(pycurl.NOPROGRESS, 1)
+ if hasattr(pycurl, "AUTOREFERER"):
+ self.c.setopt(pycurl.AUTOREFERER, 1)
+ self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+ # Interval for low speed, detects connection loss, but can abort dl if hoster stalls the download
+ self.c.setopt(pycurl.LOW_SPEED_TIME, 45)
+ self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
+
+ # don't save the cookies
+ self.c.setopt(pycurl.COOKIEFILE, "")
+ self.c.setopt(pycurl.COOKIEJAR, "")
+
+ #self.c.setopt(pycurl.VERBOSE, 1)
+
+ self.c.setopt(pycurl.USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+ if pycurl.version_info()[7]:
+ self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+ self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+ "Accept-Language: en-US,en",
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+ "Connection: keep-alive",
+ "Keep-Alive: 300",
+ "Expect:"])
+
+ def setInterface(self, options):
+
+ interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
+
+ if interface and interface.lower() != "none":
+ self.c.setopt(pycurl.INTERFACE, str(interface))
+
+ if proxy:
+ if proxy["type"] == "socks4":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
+ elif proxy["type"] == "socks5":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
+ else:
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
+
+ self.c.setopt(pycurl.PROXY, str(proxy["address"]))
+ self.c.setopt(pycurl.PROXYPORT, proxy["port"])
+
+ if proxy["username"]:
+ self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
+
+ if ipv6:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
+ else:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+
+ if "timeout" in options:
+ self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
+
+ def initOptions(self, options):
+ """ Sets same options as available in pycurl """
+ for k, v in options.iteritems():
+ if hasattr(pycurl, k):
+ self.c.setopt(getattr(pycurl, k), v)
+
+ def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
+ """ sets everything needed for the request """
+ url = myquote(url)
+
+ if get:
+ get = urlencode(get)
+ url = "%s?%s" % (url, get)
+
+ self.c.setopt(pycurl.URL, url)
+ self.lastURL = url
+
+ if post:
+ self.c.setopt(pycurl.POST, 1)
+ if not multipart:
+ if type(post) == unicode:
+ post = str(post) #unicode not allowed
+ elif type(post) == str:
+ pass
+ else:
+ post = myurlencode(post)
+
+ self.c.setopt(pycurl.POSTFIELDS, post)
+ else:
+ post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()]
+ self.c.setopt(pycurl.HTTPPOST, post)
+ else:
+ self.c.setopt(pycurl.POST, 0)
+
+ if referer and self.lastURL:
+ self.c.setopt(pycurl.REFERER, str(self.lastURL))
+ else:
+ self.c.setopt(pycurl.REFERER, "")
+
+ if cookies:
+ self.c.setopt(pycurl.COOKIELIST, self.cj.output())
+ else:
+ # Magic string that erases all cookies
+ self.c.setopt(pycurl.COOKIELIST, "ALL")
+
+ # TODO: remove auth again
+ if "auth" in self.options:
+ self.c.setopt(pycurl.USERPWD, str(self.options["auth"]))
+
+
+ def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
+ """ load and returns a given page """
+
+ self.setRequestContext(url, get, post, referer, cookies, multipart)
+
+ # TODO: use http/rfc message instead
+ self.header = ""
+
+ if "header" in self.options:
+ self.c.setopt(pycurl.HTTPHEADER, self.options["header"])
+
+ if just_header:
+ self.c.setopt(pycurl.FOLLOWLOCATION, 0)
+ self.c.setopt(pycurl.NOBODY, 1) #TODO: nobody= no post?
+
+ # overwrite HEAD request, we want a common request type
+ if post:
+ self.c.setopt(pycurl.CUSTOMREQUEST, "POST")
+ else:
+ self.c.setopt(pycurl.CUSTOMREQUEST, "GET")
+
+ try:
+ self.c.perform()
+ rep = self.header
+ finally:
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.NOBODY, 0)
+ self.c.unsetopt(pycurl.CUSTOMREQUEST)
+
+ else:
+ self.c.perform()
+ rep = self.getResponse()
+
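+ # clear the post body so it is not re-sent when the handle is reused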
+ self.c.setopt(pycurl.POSTFIELDS, "")
+ self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+ self.code = self.verifyHeader()
+
+ if cookies:
+ self.parseCookies()
+
+ if decode:
+ rep = self.decodeResponse(rep)
+
+ return rep
+
+ def parseCookies(self):
+ for c in self.c.getinfo(pycurl.INFO_COOKIELIST):
+ #http://xiix.wordpress.com/2006/03/23/mozillafirefox-cookie-format
+ domain, flag, path, secure, expires, name, value = c.split("\t")
+ # http only was added in py 2.6
+ domain = domain.replace("#HttpOnly_", "")
+ self.cj.setCookie(domain, name, value, path, expires, secure)
+
+ def verifyHeader(self):
+ """ raise an exceptions on bad headers """
+ code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+ if code in bad_headers:
+ raise ResponseException(code, responses.get(code, "Unknown statuscode"))
+ return code
+
+ def getResponse(self):
+ """ retrieve response from string io """
+ if self.rep is None: return ""
+ value = self.rep.getvalue()
+ self.rep.close()
+ self.rep = StringIO()
+ return value
+
+ def decodeResponse(self, rep):
+ """ decode with correct encoding, relies on header """
+ header = self.header.splitlines()
+ encoding = "utf8" # default encoding
+
+ for line in header:
+ line = line.lower().replace(" ", "")
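+ # a matching header now looks like e.g. content-type:text/html;charset=utf-8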
+ if not line.startswith("content-type:") or \
+ ("text" not in line and "application" not in line):
+ continue
+
+ none, delimiter, charset = line.rpartition("charset=")
+ if delimiter:
+ charset = charset.split(";")
+ if charset:
+ encoding = charset[0]
+
+ try:
+ #self.log.debug("Decoded %s" % encoding )
+ if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+ encoding = 'utf-8-sig'
+
+ decoder = getincrementaldecoder(encoding)("replace")
+ rep = decoder.decode(rep, True)
+
+ #TODO: html_unescape as default
+
+ except LookupError:
+ self.log.debug("No Decoder found for %s" % encoding)
+ except Exception:
+ self.log.debug("Error when decoding string from %s." % encoding)
+
+ return rep
+
+ def write(self, buf):
+ """ writes response """
+ if self.rep.tell() > 1000000 or self.doAbort:
+ rep = self.getResponse()
+ if self.doAbort: raise Abort()
+ f = open("response.dump", "wb")
+ f.write(rep)
+ f.close()
+ raise Exception("Loaded Url exceeded limit")
+
+ self.rep.write(buf)
+
+ def writeHeader(self, buf):
+ """ writes header """
+ self.header += buf
+
+ def reset(self):
+ self.cj.clear()
+ self.options.clear()
+
+ def close(self):
+ """ cleanup, unusable after this """
+ self.rep.close()
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "c"):
+ self.c.close()
+ del self.c
\ No newline at end of file
diff --git a/pyload/plugins/network/DefaultRequest.py b/pyload/plugins/network/DefaultRequest.py
new file mode 100644
index 000000000..dce486ea5
--- /dev/null
+++ b/pyload/plugins/network/DefaultRequest.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+from CurlRequest import CurlRequest
+from CurlDownload import CurlDownload
+
+__version__ = "0.1"
+
+DefaultRequest = CurlRequest
+DefaultDownload = CurlDownload
\ No newline at end of file
diff --git a/pyload/plugins/network/__init__.py b/pyload/plugins/network/__init__.py
new file mode 100644
index 000000000..4b31e848b
--- /dev/null
+++ b/pyload/plugins/network/__init__.py
@@ -0,0 +1 @@
+__author__ = 'christian'