author    RaNaN <Mast3rRaNaN@hotmail.de>  2010-12-27 21:18:29 +0100
committer RaNaN <Mast3rRaNaN@hotmail.de>  2010-12-27 21:18:29 +0100
commit    9509a6444bbb538e136ed899d94aab32be629383 (patch)
tree      ac8532b20912a3e5be6ff73443520a7f31f5806a
parent    encoding fix (diff)
download  pyload-9509a6444bbb538e136ed899d94aab32be629383.tar.xz
new curl download backend - support for chunked dl, resume
-rw-r--r--  module/PluginThread.py                  |  24
-rw-r--r--  module/PyFile.py                        |  14
-rw-r--r--  module/ThreadManager.py                 |  23
-rw-r--r--  module/network/Browser.py               | 196
-rw-r--r--  module/network/Bucket.py                |  42
-rw-r--r--  module/network/CookieJar.py             |  61
-rw-r--r--  module/network/CookieRedirectHandler.py | 143
-rw-r--r--  module/network/HTTPBase.py              | 396
-rw-r--r--  module/network/HTTPChunk.py             | 353
-rw-r--r--  module/network/HTTPDownload.py          | 475
-rw-r--r--  module/network/HTTPRequest.py           | 164
-rw-r--r--  module/network/RequestFactory.py        |  12
-rw-r--r--  module/network/helper.py                | 156
-rw-r--r--  module/network/socks.py                 | 442
-rw-r--r--  module/plugins/Plugin.py                |  13
-rw-r--r--  module/plugins/hoster/BasePlugin.py     |   4
16 files changed, 668 insertions, 1850 deletions
diff --git a/module/PluginThread.py b/module/PluginThread.py
index 3f24db345..f72e94eaf 100644
--- a/module/PluginThread.py
+++ b/module/PluginThread.py
@@ -29,8 +29,7 @@ from sys import exc_info, exc_clear
from types import MethodType
from os.path import join, exists
-from urllib2 import URLError
-from socket import error
+from pycurl import error
from module.plugins.Plugin import Abort
from module.plugins.Plugin import Fail
@@ -215,20 +214,14 @@ class DownloadThread(PluginThread):
continue
except error, e:
- #@TODO determine correct error codes
- if len(e.args) > 1:
- code = e.args[0]
- msg = e.args[1:]
- else:
- code = -1
+ try:
+ code, msg = e.args
+ except:
+ code = 0
msg = e.args
- if "timed out" in msg:
- code = 990
-
- self.m.log.debug("socket error %s: %s" % (code, msg))
- if code in (104, 990):
- self.m.log.warning(_("Couldn't connect to host or connection resetted, waiting 1 minute and retry."))
+ if code in (7, 18, 28, 52, 56):
+ self.m.log.warning(_("Couldn't connect to host or connection was reset, waiting 1 minute and retrying."))
wait = time() + 60
while time() < wait:
sleep(1)
@@ -241,14 +234,13 @@ class DownloadThread(PluginThread):
self.clean(pyfile)
else:
- pyfile.plugin.req.canContinue = False
self.queue.put(pyfile)
continue
else:
pyfile.setStatus("failed")
- self.m.log.error("socket error %s: %s" % (code, msg))
+ self.m.log.error("pycurl error %s: %s" % (code, msg))
if self.m.core.debug:
print_exc()
self.writeDebugReport(pyfile)
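For reference, the new handler unpacks pycurl.error as in this illustrative, stand-alone sketch (not part of the commit; the URL is a placeholder). The numeric codes are libcurl's CURLE_* values: 7 COULDNT_CONNECT, 18 PARTIAL_FILE, 28 OPERATION_TIMEDOUT, 52 GOT_NOTHING, 56 RECV_ERROR.

    import pycurl

    RETRY_CODES = (7, 18, 28, 52, 56)  # transient libcurl failures worth retrying

    c = pycurl.Curl()
    c.setopt(pycurl.URL, "http://localhost:1/")  # nothing listens here -> error 7
    try:
        c.perform()
    except pycurl.error, e:
        try:
            code, msg = e.args
        except ValueError:
            code, msg = 0, e.args
        if code in RETRY_CODES:
            print "transient error %s: %s, would wait and retry" % (code, msg)
        else:
            print "fatal error %s: %s" % (code, msg)
    finally:
        c.close()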
diff --git a/module/PyFile.py b/module/PyFile.py
index c198ce459..648b7e838 100644
--- a/module/PyFile.py
+++ b/module/PyFile.py
@@ -70,7 +70,7 @@ class PyFile():
# database information ends here
self.plugin = None
- self.download = None
+ #self.download = None
self.waitUntil = 0 # time() + time to wait
@@ -212,7 +212,7 @@ class PyFile():
def getSpeed(self):
""" calculates speed """
try:
- return self.download.speed
+ return self.plugin.req.speed
except:
return 0
@@ -226,21 +226,23 @@ class PyFile():
def getBytesLeft(self):
""" gets bytes left """
try:
- return self.download.size - self.download.arrived
+ return self.plugin.req.size - self.plugin.req.arrived
except:
return 0
def getPercent(self):
""" get % of download """
- return self.progress.getPercent()
+ try:
+ return self.plugin.req.percent
+ except:
+ return 0
def getSize(self):
""" get size of download """
if self.size: return self.size
else:
try:
- if not self.download.size: return 0
- return self.download.size
+ return self.plugin.req.size
except:
return 0
diff --git a/module/ThreadManager.py b/module/ThreadManager.py
index 0a2ce674b..c5744cb92 100644
--- a/module/ThreadManager.py
+++ b/module/ThreadManager.py
@@ -26,6 +26,8 @@ from time import sleep
from traceback import print_exc
from random import choice
+import pycurl
+
import PluginThread
from module.network.Request import getURL
@@ -50,7 +52,7 @@ class ThreadManager:
self.reconnecting.clear()
self.downloaded = 0 #number of files downloaded since last cleanup
- #pycurl.global_init(pycurl.GLOBAL_DEFAULT)
+ pycurl.global_init(pycurl.GLOBAL_DEFAULT)
for i in range(0, self.core.config.get("general", "max_downloads")):
self.createThread()
@@ -147,7 +149,6 @@ class ThreadManager:
sleep(1)
ip = self.getIP()
self.core.hookManager.afterReconnecting(ip)
- self.closeAllConnecions()
self.log.info(_("Reconnected, new IP: %s") % ip)
@@ -184,6 +185,16 @@ class ThreadManager:
free[0].put("quit")
+ def cleanPycurl(self):
+ """ make a global curl cleanup (currently ununused """
+ if self.downloadingIds() or self.processingIds():
+ return False
+ pycurl.global_cleanup()
+ pycurl.global_init(pycurl.GLOBAL_DEFAULT)
+ self.downloaded = 0
+ self.log.debug("Cleaned up pycurl")
+ return True
+
#----------------------------------------------------------------------
def assignJob(self):
"""assing a job to a thread if possible"""
@@ -229,8 +240,6 @@ class ThreadManager:
else:
thread = PluginThread.DecrypterThread(self, job)
- def closeAllConnecions(self):
- """closes all connections, when a reconnect was made """
- for pyfile in self.core.files.cache.itervalues():
- if pyfile.plugin and pyfile.plugin.req:
- pyfile.plugin.req.http.closeAll()
+ def cleanup(self):
+ """do global cleanup, should be called when finished with pycurl"""
+ pycurl.global_cleanup()
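The intended libcurl lifecycle behind these hooks, as an illustrative sketch (example.com is a placeholder): global_init once before any handle exists, global_cleanup once after the last handle is closed, which is why cleanPycurl refuses to run while downloads are active.

    import pycurl

    pycurl.global_init(pycurl.GLOBAL_DEFAULT)
    try:
        c = pycurl.Curl()
        c.setopt(pycurl.URL, "http://example.com/")
        c.setopt(pycurl.WRITEFUNCTION, lambda buf: None)  # discard the body
        c.perform()
        c.close()
    finally:
        pycurl.global_cleanup()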
diff --git a/module/network/Browser.py b/module/network/Browser.py
index 2f0144f81..0bed8e395 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -1,161 +1,100 @@
-from random import randint
-from helper import *
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
from os.path import join
from logging import getLogger
-import zlib
-from CookieJar import CookieJar
-from HTTPBase import HTTPBase
+from HTTPRequest import HTTPRequest
from HTTPDownload import HTTPDownload
-from FTPBase import FTPDownload
-from XDCCBase import XDCCDownload
-from traceback import print_stack
class Browser(object):
- def __init__(self, interface=None, cookieJar=None, bucket=None, proxies={}):
+ def __init__(self, interface=None, cj=None, bucket=None, proxies={}):
self.log = getLogger("log")
- self.lastURL = None
self.interface = interface
+ self.cj = cj
self.bucket = bucket
-
- self.http = HTTPBase(interface=interface, proxies=proxies)
- self.setCookieJar(cookieJar)
self.proxies = proxies
- self.abort = property(lambda: False, lambda val: self.abortDownloads() if val else None)
-
- self.downloadConnections = []
- lastEffectiveURL = property(lambda self: self.lastURL) #@backward comp, @TODO real last effective url
+ self._size = 0
+
+ self.http = HTTPRequest(cj, interface, proxies)
+ self.dl = None
+
+ lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL)
- def setCookieJar(self, cookieJar):
- self.cookieJar = cookieJar
- self.http.cookieJar = self.cookieJar
+ def setCookieJar(self, cj):
+ self.cj = cj
+ self.http.cj = cj
+
+ @property
+ def speed(self):
+ if self.dl:
+ return self.dl.speed
+ return 0
+
+ @property
+ def size(self):
+ if self._size:
+ return self._size
+ if self.dl:
+ return self.dl.size
+ return 0
+
+ @property
+ def arrived(self):
+ if self.dl:
+ return self.dl.arrived
+ return 0
+
+ @property
+ def percent(self):
+ if not self.size: return 0
+ return (self.arrived * 100) / self.size
def clearCookies(self):
- self.cookieJar.clear()
+ if self.cj:
+ self.cj.clear()
def clearReferer(self):
self.lastURL = None
- def getPage(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
- if not referer:
- referer = self.lastURL
- self.http.followRedirect = True
- resp = self.http.getResponse(url, get=get, post=post, referer=referer, cookies=cookies,
- customHeaders=customHeaders)
- data = resp.read()
- try:
- if resp.info()["Content-Encoding"] == "gzip":
- data = zlib.decompress(data, 16 + zlib.MAX_WBITS)
- elif resp.info()["Content-Encoding"] == "deflate":
- data = zlib.decompress(data, -zlib.MAX_WBITS)
- except:
- pass
-
- try:
- content_type = resp.info()["Content-Type"]
- infos = [info.strip() for info in content_type.split(";")]
- charset = None
- for info in infos:
- if info.startswith("charset"):
- none, charset = info.split("=")
- if charset:
- data = data.decode(charset)
- except Exception, e:
- self.log.debug("Could not decode charset: %s" % e)
-
- self.lastURL = resp.geturl()
- return data
-
- def getRedirectLocation(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
- if not referer:
- referer = self.lastURL
- self.http.followRedirect = False
- resp = self.http.getResponse(url, get=get, post=post, referer=referer, cookies=cookies,
- customHeaders=customHeaders)
- resp.close()
- self.lastURL = resp.geturl()
- location = None
- try:
- location = resp.info()["Location"]
- except:
- pass
- return location
-
- def _removeConnection(self, *args, **kwargs):
- i = self.downloadConnections.index(args[-1])
- self.downloadConnections[i].download.clean()
- del self.downloadConnections[i]
-
def abortDownloads(self):
- for d in self.downloadConnections:
- d.download.setAbort(True)
- d.abort = True
+ self.http.abort = True
+ if self.dl:
+ self.dl.abort = True
+
+ def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False):
+ self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
+ self.cj if cookies else None, self.bucket, self.interface,
+ self.proxies)
+ self.dl.download(chunks, resume)
+ self._size = self.dl.size
+
+ self.dl.clean()
+ self.dl = None
- @property
- def speed(self):
- speed = 0
- for d in self.downloadConnections:
- speed += d.speed
- return speed
-
- def httpDownload(self, url, filename, get={}, post={}, referer=None, cookies=True, customHeaders={}, chunks=1,
- resume=False):
- if not referer:
- referer = self.lastURL
-
- dwnld = HTTPDownload(url, filename, get=get, post=post, referer=referer, cookies=cookies,
- customHeaders=customHeaders, bucket=self.bucket, interface=self.interface,
- proxies=self.proxies)
- dwnld.cookieJar = self.cookieJar
-
- d = dwnld.download(chunks=chunks, resume=resume)
- self.downloadConnections.append(d)
- d.addCallback(self._removeConnection, d)
- d.addErrback(self._removeConnection, d)
- return d
-
- def ftpDownload(self, url, filename, resume=False):
- dwnld = FTPDownload(url, filename, bucket=self.bucket, interface=self.interface, proxies=self.proxies)
-
- d = dwnld.download(resume=resume)
- self.downloadConnections.append(d)
- d.addCallback(self._removeConnection, d)
- return d
-
- def xdccDownload(self, server, port, channel, bot, pack, filename, nick="pyload_%d" % randint(1000, 9999),
- ident="pyload", real="pyloadreal"):
- dwnld = XDCCDownload(server, port, channel, bot, pack, nick, ident, real, filename, bucket=self.bucket,
- interface=self.interface, proxies=self.proxies)
-
- d = dwnld.download()
- self.downloadConnections.append(d)
- d.addCallback(self._removeConnection, d)
- return d
-
- def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, no_post_encode=False, raw_cookies={}):
- self.log.warning("Browser: deprecated call 'load'")
- print_stack()
- return self.getPage(url, get=get, post=post, cookies=cookies)
def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False):
- #@TODO
self.log.warning("Browser: deprecated call 'download'")
- print_stack()
- filename = join(folder, file_name)
- d = self.httpDownload(url, filename, get, post)
- waitFor(d)
+ return self.httpDownload(url, join(folder, file_name), get, post, ref, cookies)
+
- return filename
+ def getPage(self, url, get={}, post={}, ref=True, cookies=True):
+ """ retrieves page """
+ return self.http.load(url, get, post, ref, cookies)
def clean(self):
""" cleanup """
if hasattr(self, "http"):
- self.http.clean()
+ self.http.close()
del self.http
+ if hasattr(self, "dl"):
+ del self.dl
+ if hasattr(self, "cj"):
+ del self.cj
if __name__ == "__main__":
browser = Browser()#proxies={"socks5": "localhost:5000"})
@@ -167,8 +106,5 @@ if __name__ == "__main__":
#browser.getPage("https://encrypted.google.com/")
#browser.getPage("http://google.com/search?q=bar")
- browser.httpDownload("http://speedtest.netcologne.de/test_100mb.bin", "test_100mb.bin")
- from time import sleep
+ browser.httpDownload("http://speedtest.netcologne.de/test_10mb.bin", "test_10mb.bin")
- while True:
- sleep(1)
diff --git a/module/network/Bucket.py b/module/network/Bucket.py
index 434cbe662..dc1280ede 100644
--- a/module/network/Bucket.py
+++ b/module/network/Bucket.py
@@ -14,40 +14,44 @@
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
- @author: mkaay
+ @author: RaNaN
"""
-from time import time, sleep
+from time import time
from threading import Lock
class Bucket:
def __init__(self):
- self.content = 0
self.rate = 0
- self.lastDrip = time()
+ self.tokens = 0
+ self.timestamp = time()
self.lock = Lock()
def setRate(self, rate):
self.lock.acquire()
self.rate = rate
self.lock.release()
-
- def add(self, amount):
+
+ def consumed(self, amount):
+ """ return time the process have to sleep, after consumed specified amount """
self.lock.acquire()
- self.drip()
- allowable = min(amount, self.rate - self.content)
- if allowable > 0:
- sleep(0.005)
- self.content += allowable
+ self.calc_tokens()
+ self.tokens -= amount
+
+ if self.tokens < 0:
+ time = -self.tokens/float(self.rate)
+ else:
+ time = 0
+
+
self.lock.release()
- return allowable
+ return time
- def drip(self):
- if self.rate:
+ def calc_tokens(self):
+ if self.tokens < self.rate:
now = time()
- deltaT = now - self.lastDrip
- self.content = long(max(0, self.content - deltaT * self.rate))
- self.lastDrip = now
- else:
- self.content = 0
+ delta = self.rate * (now - self.timestamp)
+ self.tokens = min(self.rate, self.tokens + delta)
+ self.timestamp = now
+
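A consumer throttles itself by sleeping for whatever consumed() returns; an illustrative sketch (not part of the commit; note consumed() divides by the rate, so this assumes setRate() was called with a value > 0):

    from time import sleep

    from Bucket import Bucket

    bucket = Bucket()
    bucket.setRate(100 * 1024)  # 100 kB/s

    def throttled_copy(src, dst, bufsize=16 * 1024):
        while True:
            data = src.read(bufsize)
            if not data:
                break
            dst.write(data)
            sleep(bucket.consumed(len(data)))  # sleep off the token deficit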
diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py
index b2cbba504..c9ae6cb6c 100644
--- a/module/network/CookieJar.py
+++ b/module/network/CookieJar.py
@@ -17,51 +17,34 @@
@author: mkaay, RaNaN
"""
-from cookielib import CookieJar as PyCookieJar
-from cookielib import Cookie
from time import time
-class CookieJar(PyCookieJar):
- def __init__(self, pluginName=None, account=None):
- PyCookieJar.__init__(self)
- self.plugin = pluginName
+class CookieJar():
+ def __init__(self, pluginname, account=None):
+ self.cookies = {}
+ self.plugin = pluginname
self.account = account
- def getCookie(self, name):
- print "getCookie not implemented!"
- return None
-
- def setCookie(self, domain, name, value, path="/"):
- c = Cookie(version=0, name=name, value=value, port=None, port_specified=False,
- domain=domain, domain_specified=False,
- domain_initial_dot=(domain.startswith(".")), path=path, path_specified=True,
- secure=False, expires=None, discard=True, comment=None,
- comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
- self.set_cookie(c)
-
- def add_cookie_header(self, request):
- self._cookies_lock.acquire()
- try:
+ def addCookies(self, clist):
+ for c in clist:
+ name = c.split("\t")[5]
+ self.cookies[name] = c
- self._policy._now = self._now = int(time())
+ def getCookies(self):
+ return self.cookies.values()
- cookies = self._cookies_for_request(request)
+ def parseCookie(self, name):
+ if self.cookies.has_key(name):
+ return self.cookies[name].split("\t")[6]
+ else:
+ return None
- attrs = self._cookie_attrs(cookies)
- if attrs:
- if not request.has_header("Cookie"):
- request.add_header(
- "Cookie", "; ".join(attrs))
-
- # if necessary, advertise that we know RFC 2965
- if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
- not request.has_header("Cookie2")):
- for cookie in cookies:
- if cookie.version != 1:
- request.add_header("Cookie2", '$Version="1"')
- break
+ def getCookie(self, name):
+ return self.parseCookie(name)
- finally:
- self._cookies_lock.release()
+ def setCookie(self, domain, name, value, path="/", exp=time()+3600*24*180):
+ s = ".%s TRUE %s FALSE %s %s %s" % (domain, path, exp, name, value)
+ self.cookies[name] = s
- self.clear_expired_cookies()
+ def clear(self):
+ self.cookies = {}
\ No newline at end of file
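Each cookie is stored as one Netscape-format line of seven tab-separated fields, the same form pycurl's COOKIELIST emits and accepts; an illustrative round trip with placeholder names:

    from CookieJar import CookieJar

    cj = CookieJar("SomePlugin")
    cj.setCookie("example.com", "session", "abc123")
    print cj.getCookie("session")  # -> abc123

    for line in cj.getCookies():
        fields = line.split("\t")
        print fields[5], "=", fields[6]  # name = value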
diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py
deleted file mode 100644
index b7c4c953e..000000000
--- a/module/network/CookieRedirectHandler.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>.
-
- @author: mkaay, RaNaN
-"""
-
-from urllib2 import BaseHandler, HTTPError
-from urllib import addinfourl
-from urllib2 import Request
-from urlparse import urlparse, urlunparse, urljoin
-from CookieJar import CookieJar
-
-class CookieRedirectHandler(BaseHandler):
- # maximum number of redirections to any single URL
- # this is needed because of the state that cookies introduce
- max_repeats = 4
- # maximum total number of redirections (regardless of URL) before
- # assuming we're in a loop
- max_redirections = 10
-
- def __init__(self, cookiejar=None, follow=True):
- if cookiejar is None:
- cookiejar = CookieJar()
- self.cookiejar = cookiejar
- self.follow = follow
-
- def http_request(self, request):
- self.cookiejar.add_cookie_header(request)
- return request
-
- def http_response(self, request, response):
- self.cookiejar.extract_cookies(response, request)
- return response
-
- def redirect_request(self, req, fp, code, msg, headers, newurl):
- """Return a Request or None in response to a redirect.
-
- This is called by the http_error_30x methods when a
- redirection response is received. If a redirection should
- take place, return a new Request to allow http_error_30x to
- perform the redirect. Otherwise, raise HTTPError if no-one
- else should try to handle this url. Return None if you can't
- but another Handler might.
- """
- m = req.get_method()
- if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
- or code in (301, 302, 303) and m == "POST"):
- # Strictly (according to RFC 2616), 301 or 302 in response
- # to a POST MUST NOT cause a redirection without confirmation
- # from the user (of urllib2, in this case). In practice,
- # essentially all clients do redirect in this case, so we
- # do the same.
- # be conciliant with URIs containing a space
- newurl = newurl.replace(' ', '%20')
- newheaders = dict((k,v) for k,v in req.headers.items()
- if k.lower() not in ("content-length", "content-type", "cookie")
- )
- req = Request(newurl,
- headers=newheaders,
- origin_req_host=req.get_origin_req_host(),
- unverifiable=True)
- self.cookiejar.add_cookie_header(req)
- return req
- else:
- raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
- # Implementation note: To avoid the server sending us into an
- # infinite loop, the request object needs to track what URLs we
- # have already seen. Do this by adding a handler-specific
- # attribute to the Request object.
- def http_error_302(self, req, fp, code, msg, headers):
- resp = addinfourl(fp, headers, req.get_full_url())
- resp.code = code
- resp.msg = msg
- self.cookiejar.extract_cookies(resp, req)
-
- if not self.follow:
- return resp
-
- # Some servers (incorrectly) return multiple Location headers
- # (so probably same goes for URI). Use first header.
- if 'location' in headers:
- newurl = headers.getheaders('location')[0]
- elif 'uri' in headers:
- newurl = headers.getheaders('uri')[0]
- else:
- return
-
- # fix a possible malformed URL
- urlparts = urlparse(newurl)
- if not urlparts.path:
- urlparts = list(urlparts)
- urlparts[2] = "/"
- newurl = urlunparse(urlparts)
-
- newurl = urljoin(req.get_full_url(), newurl)
-
- # XXX Probably want to forget about the state of the current
- # request, although that might interact poorly with other
- # handlers that also use handler-specific request attributes
- new = self.redirect_request(req, fp, code, msg, headers, newurl)
- if new is None:
- return
-
- # loop detection
- # .redirect_dict has a key url if url was previously visited.
- if hasattr(req, 'redirect_dict'):
- visited = new.redirect_dict = req.redirect_dict
- if (visited.get(newurl, 0) >= self.max_repeats or
- len(visited) >= self.max_redirections):
- raise HTTPError(req.get_full_url(), code,
- self.inf_msg + msg, headers, fp)
- else:
- visited = new.redirect_dict = req.redirect_dict = {}
- visited[newurl] = visited.get(newurl, 0) + 1
-
- # Don't close the fp until we are sure that we won't use it
- # with HTTPError.
- fp.read()
- fp.close()
- return self.parent.open(new) #, timeout=req.timeout)
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
- inf_msg = "The HTTP server returned a redirect error that would " \
- "lead to an infinite loop.\n" \
- "The last 30x error message was:\n"
-
- https_request = http_request
- https_response = http_response
diff --git a/module/network/HTTPBase.py b/module/network/HTTPBase.py
deleted file mode 100644
index f5cd7afcc..000000000
--- a/module/network/HTTPBase.py
+++ /dev/null
@@ -1,396 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- See the GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <http://www.gnu.org/licenses/>.
-
- @author: mkaay
-"""
-
-from urllib import urlencode
-
-from urllib2 import Request
-from urllib2 import OpenerDirector
-from urllib2 import HTTPHandler
-from urllib2 import HTTPSHandler
-from urllib2 import HTTPDefaultErrorHandler
-from urllib2 import HTTPErrorProcessor
-from urllib2 import ProxyHandler
-from urllib2 import URLError
-from urllib2 import _parse_proxy
-
-from httplib import HTTPConnection
-from httplib import HTTPResponse
-from httplib import responses as HTTPStatusCodes
-from httplib import ResponseNotReady
-from httplib import BadStatusLine
-from httplib import CannotSendRequest
-
-from CookieJar import CookieJar
-from CookieRedirectHandler import CookieRedirectHandler
-
-import socket
-import socks
-
-from MultipartPostHandler import MultipartPostHandler
-
-DEBUG = 0
-HANDLE_ERRORS = 1
-
-class PyLoadHTTPResponse(HTTPResponse):
- def __init__(self, sock, debuglevel=0, strict=0, method=None):
- if method: # the httplib in python 2.3 uses the method arg
- HTTPResponse.__init__(self, sock, debuglevel, method)
- else: # 2.2 doesn't
- HTTPResponse.__init__(self, sock, debuglevel)
- self.fileno = sock.fileno
- self._rbuf = ''
- self._rbufsize = 8096
- self._handler = None # inserted by the handler later
- self._host = None # (same)
- self._url = None # (same)
-
- _raw_read = HTTPResponse.read
-
- def close_connection(self):
- self.close()
- self._handler._remove_connection(self._host, close=1)
-
- def info(self):
- return self.msg
-
- def geturl(self):
- return self._url
-
- def read(self, amt=None):
- # the _rbuf test is only in this first if for speed. It's not
- # logically necessary
- if self._rbuf and not amt is None:
- L = len(self._rbuf)
- if amt > L:
- amt -= L
- else:
- s = self._rbuf[:amt]
- self._rbuf = self._rbuf[amt:]
- return s
-
- s = self._rbuf + self._raw_read(amt)
- self._rbuf = ''
- return s
-
- def readline(self, limit=-1):
- i = self._rbuf.find('\n')
- while i < 0 and not (0 < limit <= len(self._rbuf)):
- new = self._raw_read(self._rbufsize)
- if not new: break
- i = new.find('\n')
- if i >= 0: i = i + len(self._rbuf)
- self._rbuf = self._rbuf + new
- if i < 0: i = len(self._rbuf)
- else: i += 1
- if 0 <= limit < len(self._rbuf): i = limit
- data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
- return data
-
- def readlines(self, sizehint = 0):
- total = 0
- list = []
- while 1:
- line = self.readline()
- if not line: break
- list.append(line)
- total += len(line)
- if sizehint and total >= sizehint:
- break
- return list
-
- @property
- def code(self):
- return self.status
-
- def getcode(self):
- return self.status
-
-class PyLoadHTTPConnection(HTTPConnection):
- sourceAddress = ('', 0)
- socksProxy = None
- response_class = PyLoadHTTPResponse
-
- def connect(self):
- if self.socksProxy:
- self.sock = socks.socksocket()
- t = _parse_proxy(self.socksProxy[1])
- self.sock.setproxy(self.socksProxy[0], addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
- else:
- self.sock = socket.socket()
- self.sock.settimeout(30)
- self.sock.bind(self.sourceAddress)
- self.sock.connect((self.host, self.port))
-
- try:
- if self._tunnel_host:
- self._tunnel()
- except: #python2.5
- pass
-
-class PyLoadHTTPHandler(HTTPHandler):
- sourceAddress = ('', 0)
- socksProxy = None
-
- def __init__(self):
- self._connections = {}
-
- def setInterface(self, interface):
- if interface is None:
- interface = ""
- self.sourceAddress = (interface, 0)
-
- def setSocksProxy(self, *t):
- self.socksProxy = t
-
- def close_connection(self, host):
- """close connection to <host>
- host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
- no error occurs if there is no connection to that host."""
- self._remove_connection(host, close=1)
-
- def open_connections(self):
- """return a list of connected hosts"""
- return self._connections.keys()
-
- def close_all(self):
- """close all open connections"""
- for host, conn in self._connections.items():
- conn.close()
- self._connections = {}
-
- def _remove_connection(self, host, close=0):
- if self._connections.has_key(host):
- if close: self._connections[host].close()
- del self._connections[host]
-
- def _start_connection(self, h, req):
- data = ""
-
- if req.has_data():
- data = req.get_data()
- h.putrequest('POST', req.get_selector(), skip_accept_encoding=1)
- if not req.headers.has_key('Content-type'):
- h.putheader('Content-type',
- 'application/x-www-form-urlencoded')
- if not req.headers.has_key('Content-length'):
- h.putheader('Content-length', '%d' % len(data))
- else:
- h.putrequest('GET', req.get_selector(), skip_accept_encoding=1)
-
- for args in self.parent.addheaders:
- h.putheader(*args)
- for k, v in req.headers.items():
- h.putheader(k, v)
- h.endheaders()
- if req.has_data():
- h.send(data)
-
- def do_open(self, http_class, req):
- host = req.get_host()
- if not host:
- raise URLError('no host given')
-
- need_new_connection = 1
- h = self._connections.get(host)
- if not h is None:
- try:
- self._start_connection(h, req)
- except socket.error:
- r = None
- except BadStatusLine:
- r = None
- except CannotSendRequest:
- r = None
- else:
- try: r = h.getresponse()
- except ResponseNotReady: r = None
- except BadStatusLine: r = None
-
- if r is None or r.version == 9:
- # httplib falls back to assuming HTTP 0.9 if it gets a
- # bad header back. This is most likely to happen if
- # the socket has been closed by the server since we
- # last used the connection.
- if DEBUG: print "failed to re-use connection to %s" % host
- h.close()
- else:
- if DEBUG: print "re-using connection to %s" % host
- need_new_connection = 0
- if need_new_connection:
- if DEBUG: print "creating new connection to %s" % host
- h = http_class(host)
- h.sourceAddress = self.sourceAddress
- h.socksProxy = self.socksProxy
- self._connections[host] = h
- self._start_connection(h, req)
- r = h.getresponse()
-
-
- # if not a persistent connection, don't try to reuse it
- if r.will_close: self._remove_connection(host)
-
- if DEBUG:
- print "STATUS: %s, %s" % (r.status, r.reason)
- r._handler = self
- r._host = host
- r._url = req.get_full_url()
-
- if r.status in (200, 206) or not HANDLE_ERRORS:
- return r
- else:
- return self.parent.error('http', req, r, r.status, r.reason, r.msg)
-
- def http_open(self, req):
- return self.do_open(PyLoadHTTPConnection, req)
-
-class HTTPBase():
- def __init__(self, interface=None, proxies={}):
- self.followRedirect = True
- self.interface = interface
- self.proxies = proxies
-
- self.size = None
-
- self.referer = None
-
- self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10"
-
- self.handler = PyLoadHTTPHandler()
- self.handler.setInterface(interface)
- if proxies.has_key("socks5"):
- self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS5, proxies["socks5"])
- elif proxies.has_key("socks4"):
- self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS4, proxies["socks4"])
-
- self.cookieJar = CookieJar()
-
- self.opener = None
-
- self.debug = DEBUG
-
- def getOpener(self, cookies=True):
- if not self.opener:
- self.opener = self.createOpener(cookies)
-
- return self.opener
-
- def createOpener(self, cookies=True):
- opener = OpenerDirector()
- opener.add_handler(self.handler)
- opener.add_handler(MultipartPostHandler())
- opener.add_handler(HTTPSHandler())
- opener.add_handler(HTTPDefaultErrorHandler())
- opener.add_handler(HTTPErrorProcessor())
- opener.add_handler(CookieRedirectHandler(self.cookieJar, self.followRedirect))
- if self.proxies.has_key("http") or self.proxies.has_key("https"):
- opener.add_handler(ProxyHandler(self.proxies))
- opener.version = self.userAgent
- opener.addheaders[0] = ("User-Agent", self.userAgent)
- opener.addheaders.append(("Accept", "*/*"))
- opener.addheaders.append(("Accept-Language", "en-US,en"))
- opener.addheaders.append(("Accept-Encoding", "gzip, deflate"))
- opener.addheaders.append(("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"))
- return opener
-
- def createRequest(self, url, get={}, post={}, referer=None, customHeaders={}):
- if get:
- if isinstance(get, dict):
- get = urlencode(get)
- url = "%s?%s" % (url, get)
-
- req = Request(url)
-
- if post:
- if isinstance(post, dict):
- post = urlencode(post)
- req.add_data(post)
-
- if referer:
- req.add_header("Referer", referer)
-
- for key, val in customHeaders.iteritems():
- req.add_header(key, val)
-
- return req
-
- def getResponse(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
- req = self.createRequest(url, get, post, referer, customHeaders)
- opener = self.getOpener(cookies)
-
- if self.debug:
- print "[HTTP] ----"
- print "[HTTP] creating request"
- print "[HTTP] URL:", url
- print "[HTTP] GET"
- if get:
- for key, value in get.iteritems():
- print "[HTTP] \t", key, ":", value
- if post:
- print "[HTTP] POST"
- for key, value in post.iteritems():
- print "[HTTP] \t", key, ":", value
- print "[HTTP] headers"
- for key, value in opener.addheaders:
- print "[HTTP] \t", key, ":", value
- for key, value in req.headers.iteritems():
- print "[HTTP] \t", key, ":", value
- print "[HTTP] cookies"
- if self.cookieJar:
- from pprint import pprint
- pprint(self.cookieJar._cookies)
- print "[HTTP] ----"
-
- resp = opener.open(req)
- resp.getcode = lambda: resp.code
-
- if self.debug:
- print "[HTTP] ----"
- print "[HTTP] got response"
- print "[HTTP] status:", resp.getcode()
- print "[HTTP] headers"
- for key, value in resp.info().dict.iteritems():
- print "[HTTP] \t", key, ":", value
- print "[HTTP] cookies"
- if self.cookieJar:
- from pprint import pprint
- pprint(self.cookieJar._cookies)
- print "[HTTP] ----"
- try:
- self.size = int(resp.info()["Content-Length"])
- except: #chunked transfer
- pass
- return resp
-
- def closeAll(self):
- """ closes all connections """
- if hasattr(self, "handler"):
- self.handler.close_all()
-
- def clean(self):
- """ cleanup """
- self.closeAll()
- if hasattr(self, "opener"):
- del self.opener
- if hasattr(self, "handler"):
- del self.handler
-
-if __name__ == "__main__":
- base = HTTPBase()
- resp = base.getResponse("http://python.org/")
- print resp.read()
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 02134ca63..0c184db94 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -14,185 +14,184 @@
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
- @author: mkaay
+ @author: RaNaN
"""
-
-from HTTPBase import HTTPBase
-from urllib2 import HTTPError
-from helper import *
+from os import remove
+from os.path import exists
from time import sleep
-from traceback import print_exc
-from module.plugins.Plugin import Abort
-from module.plugins.Plugin import Fail
-
-class HTTPChunk(HTTPBase):
- def __init__(self, url, fh, get={}, post={}, referer=None, cookies=True, customHeaders={}, range=None, bucket=None, interface=None, proxies={}):
- HTTPBase.__init__(self, interface=interface, proxies=proxies)
-
- self.url = url
- self.bucket = bucket
- self.range = range
- self.noRangeHeader = False
- self.fh = fh
-
- self.get = get
- self.post = post
- self.referer = referer
- self.cookies = cookies
- self.customHeaders = customHeaders
-
- self.deferred = Deferred()
-
- self.abort = False
- self.finished = False
-
- self.arrived = 0
-
- self.startTime = None
- self.endTime = None
-
- self.speed = 0 #byte/sec
- self.speedCalcTime = None
- self.speedCalcLen = 0
-
- self.bufferSize = 18*1024 #tune if performance is poor
- self.resp = None
-
- def getSpeed(self):
- return self.speed
-
- @threaded
- def _download(self, resp):
- self.arrived = 0
- self.lastSpeed = self.startTime = inttime()
-
- if self.noRangeHeader and not self.range[0] == 0:
- self.deferred.error("range starts not at 0")
-
- running = True
- while running:
- if self.abort:
- break
- count = self.bufferSize
- if self.noRangeHeader:
- count = min(count, self.range[1] - self.arrived)
- if self.bucket:
- count = self.bucket.add(count)
- if not count:
- sleep(0.01)
- continue
-
- try:
- data = resp.read(count)
- except:
- self.deferred.error(Fail, "timeout")
- break
-
- if self.speedCalcTime < inttime():
- self.speed = self.speedCalcLen
- self.speedCalcTime = inttime()
- self.speedCalcLen = 0
- try:
- self.deferred.progress("percent", 100-int((self.size - self.arrived)/float(self.size)*100))
- except:
- pass
- size = len(data)
-
- self.arrived += size
- self.speedCalcLen += size
-
- if self.noRangeHeader and self.arrived == self.range[1]:
- running = False
-
- if size:
- self.fh.write(data)
- else:
- break
-
- self.speed = 0
- self.endTime = inttime()
- self.finished = True
- self.fh.close()
-
- if self.abort:
- self.deferred.error(Abort)
- elif self.size == self.arrived:
- self.deferred.callback()
- else:
- print self.arrived, self.size
- self.deferred.error(Fail, "wrong content-length")
-
- def getEncoding(self):
- try:
- if self.resp.info()["Content-Encoding"] in ("gzip", "deflate"):
- return self.resp.info()["Content-Encoding"]
- except:
- pass
- return "plain"
-
- def download(self):
- if self.range:
- self.customHeaders["Range"] = "bytes=%i-%i" % self.range
- try:
- resp = self.getResponse(self.url, self.get, self.post, self.referer, self.cookies, self.customHeaders)
- self.resp = resp
- except HTTPError, e:
- print_exc()
- self.deferred.error(e)
- return self.deferred
-
- if resp.getcode() in (200, 206):
- self._download(resp)
+
+import pycurl
+
+from HTTPRequest import HTTPRequest
+
+class WrongFormat(Exception):
+ pass
+
+class ChunkInfo():
+ def __init__(self, name):
+ self.name = name
+ self.size = 0
+ self.resume = False
+ self.chunks = []
+
+ def setSize(self, size):
+ self.size = int(size)
+
+ def addChunk(self, name, range):
+ self.chunks.append((name, range))
+
+ def clear(self):
+ self.chunks = []
+
+ def createChunks(self, chunks):
+ self.clear()
+ chunk_size = self.size / chunks
+
+ current = 0
+ for i in range(chunks):
+ end = self.size-1 if (i == chunks-1) else current+chunk_size
+ self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+ current += chunk_size + 1
+
+
+ def save(self):
+ fh = open("%s.chunks" % self.name, "w")
+ fh.write("name:%s\n" % self.name)
+ fh.write("size:%s\n" % self.size)
+ for i, c in enumerate(self.chunks):
+ fh.write("#%d:\n" % i)
+ fh.write("\tname:%s\n" % c[0])
+ fh.write("\trange:%i-%i\n" % c[1])
+ fh.close()
+
+ @staticmethod
+ def load(name):
+ if not exists("%s.chunks" % name):
+ raise IOError()
+ fh = open("%s.chunks" % name, "r")
+ name = fh.readline()[:-1]
+ size = fh.readline()[:-1]
+ if name.startswith("name:") and size.startswith("size:"):
+ name = name[5:]
+ size = size[5:]
else:
- self.deferred.error(resp.getcode(), resp)
- return self.deferred
-
-if __name__ == "__main__":
- import sys
- from Bucket import Bucket
- bucket = Bucket()
- bucket.setRate(200*1000)
- #bucket = None
-
- url = "http://speedtest.netcologne.de/test_100mb.bin"
-
- finished = 0
- def err(*a, **b):
- print a, b
- def callb(*a, **b):
- global finished
- finished += 1
- print a, b
-
- print "starting"
-
- conns = 4
-
- chunks = []
- for a in range(conns):
- fh = open("file.part%d" % a, "wb")
- chunk = HTTPChunk(url, fh, bucket=bucket, range=(a*5*1024*1024, (a+1)*5*1024*1024))
- print "fireing chunk #%d" % a
- d = chunk.download()
- d.addCallback(callb)
- d.addErrback(err)
- chunks.append(chunk)
-
- try:
+ fh.close()
+ raise WrongFormat()
+ ci = ChunkInfo(name)
+ ci.loaded = True
+ ci.setSize(size)
while True:
- aspeed = 0
- for a, chunk in enumerate(chunks):
- if not chunk.finished:
- print "#%d" % a, chunk.getSpeed()/1024, "kb/s"
- else:
- print "#%d" % a, "finished"
- aspeed += chunk.getSpeed()
- print "sum", aspeed/1024
- if finished == conns:
- print "- finished"
+ if not fh.readline(): #skip line
break
- sleep(1)
- except KeyboardInterrupt:
- for chunk in chunks:
- chunk.abort = True
- sys.exit()
+ name = fh.readline()[1:-1]
+ range = fh.readline()[1:-1]
+ if name.startswith("name:") and range.startswith("range:"):
+ name = name[5:]
+ range = range[6:].split("-")
+ else:
+ raise WrongFormat()
+
+ ci.addChunk(name, (long(range[0]), long(range[1])))
+ fh.close()
+ return ci
+
+ def remove(self):
+ if exists("%s.chunks" % self.name): remove("%s.chunks" % self.name)
+
+ def getCount(self):
+ return len(self.chunks)
+
+ def getChunkName(self, index):
+ return self.chunks[index][0]
+
+ def getChunkRange(self, index):
+ return self.chunks[index][1]
+
+class HTTPChunk(HTTPRequest):
+ def __init__(self, id, parent, range=None, resume=False):
+ self.id = id
+ self.p = parent # HTTPDownload instance
+ self.range = range # tuple (start, end)
+ self.resume = resume
+
+ self.arrived = 0
+ self.lastURL = self.p.referer
+
+ self.c = pycurl.Curl()
+
+ self.header = ""
+ self.headerParsed = False #indicates if the header has been processed
+
+ self.fp = None #file handle
+
+ self.initHandle()
+ self.setInterface(self.p.interface, self.p.proxies)
+
+ @property
+ def cj(self):
+ return self.p.cj
+
+ def getHandle(self):
+ """ returns a Curl handle ready to use for perform/multiperform """
+
+ self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
+ self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ if self.resume:
+ self.fp = open(self.p.info.getChunkName(self.id), "ab")
+ self.arrived = self.fp.tell()
+
+ if self.range:
+ #print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1])
+ self.c.setopt(pycurl.RANGE, "%i-%i" % (self.arrived+self.range[0], self.range[1]))
+ else:
+ #print "Resume File from %i" % self.arrived
+ self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+ else:
+ if self.range:
+ #print "Chunked with range %i-%i" % self.range
+ self.c.setopt(pycurl.RANGE, "%i-%i" % self.range)
+
+ self.fp = open(self.p.info.getChunkName(self.id), "wb")
+
+ return self.c
+
+ def writeHeader(self, buf):
+ self.header += buf
+ #@TODO forward headers? This is possibly unneeded when we just parse valid 200 headers
+ # as first chunk, we will parse the headers
+ if self.header.endswith("\r\n\r\n") and not self.range:
+ self.parseHeader()
+
+ def writeBody(self, buf):
+ size = len(buf)
+ self.arrived += size
+
+ self.fp.write(buf)
+
+ if self.p.bucket:
+ sleep(self.p.bucket.consumed(size))
+
+ if self.range and self.arrived > (self.range[1]-self.range[0]):
+ return 0 #close if we have enough data
+
+
+ def parseHeader(self):
+ """parse data from recieved header"""
+ for line in self.header.splitlines():
+ line = line.strip().lower()
+ if line.startswith("accept-ranges") and "bytes" in line:
+ self.p.chunkSupport = True
+
+ if not self.resume and line.startswith("content-length"):
+ self.p.size = int(line.split(":")[1])
+
+ self.headerParsed = True
+
+ def close(self):
+ """ closes everything, unusable after this """
+ if self.fp: self.fp.close()
+ self.c.close()
+ if hasattr(self, "p"): del self.p \ No newline at end of file
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index bce698e1e..e3ac09e84 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -14,337 +14,208 @@
You should have received a copy of the GNU General Public License
along with this program; if not, see <http://www.gnu.org/licenses/>.
- @author: mkaay
+ @author: RaNaN
"""
-from HTTPChunk import HTTPChunk
-from helper import *
-from os.path import exists, getsize
from os import remove
-#from shutil import move, copyfileobj
-from zlib import decompressobj, MAX_WBITS
+from time import sleep, time
+from shutil import move
-from cookielib import CookieJar
+import pycurl
-class WrongFormat(Exception):
- pass
+from HTTPRequest import HTTPRequest
+from HTTPChunk import ChunkInfo, HTTPChunk
-class ChunkInfo():
- def __init__(self, name):
- self.name = name
- self.size = None
- self.loaded = False
- self.chunks = []
-
- def setSize(self, size):
- self.size = int(size)
-
- def addChunk(self, name, range, encoding):
- self.chunks.append((name, range, encoding))
-
- def clear(self):
- self.chunks = []
- self.loaded = False
-
- def save(self):
- fh = open("%s.chunks" % self.name, "w")
- fh.write("name:%s\n" % self.name)
- fh.write("size:%s\n" % self.size)
- for i, c in enumerate(self.chunks):
- fh.write("#%d:\n" % i)
- fh.write("\tname:%s\n" % c[0])
- fh.write("\tencoding:%s\n" % c[2])
- fh.write("\trange:%i-%i\n" % c[1])
-
- @staticmethod
- def load(name):
- if not exists("%s.chunks" % name):
- raise IOError()
- fh = open("%s.chunks" % name, "r")
- name = fh.readline()[:-1]
- size = fh.readline()[:-1]
- if name.startswith("name:") and size.startswith("size:"):
- name = name[5:]
- size = size[5:]
- else:
- raise WrongFormat()
- ci = ChunkInfo(name)
- ci.loaded = True
- ci.setSize(size)
- while True:
- if not fh.readline(): #skip line
- break
- name = fh.readline()[1:-1]
- encoding = fh.readline()[1:-1]
- range = fh.readline()[1:-1]
- if name.startswith("name:") and encoding.startswith("encoding:") and range.startswith("range:"):
- name = name[5:]
- encoding = encoding[9:]
- range = range[6:].split("-")
- else:
- raise WrongFormat()
-
- ci.addChunk(name, (long(range[0]), long(range[1])), encoding)
- return ci
-
- def removeInfo(self):
- remove("%s.chunks" % self.name)
-
- def getCount(self):
- return len(self.chunks)
-
- def getChunkName(self, index):
- return self.chunks[index][0]
-
- def getChunkRange(self, index):
- return self.chunks[index][1]
-
- def getChunkEncoding(self, index):
- return self.chunks[index][2]
-
-class WrappedHTTPDeferred(WrappedDeferred):
- pass
-
-class HTTPDownload():
- def __init__(self, url, filename, get={}, post={}, referer=None, cookies=True, customHeaders={}, bucket=None, interface=None, proxies={}):
+from module.plugins.Plugin import Abort
+
+class HTTPDownload(HTTPRequest):
+ def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+ interface=None, proxies={}):
self.url = url
- self.filename = filename
- self.interface = interface
- self.proxies = proxies
-
+ self.filename = filename #complete file destination, not only name
self.get = get
self.post = post
-
self.referer = referer
- self.cookies = cookies
-
- self.customHeaders = customHeaders
-
+ self.cj = cj #cookiejar if cookies are needed
self.bucket = bucket
-
- self.deferred = Deferred()
-
- self.finished = False
- self._abort = False
- self.size = None
-
- self.cookieJar = CookieJar()
-
+ self.interface = interface
+ self.proxies = proxies
+ # all arguments
+
+ self.abort = False
+ self.size = 0
+
self.chunks = []
self.chunksDone = 0
+
try:
self.info = ChunkInfo.load(filename)
+ self.info.resume = True #resume is only possible with valid info file
+ self.size = self.info.size
except IOError:
self.info = ChunkInfo(filename)
- self.noChunkSupport = False
-
+
+ self.chunkSupport = None
+ self.m = pycurl.CurlMulti()
+
+ #needed for speed calculation
+ self.lastChecked = 0
+ self.lastArrived = []
+ self.speeds = []
+
+
+ @property
+ def speed(self):
+ return sum(self.speeds)
+
@property
def arrived(self):
- arrived = 0
- try:
- for i in range(self.info.getCount()):
- arrived += getsize(self.info.getChunkName(i)) #ugly, but difficult to calc otherwise due chunk resume
- except OSError:
- arrived = self.size
- return arrived
-
- def setAbort(self, val):
- self._abort = val
- for chunk in self.chunks:
- chunk.abort = val
-
- def getAbort(self):
- return self._abort
-
- abort = property(getAbort, setAbort)
-
- def getSpeed(self):
- speed = 0
- for chunk in self.chunks:
- speed += chunk.getSpeed()
- return speed
-
+ return sum([c.arrived for c in self.chunks])
+
@property
- def speed(self):
- return self.getSpeed()
-
- def calcProgress(self, p):
- self.deferred.progress("percent", 100-int((self.size - self.arrived)/float(self.size)*100))
-
- def _chunkDone(self):
- self.chunksDone += 1
- #print self.chunksDone, "/", len(self.chunks)
- if self.chunksDone == len(self.chunks):
- self._copyChunks()
-
+ def percent(self):
+ if not self.size: return 0
+ return (self.arrived * 100) / self.size
+
def _copyChunks(self):
- fo = open(self.filename, "wb") #out file
- for i in range(self.info.getCount()):
- encoding = self.info.getChunkEncoding(i)
-
- #decompress method, if any
- decompress = lambda data: data
- if encoding == "gzip":
- gz = decompressobj(16+MAX_WBITS)
- decompress = lambda data: gz.decompress(data)
- if encoding == "deflate":
- df = decompressobj(-MAX_WBITS)
- decompress = lambda data: df.decompress(data)
-
- #input file
- fname = "%s.chunk%d" % (self.filename, i)
- fi = open(fname, "rb")
- while True: #copy in chunks, consumes less memory
- data = fi.read(512*1024)
- if not data:
- break
- fo.write(decompress(data)) #decompressing
- fi.close()
- remove(fname) #remove
- fo.close()
- self.info.removeInfo() #remove info file
- self.deferred.callback() #done, emit callbacks
-
- def _createChunk(self, fh, range=None):
- chunk = HTTPChunk(self.url, fh, get=self.get, post=self.post,
- referer=self.referer, cookies=self.cookies,
- customHeaders=self.customHeaders,
- bucket=self.bucket, range=range,
- interface=self.interface, proxies=self.proxies)
- chunk.cookieJar = self.cookieJar
- return chunk
-
- def _addChunk(self, chunk, d):
- self.chunks.append(chunk)
- d.addProgress("percent", self.calcProgress)
- d.addCallback(self._chunkDone)
- d.addErrback(lambda *args, **kwargs: self.setAbort(True))
- d.addErrback(self.deferred.error)
-
+ init = self.info.getChunkName(0) #initial chunk name
+
+ if len(self.chunks) > 1:
+ fo = open(init, "rb+") #first chunkfile
+ for i in range(1, self.info.getCount()):
+ #input file
+ fo.seek(self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks
+ fname = "%s.chunk%d" % (self.filename, i)
+ fi = open(fname, "rb")
+ buf = 512 * 1024
+ while True: #copy in chunks, consumes less memory
+ data = fi.read(buf)
+ if not data:
+ break
+ fo.write(data)
+ fi.close()
+ remove(fname) #remove chunk
+ fo.close()
+
+ move(init, self.filename)
+ self.info.remove() #remove info file
+
def download(self, chunks=1, resume=False):
- self.chunksDone = 0
- if chunks > 0:
- #diffentent chunk count in info, resetting
- if self.info.loaded and not self.info.getCount() == chunks:
- self.info.clear()
-
- #if resuming, calculate range with offset
- crange = None
- if resume:
- if self.info.getCount() == chunks and exists("%s.chunk0" % (self.filename, )):
- crange = self.info.getChunkRange(0)
- crange = (crange[0]+getsize("%s.chunk0" % (self.filename, )), crange[1]-1)
-
- #if firstpart not done
- if crange is None or crange[1]-crange[0] > 0:
- fh = open("%s.chunk0" % (self.filename, ), "ab" if crange else "wb")
-
- chunk = self._createChunk(fh, range=crange)
-
- d = chunk.download() #start downloading
- self._addChunk(chunk, d)
-
- #no info file, need to calculate ranges
- if not self.info.loaded:
- size = chunk.size #overall size
- chunksize = size/chunks #chunk size
-
- chunk.range = (0, chunksize) #setting range for first part
- chunk.noRangeHeader = True
- chunk.size = chunksize #setting size for first chunk
-
- self.size = size #setting overall size
- self.info.setSize(self.size) #saving overall size
- self.info.addChunk("%s.chunk0" % (self.filename, ), chunk.range, chunk.getEncoding()) #add chunk to infofile
+ chunks = max(1, chunks)
+ resume = self.info.resume and resume
+ self.chunks = []
+
+ try:
+ self._download(chunks, resume)
+ finally:
+ self.clean()
+
+ def _download(self, chunks, resume):
+ if not resume:
+ self.info.addChunk("%s.chunk0" % self.filename, (0, 0))
+
+ init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)
+
+ self.chunks.append(init)
+ self.m.add_handle(init.getHandle())
+
+ while 1:
+ #need to create chunks
+ if len(self.chunks) < chunks and self.chunkSupport and self.size: #will be set later by first chunk
+
+ if not resume:
+ self.info.setSize(self.size)
+ self.info.createChunks(chunks)
+ self.info.save()
+
+ chunks = self.info.getCount()
+
+ init.range = self.info.getChunkRange(0)
+
+ for i in range(1, chunks):
+ c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
+ self.chunks.append(c)
+ self.m.add_handle(c.getHandle())
+
+ while 1:
+ ret, num_handles = self.m.perform()
+
+ if ret != pycurl.E_CALL_MULTI_PERFORM:
+ break
+
+ while 1:
+ num_q, ok_list, err_list = self.m.info_read()
+ for c in ok_list:
+ self.chunksDone += 1
+ for c in err_list:
+ curl, errno, msg = c
+ #test if chunk was finished, otherwise raise the exception
+ if errno != 23 or "0 !=" not in msg:
+ raise pycurl.error(errno, msg)
+
+ #@TODO KeyBoardInterrupts are seen as finished chunks,
+ #but normally not handled to this process, only in the testcase
- lastchunk = size - chunksize*(chunks-1) #calculating size for last chunk
- self.firstchunk = chunk #remeber first chunk
-
- if self.info.loaded and not self.size:
- self.size = self.info.size #setting overall size
-
- for i in range(1, chunks): #other chunks
- cont = False
- if not self.info.loaded: #first time load
- if i+1 == chunks: #last chunk?
- rng = (i*chunksize, i*chunksize+lastchunk-1)
- else:
- rng = (i*chunksize, (i+1)*chunksize-1) #adjusting range
- else: #info available
- rng = self.info.getChunkRange(i) #loading previous range
- if resume and exists("%s.chunk%d" % (self.filename, i)): #continue chunk
- rng = (rng[0]+getsize("%s.chunk%d" % (self.filename, i)), rng[1]) #adjusting offset
- cont = True #set append mode
-
- if rng[1]-rng[0] <= 0: #chunk done
- continue
-
- fh = open("%s.chunk%d" % (self.filename, i), "ab" if cont else "wb")
- chunk = self._createChunk(fh, range=rng)
- d = chunk.download() #try
-
- if not chunk.resp.getcode() == 206 and i == 1: #no range supported, tell first chunk to download everything
- chunk.abort = True
- self.noChunkSupport = True
- self.firstchunk.size = self.size
- self.firstchunk.range = None
- self.info.clear() #clear info
- self.info.addChunk("%s.chunk0" % (self.filename, ), (0, self.firstchunk.size), chunk.getEncoding()) #re-adding info with correct ranges
+ self.chunksDone += 1
+ if not num_q:
break
-
- self._addChunk(chunk, d)
-
- if not self.info.loaded: #adding info
- self.info.addChunk("%s.chunk%d" % (self.filename, i), chunk.range, chunk.getEncoding())
-
- self.info.save() #saving info
- if not len(self.chunks):
- self._copyChunks()
- return WrappedHTTPDeferred(self, self.deferred)
- else:
- raise Exception("no chunks")
+
+ if self.chunksDone == len(self.chunks):
+ break #all chunks loaded
+
+ # calc speed once per second
+ t = time()
+ if self.lastChecked + 1 < t:
+ diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+ enumerate(self.chunks)]
+
+ #for i, c in enumerate(self.chunks):
+ # diff[i] = c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0)
+
+ self.speeds = [float(a) / (t - self.lastChecked) for a in diff]
+ self.lastArrived = [c.arrived for c in self.chunks]
+ self.lastChecked = t
+ #print "------------------------"
+ #print self.speed / 1024, "kb/s"
+ #print "Arrived:", self.arrived
+ #print "Size:", self.size
+ #print self.percent, "%"
+
+ if self.abort:
+ raise Abort()
+
+ sleep(0.001) #suppress busy waiting - limits dl speed to (1 / x) * buffersize
+ self.m.select(1)
+
+ for chunk in self.chunks:
+ chunk.fp.close()
+ self.m.remove_handle(chunk.c)
+
+ self._copyChunks()
def clean(self):
""" cleanup """
- for c in self.chunks:
- c.clean()
+ for chunk in self.chunks:
+ chunk.close()
+ self.m.remove_handle(chunk.c)
+
+ self.m.close()
+ self.chunks = []
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "info"):
+ del self.info
if __name__ == "__main__":
- import sys
+ url = "http://speedtest.netcologne.de/test_10mb.bin"
+
from Bucket import Bucket
+
bucket = Bucket()
- bucket.setRate(200*1024)
- #bucket = None
-
- url = "http://speedtest.netcologne.de/test_100mb.bin"
-
- finished = False
- def err(*a, **b):
- print a, b
- def callb(*a, **b):
- global finished
- finished = True
- print a, b
-
+ bucket.setRate(200 * 1024)
+ bucket = None
+
print "starting"
-
- dwnld = HTTPDownload(url, "test_100mb.bin", bucket=bucket)
- d = dwnld.download(chunks=5, resume=True)
- d.addCallback(callb)
- d.addErrback(err)
-
- try:
- while True:
- for a, chunk in enumerate(dwnld.chunks):
- if not chunk.finished:
- print "#%d" % a, chunk.getSpeed()/1024, "kb/s", "size", int(float(chunk.arrived)/chunk.size*100), "%"
- else:
- print "#%d" % a, "finished"
- print "sum", dwnld.speed/1024, dwnld.arrived, "/", dwnld.size, int(float(dwnld.arrived)/dwnld.size*100), "%"
- if finished:
- print "- finished"
- break
- sleep(1)
- except KeyboardInterrupt:
- dwnld.abort = True
- sys.exit()
+
+ dwnld = HTTPDownload(url, "test_10mb.bin", bucket=bucket)
+ dwnld.download(chunks=3, resume=True)
\ No newline at end of file
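The download loop above is the standard pycurl multi pattern; reduced to a single handle it looks like this illustrative sketch (example.com is a placeholder):

    import pycurl
    from cStringIO import StringIO

    m = pycurl.CurlMulti()
    buf = StringIO()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, "http://example.com/")
    c.setopt(pycurl.WRITEFUNCTION, buf.write)
    m.add_handle(c)

    while True:
        while True:
            ret, num_handles = m.perform()
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        if num_handles == 0:
            break
        m.select(1.0)  # block until sockets are ready instead of busy-waiting

    m.remove_handle(c)
    c.close()
    m.close()
    print "%d bytes received" % len(buf.getvalue())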
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
new file mode 100644
index 000000000..3a240b081
--- /dev/null
+++ b/module/network/HTTPRequest.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: RaNaN
+"""
+
+import pycurl
+
+from module.plugins.Plugin import Abort
+
+from urllib import quote, urlencode
+from cStringIO import StringIO
+
+def myquote(url):
+ return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+
+class HTTPRequest():
+ def __init__(self, cookies=None, interface=None, proxies=None):
+ self.c = pycurl.Curl()
+ self.rep = StringIO()
+
+ self.cj = cookies #cookiejar
+
+ self.lastURL = None
+ self.lastEffectiveURL = None
+ self.abort = False
+
+ self.header = ""
+
+ self.initHandle()
+ self.setInterface(interface, proxies)
+
+
+ def initHandle(self):
+ """ sets common options to curl handle """
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.MAXREDIRS, 5)
+ self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+ self.c.setopt(pycurl.NOSIGNAL, 1)
+ self.c.setopt(pycurl.NOPROGRESS, 1)
+ if hasattr(pycurl, "AUTOREFERER"):
+ self.c.setopt(pycurl.AUTOREFERER, 1)
+ self.c.setopt(pycurl.BUFFERSIZE, 32 * 1024)
+ self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+ self.c.setopt(pycurl.LOW_SPEED_TIME, 30)
+ self.c.setopt(pycurl.LOW_SPEED_LIMIT, 20)
+
+ #self.c.setopt(pycurl.VERBOSE, 1)
+
+ self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.2.10) Gecko/20100916 Firefox/3.6.10")
+ if pycurl.version_info()[7]:
+ self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+ self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+ "Accept-Language: en-US,en",
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+ "Connection: keep-alive",
+ "Keep-Alive: 300"])
+
+ def setInterface(self, interface, proxies):
+ if interface and interface.lower() != "none":
+ self.c.setopt(pycurl.INTERFACE, interface)
+
+ #@TODO set proxies
+
+ def addCookies(self):
+ if self.cj:
+ self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST))
+
+ def getCookies(self):
+ if self.cj:
+ for c in self.cj.getCookies():
+ self.c.setopt(pycurl.COOKIELIST, c)
+ return
+
+ def setRequestContext(self, url, get, post, referer, cookies):
+ """ sets everything needed for the request """
+
+ url = myquote(str(url))
+
+ if get:
+ get = urlencode(get)
+ url = "%s?%s" % (url, get)
+
+ self.c.setopt(pycurl.URL, url)
+
+ if post:
+ post = urlencode(post)
+ self.c.setopt(pycurl.POSTFIELDS, post)
+
+ if referer and self.lastURL:
+ self.c.setopt(pycurl.REFERER, self.lastURL)
+
+ self.lastURL = url #remember on the request object itself, as referer for the next call
+
+ if cookies:
+ self.c.setopt(pycurl.COOKIEFILE, "")
+ self.c.setopt(pycurl.COOKIEJAR, "")
+ self.getCookies()
+
+
+ def load(self, url, get={}, post={}, referer=True, cookies=True):
+ """ load and returns a given page """
+
+ self.setRequestContext(url, get, post, referer, cookies)
+
+ self.header = ""
+ self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+ #@TODO header_only, raw_cookies and some things from the old backend, which are apparently not needed
+
+ self.c.perform()
+
+ self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+ self.addCookies()
+
+ return self.getResponse()
+
+
+ def getResponse(self):
+ """ retrieve response from string io """
+ value = self.rep.getvalue()
+ self.rep.close()
+ self.rep = StringIO()
+ return value
+
+ def write(self, buf):
+ """ writes response """
+ if self.rep.tell() > 500000 or self.abort:
+ rep = self.getResponse()
+ if self.abort: raise Abort()
+ f = open("response.dump", "wb")
+ f.write(rep)
+ f.close()
+ raise Exception("Loaded Url exceeded limit")
+
+ self.rep.write(buf)
+
+ def writeHeader(self, buf):
+ """ writes header """
+ self.header += buf
+
+ def close(self):
+ """ cleanup, unusable after this """
+ self.c.close()
+ self.rep.close()
+ if hasattr(self, "cj"):
+ del self.cj
+
+
+if __name__ == "__main__":
+ url = "http://pyload.org"
+ c = HTTPRequest()
+ print c.load(url)
+ \ No newline at end of file
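
Besides the self-test at the bottom of the file, typical use of the new handle
looks like the sketch below; the cookies argument is assumed to be an instance
of the class from module/network/CookieJar.py:

    from HTTPRequest import HTTPRequest

    req = HTTPRequest() # or HTTPRequest(cookies=cj) to share cookies
    # get is urlencoded into the query string, post switches curl to POSTFIELDS
    page = req.load("http://pyload.org", get={"q": "pyload"})
    req.close() # frees the curl handle; the object is unusable afterwards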
diff --git a/module/network/RequestFactory.py b/module/network/RequestFactory.py
index 89d1aaf64..6ad64589a 100644
--- a/module/network/RequestFactory.py
+++ b/module/network/RequestFactory.py
@@ -20,7 +20,7 @@
from threading import Lock
from Browser import Browser
-from HTTPBase import HTTPBase
+from HTTPRequest import HTTPRequest
from CookieJar import CookieJar
class RequestFactory():
@@ -29,8 +29,6 @@ class RequestFactory():
self.core = core
self.cookiejars = {}
- iface = property(lambda self: self.core.config["general"]["download_interface"])
-
def getRequest(self, pluginName, account=None):
self.lock.acquire()
@@ -48,10 +46,10 @@ class RequestFactory():
def getURL(self, url, get={}, post={}):
#a bit too much overhead for a single url
- b = Browser()
- #@TODO proxies, iface
-
- return b.getPage(url, get, post)
+ h = HTTPRequest()
+ rep = h.load(url, get, post)
+ h.close()
+ return rep
def getCookieJar(self, pluginName, account=None):
if self.cookiejars.has_key((pluginName, account)):
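
getURL() now builds and tears down a complete HTTPRequest per call - hence the
"overhead" comment - which is acceptable for one-off fetches from core code:

    rep = core.requestFactory.getURL("http://pyload.org", get={"lang": "en"})

Plugins should keep going through getRequest(), which reuses one CookieJar per
(pluginName, account) pair.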
diff --git a/module/network/helper.py b/module/network/helper.py
deleted file mode 100644
index 5ce21a8dd..000000000
--- a/module/network/helper.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from threading import Thread
-from time import time, sleep
-
-def inttime():
- return int(time())
-
-class AlreadyCalled(Exception):
- pass
-
-def callInThread(f, *args, **kwargs):
- class FThread(Thread):
- def __init__(self):
- Thread.__init__(self)
- self.setDaemon(True)
- self.d = Deferred()
- def run(self):
- ret = f(*args, **kwargs)
- self.d.callback(ret)
- t = FThread()
- t.start()
- return t.d
-
-class Deferred():
- def __init__(self):
- self.call = []
- self.err = []
- self.prgr = {}
- self.result = ()
- self.errresult = ()
-
- def addCallback(self, f, *cargs, **ckwargs):
- self.call.append((f, cargs, ckwargs))
- if self.result:
- args, kwargs = self.result
- args+=tuple(cargs)
- kwargs.update(ckwargs)
- callInThread(f, *args, **kwargs)
-
- def addProgress(self, chain, f):
- if self.prgr.has_key(chain):
- self.prgr[chain].append(f)
- else:
- self.prgr[chain] = [f]
-
- def addErrback(self, f, *cargs, **ckwargs):
- self.err.append((f, cargs, ckwargs))
- if self.errresult:
- args, kwargs = self.errresult
- args+=tuple(cargs)
- kwargs.update(ckwargs)
- callInThread(f, *args, **kwargs)
-
- def callback(self, *args, **kwargs):
- self.result = (args, kwargs)
- for f, cargs, ckwargs in self.call:
- args+=tuple(cargs)
- kwargs.update(ckwargs)
- callInThread(f, *args, **kwargs)
- self.call = []
- self.result = ()
-
- def error(self, *args, **kwargs):
- self.errresult = (args, kwargs)
- for f, cargs, ckwargs in self.err:
- args+=tuple(cargs)
- kwargs.update(ckwargs)
- callInThread(f, *args, **kwargs)
- self.err = []
- self.errresult = ()
-
- def progress(self, chain, *args, **kwargs):
- if not self.prgr.has_key(chain):
- return
- for f in self.prgr[chain]:
- f(*args, **kwargs)
-
-#decorator
-def threaded(f):
- def ret(*args, **kwargs):
- return callInThread(f, *args, **kwargs)
- return ret
-
-def waitFor(d):
- class Waiter():
- waiting = True
- args = ()
- err = None
-
- def __init__(self, d):
- self.d = d
-
- def wait(self):
- self.d.addCallback(self.callb)
- self.d.addErrback(self.errb)
- while self.waiting and not self.d.abort:
- sleep(0.5)
- if self.err:
- #try:
- if issubclass(self.err[0][0], Exception):
- raise self.err[0][0](*self.err[0][1:], **self.err[1])
- #except:
- # pass
- raise Exception(*self.err[0], **self.err[1])
- return self.args
-
- def callb(self, *args, **kwargs):
- self.waiting = False
- self.args = (args, kwargs)
-
- def errb(self, *args, **kwargs):
- self.waiting = False
- self.err = (args, kwargs)
- w = Waiter(d)
- return w.wait()
-
-class DeferredGroup(Deferred):
- def __init__(self, group=[]):
- Deferred.__init__(self)
- self.group = group
- self.done = 0
-
- for d in self.group:
- d.addCallback(self._cb)
- d.addErrback(self.error)
-
- def addDeferred(self, d):
- d.addCallback(self._cb)
- d.addErrback(self.error)
- self.group.append(d)
-
- def _cb(self, *args, **kwargs):
- self.done += 1
- if len(self.group) == self.done:
- self.callback()
- self.group = []
- self.done = 0
-
- def error(self, *args, **kwargs):
- Deferred.error(self, *args, **kwargs)
- self.group = []
- self.done = 0
-
-class WrappedDeferred(object):
- def __init__(self, download, d):
- self.__dict__["download"] = download
- self.__dict__["d"] = d
-
- def __getattr__(self, attr):
- if attr in ("addCallback", "addErrback", "addProgress", "callback", "error", "progress"):
- return getattr(self.__dict__["d"], attr)
- return getattr(self.__dict__["download"], attr)
-
- def __setattr__(self, attr, val):
- if attr in ("addCallback", "addErrback", "addProgress", "callback", "error", "progress"):
- return setattr(self.__dict__["d"], attr, val)
- return setattr(self.__dict__["download"], attr, val)
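
With pycurl's multi interface doing the waiting inside HTTPDownload itself, the
home-grown Deferred machinery above has no callers left. The Plugin.py hunk
further down shows the migration; schematically:

    # before: get a Deferred back and block on it by hand
    d = self.req.httpDownload(url, filename, get=get, post=post,
                              chunks=self.getChunkCount(), resume=self.resumeDownload)
    waitFor(d)
    if d.abort: raise Abort

    # after: the call itself blocks until the transfer finished or failed
    self.req.httpDownload(url, filename, get=get, post=post,
                          chunks=self.getChunkCount(), resume=self.resumeDownload)
    self.pyfile.size = self.req.size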
diff --git a/module/network/socks.py b/module/network/socks.py
deleted file mode 100644
index 626ffe176..000000000
--- a/module/network/socks.py
+++ /dev/null
@@ -1,442 +0,0 @@
-"""SocksiPy - Python SOCKS module.
-Version 1.00
-
-Copyright 2006 Dan-Haim. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice, this
- list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-3. Neither the name of Dan Haim nor the names of his contributors may be used
- to endorse or promote products derived from this software without specific
- prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA
-OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-This module provides a standard socket-like interface for Python
-for tunneling connections through SOCKS proxies.
-
-"""
-
-"""
-
-Minor modifications made by Christopher Gilbert (http://motomastyle.com/)
-for use in PyLoris (http://pyloris.sourceforge.net/)
-
-Minor modifications made by Mario Vilas (http://breakingcode.wordpress.com/)
-mainly to merge bug fixes found in Sourceforge
-
-"""
-
-import socket
-
-if getattr(socket, 'socket', None) is None:
- raise ImportError('socket.socket missing, proxy support unusable')
-
-import struct
-import sys
-
-PROXY_TYPE_SOCKS4 = 1
-PROXY_TYPE_SOCKS5 = 2
-PROXY_TYPE_HTTP = 3
-
-_defaultproxy = None
-
-# Small hack for Python 2.x
-if sys.version_info[0] <= 2:
- def bytes(obj, enc=None):
- return obj
-
-class ProxyError(Exception):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-class GeneralProxyError(ProxyError):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-class Socks5AuthError(ProxyError):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-class Socks5Error(ProxyError):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-class Socks4Error(ProxyError):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-class HTTPError(ProxyError):
- def __init__(self, value):
- self.value = value
- def __str__(self):
- return repr(self.value)
-
-_generalerrors = ("success",
- "invalid data",
- "not connected",
- "not available",
- "bad proxy type",
- "bad input")
-
-_socks5errors = ("succeeded",
- "general SOCKS server failure",
- "connection not allowed by ruleset",
- "Network unreachable",
- "Host unreachable",
- "Connection refused",
- "TTL expired",
- "Command not supported",
- "Address type not supported",
- "Unknown error")
-
-_socks5autherrors = ("succeeded",
- "authentication is required",
- "all offered authentication methods were rejected",
- "unknown username or invalid password",
- "unknown error")
-
-_socks4errors = ("request granted",
- "request rejected or failed",
- ("request rejected because SOCKS server cannot connect to "
- "identd on the client"),
- ("request rejected because the client program and identd"
- " report different user-ids"),
- "unknown error")
-
-
-def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True,
- username=None, password=None):
- """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
- Sets a default proxy which all further socksocket objects will use,
- unless explicitly changed.
- """
- global _defaultproxy
- _defaultproxy = (proxytype, addr, port, rdns, username, password)
-
-
-class socksocket(socket.socket):
- """socksocket([family[, type[, proto]]]) -> socket object
-
- Open a SOCKS enabled socket. The parameters are the same as
- those of the standard socket init. In order for SOCKS to work,
- you must specify family=AF_INET, type=SOCK_STREAM and proto=0.
- """
-
- def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM,
- proto=0, _sock=None):
- socket.socket.__init__(self, family, type, proto, _sock)
- if _defaultproxy != None:
- self.__proxy = _defaultproxy
- else:
- self.__proxy = (None, None, None, None, None, None)
- self.__proxysockname = None
- self.__proxypeername = None
-
- def __decode(self, bytes):
- if getattr(bytes, 'decode', False):
- try:
- bytes = bytes.decode()
- except Exception:
- pass
- return bytes
-
- def __encode(self, bytes):
- if getattr(bytes, 'encode', False):
- try:
- bytes = bytes.encode()
- except Exception:
- pass
- return bytes
-
- def __recvall(self, count):
- """__recvall(count) -> data
- Receive EXACTLY the number of bytes requested from the socket.
- Blocks until the required number of bytes have been received.
- """
- data = bytes("")
- while len(data) < count:
- d = self.recv(count - len(data))
- if not d:
- raise GeneralProxyError(
- (0, "connection closed unexpectedly"))
- data = data + self.__decode(d)
- return data
-
- def sendall(self, bytes):
- socket.socket.sendall(self, self.__encode(bytes))
-
- def setproxy(self, proxytype=None, addr=None, port=None, rdns=True,
- username=None, password=None):
- """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]])
- Sets the proxy to be used.
- proxytype - The type of the proxy to be used. Three types
- are supported: PROXY_TYPE_SOCKS4 (including socks4a),
- PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP
- addr - The address of the server (IP or DNS).
- port - The port of the server. Defaults to 1080 for SOCKS
- servers and 8080 for HTTP proxy servers.
- rdns - Should DNS queries be preformed on the remote side
- (rather than the local side). The default is True.
- Note: This has no effect with SOCKS4 servers.
- username - Username to authenticate with to the server.
- The default is no authentication.
- password - Password to authenticate with to the server.
- Only relevant when username is also provided.
- """
- self.__proxy = (proxytype, addr, port, rdns, username, password)
-
- def __negotiatesocks5(self, destaddr, destport):
- """__negotiatesocks5(self,destaddr,destport)
- Negotiates a connection through a SOCKS5 server.
- """
- # First we'll send the authentication packages we support.
- if (self.__proxy[4] != None) and (self.__proxy[5] != None):
- # The username/password details were supplied to the
- # setproxy method so we support the USERNAME/PASSWORD
- # authentication (in addition to the standard none).
- self.sendall("\x05\x02\x00\x02")
- else:
- # No username/password were entered, therefore we
- # only support connections with no authentication.
- self.sendall("\x05\x01\x00")
- # We'll receive the server's response to determine which
- # method was selected
- chosenauth = self.__recvall(2)
- if chosenauth[0] != "\x05":
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- # Check the chosen authentication method
- if chosenauth[1] == "\x00":
- # No authentication is required
- pass
- elif chosenauth[1] == "\x02":
- # Okay, we need to perform a basic username/password
- # authentication.
- self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] +
- chr(len(self.__proxy[5])) + self.__proxy[5])
- authstat = self.__recvall(2)
- if authstat[0] != "\x01":
- # Bad response
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- if authstat[1] != "\x00":
- # Authentication failed
- self.close()
- raise Socks5AuthError((3, _socks5autherrors[3]))
- # Authentication succeeded
- else:
- # Reaching here is always bad
- self.close()
- if chosenauth[1] == "\xFF":
- raise Socks5AuthError((2, _socks5autherrors[2]))
- else:
- raise GeneralProxyError((1, _generalerrors[1]))
- # Now we can request the actual connection
- req = "\x05\x01\x00"
- # If the given destination address is an IP address, we'll
- # use the IPv4 address request even if remote resolving was specified.
- try:
- ipaddr = socket.inet_aton(destaddr)
- req = req + "\x01" + ipaddr
- except socket.error:
- # Well it's not an IP number, so it's probably a DNS name.
- if self.__proxy[3] == True:
- # Resolve remotely
- ipaddr = None
- req = req + "\x03" + chr(len(destaddr)) + destaddr
- else:
- # Resolve locally
- ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
- req = req + "\x01" + ipaddr
- req = req + self.__decode(struct.pack(">H", destport))
- self.sendall(req)
- # Get the response
- resp = self.__recvall(4)
- if resp[0] != "\x05":
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- elif resp[1] != "\x00":
- # Connection failed
- self.close()
- if ord(resp[1]) <= 8:
- raise Socks5Error((ord(resp[1]), _socks5errors[ord(resp[1])]))
- else:
- raise Socks5Error((9, _socks5errors[9]))
- # Get the bound address/port
- elif resp[3] == "\x01":
- boundaddr = self.__recvall(4)
- elif resp[3] == "\x03":
- resp = resp + self.recv(1)
- boundaddr = self.__recvall(ord(resp[4]))
- else:
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- d = bytes(self.__recvall(2), 'utf8')
- d = str(d) #python2.5 no unicode in struct
- boundport = struct.unpack(">H", d)[0]
- self.__proxysockname = boundaddr, boundport
- if ipaddr != None:
- self.__proxypeername = (socket.inet_ntoa(ipaddr), destport)
- else:
- self.__proxypeername = (destaddr, destport)
-
- def getproxysockname(self):
- """getsockname() -> address info
- Returns the bound IP address and port number at the proxy.
- """
- return self.__proxysockname
-
- def getproxypeername(self):
- """getproxypeername() -> address info
- Returns the IP and port number of the proxy.
- """
- return socket.socket.getpeername(self)
-
- def getpeername(self):
- """getpeername() -> address info
- Returns the IP address and port number of the destination
- machine (note: getproxypeername returns the proxy)
- """
- return self.__proxypeername
-
- def __negotiatesocks4(self, destaddr, destport):
- """__negotiatesocks4(self,destaddr,destport)
- Negotiates a connection through a SOCKS4 server.
- """
- # Check if the destination address provided is an IP address
- rmtrslv = False
- try:
- ipaddr = socket.inet_aton(destaddr)
- except socket.error:
- # It's a DNS name. Check where it should be resolved.
- if self.__proxy[3] == True:
- ipaddr = "\x00\x00\x00\x01"
- rmtrslv = True
- else:
- ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
- # Construct the request packet
- req = "\x04\x01" + self.__decode(struct.pack(">H", destport)) + ipaddr
- # The username parameter is considered userid for SOCKS4
- if self.__proxy[4] != None:
- req = req + self.__proxy[4]
- req = req + "\x00"
- # DNS name if remote resolving is required
- # NOTE: This is actually an extension to the SOCKS4 protocol
- # called SOCKS4A and may not be supported in all cases.
- if rmtrslv==True:
- req = req + destaddr + "\x00"
- self.sendall(req)
- # Get the response from the server
- resp = self.__recvall(8)
- if resp[0] != "\x00":
- # Bad data
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- if resp[1] != "\x5A":
- # Server returned an error
- self.close()
- if ord(resp[1]) in (91,92,93):
- self.close()
- raise Socks4Error((ord(resp[1]), _socks4errors[ord(resp[1])-90]))
- else:
- raise Socks4Error((94,_socks4errors[4]))
- # Get the bound address/port
- self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",bytes(resp[2:4],'utf8'))[0])
- if rmtrslv != None:
- self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)
- else:
- self.__proxypeername = (destaddr, destport)
-
- def __negotiatehttp(self, destaddr, destport):
- """__negotiatehttp(self,destaddr,destport)
- Negotiates a connection through an HTTP server.
- """
- # If we need to resolve locally, we do this now
- if self.__proxy[3] == False:
- addr = socket.gethostbyname(destaddr)
- else:
- addr = destaddr
- self.sendall(("CONNECT %s:%s HTTP/1.1\r\n"
- "Host: %s\r\n\r\n") % (addr, destport, destaddr))
- # We read the response until we get the string "\r\n\r\n"
- resp = self.recv(1)
- while resp.find("\r\n\r\n") == -1:
- resp = resp + self.recv(1)
- # We just need the first line to check if the connection
- # was successful
- statusline = resp.splitlines()[0].split(" ", 2)
- if statusline[0] not in ("HTTP/1.0", "HTTP/1.1"):
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- try:
- statuscode = int(statusline[1])
- except ValueError:
- self.close()
- raise GeneralProxyError((1, _generalerrors[1]))
- if statuscode != 200:
- self.close()
- raise HTTPError((statuscode, statusline[2]))
- self.__proxysockname = ("0.0.0.0", 0)
- self.__proxypeername = (addr, destport)
-
- def connect(self, destpair):
- """connect(self,despair)
- Connects to the specified destination through a proxy.
- destpar - A tuple of the IP/DNS address and the port number.
- (identical to socket's connect).
- To select the proxy server use setproxy().
- """
- # Do a minimal input check first
- # TODO(durin42): seriously? type checking? do we care?
- if ((not isinstance(destpair, (list, tuple))) or len(destpair) < 2
- or not isinstance(destpair[0], str) or not isinstance(destpair[1], int)):
- raise GeneralProxyError((5, _generalerrors[5]))
- if self.__proxy[0] == PROXY_TYPE_SOCKS5:
- if self.__proxy[2] != None:
- portnum = self.__proxy[2]
- else:
- portnum = 1080
- socket.socket.connect(self,(self.__proxy[1], portnum))
- self.__negotiatesocks5(destpair[0], destpair[1])
- elif self.__proxy[0] == PROXY_TYPE_SOCKS4:
- if self.__proxy[2] != None:
- portnum = self.__proxy[2]
- else:
- portnum = 1080
- socket.socket.connect(self, (self.__proxy[1], portnum))
- self.__negotiatesocks4(destpair[0], destpair[1])
- elif self.__proxy[0] == PROXY_TYPE_HTTP:
- if self.__proxy[2] != None:
- portnum = self.__proxy[2]
- else:
- portnum = 8080
- socket.socket.connect(self, (self.__proxy[1], portnum))
- self.__negotiatehttp(destpair[0], destpair[1])
- elif self.__proxy[0] == None:
- socket.socket.connect(self, (destpair[0], destpair[1]))
- else:
- raise GeneralProxyError((4, _generalerrors[4]))
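
The bundled SocksiPy copy can go because libcurl negotiates SOCKS4, SOCKS5 and
HTTP proxies natively. A sketch of how the "@TODO set proxies" in
HTTPRequest.setInterface might be filled in later - the proxy dict layout here
is an assumption, not part of this patch:

    import pycurl

    def setProxy(c, proxy):
        # assumed layout: {"type": "socks5", "address": "localhost",
        #                  "port": 1080, "username": None, "password": None}
        c.setopt(pycurl.PROXY, proxy["address"])
        c.setopt(pycurl.PROXYPORT, proxy["port"])
        if proxy["type"] == "socks4":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
        elif proxy["type"] == "socks5":
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
        else: # plain http proxy
            c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
        if proxy["username"]:
            # libcurl also handles proxy authentication for all three types
            c.setopt(pycurl.PROXYUSERPWD, "%s:%s" % (proxy["username"], proxy["password"]))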
diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py
index ffac26864..4f26c72b4 100644
--- a/module/plugins/Plugin.py
+++ b/module/plugins/Plugin.py
@@ -105,7 +105,6 @@ class Plugin(object):
#self.req.canContinue = True
else:
self.req = pyfile.m.core.requestFactory.getRequest(self.__name__)
- self.req.progressNotify = pyfile.progress.setValue
self.log = pyfile.m.core.log
@@ -292,7 +291,7 @@ class Plugin(object):
""" returns the content loaded """
if self.pyfile.abort: raise Abort
- res = self.req.getPage(url, get=get, post=post, cookies=cookies)
+ res = self.req.getPage(url, get, post, ref, cookies)
if self.core.debug:
from inspect import currentframe
frame = currentframe()
@@ -335,17 +334,11 @@ class Plugin(object):
name = self.pyfile.name.encode(sys.getfilesystemencoding(), "replace")
filename = join(location, name)
- d = self.req.httpDownload(url, filename, get=get, post=post, chunks=self.getChunkCount(), resume=self.resumeDownload)
- self.pyfile.download = d
- d.addProgress("percent", self.pyfile.progress.setValue)
- waitFor(d)
+ self.req.httpDownload(url, filename, get=get, post=post, chunks=self.getChunkCount(), resume=self.resumeDownload)
- if d.abort: raise Abort
-
- self.pyfile.download = None
newname = basename(filename)
- self.pyfile.size = d.size
+ self.pyfile.size = self.req.size
if newname and newname != name:
self.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname})
diff --git a/module/plugins/hoster/BasePlugin.py b/module/plugins/hoster/BasePlugin.py
index 292570d6c..08c53a616 100644
--- a/module/plugins/hoster/BasePlugin.py
+++ b/module/plugins/hoster/BasePlugin.py
@@ -13,6 +13,10 @@ class BasePlugin(Hoster):
__author_name__ = ("RaNaN")
__author_mail__ = ("RaNaN@pyload.org")
+ def setup(self):
+ self.chunkLimit = 3
+ self.resumeDownload = True
+
def process(self, pyfile):
"""main function"""