author     RaNaN <Mast3rRaNaN@hotmail.de>  2010-12-27 21:18:29 +0100
committer  RaNaN <Mast3rRaNaN@hotmail.de>  2010-12-27 21:18:29 +0100
commit     9509a6444bbb538e136ed899d94aab32be629383 (patch)
tree       ac8532b20912a3e5be6ff73443520a7f31f5806a
parent     encoding fix (diff)
download   pyload-9509a6444bbb538e136ed899d94aab32be629383.tar.xz
new curl download backend - support for chunked dl, resume
-rw-r--r--  module/PluginThread.py                   |  24
-rw-r--r--  module/PyFile.py                         |  14
-rw-r--r--  module/ThreadManager.py                  |  23
-rw-r--r--  module/network/Browser.py                | 196
-rw-r--r--  module/network/Bucket.py                 |  42
-rw-r--r--  module/network/CookieJar.py              |  61
-rw-r--r--  module/network/CookieRedirectHandler.py  | 143
-rw-r--r--  module/network/HTTPBase.py               | 396
-rw-r--r--  module/network/HTTPChunk.py              | 353
-rw-r--r--  module/network/HTTPDownload.py           | 475
-rw-r--r--  module/network/HTTPRequest.py            | 164
-rw-r--r--  module/network/RequestFactory.py         |  12
-rw-r--r--  module/network/helper.py                 | 156
-rw-r--r--  module/network/socks.py                  | 442
-rw-r--r--  module/plugins/Plugin.py                 |  13
-rw-r--r--  module/plugins/hoster/BasePlugin.py      |   4
16 files changed, 668 insertions, 1850 deletions
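The commit replaces the urllib2/Deferred download stack with a pycurl-based one. A minimal usage sketch of the new backend, assembled from the __main__ test blocks further down in this diff (test URL, filename and rate are the diff's own values; treat this as illustration, not API documentation):

    # Sketch: downloading with the new backend (Python 2, pycurl required).
    from module.network.Bucket import Bucket
    from module.network.HTTPDownload import HTTPDownload

    bucket = Bucket()
    bucket.setRate(200 * 1024)          # limit to 200 kb/s

    dl = HTTPDownload("http://speedtest.netcologne.de/test_10mb.bin",
                      "test_10mb.bin", bucket=bucket)
    dl.download(chunks=3, resume=True)  # blocks until done; 3 ranged chunks,
                                        # resumes via the on-disk .chunks file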
diff --git a/module/PluginThread.py b/module/PluginThread.py
index 3f24db345..f72e94eaf 100644
--- a/module/PluginThread.py
+++ b/module/PluginThread.py
@@ -29,8 +29,7 @@ from sys import exc_info, exc_clear
 from types import MethodType
 from os.path import join, exists
 
-from urllib2 import URLError
-from socket import error
+from pycurl import error
 
 from module.plugins.Plugin import Abort
 from module.plugins.Plugin import Fail
@@ -215,20 +214,14 @@ class DownloadThread(PluginThread):
                 continue
 
             except error, e:
-                #@TODO determine correct error codes
-                if len(e.args) > 1:
-                    code = e.args[0]
-                    msg = e.args[1:]
-                else:
-                    code = -1
+                try:
+                    code, msg = e.args
+                except:
+                    code = 0
                     msg = e.args
 
-                if "timed out" in msg:
-                    code = 990
-
-                self.m.log.debug("socket error %s: %s" % (code, msg))
-                if code in (104, 990):
-                    self.m.log.warning(_("Couldn't connect to host or connection resetted, waiting 1 minute and retry."))
+                if code in (7, 18, 28, 52, 56):
+                    self.m.log.warning(_("Couldn't connect to host or connection resetted waiting 1 minute and retry."))
                     wait = time() + 60
                     while time() < wait:
                         sleep(1)
@@ -241,14 +234,13 @@ class DownloadThread(PluginThread):
                     self.clean(pyfile)
 
                 else:
-                    pyfile.plugin.req.canContinue = False
                     self.queue.put(pyfile)
                     continue
 
             else:
                 pyfile.setStatus("failed")
-                self.m.log.error("socket error %s: %s" % (code, msg))
+                self.m.log.error("pycurl error %s: %s" % (code, msg))
 
                 if self.m.core.debug:
                     print_exc()
                     self.writeDebugReport(pyfile)
diff --git a/module/PyFile.py b/module/PyFile.py
index c198ce459..648b7e838 100644
--- a/module/PyFile.py
+++ b/module/PyFile.py
@@ -70,7 +70,7 @@ class PyFile():
         # database information ends here
 
         self.plugin = None
-        self.download = None
+        #self.download = None
 
         self.waitUntil = 0 # time() + time to wait
@@ -212,7 +212,7 @@ class PyFile():
     def getSpeed(self):
         """ calculates speed """
         try:
-            return self.download.speed
+            return self.plugin.req.speed
         except:
             return 0
@@ -226,21 +226,23 @@ class PyFile():
     def getBytesLeft(self):
         """ gets bytes left """
         try:
-            return self.download.size - self.download.arrived
+            return self.plugin.req.size - self.plugin.req.arrived
         except:
             return 0
 
     def getPercent(self):
         """ get % of download """
-        return self.progress.getPercent()
+        try:
+            return self.plugin.req.percent
+        except:
+            return 0
 
     def getSize(self):
         """ get size of download """
         if self.size: return self.size
         else:
             try:
-                if not self.download.size: return 0
-                return self.download.size
+                return self.plugin.req.size
             except:
                 return 0
diff --git a/module/ThreadManager.py b/module/ThreadManager.py
index 0a2ce674b..c5744cb92 100644
--- a/module/ThreadManager.py
+++ b/module/ThreadManager.py
@@ -26,6 +26,8 @@ from time import sleep
 from traceback import print_exc
 from random import choice
 
+import pycurl
+
 import PluginThread
 from module.network.Request import getURL
@@ -50,7 +52,7 @@ class ThreadManager:
         self.reconnecting.clear()
         self.downloaded = 0 #number of files downloaded since last cleanup
 
-        #pycurl.global_init(pycurl.GLOBAL_DEFAULT)
+        pycurl.global_init(pycurl.GLOBAL_DEFAULT)
 
         for i in range(0, self.core.config.get("general", "max_downloads")):
             self.createThread()
@@ -147,7 +149,6 @@ class ThreadManager:
             sleep(1)
 
         ip = self.getIP()
         self.core.hookManager.afterReconnecting(ip)
-        self.closeAllConnecions()
 
         self.log.info(_("Reconnected, new IP: %s") % ip)
@@ -184,6 +185,16 @@ class ThreadManager:
 
         free[0].put("quit")
 
+    def cleanPycurl(self):
+        """ make a global curl cleanup (currently unused) """
+        if self.downloadingIds() or self.processingIds():
+            return False
+        pycurl.global_cleanup()
+        pycurl.global_init(pycurl.GLOBAL_DEFAULT)
+        self.downloaded = 0
+        self.log.debug("Cleaned up pycurl")
+        return True
+
     #----------------------------------------------------------------------
     def assignJob(self):
         """assing a job to a thread if possible"""
@@ -229,8 +240,6 @@ class ThreadManager:
         else:
             thread = PluginThread.DecrypterThread(self, job)
 
-    def closeAllConnecions(self):
-        """closes all connections, when a reconnect was made """
-        for pyfile in self.core.files.cache.itervalues():
-            if pyfile.plugin and pyfile.plugin.req:
-                pyfile.plugin.req.http.closeAll()
+    def cleanup(self):
+        """do global cleanup, should be called when finished with pycurl"""
+        pycurl.global_cleanup()
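DownloadThread now retries on libcurl's own error codes instead of socket errnos. Going by curl's public error list (the names come from curl's headers, not from this diff), the retried set is 7 (COULDNT_CONNECT), 18 (PARTIAL_FILE), 28 (OPERATION_TIMEDOUT), 52 (GOT_NOTHING) and 56 (RECV_ERROR). A sketch of how such an error surfaces:

    # Sketch: a pycurl.error carries (errno, message), as unpacked above.
    import pycurl

    c = pycurl.Curl()
    c.setopt(pycurl.URL, "http://localhost:1")   # nothing listens here, so connect fails
    try:
        c.perform()
    except pycurl.error, e:
        code, msg = e.args                       # e.g. (7, "couldn't connect to host")
        retry = code in (7, 18, 28, 52, 56)      # the set DownloadThread waits and retries on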
diff --git a/module/network/Browser.py b/module/network/Browser.py
index 2f0144f81..0bed8e395 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -1,161 +1,100 @@
-from random import randint
-from helper import *
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
 from os.path import join
 from logging import getLogger
-import zlib
 
-from CookieJar import CookieJar
-from HTTPBase import HTTPBase
+from HTTPRequest import HTTPRequest
 from HTTPDownload import HTTPDownload
-from FTPBase import FTPDownload
-from XDCCBase import XDCCDownload
-from traceback import print_stack
 
 class Browser(object):
-    def __init__(self, interface=None, cookieJar=None, bucket=None, proxies={}):
+    def __init__(self, interface=None, cj=None, bucket=None, proxies={}):
         self.log = getLogger("log")
 
-        self.lastURL = None
         self.interface = interface
+        self.cj = cj
         self.bucket = bucket
-
-        self.http = HTTPBase(interface=interface, proxies=proxies)
-        self.setCookieJar(cookieJar)
         self.proxies = proxies
-        self.abort = property(lambda: False, lambda val: self.abortDownloads() if val else None)
-
-        self.downloadConnections = []
 
-    lastEffectiveURL = property(lambda self: self.lastURL) #@backward comp, @TODO real last effective url
+        self._size = 0
+
+        self.http = HTTPRequest(cj, interface, proxies)
+        self.dl = None
+
+    lastEffectiveURL = property(lambda self: self.http.lastEffectiveURL)
 
-    def setCookieJar(self, cookieJar):
-        self.cookieJar = cookieJar
-        self.http.cookieJar = self.cookieJar
+    def setCookieJar(self, cj):
+        self.cj = cj
+        self.http.cj = cj
+
+    @property
+    def speed(self):
+        if self.dl:
+            return self.dl.speed
+        return 0
+
+    @property
+    def size(self):
+        if self._size:
+            return self._size
+        if self.dl:
+            return self.dl.size
+        return 0
+
+    @property
+    def arrived(self):
+        if self.dl:
+            return self.dl.arrived
+        return 0
+
+    @property
+    def percent(self):
+        if not self.size: return 0
+        return (self.arrived * 100) / self.size
 
     def clearCookies(self):
-        self.cookieJar.clear()
+        if self.cj:
+            self.cj.clear()
 
     def clearReferer(self):
         self.lastURL = None
 
-    def getPage(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
-        if not referer:
-            referer = self.lastURL
-        self.http.followRedirect = True
-        resp = self.http.getResponse(url, get=get, post=post, referer=referer, cookies=cookies,
-                                     customHeaders=customHeaders)
-        data = resp.read()
-        try:
-            if resp.info()["Content-Encoding"] == "gzip":
-                data = zlib.decompress(data, 16 + zlib.MAX_WBITS)
-            elif resp.info()["Content-Encoding"] == "deflate":
-                data = zlib.decompress(data, -zlib.MAX_WBITS)
-        except:
-            pass
-
-        try:
-            content_type = resp.info()["Content-Type"]
-            infos = [info.strip() for info in content_type.split(";")]
-            charset = None
-            for info in infos:
-                if info.startswith("charset"):
-                    none, charset = info.split("=")
-            if charset:
-                data = data.decode(charset)
-        except Exception, e:
-            self.log.debug("Could not decode charset: %s" % e)
-
-        self.lastURL = resp.geturl()
-        return data
-
-    def getRedirectLocation(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
-        if not referer:
-            referer = self.lastURL
-        self.http.followRedirect = False
-        resp = self.http.getResponse(url, get=get, post=post, referer=referer, cookies=cookies,
-                                     customHeaders=customHeaders)
-        resp.close()
-        self.lastURL = resp.geturl()
-        location = None
-        try:
-            location = resp.info()["Location"]
-        except:
-            pass
-        return location
-
-    def _removeConnection(self, *args, **kwargs):
-        i = self.downloadConnections.index(args[-1])
-        self.downloadConnections[i].download.clean()
-        del self.downloadConnections[i]
 
     def abortDownloads(self):
-        for d in self.downloadConnections:
-            d.download.setAbort(True)
-            d.abort = True
+        self.http.abort = True
+        if self.dl:
+            self.dl.abort = True
+
+    def httpDownload(self, url, filename, get={}, post={}, ref=True, cookies=True, chunks=1, resume=False):
+        self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
+                               self.cj if cookies else None, self.bucket, self.interface,
+                               self.proxies)
+        self.dl.download(chunks, resume)
+        self._size = self.dl.size
+
+        self.dl.clean()
+        self.dl = None
 
-    @property
-    def speed(self):
-        speed = 0
-        for d in self.downloadConnections:
-            speed += d.speed
-        return speed
-
-    def httpDownload(self, url, filename, get={}, post={}, referer=None, cookies=True, customHeaders={}, chunks=1,
-                     resume=False):
-        if not referer:
-            referer = self.lastURL
-
-        dwnld = HTTPDownload(url, filename, get=get, post=post, referer=referer, cookies=cookies,
-                             customHeaders=customHeaders, bucket=self.bucket, interface=self.interface,
-                             proxies=self.proxies)
-        dwnld.cookieJar = self.cookieJar
-
-        d = dwnld.download(chunks=chunks, resume=resume)
-        self.downloadConnections.append(d)
-        d.addCallback(self._removeConnection, d)
-        d.addErrback(self._removeConnection, d)
-        return d
-
-    def ftpDownload(self, url, filename, resume=False):
-        dwnld = FTPDownload(url, filename, bucket=self.bucket, interface=self.interface, proxies=self.proxies)
-
-        d = dwnld.download(resume=resume)
-        self.downloadConnections.append(d)
-        d.addCallback(self._removeConnection, d)
-        return d
-
-    def xdccDownload(self, server, port, channel, bot, pack, filename, nick="pyload_%d" % randint(1000, 9999),
-                     ident="pyload", real="pyloadreal"):
-        dwnld = XDCCDownload(server, port, channel, bot, pack, nick, ident, real, filename, bucket=self.bucket,
-                             interface=self.interface, proxies=self.proxies)
-
-        d = dwnld.download()
-        self.downloadConnections.append(d)
-        d.addCallback(self._removeConnection, d)
-        return d
-
-    def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, no_post_encode=False, raw_cookies={}):
-        self.log.warning("Browser: deprecated call 'load'")
-        print_stack()
-        return self.getPage(url, get=get, post=post, cookies=cookies)
 
     def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False):
-        #@TODO
         self.log.warning("Browser: deprecated call 'download'")
-        print_stack()
-        filename = join(folder, file_name)
-        d = self.httpDownload(url, filename, get, post)
-        waitFor(d)
+        return self.httpDownload(url, join(folder, file_name), get, post, ref, cookies)
+
-        return filename
+    def getPage(self, url, get={}, post={}, ref=True, cookies=True):
+        """ retrieves page """
+        return self.http.load(url, get, post, ref, cookies)
 
     def clean(self):
         """ cleanup """
         if hasattr(self, "http"):
-            self.http.clean()
+            self.http.close()
             del self.http
+        if hasattr(self, "dl"):
+            del self.dl
+        if hasattr(self, "cj"):
+            del self.cj
 
 if __name__ == "__main__":
     browser = Browser()#proxies={"socks5": "localhost:5000"})
@@ -167,8 +106,5 @@ if __name__ == "__main__":
     #browser.getPage("https://encrypted.google.com/")
     #browser.getPage("http://google.com/search?q=bar")
 
-    browser.httpDownload("http://speedtest.netcologne.de/test_100mb.bin", "test_100mb.bin")
-    from time import sleep
+    browser.httpDownload("http://speedtest.netcologne.de/test_10mb.bin", "test_10mb.bin")
 
-    while True:
-        sleep(1)
diff --git a/module/network/Bucket.py b/module/network/Bucket.py
index 434cbe662..dc1280ede 100644
--- a/module/network/Bucket.py
+++ b/module/network/Bucket.py
@@ -14,40 +14,44 @@
     You should have received a copy of the GNU General Public License
     along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-    @author: mkaay
+    @author: RaNaN
 """
 
-from time import time, sleep
+from time import time
 from threading import Lock
 
 class Bucket:
     def __init__(self):
-        self.content = 0
         self.rate = 0
-        self.lastDrip = time()
+        self.tokens = 0
+        self.timestamp = time()
         self.lock = Lock()
 
     def setRate(self, rate):
        self.lock.acquire()
        self.rate = rate
        self.lock.release()
-
-    def add(self, amount):
+
+    def consumed(self, amount):
+        """ return time the process have to sleep, after consumed specified amount """
        self.lock.acquire()
-        self.drip()
-        allowable = min(amount, self.rate - self.content)
-        if allowable > 0:
-            sleep(0.005)
-        self.content += allowable
+        self.calc_tokens()
+        self.tokens -= amount
+
+        if self.tokens < 0:
+            time = -self.tokens/float(self.rate)
+        else:
+            time = 0
+
        self.lock.release()
-        return allowable
+        return time
 
-    def drip(self):
-        if self.rate:
+    def calc_tokens(self):
+        if self.tokens < self.rate:
             now = time()
-            deltaT = now - self.lastDrip
-            self.content = long(max(0, self.content - deltaT * self.rate))
-            self.lastDrip = now
-        else:
-            self.content = 0
+            delta = self.rate * (now - self.timestamp)
+            self.tokens = min(self.rate, self.tokens + delta)
+            self.timestamp = now
+
diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py
index b2cbba504..c9ae6cb6c 100644
--- a/module/network/CookieJar.py
+++ b/module/network/CookieJar.py
@@ -17,51 +17,34 @@
     @author: mkaay, RaNaN
 """
 
-from cookielib import CookieJar as PyCookieJar
-from cookielib import Cookie
 from time import time
 
-class CookieJar(PyCookieJar):
-    def __init__(self, pluginName=None, account=None):
-        PyCookieJar.__init__(self)
-        self.plugin = pluginName
+class CookieJar():
+    def __init__(self, pluginname, account=None):
+        self.cookies = {}
+        self.plugin = pluginname
         self.account = account
 
-    def getCookie(self, name):
-        print "getCookie not implemented!"
-        return None
-
-    def setCookie(self, domain, name, value, path="/"):
-        c = Cookie(version=0, name=name, value=value, port=None, port_specified=False,
-                   domain=domain, domain_specified=False,
-                   domain_initial_dot=(domain.startswith(".")), path=path, path_specified=True,
-                   secure=False, expires=None, discard=True, comment=None,
-                   comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
-        self.set_cookie(c)
-
-    def add_cookie_header(self, request):
-        self._cookies_lock.acquire()
-        try:
+    def addCookies(self, clist):
+        for c in clist:
+            name = c.split("\t")[5]
+            self.cookies[name] = c
 
-            self._policy._now = self._now = int(time())
+    def getCookies(self):
+        return self.cookies.values()
 
-            cookies = self._cookies_for_request(request)
+    def parseCookie(self, name):
+        if self.cookies.has_key(name):
+            return self.cookies[name].split("\t")[6]
+        else:
+            return None
 
-            attrs = self._cookie_attrs(cookies)
-            if attrs:
-                if not request.has_header("Cookie"):
-                    request.add_header(
-                        "Cookie", "; ".join(attrs))
-
-            # if necessary, advertise that we know RFC 2965
-            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
-                not request.has_header("Cookie2")):
-                for cookie in cookies:
-                    if cookie.version != 1:
-                        request.add_header("Cookie2", '$Version="1"')
-                        break
+    def getCookie(self, name):
+        return self.parseCookie(name)
 
-        finally:
-            self._cookies_lock.release()
+    def setCookie(self, domain, name, value, path="/", exp=time()+3600*24*180):
+        s = ".%s TRUE %s FALSE %s %s %s" % (domain, path, exp, name, value)
+        self.cookies[name] = s
 
-        self.clear_expired_cookies()
+    def clear(self):
+        self.cookies = {}
\ No newline at end of file
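The new CookieJar is a plain dict of raw cookie lines in the format pycurl's INFO_COOKIELIST emits (Netscape cookie-file order, tab-separated: domain, flag, path, secure, expires, name, value; addCookies indexes field 5 for the name, parseCookie field 6 for the value). A round-trip sketch with a hand-made cookie line (the domain and values are illustrative only):

    # Sketch: round trip through the new CookieJar (fields tab-separated).
    from module.network.CookieJar import CookieJar

    cj = CookieJar("SomePlugin")
    cj.addCookies([".pyload.org\tTRUE\t/\tFALSE\t1400000000\tsession\tabc123"])
    print cj.getCookie("session")   # -> "abc123"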
diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py
deleted file mode 100644
index b7c4c953e..000000000
--- a/module/network/CookieRedirectHandler.py
+++ /dev/null
@@ -1,143 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License,
-    or (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-    See the GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see <http://www.gnu.org/licenses/>.
-
-    @author: mkaay, RaNaN
-"""
-
-from urllib2 import BaseHandler, HTTPError
-from urllib import addinfourl
-from urllib2 import Request
-from urlparse import urlparse, urlunparse, urljoin
-from CookieJar import CookieJar
-
-class CookieRedirectHandler(BaseHandler):
-    # maximum number of redirections to any single URL
-    # this is needed because of the state that cookies introduce
-    max_repeats = 4
-    # maximum total number of redirections (regardless of URL) before
-    # assuming we're in a loop
-    max_redirections = 10
-
-    def __init__(self, cookiejar=None, follow=True):
-        if cookiejar is None:
-            cookiejar = CookieJar()
-        self.cookiejar = cookiejar
-        self.follow = follow
-
-    def http_request(self, request):
-        self.cookiejar.add_cookie_header(request)
-        return request
-
-    def http_response(self, request, response):
-        self.cookiejar.extract_cookies(response, request)
-        return response
-
-    def redirect_request(self, req, fp, code, msg, headers, newurl):
-        """Return a Request or None in response to a redirect.
-
-        This is called by the http_error_30x methods when a
-        redirection response is received.  If a redirection should
-        take place, return a new Request to allow http_error_30x to
-        perform the redirect.  Otherwise, raise HTTPError if no-one
-        else should try to handle this url.  Return None if you can't
-        but another Handler might.
-        """
-        m = req.get_method()
-        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
-            or code in (301, 302, 303) and m == "POST"):
-            # Strictly (according to RFC 2616), 301 or 302 in response
-            # to a POST MUST NOT cause a redirection without confirmation
-            # from the user (of urllib2, in this case).  In practice,
-            # essentially all clients do redirect in this case, so we
-            # do the same.
-            # be conciliant with URIs containing a space
-            newurl = newurl.replace(' ', '%20')
-            newheaders = dict((k,v) for k,v in req.headers.items()
-                              if k.lower() not in ("content-length", "content-type", "cookie")
-                             )
-            req = Request(newurl,
-                          headers=newheaders,
-                          origin_req_host=req.get_origin_req_host(),
-                          unverifiable=True)
-            self.cookiejar.add_cookie_header(req)
-            return req
-        else:
-            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
-
-    # Implementation note: To avoid the server sending us into an
-    # infinite loop, the request object needs to track what URLs we
-    # have already seen.  Do this by adding a handler-specific
-    # attribute to the Request object.
-    def http_error_302(self, req, fp, code, msg, headers):
-        resp = addinfourl(fp, headers, req.get_full_url())
-        resp.code = code
-        resp.msg = msg
-        self.cookiejar.extract_cookies(resp, req)
-
-        if not self.follow:
-            return resp
-
-        # Some servers (incorrectly) return multiple Location headers
-        # (so probably same goes for URI).  Use first header.
-        if 'location' in headers:
-            newurl = headers.getheaders('location')[0]
-        elif 'uri' in headers:
-            newurl = headers.getheaders('uri')[0]
-        else:
-            return
-
-        # fix a possible malformed URL
-        urlparts = urlparse(newurl)
-        if not urlparts.path:
-            urlparts = list(urlparts)
-            urlparts[2] = "/"
-            newurl = urlunparse(urlparts)
-
-        newurl = urljoin(req.get_full_url(), newurl)
-
-        # XXX Probably want to forget about the state of the current
-        # request, although that might interact poorly with other
-        # handlers that also use handler-specific request attributes
-        new = self.redirect_request(req, fp, code, msg, headers, newurl)
-        if new is None:
-            return
-
-        # loop detection
-        # .redirect_dict has a key url if url was previously visited.
-        if hasattr(req, 'redirect_dict'):
-            visited = new.redirect_dict = req.redirect_dict
-            if (visited.get(newurl, 0) >= self.max_repeats or
-                len(visited) >= self.max_redirections):
-                raise HTTPError(req.get_full_url(), code,
-                                self.inf_msg + msg, headers, fp)
-        else:
-            visited = new.redirect_dict = req.redirect_dict = {}
-        visited[newurl] = visited.get(newurl, 0) + 1
-
-        # Don't close the fp until we are sure that we won't use it
-        # with HTTPError.
-        fp.read()
-        fp.close()
-        return self.parent.open(new) #, timeout=req.timeout)
-
-    http_error_301 = http_error_303 = http_error_307 = http_error_302
-
-    inf_msg = "The HTTP server returned a redirect error that would " \
-              "lead to an infinite loop.\n" \
-              "The last 30x error message was:\n"
-
-    https_request = http_request
-    https_response = http_response
diff --git a/module/network/HTTPBase.py b/module/network/HTTPBase.py
deleted file mode 100644
index f5cd7afcc..000000000
--- a/module/network/HTTPBase.py
+++ /dev/null
@@ -1,396 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-"""
-    This program is free software; you can redistribute it and/or modify
-    it under the terms of the GNU General Public License as published by
-    the Free Software Foundation; either version 3 of the License,
-    or (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-    See the GNU General Public License for more details.
-
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, see <http://www.gnu.org/licenses/>.
-
-    @author: mkaay
-"""
-
-from urllib import urlencode
-
-from urllib2 import Request
-from urllib2 import OpenerDirector
-from urllib2 import HTTPHandler
-from urllib2 import HTTPSHandler
-from urllib2 import HTTPDefaultErrorHandler
-from urllib2 import HTTPErrorProcessor
-from urllib2 import ProxyHandler
-from urllib2 import URLError
-from urllib2 import _parse_proxy
-
-from httplib import HTTPConnection
-from httplib import HTTPResponse
-from httplib import responses as HTTPStatusCodes
-from httplib import ResponseNotReady
-from httplib import BadStatusLine
-from httplib import CannotSendRequest
-
-from CookieJar import CookieJar
-from CookieRedirectHandler import CookieRedirectHandler
-
-import socket
-import socks
-
-from MultipartPostHandler import MultipartPostHandler
-
-DEBUG = 0
-HANDLE_ERRORS = 1
-
-class PyLoadHTTPResponse(HTTPResponse):
-    def __init__(self, sock, debuglevel=0, strict=0, method=None):
-        if method: # the httplib in python 2.3 uses the method arg
-            HTTPResponse.__init__(self, sock, debuglevel, method)
-        else: # 2.2 doesn't
-            HTTPResponse.__init__(self, sock, debuglevel)
-        self.fileno = sock.fileno
-        self._rbuf = ''
-        self._rbufsize = 8096
-        self._handler = None # inserted by the handler later
-        self._host = None # (same)
-        self._url = None # (same)
-
-    _raw_read = HTTPResponse.read
-
-    def close_connection(self):
-        self.close()
-        self._handler._remove_connection(self._host, close=1)
-
-    def info(self):
-        return self.msg
-
-    def geturl(self):
-        return self._url
-
-    def read(self, amt=None):
-        # the _rbuf test is only in this first if for speed.  It's not
-        # logically necessary
-        if self._rbuf and not amt is None:
-            L = len(self._rbuf)
-            if amt > L:
-                amt -= L
-            else:
-                s = self._rbuf[:amt]
-                self._rbuf = self._rbuf[amt:]
-                return s
-
-        s = self._rbuf + self._raw_read(amt)
-        self._rbuf = ''
-        return s
-
-    def readline(self, limit=-1):
-        i = self._rbuf.find('\n')
-        while i < 0 and not (0 < limit <= len(self._rbuf)):
-            new = self._raw_read(self._rbufsize)
-            if not new: break
-            i = new.find('\n')
-            if i >= 0: i = i + len(self._rbuf)
-            self._rbuf = self._rbuf + new
-        if i < 0: i = len(self._rbuf)
-        else: i += 1
-        if 0 <= limit < len(self._rbuf): i = limit
-        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
-        return data
-
-    def readlines(self, sizehint = 0):
-        total = 0
-        list = []
-        while 1:
-            line = self.readline()
-            if not line: break
-            list.append(line)
-            total += len(line)
-            if sizehint and total >= sizehint:
-                break
-        return list
-
-    @property
-    def code(self):
-        return self.status
-
-    def getcode(self):
-        return self.status
-
-class PyLoadHTTPConnection(HTTPConnection):
-    sourceAddress = ('', 0)
-    socksProxy = None
-    response_class = PyLoadHTTPResponse
-
-    def connect(self):
-        if self.socksProxy:
-            self.sock = socks.socksocket()
-            t = _parse_proxy(self.socksProxy[1])
-            self.sock.setproxy(self.socksProxy[0], addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
-        else:
-            self.sock = socket.socket()
-        self.sock.settimeout(30)
-        self.sock.bind(self.sourceAddress)
-        self.sock.connect((self.host, self.port))
-
-        try:
-            if self._tunnel_host:
-                self._tunnel()
-        except: #python2.5
-            pass
-
-class PyLoadHTTPHandler(HTTPHandler):
-    sourceAddress = ('', 0)
-    socksProxy = None
-
-    def __init__(self):
-        self._connections = {}
-
-    def setInterface(self, interface):
-        if interface is None:
-            interface = ""
-        self.sourceAddress = (interface, 0)
-
-    def setSocksProxy(self, *t):
-        self.socksProxy = t
-
-    def close_connection(self, host):
-        """close connection to <host>
-        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
-        no error occurs if there is no connection to that host."""
-        self._remove_connection(host, close=1)
-
-    def open_connections(self):
-        """return a list of connected hosts"""
-        return self._connections.keys()
-
-    def close_all(self):
-        """close all open connections"""
-        for host, conn in self._connections.items():
-            conn.close()
-        self._connections = {}
-
-    def _remove_connection(self, host, close=0):
-        if self._connections.has_key(host):
-            if close: self._connections[host].close()
-            del self._connections[host]
-
-    def _start_connection(self, h, req):
-        data = ""
-
-        if req.has_data():
-            data = req.get_data()
-            h.putrequest('POST', req.get_selector(), skip_accept_encoding=1)
-            if not req.headers.has_key('Content-type'):
-                h.putheader('Content-type',
-                            'application/x-www-form-urlencoded')
-            if not req.headers.has_key('Content-length'):
-                h.putheader('Content-length', '%d' % len(data))
-        else:
-            h.putrequest('GET', req.get_selector(), skip_accept_encoding=1)
-
-        for args in self.parent.addheaders:
-            h.putheader(*args)
-        for k, v in req.headers.items():
-            h.putheader(k, v)
-        h.endheaders()
-        if req.has_data():
-            h.send(data)
-
-    def do_open(self, http_class, req):
-        host = req.get_host()
-        if not host:
-            raise URLError('no host given')
-
-        need_new_connection = 1
-        h = self._connections.get(host)
-        if not h is None:
-            try:
-                self._start_connection(h, req)
-            except socket.error:
-                r = None
-            except BadStatusLine:
-                r = None
-            except CannotSendRequest:
-                r = None
-            else:
-                try: r = h.getresponse()
-                except ResponseNotReady: r = None
-                except BadStatusLine: r = None
-
-            if r is None or r.version == 9:
-                # httplib falls back to assuming HTTP 0.9 if it gets a
-                # bad header back.  This is most likely to happen if
-                # the socket has been closed by the server since we
-                # last used the connection.
-                if DEBUG: print "failed to re-use connection to %s" % host
-                h.close()
-            else:
-                if DEBUG: print "re-using connection to %s" % host
-                need_new_connection = 0
-        if need_new_connection:
-            if DEBUG: print "creating new connection to %s" % host
-            h = http_class(host)
-            h.sourceAddress = self.sourceAddress
-            h.socksProxy = self.socksProxy
-            self._connections[host] = h
-            self._start_connection(h, req)
-            r = h.getresponse()
-
-        # if not a persistent connection, don't try to reuse it
-        if r.will_close: self._remove_connection(host)
-
-        if DEBUG:
-            print "STATUS: %s, %s" % (r.status, r.reason)
-        r._handler = self
-        r._host = host
-        r._url = req.get_full_url()
-
-        if r.status in (200, 206) or not HANDLE_ERRORS:
-            return r
-        else:
-            return self.parent.error('http', req, r, r.status, r.reason, r.msg)
-
-    def http_open(self, req):
-        return self.do_open(PyLoadHTTPConnection, req)
-
-class HTTPBase():
-    def __init__(self, interface=None, proxies={}):
-        self.followRedirect = True
-        self.interface = interface
-        self.proxies = proxies
-
-        self.size = None
-
-        self.referer = None
-
-        self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10"
-
-        self.handler = PyLoadHTTPHandler()
-        self.handler.setInterface(interface)
-        if proxies.has_key("socks5"):
-            self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS5, proxies["socks5"])
-        elif proxies.has_key("socks4"):
-            self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS4, proxies["socks4"])
-
-        self.cookieJar = CookieJar()
-
-        self.opener = None
-
-        self.debug = DEBUG
-
-    def getOpener(self, cookies=True):
-        if not self.opener:
-            self.opener = self.createOpener(cookies)
-
-        return self.opener
-
-    def createOpener(self, cookies=True):
-        opener = OpenerDirector()
-        opener.add_handler(self.handler)
-        opener.add_handler(MultipartPostHandler())
-        opener.add_handler(HTTPSHandler())
-        opener.add_handler(HTTPDefaultErrorHandler())
-        opener.add_handler(HTTPErrorProcessor())
-        opener.add_handler(CookieRedirectHandler(self.cookieJar, self.followRedirect))
-        if self.proxies.has_key("http") or self.proxies.has_key("https"):
-            opener.add_handler(ProxyHandler(self.proxies))
-        opener.version = self.userAgent
-        opener.addheaders[0] = ("User-Agent", self.userAgent)
-        opener.addheaders.append(("Accept", "*/*"))
-        opener.addheaders.append(("Accept-Language", "en-US,en"))
-        opener.addheaders.append(("Accept-Encoding", "gzip, deflate"))
-        opener.addheaders.append(("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7"))
-        return opener
-
-    def createRequest(self, url, get={}, post={}, referer=None, customHeaders={}):
-        if get:
-            if isinstance(get, dict):
-                get = urlencode(get)
-            url = "%s?%s" % (url, get)
-
-        req = Request(url)
-
-        if post:
-            if isinstance(post, dict):
-                post = urlencode(post)
-            req.add_data(post)
-
-        if referer:
-            req.add_header("Referer", referer)
-
-        for key, val in customHeaders.iteritems():
-            req.add_header(key, val)
-
-        return req
-
-    def getResponse(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
-        req = self.createRequest(url, get, post, referer, customHeaders)
-        opener = self.getOpener(cookies)
-
-        if self.debug:
-            print "[HTTP] ----"
-            print "[HTTP] creating request"
-            print "[HTTP] URL:", url
-            print "[HTTP] GET"
-            if get:
-                for key, value in get.iteritems():
-                    print "[HTTP] \t", key, ":", value
-            if post:
-                print "[HTTP] POST"
-                for key, value in post.iteritems():
-                    print "[HTTP] \t", key, ":", value
-            print "[HTTP] headers"
-            for key, value in opener.addheaders:
-                print "[HTTP] \t", key, ":", value
-            for key, value in req.headers.iteritems():
-                print "[HTTP] \t", key, ":", value
-            print "[HTTP] cookies"
-            if self.cookieJar:
-                from pprint import pprint
-                pprint(self.cookieJar._cookies)
-            print "[HTTP] ----"
-
-        resp = opener.open(req)
-        resp.getcode = lambda: resp.code
-
-        if self.debug:
-            print "[HTTP] ----"
-            print "[HTTP] got response"
-            print "[HTTP] status:", resp.getcode()
-            print "[HTTP] headers"
-            for key, value in resp.info().dict.iteritems():
-                print "[HTTP] \t", key, ":", value
-            print "[HTTP] cookies"
-            if self.cookieJar:
-                from pprint import pprint
-                pprint(self.cookieJar._cookies)
-            print "[HTTP] ----"
-
-        try:
-            self.size = int(resp.info()["Content-Length"])
-        except: #chunked transfer
-            pass
-
-        return resp
-
-    def closeAll(self):
-        """ closes all connections """
-        if hasattr(self, "handler"):
-            self.handler.close_all()
-
-    def clean(self):
-        """ cleanup """
-        self.closeAll()
-        if hasattr(self, "opener"):
-            del self.opener
-        if hasattr(self, "handler"):
-            del self.handler
-
-if __name__ == "__main__":
-    base = HTTPBase()
-    resp = base.getResponse("http://python.org/")
-    print resp.read()
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 02134ca63..0c184db94 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -14,185 +14,184 @@
     You should have received a copy of the GNU General Public License
     along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-    @author: mkaay
+    @author: RaNaN
 """
-
-from HTTPBase import HTTPBase
-from urllib2 import HTTPError
-from helper import *
+from os import remove
+from os.path import exists
 from time import sleep
-from traceback import print_exc
-
-from module.plugins.Plugin import Abort
-from module.plugins.Plugin import Fail
-
-class HTTPChunk(HTTPBase):
-    def __init__(self, url, fh, get={}, post={}, referer=None, cookies=True, customHeaders={}, range=None, bucket=None, interface=None, proxies={}):
-        HTTPBase.__init__(self, interface=interface, proxies=proxies)
-
-        self.url = url
-        self.bucket = bucket
-        self.range = range
-        self.noRangeHeader = False
-        self.fh = fh
-
-        self.get = get
-        self.post = post
-        self.referer = referer
-        self.cookies = cookies
-        self.customHeaders = customHeaders
-
-        self.deferred = Deferred()
-
-        self.abort = False
-        self.finished = False
-
-        self.arrived = 0
-
-        self.startTime = None
-        self.endTime = None
-
-        self.speed = 0 #byte/sec
-        self.speedCalcTime = None
-        self.speedCalcLen = 0
-
-        self.bufferSize = 18*1024 #tune if performance is poor
-        self.resp = None
-
-    def getSpeed(self):
-        return self.speed
-
-    @threaded
-    def _download(self, resp):
-        self.arrived = 0
-        self.lastSpeed = self.startTime = inttime()
-
-        if self.noRangeHeader and not self.range[0] == 0:
-            self.deferred.error("range starts not at 0")
-
-        running = True
-        while running:
-            if self.abort:
-                break
-            count = self.bufferSize
-            if self.noRangeHeader:
-                count = min(count, self.range[1] - self.arrived)
-            if self.bucket:
-                count = self.bucket.add(count)
-                if not count:
-                    sleep(0.01)
-                    continue
-
-            try:
-                data = resp.read(count)
-            except:
-                self.deferred.error(Fail, "timeout")
-                break
-
-            if self.speedCalcTime < inttime():
-                self.speed = self.speedCalcLen
-                self.speedCalcTime = inttime()
-                self.speedCalcLen = 0
-                try:
-                    self.deferred.progress("percent", 100-int((self.size - self.arrived)/float(self.size)*100))
-                except:
-                    pass
-            size = len(data)
-
-            self.arrived += size
-            self.speedCalcLen += size
-
-            if self.noRangeHeader and self.arrived == self.range[1]:
-                running = False
-
-            if size:
-                self.fh.write(data)
-            else:
-                break
-
-        self.speed = 0
-        self.endTime = inttime()
-        self.finished = True
-        self.fh.close()
-
-        if self.abort:
-            self.deferred.error(Abort)
-        elif self.size == self.arrived:
-            self.deferred.callback()
-        else:
-            print self.arrived, self.size
-            self.deferred.error(Fail, "wrong content-length")
-
-    def getEncoding(self):
-        try:
-            if self.resp.info()["Content-Encoding"] in ("gzip", "deflate"):
-                return self.resp.info()["Content-Encoding"]
-        except:
-            pass
-        return "plain"
-
-    def download(self):
-        if self.range:
-            self.customHeaders["Range"] = "bytes=%i-%i" % self.range
-        try:
-            resp = self.getResponse(self.url, self.get, self.post, self.referer, self.cookies, self.customHeaders)
-            self.resp = resp
-        except HTTPError, e:
-            print_exc()
-            self.deferred.error(e)
-            return self.deferred
-
-        if resp.getcode() in (200, 206):
-            self._download(resp)
+
+import pycurl
+
+from HTTPRequest import HTTPRequest
+
+class WrongFormat(Exception):
+    pass
+
+class ChunkInfo():
+    def __init__(self, name):
+        self.name = name
+        self.size = 0
+        self.resume = False
+        self.chunks = []
+
+    def setSize(self, size):
+        self.size = int(size)
+
+    def addChunk(self, name, range):
+        self.chunks.append((name, range))
+
+    def clear(self):
+        self.chunks = []
+
+    def createChunks(self, chunks):
+        self.clear()
+        chunk_size = self.size / chunks
+
+        current = 0
+        for i in range(chunks):
+            end = self.size-1 if (i == chunks-1) else current+chunk_size
+            self.addChunk("%s.chunk%s" % (self.name, i), (current, end))
+            current += chunk_size + 1
+
+
+    def save(self):
+        fh = open("%s.chunks" % self.name, "w")
+        fh.write("name:%s\n" % self.name)
+        fh.write("size:%s\n" % self.size)
+        for i, c in enumerate(self.chunks):
+            fh.write("#%d:\n" % i)
+            fh.write("\tname:%s\n" % c[0])
+            fh.write("\trange:%i-%i\n" % c[1])
+        fh.close()
+
+    @staticmethod
+    def load(name):
+        if not exists("%s.chunks" % name):
+            raise IOError()
+        fh = open("%s.chunks" % name, "r")
+        name = fh.readline()[:-1]
+        size = fh.readline()[:-1]
+        if name.startswith("name:") and size.startswith("size:"):
+            name = name[5:]
+            size = size[5:]
         else:
-            self.deferred.error(resp.getcode(), resp)
-        return self.deferred
-
-if __name__ == "__main__":
-    import sys
-    from Bucket import Bucket
-    bucket = Bucket()
-    bucket.setRate(200*1000)
-    #bucket = None
-
-    url = "http://speedtest.netcologne.de/test_100mb.bin"
-
-    finished = 0
-    def err(*a, **b):
-        print a, b
-    def callb(*a, **b):
-        global finished
-        finished += 1
-        print a, b
-
-    print "starting"
-
-    conns = 4
-
-    chunks = []
-    for a in range(conns):
-        fh = open("file.part%d" % a, "wb")
-        chunk = HTTPChunk(url, fh, bucket=bucket, range=(a*5*1024*1024, (a+1)*5*1024*1024))
-        print "fireing chunk #%d" % a
-        d = chunk.download()
-        d.addCallback(callb)
-        d.addErrback(err)
-        chunks.append(chunk)
-
-    try:
+            fh.close()
+            raise WrongFormat()
+        ci = ChunkInfo(name)
+        ci.loaded = True
+        ci.setSize(size)
         while True:
-            aspeed = 0
-            for a, chunk in enumerate(chunks):
-                if not chunk.finished:
-                    print "#%d" % a, chunk.getSpeed()/1024, "kb/s"
-                else:
-                    print "#%d" % a, "finished"
-                aspeed += chunk.getSpeed()
-            print "sum", aspeed/1024
-            if finished == conns:
-                print "- finished"
+            if not fh.readline(): #skip line
                 break
-            sleep(1)
-    except KeyboardInterrupt:
-        for chunk in chunks:
-            chunk.abort = True
-        sys.exit()
+            name = fh.readline()[1:-1]
+            range = fh.readline()[1:-1]
+            if name.startswith("name:") and range.startswith("range:"):
+                name = name[5:]
+                range = range[6:].split("-")
+            else:
+                raise WrongFormat()
+
+            ci.addChunk(name, (long(range[0]), long(range[1])))
+        fh.close()
+        return ci
+
+    def remove(self):
+        if exists("%s.chunks" % self.name): remove("%s.chunks" % self.name)
+
+    def getCount(self):
+        return len(self.chunks)
+
+    def getChunkName(self, index):
+        return self.chunks[index][0]
+
+    def getChunkRange(self, index):
+        return self.chunks[index][1]
+
+class HTTPChunk(HTTPRequest):
+    def __init__(self, id, parent, range=None, resume=False):
+        self.id = id
+        self.p = parent # HTTPDownload instance
+        self.range = range # tuple (start, end)
+        self.resume = resume
+
+        self.arrived = 0
+        self.lastURL = self.p.referer
+
+        self.c = pycurl.Curl()
+
+        self.header = ""
+        self.headerParsed = False #indicates if the header has been processed
+
+        self.fp = None #file handle
+
+        self.initHandle()
+        self.setInterface(self.p.interface, self.p.proxies)
+
+    @property
+    def cj(self):
+        return self.p.cj
+
+    def getHandle(self):
+        """ returns a Curl handle ready to use for perform/multiperform """
+
+        self.setRequestContext(self.p.url, self.p.get, self.p.post, self.p.referer, self.p.cj)
+        self.c.setopt(pycurl.WRITEFUNCTION, self.writeBody)
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+        if self.resume:
+            self.fp = open(self.p.info.getChunkName(self.id), "ab")
+            self.arrived = self.fp.tell()
+
+            if self.range:
+                #print "Chunked resume with range %i-%i" % (self.arrived+self.range[0], self.range[1])
+                self.c.setopt(pycurl.RANGE, "%i-%i" % (self.arrived+self.range[0], self.range[1]))
+            else:
+                #print "Resume File from %i" % self.arrived
+                self.c.setopt(pycurl.RESUME_FROM, self.arrived)
+
+        else:
+            if self.range:
+                #print "Chunked with range %i-%i" % self.range
+                self.c.setopt(pycurl.RANGE, "%i-%i" % self.range)
+
+            self.fp = open(self.p.info.getChunkName(self.id), "wb")
+
+        return self.c
+
+    def writeHeader(self, buf):
+        self.header += buf
+        #@TODO forward headers?, this is possibly unneeeded, when we just parse valid 200 headers
+        # as first chunk, we will parse the headers
+        if self.header.endswith("\r\n\r\n") and not self.range:
+            self.parseHeader()
+
+    def writeBody(self, buf):
+        size = len(buf)
+        self.arrived += size
+
+        self.fp.write(buf)
+
+        if self.p.bucket:
+            sleep(self.p.bucket.consumed(size))
+
+        if self.range and self.arrived > (self.range[1]-self.range[0]):
+            return 0 #close if we have enough data
+
+    def parseHeader(self):
+        """parse data from recieved header"""
+        for line in self.header.splitlines():
+            line = line.strip().lower()
+            if line.startswith("accept-ranges") and "bytes" in line:
+                self.p.chunkSupport = True
+
+            if not self.resume and line.startswith("content-length"):
+                self.p.size = int(line.split(":")[1])
+
+        self.headerParsed = True
+
+    def close(self):
+        """ closes everything, unusable after this """
+        if self.fp: self.fp.close()
+        self.c.close()
+        if hasattr(self, "p"): del self.p
\ No newline at end of file
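ChunkInfo.createChunks cuts the total size into equal inclusive ranges, with the last chunk absorbing the remainder of the integer division. A worked example of the range arithmetic for a 10-byte file split three ways:

    # Worked example of createChunks' arithmetic (Python 2 integer division).
    size, chunks = 10, 3
    chunk_size = size / chunks   # 3
    # i=0: (0, 3)   i=1: (4, 7)   i=2: (8, 9)  <- last range ends at size-1
    # Ranges are inclusive. A chunk can overshoot its range slightly before
    # writeBody returns 0 to abort it, which is why _copyChunks seeks to
    # previous_range_end + 1 before appending the next chunk file.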
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index bce698e1e..e3ac09e84 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -14,337 +14,208 @@
     You should have received a copy of the GNU General Public License
     along with this program; if not, see <http://www.gnu.org/licenses/>.
 
-    @author: mkaay
+    @author: RaNaN
"""
-from HTTPChunk import HTTPChunk
-from helper import *
-from os.path import exists, getsize
 from os import remove
-#from shutil import move, copyfileobj
-from zlib import decompressobj, MAX_WBITS
+from time import sleep, time
+from shutil import move
 
-from cookielib import CookieJar
+import pycurl
 
-class WrongFormat(Exception):
-    pass
+from HTTPRequest import HTTPRequest
+from HTTPChunk import ChunkInfo, HTTPChunk
 
-class ChunkInfo():
-    def __init__(self, name):
-        self.name = name
-        self.size = None
-        self.loaded = False
-        self.chunks = []
-
-    def setSize(self, size):
-        self.size = int(size)
-
-    def addChunk(self, name, range, encoding):
-        self.chunks.append((name, range, encoding))
-
-    def clear(self):
-        self.chunks = []
-        self.loaded = False
-
-    def save(self):
-        fh = open("%s.chunks" % self.name, "w")
-        fh.write("name:%s\n" % self.name)
-        fh.write("size:%s\n" % self.size)
-        for i, c in enumerate(self.chunks):
-            fh.write("#%d:\n" % i)
-            fh.write("\tname:%s\n" % c[0])
-            fh.write("\tencoding:%s\n" % c[2])
-            fh.write("\trange:%i-%i\n" % c[1])
-
-    @staticmethod
-    def load(name):
-        if not exists("%s.chunks" % name):
-            raise IOError()
-        fh = open("%s.chunks" % name, "r")
-        name = fh.readline()[:-1]
-        size = fh.readline()[:-1]
-        if name.startswith("name:") and size.startswith("size:"):
-            name = name[5:]
-            size = size[5:]
-        else:
-            raise WrongFormat()
-        ci = ChunkInfo(name)
-        ci.loaded = True
-        ci.setSize(size)
-        while True:
-            if not fh.readline(): #skip line
-                break
-            name = fh.readline()[1:-1]
-            encoding = fh.readline()[1:-1]
-            range = fh.readline()[1:-1]
-            if name.startswith("name:") and encoding.startswith("encoding:") and range.startswith("range:"):
-                name = name[5:]
-                encoding = encoding[9:]
-                range = range[6:].split("-")
-            else:
-                raise WrongFormat()
-
-            ci.addChunk(name, (long(range[0]), long(range[1])), encoding)
-        return ci
-
-    def removeInfo(self):
-        remove("%s.chunks" % self.name)
-
-    def getCount(self):
-        return len(self.chunks)
-
-    def getChunkName(self, index):
-        return self.chunks[index][0]
-
-    def getChunkRange(self, index):
-        return self.chunks[index][1]
-
-    def getChunkEncoding(self, index):
-        return self.chunks[index][2]
-
-class WrappedHTTPDeferred(WrappedDeferred):
-    pass
-
-class HTTPDownload():
-    def __init__(self, url, filename, get={}, post={}, referer=None, cookies=True, customHeaders={}, bucket=None, interface=None, proxies={}):
+from module.plugins.Plugin import Abort
+
+class HTTPDownload(HTTPRequest):
+    def __init__(self, url, filename, get={}, post={}, referer=None, cj=None, bucket=None,
+                 interface=None, proxies={}):
         self.url = url
-        self.filename = filename
-        self.interface = interface
-        self.proxies = proxies
-
+        self.filename = filename  #complete file destination, not only name
         self.get = get
         self.post = post
-
         self.referer = referer
-        self.cookies = cookies
-
-        self.customHeaders = customHeaders
-
+        self.cj = cj  #cookiejar if cookies are needed
         self.bucket = bucket
-
-        self.deferred = Deferred()
-
-        self.finished = False
-        self._abort = False
-        self.size = None
-
-        self.cookieJar = CookieJar()
-
+        self.interface = interface
+        self.proxies = proxies
+        # all arguments
+
+        self.abort = False
+        self.size = 0
+
         self.chunks = []
         self.chunksDone = 0
+
         try:
             self.info = ChunkInfo.load(filename)
+            self.info.resume = True #resume is only possible with valid info file
+            self.size = self.info.size
         except IOError:
             self.info = ChunkInfo(filename)
-        self.noChunkSupport = False
-
+
+        self.chunkSupport = None
+        self.m = pycurl.CurlMulti()
+
+        #needed for speed calculation
+        self.lastChecked = 0
+        self.lastArrived = []
+        self.speeds = []
+
+    @property
+    def speed(self):
+        return sum(self.speeds)
+
     @property
     def arrived(self):
-        arrived = 0
-        try:
-            for i in range(self.info.getCount()):
-                arrived += getsize(self.info.getChunkName(i)) #ugly, but difficult to calc otherwise due chunk resume
-        except OSError:
-            arrived = self.size
-        return arrived
-
-    def setAbort(self, val):
-        self._abort = val
-        for chunk in self.chunks:
-            chunk.abort = val
-
-    def getAbort(self):
-        return self._abort
-
-    abort = property(getAbort, setAbort)
-
-    def getSpeed(self):
-        speed = 0
-        for chunk in self.chunks:
-            speed += chunk.getSpeed()
-        return speed
-
+        return sum([c.arrived for c in self.chunks])
+
     @property
-    def speed(self):
-        return self.getSpeed()
-
-    def calcProgress(self, p):
-        self.deferred.progress("percent", 100-int((self.size - self.arrived)/float(self.size)*100))
-
-    def _chunkDone(self):
-        self.chunksDone += 1
-        #print self.chunksDone, "/", len(self.chunks)
-        if self.chunksDone == len(self.chunks):
-            self._copyChunks()
-
+    def percent(self):
+        if not self.size: return 0
+        return (self.arrived * 100) / self.size
+
     def _copyChunks(self):
-        fo = open(self.filename, "wb") #out file
-        for i in range(self.info.getCount()):
-            encoding = self.info.getChunkEncoding(i)
-
-            #decompress method, if any
-            decompress = lambda data: data
-            if encoding == "gzip":
-                gz = decompressobj(16+MAX_WBITS)
-                decompress = lambda data: gz.decompress(data)
-            if encoding == "deflate":
-                df = decompressobj(-MAX_WBITS)
-                decompress = lambda data: df.decompress(data)
-
-            #input file
-            fname = "%s.chunk%d" % (self.filename, i)
-            fi = open(fname, "rb")
-            while True: #copy in chunks, consumes less memory
-                data = fi.read(512*1024)
-                if not data:
-                    break
-                fo.write(decompress(data)) #decompressing
-            fi.close()
-            remove(fname) #remove
-        fo.close()
-        self.info.removeInfo() #remove info file
-        self.deferred.callback() #done, emit callbacks
-
-    def _createChunk(self, fh, range=None):
-        chunk = HTTPChunk(self.url, fh, get=self.get, post=self.post,
-                          referer=self.referer, cookies=self.cookies,
-                          customHeaders=self.customHeaders,
-                          bucket=self.bucket, range=range,
-                          interface=self.interface, proxies=self.proxies)
-        chunk.cookieJar = self.cookieJar
-        return chunk
-
-    def _addChunk(self, chunk, d):
-        self.chunks.append(chunk)
-        d.addProgress("percent", self.calcProgress)
-        d.addCallback(self._chunkDone)
-        d.addErrback(lambda *args, **kwargs: self.setAbort(True))
-        d.addErrback(self.deferred.error)
-
+        init = self.info.getChunkName(0) #initial chunk name
+
+        if len(self.chunks) > 1:
+            fo = open(init, "rb+") #first chunkfile
+            for i in range(1, self.info.getCount()):
+                #input file
+                fo.seek(self.info.getChunkRange(i - 1)[1] + 1) #seek to beginning of chunk, to get rid of overlapping chunks
+                fname = "%s.chunk%d" % (self.filename, i)
+                fi = open(fname, "rb")
+                buf = 512 * 1024
+                while True: #copy in chunks, consumes less memory
+                    data = fi.read(buf)
+                    if not data:
+                        break
+                    fo.write(data)
+                fi.close()
+                remove(fname) #remove chunk
+            fo.close()
+
+        move(init, self.filename)
+        self.info.remove() #remove info file
+
     def download(self, chunks=1, resume=False):
-        self.chunksDone = 0
-        if chunks > 0:
-            #diffentent chunk count in info, resetting
-            if self.info.loaded and not self.info.getCount() == chunks:
-                self.info.clear()
-
-            #if resuming, calculate range with offset
-            crange = None
-            if resume:
-                if self.info.getCount() == chunks and exists("%s.chunk0" % (self.filename, )):
-                    crange = self.info.getChunkRange(0)
-                    crange = (crange[0]+getsize("%s.chunk0" % (self.filename, )), crange[1]-1)
-
-            #if firstpart not done
-            if crange is None or crange[1]-crange[0] > 0:
-                fh = open("%s.chunk0" % (self.filename, ), "ab" if crange else "wb")
-
-                chunk = self._createChunk(fh, range=crange)
-
-                d = chunk.download() #start downloading
-                self._addChunk(chunk, d)
-
-                #no info file, need to calculate ranges
-                if not self.info.loaded:
-                    size = chunk.size #overall size
-                    chunksize = size/chunks #chunk size
-
-                    chunk.range = (0, chunksize) #setting range for first part
-                    chunk.noRangeHeader = True
-                    chunk.size = chunksize #setting size for first chunk
-
-                    self.size = size #setting overall size
-                    self.info.setSize(self.size) #saving overall size
-                    self.info.addChunk("%s.chunk0" % (self.filename, ), chunk.range, chunk.getEncoding()) #add chunk to infofile
+        chunks = max(1, chunks)
+        resume = self.info.resume and resume
+        self.chunks = []
+
+        try:
+            self._download(chunks, resume)
+        finally:
+            self.clean()
+
+    def _download(self, chunks, resume):
+        if not resume:
+            self.info.addChunk("%s.chunk0" % self.filename, (0, 0))
+
+        init = HTTPChunk(0, self, None, resume) #initial chunk that will load complete file (if needed)
+
+        self.chunks.append(init)
+        self.m.add_handle(init.getHandle())
+
+        while 1:
+            #need to create chunks
+            if len(self.chunks) < chunks and self.chunkSupport and self.size: #will be set later by first chunk
+
+                if not resume:
+                    self.info.setSize(self.size)
+                    self.info.createChunks(chunks)
+                    self.info.save()
+
+                chunks = self.info.getCount()
+
+                init.range = self.info.getChunkRange(0)
+
+                for i in range(1, chunks):
+                    c = HTTPChunk(i, self, self.info.getChunkRange(i), resume)
+                    self.chunks.append(c)
+                    self.m.add_handle(c.getHandle())
+
+            while 1:
+                ret, num_handles = self.m.perform()
+
+                if ret != pycurl.E_CALL_MULTI_PERFORM:
+                    break
+
+            while 1:
+                num_q, ok_list, err_list = self.m.info_read()
+                for c in ok_list:
+                    self.chunksDone += 1
+                for c in err_list:
+                    curl, errno, msg = c
+                    #test if chunk was finished, otherwise raise the exception
+                    if errno != 23 or "0 !=" not in msg:
+                        raise pycurl.error(errno, msg)
+
+                    #@TODO KeyBoardInterrupts are seen as finished chunks,
+                    #but normally not handled to this process, only in the testcase
 
-                    lastchunk = size - chunksize*(chunks-1) #calculating size for last chunk
-                    self.firstchunk = chunk #remeber first chunk
-
-            if self.info.loaded and not self.size:
-                self.size = self.info.size #setting overall size
-
-            for i in range(1, chunks): #other chunks
-                cont = False
-                if not self.info.loaded: #first time load
-                    if i+1 == chunks: #last chunk?
-                        rng = (i*chunksize, i*chunksize+lastchunk-1)
-                    else:
-                        rng = (i*chunksize, (i+1)*chunksize-1) #adjusting range
-                else: #info available
-                    rng = self.info.getChunkRange(i) #loading previous range
-                    if resume and exists("%s.chunk%d" % (self.filename, i)): #continue chunk
-                        rng = (rng[0]+getsize("%s.chunk%d" % (self.filename, i)), rng[1]) #adjusting offset
-                        cont = True #set append mode
-
-                if rng[1]-rng[0] <= 0: #chunk done
-                    continue
-
-                fh = open("%s.chunk%d" % (self.filename, i), "ab" if cont else "wb")
-                chunk = self._createChunk(fh, range=rng)
-                d = chunk.download() #try
-
-                if not chunk.resp.getcode() == 206 and i == 1: #no range supported, tell first chunk to download everything
-                    chunk.abort = True
-                    self.noChunkSupport = True
-                    self.firstchunk.size = self.size
-                    self.firstchunk.range = None
-                    self.info.clear() #clear info
-                    self.info.addChunk("%s.chunk0" % (self.filename, ), (0, self.firstchunk.size), chunk.getEncoding()) #re-adding info with correct ranges
+                    self.chunksDone += 1
+                if not num_q:
                     break
-
-                self._addChunk(chunk, d)
-
-                if not self.info.loaded: #adding info
-                    self.info.addChunk("%s.chunk%d" % (self.filename, i), chunk.range, chunk.getEncoding())
-
-            self.info.save() #saving info
-            if not len(self.chunks):
-                self._copyChunks()
-            return WrappedHTTPDeferred(self, self.deferred)
-        else:
-            raise Exception("no chunks")
+
+            if self.chunksDone == len(self.chunks):
+                break #all chunks loaded
+
+            # calc speed once per second
+            t = time()
+            if self.lastChecked + 1 < t:
+                diff = [c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0) for i, c in
+                        enumerate(self.chunks)]
+
+                #for i, c in enumerate(self.chunks):
+                #    diff[i] = c.arrived - (self.lastArrived[i] if len(self.lastArrived) > i else 0)
+
+                self.speeds = [float(a) / (t - self.lastChecked) for a in diff]
+                self.lastArrived = [c.arrived for c in self.chunks]
+                self.lastChecked = t
+                #print "------------------------"
+                #print self.speed / 1024, "kb/s"
+                #print "Arrived:", self.arrived
+                #print "Size:", self.size
+                #print self.percent, "%"
+
+            if self.abort:
+                raise Abort()
+
+            sleep(0.001) #supress busy waiting - limits dl speed to (1 / x) * buffersize
+            self.m.select(1)
+
+        for chunk in self.chunks:
+            chunk.fp.close()
+            self.m.remove_handle(chunk.c)
+
+        self._copyChunks()
 
     def clean(self):
         """ cleanup """
-        for c in self.chunks:
-            c.clean()
+        for chunk in self.chunks:
+            chunk.close()
+            self.m.remove_handle(chunk.c)
+
+        self.m.close()
+        self.chunks = []
+        if hasattr(self, "cj"):
+            del self.cj
+        if hasattr(self, "info"):
+            del self.info
 
 if __name__ == "__main__":
-    import sys
+    url = "http://speedtest.netcologne.de/test_10mb.bin"
+
     from Bucket import Bucket
+
     bucket = Bucket()
-    bucket.setRate(200*1024)
-    #bucket = None
-
-    url = "http://speedtest.netcologne.de/test_100mb.bin"
-
-    finished = False
-    def err(*a, **b):
-        print a, b
-    def callb(*a, **b):
-        global finished
-        finished = True
-        print a, b
-
+    bucket.setRate(200 * 1024)
+    bucket = None
+
    print "starting"
-
-    dwnld = HTTPDownload(url, "test_100mb.bin", bucket=bucket)
-    d = dwnld.download(chunks=5, resume=True)
-    d.addCallback(callb)
-    d.addErrback(err)
-
-    try:
-        while True:
-            for a, chunk in enumerate(dwnld.chunks):
-                if not chunk.finished:
-                    print "#%d" % a, chunk.getSpeed()/1024, "kb/s", "size", int(float(chunk.arrived)/chunk.size*100), "%"
-                else:
-                    print "#%d" % a, "finished"
-            print "sum", dwnld.speed/1024, dwnld.arrived, "/", dwnld.size, int(float(dwnld.arrived)/dwnld.size*100), "%"
-            if finished:
-                print "- finished"
-                break
-            sleep(1)
-    except KeyboardInterrupt:
-        dwnld.abort = True
-        sys.exit()
+
+    dwnld = HTTPDownload(url, "test_10mb.bin", bucket=bucket)
+    dwnld.download(chunks=3, resume=True)
\ No newline at end of file
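_download drives all chunk handles through one pycurl.CurlMulti in the standard multi-interface pattern: call perform() until it stops returning E_CALL_MULTI_PERFORM, drain info_read() for finished or failed handles, then wait in select(). A chunk that deliberately stops itself by returning 0 from its write callback surfaces here as error 23 (curl's write error) with a "0 != ..." message, which the loop above intentionally counts as a finished chunk. Reduced to a runnable single-handle skeleton (the real loop also spawns chunks, computes speeds and honours abort):

    # Skeleton of the CurlMulti loop used by HTTPDownload._download (sketch).
    import pycurl

    m = pycurl.CurlMulti()
    c = pycurl.Curl()
    c.setopt(pycurl.URL, "http://example.com")
    c.setopt(pycurl.WRITEFUNCTION, lambda buf: None)   # discard the body
    m.add_handle(c)

    finished = 0
    while finished < 1:
        while True:
            ret, num_handles = m.perform()             # drive the transfer(s)
            if ret != pycurl.E_CALL_MULTI_PERFORM:
                break
        num_q, ok_list, err_list = m.info_read()       # harvest completed handles
        finished += len(ok_list) + len(err_list)
        m.select(1)                                    # block until sockets are ready

    m.remove_handle(c)
    c.close()
    m.close()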
\ No newline at end of file diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py new file mode 100644 index 000000000..3a240b081 --- /dev/null +++ b/module/network/HTTPRequest.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: RaNaN +""" + +import pycurl + +from urllib import quote, urlencode +from cStringIO import StringIO + +def myquote(url): + return quote(url, safe="%/:=&?~#+!$,;'@()*[]") + +class HTTPRequest(): + def __init__(self, cookies=None, interface=None, proxies=None): + self.c = pycurl.Curl() + self.rep = StringIO() + + self.cj = cookies #cookiejar + + self.lastURL = None + self.lastEffectiveURL = None + self.abort = False + + self.header = "" + + self.initHandle() + self.setInterface(interface, proxies) + + + def initHandle(self): + """ sets common options to curl handle """ + self.c.setopt(pycurl.FOLLOWLOCATION, 1) + self.c.setopt(pycurl.MAXREDIRS, 5) + self.c.setopt(pycurl.CONNECTTIMEOUT, 30) + self.c.setopt(pycurl.NOSIGNAL, 1) + self.c.setopt(pycurl.NOPROGRESS, 1) + if hasattr(pycurl, "AUTOREFERER"): + self.c.setopt(pycurl.AUTOREFERER, 1) + self.c.setopt(pycurl.BUFFERSIZE, 32 * 1024) + self.c.setopt(pycurl.SSL_VERIFYPEER, 0) + self.c.setopt(pycurl.LOW_SPEED_TIME, 30) + self.c.setopt(pycurl.LOW_SPEED_LIMIT, 20) + + #self.c.setopt(pycurl.VERBOSE, 1) + + self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.2.10) Gecko/20100916 Firefox/3.6.10") + if pycurl.version_info()[7]: + self.c.setopt(pycurl.ENCODING, "gzip, deflate") + self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", + "Accept-Language: en-US,en", + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Connection: keep-alive", + "Keep-Alive: 300"]) + + def setInterface(self, interface, proxies): + if interface and interface.lower() != "none": + self.c.setopt(pycurl.INTERFACE, interface) + + #@TODO set proxies + + def addCookies(self): + if self.cj: + self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) + + def getCookies(self): + if self.cj: + for c in self.cj.getCookies(): + self.c.setopt(pycurl.COOKIELIST, c) + return + + def setRequestContext(self, url, get, post, referer, cookies): + """ sets everything needed for the request """ + + url = myquote(str(url)) + + if get: + get = urlencode(get) + url = "%s?%s" % (url, get) + + self.c.setopt(pycurl.URL, url) + self.c.lastUrl = url + + if post: + post = urlencode(post) + self.c.setopt(pycurl.POSTFIELDS, post) + + if referer and self.lastURL: + self.c.setopt(pycurl.REFERER, self.lastURL) + + if cookies: + self.c.setopt(pycurl.COOKIEFILE, "") + self.c.setopt(pycurl.COOKIEJAR, "") + self.getCookies() + + + def load(self, url, get={}, post={}, referer=True, cookies=True): + """ load and returns a given page """ + + self.setRequestContext(url, get, post, referer, cookies) + + self.header = "" + self.c.setopt(pycurl.WRITEFUNCTION, self.write) + 
+        self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+        #@TODO header_only, raw_cookies and some things in old backend, which are apparently not needed
+
+        self.c.perform()
+
+        self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+        self.addCookies()
+
+        return self.getResponse()
+
+
+    def getResponse(self):
+        """ retrieve response from string io """
+        value = self.rep.getvalue()
+        self.rep.close()
+        self.rep = StringIO()
+        return value
+
+    def write(self, buf):
+        """ writes response """
+        if self.rep.tell() > 500000 or self.abort:
+            rep = self.getResponse()
+            if self.abort: raise Abort()
+            f = open("response.dump", "wb")
+            f.write(rep)
+            f.close()
+            raise Exception("Loaded Url exceeded limit")
+
+        self.rep.write(buf)
+
+    def writeHeader(self, buf):
+        """ writes header """
+        self.header += buf
+
+    def close(self):
+        """ cleanup, unusable after this """
+        self.c.close()
+        self.rep.close()
+        if hasattr(self, "cj"):
+            del self.cj
+
+
+if __name__ == "__main__":
+    url = "http://pyload.org"
+    c = HTTPRequest()
+    print c.load(url)
\ No newline at end of file
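HTTPRequest is now the primitive that page loads go through (see the RequestFactory change below). A minimal sketch of driving it directly; URL and form fields are placeholders:

    from module.network.HTTPRequest import HTTPRequest

    req = HTTPRequest()

    # GET parameters are urlencoded and appended to the URL
    page = req.load("http://example.com/search", get={"q": "pyload"})

    # a non-empty post dict turns the request into a POST
    page = req.load("http://example.com/login",
                    post={"user": "name", "pass": "secret"})

    req.close()  # frees the curl handle; the instance is unusable afterwards

Note that write() caps buffered responses at 500,000 bytes and dumps anything larger to response.dump before raising: this class is meant for pages, while actual file transfers belong to HTTPDownload.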
diff --git a/module/network/RequestFactory.py b/module/network/RequestFactory.py
index 89d1aaf64..6ad64589a 100644
--- a/module/network/RequestFactory.py
+++ b/module/network/RequestFactory.py
@@ -20,7 +20,7 @@ from threading import Lock
 
 from Browser import Browser
-from HTTPBase import HTTPBase
+from HTTPRequest import HTTPRequest
 from CookieJar import CookieJar
 
 class RequestFactory():
@@ -29,8 +29,6 @@
         self.core = core
         self.cookiejars = {}
 
-    iface = property(lambda self: self.core.config["general"]["download_interface"])
-
     def getRequest(self, pluginName, account=None):
         self.lock.acquire()
@@ -48,10 +46,10 @@
 
     def getURL(self, url, get={}, post={}):
         #a bit too much overhead for a single url
-        b = Browser()
-        #@TODO proxies, iface
-
-        return b.getPage(url, get, post)
+        h = HTTPRequest()
+        rep = h.load(url, get, post)
+        h.close()
+        return rep
 
     def getCookieJar(self, pluginName, account=None):
         if self.cookiejars.has_key((pluginName, account)):
diff --git a/module/network/helper.py b/module/network/helper.py
deleted file mode 100644
index 5ce21a8dd..000000000
--- a/module/network/helper.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from threading import Thread
-from time import time, sleep
-
-def inttime():
-    return int(time())
-
-class AlreadyCalled(Exception):
-    pass
-
-def callInThread(f, *args, **kwargs):
-    class FThread(Thread):
-        def __init__(self):
-            Thread.__init__(self)
-            self.setDaemon(True)
-            self.d = Deferred()
-        def run(self):
-            ret = f(*args, **kwargs)
-            self.d.callback(ret)
-    t = FThread()
-    t.start()
-    return t.d
-
-class Deferred():
-    def __init__(self):
-        self.call = []
-        self.err = []
-        self.prgr = {}
-        self.result = ()
-        self.errresult = ()
-
-    def addCallback(self, f, *cargs, **ckwargs):
-        self.call.append((f, cargs, ckwargs))
-        if self.result:
-            args, kwargs = self.result
-            args+=tuple(cargs)
-            kwargs.update(ckwargs)
-            callInThread(f, *args, **kwargs)
-
-    def addProgress(self, chain, f):
-        if self.prgr.has_key(chain):
-            self.prgr[chain].append(f)
-        else:
-            self.prgr[chain] = [f]
-
-    def addErrback(self, f, *cargs, **ckwargs):
-        self.err.append((f, cargs, ckwargs))
-        if self.errresult:
-            args, kwargs = self.errresult
-            args+=tuple(cargs)
-            kwargs.update(ckwargs)
-            callInThread(f, *args, **kwargs)
-
-    def callback(self, *args, **kwargs):
-        self.result = (args, kwargs)
-        for f, cargs, ckwargs in self.call:
-            args+=tuple(cargs)
-            kwargs.update(ckwargs)
-            callInThread(f, *args, **kwargs)
-        self.call = []
-        self.result = ()
-
-    def error(self, *args, **kwargs):
-        self.errresult = (args, kwargs)
-        for f, cargs, ckwargs in self.err:
-            args+=tuple(cargs)
-            kwargs.update(ckwargs)
-            callInThread(f, *args, **kwargs)
-        self.err = []
-        self.errresult = ()
-
-    def progress(self, chain, *args, **kwargs):
-        if not self.prgr.has_key(chain):
-            return
-        for f in self.prgr[chain]:
-            f(*args, **kwargs)
-
-#decorator
-def threaded(f):
-    def ret(*args, **kwargs):
-        return callInThread(f, *args, **kwargs)
-    return ret
-
-def waitFor(d):
-    class Waiter():
-        waiting = True
-        args = ()
-        err = None
-
-        def __init__(self, d):
-            self.d = d
-
-        def wait(self):
-            self.d.addCallback(self.callb)
-            self.d.addErrback(self.errb)
-            while self.waiting and not self.d.abort:
-                sleep(0.5)
-            if self.err:
-                #try:
-                if issubclass(self.err[0][0], Exception):
-                    raise self.err[0][0](*self.err[0][1:], **self.err[1])
-                #except:
-                #    pass
-                raise Exception(*self.err[0], **self.err[1])
-            return self.args
-
-        def callb(self, *args, **kwargs):
-            self.waiting = False
-            self.args = (args, kwargs)
-
-        def errb(self, *args, **kwargs):
-            self.waiting = False
-            self.err = (args, kwargs)
-
-    w = Waiter(d)
-    return w.wait()
-
-class DeferredGroup(Deferred):
-    def __init__(self, group=[]):
-        Deferred.__init__(self)
-        self.group = group
-        self.done = 0
-
-        for d in self.group:
-            d.addCallback(self._cb)
-            d.addErrback(self.error)
-
-    def addDeferred(self, d):
-        d.addCallback(self._cb)
-        d.addErrback(self.error)
-        self.group.append(d)
-
-    def _cb(self, *args, **kwargs):
-        self.done += 1
-        if len(self.group) == self.done:
-            self.callback()
-            self.group = []
-            self.done = 0
-
-    def error(self, *args, **kwargs):
-        Deferred.error(self, *args, **kwargs)
-        self.group = []
-        self.done = 0
-
-class WrappedDeferred(object):
-    def __init__(self, download, d):
-        self.__dict__["download"] = download
-        self.__dict__["d"] = d
-
-    def __getattr__(self, attr):
-        if attr in ("addCallback", "addErrback", "addProgress", "callback", "error", "progress"):
-            return getattr(self.__dict__["d"], attr)
-        return getattr(self.__dict__["download"], attr)
-
-    def __setattr__(self, attr, val):
-        if attr in ("addCallback", "addErrback", "addProgress", "callback", "error", "progress"):
-            return setattr(self.__dict__["d"], attr, val)
-        return setattr(self.__dict__["download"], attr, val)
diff --git a/module/network/socks.py b/module/network/socks.py
deleted file mode 100644
index 626ffe176..000000000
--- a/module/network/socks.py
+++ /dev/null
@@ -1,442 +0,0 @@
-"""SocksiPy - Python SOCKS module.
-Version 1.00
-
-Copyright 2006 Dan-Haim. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-1. Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
-2. Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-3. Neither the name of Dan Haim nor the names of his contributors may be used
-   to endorse or promote products derived from this software without specific
-   prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY DAN HAIM "AS IS" AND ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-EVENT SHALL DAN HAIM OR HIS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA
-OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-This module provides a standard socket-like interface for Python
-for tunneling connections through SOCKS proxies.
- -""" - -""" - -Minor modifications made by Christopher Gilbert (http://motomastyle.com/) -for use in PyLoris (http://pyloris.sourceforge.net/) - -Minor modifications made by Mario Vilas (http://breakingcode.wordpress.com/) -mainly to merge bug fixes found in Sourceforge - -""" - -import socket - -if getattr(socket, 'socket', None) is None: - raise ImportError('socket.socket missing, proxy support unusable') - -import struct -import sys - -PROXY_TYPE_SOCKS4 = 1 -PROXY_TYPE_SOCKS5 = 2 -PROXY_TYPE_HTTP = 3 - -_defaultproxy = None - -# Small hack for Python 2.x -if sys.version_info[0] <= 2: - def bytes(obj, enc=None): - return obj - -class ProxyError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class GeneralProxyError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class Socks5AuthError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class Socks5Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class Socks4Error(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class HTTPError(ProxyError): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -_generalerrors = ("success", - "invalid data", - "not connected", - "not available", - "bad proxy type", - "bad input") - -_socks5errors = ("succeeded", - "general SOCKS server failure", - "connection not allowed by ruleset", - "Network unreachable", - "Host unreachable", - "Connection refused", - "TTL expired", - "Command not supported", - "Address type not supported", - "Unknown error") - -_socks5autherrors = ("succeeded", - "authentication is required", - "all offered authentication methods were rejected", - "unknown username or invalid password", - "unknown error") - -_socks4errors = ("request granted", - "request rejected or failed", - ("request rejected because SOCKS server cannot connect to " - "identd on the client"), - ("request rejected because the client program and identd" - " report different user-ids"), - "unknown error") - - -def setdefaultproxy(proxytype=None, addr=None, port=None, rdns=True, - username=None, password=None): - """setdefaultproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets a default proxy which all further socksocket objects will use, - unless explicitly changed. - """ - global _defaultproxy - _defaultproxy = (proxytype, addr, port, rdns, username, password) - - -class socksocket(socket.socket): - """socksocket([family[, type[, proto]]]) -> socket object - - Open a SOCKS enabled socket. The parameters are the same as - those of the standard socket init. In order for SOCKS to work, - you must specify family=AF_INET, type=SOCK_STREAM and proto=0. 
- """ - - def __init__(self, family=socket.AF_INET, type=socket.SOCK_STREAM, - proto=0, _sock=None): - socket.socket.__init__(self, family, type, proto, _sock) - if _defaultproxy != None: - self.__proxy = _defaultproxy - else: - self.__proxy = (None, None, None, None, None, None) - self.__proxysockname = None - self.__proxypeername = None - - def __decode(self, bytes): - if getattr(bytes, 'decode', False): - try: - bytes = bytes.decode() - except Exception: - pass - return bytes - - def __encode(self, bytes): - if getattr(bytes, 'encode', False): - try: - bytes = bytes.encode() - except Exception: - pass - return bytes - - def __recvall(self, count): - """__recvall(count) -> data - Receive EXACTLY the number of bytes requested from the socket. - Blocks until the required number of bytes have been received. - """ - data = bytes("") - while len(data) < count: - d = self.recv(count - len(data)) - if not d: - raise GeneralProxyError( - (0, "connection closed unexpectedly")) - data = data + self.__decode(d) - return data - - def sendall(self, bytes): - socket.socket.sendall(self, self.__encode(bytes)) - - def setproxy(self, proxytype=None, addr=None, port=None, rdns=True, - username=None, password=None): - """setproxy(proxytype, addr[, port[, rdns[, username[, password]]]]) - Sets the proxy to be used. - proxytype - The type of the proxy to be used. Three types - are supported: PROXY_TYPE_SOCKS4 (including socks4a), - PROXY_TYPE_SOCKS5 and PROXY_TYPE_HTTP - addr - The address of the server (IP or DNS). - port - The port of the server. Defaults to 1080 for SOCKS - servers and 8080 for HTTP proxy servers. - rdns - Should DNS queries be preformed on the remote side - (rather than the local side). The default is True. - Note: This has no effect with SOCKS4 servers. - username - Username to authenticate with to the server. - The default is no authentication. - password - Password to authenticate with to the server. - Only relevant when username is also provided. - """ - self.__proxy = (proxytype, addr, port, rdns, username, password) - - def __negotiatesocks5(self, destaddr, destport): - """__negotiatesocks5(self,destaddr,destport) - Negotiates a connection through a SOCKS5 server. - """ - # First we'll send the authentication packages we support. - if (self.__proxy[4] != None) and (self.__proxy[5] != None): - # The username/password details were supplied to the - # setproxy method so we support the USERNAME/PASSWORD - # authentication (in addition to the standard none). - self.sendall("\x05\x02\x00\x02") - else: - # No username/password were entered, therefore we - # only support connections with no authentication. - self.sendall("\x05\x01\x00") - # We'll receive the server's response to determine which - # method was selected - chosenauth = self.__recvall(2) - if chosenauth[0] != "\x05": - self.close() - raise GeneralProxyError((1, _generalerrors[1])) - # Check the chosen authentication method - if chosenauth[1] == "\x00": - # No authentication is required - pass - elif chosenauth[1] == "\x02": - # Okay, we need to perform a basic username/password - # authentication. 
- self.sendall("\x01" + chr(len(self.__proxy[4])) + self.__proxy[4] + - chr(len(self.__proxy[5])) + self.__proxy[5]) - authstat = self.__recvall(2) - if authstat[0] != "\x01": - # Bad response - self.close() - raise GeneralProxyError((1, _generalerrors[1])) - if authstat[1] != "\x00": - # Authentication failed - self.close() - raise Socks5AuthError((3, _socks5autherrors[3])) - # Authentication succeeded - else: - # Reaching here is always bad - self.close() - if chosenauth[1] == "\xFF": - raise Socks5AuthError((2, _socks5autherrors[2])) - else: - raise GeneralProxyError((1, _generalerrors[1])) - # Now we can request the actual connection - req = "\x05\x01\x00" - # If the given destination address is an IP address, we'll - # use the IPv4 address request even if remote resolving was specified. - try: - ipaddr = socket.inet_aton(destaddr) - req = req + "\x01" + ipaddr - except socket.error: - # Well it's not an IP number, so it's probably a DNS name. - if self.__proxy[3] == True: - # Resolve remotely - ipaddr = None - req = req + "\x03" + chr(len(destaddr)) + destaddr - else: - # Resolve locally - ipaddr = socket.inet_aton(socket.gethostbyname(destaddr)) - req = req + "\x01" + ipaddr - req = req + self.__decode(struct.pack(">H", destport)) - self.sendall(req) - # Get the response - resp = self.__recvall(4) - if resp[0] != "\x05": - self.close() - raise GeneralProxyError((1, _generalerrors[1])) - elif resp[1] != "\x00": - # Connection failed - self.close() - if ord(resp[1]) <= 8: - raise Socks5Error((ord(resp[1]), _socks5errors[ord(resp[1])])) - else: - raise Socks5Error((9, _socks5errors[9])) - # Get the bound address/port - elif resp[3] == "\x01": - boundaddr = self.__recvall(4) - elif resp[3] == "\x03": - resp = resp + self.recv(1) - boundaddr = self.__recvall(ord(resp[4])) - else: - self.close() - raise GeneralProxyError((1, _generalerrors[1])) - d = bytes(self.__recvall(2), 'utf8') - d = str(d) #python2.5 no unicode in struct - boundport = struct.unpack(">H", d)[0] - self.__proxysockname = boundaddr, boundport - if ipaddr != None: - self.__proxypeername = (socket.inet_ntoa(ipaddr), destport) - else: - self.__proxypeername = (destaddr, destport) - - def getproxysockname(self): - """getsockname() -> address info - Returns the bound IP address and port number at the proxy. - """ - return self.__proxysockname - - def getproxypeername(self): - """getproxypeername() -> address info - Returns the IP and port number of the proxy. - """ - return socket.socket.getpeername(self) - - def getpeername(self): - """getpeername() -> address info - Returns the IP address and port number of the destination - machine (note: getproxypeername returns the proxy) - """ - return self.__proxypeername - - def __negotiatesocks4(self, destaddr, destport): - """__negotiatesocks4(self,destaddr,destport) - Negotiates a connection through a SOCKS4 server. - """ - # Check if the destination address provided is an IP address - rmtrslv = False - try: - ipaddr = socket.inet_aton(destaddr) - except socket.error: - # It's a DNS name. Check where it should be resolved. 
-            if self.__proxy[3] == True:
-                ipaddr = "\x00\x00\x00\x01"
-                rmtrslv = True
-            else:
-                ipaddr = socket.inet_aton(socket.gethostbyname(destaddr))
-        # Construct the request packet
-        req = "\x04\x01" + self.__decode(struct.pack(">H", destport)) + ipaddr
-        # The username parameter is considered userid for SOCKS4
-        if self.__proxy[4] != None:
-            req = req + self.__proxy[4]
-        req = req + "\x00"
-        # DNS name if remote resolving is required
-        # NOTE: This is actually an extension to the SOCKS4 protocol
-        # called SOCKS4A and may not be supported in all cases.
-        if rmtrslv==True:
-            req = req + destaddr + "\x00"
-        self.sendall(req)
-        # Get the response from the server
-        resp = self.__recvall(8)
-        if resp[0] != "\x00":
-            # Bad data
-            self.close()
-            raise GeneralProxyError((1, _generalerrors[1]))
-        if resp[1] != "\x5A":
-            # Server returned an error
-            self.close()
-            if ord(resp[1]) in (91,92,93):
-                self.close()
-                raise Socks4Error((ord(resp[1]), _socks4errors[ord(resp[1])-90]))
-            else:
-                raise Socks4Error((94,_socks4errors[4]))
-        # Get the bound address/port
-        self.__proxysockname = (socket.inet_ntoa(resp[4:]),struct.unpack(">H",bytes(resp[2:4],'utf8'))[0])
-        if rmtrslv != None:
-            self.__proxypeername = (socket.inet_ntoa(ipaddr),destport)
-        else:
-            self.__proxypeername = (destaddr, destport)
-
-    def __negotiatehttp(self, destaddr, destport):
-        """__negotiatehttp(self,destaddr,destport)
-        Negotiates a connection through an HTTP server.
-        """
-        # If we need to resolve locally, we do this now
-        if self.__proxy[3] == False:
-            addr = socket.gethostbyname(destaddr)
-        else:
-            addr = destaddr
-        self.sendall(("CONNECT %s:%s HTTP/1.1\r\n"
-                      "Host: %s\r\n\r\n") % (addr, destport, destaddr))
-        # We read the response until we get the string "\r\n\r\n"
-        resp = self.recv(1)
-        while resp.find("\r\n\r\n") == -1:
-            resp = resp + self.recv(1)
-        # We just need the first line to check if the connection
-        # was successful
-        statusline = resp.splitlines()[0].split(" ", 2)
-        if statusline[0] not in ("HTTP/1.0", "HTTP/1.1"):
-            self.close()
-            raise GeneralProxyError((1, _generalerrors[1]))
-        try:
-            statuscode = int(statusline[1])
-        except ValueError:
-            self.close()
-            raise GeneralProxyError((1, _generalerrors[1]))
-        if statuscode != 200:
-            self.close()
-            raise HTTPError((statuscode, statusline[2]))
-        self.__proxysockname = ("0.0.0.0", 0)
-        self.__proxypeername = (addr, destport)
-
-    def connect(self, destpair):
-        """connect(self,despair)
-        Connects to the specified destination through a proxy.
-        destpar - A tuple of the IP/DNS address and the port number.
-        (identical to socket's connect).
-        To select the proxy server use setproxy().
-        """
-        # Do a minimal input check first
-        # TODO(durin42): seriously? type checking? do we care?
-        if ((not isinstance(destpair, (list, tuple))) or len(destpair) < 2
-            or not isinstance(destpair[0], str) or not isinstance(destpair[1], int)):
-            raise GeneralProxyError((5, _generalerrors[5]))
-        if self.__proxy[0] == PROXY_TYPE_SOCKS5:
-            if self.__proxy[2] != None:
-                portnum = self.__proxy[2]
-            else:
-                portnum = 1080
-            socket.socket.connect(self,(self.__proxy[1], portnum))
-            self.__negotiatesocks5(destpair[0], destpair[1])
-        elif self.__proxy[0] == PROXY_TYPE_SOCKS4:
-            if self.__proxy[2] != None:
-                portnum = self.__proxy[2]
-            else:
-                portnum = 1080
-            socket.socket.connect(self, (self.__proxy[1], portnum))
-            self.__negotiatesocks4(destpair[0], destpair[1])
-        elif self.__proxy[0] == PROXY_TYPE_HTTP:
-            if self.__proxy[2] != None:
-                portnum = self.__proxy[2]
-            else:
-                portnum = 8080
-            socket.socket.connect(self, (self.__proxy[1], portnum))
-            self.__negotiatehttp(destpair[0], destpair[1])
-        elif self.__proxy[0] == None:
-            socket.socket.connect(self, (destpair[0], destpair[1]))
-        else:
-            raise GeneralProxyError((4, _generalerrors[4]))
diff --git a/module/plugins/Plugin.py b/module/plugins/Plugin.py
index ffac26864..4f26c72b4 100644
--- a/module/plugins/Plugin.py
+++ b/module/plugins/Plugin.py
@@ -105,7 +105,6 @@ class Plugin(object):
             #self.req.canContinue = True
         else:
             self.req = pyfile.m.core.requestFactory.getRequest(self.__name__)
-            self.req.progressNotify = pyfile.progress.setValue
 
         self.log = pyfile.m.core.log
 
@@ -292,7 +291,7 @@ class Plugin(object):
         """ returns the content loaded """
        if self.pyfile.abort: raise Abort
-        res = self.req.getPage(url, get=get, post=post, cookies=cookies)
+        res = self.req.getPage(url, get, post, ref, cookies)
 
         if self.core.debug:
             from inspect import currentframe
             frame = currentframe()
@@ -335,17 +334,11 @@ class Plugin(object):
             name = self.pyfile.name.encode(sys.getfilesystemencoding(), "replace")
 
         filename = join(location, name)
-        d = self.req.httpDownload(url, filename, get=get, post=post, chunks=self.getChunkCount(), resume=self.resumeDownload)
-        self.pyfile.download = d
-        d.addProgress("percent", self.pyfile.progress.setValue)
-        waitFor(d)
+        self.req.httpDownload(url, filename, get=get, post=post, chunks=self.getChunkCount(), resume=self.resumeDownload)
 
-        if d.abort: raise Abort
-
-        self.pyfile.download = None
         newname = basename(filename)
 
-        self.pyfile.size = d.size
+        self.pyfile.size = self.req.size
 
         if newname and newname != name:
             self.log.info("%(name)s saved as %(newname)s" % {"name": name, "newname": newname})
diff --git a/module/plugins/hoster/BasePlugin.py b/module/plugins/hoster/BasePlugin.py
index 292570d6c..08c53a616 100644
--- a/module/plugins/hoster/BasePlugin.py
+++ b/module/plugins/hoster/BasePlugin.py
@@ -13,6 +13,10 @@ class BasePlugin(Hoster):
     __author_name__ = ("RaNaN")
     __author_mail__ = ("RaNaN@pyload.org")
 
+    def setup(self):
+        self.chunkLimit = 3
+        self.resumeDownload = True
+
     def process(self, pyfile):
         """main function"""
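Together with the Plugin.download() change above, a setup() like BasePlugin's is all a hoster needs to opt in to the curl backend's features. A hypothetical hoster sketch (name, pattern and URL invented) showing how the two attributes flow into req.httpDownload():

    from module.plugins.Hoster import Hoster

    class ExampleHoster(Hoster):
        __name__ = "ExampleHoster"
        __pattern__ = r"http://example\.com/dl/\w+"

        def setup(self):
            self.chunkLimit = 3         # upper bound consulted by getChunkCount()
            self.resumeDownload = True  # forwarded as resume= to req.httpDownload

        def process(self, pyfile):
            # self.download() ends up calling
            # self.req.httpDownload(url, filename, chunks=self.getChunkCount(),
            #                       resume=self.resumeDownload)
            self.download(pyfile.url)

Hosters that do not define setup() keep the safe defaults of a single chunk and no resume, so existing plugins are unaffected by the backend switch.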