diff options
Diffstat (limited to 'module/network')
-rw-r--r-- | module/network/Browser.py | 22 | ||||
-rw-r--r-- | module/network/HTTPChunk.py | 2 | ||||
-rw-r--r-- | module/network/HTTPDownload.py | 16 | ||||
-rw-r--r-- | module/network/HTTPRequest.py | 44 | ||||
-rw-r--r-- | module/network/RequestFactory.py | 6 |
5 files changed, 63 insertions, 27 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py index 822e2ed6d..23cf7666b 100644 --- a/module/network/Browser.py +++ b/module/network/Browser.py @@ -8,7 +8,6 @@ from HTTPDownload import HTTPDownload class Browser(object): - __slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl") def __init__(self, bucket=None, options={}): @@ -20,9 +19,14 @@ class Browser(object): self.cj = None # needs to be setted later self._size = 0 - self.http = HTTPRequest(self.cj, options) + self.renewHTTPRequest() self.dl = None + + def renewHTTPRequest(self): + if hasattr(self, "http"): self.http.close() + self.http = HTTPRequest(self.cj, self.options) + def setLastURL(self, val): self.http.lastURL = val @@ -80,7 +84,7 @@ class Browser(object): """ this can also download ftp """ self._size = 0 self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None, - self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition) + self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition) name = self.dl.download(chunks, resume) self._size = self.dl.size @@ -96,6 +100,18 @@ class Browser(object): """ add a header to the request """ self.http.putHeader(name, value) + def addAuth(self, pwd): + """Adds user and pw for http auth + + :param pwd: string, user:password + """ + self.options["auth"] = pwd + self.renewHTTPRequest() #we need a new request + + def removeAuth(self): + if "auth" in self.options: del self.options["auth"] + self.renewHTTPRequest() + def clearHeaders(self): self.http.clearHeaders() diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py index 680b982d3..69eedb19c 100644 --- a/module/network/HTTPChunk.py +++ b/module/network/HTTPChunk.py @@ -137,7 +137,7 @@ class HTTPChunk(HTTPRequest): self.fp = None #file handle self.initHandle() - self.setInterface(self.p.options["interface"], self.p.options["proxies"], self.p.options["ipv6"]) + self.setInterface(self.p.options) self.BOMChecked = False # check and remove byte order mark diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py index 3edf56d98..1a2886332 100644 --- a/module/network/HTTPDownload.py +++ b/module/network/HTTPDownload.py @@ -136,8 +136,7 @@ class HTTPDownload(): #remove old handles for chunk in self.chunks: - self.m.remove_handle(chunk.c) - chunk.close() + self.closeChunk(chunk) return self._download(chunks, False) else: @@ -211,7 +210,7 @@ class HTTPDownload(): curl, errno, msg = c #test if chunk was finished, otherwise raise the exception if errno != 23 or "0 !=" not in msg: - raise + raise pycurl.error(errno, msg) #@TODO KeyBoardInterrupts are seen as finished chunks, #but normally not handled to this process, only in the testcase @@ -266,11 +265,18 @@ class HTTPDownload(): if self.progressNotify: self.progressNotify(self.percent) + def closeChunk(self, chunk): + try: + self.m.remove_handle(chunk.c) + except pycurl.error: + self.log.debug("Error removing chunk") + finally: + chunk.close() + def close(self): """ cleanup """ for chunk in self.chunks: - self.m.remove_handle(chunk.c) - chunk.close() + self.closeChunk(chunk) self.chunks = [] if hasattr(self, "m"): diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index d8d57e76f..6672a58e6 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -19,7 +19,7 @@ import pycurl -from codecs import getincrementaldecoder +from codecs import getincrementaldecoder, lookup, BOM_UTF8 from urllib import quote, urlencode from httplib import responses from logging import getLogger @@ -28,11 +28,12 @@ from cStringIO import StringIO from module.plugins.Plugin import Abort def myquote(url): - return quote(url, safe="%/:=&?~#+!$,;'@()*[]") + return quote(url, safe="%/:=&?~#+!$,;'@()*[]") + class BadHeader(Exception): def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s"% (code, responses[int(code)])) + Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)])) self.code = code self.content = content @@ -54,7 +55,7 @@ class HTTPRequest(): self.headers = [] #temporary request header self.initHandle() - self.setInterface(options["interface"], options["proxies"], options["ipv6"]) + self.setInterface(options) self.c.setopt(pycurl.WRITEFUNCTION, self.write) self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) @@ -77,16 +78,21 @@ class HTTPRequest(): #self.c.setopt(pycurl.VERBOSE, 1) - self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") + self.c.setopt(pycurl.USERAGENT, + "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") if pycurl.version_info()[7]: self.c.setopt(pycurl.ENCODING, "gzip, deflate") self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US,en", - "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300"]) + "Accept-Language: en-US,en", + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Connection: keep-alive", + "Keep-Alive: 300", + "Expect:"]) + + def setInterface(self, options): + + interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - def setInterface(self, interface, proxy, ipv6=False): if interface and interface.lower() != "none": self.c.setopt(pycurl.INTERFACE, str(interface)) @@ -97,7 +103,7 @@ class HTTPRequest(): self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) else: self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - + self.c.setopt(pycurl.PROXY, str(proxy["address"])) self.c.setopt(pycurl.PROXYPORT, proxy["port"]) @@ -109,6 +115,9 @@ class HTTPRequest(): else: self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) + if "auth" in options: + self.c.setopt(pycurl.USERPWD, str(options["auth"])) + def addCookies(self): """ put cookies from curl handle to cj """ if self.cj: @@ -145,14 +154,14 @@ class HTTPRequest(): pass else: post = urlencode(post) - + self.c.setopt(pycurl.POSTFIELDS, post) else: - post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x,y in post.iteritems()] + post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x, y in post.iteritems()] self.c.setopt(pycurl.HTTPPOST, post) else: self.c.setopt(pycurl.POST, 0) - + if referer and self.lastURL: self.c.setopt(pycurl.REFERER, str(self.lastURL)) @@ -198,7 +207,7 @@ class HTTPRequest(): def verifyHeader(self): """ raise an exceptions on bad headers """ code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - if code in range(400,404) or code in range(405,418) or code in range(500,506): + if code in range(400, 404) or code in range(405, 418) or code in range(500, 506): #404 will NOT raise an exception raise BadHeader(code, self.getResponse()) return code @@ -218,7 +227,7 @@ class HTTPRequest(): for line in header: line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or \ + if not line.startswith("content-type:") or\ ("text" not in line and "application" not in line): continue @@ -230,6 +239,9 @@ class HTTPRequest(): try: #self.log.debug("Decoded %s" % encoding ) + if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): + encoding = 'utf-8-sig' + decoder = getincrementaldecoder(encoding)("replace") rep = decoder.decode(rep, True) diff --git a/module/network/RequestFactory.py b/module/network/RequestFactory.py index 774249a70..5b1528281 100644 --- a/module/network/RequestFactory.py +++ b/module/network/RequestFactory.py @@ -54,9 +54,11 @@ class RequestFactory(): self.lock.release() return req - def getHTTPRequest(self): + def getHTTPRequest(self, **kwargs): """ returns a http request, dont forget to close it ! """ - return HTTPRequest(CookieJar(None), self.getOptions()) + options = self.getOptions() + options.update(kwargs) # submit kwargs as additional options + return HTTPRequest(CookieJar(None), options) def getURL(self, *args, **kwargs): """ see HTTPRequest for argument list """ |