summary | refs | log | tree | commit | diff | stats
path: root/module/network
diff options
context:
space:
mode:
Diffstat (limited to 'module/network')
-rw-r--r-- module/network/Browser.py 22
-rw-r--r-- module/network/HTTPChunk.py 2
-rw-r--r-- module/network/HTTPDownload.py 16
-rw-r--r-- module/network/HTTPRequest.py 44
-rw-r--r-- module/network/RequestFactory.py 6
5 files changed, 63 insertions, 27 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py
index 822e2ed6d..23cf7666b 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -8,7 +8,6 @@ from HTTPDownload import HTTPDownload
class Browser(object):
-
__slots__ = ("log", "options", "bucket", "cj", "_size", "http", "dl")
def __init__(self, bucket=None, options={}):
@@ -20,9 +19,14 @@ class Browser(object):
self.cj = None # needs to be setted later
self._size = 0
- self.http = HTTPRequest(self.cj, options)
+ self.renewHTTPRequest()
self.dl = None
+
+ def renewHTTPRequest(self):
+ if hasattr(self, "http"): self.http.close()
+ self.http = HTTPRequest(self.cj, self.options)
+
def setLastURL(self, val):
self.http.lastURL = val
@@ -80,7 +84,7 @@ class Browser(object):
""" this can also download ftp """
self._size = 0
self.dl = HTTPDownload(url, filename, get, post, self.lastEffectiveURL if ref else None,
- self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition)
+ self.cj if cookies else None, self.bucket, self.options, progressNotify, disposition)
name = self.dl.download(chunks, resume)
self._size = self.dl.size
@@ -96,6 +100,18 @@ class Browser(object):
""" add a header to the request """
self.http.putHeader(name, value)
+ def addAuth(self, pwd):
+ """Adds user and pw for http auth
+
+ :param pwd: string, user:password
+ """
+ self.options["auth"] = pwd
+ self.renewHTTPRequest() #we need a new request
+
+ def removeAuth(self):
+ if "auth" in self.options: del self.options["auth"]
+ self.renewHTTPRequest()
+
def clearHeaders(self):
self.http.clearHeaders()
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index 680b982d3..69eedb19c 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -137,7 +137,7 @@ class HTTPChunk(HTTPRequest):
self.fp = None #file handle
self.initHandle()
- self.setInterface(self.p.options["interface"], self.p.options["proxies"], self.p.options["ipv6"])
+ self.setInterface(self.p.options)
self.BOMChecked = False # check and remove byte order mark
diff --git a/module/network/HTTPDownload.py b/module/network/HTTPDownload.py
index 3edf56d98..1a2886332 100644
--- a/module/network/HTTPDownload.py
+++ b/module/network/HTTPDownload.py
@@ -136,8 +136,7 @@ class HTTPDownload():
#remove old handles
for chunk in self.chunks:
- self.m.remove_handle(chunk.c)
- chunk.close()
+ self.closeChunk(chunk)
return self._download(chunks, False)
else:
@@ -211,7 +210,7 @@ class HTTPDownload():
curl, errno, msg = c
#test if chunk was finished, otherwise raise the exception
if errno != 23 or "0 !=" not in msg:
- raise
+ raise pycurl.error(errno, msg)
#@TODO KeyBoardInterrupts are seen as finished chunks,
#but normally not handled to this process, only in the testcase
@@ -266,11 +265,18 @@ class HTTPDownload():
if self.progressNotify:
self.progressNotify(self.percent)
+ def closeChunk(self, chunk):
+ try:
+ self.m.remove_handle(chunk.c)
+ except pycurl.error:
+ self.log.debug("Error removing chunk")
+ finally:
+ chunk.close()
+
def close(self):
""" cleanup """
for chunk in self.chunks:
- self.m.remove_handle(chunk.c)
- chunk.close()
+ self.closeChunk(chunk)
self.chunks = []
if hasattr(self, "m"):
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index d8d57e76f..6672a58e6 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -19,7 +19,7 @@
import pycurl
-from codecs import getincrementaldecoder
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
from urllib import quote, urlencode
from httplib import responses
from logging import getLogger
@@ -28,11 +28,12 @@ from cStringIO import StringIO
from module.plugins.Plugin import Abort
def myquote(url):
- return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+ return quote(url, safe="%/:=&?~#+!$,;'@()*[]")
+
class BadHeader(Exception):
def __init__(self, code, content=""):
- Exception.__init__(self, "Bad server response: %s %s"% (code, responses[int(code)]))
+ Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)]))
self.code = code
self.content = content
@@ -54,7 +55,7 @@ class HTTPRequest():
self.headers = [] #temporary request header
self.initHandle()
- self.setInterface(options["interface"], options["proxies"], options["ipv6"])
+ self.setInterface(options)
self.c.setopt(pycurl.WRITEFUNCTION, self.write)
self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
@@ -77,16 +78,21 @@ class HTTPRequest():
#self.c.setopt(pycurl.VERBOSE, 1)
- self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+ self.c.setopt(pycurl.USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
if pycurl.version_info()[7]:
self.c.setopt(pycurl.ENCODING, "gzip, deflate")
self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
- "Accept-Language: en-US,en",
- "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
- "Connection: keep-alive",
- "Keep-Alive: 300"])
+ "Accept-Language: en-US,en",
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+ "Connection: keep-alive",
+ "Keep-Alive: 300",
+ "Expect:"])
+
+ def setInterface(self, options):
+
+ interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
- def setInterface(self, interface, proxy, ipv6=False):
if interface and interface.lower() != "none":
self.c.setopt(pycurl.INTERFACE, str(interface))
@@ -97,7 +103,7 @@ class HTTPRequest():
self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
else:
self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
-
+
self.c.setopt(pycurl.PROXY, str(proxy["address"]))
self.c.setopt(pycurl.PROXYPORT, proxy["port"])
@@ -109,6 +115,9 @@ class HTTPRequest():
else:
self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+ if "auth" in options:
+ self.c.setopt(pycurl.USERPWD, str(options["auth"]))
+
def addCookies(self):
""" put cookies from curl handle to cj """
if self.cj:
@@ -145,14 +154,14 @@ class HTTPRequest():
pass
else:
post = urlencode(post)
-
+
self.c.setopt(pycurl.POSTFIELDS, post)
else:
- post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x,y in post.iteritems()]
+ post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x, y in post.iteritems()]
self.c.setopt(pycurl.HTTPPOST, post)
else:
self.c.setopt(pycurl.POST, 0)
-
+
if referer and self.lastURL:
self.c.setopt(pycurl.REFERER, str(self.lastURL))
@@ -198,7 +207,7 @@ class HTTPRequest():
def verifyHeader(self):
""" raise an exceptions on bad headers """
code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
- if code in range(400,404) or code in range(405,418) or code in range(500,506):
+ if code in range(400, 404) or code in range(405, 418) or code in range(500, 506):
#404 will NOT raise an exception
raise BadHeader(code, self.getResponse())
return code
@@ -218,7 +227,7 @@ class HTTPRequest():
for line in header:
line = line.lower().replace(" ", "")
- if not line.startswith("content-type:") or \
+ if not line.startswith("content-type:") or\
("text" not in line and "application" not in line):
continue
@@ -230,6 +239,9 @@ class HTTPRequest():
try:
#self.log.debug("Decoded %s" % encoding )
+ if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+ encoding = 'utf-8-sig'
+
decoder = getincrementaldecoder(encoding)("replace")
rep = decoder.decode(rep, True)
diff --git a/module/network/RequestFactory.py b/module/network/RequestFactory.py
index 774249a70..5b1528281 100644
--- a/module/network/RequestFactory.py
+++ b/module/network/RequestFactory.py
@@ -54,9 +54,11 @@ class RequestFactory():
self.lock.release()
return req
- def getHTTPRequest(self):
+ def getHTTPRequest(self, **kwargs):
""" returns a http request, dont forget to close it ! """
- return HTTPRequest(CookieJar(None), self.getOptions())
+ options = self.getOptions()
+ options.update(kwargs) # submit kwargs as additional options
+ return HTTPRequest(CookieJar(None), options)
def getURL(self, *args, **kwargs):
""" see HTTPRequest for argument list """