diff options
Diffstat (limited to 'module/network/HTTPRequest.py')
-rw-r--r-- | module/network/HTTPRequest.py | 44 |
1 files changed, 28 insertions, 16 deletions
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index d8d57e76f..6672a58e6 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -19,7 +19,7 @@ import pycurl -from codecs import getincrementaldecoder +from codecs import getincrementaldecoder, lookup, BOM_UTF8 from urllib import quote, urlencode from httplib import responses from logging import getLogger @@ -28,11 +28,12 @@ from cStringIO import StringIO from module.plugins.Plugin import Abort def myquote(url): - return quote(url, safe="%/:=&?~#+!$,;'@()*[]") + return quote(url, safe="%/:=&?~#+!$,;'@()*[]") + class BadHeader(Exception): def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s"% (code, responses[int(code)])) + Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)])) self.code = code self.content = content @@ -54,7 +55,7 @@ class HTTPRequest(): self.headers = [] #temporary request header self.initHandle() - self.setInterface(options["interface"], options["proxies"], options["ipv6"]) + self.setInterface(options) self.c.setopt(pycurl.WRITEFUNCTION, self.write) self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) @@ -77,16 +78,21 @@ class HTTPRequest(): #self.c.setopt(pycurl.VERBOSE, 1) - self.c.setopt(pycurl.USERAGENT, "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") + self.c.setopt(pycurl.USERAGENT, + "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") if pycurl.version_info()[7]: self.c.setopt(pycurl.ENCODING, "gzip, deflate") self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US,en", - "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300"]) + "Accept-Language: en-US,en", + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", + "Connection: keep-alive", + "Keep-Alive: 300", + "Expect:"]) + + def setInterface(self, options): + + interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - def setInterface(self, interface, proxy, ipv6=False): if interface and interface.lower() != "none": self.c.setopt(pycurl.INTERFACE, str(interface)) @@ -97,7 +103,7 @@ class HTTPRequest(): self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) else: self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - + self.c.setopt(pycurl.PROXY, str(proxy["address"])) self.c.setopt(pycurl.PROXYPORT, proxy["port"]) @@ -109,6 +115,9 @@ class HTTPRequest(): else: self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) + if "auth" in options: + self.c.setopt(pycurl.USERPWD, str(options["auth"])) + def addCookies(self): """ put cookies from curl handle to cj """ if self.cj: @@ -145,14 +154,14 @@ class HTTPRequest(): pass else: post = urlencode(post) - + self.c.setopt(pycurl.POSTFIELDS, post) else: - post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x,y in post.iteritems()] + post = [(x, str(quote(y)) if type(y) in (str, unicode) else y ) for x, y in post.iteritems()] self.c.setopt(pycurl.HTTPPOST, post) else: self.c.setopt(pycurl.POST, 0) - + if referer and self.lastURL: self.c.setopt(pycurl.REFERER, str(self.lastURL)) @@ -198,7 +207,7 @@ class HTTPRequest(): def verifyHeader(self): """ raise an exceptions on bad headers """ code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - if code in range(400,404) or code in range(405,418) or code in range(500,506): + if code in range(400, 404) or code in range(405, 418) or code in range(500, 506): #404 will NOT raise an exception raise BadHeader(code, self.getResponse()) return code @@ -218,7 +227,7 @@ class HTTPRequest(): for line in header: line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or \ + if not line.startswith("content-type:") or\ ("text" not in line and "application" not in line): continue @@ -230,6 +239,9 @@ class HTTPRequest(): try: #self.log.debug("Decoded %s" % encoding ) + if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): + encoding = 'utf-8-sig' + decoder = getincrementaldecoder(encoding)("replace") rep = decoder.decode(rep, True) |