diff options
author | Walter Purcaro <vuolter@gmail.com> | 2015-02-16 10:46:28 +0100 |
---|---|---|
committer | Walter Purcaro <vuolter@gmail.com> | 2015-02-16 10:46:28 +0100 |
commit | ce1c2b6b05c08b669357947e61ae40efce7fc50f (patch) | |
tree | 0b5f7996960cf35c4eface53a89eba18a37519b7 /pyload/network/HTTPRequest.py | |
parent | Fix filename case (diff) | |
download | pyload-ce1c2b6b05c08b669357947e61ae40efce7fc50f.tar.xz |
module temp
Diffstat (limited to 'pyload/network/HTTPRequest.py')
-rw-r--r-- | pyload/network/HTTPRequest.py | 302 |
1 files changed, 0 insertions, 302 deletions
diff --git a/pyload/network/HTTPRequest.py b/pyload/network/HTTPRequest.py deleted file mode 100644 index eac03a365..000000000 --- a/pyload/network/HTTPRequest.py +++ /dev/null @@ -1,302 +0,0 @@ -# -*- coding: utf-8 -*- -# @author: RaNaN - -from __future__ import with_statement - -import pycurl - -from codecs import getincrementaldecoder, lookup, BOM_UTF8 -from urllib import quote, urlencode -from httplib import responses -from logging import getLogger -from cStringIO import StringIO - -from pyload.plugin.Plugin import Abort, Fail - -from pyload.utils import encode - - -def myquote(url): - return quote(encode(url), safe="%/:=&?~#+!$,;'@()*[]") - -def myurlencode(data): - data = dict(data) - return urlencode(dict((encode(x), encode(y)) for x, y in data.iteritems())) - -bad_headers = range(400, 404) + range(405, 418) + range(500, 506) - -class BadHeader(Exception): - def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s" % (code, responses[int(code)])) - self.code = code - self.content = content - - -class HTTPRequest(object): - def __init__(self, cookies=None, options=None): - self.c = pycurl.Curl() - self.rep = StringIO() - - self.cj = cookies #cookiejar - - self.lastURL = None - self.lastEffectiveURL = None - self.abort = False - self.code = 0 # last http code - - self.header = "" - - self.headers = [] #temporary request header - - self.initHandle() - self.setInterface(options) - - self.c.setopt(pycurl.WRITEFUNCTION, self.write) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - self.log = getLogger("log") - - - def initHandle(self): - """ sets common options to curl handle """ - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.MAXREDIRS, 5) - self.c.setopt(pycurl.CONNECTTIMEOUT, 30) - self.c.setopt(pycurl.NOSIGNAL, 1) - self.c.setopt(pycurl.NOPROGRESS, 1) - if hasattr(pycurl, "AUTOREFERER"): - self.c.setopt(pycurl.AUTOREFERER, 1) - self.c.setopt(pycurl.SSL_VERIFYPEER, 0) - self.c.setopt(pycurl.LOW_SPEED_TIME, 60) - self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) - self.c.setopt(pycurl.USE_SSL, pycurl.CURLUSESSL_TRY) - - #self.c.setopt(pycurl.VERBOSE, 1) - - self.c.setopt(pycurl.USERAGENT, - "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") - if pycurl.version_info()[7]: - self.c.setopt(pycurl.ENCODING, "gzip, deflate") - self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US, en", - "Accept-Charset: ISO-8859-1, utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300", - "Expect:"]) - - def setInterface(self, options): - - interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - - if interface and interface.lower() != "none": - self.c.setopt(pycurl.INTERFACE, str(interface)) - - if proxy: - if proxy["type"] == "socks4": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4) - elif proxy["type"] == "socks5": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) - else: - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - - self.c.setopt(pycurl.PROXY, str(proxy["address"])) - self.c.setopt(pycurl.PROXYPORT, proxy["port"]) - - if proxy["username"]: - self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"]))) - - if ipv6: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) - else: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) - - if "auth" in options: - self.c.setopt(pycurl.USERPWD, str(options["auth"])) - - if "timeout" in options: - self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"]) - - - def addCookies(self): - """ put cookies from curl handle to cj """ - if self.cj: - self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) - - def getCookies(self): - """ add cookies from cj to curl handle """ - if self.cj: - for c in self.cj.getCookies(): - self.c.setopt(pycurl.COOKIELIST, c) - return - - def clearCookies(self): - self.c.setopt(pycurl.COOKIELIST, "") - - def setRequestContext(self, url, get, post, referer, cookies, multipart=False): - """ sets everything needed for the request """ - - url = myquote(url) - - if get: - get = urlencode(get) - url = "%s?%s" % (url, get) - - self.c.setopt(pycurl.URL, url) - self.c.lastUrl = url - - if post: - self.c.setopt(pycurl.POST, 1) - if not multipart: - if type(post) == unicode: - post = str(post) #unicode not allowed - elif type(post) == str: - pass - else: - post = myurlencode(post) - - self.c.setopt(pycurl.POSTFIELDS, post) - else: - post = [(x, encode(y)) for x, y in post.iteritems()] - self.c.setopt(pycurl.HTTPPOST, post) - else: - self.c.setopt(pycurl.POST, 0) - - if referer and self.lastURL: - self.c.setopt(pycurl.REFERER, str(self.lastURL)) - - if cookies: - self.c.setopt(pycurl.COOKIEFILE, "") - self.c.setopt(pycurl.COOKIEJAR, "") - self.getCookies() - - - def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False, follow_location=True, save_cookies=True): - """ load and returns a given page """ - - self.setRequestContext(url, get, post, referer, cookies, multipart) - - self.header = "" - - self.c.setopt(pycurl.HTTPHEADER, self.headers) - - if post: - self.c.setopt(pycurl.POST, 1) - else: - self.c.setopt(pycurl.HTTPGET, 1) - - if not follow_location: - self.c.setopt(pycurl.FOLLOWLOCATION, 0) - - if just_header: - self.c.setopt(pycurl.NOBODY, 1) - - self.c.perform() - rep = self.header if just_header else self.getResponse() - - if not follow_location: - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - - if just_header: - self.c.setopt(pycurl.NOBODY, 0) - - self.c.setopt(pycurl.POSTFIELDS, "") - self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) - self.code = self.verifyHeader() - - if save_cookies: - self.addCookies() - - if decode: - rep = self.decodeResponse(rep) - - return rep - - def verifyHeader(self): - """ raise an exceptions on bad headers """ - code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - if code in bad_headers: - #404 will NOT raise an exception - raise BadHeader(code, self.getResponse()) - return code - - def checkHeader(self): - """ check if header indicates failure""" - return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers - - def getResponse(self): - """ retrieve response from string io """ - if self.rep is None: - return "" - else: - value = self.rep.getvalue() - self.rep.close() - self.rep = StringIO() - return value - - def decodeResponse(self, rep): - """ decode with correct encoding, relies on header """ - header = self.header.splitlines() - encoding = "utf8" # default encoding - - for line in header: - line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or\ - ("text" not in line and "application" not in line): - continue - - none, delemiter, charset = line.rpartition("charset=") - if delemiter: - charset = charset.split(";") - if charset: - encoding = charset[0] - - try: - #self.log.debug("Decoded %s" % encoding ) - if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): - encoding = 'utf-8-sig' - - decoder = getincrementaldecoder(encoding)("replace") - rep = decoder.decode(rep, True) - - #TODO: html_unescape as default - - except LookupError: - self.log.debug("No Decoder foung for %s" % encoding) - - except Exception: - self.log.debug("Error when decoding string from %s." % encoding) - - return rep - - def write(self, buf): - """ writes response """ - if self.rep.tell() > 1000000 or self.abort: - rep = self.getResponse() - - if self.abort: - raise Abort - - with open("response.dump", "wb") as f: - f.write(rep) - raise Fail("Loaded url exceeded size limit") - else: - self.rep.write(buf) - - def writeHeader(self, buf): - """ writes header """ - self.header += buf - - def putHeader(self, name, value): - self.headers.append("%s: %s" % (name, value)) - - def clearHeaders(self): - self.headers = [] - - def close(self): - """ cleanup, unusable after this """ - self.rep.close() - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "c"): - self.c.close() - del self.c |