diff options
Diffstat (limited to 'module/network/HTTPRequest.py')
-rw-r--r-- | module/network/HTTPRequest.py | 324 |
1 files changed, 0 insertions, 324 deletions
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py deleted file mode 100644 index 874da368b..000000000 --- a/module/network/HTTPRequest.py +++ /dev/null @@ -1,324 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -""" - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - See the GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, see <http://www.gnu.org/licenses/>. - - @author: RaNaN -""" - -import pycurl - -from codecs import getincrementaldecoder, lookup, BOM_UTF8 -from urllib import quote, urlencode -from httplib import responses -from logging import getLogger -from cStringIO import StringIO - -from module.plugins.Base import Abort - -def myquote(url): - return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]") - -def myurlencode(data): - data = dict(data) - return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \ - y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in data.iteritems())) - -bad_headers = range(400, 404) + range(405, 418) + range(500, 506) - -class BadHeader(Exception): - def __init__(self, code, content=""): - Exception.__init__(self, "Bad server response: %s %s" % (code, responses.get(int(code), "Unknown Header"))) - self.code = code - self.content = content - - -class HTTPRequest(): - def __init__(self, cookies=None, options=None): - self.c = pycurl.Curl() - self.rep = StringIO() - - self.cj = cookies #cookiejar - - self.lastURL = None - self.lastEffectiveURL = None - self.abort = False - self.code = 0 # last http code - - self.header = "" - - self.headers = [] #temporary request header - - self.initHandle() - self.setInterface(options) - self.setOptions(options) - - self.c.setopt(pycurl.WRITEFUNCTION, self.write) - self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) - - self.log = getLogger("log") - - - def initHandle(self): - """ sets common options to curl handle """ - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.MAXREDIRS, 5) - self.c.setopt(pycurl.CONNECTTIMEOUT, 30) - self.c.setopt(pycurl.NOSIGNAL, 1) - self.c.setopt(pycurl.NOPROGRESS, 1) - if hasattr(pycurl, "AUTOREFERER"): - self.c.setopt(pycurl.AUTOREFERER, 1) - self.c.setopt(pycurl.SSL_VERIFYPEER, 0) - # Interval for low speed, detects connection loss, but can abort dl if hoster stalls the download - self.c.setopt(pycurl.LOW_SPEED_TIME, 45) - self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5) - - #self.c.setopt(pycurl.VERBOSE, 1) - - self.c.setopt(pycurl.USERAGENT, - "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0") - if pycurl.version_info()[7]: - self.c.setopt(pycurl.ENCODING, "gzip, deflate") - self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*", - "Accept-Language: en-US,en", - "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7", - "Connection: keep-alive", - "Keep-Alive: 300", - "Expect:"]) - - def setInterface(self, options): - - interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"] - - if interface and interface.lower() != "none": - self.c.setopt(pycurl.INTERFACE, str(interface)) - - if proxy: - if proxy["type"] == "socks4": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4) - elif proxy["type"] == "socks5": - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5) - else: - self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP) - - self.c.setopt(pycurl.PROXY, str(proxy["address"])) - self.c.setopt(pycurl.PROXYPORT, proxy["port"]) - - if proxy["username"]: - self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"]))) - - if ipv6: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER) - else: - self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4) - - if "auth" in options: - self.c.setopt(pycurl.USERPWD, str(options["auth"])) - - if "timeout" in options: - self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"]) - - def setOptions(self, options): - """ Sets same options as available in pycurl """ - for k, v in options.iteritems(): - if hasattr(pycurl, k): - self.c.setopt(getattr(pycurl, k), v) - - def addCookies(self): - """ put cookies from curl handle to cj """ - if self.cj: - self.cj.addCookies(self.c.getinfo(pycurl.INFO_COOKIELIST)) - - def getCookies(self): - """ add cookies from cj to curl handle """ - if self.cj: - for c in self.cj.getCookies(): - self.c.setopt(pycurl.COOKIELIST, c) - return - - def clearCookies(self): - self.c.setopt(pycurl.COOKIELIST, "") - - def setRequestContext(self, url, get, post, referer, cookies, multipart=False): - """ sets everything needed for the request """ - - url = myquote(url) - - if get: - get = urlencode(get) - url = "%s?%s" % (url, get) - - self.c.setopt(pycurl.URL, url) - self.lastURL = url - - if post: - self.c.setopt(pycurl.POST, 1) - if not multipart: - if type(post) == unicode: - post = str(post) #unicode not allowed - elif type(post) == str: - pass - else: - post = myurlencode(post) - - self.c.setopt(pycurl.POSTFIELDS, post) - else: - post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()] - self.c.setopt(pycurl.HTTPPOST, post) - else: - self.c.setopt(pycurl.POST, 0) - - if referer and self.lastURL: - self.c.setopt(pycurl.REFERER, str(self.lastURL)) - - if cookies: - self.c.setopt(pycurl.COOKIEFILE, "") - self.c.setopt(pycurl.COOKIEJAR, "") - self.getCookies() - - - def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False): - """ load and returns a given page """ - - self.setRequestContext(url, get, post, referer, cookies, multipart) - - # TODO: use http/rfc message instead - self.header = "" - - self.c.setopt(pycurl.HTTPHEADER, self.headers) - - if just_header: - self.c.setopt(pycurl.FOLLOWLOCATION, 0) - self.c.setopt(pycurl.NOBODY, 1) #TODO: nobody= no post? - - # overwrite HEAD request, we want a common request type - if post: - self.c.setopt(pycurl.CUSTOMREQUEST, "POST") - else: - self.c.setopt(pycurl.CUSTOMREQUEST, "GET") - - try: - self.c.perform() - rep = self.header - finally: - self.c.setopt(pycurl.FOLLOWLOCATION, 1) - self.c.setopt(pycurl.NOBODY, 0) - self.c.unsetopt(pycurl.CUSTOMREQUEST) - - else: - self.c.perform() - rep = self.getResponse() - - self.c.setopt(pycurl.POSTFIELDS, "") - self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) - self.code = self.verifyHeader() - - self.addCookies() - - if decode: - rep = self.decodeResponse(rep) - - return rep - - def verifyHeader(self): - """ raise an exceptions on bad headers """ - code = int(self.c.getinfo(pycurl.RESPONSE_CODE)) - # TODO: raise anyway to be consistent, also rename exception - if code in bad_headers: - #404 will NOT raise an exception - raise BadHeader(code, self.getResponse()) - return code - - def checkHeader(self): - """ check if header indicates failure""" - return int(self.c.getinfo(pycurl.RESPONSE_CODE)) not in bad_headers - - def getResponse(self): - """ retrieve response from string io """ - if self.rep is None: return "" - value = self.rep.getvalue() - self.rep.close() - self.rep = StringIO() - return value - - def decodeResponse(self, rep): - """ decode with correct encoding, relies on header """ - header = self.header.splitlines() - encoding = "utf8" # default encoding - - for line in header: - line = line.lower().replace(" ", "") - if not line.startswith("content-type:") or\ - ("text" not in line and "application" not in line): - continue - - none, delemiter, charset = line.rpartition("charset=") - if delemiter: - charset = charset.split(";") - if charset: - encoding = charset[0] - - try: - #self.log.debug("Decoded %s" % encoding ) - if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8): - encoding = 'utf-8-sig' - - decoder = getincrementaldecoder(encoding)("replace") - rep = decoder.decode(rep, True) - - #TODO: html_unescape as default - - except LookupError: - self.log.debug("No Decoder found for %s" % encoding) - except Exception: - self.log.debug("Error when decoding string from %s." % encoding) - - return rep - - def write(self, buf): - """ writes response """ - if self.rep.tell() > 1000000 or self.abort: - rep = self.getResponse() - if self.abort: raise Abort() - f = open("response.dump", "wb") - f.write(rep) - f.close() - raise Exception("Loaded Url exceeded limit") - - self.rep.write(buf) - - def writeHeader(self, buf): - """ writes header """ - self.header += buf - - def putHeader(self, name, value): - self.headers.append("%s: %s" % (name, value)) - - def clearHeaders(self): - self.headers = [] - - def close(self): - """ cleanup, unusable after this """ - self.rep.close() - if hasattr(self, "cj"): - del self.cj - if hasattr(self, "c"): - self.c.close() - del self.c - -if __name__ == "__main__": - url = "http://pyload.org" - c = HTTPRequest() - print c.load(url) - |