Diffstat (limited to 'pyload/plugins/network/CurlRequest.py')
-rw-r--r--  pyload/plugins/network/CurlRequest.py  314
1 file changed, 314 insertions(+), 0 deletions(-)
diff --git a/pyload/plugins/network/CurlRequest.py b/pyload/plugins/network/CurlRequest.py
new file mode 100644
index 000000000..4630403df
--- /dev/null
+++ b/pyload/plugins/network/CurlRequest.py
@@ -0,0 +1,314 @@
+# -*- coding: utf-8 -*-
+
+###############################################################################
+# Copyright(c) 2008-2012 pyLoad Team
+# http://www.pyload.org
+#
+# This file is part of pyLoad.
+# pyLoad is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of the
+# License, or (at your option) any later version.
+#
+# Subject to the terms and conditions in LICENSE
+#
+# @author: RaNaN
+###############################################################################
+
+import pycurl
+
+from codecs import getincrementaldecoder, lookup, BOM_UTF8
+from urllib import quote, urlencode
+from httplib import responses
+from cStringIO import StringIO
+
+from pyload.plugins.Base import Abort
+from pyload.network.CookieJar import CookieJar
+
+from ..Request import Request, ResponseException
+
+
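+# percent-quote a URL, utf8-encoding unicode first; common URL metacharacters
+# stay intact, e.g. myquote(u"http://host/pa th") -> "http://host/pa%20th"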
+def myquote(url):
+ return quote(url.encode('utf8') if isinstance(url, unicode) else url, safe="%/:=&?~#+!$,;'@()*[]")
+
+
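+# urlencode a dict or list of pairs, utf8-encoding unicode keys and values,
+# e.g. myurlencode({u"a": u"\xe4"}) -> "a=%C3%A4"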
+def myurlencode(data):
+ data = dict(data)
+ return urlencode(dict((x.encode('utf8') if isinstance(x, unicode) else x, \
+ y.encode('utf8') if isinstance(y, unicode) else y ) for x, y in data.iteritems()))
+
+
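+# response codes treated as errors: 400-417 (client) and 500-505 (server)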
+bad_headers = range(400, 418) + range(500, 506)
+
+
+class CurlRequest(Request):
+ """ Request class based on libcurl """
+
+ __version__ = "0.1"
+
+ CONTEXT_CLASS = CookieJar
+
+ def __init__(self, *args, **kwargs):
+ self.c = pycurl.Curl()
+ Request.__init__(self, *args, **kwargs)
+
+ self.rep = StringIO()
+ self.lastURL = None
+ self.lastEffectiveURL = None
+
+ # cookiejar defines the context
+ self.cj = self.context
+
+ self.c.setopt(pycurl.WRITEFUNCTION, self.write)
+ self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+
+ # TODO: addAuth, addHeader
+
+ def initContext(self):
+ self.initHandle()
+
+ if self.config:
+ self.setInterface(self.config)
+ self.initOptions(self.config)
+
+ def initHandle(self):
+ """ sets common options to curl handle """
+
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.MAXREDIRS, 5)
+ self.c.setopt(pycurl.CONNECTTIMEOUT, 30)
+ self.c.setopt(pycurl.NOSIGNAL, 1)
+ self.c.setopt(pycurl.NOPROGRESS, 1)
+ if hasattr(pycurl, "AUTOREFERER"):
+ self.c.setopt(pycurl.AUTOREFERER, 1)
+ self.c.setopt(pycurl.SSL_VERIFYPEER, 0)
+        # interval for the low speed limit: detects connection loss, but can also abort the download if the hoster stalls it
+ self.c.setopt(pycurl.LOW_SPEED_TIME, 45)
+ self.c.setopt(pycurl.LOW_SPEED_LIMIT, 5)
+
+        # keep cookies in memory only, don't read or write a cookie file
+ self.c.setopt(pycurl.COOKIEFILE, "")
+ self.c.setopt(pycurl.COOKIEJAR, "")
+
+ #self.c.setopt(pycurl.VERBOSE, 1)
+
+ self.c.setopt(pycurl.USERAGENT,
+ "Mozilla/5.0 (Windows NT 6.1; Win64; x64;en; rv:5.0) Gecko/20110619 Firefox/5.0")
+ if pycurl.version_info()[7]:
+ self.c.setopt(pycurl.ENCODING, "gzip, deflate")
+ self.c.setopt(pycurl.HTTPHEADER, ["Accept: */*",
+ "Accept-Language: en-US,en",
+ "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
+ "Connection: keep-alive",
+ "Keep-Alive: 300",
+ "Expect:"])
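+
+        # the handle now roughly corresponds to:
+        #   curl -L --max-redirs 5 --connect-timeout 30 --compressed -k <url>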
+
+ def setInterface(self, options):
+
+ interface, proxy, ipv6 = options["interface"], options["proxies"], options["ipv6"]
+
+ if interface and interface.lower() != "none":
+ self.c.setopt(pycurl.INTERFACE, str(interface))
+
+ if proxy:
+ if proxy["type"] == "socks4":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS4)
+ elif proxy["type"] == "socks5":
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_SOCKS5)
+ else:
+ self.c.setopt(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
+
+ self.c.setopt(pycurl.PROXY, str(proxy["address"]))
+ self.c.setopt(pycurl.PROXYPORT, proxy["port"])
+
+ if proxy["username"]:
+ self.c.setopt(pycurl.PROXYUSERPWD, str("%s:%s" % (proxy["username"], proxy["password"])))
+
+ if ipv6:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_WHATEVER)
+ else:
+ self.c.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
+
+ if "timeout" in options:
+ self.c.setopt(pycurl.LOW_SPEED_TIME, options["timeout"])
+
+ def initOptions(self, options):
+ """ Sets same options as available in pycurl """
+ for k, v in options.iteritems():
+ if hasattr(pycurl, k):
+ self.c.setopt(getattr(pycurl, k), v)
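+        # e.g. initOptions({"TIMEOUT": 60}) calls self.c.setopt(pycurl.TIMEOUT, 60);
+        # names that aren't pycurl constants are silently ignored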
+
+ def setRequestContext(self, url, get, post, referer, cookies, multipart=False):
+ """ sets everything needed for the request """
+ url = myquote(url)
+
+ if get:
+ get = urlencode(get)
+ url = "%s?%s" % (url, get)
+
+ self.c.setopt(pycurl.URL, url)
+ self.lastURL = url
+
+ if post:
+ self.c.setopt(pycurl.POST, 1)
+ if not multipart:
+                if type(post) == unicode:
+                    post = post.encode('utf8') # POSTFIELDS needs a byte string; str() would fail on non-ascii
+ elif type(post) == str:
+ pass
+ else:
+ post = myurlencode(post)
+
+ self.c.setopt(pycurl.POSTFIELDS, post)
+ else:
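+                # a list of tuples passed to HTTPPOST makes libcurl send multipart/form-data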
+ post = [(x, y.encode('utf8') if type(y) == unicode else y ) for x, y in post.iteritems()]
+ self.c.setopt(pycurl.HTTPPOST, post)
+ else:
+ self.c.setopt(pycurl.POST, 0)
+
+ if referer and self.lastURL:
+ self.c.setopt(pycurl.REFERER, str(self.lastURL))
+ else:
+ self.c.setopt(pycurl.REFERER, "")
+
+ if cookies:
+ self.c.setopt(pycurl.COOKIELIST, self.cj.output())
+ else:
+ # Magic string that erases all cookies
+ self.c.setopt(pycurl.COOKIELIST, "ALL")
+
+ # TODO: remove auth again
+ if "auth" in self.options:
+ self.c.setopt(pycurl.USERPWD, str(self.options["auth"]))
+
+
+ def load(self, url, get={}, post={}, referer=True, cookies=True, just_header=False, multipart=False, decode=False):
+ """ load and returns a given page """
+
+ self.setRequestContext(url, get, post, referer, cookies, multipart)
+
+ # TODO: use http/rfc message instead
+ self.header = ""
+
+ if "header" in self.options:
+ self.c.setopt(pycurl.HTTPHEADER, self.options["header"])
+
+ if just_header:
+ self.c.setopt(pycurl.FOLLOWLOCATION, 0)
+ self.c.setopt(pycurl.NOBODY, 1) #TODO: nobody= no post?
+
+ # overwrite HEAD request, we want a common request type
+ if post:
+ self.c.setopt(pycurl.CUSTOMREQUEST, "POST")
+ else:
+ self.c.setopt(pycurl.CUSTOMREQUEST, "GET")
+
+ try:
+ self.c.perform()
+ rep = self.header
+ finally:
+ self.c.setopt(pycurl.FOLLOWLOCATION, 1)
+ self.c.setopt(pycurl.NOBODY, 0)
+ self.c.unsetopt(pycurl.CUSTOMREQUEST)
+
+ else:
+ self.c.perform()
+ rep = self.getResponse()
+
+ self.c.setopt(pycurl.POSTFIELDS, "")
+ self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
+ self.code = self.verifyHeader()
+
+ if cookies:
+ self.parseCookies()
+
+ if decode:
+ rep = self.decodeResponse(rep)
+
+ return rep
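+
+    # illustrative usage, assuming a configured instance:
+    #   html = req.load("http://example.com/", get={"q": "test"}, decode=True)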
+
+ def parseCookies(self):
+ for c in self.c.getinfo(pycurl.INFO_COOKIELIST):
+ #http://xiix.wordpress.com/2006/03/23/mozillafirefox-cookie-format
+ domain, flag, path, secure, expires, name, value = c.split("\t")
+            # libcurl marks HttpOnly cookies with a "#HttpOnly_" prefix
+ domain = domain.replace("#HttpOnly_", "")
+ self.cj.setCookie(domain, name, value, path, expires, secure)
+
+ def verifyHeader(self):
+ """ raise an exceptions on bad headers """
+ code = int(self.c.getinfo(pycurl.RESPONSE_CODE))
+ if code in bad_headers:
+            raise ResponseException(code, responses.get(code, "Unknown status code"))
+ return code
+
+ def getResponse(self):
+ """ retrieve response from string io """
+ if self.rep is None: return ""
+ value = self.rep.getvalue()
+ self.rep.close()
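+        # start a fresh buffer, so each response can only be retrieved once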
+ self.rep = StringIO()
+ return value
+
+ def decodeResponse(self, rep):
+ """ decode with correct encoding, relies on header """
+ header = self.header.splitlines()
+ encoding = "utf8" # default encoding
+
+ for line in header:
+ line = line.lower().replace(" ", "")
+ if not line.startswith("content-type:") or \
+ ("text" not in line and "application" not in line):
+ continue
+
+            none, delimiter, charset = line.rpartition("charset=")
+            if delimiter:
+ charset = charset.split(";")
+ if charset:
+ encoding = charset[0]
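+                    # e.g. "content-type:text/html;charset=iso-8859-1" -> "iso-8859-1"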
+
+ try:
+ #self.log.debug("Decoded %s" % encoding )
+ if lookup(encoding).name == 'utf-8' and rep.startswith(BOM_UTF8):
+ encoding = 'utf-8-sig'
+
+ decoder = getincrementaldecoder(encoding)("replace")
+ rep = decoder.decode(rep, True)
+
+ #TODO: html_unescape as default
+
+ except LookupError:
+ self.log.debug("No Decoder found for %s" % encoding)
+ except Exception:
+ self.log.debug("Error when decoding string from %s." % encoding)
+
+ return rep
+
+ def write(self, buf):
+ """ writes response """
+ if self.rep.tell() > 1000000 or self.doAbort:
+ rep = self.getResponse()
+ if self.doAbort: raise Abort()
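+            # dump the oversized response to disk for inspection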
+ f = open("response.dump", "wb")
+ f.write(rep)
+ f.close()
+ raise Exception("Loaded Url exceeded limit")
+
+ self.rep.write(buf)
+
+ def writeHeader(self, buf):
+ """ writes header """
+ self.header += buf
+
+ def reset(self):
+ self.cj.clear()
+ self.options.clear()
+
+ def close(self):
+ """ cleanup, unusable after this """
+ self.rep.close()
+ if hasattr(self, "cj"):
+ del self.cj
+ if hasattr(self, "c"):
+ self.c.close()
+            del self.c
\ No newline at end of file