#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License,
or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, see .
@author: mkaay
"""
from urllib import urlencode
#from urlparse import urlparse
from urllib2 import Request
from urllib2 import OpenerDirector
from urllib2 import BaseHandler
from urllib2 import HTTPHandler
from urllib2 import HTTPRedirectHandler
from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
from urllib2 import HTTPDefaultErrorHandler
from urllib2 import HTTPErrorProcessor
from urllib2 import ProxyHandler
from urllib2 import URLError
from urllib2 import addinfourl
from urllib2 import _parse_proxy
from httplib import HTTPConnection
from httplib import HTTPResponse
from httplib import responses as HTTPStatusCodes
from httplib import ResponseNotReady
from cookielib import CookieJar
import socket
import socks
from MultipartPostHandler import MultipartPostHandler
DEBUG = 1
HANDLE_ERRORS = 1
class PyLoadHTTPResponse(HTTPResponse):
def __init__(self, sock, debuglevel=0, strict=0, method=None):
if method: # the httplib in python 2.3 uses the method arg
HTTPResponse.__init__(self, sock, debuglevel, method)
else: # 2.2 doesn't
HTTPResponse.__init__(self, sock, debuglevel)
self.fileno = sock.fileno
self._rbuf = ''
self._rbufsize = 8096
self._handler = None # inserted by the handler later
self._host = None # (same)
self._url = None # (same)
_raw_read = HTTPResponse.read
def close_connection(self):
self.close()
self._handler._remove_connection(self._host, close=1)
def info(self):
return self.msg
def geturl(self):
return self._url
def read(self, amt=None):
# the _rbuf test is only in this first if for speed. It's not
# logically necessary
if self._rbuf and not amt is None:
L = len(self._rbuf)
if amt > L:
amt -= L
else:
s = self._rbuf[:amt]
self._rbuf = self._rbuf[amt:]
return s
s = self._rbuf + self._raw_read(amt)
self._rbuf = ''
return s
def readline(self, limit=-1):
i = self._rbuf.find('\n')
while i < 0 and not (0 < limit <= len(self._rbuf)):
new = self._raw_read(self._rbufsize)
if not new: break
i = new.find('\n')
if i >= 0: i = i + len(self._rbuf)
self._rbuf = self._rbuf + new
if i < 0: i = len(self._rbuf)
else: i += 1
if 0 <= limit < len(self._rbuf): i = limit
data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
return data
def readlines(self, sizehint = 0):
total = 0
list = []
while 1:
line = self.readline()
if not line: break
list.append(line)
total += len(line)
if sizehint and total >= sizehint:
break
return list
@property
def code(self):
return self.status
def getcode(self):
return self.status
class PyLoadHTTPConnection(HTTPConnection):
sourceAddress = ('', 0)
socksProxy = None
response_class = PyLoadHTTPResponse
def connect(self):
if self.socksProxy:
self.sock = socks.socksocket()
t = _parse_proxy(self.socksProxy[1])
self.sock.setproxy(self.socksProxy[0], addr=t[3].split(":")[0], port=int(t[3].split(":")[1]), username=t[1], password=t[2])
else:
self.sock = socket.socket()
self.sock.settimeout(30)
self.sock.bind(self.sourceAddress)
self.sock.connect((self.host, self.port))
try:
if self._tunnel_host:
self._tunnel()
except: #python2.5
pass
class PyLoadHTTPHandler(HTTPHandler):
sourceAddress = ('', 0)
socksProxy = None
def __init__(self):
self._connections = {}
def setInterface(self, interface):
if interface is None:
interface = ""
self.sourceAddress = (interface, 0)
def setSocksProxy(self, *t):
self.socksProxy = t
def close_connection(self, host):
"""close connection to
host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
no error occurs if there is no connection to that host."""
self._remove_connection(host, close=1)
def open_connections(self):
"""return a list of connected hosts"""
return self._connections.keys()
def close_all(self):
"""close all open connections"""
for host, conn in self._connections.items():
conn.close()
self._connections = {}
def _remove_connection(self, host, close=0):
if self._connections.has_key(host):
if close: self._connections[host].close()
del self._connections[host]
def _start_connection(self, h, req):
data = ""
try:
if req.has_data():
data = req.get_data()
h.putrequest('POST', req.get_selector())
if not req.headers.has_key('Content-type'):
h.putheader('Content-type',
'application/x-www-form-urlencoded')
if not req.headers.has_key('Content-length'):
h.putheader('Content-length', '%d' % len(data))
else:
h.putrequest('GET', req.get_selector(), skip_accept_encoding=1)
except socket.error, err:
raise URLError(err)
for args in self.parent.addheaders:
h.putheader(*args)
for k, v in req.headers.items():
h.putheader(k, v)
h.endheaders()
if req.has_data():
h.send(data)
def do_open(self, http_class, req):
host = req.get_host()
if not host:
raise URLError('no host given')
try:
need_new_connection = 1
h = self._connections.get(host)
if not h is None:
try:
self._start_connection(h, req)
except socket.error, e:
r = None
else:
try: r = h.getresponse()
except ResponseNotReady, e: r = None
if r is None or r.version == 9:
# httplib falls back to assuming HTTP 0.9 if it gets a
# bad header back. This is most likely to happen if
# the socket has been closed by the server since we
# last used the connection.
if DEBUG: print "failed to re-use connection to %s" % host
h.close()
else:
if DEBUG: print "re-using connection to %s" % host
need_new_connection = 0
if need_new_connection:
if DEBUG: print "creating new connection to %s" % host
h = http_class(host)
h.sourceAddress = self.sourceAddress
h.socksProxy = self.socksProxy
self._connections[host] = h
self._start_connection(h, req)
r = h.getresponse()
except socket.error, err:
raise URLError(err)
# if not a persistent connection, don't try to reuse it
if r.will_close: self._remove_connection(host)
if DEBUG:
print "STATUS: %s, %s" % (r.status, r.reason)
r._handler = self
r._host = host
r._url = req.get_full_url()
if r.status in (200, 206) or not HANDLE_ERRORS:
return r
else:
return self.parent.error('http', req, r, r.status, r.reason, r.msg)
def http_open(self, req):
return self.do_open(PyLoadHTTPConnection, req)
class NoRedirectHandler(BaseHandler): #supress error
def http_error_302(self, req, fp, code, msg, headers):
resp = addinfourl(fp, headers, req.get_full_url())
resp.code = code
resp.msg = msg
return resp
http_error_301 = http_error_303 = http_error_307 = http_error_302
class HTTPBase():
def __init__(self, interface=None, proxies={}):
self.followRedirect = True
self.interface = interface
self.proxies = proxies
self.size = None
self.referer = None
self.cookieJar = None
self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10"
self.handler = PyLoadHTTPHandler()
self.handler.setInterface(interface)
if proxies.has_key("socks5"):
self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS5, proxies["socks5"])
elif proxies.has_key("socks4"):
self.handler.setSocksProxy(socks.PROXY_TYPE_SOCKS4, proxies["socks4"])
self.cookieJar = CookieJar()
self.debug = True
def createOpener(self, cookies=True):
opener = OpenerDirector()
opener.add_handler(self.handler)
opener.add_handler(MultipartPostHandler())
opener.add_handler(HTTPSHandler())
opener.add_handler(HTTPDefaultErrorHandler())
opener.add_handler(HTTPErrorProcessor())
if self.proxies.has_key("http") or self.proxies.has_key("https"):
opener.add_handler(ProxyHandler(self.proxies))
opener.add_handler(HTTPRedirectHandler() if self.followRedirect else NoRedirectHandler())
if cookies:
opener.add_handler(HTTPCookieProcessor(self.cookieJar))
opener.version = self.userAgent
opener.addheaders[0] = ("User-Agent", self.userAgent)
return opener
def createRequest(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
if get:
if isinstance(get, dict):
get = urlencode(get)
url = "%s?%s" % (url, get)
req = Request(url)
if post:
if isinstance(post, dict):
post = urlencode(post)
req.add_data(post)
req.add_header("Accept", "application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5")
if referer:
req.add_header("Referer", referer)
req.add_header("Accept-Encoding", "gzip, deflate")
for key, val in customHeaders.iteritems():
req.add_header(key, val)
return req
def getResponse(self, url, get={}, post={}, referer=None, cookies=True, customHeaders={}):
req = self.createRequest(url, get, post, referer, cookies, customHeaders)
opener = self.createOpener(cookies)
if self.debug:
print "[HTTP] ----"
print "[HTTP] creating request"
print "[HTTP] URL:", url
print "[HTTP] GET"
for key, value in get.iteritems():
print "[HTTP] \t", key, ":", value
if post:
print "[HTTP] POST"
for key, value in post.iteritems():
print "[HTTP] \t", key, ":", value
print "[HTTP] headers"
for key, value in opener.addheaders:
print "[HTTP] \t", key, ":", value
for key, value in req.headers.iteritems():
print "[HTTP] \t", key, ":", value
print "[HTTP] ----"
resp = opener.open(req)
resp.getcode = lambda: resp.code
if self.debug:
print "[HTTP] ----"
print "[HTTP] got response"
print "[HTTP] status:", resp.getcode()
print "[HTTP] headers"
for key, value in resp.info().dict.iteritems():
print "[HTTP] \t", key, ":", value
print "[HTTP] ----"
try:
self.size = int(resp.info()["Content-Length"])
except: #chunked transfer
pass
return resp
if __name__ == "__main__":
base = HTTPBase()
resp = base.getResponse("http://python.org/")
print resp.read()