diff options
author | mkaay <mkaay@mkaay.de> | 2010-12-22 20:33:23 +0100 |
---|---|---|
committer | mkaay <mkaay@mkaay.de> | 2010-12-22 20:33:23 +0100 |
commit | 0fd06af30e6ec943b6ddcfed2e2cf4cd64095309 (patch) | |
tree | f82c64a4504412ac848285cbf5a235e4295cb106 | |
parent | fixed getURL (diff) | |
download | pyload-0fd06af30e6ec943b6ddcfed2e2cf4cd64095309.tar.xz |
cookie handling WIP -.-
-rw-r--r-- | module/network/Browser.py | 4 | ||||
-rw-r--r-- | module/network/CookieJar.py | 33 | ||||
-rw-r--r-- | module/network/CookieRedirectHandler.py | 146 | ||||
-rw-r--r-- | module/network/HTTPBase.py | 27 | ||||
-rw-r--r-- | module/plugins/accounts/UploadedTo.py | 6 |
5 files changed, 191 insertions, 25 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py index dc4fd11aa..b68c39c53 100644 --- a/module/network/Browser.py +++ b/module/network/Browser.py @@ -111,10 +111,10 @@ class Browser(object): self.log.warning("Browser: deprecated call 'clean'") print_stack() - def load(self, *args, **kwargs): + def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, no_post_encode=False, raw_cookies={}): self.log.warning("Browser: deprecated call 'load'") print_stack() - return self.getPage(*args, **kwargs) + return self.getPage(url, get=get, post=post, cookies=cookies) def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False): #@TODO diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py index fc6b5e076..372e2001e 100644 --- a/module/network/CookieJar.py +++ b/module/network/CookieJar.py @@ -19,7 +19,7 @@ from cookielib import CookieJar as PyCookieJar from cookielib import Cookie - +from time import time class CookieJar(PyCookieJar): def __init__(self, pluginName=None, account=None): @@ -27,7 +27,6 @@ class CookieJar(PyCookieJar): self.plugin = pluginName self.account = account - def getCookie(self, name): print "getCookie not implemented!" return None @@ -38,4 +37,32 @@ class CookieJar(PyCookieJar): domain_initial_dot=(domain.startswith(".")), path=path, path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False) - self.set_cookie(c)
\ No newline at end of file + self.set_cookie(c) + + def add_cookie_header(self, request): + self._cookies_lock.acquire() + try: + + self._policy._now = self._now = int(time()) + + cookies = self._cookies_for_request(request) + + attrs = self._cookie_attrs(cookies) + print attrs + if attrs: + if not request.has_header("Cookie"): + request.add_header( + "Cookie", "; ".join(attrs)) + + # if necessary, advertise that we know RFC 2965 + if (self._policy.rfc2965 and not self._policy.hide_cookie2 and + not request.has_header("Cookie2")): + for cookie in cookies: + if cookie.version != 1: + request.add_header("Cookie2", '$Version="1"') + break + + finally: + self._cookies_lock.release() + + self.clear_expired_cookies() diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py new file mode 100644 index 000000000..3eeb3e711 --- /dev/null +++ b/module/network/CookieRedirectHandler.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +""" + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see <http://www.gnu.org/licenses/>. + + @author: mkaay, RaNaN +""" + +from urllib2 import BaseHandler +from urllib import addinfourl +from urllib2 import Request +from urlparse import urlparse, urlunparse, urljoin +from CookieJar import CookieJar + +class CookieRedirectHandler(BaseHandler): + # maximum number of redirections to any single URL + # this is needed because of the state that cookies introduce + max_repeats = 4 + # maximum total number of redirections (regardless of URL) before + # assuming we're in a loop + max_redirections = 10 + + def __init__(self, cookiejar=None, follow=True): + if cookiejar is None: + cookiejar = CookieJar() + self.cookiejar = cookiejar + self.follow = follow + + def http_request(self, request): + print "add", self.cookiejar + self.cookiejar.add_cookie_header(request) + return request + + def http_response(self, request, response): + print "get", self.cookiejar + self.cookiejar.extract_cookies(response, request) + return response + + def redirect_request(self, req, fp, code, msg, headers, newurl): + """Return a Request or None in response to a redirect. + + This is called by the http_error_30x methods when a + redirection response is received. If a redirection should + take place, return a new Request to allow http_error_30x to + perform the redirect. Otherwise, raise HTTPError if no-one + else should try to handle this url. Return None if you can't + but another Handler might. + """ + m = req.get_method() + if (code in (301, 302, 303, 307) and m in ("GET", "HEAD") + or code in (301, 302, 303) and m == "POST"): + # Strictly (according to RFC 2616), 301 or 302 in response + # to a POST MUST NOT cause a redirection without confirmation + # from the user (of urllib2, in this case). In practice, + # essentially all clients do redirect in this case, so we + # do the same. + # be conciliant with URIs containing a space + newurl = newurl.replace(' ', '%20') + newheaders = dict((k,v) for k,v in req.headers.items() + if k.lower() not in ("content-length", "content-type") + ) + req = Request(newurl, + headers=newheaders, + origin_req_host=req.get_origin_req_host(), + unverifiable=True) + self.cookiejar.add_cookie_header(req) + print req.headers + return req + else: + raise HTTPError(req.get_full_url(), code, msg, headers, fp) + + # Implementation note: To avoid the server sending us into an + # infinite loop, the request object needs to track what URLs we + # have already seen. Do this by adding a handler-specific + # attribute to the Request object. + def http_error_302(self, req, fp, code, msg, headers): + resp = addinfourl(fp, headers, req.get_full_url()) + resp.code = code + resp.msg = msg + self.cookiejar.extract_cookies(resp, req) + + if not self.follow: + return resp + + # Some servers (incorrectly) return multiple Location headers + # (so probably same goes for URI). Use first header. + if 'location' in headers: + newurl = headers.getheaders('location')[0] + elif 'uri' in headers: + newurl = headers.getheaders('uri')[0] + else: + return + + # fix a possible malformed URL + urlparts = urlparse(newurl) + if not urlparts.path: + urlparts = list(urlparts) + urlparts[2] = "/" + newurl = urlunparse(urlparts) + + newurl = urljoin(req.get_full_url(), newurl) + + # XXX Probably want to forget about the state of the current + # request, although that might interact poorly with other + # handlers that also use handler-specific request attributes + new = self.redirect_request(req, fp, code, msg, headers, newurl) + if new is None: + return + + # loop detection + # .redirect_dict has a key url if url was previously visited. + if hasattr(req, 'redirect_dict'): + visited = new.redirect_dict = req.redirect_dict + if (visited.get(newurl, 0) >= self.max_repeats or + len(visited) >= self.max_redirections): + raise HTTPError(req.get_full_url(), code, + self.inf_msg + msg, headers, fp) + else: + visited = new.redirect_dict = req.redirect_dict = {} + visited[newurl] = visited.get(newurl, 0) + 1 + + # Don't close the fp until we are sure that we won't use it + # with HTTPError. + fp.read() + fp.close() + return self.parent.open(new, timeout=req.timeout) + + http_error_301 = http_error_303 = http_error_307 = http_error_302 + + inf_msg = "The HTTP server returned a redirect error that would " \ + "lead to an infinite loop.\n" \ + "The last 30x error message was:\n" + + https_request = http_request + https_response = http_response diff --git a/module/network/HTTPBase.py b/module/network/HTTPBase.py index 4fff15335..3fd8fee49 100644 --- a/module/network/HTTPBase.py +++ b/module/network/HTTPBase.py @@ -18,15 +18,12 @@ """ from urllib import urlencode -#from urlparse import urlparse from urllib2 import Request from urllib2 import OpenerDirector from urllib2 import BaseHandler from urllib2 import HTTPHandler -from urllib2 import HTTPRedirectHandler -from urllib2 import HTTPCookieProcessor from urllib2 import HTTPSHandler from urllib2 import HTTPDefaultErrorHandler from urllib2 import HTTPErrorProcessor @@ -42,7 +39,8 @@ from httplib import HTTPResponse from httplib import responses as HTTPStatusCodes from httplib import ResponseNotReady -from cookielib import CookieJar +from CookieJar import CookieJar +from CookieRedirectHandler import CookieRedirectHandler import socket import socks @@ -261,15 +259,6 @@ class PyLoadHTTPHandler(HTTPHandler): def http_open(self, req): return self.do_open(PyLoadHTTPConnection, req) -class NoRedirectHandler(BaseHandler): #supress error - def http_error_302(self, req, fp, code, msg, headers): - resp = addinfourl(fp, headers, req.get_full_url()) - resp.code = code - resp.msg = msg - return resp - - http_error_301 = http_error_303 = http_error_307 = http_error_302 - class HTTPBase(): def __init__(self, interface=None, proxies={}): self.followRedirect = True @@ -280,8 +269,6 @@ class HTTPBase(): self.referer = None - self.cookieJar = None - self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10" self.handler = PyLoadHTTPHandler() @@ -302,11 +289,9 @@ class HTTPBase(): opener.add_handler(HTTPSHandler()) opener.add_handler(HTTPDefaultErrorHandler()) opener.add_handler(HTTPErrorProcessor()) + opener.add_handler(CookieRedirectHandler(self.cookieJar, self.followRedirect)) if self.proxies.has_key("http") or self.proxies.has_key("https"): opener.add_handler(ProxyHandler(self.proxies)) - opener.add_handler(HTTPRedirectHandler() if self.followRedirect else NoRedirectHandler()) - if cookies: - opener.add_handler(HTTPCookieProcessor(self.cookieJar)) opener.version = self.userAgent opener.addheaders[0] = ("User-Agent", self.userAgent) return opener @@ -356,6 +341,9 @@ class HTTPBase(): print "[HTTP] \t", key, ":", value for key, value in req.headers.iteritems(): print "[HTTP] \t", key, ":", value + print "[HTTP] cookies" + from pprint import pprint + pprint(self.cookieJar._cookies) print "[HTTP] ----" resp = opener.open(req) @@ -368,6 +356,9 @@ class HTTPBase(): print "[HTTP] headers" for key, value in resp.info().dict.iteritems(): print "[HTTP] \t", key, ":", value + print "[HTTP] cookies" + from pprint import pprint + pprint(self.cookieJar._cookies) print "[HTTP] ----" try: self.size = int(resp.info()["Content-Length"]) diff --git a/module/plugins/accounts/UploadedTo.py b/module/plugins/accounts/UploadedTo.py index fcc2daf22..9b17debf5 100644 --- a/module/plugins/accounts/UploadedTo.py +++ b/module/plugins/accounts/UploadedTo.py @@ -20,6 +20,7 @@ from module.plugins.Account import Account import re from time import strptime, mktime +from cookielib import Cookie class UploadedTo(Account): __name__ = "UploadedTo" @@ -30,7 +31,7 @@ class UploadedTo(Account): __author_mail__ = ("mkaay@mkaay.de") def loadAccountInfo(self, user, req): - html = req.getPage("http://uploaded.to/?setlang=en", cookies=True) + html = req.getPage("http://uploaded.to/", cookies=True) raw_traffic = re.search(r"Traffic left: </span><span class=.*?>(.*?)</span>", html).group(1) raw_valid = re.search(r"Valid until: </span> <span class=.*?>(.*?)</span>", html).group(1) traffic = int(self.parseTraffic(raw_traffic)) @@ -40,6 +41,7 @@ class UploadedTo(Account): return tmp def login(self, user, data, req): - page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]}, cookies=True) + req.cookieJar.set_cookie(Cookie(version=0, name='lang', value='en', port=None, port_specified=False, domain='.uploaded.to', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest={}, rfc2109=False)) + page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]}) if "Login failed!" in page: self.wrongPassword() |