summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
committer mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
commit 0fd06af30e6ec943b6ddcfed2e2cf4cd64095309 (patch)
tree f82c64a4504412ac848285cbf5a235e4295cb106
parent fixed getURL (diff)
download pyload-0fd06af30e6ec943b6ddcfed2e2cf4cd64095309.tar.xz
cookie handling WIP -.-
-rw-r--r--module/network/Browser.py4
-rw-r--r--module/network/CookieJar.py33
-rw-r--r--module/network/CookieRedirectHandler.py146
-rw-r--r--module/network/HTTPBase.py27
-rw-r--r--module/plugins/accounts/UploadedTo.py6
5 files changed, 191 insertions, 25 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py
index dc4fd11aa..b68c39c53 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -111,10 +111,10 @@ class Browser(object):
self.log.warning("Browser: deprecated call 'clean'")
print_stack()
- def load(self, *args, **kwargs):
+ def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, no_post_encode=False, raw_cookies={}):
+ # Deprecated entry point kept for old callers: logs a deprecation
+ # warning, calls print_stack() and delegates to getPage().
+ # Only url, get, post and cookies are forwarded; ref, just_header,
+ # no_post_encode and raw_cookies are accepted but not passed on.
self.log.warning("Browser: deprecated call 'load'")
print_stack()
- return self.getPage(*args, **kwargs)
+ return self.getPage(url, get=get, post=post, cookies=cookies)
def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False):
#@TODO
diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py
index fc6b5e076..372e2001e 100644
--- a/module/network/CookieJar.py
+++ b/module/network/CookieJar.py
@@ -19,7 +19,7 @@
from cookielib import CookieJar as PyCookieJar
from cookielib import Cookie
-
+from time import time
class CookieJar(PyCookieJar):
def __init__(self, pluginName=None, account=None):
@@ -27,7 +27,6 @@ class CookieJar(PyCookieJar):
self.plugin = pluginName
self.account = account
-
def getCookie(self, name):
print "getCookie not implemented!"
return None
@@ -38,4 +37,32 @@ class CookieJar(PyCookieJar):
domain_initial_dot=(domain.startswith(".")), path=path, path_specified=True,
secure=False, expires=None, discard=True, comment=None,
comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
- self.set_cookie(c) \ No newline at end of file
+ self.set_cookie(c)
+
+    def add_cookie_header(self, request):
+        """Attach the cookies matching ``request`` as a ``Cookie`` header.
+
+        The header is stored with ``request.add_header``, so it becomes a
+        regular entry of the request's header dict. A ``Cookie`` header that
+        is already present on the request is left untouched. When the policy
+        has RFC 2965 enabled (and ``hide_cookie2`` is off), a ``Cookie2``
+        header is added as soon as one matching cookie is not version 1.
+        Expired cookies are cleared from the jar afterwards.
+
+        Cookie values are deliberately not printed here: they usually carry
+        session credentials.
+
+        request -- request object offering ``has_header`` and ``add_header``
+        """
+        self._cookies_lock.acquire()
+        try:
+            # record the current time on both the jar and its policy
+            self._policy._now = self._now = int(time())
+
+            cookies = self._cookies_for_request(request)
+            attrs = self._cookie_attrs(cookies)
+
+            if attrs and not request.has_header("Cookie"):
+                request.add_header("Cookie", "; ".join(attrs))
+
+            # if necessary, advertise that we know RFC 2965
+            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
+                    not request.has_header("Cookie2")):
+                for cookie in cookies:
+                    if cookie.version != 1:
+                        request.add_header("Cookie2", '$Version="1"')
+                        break
+        finally:
+            # always release the jar lock, even if cookie matching raised
+            self._cookies_lock.release()
+
+        self.clear_expired_cookies()
diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py
new file mode 100644
index 000000000..3eeb3e711
--- /dev/null
+++ b/module/network/CookieRedirectHandler.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+"""
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+ @author: mkaay, RaNaN
+"""
+
+from urllib import addinfourl
+from urllib2 import BaseHandler
+from urllib2 import HTTPError
+from urllib2 import Request
+from urlparse import urlparse, urlunparse, urljoin
+
+from CookieJar import CookieJar
+
+class CookieRedirectHandler(BaseHandler):
+    """urllib2 handler combining cookie processing and redirect handling.
+
+    Cookies from the jar are attached to every outgoing request and
+    extracted from every response, including the 30x responses of a
+    redirect chain. When ``follow`` is false, a 30x response is handed back
+    to the caller instead of being followed.
+    """
+
+    # maximum number of redirections to any single URL
+    # this is needed because of the state that cookies introduce
+    max_repeats = 4
+    # maximum total number of redirections (regardless of URL) before
+    # assuming we're in a loop
+    max_redirections = 10
+
+    # prefix of the HTTPError message raised when a redirect loop is detected
+    inf_msg = "The HTTP server returned a redirect error that would " \
+              "lead to an infinite loop.\n" \
+              "The last 30x error message was:\n"
+
+    # lower-cased names of request headers that are NOT copied onto a
+    # redirected request
+    _dropped_on_redirect = ("content-length", "content-type",
+                            "cookie", "cookie2")
+
+    def __init__(self, cookiejar=None, follow=True):
+        """Create the handler.
+
+        cookiejar -- jar used for all requests; a fresh CookieJar is
+                     created when None is given
+        follow    -- whether 30x responses are followed
+        """
+        if cookiejar is None:
+            cookiejar = CookieJar()
+        self.cookiejar = cookiejar
+        self.follow = follow
+
+    def http_request(self, request):
+        """Add the jar's cookies to an outgoing request and return it."""
+        self.cookiejar.add_cookie_header(request)
+        return request
+
+    def http_response(self, request, response):
+        """Store the cookies set by ``response`` in the jar and return it."""
+        self.cookiejar.extract_cookies(response, request)
+        return response
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        This is called by the http_error_30x methods when a
+        redirection response is received. If a redirection should
+        take place, return a new Request to allow http_error_30x to
+        perform the redirect. Otherwise, raise HTTPError if no-one
+        else should try to handle this url. Return None if you can't
+        but another Handler might.
+        """
+        m = req.get_method()
+        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+                or code in (301, 302, 303) and m == "POST"):
+            # Strictly (according to RFC 2616), 301 or 302 in response
+            # to a POST MUST NOT cause a redirection without confirmation
+            # from the user (of urllib2, in this case). In practice,
+            # essentially all clients do redirect in this case, so we
+            # do the same.
+            # be conciliant with URIs containing a space
+            newurl = newurl.replace(' ', '%20')
+            # Besides the body-related headers, drop the cookie headers of
+            # the previous hop: the jar has just been updated from the 30x
+            # response and must compute the cookies for the new URL. A
+            # copied "Cookie" header would be kept as-is, so cookies set by
+            # the redirect would never be sent and the old ones would be
+            # forwarded to the redirect target.
+            newheaders = dict((k, v) for k, v in req.headers.items()
+                              if k.lower() not in self._dropped_on_redirect)
+            new = Request(newurl,
+                          headers=newheaders,
+                          origin_req_host=req.get_origin_req_host(),
+                          unverifiable=True)
+            self.cookiejar.add_cookie_header(new)
+            return new
+        else:
+            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+    # Implementation note: To avoid the server sending us into an
+    # infinite loop, the request object needs to track what URLs we
+    # have already seen. Do this by adding a handler-specific
+    # attribute to the Request object.
+    def http_error_302(self, req, fp, code, msg, headers):
+        """Handle a 30x response: store its cookies, then follow it.
+
+        Returns the 30x response itself when ``follow`` is false, None when
+        the response names no target (or redirect_request() returns None),
+        and otherwise the result of opening the redirected request.
+        Raises HTTPError when a redirect loop is detected.
+        """
+        resp = addinfourl(fp, headers, req.get_full_url())
+        resp.code = code
+        resp.msg = msg
+        self.cookiejar.extract_cookies(resp, req)
+
+        if not self.follow:
+            return resp
+
+        # Some servers (incorrectly) return multiple Location headers
+        # (so probably same goes for URI). Use first header.
+        if 'location' in headers:
+            newurl = headers.getheaders('location')[0]
+        elif 'uri' in headers:
+            newurl = headers.getheaders('uri')[0]
+        else:
+            return
+
+        # fix a possible malformed URL
+        urlparts = urlparse(newurl)
+        if not urlparts.path:
+            urlparts = list(urlparts)
+            urlparts[2] = "/"
+        newurl = urlunparse(urlparts)
+
+        newurl = urljoin(req.get_full_url(), newurl)
+
+        # XXX Probably want to forget about the state of the current
+        # request, although that might interact poorly with other
+        # handlers that also use handler-specific request attributes
+        new = self.redirect_request(req, fp, code, msg, headers, newurl)
+        if new is None:
+            return
+
+        # loop detection
+        # .redirect_dict has a key url if url was previously visited.
+        if hasattr(req, 'redirect_dict'):
+            visited = new.redirect_dict = req.redirect_dict
+            if (visited.get(newurl, 0) >= self.max_repeats or
+                    len(visited) >= self.max_redirections):
+                raise HTTPError(req.get_full_url(), code,
+                                self.inf_msg + msg, headers, fp)
+        else:
+            visited = new.redirect_dict = req.redirect_dict = {}
+        visited[newurl] = visited.get(newurl, 0) + 1
+
+        # Don't close the fp until we are sure that we won't use it
+        # with HTTPError.
+        fp.read()
+        fp.close()
+        return self.parent.open(new, timeout=req.timeout)
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+    # the same cookie processing applies to https traffic
+    https_request = http_request
+    https_response = http_response
diff --git a/module/network/HTTPBase.py b/module/network/HTTPBase.py
index 4fff15335..3fd8fee49 100644
--- a/module/network/HTTPBase.py
+++ b/module/network/HTTPBase.py
@@ -18,15 +18,12 @@
"""
from urllib import urlencode
-#from urlparse import urlparse
from urllib2 import Request
from urllib2 import OpenerDirector
from urllib2 import BaseHandler
from urllib2 import HTTPHandler
-from urllib2 import HTTPRedirectHandler
-from urllib2 import HTTPCookieProcessor
from urllib2 import HTTPSHandler
from urllib2 import HTTPDefaultErrorHandler
from urllib2 import HTTPErrorProcessor
@@ -42,7 +39,8 @@ from httplib import HTTPResponse
from httplib import responses as HTTPStatusCodes
from httplib import ResponseNotReady
-from cookielib import CookieJar
+from CookieJar import CookieJar
+from CookieRedirectHandler import CookieRedirectHandler
import socket
import socks
@@ -261,15 +259,6 @@ class PyLoadHTTPHandler(HTTPHandler):
def http_open(self, req):
return self.do_open(PyLoadHTTPConnection, req)
-class NoRedirectHandler(BaseHandler): #supress error
- def http_error_302(self, req, fp, code, msg, headers):
- resp = addinfourl(fp, headers, req.get_full_url())
- resp.code = code
- resp.msg = msg
- return resp
-
- http_error_301 = http_error_303 = http_error_307 = http_error_302
-
class HTTPBase():
def __init__(self, interface=None, proxies={}):
self.followRedirect = True
@@ -280,8 +269,6 @@ class HTTPBase():
self.referer = None
- self.cookieJar = None
-
self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10"
self.handler = PyLoadHTTPHandler()
@@ -302,11 +289,9 @@ class HTTPBase():
opener.add_handler(HTTPSHandler())
opener.add_handler(HTTPDefaultErrorHandler())
opener.add_handler(HTTPErrorProcessor())
+ opener.add_handler(CookieRedirectHandler(self.cookieJar, self.followRedirect))
if self.proxies.has_key("http") or self.proxies.has_key("https"):
opener.add_handler(ProxyHandler(self.proxies))
- opener.add_handler(HTTPRedirectHandler() if self.followRedirect else NoRedirectHandler())
- if cookies:
- opener.add_handler(HTTPCookieProcessor(self.cookieJar))
opener.version = self.userAgent
opener.addheaders[0] = ("User-Agent", self.userAgent)
return opener
@@ -356,6 +341,9 @@ class HTTPBase():
print "[HTTP] \t", key, ":", value
for key, value in req.headers.iteritems():
print "[HTTP] \t", key, ":", value
+ print "[HTTP] cookies"
+ from pprint import pprint
+ pprint(self.cookieJar._cookies)
print "[HTTP] ----"
resp = opener.open(req)
@@ -368,6 +356,9 @@ class HTTPBase():
print "[HTTP] headers"
for key, value in resp.info().dict.iteritems():
print "[HTTP] \t", key, ":", value
+ print "[HTTP] cookies"
+ from pprint import pprint
+ pprint(self.cookieJar._cookies)
print "[HTTP] ----"
try:
self.size = int(resp.info()["Content-Length"])
diff --git a/module/plugins/accounts/UploadedTo.py b/module/plugins/accounts/UploadedTo.py
index fcc2daf22..9b17debf5 100644
--- a/module/plugins/accounts/UploadedTo.py
+++ b/module/plugins/accounts/UploadedTo.py
@@ -20,6 +20,7 @@
from module.plugins.Account import Account
import re
from time import strptime, mktime
+from cookielib import Cookie
class UploadedTo(Account):
__name__ = "UploadedTo"
@@ -30,7 +31,7 @@ class UploadedTo(Account):
__author_mail__ = ("mkaay@mkaay.de")
def loadAccountInfo(self, user, req):
- html = req.getPage("http://uploaded.to/?setlang=en", cookies=True)
+ html = req.getPage("http://uploaded.to/", cookies=True)
raw_traffic = re.search(r"Traffic left: </span><span class=.*?>(.*?)</span>", html).group(1)
raw_valid = re.search(r"Valid until: </span> <span class=.*?>(.*?)</span>", html).group(1)
traffic = int(self.parseTraffic(raw_traffic))
@@ -40,6 +41,7 @@ class UploadedTo(Account):
return tmp
def login(self, user, data, req):
- page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]}, cookies=True)
+ # Seed a 'lang=en' cookie for .uploaded.to before logging in: the checks
+ # in this plugin match English page text ("Login failed!" here,
+ # "Traffic left:" / "Valid until:" in loadAccountInfo).
+ # NOTE(review): presumably this replaces the former "?setlang=en" query
+ # parameter - confirm that the site honours the cookie.
+ req.cookieJar.set_cookie(Cookie(version=0, name='lang', value='en', port=None, port_specified=False, domain='.uploaded.to', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest={}, rfc2109=False))
+ page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]})
if "Login failed!" in page:
self.wrongPassword()