cookie handling WIP -.-

author: mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
committer: mkaay <mkaay@mkaay.de> 2010-12-22 20:33:23 +0100
commit: 0fd06af30e6ec943b6ddcfed2e2cf4cd64095309 (patch)
tree: f82c64a4504412ac848285cbf5a235e4295cb106
parent: fixed getURL (diff)
download: pyload-0fd06af30e6ec943b6ddcfed2e2cf4cd64095309.tar.xz
5 files changed, 191 insertions, 25 deletions
diff --git a/module/network/Browser.py b/module/network/Browser.py
index dc4fd11aa..b68c39c53 100644
--- a/module/network/Browser.py
+++ b/module/network/Browser.py
@@ -111,10 +111,10 @@ class Browser(object):
         self.log.warning("Browser: deprecated call 'clean'")
         print_stack()
     
-    def load(self, *args, **kwargs):
+    def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, no_post_encode=False, raw_cookies={}):
         self.log.warning("Browser: deprecated call 'load'")
         print_stack()
-        return self.getPage(*args, **kwargs)
+        return self.getPage(url, get=get, post=post, cookies=cookies)
 
     def download(self, url, file_name, folder, get={}, post={}, ref=True, cookies=True, no_post_encode=False):
         #@TODO
diff --git a/module/network/CookieJar.py b/module/network/CookieJar.py
index fc6b5e076..372e2001e 100644
--- a/module/network/CookieJar.py
+++ b/module/network/CookieJar.py
@@ -19,7 +19,7 @@
 
 from cookielib import CookieJar as PyCookieJar
 from cookielib import Cookie
-
+from time import time
 
 class CookieJar(PyCookieJar):
     def __init__(self, pluginName=None, account=None):
@@ -27,7 +27,6 @@ class CookieJar(PyCookieJar):
         self.plugin = pluginName
         self.account = account
 
-
     def getCookie(self, name):
         print "getCookie not implemented!"
         return None
@@ -38,4 +37,32 @@ class CookieJar(PyCookieJar):
                    domain_initial_dot=(domain.startswith(".")), path=path, path_specified=True,
                    secure=False, expires=None, discard=True, comment=None,
                    comment_url=None, rest={'HttpOnly': None}, rfc2109=False)
-        self.set_cookie(c)
-\ No newline at end of file
+        self.set_cookie(c)
+
+    def add_cookie_header(self, request):
+        """Add the Cookie (and, if needed, Cookie2) header to request.
+
+        A request that already carries a "Cookie" header is left
+        untouched.  Expired cookies are cleared afterwards.
+        """
+        self._cookies_lock.acquire()
+        try:
+            self._policy._now = self._now = int(time())
+            cookies = self._cookies_for_request(request)
+            attrs = self._cookie_attrs(cookies)
+            # attrs hold the cookie values: never dump them to stdout
+            if attrs and not request.has_header("Cookie"):
+                request.add_header("Cookie", "; ".join(attrs))
+
+            # if necessary, advertise that we know RFC 2965
+            if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
+                not request.has_header("Cookie2")):
+                for cookie in cookies:
+                    if cookie.version != 1:
+                        request.add_header("Cookie2", '$Version="1"')
+                        break
+        finally:
+            self._cookies_lock.release()
+
+        # runs only after the lock above has been released
+        self.clear_expired_cookies()
diff --git a/module/network/CookieRedirectHandler.py b/module/network/CookieRedirectHandler.py
new file mode 100644
index 000000000..3eeb3e711
--- /dev/null
+++ b/module/network/CookieRedirectHandler.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+
+"""
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License,
+    or (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+    See the GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    @author: mkaay, RaNaN
+"""
+
+from urllib2 import BaseHandler
+from urllib import addinfourl
+from urllib2 import Request
+from urlparse import urlparse, urlunparse, urljoin
+from CookieJar import CookieJar
+
+class CookieRedirectHandler(BaseHandler):
+    """urllib2 handler that combines cookie handling with redirects.
+
+    With follow=False, 30x responses are returned instead of followed.
+    """
+    # max redirections to any single URL (cookie state may require repeats)
+    max_repeats = 4
+    # max total redirections (regardless of URL) before assuming a loop
+    max_redirections = 10
+
+    def __init__(self, cookiejar=None, follow=True):
+        if cookiejar is None:
+            cookiejar = CookieJar()
+        self.cookiejar = cookiejar
+        self.follow = follow
+
+    def http_request(self, request):
+        """Add the jar's cookies to the outgoing request."""
+        self.cookiejar.add_cookie_header(request)
+        return request
+
+    def http_response(self, request, response):
+        """Store the cookies set by the response in the jar."""
+        self.cookiejar.extract_cookies(response, request)
+        return response
+
+    def redirect_request(self, req, fp, code, msg, headers, newurl):
+        """Return a Request or None in response to a redirect.
+
+        Called by the http_error_30x methods.  Return a new Request if
+        the redirection should take place, raise HTTPError if no-one
+        else should handle this url, or None if another handler might.
+        """
+        from urllib2 import HTTPError  # not among the module-level imports
+        m = req.get_method()
+        if (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
+            or code in (301, 302, 303) and m == "POST"):
+            # Strictly (RFC 2616) a 301/302 answer to a POST must not be
+            # followed without user confirmation; in practice essentially
+            # all clients redirect anyway, so we do the same.
+            # be conciliant with URIs containing a space
+            newurl = newurl.replace(' ', '%20')
+            # Cookie headers are not copied: a stale one would stop
+            # add_cookie_header() from sending the jar's current cookies
+            skip = ("content-length", "content-type", "cookie", "cookie2")
+            newheaders = dict((k, v) for k, v in req.headers.items()
+                              if k.lower() not in skip)
+            req = Request(newurl,
+                           headers=newheaders,
+                           origin_req_host=req.get_origin_req_host(),
+                           unverifiable=True)
+            self.cookiejar.add_cookie_header(req)
+            return req
+        else:
+            raise HTTPError(req.get_full_url(), code, msg, headers, fp)
+
+    def http_error_302(self, req, fp, code, msg, headers):
+        """Save the response's cookies, then follow the redirect.
+
+        To avoid the server sending us into an infinite loop, the
+        URLs already seen are tracked in a handler-specific
+        redirect_dict attribute on the Request object.
+        """
+        from urllib2 import HTTPError  # not among the module-level imports
+        resp = addinfourl(fp, headers, req.get_full_url())
+        resp.code = code
+        resp.msg = msg
+        self.cookiejar.extract_cookies(resp, req)
+
+        if not self.follow:
+            return resp
+
+        # Some servers (incorrectly) return multiple Location headers
+        # (so probably same goes for URI).  Use first header.
+        if 'location' in headers:
+            newurl = headers.getheaders('location')[0]
+        elif 'uri' in headers:
+            newurl = headers.getheaders('uri')[0]
+        else:
+            return
+
+        # fix a possible malformed URL
+        urlparts = urlparse(newurl)
+        if not urlparts.path:
+            urlparts = list(urlparts)
+            urlparts[2] = "/"
+        newurl = urlunparse(urlparts)
+        newurl = urljoin(req.get_full_url(), newurl)
+
+        # XXX Probably want to forget about the state of the current
+        # request, although that might interact poorly with other
+        # handlers that also use handler-specific request attributes
+        new = self.redirect_request(req, fp, code, msg, headers, newurl)
+        if new is None:
+            return
+
+        # loop detection: redirect_dict maps each visited URL to its hit count
+        if hasattr(req, 'redirect_dict'):
+            visited = new.redirect_dict = req.redirect_dict
+            if (visited.get(newurl, 0) >= self.max_repeats or
+                len(visited) >= self.max_redirections):
+                raise HTTPError(req.get_full_url(), code,
+                                self.inf_msg + msg, headers, fp)
+        else:
+            visited = new.redirect_dict = req.redirect_dict = {}
+        visited[newurl] = visited.get(newurl, 0) + 1
+
+        # Don't close the fp until we are sure that we won't use it
+        # with HTTPError.
+        fp.read()
+        fp.close()
+        return self.parent.open(new, timeout=req.timeout)
+
+    http_error_301 = http_error_303 = http_error_307 = http_error_302
+
+    inf_msg = "The HTTP server returned a redirect error that would " \
+              "lead to an infinite loop.\n" \
+              "The last 30x error message was:\n"
+
+    https_request = http_request
+    https_response = http_response
diff --git a/module/network/HTTPBase.py b/module/network/HTTPBase.py
index 4fff15335..3fd8fee49 100644
--- a/module/network/HTTPBase.py
+++ b/module/network/HTTPBase.py
@@ -18,15 +18,12 @@
 """
 
 from urllib import urlencode
-#from urlparse import urlparse
 
 from urllib2 import Request
 from urllib2 import OpenerDirector
 
 from urllib2 import BaseHandler
 from urllib2 import HTTPHandler
-from urllib2 import HTTPRedirectHandler
-from urllib2 import HTTPCookieProcessor
 from urllib2 import HTTPSHandler
 from urllib2 import HTTPDefaultErrorHandler
 from urllib2 import HTTPErrorProcessor
@@ -42,7 +39,8 @@ from httplib import HTTPResponse
 from httplib import responses as HTTPStatusCodes
 from httplib import ResponseNotReady
 
-from cookielib import CookieJar
+from CookieJar import CookieJar
+from CookieRedirectHandler import CookieRedirectHandler
 
 import socket
 import socks
@@ -261,15 +259,6 @@ class PyLoadHTTPHandler(HTTPHandler):
     def http_open(self, req):
         return self.do_open(PyLoadHTTPConnection, req)
 
-class NoRedirectHandler(BaseHandler): #supress error
-    def http_error_302(self, req, fp, code, msg, headers):
-        resp = addinfourl(fp, headers, req.get_full_url())
-        resp.code = code
-        resp.msg = msg
-        return resp
-
-    http_error_301 = http_error_303 = http_error_307 = http_error_302
-
 class HTTPBase():
     def __init__(self, interface=None, proxies={}):
         self.followRedirect = True
@@ -280,8 +269,6 @@ class HTTPBase():
         
         self.referer = None
         
-        self.cookieJar = None
-        
         self.userAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en; rv:1.9.0.8) Gecko/2009032609 Firefox/3.0.10"
         
         self.handler = PyLoadHTTPHandler()
@@ -302,11 +289,9 @@ class HTTPBase():
         opener.add_handler(HTTPSHandler())
         opener.add_handler(HTTPDefaultErrorHandler())
         opener.add_handler(HTTPErrorProcessor())
+        opener.add_handler(CookieRedirectHandler(self.cookieJar, self.followRedirect))
         if self.proxies.has_key("http") or self.proxies.has_key("https"):
             opener.add_handler(ProxyHandler(self.proxies))
-        opener.add_handler(HTTPRedirectHandler() if self.followRedirect else NoRedirectHandler())
-        if cookies:
-            opener.add_handler(HTTPCookieProcessor(self.cookieJar))
         opener.version = self.userAgent
         opener.addheaders[0] = ("User-Agent", self.userAgent)
         return opener
@@ -356,6 +341,9 @@ class HTTPBase():
                 print "[HTTP] \t", key, ":", value
             for key, value in req.headers.iteritems(): 
                 print "[HTTP] \t", key, ":", value
+            print "[HTTP] cookies"
+            from pprint import pprint
+            pprint(self.cookieJar._cookies)
             print "[HTTP] ----"
         
         resp = opener.open(req)
@@ -368,6 +356,9 @@ class HTTPBase():
             print "[HTTP] headers"
             for key, value in resp.info().dict.iteritems(): 
                 print "[HTTP] \t", key, ":", value
+            print "[HTTP] cookies"
+            from pprint import pprint
+            pprint(self.cookieJar._cookies)
             print "[HTTP] ----"
         try:
             self.size = int(resp.info()["Content-Length"])
diff --git a/module/plugins/accounts/UploadedTo.py b/module/plugins/accounts/UploadedTo.py
index fcc2daf22..9b17debf5 100644
--- a/module/plugins/accounts/UploadedTo.py
+++ b/module/plugins/accounts/UploadedTo.py
@@ -20,6 +20,7 @@
 from module.plugins.Account import Account
 import re
 from time import strptime, mktime
+from cookielib import Cookie
 
 class UploadedTo(Account):
     __name__ = "UploadedTo"
@@ -30,7 +31,7 @@ class UploadedTo(Account):
     __author_mail__ = ("mkaay@mkaay.de")
     
     def loadAccountInfo(self, user, req):
-        html = req.getPage("http://uploaded.to/?setlang=en", cookies=True)
+        html = req.getPage("http://uploaded.to/", cookies=True)
         raw_traffic = re.search(r"Traffic left: </span><span class=.*?>(.*?)</span>", html).group(1)
         raw_valid = re.search(r"Valid until: </span> <span class=.*?>(.*?)</span>", html).group(1)
         traffic = int(self.parseTraffic(raw_traffic))
@@ -40,6 +41,7 @@ class UploadedTo(Account):
         return tmp
 
     def login(self, user, data, req):
-        page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]}, cookies=True)
+        req.cookieJar.set_cookie(Cookie(version=0, name='lang', value='en', port=None, port_specified=False, domain='.uploaded.to', domain_specified=True, domain_initial_dot=True, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest={}, rfc2109=False))
+        page = req.getPage("http://uploaded.to/login", post={ "email" : user, "password" : data["password"]})
         if "Login failed!" in page:
             self.wrongPassword()
author	mkaay <mkaay@mkaay.de>	2010-12-22 20:33:23 +0100
committer	mkaay <mkaay@mkaay.de>	2010-12-22 20:33:23 +0100
commit	0fd06af30e6ec943b6ddcfed2e2cf4cd64095309 (patch)
tree	f82c64a4504412ac848285cbf5a235e4295cb106
parent	fixed getURL (diff)
download	pyload-0fd06af30e6ec943b6ddcfed2e2cf4cd64095309.tar.xz