diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-05-31 21:56:25 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-05-31 21:56:25 +0200 |
commit | b26daf96006e16cfca296b9f78021254b6e6fa8f (patch) | |
tree | d1166be13ba2942a68bf55656b82845708d09e91 | |
parent | fixed positional captchas + availability over thrift (diff) | |
download | pyload-b26daf96006e16cfca296b9f78021254b6e6fa8f.tar.xz |
automatically try to decode all requests according to the HTTP header
-rw-r--r-- | module/Utils.py | 2 | ||||
-rw-r--r-- | module/network/HTTPChunk.py | 3 | ||||
-rw-r--r-- | module/network/HTTPRequest.py | 35 | ||||
-rw-r--r-- | module/plugins/hoster/UploadedTo.py | 4 |
4 files changed, 38 insertions, 6 deletions
diff --git a/module/Utils.py b/module/Utils.py index 6808f6862..4605aaf75 100644 --- a/module/Utils.py +++ b/module/Utils.py @@ -18,7 +18,7 @@ def chmod(*args): def decode(string): """ decode string with utf if possible """ try: - return string.decode("utf8", "ignore") + return string.decode("utf8", "replace") except: return string diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py index cf77ccae6..d1d56c02b 100644 --- a/module/network/HTTPChunk.py +++ b/module/network/HTTPChunk.py @@ -20,7 +20,6 @@ from os import remove from os.path import exists from time import sleep from re import search -from logging import getLogger import pycurl @@ -139,8 +138,6 @@ class HTTPChunk(HTTPRequest): self.BOMChecked = False # check and remove byte order mark - self.log = getLogger("log") - @property def cj(self): return self.p.cj diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index 598be78c7..87cd6a882 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -19,7 +19,9 @@ import pycurl +from codecs import getincrementaldecoder from urllib import quote, urlencode +from logging import getLogger from cStringIO import StringIO from module.plugins.Plugin import Abort @@ -55,6 +57,8 @@ class HTTPRequest(): self.c.setopt(pycurl.WRITEFUNCTION, self.write) self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader) + self.log = getLogger("log") + def initHandle(self): """ sets common options to curl handle """ @@ -176,6 +180,7 @@ class HTTPRequest(): self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL) self.addCookies() + rep = self.decodeResponse(rep) return rep def verifyHeader(self): @@ -193,6 +198,35 @@ class HTTPRequest(): self.rep = StringIO() return value + def decodeResponse(self, rep): + """ decode with correct encoding, relies on header """ + header = self.header.splitlines() + encoding = None + + for line in header: + line = line.lower().replace(" ", "") + if not line.startswith("content-type:") or 
"charset" not in line or \ + ("text" not in line and "application" not in line): + continue + + none, delemiter, charset = line.rpartition("charset=") + charset = charset.split(";") + if charset: + encoding = charset[0] + + if encoding: + try: + #self.log.debug("Decoded %s" % encoding ) + decoder = getincrementaldecoder(encoding)("replace") + rep = decoder.decode(rep, True) + + except LookupError: + self.log.debug("No Decoder foung for %s" % encoding) + except Exception: + self.log.debug("Error when decoding string from %s." % encoding) + + return rep + def write(self, buf): """ writes response """ if self.rep.tell() > 1000000 or self.abort: @@ -224,7 +258,6 @@ class HTTPRequest(): self.c.close() del self.c - if __name__ == "__main__": url = "http://pyload.org" c = HTTPRequest() diff --git a/module/plugins/hoster/UploadedTo.py b/module/plugins/hoster/UploadedTo.py index 18d886947..3e2b657d9 100644 --- a/module/plugins/hoster/UploadedTo.py +++ b/module/plugins/hoster/UploadedTo.py @@ -2,6 +2,8 @@ import re +from module.utils import decode + from module.plugins.Hoster import Hoster from module.network.RequestFactory import getURL from module.plugins.Plugin import chunks @@ -34,7 +36,7 @@ def getAPIData(urls): post["id_%s" % i] = id idMap[id] = url - api = getURL("http://uploaded.to/api/filemultiple", post=post) + api = decode(getURL("http://uploaded.to/api/filemultiple", post=post)) result = {} |