summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: RaNaN <Mast3rRaNaN@hotmail.de> 2011-05-31 21:56:25 +0200
committer: RaNaN <Mast3rRaNaN@hotmail.de> 2011-05-31 21:56:25 +0200
commit: b26daf96006e16cfca296b9f78021254b6e6fa8f (patch)
tree: d1166be13ba2942a68bf55656b82845708d09e91 /module
parent: fixed positional captchas + availbillity over thrift (diff)
download: pyload-b26daf96006e16cfca296b9f78021254b6e6fa8f.tar.xz
automatically try to decode all request according to http header
Diffstat (limited to 'module')
-rw-r--r-- module/Utils.py 2
-rw-r--r-- module/network/HTTPChunk.py 3
-rw-r--r-- module/network/HTTPRequest.py 35
-rw-r--r-- module/plugins/hoster/UploadedTo.py 4
4 files changed, 38 insertions, 6 deletions
diff --git a/module/Utils.py b/module/Utils.py
index 6808f6862..4605aaf75 100644
--- a/module/Utils.py
+++ b/module/Utils.py
@@ -18,7 +18,7 @@ def chmod(*args):
def decode(string):
""" decode string with utf if possible """
try:
- return string.decode("utf8", "ignore")
+ return string.decode("utf8", "replace")
except:
return string
diff --git a/module/network/HTTPChunk.py b/module/network/HTTPChunk.py
index cf77ccae6..d1d56c02b 100644
--- a/module/network/HTTPChunk.py
+++ b/module/network/HTTPChunk.py
@@ -20,7 +20,6 @@ from os import remove
from os.path import exists
from time import sleep
from re import search
-from logging import getLogger
import pycurl
@@ -139,8 +138,6 @@ class HTTPChunk(HTTPRequest):
self.BOMChecked = False
# check and remove byte order mark
- self.log = getLogger("log")
-
@property
def cj(self):
return self.p.cj
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index 598be78c7..87cd6a882 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -19,7 +19,9 @@
import pycurl
+from codecs import getincrementaldecoder
from urllib import quote, urlencode
+from logging import getLogger
from cStringIO import StringIO
from module.plugins.Plugin import Abort
@@ -55,6 +57,8 @@ class HTTPRequest():
self.c.setopt(pycurl.WRITEFUNCTION, self.write)
self.c.setopt(pycurl.HEADERFUNCTION, self.writeHeader)
+ self.log = getLogger("log")
+
def initHandle(self):
""" sets common options to curl handle """
@@ -176,6 +180,7 @@ class HTTPRequest():
self.lastEffectiveURL = self.c.getinfo(pycurl.EFFECTIVE_URL)
self.addCookies()
+ rep = self.decodeResponse(rep)
return rep
def verifyHeader(self):
@@ -193,6 +198,35 @@ class HTTPRequest():
self.rep = StringIO()
return value
+ def decodeResponse(self, rep):
+ """ decode with correct encoding, relies on header """
+ header = self.header.splitlines()
+ encoding = None
+
+ for line in header:
+ line = line.lower().replace(" ", "")
+ if not line.startswith("content-type:") or "charset" not in line or \
+ ("text" not in line and "application" not in line):
+ continue
+
+ none, delemiter, charset = line.rpartition("charset=")
+ charset = charset.split(";")
+ if charset:
+ encoding = charset[0]
+
+ if encoding:
+ try:
+ #self.log.debug("Decoded %s" % encoding )
+ decoder = getincrementaldecoder(encoding)("replace")
+ rep = decoder.decode(rep, True)
+
+ except LookupError:
+ self.log.debug("No Decoder foung for %s" % encoding)
+ except Exception:
+ self.log.debug("Error when decoding string from %s." % encoding)
+
+ return rep
+
def write(self, buf):
""" writes response """
if self.rep.tell() > 1000000 or self.abort:
@@ -224,7 +258,6 @@ class HTTPRequest():
self.c.close()
del self.c
-
if __name__ == "__main__":
url = "http://pyload.org"
c = HTTPRequest()
diff --git a/module/plugins/hoster/UploadedTo.py b/module/plugins/hoster/UploadedTo.py
index 18d886947..3e2b657d9 100644
--- a/module/plugins/hoster/UploadedTo.py
+++ b/module/plugins/hoster/UploadedTo.py
@@ -2,6 +2,8 @@
import re
+from module.utils import decode
+
from module.plugins.Hoster import Hoster
from module.network.RequestFactory import getURL
from module.plugins.Plugin import chunks
@@ -34,7 +36,7 @@ def getAPIData(urls):
post["id_%s" % i] = id
idMap[id] = url
- api = getURL("http://uploaded.to/api/filemultiple", post=post)
+ api = decode(getURL("http://uploaded.to/api/filemultiple", post=post))
result = {}