diff options
author | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-06-15 17:35:48 +0200 |
---|---|---|
committer | RaNaN <Mast3rRaNaN@hotmail.de> | 2011-06-15 17:35:48 +0200 |
commit | a0805f27015748638a5fb05fd55b746852c53362 (patch) | |
tree | 1f56b476ae3ce998f62abe1617303319b04f1c3f | |
parent | hagg's rapidshare patch (diff) | |
download | pyload-a0805f27015748638a5fb05fd55b746852c53362.tar.xz |
Add html_unescape function; minor plugin improvements
-rw-r--r-- | module/Utils.py | 27 | ||||
-rw-r--r-- | module/network/HTTPRequest.py | 3 | ||||
-rw-r--r-- | module/plugins/hoster/FreakshareCom.py | 5 | ||||
-rw-r--r-- | module/plugins/hoster/MegauploadCom.py | 12 | ||||
-rw-r--r-- | module/plugins/hoster/RapidshareCom.py | 6 | ||||
-rw-r--r-- | module/plugins/hoster/YoutubeCom.py | 4 |
6 files changed, 47 insertions, 10 deletions
diff --git a/module/Utils.py b/module/Utils.py index e6e40c956..cdf76c144 100644 --- a/module/Utils.py +++ b/module/Utils.py @@ -8,6 +8,7 @@ import time import re from os.path import join from string import maketrans +from htmlentitydefs import name2codepoint def chmod(*args): try: @@ -129,6 +130,32 @@ def lock(func): return new + +def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + name = text[1:-1] + text = unichr(name2codepoint[name]) + except KeyError: + pass + + return text # leave as is + +def html_unescape(text): + """Removes HTML or XML character references and entities from a text string""" + return re.sub("&#?\w+;", fixup, text) + if __name__ == "__main__": print freeSpace(".") diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py index f90048f4d..7904070e8 100644 --- a/module/network/HTTPRequest.py +++ b/module/network/HTTPRequest.py @@ -24,6 +24,7 @@ from urllib import quote, urlencode from logging import getLogger from cStringIO import StringIO +from module.utils import html_unescape from module.plugins.Plugin import Abort def myquote(url): @@ -228,6 +229,8 @@ class HTTPRequest(): #self.log.debug("Decoded %s" % encoding ) decoder = getincrementaldecoder(encoding)("replace") rep = decoder.decode(rep, True) + + #TODO: html_unescape as default except LookupError: self.log.debug("No Decoder foung for %s" % encoding) diff --git a/module/plugins/hoster/FreakshareCom.py b/module/plugins/hoster/FreakshareCom.py index c20206bf5..869b8a99e 100644 --- a/module/plugins/hoster/FreakshareCom.py +++ b/module/plugins/hoster/FreakshareCom.py @@ -34,6 +34,11 @@ class FreakshareCom(Hoster): self.get_file_url()
self.download(self.pyfile.url, post=self.req_opts)
+
+
+ check = self.checkDownload({"bad": "bad try"})
+ if check == "bad":
+ self.fail("Bad Try.")
def prepare(self):
diff --git a/module/plugins/hoster/MegauploadCom.py b/module/plugins/hoster/MegauploadCom.py index fa16fdf31..342a8024d 100644 --- a/module/plugins/hoster/MegauploadCom.py +++ b/module/plugins/hoster/MegauploadCom.py @@ -7,7 +7,7 @@ from module.plugins.Hoster import Hoster from module.network.RequestFactory import getURL
-from module.unescape import unescape
+from module.utils import html_unescape
from module.PyFile import statusMap
from pycurl import error
@@ -36,7 +36,7 @@ def getInfo(urls): # File info
fileInfo = _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap)
url = urls[i]
- name = fileInfo.get('name', url)
+ name = html_unescape(fileInfo.get('name', url))
size = fileInfo.get('size', 0)
status = fileInfo.get('status', statusMap['queued'])
@@ -51,7 +51,7 @@ def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap): fileInfo = {}
try:
fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
- fileInfo['name'] = apiFileDataMap['n']
+ fileInfo['name'] = apiFileDataMap['n']
fileInfo['size'] = int(apiFileDataMap['s'])
fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
except:
@@ -215,10 +215,12 @@ class MegauploadCom(Hoster): def get_file_name(self):
try:
- return self.api["name"]
+ name = self.api["name"]
except KeyError:
file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
- return re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
+ name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
+
+ return html_unescape(name)
def get_wait_time(self):
time = re.search(r"count=(\d+);", self.html[1])
diff --git a/module/plugins/hoster/RapidshareCom.py b/module/plugins/hoster/RapidshareCom.py index 95f6f91f9..96fa6fd36 100644 --- a/module/plugins/hoster/RapidshareCom.py +++ b/module/plugins/hoster/RapidshareCom.py @@ -8,8 +8,6 @@ # * removed some (old?) comment blocks import re -from os import stat, remove -from time import sleep from module.network.RequestFactory import getURL from module.plugins.Hoster import Hoster @@ -99,7 +97,7 @@ class RapidshareCom(Hoster): self.log.info(_("Rapidshare: Traffic Share (direct download)")) self.pyfile.name = self.get_file_name() - self.download(self.pyfile.url, get={"directstart":1}, cookies=True) + self.download(self.pyfile.url, get={"directstart":1}) elif self.api_data["status"] in ("0","4","5"): self.offline() @@ -133,7 +131,7 @@ class RapidshareCom(Hoster): info = self.account.getAccountInfo(self.user, True) self.log.debug("%s: Use Premium Account" % self.__name__) url = self.api_data["mirror"] - self.download(url, get={"directstart":1}, cookies=True) + self.download(url, get={"directstart":1}) def download_api_data(self, force=False): diff --git a/module/plugins/hoster/YoutubeCom.py b/module/plugins/hoster/YoutubeCom.py index 1b8cf6b4b..908869236 100644 --- a/module/plugins/hoster/YoutubeCom.py +++ b/module/plugins/hoster/YoutubeCom.py @@ -3,6 +3,8 @@ import re import urllib + +from module.utils import html_unescape from module.plugins.Hoster import Hoster class YoutubeCom(Hoster): @@ -89,6 +91,6 @@ class YoutubeCom(Hoster): if fmt in self.formats: file_suffix = self.formats[fmt][0] name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix - pyfile.name = name #.replace("&", "&").replace("ö", "oe").replace("ä", "ae").replace("ü", "ue") + pyfile.name = html_unescape(name) self.download(fmt_dict[fmt]) |