summary | refs | log | tree | commit | diff | stats
path: root/module
diff options
context:
space:
mode:
author: RaNaN <Mast3rRaNaN@hotmail.de> 2011-06-15 17:35:48 +0200
committer: RaNaN <Mast3rRaNaN@hotmail.de> 2011-06-15 17:35:48 +0200
commit: a0805f27015748638a5fb05fd55b746852c53362 (patch)
tree: 1f56b476ae3ce998f62abe1617303319b04f1c3f /module
parent: hagg's rapidshare patch (diff)
download: pyload-a0805f27015748638a5fb05fd55b746852c53362.tar.xz
html_unescape function, little plugin improvements
Diffstat (limited to 'module')
-rw-r--r-- module/Utils.py 27
-rw-r--r-- module/network/HTTPRequest.py 3
-rw-r--r-- module/plugins/hoster/FreakshareCom.py 5
-rw-r--r-- module/plugins/hoster/MegauploadCom.py 12
-rw-r--r-- module/plugins/hoster/RapidshareCom.py 6
-rw-r--r-- module/plugins/hoster/YoutubeCom.py 4
6 files changed, 47 insertions, 10 deletions
diff --git a/module/Utils.py b/module/Utils.py
index e6e40c956..cdf76c144 100644
--- a/module/Utils.py
+++ b/module/Utils.py
@@ -8,6 +8,7 @@ import time
import re
from os.path import join
from string import maketrans
+from htmlentitydefs import name2codepoint
def chmod(*args):
try:
@@ -129,6 +130,32 @@ def lock(func):
return new
+
+def fixup(m):
+ text = m.group(0)
+ if text[:2] == "&#":
+ # character reference
+ try:
+ if text[:3] == "&#x":
+ return unichr(int(text[3:-1], 16))
+ else:
+ return unichr(int(text[2:-1]))
+ except ValueError:
+ pass
+ else:
+ # named entity
+ try:
+ name = text[1:-1]
+ text = unichr(name2codepoint[name])
+ except KeyError:
+ pass
+
+ return text # leave as is
+
+def html_unescape(text):
+ """Removes HTML or XML character references and entities from a text string"""
+ return re.sub("&#?\w+;", fixup, text)
+
if __name__ == "__main__":
print freeSpace(".")
diff --git a/module/network/HTTPRequest.py b/module/network/HTTPRequest.py
index f90048f4d..7904070e8 100644
--- a/module/network/HTTPRequest.py
+++ b/module/network/HTTPRequest.py
@@ -24,6 +24,7 @@ from urllib import quote, urlencode
from logging import getLogger
from cStringIO import StringIO
+from module.utils import html_unescape
from module.plugins.Plugin import Abort
def myquote(url):
@@ -228,6 +229,8 @@ class HTTPRequest():
#self.log.debug("Decoded %s" % encoding )
decoder = getincrementaldecoder(encoding)("replace")
rep = decoder.decode(rep, True)
+
+ #TODO: html_unescape as default
except LookupError:
self.log.debug("No Decoder foung for %s" % encoding)
diff --git a/module/plugins/hoster/FreakshareCom.py b/module/plugins/hoster/FreakshareCom.py
index c20206bf5..869b8a99e 100644
--- a/module/plugins/hoster/FreakshareCom.py
+++ b/module/plugins/hoster/FreakshareCom.py
@@ -34,6 +34,11 @@ class FreakshareCom(Hoster):
self.get_file_url()
self.download(self.pyfile.url, post=self.req_opts)
+
+
+ check = self.checkDownload({"bad": "bad try"})
+ if check == "bad":
+ self.fail("Bad Try.")
def prepare(self):
diff --git a/module/plugins/hoster/MegauploadCom.py b/module/plugins/hoster/MegauploadCom.py
index fa16fdf31..342a8024d 100644
--- a/module/plugins/hoster/MegauploadCom.py
+++ b/module/plugins/hoster/MegauploadCom.py
@@ -7,7 +7,7 @@ from module.plugins.Hoster import Hoster
from module.network.RequestFactory import getURL
-from module.unescape import unescape
+from module.utils import html_unescape
from module.PyFile import statusMap
from pycurl import error
@@ -36,7 +36,7 @@ def getInfo(urls):
# File info
fileInfo = _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap)
url = urls[i]
- name = fileInfo.get('name', url)
+ name = html_unescape(fileInfo.get('name', url))
size = fileInfo.get('size', 0)
status = fileInfo.get('status', statusMap['queued'])
@@ -51,7 +51,7 @@ def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
fileInfo = {}
try:
fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
- fileInfo['name'] = apiFileDataMap['n']
+ fileInfo['name'] = apiFileDataMap['n']
fileInfo['size'] = int(apiFileDataMap['s'])
fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
except:
@@ -215,10 +215,12 @@ class MegauploadCom(Hoster):
def get_file_name(self):
try:
- return self.api["name"]
+ name = self.api["name"]
except KeyError:
file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
- return re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
+ name = re.search(file_name_pattern, self.html[1]).group(1).split("/")[-1]
+
+ return html_unescape(name)
def get_wait_time(self):
time = re.search(r"count=(\d+);", self.html[1])
diff --git a/module/plugins/hoster/RapidshareCom.py b/module/plugins/hoster/RapidshareCom.py
index 95f6f91f9..96fa6fd36 100644
--- a/module/plugins/hoster/RapidshareCom.py
+++ b/module/plugins/hoster/RapidshareCom.py
@@ -8,8 +8,6 @@
# * removed some (old?) comment blocks
import re
-from os import stat, remove
-from time import sleep
from module.network.RequestFactory import getURL
from module.plugins.Hoster import Hoster
@@ -99,7 +97,7 @@ class RapidshareCom(Hoster):
self.log.info(_("Rapidshare: Traffic Share (direct download)"))
self.pyfile.name = self.get_file_name()
- self.download(self.pyfile.url, get={"directstart":1}, cookies=True)
+ self.download(self.pyfile.url, get={"directstart":1})
elif self.api_data["status"] in ("0","4","5"):
self.offline()
@@ -133,7 +131,7 @@ class RapidshareCom(Hoster):
info = self.account.getAccountInfo(self.user, True)
self.log.debug("%s: Use Premium Account" % self.__name__)
url = self.api_data["mirror"]
- self.download(url, get={"directstart":1}, cookies=True)
+ self.download(url, get={"directstart":1})
def download_api_data(self, force=False):
diff --git a/module/plugins/hoster/YoutubeCom.py b/module/plugins/hoster/YoutubeCom.py
index 1b8cf6b4b..908869236 100644
--- a/module/plugins/hoster/YoutubeCom.py
+++ b/module/plugins/hoster/YoutubeCom.py
@@ -3,6 +3,8 @@
import re
import urllib
+
+from module.utils import html_unescape
from module.plugins.Hoster import Hoster
class YoutubeCom(Hoster):
@@ -89,6 +91,6 @@ class YoutubeCom(Hoster):
if fmt in self.formats:
file_suffix = self.formats[fmt][0]
name = re.search(file_name_pattern, html).group(1).replace("/", "") + file_suffix
- pyfile.name = name #.replace("&amp;", "&").replace("ö", "oe").replace("ä", "ae").replace("ü", "ue")
+ pyfile.name = html_unescape(name)
self.download(fmt_dict[fmt])