diff options
author | sebdelsol <seb.morin@gmail.com> | 2015-02-23 14:18:42 +0100 |
---|---|---|
committer | sebdelsol <seb.morin@gmail.com> | 2015-02-23 14:18:42 +0100 |
commit | 9599ff3a6217aeb38c7d9d4c4257106c1ff79f1b (patch) | |
tree | f699657a22ba77f99de1dd9022d6c6020dfe919d /module/plugins/hoster/ZippyshareCom.py | |
parent | Merge pull request #1197 from immenz/dev_extract (diff) | |
download | pyload-9599ff3a6217aeb38c7d9d4c4257106c1ff79f1b.tar.xz |
Zippyshare bug #1191 correction
This version uses the internal `BeautifulSoup`. If you decide to get rid of it, I suggest adding a dependency on `lxml`.
I've found a way to handle deliberate errors in the JS scripts without relying on PyV8: I've added JS `try/catch` statements around the Zippyshare scripts.
Diffstat (limited to 'module/plugins/hoster/ZippyshareCom.py')
-rw-r--r-- | module/plugins/hoster/ZippyshareCom.py | 49 |
1 files changed, 36 insertions, 13 deletions
diff --git a/module/plugins/hoster/ZippyshareCom.py b/module/plugins/hoster/ZippyshareCom.py index 615559989..ad4688bac 100644 --- a/module/plugins/hoster/ZippyshareCom.py +++ b/module/plugins/hoster/ZippyshareCom.py @@ -5,17 +5,18 @@ import re from module.plugins.internal.CaptchaService import ReCaptcha from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo +from module.lib.BeautifulSoup import BeautifulSoup class ZippyshareCom(SimpleHoster): __name__ = "ZippyshareCom" __type__ = "hoster" - __version__ = "0.73" + __version__ = "0.74" __pattern__ = r'http://www\d{0,2}\.zippyshare\.com/v(/|iew\.jsp.*key=)(?P<KEY>[\w^_]+)' __description__ = """Zippyshare.com hoster plugin""" __license__ = "GPLv3" - __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com", "sebdelsol")] COOKIES = [("zippyshare.com", "ziplocale", "en")] @@ -46,20 +47,42 @@ class ZippyshareCom(SimpleHoster): self.error(e) else: - self.link = '/'.join(("d", self.info['pattern']['KEY'], str(self.get_checksum()), self.pyfile.name)) + self.link = self.get_link() - - def get_checksum(self): + def get_link(self): try: - b1 = eval(re.search(r'\.omg = (.+?);', self.html).group(1)) - b2 = eval(re.search(r'\* \((.+?)\)', self.html).group(1)) - checksum = b1 * b2 + 18 + # get all the scripts inside the html body + soup = BeautifulSoup(self.html) + scripts = (s.getText() for s in soup.body.findAll('script', type='text/javascript')) + + # meant to be populated with the initialization of all the DOM elements found in the scripts + initScripts = set() + + def replElementById(element): + id, attr = element.group(1), element.group(4) # attr might be None + + varName = '%s_%s' %(id, attr) + + initValues = (elt.get(attr, None) for elt in soup.findAll(id=id)) + initValues = [v for v in initValues if v is not None] + initValue = '"%s"' %initValues[-1] if initValues else 'null' + + initScripts.add('var %s = %s;' %(varName, initValue)) + return 
varName + + # handle all getElementById + reVar = r'document.getElementById\([\'"](\w+)[\'"]\)(\.)?(getAttribute\([\'"])?(\w+)?([\'"]\))?' + scripts = [re.sub(reVar, replElementById, script) for script in scripts] + + # add try/catch in JS to handle deliberate errors + tryJS, catchJS = u'try{', u'} catch(err){}' # '', '' to see where the script fails + scripts = ['\n'.join((tryJS, script, catchJS)) for script in scripts if script.strip()] + + # get the file's url by evaluating all the scripts + scripts = '\n'.join(list(initScripts) + scripts + ['dlbutton_href']) + return self.js.eval(scripts) except Exception: - self.error(_("Unable to calculate checksum")) - - else: - return checksum - + self.error(_("Unable to calculate the link")) getInfo = create_getInfo(ZippyshareCom) |