From 7c28259f92c2b3c608583ff128a5ae4134d4c48f Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 30 Dec 2009 17:33:14 +0100 Subject: moved captcha stuff, extended serienjunkies, some other stuff --- module/plugins/decrypter/SerienjunkiesOrg.py | 178 +++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 13 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 46f380857..a73779dd3 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,6 +6,25 @@ from time import sleep from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup +from module.download_thread import CaptchaError + +from htmlentitydefs import name2codepoint as n2cp +def substitute_entity(match): + ent = match.group(2) + if match.group(1) == "#": + return unichr(int(ent)) + else: + cp = n2cp.get(ent) + if cp: + return unichr(cp) + else: + return match.group() + +def decode_htmlentities(string): + entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") + return entity_re.subn(substitute_entity, string)[0] + + class SerienjunkiesOrg(Plugin): def __init__(self, parent): Plugin.__init__(self, parent) @@ -13,7 +32,7 @@ class SerienjunkiesOrg(Plugin): props['name'] = "SerienjunkiesOrg" props['type'] = "container" props['pattern'] = r"http://.*?serienjunkies.org/.*?" - props['version'] = "0.1" + props['version'] = "0.2" props['description'] = """serienjunkies.org Container Plugin""" props['author_name'] = ("mkaay") props['author_mail'] = ("mkaay@mkaay.de") @@ -21,6 +40,43 @@ class SerienjunkiesOrg(Plugin): self.parent = parent self.html = None self.multi_dl = False + + self.hosterMap = { + "rc": "RapidshareCom", + "ff": "FilefactoryCom", + "ut": "UploadedTo", + "ul": "UploadedTo", + "nl": "NetloadIn", + "rs": "RapidshareDe" + } + self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) + episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") + if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): + self.decryptNow = False + else: + self.decryptNow = True + + def prepare(self, thread): + pyfile = self.parent + + self.want_reconnect = False + + pyfile.status.exists = self.file_exists() + + if not pyfile.status.exists: + raise Exception, "File not found" + return False + + pyfile.status.filename = self.get_file_name() + + pyfile.status.waituntil = self.time_plus_wait + pyfile.status.url = self.get_file_url() + pyfile.status.want_reconnect = self.want_reconnect + + thread.wait(self.parent) + + return True def getSJSrc(self, url): src = self.req.load(str(url)) @@ -31,9 +87,88 @@ class SerienjunkiesOrg(Plugin): def file_exists(self): return True + def waitForCaptcha(self, captchaData, imgType): + captchaManager = self.parent.core.captchaManager + task = captchaManager.newTask(self) + task.setCaptcha(captchaData, imgType) + task.setWaiting() + while not task.getStatus() == "done": + if not self.parent.core.isGUIConnected(): + task.removeTask() + raise CaptchaError + sleep(1) + result = task.getResult() + task.removeTask() + return result + + def handleSeason(self, url): + src = self.getSJSrc(url) + soup = BeautifulSoup(src) + post = soup.find("div", attrs={"class": "post-content"}) + ps = post.findAll("p") + hosterPattern = re.compile("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_.*?\.html$") + preferredHoster = self.get_config("preferredHoster").split(",") + self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) + groups = {} + gid = -1 + seasonName = soup.find("a", attrs={"rel":"bookmark"}).string + for p in ps: + if re.search("Dauer|Sprache|Format", str(p)): + var = p.findAll("strong") + opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} + for v in var: + n = decode_htmlentities(v.string) + val = v.nextSibling + val = val.encode("utf-8") + val = decode_htmlentities(val) + val = val.replace(" |", "") + n = n.strip() + n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) + val = val.strip() + val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val) + opts[n.strip()] = val.strip() + gid += 1 + groups[gid] = {} + groups[gid]["ep"] = [] + groups[gid]["opts"] = opts + elif re.search("Download:", str(p)): + links1 = p.findAll("a", attrs={"href": hosterPattern}) + links2 = p.findAll("a", attrs={"href": re.compile("^http://serienjunkies.org/safe/.*$")}) + for link in links1 + links2: + groups[gid]["ep"].append(link["href"]) + packages = {} + for g in groups.values(): + links = [] + linklist = g["ep"] + package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"]) + linkgroups = {} + for link in linklist: + key = re.sub("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_", "", link) + if not linkgroups.has_key(key): + linkgroups[key] = [] + linkgroups[key].append(link) + for group in linkgroups.values(): + print "group", group + for pHoster in preferredHoster: + print "phoster", pHoster + hmatch = False + for link in group: + print "link", link + m = hosterPattern.match(link) + if m: + if pHoster == self.hosterMap[m.group(1)]: + links.append(link) + hmatch = True + print "match" + break + if hmatch: + break + packages[package] = links + return packages + def handleEpisode(self, url): if not self.parent.core.isGUIConnected(): - return False + raise CaptchaError for i in range(3): src = self.getSJSrc(url) if not src.find("Du hast das Download-Limit überschritten! Bitte versuche es später nocheinmal.") == -1: @@ -45,17 +180,7 @@ class SerienjunkiesOrg(Plugin): captchaTag = soup.find(attrs={"src":re.compile("^/secure/")}) captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"] captchaData = self.req.load(str(captchaUrl)) - captchaManager = self.parent.core.captchaManager - task = captchaManager.newTask(self) - task.setCaptcha(captchaData, "png") - task.setWaiting() - while not task.getStatus() == "done": - if not self.parent.core.isGUIConnected(): - task.removeTask() - return False - sleep(1) - result = task.getResult() - task.removeTask() + result = self.waitForCaptcha(captchaData, "png") url = "http://download.serienjunkies.org"+form["action"] sinp = form.find(attrs={"name":"s"}) @@ -73,6 +198,27 @@ class SerienjunkiesOrg(Plugin): links.append(self.handleFrame(frameUrl)) return links + def handleOldStyleLink(self, url): + if not self.parent.core.isGUIConnected(): + raise CaptchaError + for i in range(3): + sj = self.req.load(str(url)) + soup = BeautifulSoup(sj) + form = soup.find("form", attrs={"action":re.compile("^http://serienjunkies.org")}) + captchaTag = form.find(attrs={"src":re.compile("^/safe/secure/")}) + captchaUrl = "http://serienjunkies.org"+captchaTag["src"] + captchaData = self.req.load(str(captchaUrl)) + result = self.waitForCaptcha(captchaData, "png") + url = form["action"] + sinp = form.find(attrs={"name":"s"}) + + self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'dl.start': "Download"}, cookies=False, just_header=True) + decrypted = self.req.lastEffectiveURL + if decrypted == str(url): + continue + return [decrypted] + return False + def handleFrame(self, url): self.req.load(str(url), cookies=False, just_header=True) return self.req.lastEffectiveURL @@ -80,9 +226,15 @@ class SerienjunkiesOrg(Plugin): def proceed(self, url, location): links = False episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") + seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$") if framePattern.match(url): links = [self.handleFrame(url)] elif episodePattern.match(url): links = self.handleEpisode(url) + elif oldStyleLink.match(url): + links = self.handleOldStyleLink(url) + elif seasonPattern.match(url): + links = self.handleSeason(url) self.links = links -- cgit v1.2.3