From c1516088e4e7f76dddd68ef71f58c6413862e31c Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 30 Dec 2009 12:35:03 +0100 Subject: show captchas in gui, SerienjunkiesOrg plugin --- module/plugins/decrypter/SerienjunkiesOrg.py | 86 ++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 module/plugins/decrypter/SerienjunkiesOrg.py (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py new file mode 100644 index 000000000..407d14c53 --- /dev/null +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- + +import re +from time import sleep + +from module.Plugin import Plugin +from module.BeautifulSoup import BeautifulSoup + +class SerienjunkiesOrg(Plugin): + def __init__(self, parent): + Plugin.__init__(self, parent) + props = {} + props['name'] = "SerienjunkiesOrg" + props['type'] = "container" + props['pattern'] = r"http://.*?serienjunkies.org/.*?" + props['version'] = "0.1" + props['description'] = """serienjunkies.org Container Plugin""" + props['author_name'] = ("mkaay") + props['author_mail'] = ("mkaay@mkaay.de") + self.props = props + self.parent = parent + self.html = None + self.multi_dl = False + + def getSJSrc(self, url): + src = self.req.load(str(url)) + if not src.find("Enter Serienjunkies") == -1: + src = self.req.load(str(url)) + return src + + def file_exists(self): + return True + + def handleEpisode(self, url): + if not self.parent.core.isGUIConnected(): + return False + for i in range(3): + src = self.getSJSrc(url) + if not src.find("Du hast das Download-Limit überschritten! Bitte versuche es später nocheinmal.") == -1: + self.logger.info("Downloadlimit reached") + return False + else: + soup = BeautifulSoup(src) + form = soup.find("form") + captchaTag = soup.find(attrs={"src":re.compile("^/secure/")}) + captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"] + captchaData = self.req.load(str(captchaUrl)) + captchaManager = self.parent.core.captchaManager + task = captchaManager.newTask(self) + task.setCaptcha(captchaData, "png") + task.setWaiting() + while not task.getStatus() == "done": + if not self.parent.core.isGUIConnected(): + return False + sleep(1) + result = task.getResult() + url = "http://download.serienjunkies.org"+form["action"] + sinp = form.find(attrs={"name":"s"}) + + sj = self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'action': "Download"}) + + soup = BeautifulSoup(sj) + rawLinks = soup.findAll(attrs={"action": re.compile("^http://download.serienjunkies.org/")}) + + if not len(rawLinks) > 0: + continue + + links = [] + for link in rawLinks: + frameUrl = link["action"].replace("/go-", "/frame/go-") + links.append(self.handleFrame(frameUrl)) + return links + + def handleFrame(self, url): + self.req.load(str(url), cookies=False, just_header=True) + return self.req.lastEffectiveURL + + def proceed(self, url, location): + links = False + episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") + if framePattern.match(url): + links = [self.handleFrame(url)] + elif episodePattern.match(url): + links = self.handleEpisode(url) + self.links = links -- cgit v1.2.3 From 08f12b089eb6fade689134b8170aa2b81218ddf7 Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 30 Dec 2009 12:50:10 +0100 Subject: signal slot stuff --- module/plugins/decrypter/SerienjunkiesOrg.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 407d14c53..46f380857 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -51,9 +51,11 @@ class SerienjunkiesOrg(Plugin): task.setWaiting() while not task.getStatus() == "done": if not self.parent.core.isGUIConnected(): + task.removeTask() return False sleep(1) result = task.getResult() + task.removeTask() url = "http://download.serienjunkies.org"+form["action"] sinp = form.find(attrs={"name":"s"}) -- cgit v1.2.3 From 7c28259f92c2b3c608583ff128a5ae4134d4c48f Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 30 Dec 2009 17:33:14 +0100 Subject: moved captcha stuff, extended serienjunkies, some other stuff --- module/plugins/decrypter/SerienjunkiesOrg.py | 178 +++++++++++++++++++++++++-- 1 file changed, 165 insertions(+), 13 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 46f380857..a73779dd3 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,6 +6,25 @@ from time import sleep from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup +from module.download_thread import CaptchaError + +from htmlentitydefs import name2codepoint as n2cp +def substitute_entity(match): + ent = match.group(2) + if match.group(1) == "#": + return unichr(int(ent)) + else: + cp = n2cp.get(ent) + if cp: + return unichr(cp) + else: + return match.group() + +def decode_htmlentities(string): + entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") + return entity_re.subn(substitute_entity, string)[0] + + class SerienjunkiesOrg(Plugin): def __init__(self, parent): Plugin.__init__(self, parent) @@ -13,7 +32,7 @@ class SerienjunkiesOrg(Plugin): props['name'] = "SerienjunkiesOrg" props['type'] = "container" props['pattern'] = r"http://.*?serienjunkies.org/.*?" - props['version'] = "0.1" + props['version'] = "0.2" props['description'] = """serienjunkies.org Container Plugin""" props['author_name'] = ("mkaay") props['author_mail'] = ("mkaay@mkaay.de") @@ -21,6 +40,43 @@ class SerienjunkiesOrg(Plugin): self.parent = parent self.html = None self.multi_dl = False + + self.hosterMap = { + "rc": "RapidshareCom", + "ff": "FilefactoryCom", + "ut": "UploadedTo", + "ul": "UploadedTo", + "nl": "NetloadIn", + "rs": "RapidshareDe" + } + self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) + episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") + if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): + self.decryptNow = False + else: + self.decryptNow = True + + def prepare(self, thread): + pyfile = self.parent + + self.want_reconnect = False + + pyfile.status.exists = self.file_exists() + + if not pyfile.status.exists: + raise Exception, "File not found" + return False + + pyfile.status.filename = self.get_file_name() + + pyfile.status.waituntil = self.time_plus_wait + pyfile.status.url = self.get_file_url() + pyfile.status.want_reconnect = self.want_reconnect + + thread.wait(self.parent) + + return True def getSJSrc(self, url): src = self.req.load(str(url)) @@ -31,9 +87,88 @@ class SerienjunkiesOrg(Plugin): def file_exists(self): return True + def waitForCaptcha(self, captchaData, imgType): + captchaManager = self.parent.core.captchaManager + task = captchaManager.newTask(self) + task.setCaptcha(captchaData, imgType) + task.setWaiting() + while not task.getStatus() == "done": + if not self.parent.core.isGUIConnected(): + task.removeTask() + raise CaptchaError + sleep(1) + result = task.getResult() + task.removeTask() + return result + + def handleSeason(self, url): + src = self.getSJSrc(url) + soup = BeautifulSoup(src) + post = soup.find("div", attrs={"class": "post-content"}) + ps = post.findAll("p") + hosterPattern = re.compile("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_.*?\.html$") + preferredHoster = self.get_config("preferredHoster").split(",") + self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) + groups = {} + gid = -1 + seasonName = soup.find("a", attrs={"rel":"bookmark"}).string + for p in ps: + if re.search("Dauer|Sprache|Format", str(p)): + var = p.findAll("strong") + opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} + for v in var: + n = decode_htmlentities(v.string) + val = v.nextSibling + val = val.encode("utf-8") + val = decode_htmlentities(val) + val = val.replace(" |", "") + n = n.strip() + n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) + val = val.strip() + val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val) + opts[n.strip()] = val.strip() + gid += 1 + groups[gid] = {} + groups[gid]["ep"] = [] + groups[gid]["opts"] = opts + elif re.search("Download:", str(p)): + links1 = p.findAll("a", attrs={"href": hosterPattern}) + links2 = p.findAll("a", attrs={"href": re.compile("^http://serienjunkies.org/safe/.*$")}) + for link in links1 + links2: + groups[gid]["ep"].append(link["href"]) + packages = {} + for g in groups.values(): + links = [] + linklist = g["ep"] + package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"]) + linkgroups = {} + for link in linklist: + key = re.sub("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_", "", link) + if not linkgroups.has_key(key): + linkgroups[key] = [] + linkgroups[key].append(link) + for group in linkgroups.values(): + print "group", group + for pHoster in preferredHoster: + print "phoster", pHoster + hmatch = False + for link in group: + print "link", link + m = hosterPattern.match(link) + if m: + if pHoster == self.hosterMap[m.group(1)]: + links.append(link) + hmatch = True + print "match" + break + if hmatch: + break + packages[package] = links + return packages + def handleEpisode(self, url): if not self.parent.core.isGUIConnected(): - return False + raise CaptchaError for i in range(3): src = self.getSJSrc(url) if not src.find("Du hast das Download-Limit überschritten! Bitte versuche es später nocheinmal.") == -1: @@ -45,17 +180,7 @@ class SerienjunkiesOrg(Plugin): captchaTag = soup.find(attrs={"src":re.compile("^/secure/")}) captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"] captchaData = self.req.load(str(captchaUrl)) - captchaManager = self.parent.core.captchaManager - task = captchaManager.newTask(self) - task.setCaptcha(captchaData, "png") - task.setWaiting() - while not task.getStatus() == "done": - if not self.parent.core.isGUIConnected(): - task.removeTask() - return False - sleep(1) - result = task.getResult() - task.removeTask() + result = self.waitForCaptcha(captchaData, "png") url = "http://download.serienjunkies.org"+form["action"] sinp = form.find(attrs={"name":"s"}) @@ -73,6 +198,27 @@ class SerienjunkiesOrg(Plugin): links.append(self.handleFrame(frameUrl)) return links + def handleOldStyleLink(self, url): + if not self.parent.core.isGUIConnected(): + raise CaptchaError + for i in range(3): + sj = self.req.load(str(url)) + soup = BeautifulSoup(sj) + form = soup.find("form", attrs={"action":re.compile("^http://serienjunkies.org")}) + captchaTag = form.find(attrs={"src":re.compile("^/safe/secure/")}) + captchaUrl = "http://serienjunkies.org"+captchaTag["src"] + captchaData = self.req.load(str(captchaUrl)) + result = self.waitForCaptcha(captchaData, "png") + url = form["action"] + sinp = form.find(attrs={"name":"s"}) + + self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'dl.start': "Download"}, cookies=False, just_header=True) + decrypted = self.req.lastEffectiveURL + if decrypted == str(url): + continue + return [decrypted] + return False + def handleFrame(self, url): self.req.load(str(url), cookies=False, just_header=True) return self.req.lastEffectiveURL @@ -80,9 +226,15 @@ class SerienjunkiesOrg(Plugin): def proceed(self, url, location): links = False episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") + seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$") if framePattern.match(url): links = [self.handleFrame(url)] elif episodePattern.match(url): links = self.handleEpisode(url) + elif oldStyleLink.match(url): + links = self.handleOldStyleLink(url) + elif seasonPattern.match(url): + links = self.handleSeason(url) self.links = links -- cgit v1.2.3 From 9a5ae6ebec9a82170d194c9dd5465bd489477f22 Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 30 Dec 2009 17:49:46 +0100 Subject: removed prints --- module/plugins/decrypter/SerienjunkiesOrg.py | 4 ---- 1 file changed, 4 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index a73779dd3..ce1721e92 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -148,18 +148,14 @@ class SerienjunkiesOrg(Plugin): linkgroups[key] = [] linkgroups[key].append(link) for group in linkgroups.values(): - print "group", group for pHoster in preferredHoster: - print "phoster", pHoster hmatch = False for link in group: - print "link", link m = hosterPattern.match(link) if m: if pHoster == self.hosterMap[m.group(1)]: links.append(link) hmatch = True - print "match" break if hmatch: break -- cgit v1.2.3 From 4f904bd9610795c36d9e896bdf44c263ff43f5fd Mon Sep 17 00:00:00 2001 From: mkaay Date: Fri, 1 Jan 2010 17:13:43 +0100 Subject: fixed SerienjunkiesOrg, no more segfault in gui? --- module/plugins/decrypter/SerienjunkiesOrg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index ce1721e92..2f77b81ae 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -52,7 +52,7 @@ class SerienjunkiesOrg(Plugin): self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") - if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): + if episodePattern.match(self.parent.url) or oldStyleLink.match(self.parent.url): self.decryptNow = False else: self.decryptNow = True @@ -121,7 +121,7 @@ class SerienjunkiesOrg(Plugin): val = v.nextSibling val = val.encode("utf-8") val = decode_htmlentities(val) - val = val.replace(" |", "") + val = val.replace("|", "").strip() n = n.strip() n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) val = val.strip() -- cgit v1.2.3 From ae4f0dfc38c49e19ef2b290f0974df9923bf1b94 Mon Sep 17 00:00:00 2001 From: mkaay Date: Sun, 3 Jan 2010 16:21:53 +0100 Subject: SecuredIn plugin --- module/plugins/decrypter/SerienjunkiesOrg.py | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 2f77b81ae..15bee52ac 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,8 +6,6 @@ from time import sleep from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup -from module.download_thread import CaptchaError - from htmlentitydefs import name2codepoint as n2cp def substitute_entity(match): ent = match.group(2) @@ -87,20 +85,6 @@ class SerienjunkiesOrg(Plugin): def file_exists(self): return True - def waitForCaptcha(self, captchaData, imgType): - captchaManager = self.parent.core.captchaManager - task = captchaManager.newTask(self) - task.setCaptcha(captchaData, imgType) - task.setWaiting() - while not task.getStatus() == "done": - if not self.parent.core.isGUIConnected(): - task.removeTask() - raise CaptchaError - sleep(1) - result = task.getResult() - task.removeTask() - return result - def handleSeason(self, url): src = self.getSJSrc(url) soup = BeautifulSoup(src) -- cgit v1.2.3 From 3d655ddbfbd96abecb9a9c9bebf6e43eb710ab12 Mon Sep 17 00:00:00 2001 From: RaNaN Date: Sun, 10 Jan 2010 16:20:31 +0100 Subject: fixed manage.py, addBox working, some code formatted and cleaned --- module/plugins/decrypter/SerienjunkiesOrg.py | 1 - 1 file changed, 1 deletion(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 15bee52ac..7302f904e 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- import re -from time import sleep from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup -- cgit v1.2.3 From 2edf36eb45d427262e9f83af90b8b4fc2f69aab8 Mon Sep 17 00:00:00 2001 From: spoob Date: Fri, 15 Jan 2010 15:03:00 +0100 Subject: Better Argument Parsing in Core, littel fixes --- module/plugins/decrypter/SerienjunkiesOrg.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 7302f904e..7d45fd705 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -4,23 +4,7 @@ import re from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup - -from htmlentitydefs import name2codepoint as n2cp -def substitute_entity(match): - ent = match.group(2) - if match.group(1) == "#": - return unichr(int(ent)) - else: - cp = n2cp.get(ent) - if cp: - return unichr(cp) - else: - return match.group() - -def decode_htmlentities(string): - entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") - return entity_re.subn(substitute_entity, string)[0] - +from module.unescape import unescape class SerienjunkiesOrg(Plugin): def __init__(self, parent): @@ -100,10 +84,10 @@ class SerienjunkiesOrg(Plugin): var = p.findAll("strong") opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} for v in var: - n = decode_htmlentities(v.string) + n = unescape(v.string) val = v.nextSibling val = val.encode("utf-8") - val = decode_htmlentities(val) + val = unescape(val) val = val.replace("|", "").strip() n = n.strip() n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) -- cgit v1.2.3 From b4c21e149308dad7fca1ebd309b405864d38ad52 Mon Sep 17 00:00:00 2001 From: mkaay Date: Wed, 27 Jan 2010 15:51:36 +0100 Subject: fixed serienjunkies.org (new url pattern) --- module/plugins/decrypter/SerienjunkiesOrg.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'module/plugins/decrypter/SerienjunkiesOrg.py') diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 7d45fd705..af7dc8169 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -59,6 +59,16 @@ class SerienjunkiesOrg(Plugin): return True + def get_file_name(self): + showPattern = re.compile("^http://serienjunkies.org/serie/(.*)/$") + seasonPattern = re.compile("^http://serienjunkies.org/.*?/(.*)/$") + m = showPattern.match(self.parent.url) + if not m: + m = seasonPattern.match(self.parent.url) + if m: + return m.group(1) + return "n/a" + def getSJSrc(self, url): src = self.req.load(str(url)) if not src.find("Enter Serienjunkies") == -1: @@ -78,7 +88,7 @@ class SerienjunkiesOrg(Plugin): self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) groups = {} gid = -1 - seasonName = soup.find("a", attrs={"rel":"bookmark"}).string + seasonName = unescape(soup.find("a", attrs={"rel":"bookmark"}).string) for p in ps: if re.search("Dauer|Sprache|Format", str(p)): var = p.findAll("strong") @@ -191,13 +201,16 @@ class SerienjunkiesOrg(Plugin): episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") - seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$") + showPattern = re.compile("^http://serienjunkies.org/serie/.*/$") + seasonPattern = re.compile("^http://serienjunkies.org/.*?/.*/$") if framePattern.match(url): links = [self.handleFrame(url)] elif episodePattern.match(url): links = self.handleEpisode(url) elif oldStyleLink.match(url): links = self.handleOldStyleLink(url) + elif showPattern.match(url): + pass elif seasonPattern.match(url): links = self.handleSeason(url) self.links = links -- cgit v1.2.3