diff options
author | mkaay <mkaay@mkaay.de> | 2009-12-30 17:33:14 +0100 |
---|---|---|
committer | mkaay <mkaay@mkaay.de> | 2009-12-30 17:33:14 +0100 |
commit | 7c28259f92c2b3c608583ff128a5ae4134d4c48f (patch) | |
tree | 1cc8d9e95c38f51b0efaef927e0036b677355068 | |
parent | signal slot stuff (diff) | |
download | pyload-7c28259f92c2b3c608583ff128a5ae4134d4c48f.tar.xz |
moved captcha stuff, extended serienjunkies, some other stuff
24 files changed, 206 insertions, 27 deletions
diff --git a/module/Plugin.py b/module/Plugin.py index f3830595d..c33e0d565 100644 --- a/module/Plugin.py +++ b/module/Plugin.py @@ -32,7 +32,7 @@ class Plugin(): self.config = {} props = {} props['name'] = "BasePlugin" - props['version'] = "0.2" + props['version'] = "0.3" props['pattern'] = None props['type'] = "hoster" props['description'] = """Base Plugin""" @@ -47,6 +47,7 @@ class Plugin(): self.multi_dl = True self.ocr = None #captcha reader instance self.logger = logging.getLogger("log") + self.decryptNow = True def prepare(self, thread): pyfile = self.parent @@ -127,7 +128,7 @@ class Plugin(): pass def init_ocr(self): - modul = __import__("module.captcha." + self.props['name'], fromlist=['captcha']) + modul = __import__("module.plugins.captcha." + self.props['name'], fromlist=['captcha']) captchaClass = getattr(modul, self.props['name']) self.ocr = captchaClass() diff --git a/module/captcha/LinksaveIn/bg/flecken_1.gif b/module/captcha/LinksaveIn/bg/flecken_1.gif Binary files differ deleted file mode 100644 index df2f51217..000000000 --- a/module/captcha/LinksaveIn/bg/flecken_1.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/flecken_2.gif b/module/captcha/LinksaveIn/bg/flecken_2.gif Binary files differ deleted file mode 100644 index 838276188..000000000 --- a/module/captcha/LinksaveIn/bg/flecken_2.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gewebe_fein.gif b/module/captcha/LinksaveIn/bg/gewebe_fein.gif Binary files differ deleted file mode 100644 index 502f18cc4..000000000 --- a/module/captcha/LinksaveIn/bg/gewebe_fein.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gewebe_grob.gif b/module/captcha/LinksaveIn/bg/gewebe_grob.gif Binary files differ deleted file mode 100644 index e66a365ad..000000000 --- a/module/captcha/LinksaveIn/bg/gewebe_grob.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/gitter.gif b/module/captcha/LinksaveIn/bg/gitter.gif Binary files differ deleted file mode 100644 index 
ec52ef68d..000000000 --- a/module/captcha/LinksaveIn/bg/gitter.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif b/module/captcha/LinksaveIn/bg/mauer_horizontal.gif Binary files differ deleted file mode 100644 index 3d75fafa8..000000000 --- a/module/captcha/LinksaveIn/bg/mauer_horizontal.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif b/module/captcha/LinksaveIn/bg/mauer_vertikal.gif Binary files differ deleted file mode 100644 index 2ada6fdae..000000000 --- a/module/captcha/LinksaveIn/bg/mauer_vertikal.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/scheckig.gif b/module/captcha/LinksaveIn/bg/scheckig.gif Binary files differ deleted file mode 100644 index 8bfb45c56..000000000 --- a/module/captcha/LinksaveIn/bg/scheckig.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/bg/wellen.gif b/module/captcha/LinksaveIn/bg/wellen.gif Binary files differ deleted file mode 100644 index a181ebe74..000000000 --- a/module/captcha/LinksaveIn/bg/wellen.gif +++ /dev/null diff --git a/module/captcha/LinksaveIn/tesser_conf b/module/captcha/LinksaveIn/tesser_conf deleted file mode 100644 index 34ca8fa02..000000000 --- a/module/captcha/LinksaveIn/tesser_conf +++ /dev/null @@ -1 +0,0 @@ -tessedit_char_whitelist 0123456789 diff --git a/module/config/plugin_default.xml b/module/config/plugin_default.xml index 01c0e7ed6..88628cb5e 100644 --- a/module/config/plugin_default.xml +++ b/module/config/plugin_default.xml @@ -36,4 +36,7 @@ <!-- False for no limitation --> <max_videos>False</max_videos> </YoutubeChannel> + <SerienjunkiesOrg> + <preferredHoster>RapidshareCom,UploadedTo,NetloadIn,FilefactoryCom</preferredHoster> + </SerienjunkiesOrg> </config> diff --git a/module/download_thread.py b/module/download_thread.py index 3c008d000..a07f4511f 100644 --- a/module/download_thread.py +++ b/module/download_thread.py @@ -66,6 +66,9 @@ class Checksum(Exception): def getFile(self): return self.file +class 
CaptchaError(Exception): + pass + class Download_Thread(threading.Thread): def __init__(self, parent): threading.Thread.__init__(self) @@ -93,8 +96,10 @@ class Download_Thread(threading.Thread): f = open("%s.info" % e.getFile(), "w") f.write("Checksum not matched!") f.close() + except CaptchaError: + self.loadedPyFile.status.type = "failed" + self.loadedPyFile.status.error = "Can't solve captcha" except Exception, e: - try: if self.parent.parent.config['general']['debug_mode']: traceback.print_exc() @@ -123,8 +128,11 @@ class Download_Thread(threading.Thread): pyfile.plugin.prepare(self) pyfile.plugin.req.set_timeout(self.parent.parent.config['general']['max_download_time']) - - status.type = "downloading" + + if pyfile.plugin.props["type"] == "container": + status.type = "decrypting" + else: + status.type = "downloading" location = join(pyfile.folder, status.filename) pyfile.plugin.proceed(status.url, location) diff --git a/module/file_list.py b/module/file_list.py index cc3b63006..8af66d5ed 100644 --- a/module/file_list.py +++ b/module/file_list.py @@ -129,7 +129,11 @@ class File_List(object): files = [] for pypack in self.data["queue"] + self.data["packages"]: for pyfile in pypack.files: - if pyfile.plugin.props['type'] == "container" and not pyfile.active: + if pyfile.status.type == None and pyfile.plugin.props['type'] == "container" and not pyfile.active: + files.append(pyfile) + for pypack in self.data["packages"]: + for pyfile in pypack.files: + if pyfile.status.type == None and pyfile.plugin.props['type'] == "container" and pyfile.plugin.decryptNow and not pyfile.active: files.append(pyfile) for pypack in self.data["queue"]: for pyfile in pypack.files: @@ -424,7 +428,7 @@ class PyLoadFile(): for dir in ["hoster", "decrypter", "container"]: try: self.modul = __import__("%s.%s" % (dir, pluginName), globals(), locals(), [pluginName], -1) - except: + except Exception, e: pass pluginClass = getattr(self.modul, pluginName) else: diff --git 
a/module/gui/CaptchaDock.py b/module/gui/CaptchaDock.py index 3dc9441a4..8a7e8010e 100644 --- a/module/gui/CaptchaDock.py +++ b/module/gui/CaptchaDock.py @@ -41,6 +41,7 @@ class CaptchaDock(QDockWidget): data = QByteArray(img) self.currentID = tid self.widget.emit(SIGNAL("setImage"), data) + self.widget.input.setText("") self.show() class CaptchaDockWidget(QWidget): diff --git a/module/captcha/GigasizeCom.py b/module/plugins/captcha/GigasizeCom.py index 136092181..136092181 100644 --- a/module/captcha/GigasizeCom.py +++ b/module/plugins/captcha/GigasizeCom.py diff --git a/module/captcha/LinksaveIn.py b/module/plugins/captcha/LinksaveIn.py index d6f61e362..d6f61e362 100644 --- a/module/captcha/LinksaveIn.py +++ b/module/plugins/captcha/LinksaveIn.py diff --git a/module/captcha/MegauploadCom.py b/module/plugins/captcha/MegauploadCom.py index 374bcd678..374bcd678 100644 --- a/module/captcha/MegauploadCom.py +++ b/module/plugins/captcha/MegauploadCom.py diff --git a/module/captcha/NetloadIn.py b/module/plugins/captcha/NetloadIn.py index 9799a6a2b..9799a6a2b 100644 --- a/module/captcha/NetloadIn.py +++ b/module/plugins/captcha/NetloadIn.py diff --git a/module/captcha/ShareonlineBiz.py b/module/plugins/captcha/ShareonlineBiz.py index 91124f181..91124f181 100644 --- a/module/captcha/ShareonlineBiz.py +++ b/module/plugins/captcha/ShareonlineBiz.py diff --git a/module/captcha/__init__.py b/module/plugins/captcha/__init__.py index e69de29bb..e69de29bb 100644 --- a/module/captcha/__init__.py +++ b/module/plugins/captcha/__init__.py diff --git a/module/captcha/captcha.py b/module/plugins/captcha/captcha.py index 283b171e0..283b171e0 100644 --- a/module/captcha/captcha.py +++ b/module/plugins/captcha/captcha.py diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py index 46f380857..a73779dd3 100644 --- a/module/plugins/decrypter/SerienjunkiesOrg.py +++ b/module/plugins/decrypter/SerienjunkiesOrg.py @@ -6,6 +6,25 @@ from time 
import sleep from module.Plugin import Plugin from module.BeautifulSoup import BeautifulSoup +from module.download_thread import CaptchaError + +from htmlentitydefs import name2codepoint as n2cp +def substitute_entity(match): + ent = match.group(2) + if match.group(1) == "#": + return unichr(int(ent)) + else: + cp = n2cp.get(ent) + if cp: + return unichr(cp) + else: + return match.group() + +def decode_htmlentities(string): + entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});") + return entity_re.subn(substitute_entity, string)[0] + + class SerienjunkiesOrg(Plugin): def __init__(self, parent): Plugin.__init__(self, parent) @@ -13,7 +32,7 @@ class SerienjunkiesOrg(Plugin): props['name'] = "SerienjunkiesOrg" props['type'] = "container" props['pattern'] = r"http://.*?serienjunkies.org/.*?" - props['version'] = "0.1" + props['version'] = "0.2" props['description'] = """serienjunkies.org Container Plugin""" props['author_name'] = ("mkaay") props['author_mail'] = ("mkaay@mkaay.de") @@ -21,6 +40,43 @@ class SerienjunkiesOrg(Plugin): self.parent = parent self.html = None self.multi_dl = False + + self.hosterMap = { + "rc": "RapidshareCom", + "ff": "FilefactoryCom", + "ut": "UploadedTo", + "ul": "UploadedTo", + "nl": "NetloadIn", + "rs": "RapidshareDe" + } + self.hosterMapReverse = dict((v,k) for k, v in self.hosterMap.iteritems()) + episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$") + if re.match(episodePattern, self.parent.url) or re.match(oldStyleLink, self.parent.url): + self.decryptNow = False + else: + self.decryptNow = True + + def prepare(self, thread): + pyfile = self.parent + + self.want_reconnect = False + + pyfile.status.exists = self.file_exists() + + if not pyfile.status.exists: + raise Exception, "File not found" + return False + + pyfile.status.filename = self.get_file_name() + + pyfile.status.waituntil = self.time_plus_wait + pyfile.status.url = self.get_file_url() + 
pyfile.status.want_reconnect = self.want_reconnect + + thread.wait(self.parent) + + return True def getSJSrc(self, url): src = self.req.load(str(url)) @@ -31,9 +87,88 @@ class SerienjunkiesOrg(Plugin): def file_exists(self): return True + def waitForCaptcha(self, captchaData, imgType): + captchaManager = self.parent.core.captchaManager + task = captchaManager.newTask(self) + task.setCaptcha(captchaData, imgType) + task.setWaiting() + while not task.getStatus() == "done": + if not self.parent.core.isGUIConnected(): + task.removeTask() + raise CaptchaError + sleep(1) + result = task.getResult() + task.removeTask() + return result + + def handleSeason(self, url): + src = self.getSJSrc(url) + soup = BeautifulSoup(src) + post = soup.find("div", attrs={"class": "post-content"}) + ps = post.findAll("p") + hosterPattern = re.compile("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_.*?\.html$") + preferredHoster = self.get_config("preferredHoster").split(",") + self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster)) + groups = {} + gid = -1 + seasonName = soup.find("a", attrs={"rel":"bookmark"}).string + for p in ps: + if re.search("<strong>Dauer|<strong>Sprache|<strong>Format", str(p)): + var = p.findAll("strong") + opts = {"Dauer": "", "Uploader": "", "Sprache": "", "Format": "", u"Größe": ""} + for v in var: + n = decode_htmlentities(v.string) + val = v.nextSibling + val = val.encode("utf-8") + val = decode_htmlentities(val) + val = val.replace(" |", "") + n = n.strip() + n = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', n) + val = val.strip() + val = re.sub(r"^([:]?)(.*?)([:]?)$", r'\2', val) + opts[n.strip()] = val.strip() + gid += 1 + groups[gid] = {} + groups[gid]["ep"] = [] + groups[gid]["opts"] = opts + elif re.search("<strong>Download:", str(p)): + links1 = p.findAll("a", attrs={"href": hosterPattern}) + links2 = p.findAll("a", attrs={"href": re.compile("^http://serienjunkies.org/safe/.*$")}) + for link in links1 + links2: + 
groups[gid]["ep"].append(link["href"]) + packages = {} + for g in groups.values(): + links = [] + linklist = g["ep"] + package = "%s (%s, %s)" % (seasonName, g["opts"]["Format"], g["opts"]["Sprache"]) + linkgroups = {} + for link in linklist: + key = re.sub("^http://download\.serienjunkies\.org/f-.*?/([rcfultns]{2})_", "", link) + if not linkgroups.has_key(key): + linkgroups[key] = [] + linkgroups[key].append(link) + for group in linkgroups.values(): + print "group", group + for pHoster in preferredHoster: + print "phoster", pHoster + hmatch = False + for link in group: + print "link", link + m = hosterPattern.match(link) + if m: + if pHoster == self.hosterMap[m.group(1)]: + links.append(link) + hmatch = True + print "match" + break + if hmatch: + break + packages[package] = links + return packages + def handleEpisode(self, url): if not self.parent.core.isGUIConnected(): - return False + raise CaptchaError for i in range(3): src = self.getSJSrc(url) if not src.find("Du hast das Download-Limit überschritten! 
Bitte versuche es später nocheinmal.") == -1: @@ -45,17 +180,7 @@ class SerienjunkiesOrg(Plugin): captchaTag = soup.find(attrs={"src":re.compile("^/secure/")}) captchaUrl = "http://download.serienjunkies.org"+captchaTag["src"] captchaData = self.req.load(str(captchaUrl)) - captchaManager = self.parent.core.captchaManager - task = captchaManager.newTask(self) - task.setCaptcha(captchaData, "png") - task.setWaiting() - while not task.getStatus() == "done": - if not self.parent.core.isGUIConnected(): - task.removeTask() - return False - sleep(1) - result = task.getResult() - task.removeTask() + result = self.waitForCaptcha(captchaData, "png") url = "http://download.serienjunkies.org"+form["action"] sinp = form.find(attrs={"name":"s"}) @@ -73,6 +198,27 @@ class SerienjunkiesOrg(Plugin): links.append(self.handleFrame(frameUrl)) return links + def handleOldStyleLink(self, url): + if not self.parent.core.isGUIConnected(): + raise CaptchaError + for i in range(3): + sj = self.req.load(str(url)) + soup = BeautifulSoup(sj) + form = soup.find("form", attrs={"action":re.compile("^http://serienjunkies.org")}) + captchaTag = form.find(attrs={"src":re.compile("^/safe/secure/")}) + captchaUrl = "http://serienjunkies.org"+captchaTag["src"] + captchaData = self.req.load(str(captchaUrl)) + result = self.waitForCaptcha(captchaData, "png") + url = form["action"] + sinp = form.find(attrs={"name":"s"}) + + self.req.load(str(url), post={'s': sinp["value"], 'c': result, 'dl.start': "Download"}, cookies=False, just_header=True) + decrypted = self.req.lastEffectiveURL + if decrypted == str(url): + continue + return [decrypted] + return False + def handleFrame(self, url): self.req.load(str(url), cookies=False, just_header=True) return self.req.lastEffectiveURL @@ -80,9 +226,15 @@ class SerienjunkiesOrg(Plugin): def proceed(self, url, location): links = False episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$") + oldStyleLink = 
re.compile("^http://serienjunkies.org/safe/(.*)$") framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$") + seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$") if framePattern.match(url): links = [self.handleFrame(url)] elif episodePattern.match(url): links = self.handleEpisode(url) + elif oldStyleLink.match(url): + links = self.handleOldStyleLink(url) + elif seasonPattern.match(url): + links = self.handleSeason(url) self.links = links diff --git a/module/thread_list.py b/module/thread_list.py index ad0d0c8fb..d3eb4d203 100644 --- a/module/thread_list.py +++ b/module/thread_list.py @@ -110,15 +110,26 @@ class Thread_List(object): if pyfile.plugin.props['type'] == "container": newLinks = 0 if pyfile.plugin.links: - for link in pyfile.plugin.links: - newFile = self.list.collector.addLink(link) - self.list.packager.addFileToPackage(pyfile.package.data["id"], self.list.collector.popFile(newFile)) - newLinks += 1 - self.list.packager.pushPackage2Queue(pyfile.package.data["id"]) + if isinstance(pyfile.plugin.links, dict): + packmap = {} + for packname in pyfile.plugin.links.keys(): + packmap[packname] = self.list.packager.addNewPackage(packname) + for packname, links in pyfile.plugin.links.items(): + pid = packmap[packname] + for link in links: + newFile = self.list.collector.addLink(link) + self.list.packager.addFileToPackage(pid, self.list.collector.popFile(newFile)) + newLinks += 1 + else: + for link in pyfile.plugin.links: + newFile = self.list.collector.addLink(link) + self.list.packager.addFileToPackage(pyfile.package.data["id"], self.list.collector.popFile(newFile)) + newLinks += 1 + #self.list.packager.pushPackage2Queue(pyfile.package.data["id"]) self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.data["id"]) if newLinks: - self.parent.logger.info("Parsed link from %s: %i" % (pyfile.status.filename, newLinks)) + self.parent.logger.info("Parsed links from %s: %i" % (pyfile.status.filename, newLinks)) else: 
self.parent.logger.info("No links in %s" % pyfile.status.filename) #~ self.list.packager.removeFileFromPackage(pyfile.id, pyfile.package.id) |