From b4c21e149308dad7fca1ebd309b405864d38ad52 Mon Sep 17 00:00:00 2001
From: mkaay
Date: Wed, 27 Jan 2010 15:51:36 +0100
Subject: fixed serienjunkies.org (new url pattern)

---
 module/plugins/decrypter/SerienjunkiesOrg.py | 17 +++++++++++++++--
 module/unescape.py | 28 +++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'module')

diff --git a/module/plugins/decrypter/SerienjunkiesOrg.py b/module/plugins/decrypter/SerienjunkiesOrg.py
index 7d45fd705..af7dc8169 100644
--- a/module/plugins/decrypter/SerienjunkiesOrg.py
+++ b/module/plugins/decrypter/SerienjunkiesOrg.py
@@ -59,6 +59,16 @@ class SerienjunkiesOrg(Plugin):
 
         return True
 
+    def get_file_name(self):
+        showPattern = re.compile("^http://serienjunkies.org/serie/(.*)/$")
+        seasonPattern = re.compile("^http://serienjunkies.org/.*?/(.*)/$")
+        m = showPattern.match(self.parent.url)
+        if not m:
+            m = seasonPattern.match(self.parent.url)
+        if m:
+            return m.group(1)
+        return "n/a"
+
     def getSJSrc(self, url):
         src = self.req.load(str(url))
         if not src.find("Enter Serienjunkies") == -1:
@@ -78,7 +88,7 @@ class SerienjunkiesOrg(Plugin):
         self.logger.debug("Preferred hoster: %s" % ", ".join(preferredHoster))
         groups = {}
         gid = -1
-        seasonName = soup.find("a", attrs={"rel":"bookmark"}).string
+        seasonName = unescape(soup.find("a", attrs={"rel":"bookmark"}).string)
         for p in ps:
             if re.search("Dauer|Sprache|Format", str(p)):
                 var = p.findAll("strong")
@@ -191,13 +201,16 @@ class SerienjunkiesOrg(Plugin):
         episodePattern = re.compile("^http://download.serienjunkies.org/f-.*?.html$")
         oldStyleLink = re.compile("^http://serienjunkies.org/safe/(.*)$")
         framePattern = re.compile("^http://download.serienjunkies.org/frame/go-.*?/$")
-        seasonPattern = re.compile("^http://serienjunkies.org/\?p=.*?$")
+        showPattern = re.compile("^http://serienjunkies.org/serie/.*/$")
+        seasonPattern = re.compile("^http://serienjunkies.org/.*?/.*/$")
         if framePattern.match(url):
             links = [self.handleFrame(url)]
         elif episodePattern.match(url):
             links = self.handleEpisode(url)
         elif oldStyleLink.match(url):
             links = self.handleOldStyleLink(url)
+        elif showPattern.match(url):
+            pass
         elif seasonPattern.match(url):
             links = self.handleSeason(url)
         self.links = links
diff --git a/module/unescape.py b/module/unescape.py
index 462423b03..59f35f36b 100644
--- a/module/unescape.py
+++ b/module/unescape.py
@@ -1,12 +1,25 @@
+from htmlentitydefs import name2codepoint as n2cp
+import re
+
+def substitute_entity(match):
+    ent = match.group(2)
+    if match.group(1) == "#":
+        return unichr(int(ent))
+    else:
+        cp = n2cp.get(ent)
+        if cp:
+            return unichr(cp)
+        else:
+            return match.group()
+
+def unescape(string):
+    entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
+    return entity_re.subn(substitute_entity, string)[0]
+
+"""
 import re
 
 def unescape(text):
-    """Removes HTML or XML character references
-    and entities from a text string.
-    keep &, >, < in the source code.
-    from Fredrik Lundh
-    http://effbot.org/zone/re-sub.htm#unescape-html
-    """
     def fixup(m):
         text = m.group(0)
         if text[:2] == "&#":
@@ -35,4 +48,5 @@ def unescape(text):
                 print "keyerror"
                 pass
         return text # leave as is
-    return str(re.sub("&#?\w+;", fixup, text))
+    return re.sub("&#?\w+;", fixup, text)
+"""
-- 
cgit v1.2.3