From 121024ca7ee705e2fed5e8705d25484288922b41 Mon Sep 17 00:00:00 2001 From: Christopher <4Christopher@gmx.de> Date: Tue, 12 Mar 2013 12:55:55 +0100 Subject: Movie2kTo: fixed hoster recognition It appeared to me that the hoster and the id of a video can be present in either a JavaScript section or in an HTML section on *one* page. I first assumed that all hosters on one page can be found either in a JavaScript section or in an HTML section. --- module/plugins/crypter/Movie2kTo.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'module/plugins/crypter/Movie2kTo.py') diff --git a/module/plugins/crypter/Movie2kTo.py b/module/plugins/crypter/Movie2kTo.py index c32f6f930..3be3be73c 100644 --- a/module/plugins/crypter/Movie2kTo.py +++ b/module/plugins/crypter/Movie2kTo.py @@ -89,17 +89,12 @@ class Movie2kTo(Crypter): re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)') re_hoster_id_html = re.compile(r'') ## I assume that the ID is 7 digits longs - if re_hoster_id_js.search(self.html): - re_hoster_id = re_hoster_id_js - self.logDebug('Assuming that the ID can be found in a JavaScript section.') - elif re_hoster_id_html.search(self.html): - re_hoster_id = re_hoster_id_html - self.logDebug('Assuming that the ID can be found in a HTML section.') count = defaultdict(int) - for h_id, hoster in re_hoster_id.findall(self.html): - # self.logDebug('Hoster %s' % hoster) + matches = re_hoster_id_js.findall(self.html) + matches += re_hoster_id_html.findall(self.html) + for h_id, hoster in matches: if hoster in accepted_hosters: - # self.logDebug('Accepted %s' % hoster) + self.logDebug('Accepted: %s, ID: %s' % (hoster, h_id)) count[hoster] += 1 if count[hoster] <= firstN: if h_id != self.id: @@ -112,6 +107,8 @@ class Movie2kTo(Crypter): links.append(url) except: self.logDebug('Failed to find the URL') + else: + self.logDebug('Not accepted: %s, ID: %s' % (hoster, h_id)) self.logDebug(links) return links -- cgit v1.2.3 From 
c8867a5291cb1ccdd5e7f9cadd06f7f600d6920d Mon Sep 17 00:00:00 2001 From: Christopher <4Christopher@gmx.de> Date: Wed, 13 Mar 2013 17:37:38 +0100 Subject: Movie2kTo: Changed regex. I hope it now matches every site. --- module/plugins/crypter/Movie2kTo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'module/plugins/crypter/Movie2kTo.py') diff --git a/module/plugins/crypter/Movie2kTo.py b/module/plugins/crypter/Movie2kTo.py index 3be3be73c..506ad2eaf 100644 --- a/module/plugins/crypter/Movie2kTo.py +++ b/module/plugins/crypter/Movie2kTo.py @@ -87,7 +87,7 @@ class Movie2kTo(Crypter): links = [] ## h_id: hoster_id of a possible hoster re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)') - re_hoster_id_html = re.compile(r'') + re_hoster_id_html = re.compile(r']+?)') ## I assume that the ID is 7 digits longs count = defaultdict(int) matches = re_hoster_id_js.findall(self.html) -- cgit v1.2.3 From 843afa1d72956f4e5ab35c1908586501e50c4e99 Mon Sep 17 00:00:00 2001 From: Christopher <4Christopher@gmx.de> Date: Wed, 13 Mar 2013 19:06:24 +0100 Subject: Movie2kTo: Added quality recognition. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sites without a quality value will fail with this version … But on the other hand the current pattern relies on the quality value … --- module/plugins/crypter/Movie2kTo.py | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'module/plugins/crypter/Movie2kTo.py') diff --git a/module/plugins/crypter/Movie2kTo.py b/module/plugins/crypter/Movie2kTo.py index 506ad2eaf..7595c845d 100644 --- a/module/plugins/crypter/Movie2kTo.py +++ b/module/plugins/crypter/Movie2kTo.py @@ -27,6 +27,7 @@ class Movie2kTo(Crypter): def decrypt(self, pyfile): self.package = pyfile.package() self.folder = self.package.folder + self.q = [] ## to calculate the average, min and max of the quality whole_season = self.getConfig('whole_season') everything = self.getConfig('everything') self.getInfo(pyfile.url) @@ -46,10 +47,18 @@ class Movie2kTo(Crypter): season_links += self.getInfoAndLinks('%s/%s' % (self.BASE_URL, url_path)) self.logDebug(season_links) - self.packages.append(('%s: Season %s' % (self.name, season), season_links, 'Season %s' % season)) + self.packages.append(('%s: Season %s (%s)' + % (self.name, season, self.qStat()), season_links, 'Season %s' % season)) + self.q = [] else: - self.packages.append((self.package.name, self.getLinks(), self.package.folder)) + links = self.getLinks() + self.package.name = '%s%s' % (self.package.name, self.qStat()) + self.packages.append((self.package.name, links , self.package.folder)) + def qStat(self): + if len(self.q) == 0: return '' + return (' (Average quality: %d, min: %d, max: %d)' + % (sum(self.q) / float(len(self.q)), min(self.q), max(self.q))) def tvshow_number(self, number): if int(number) < 10: return '0%s' % number @@ -85,18 +94,22 @@ class Movie2kTo(Crypter): accepted_hosters = re.findall(r'\b(\w+?)\b', self.getConfig('accepted_hosters')) firstN = self.getConfig('firstN') links = [] - ## h_id: 
hoster_id of a possible hoster - re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)') - re_hoster_id_html = re.compile(r']+?)') + re_quality = r'.+?Quality:.+?smileys/(\d)\.gif' + ## The quality is one digit. 0 is the worst and 5 is the best. + ## Is not always there … + re_hoster_id_html = re.compile(r']+?)' + re_quality) + re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)' + re_quality) ## I assume that the ID is 7 digits longs count = defaultdict(int) - matches = re_hoster_id_js.findall(self.html) - matches += re_hoster_id_html.findall(self.html) - for h_id, hoster in matches: + matches = re_hoster_id_html.findall(self.html) + matches += re_hoster_id_js.findall(self.html) + ## h_id: hoster_id of a possible hoster + for h_id, hoster, quality in matches: if hoster in accepted_hosters: - self.logDebug('Accepted: %s, ID: %s' % (hoster, h_id)) + self.logDebug('Accepted: %s, ID: %s, Quality: %s' % (hoster, h_id, quality)) count[hoster] += 1 if count[hoster] <= firstN: + self.q.append(int(quality)) if h_id != self.id: self.html = self.load('%s/tvshows-%s-%s.html' % (self.BASE_URL, h_id, self.name)) else: @@ -108,7 +121,7 @@ class Movie2kTo(Crypter): except: self.logDebug('Failed to find the URL') else: - self.logDebug('Not accepted: %s, ID: %s' % (hoster, h_id)) + self.logDebug('Not accepted: %s, ID: %s, Quality: %s' % (hoster, h_id, quality)) - self.logDebug(links) + # self.logDebug(links) return links -- cgit v1.2.3 From 1e8bd2ee4ed5818eb1b79c36946579229adede9e Mon Sep 17 00:00:00 2001 From: Christopher <4Christopher@gmx.de> Date: Wed, 13 Mar 2013 20:28:44 +0100 Subject: Movie2kTo: Fixed and tested it. 
--- module/plugins/crypter/Movie2kTo.py | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'module/plugins/crypter/Movie2kTo.py') diff --git a/module/plugins/crypter/Movie2kTo.py b/module/plugins/crypter/Movie2kTo.py index 7595c845d..f5800b498 100644 --- a/module/plugins/crypter/Movie2kTo.py +++ b/module/plugins/crypter/Movie2kTo.py @@ -27,7 +27,7 @@ class Movie2kTo(Crypter): def decrypt(self, pyfile): self.package = pyfile.package() self.folder = self.package.folder - self.q = [] ## to calculate the average, min and max of the quality + self.qStatReset() whole_season = self.getConfig('whole_season') everything = self.getConfig('everything') self.getInfo(pyfile.url) @@ -49,7 +49,7 @@ class Movie2kTo(Crypter): self.logDebug(season_links) self.packages.append(('%s: Season %s (%s)' % (self.name, season, self.qStat()), season_links, 'Season %s' % season)) - self.q = [] + self.qStatReset() else: links = self.getLinks() self.package.name = '%s%s' % (self.package.name, self.qStat()) @@ -57,8 +57,11 @@ class Movie2kTo(Crypter): def qStat(self): if len(self.q) == 0: return '' - return (' (Average quality: %d, min: %d, max: %d)' - % (sum(self.q) / float(len(self.q)), min(self.q), max(self.q))) + return (' (Average quality: %d, min: %d, max: %d, %s, max (all hosters): %d)' + % (sum(self.q) / float(len(self.q)), min(self.q), max(self.q), self.q, self.max_q)) + def qStatReset(self): + self.q = [] ## to calculate the average, min and max of the quality + self.max_q = None def tvshow_number(self, number): if int(number) < 10: return '0%s' % number @@ -94,22 +97,33 @@ class Movie2kTo(Crypter): accepted_hosters = re.findall(r'\b(\w+?)\b', self.getConfig('accepted_hosters')) firstN = self.getConfig('firstN') links = [] - re_quality = r'.+?Quality:.+?smileys/(\d)\.gif' + re_quality = re.compile(r'.+?Quality:.+?smileys/(\d)\.gif') ## The quality is one digit. 0 is the worst and 5 is the best. 
## Is not always there … - re_hoster_id_html = re.compile(r']+?)' + re_quality) - re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)' + re_quality) + re_hoster_id_html = re.compile(r'(?:]+?)(.+?)') + re_hoster_id_js = re.compile(r'links\[(\d+?)\].+ (.+?)(.+?)') ## I assume that the ID is 7 digits longs count = defaultdict(int) matches = re_hoster_id_html.findall(self.html) matches += re_hoster_id_js.findall(self.html) + # self.logDebug(matches) ## h_id: hoster_id of a possible hoster - for h_id, hoster, quality in matches: + for h_id, hoster, q_html in matches: + match_q = re_quality.search(q_html) + if match_q: + quality = int(match_q.group(1)) + if self.max_q: + if self.max_q < quality: self.max_q = quality + else: ## was None before + self.max_q = quality + q_s = ', Quality: %d' % quality + else: + q_s = ', unknown quality' if hoster in accepted_hosters: - self.logDebug('Accepted: %s, ID: %s, Quality: %s' % (hoster, h_id, quality)) + self.logDebug('Accepted: %s, ID: %s%s' % (hoster, h_id, q_s)) count[hoster] += 1 if count[hoster] <= firstN: - self.q.append(int(quality)) + if match_q: self.q.append(quality) if h_id != self.id: self.html = self.load('%s/tvshows-%s-%s.html' % (self.BASE_URL, h_id, self.name)) else: @@ -121,7 +135,7 @@ class Movie2kTo(Crypter): except: self.logDebug('Failed to find the URL') else: - self.logDebug('Not accepted: %s, ID: %s, Quality: %s' % (hoster, h_id, quality)) + self.logDebug('Not accepted: %s, ID: %s%s' % (hoster, h_id, q_s)) # self.logDebug(links) return links -- cgit v1.2.3