diff options
-rw-r--r-- | module/plugins/crypter/EasybytezComFolder.py | 33 | ||||
-rw-r--r-- | module/plugins/internal/SimpleCrypter.py | 34 |
2 files changed, 33 insertions, 34 deletions
diff --git a/module/plugins/crypter/EasybytezComFolder.py b/module/plugins/crypter/EasybytezComFolder.py index 1b887e421..83ec6472e 100644 --- a/module/plugins/crypter/EasybytezComFolder.py +++ b/module/plugins/crypter/EasybytezComFolder.py @@ -15,8 +15,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. # ############################################################################ -import re - from module.plugins.internal.SimpleCrypter import SimpleCrypter @@ -24,37 +22,14 @@ class EasybytezComFolder(SimpleCrypter): __name__ = "EasybytezComFolder" __type__ = "crypter" __pattern__ = r"https?://(www\.)?easybytez\.com/users/\w+/\w+" - __version__ = "0.01" + __version__ = "0.02" __description__ = """Easybytez Crypter Plugin""" __author_name__ = ("stickell") __author_mail__ = ("l.stickell@yahoo.it") LINK_PATTERN = r'<div class="link"><a href="(http://www\.easybytez\.com/\w+)" target="_blank">.+</a></div>' TITLE_PATTERN = r'<Title>Files of (?P<title>.+) folder</Title>' - PAGES_PATTERN = r"<a href='[^']+'>(\d+)</a><a href='[^']+'>Next »</a><br><small>\(\d+ total\)</small></div>" - - def decrypt(self, pyfile): - self.html = self.load(pyfile.url, decode=True) - - package_name, folder_name = self.getPackageNameAndFolder() - - package_links = re.findall(self.LINK_PATTERN, self.html) - - pages = re.search(self.PAGES_PATTERN, self.html) - if pages: - pages = int(pages.group(1)) - else: - pages = 1 - - p = 2 - while p <= pages: - self.html = self.load(pyfile.url, get={'page': p}, decode=True) - package_links += re.findall(self.LINK_PATTERN, self.html) - p += 1 - - self.logDebug('Package has %d links' % len(package_links)) + PAGES_PATTERN = r"<a href='[^']+'>(?P<pages>\d+)</a><a href='[^']+'>Next »</a><br><small>\(\d+ total\)</small></div>" - if package_links: - self.packages = [(package_name, package_links, folder_name)] - else: - self.fail('Could not extract any links')
\ No newline at end of file + def loadPage(self, page_n): + return self.load(self.pyfile.url, get={'page': page_n}, decode=True) diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index d935bf1da..546b920e0 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -25,7 +25,7 @@ from module.utils import html_unescape class SimpleCrypter(Crypter): __name__ = "SimpleCrypter" - __version__ = "0.04" + __version__ = "0.05" __pattern__ = None __type__ = "crypter" __description__ = """Base crypter plugin""" @@ -39,6 +39,15 @@ class SimpleCrypter(Crypter): TITLE_PATTERN: (optional) the group defined by 'title' should be the title example: <title>Files of: (?P<title>[^<]+) folder</title> + + If the links are disposed on multiple pages you need to define a pattern: + + PAGES_PATTERN: the group defined by 'pages' must be the total number of pages + + and a function: + + loadPage(self, page_n): + must return the html of the page number 'page_n' """ def decrypt(self, pyfile): @@ -46,11 +55,15 @@ class SimpleCrypter(Crypter): package_name, folder_name = self.getPackageNameAndFolder() - package_links = re.findall(self.LINK_PATTERN, self.html) - self.logDebug('Package has %d links' % len(package_links)) + self.package_links = re.findall(self.LINK_PATTERN, self.html) + + if hasattr(self, 'PAGES_PATTERN') and hasattr(self, 'loadPage'): + self.handleMultiPages() - if package_links: - self.packages = [(package_name, package_links, folder_name)] + self.logDebug('Package has %d links' % len(self.package_links)) + + if self.package_links: + self.packages = [(package_name, self.package_links, folder_name)] else: self.fail('Could not extract any links') @@ -66,3 +79,14 @@ class SimpleCrypter(Crypter): folder = self.pyfile.package().folder self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder)) return name, folder + + def handleMultiPages(self): + pages = re.search(self.PAGES_PATTERN, self.html) + if pages: + pages = int(pages.group('pages')) + else: + pages = 1 + + for p in range(2, pages + 1): + self.html = self.loadPage(p) + self.package_links += re.findall(self.LINK_PATTERN, self.html) |