summary | refs | log | tree | commit | diff | stats
path: root/module/plugins/internal
diff options
context:
space:
mode:
authorGravatar Walter Purcaro <vuolter@gmail.com> 2014-10-06 14:45:15 +0200
committerGravatar Walter Purcaro <vuolter@gmail.com> 2014-10-06 14:45:15 +0200
commitcf57dbf9a57bd4078051688f8984a4c1fe6e4996 (patch)
tree4dab1da3fc0d107ceacec4cf74b70c6b589ab5ed /module/plugins/internal
parent[SimpleHoster] Better filename processing + info about new pattern attributes (diff)
downloadpyload-cf57dbf9a57bd4078051688f8984a4c1fe6e4996.tar.xz
[SimpleCrypter] Improve patterns
Diffstat (limited to 'module/plugins/internal')
-rw-r--r-- module/plugins/internal/SimpleCrypter.py 18
1 files changed, 10 insertions, 8 deletions
diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py
index 2873a7fa7..d69995402 100644
--- a/module/plugins/internal/SimpleCrypter.py
+++ b/module/plugins/internal/SimpleCrypter.py
@@ -4,13 +4,13 @@ import re
from module.plugins.Crypter import Crypter
from module.plugins.internal.SimpleHoster import PluginParseError, replace_patterns, set_cookies
-from module.utils import html_unescape
+from module.utils import fixup, html_unescape
class SimpleCrypter(Crypter):
__name__ = "SimpleCrypter"
__type__ = "crypter"
- __version__ = "0.12"
+ __version__ = "0.13"
__pattern__ = None
@@ -24,8 +24,8 @@ class SimpleCrypter(Crypter):
LINK_PATTERN: group(1) must be a download link or a regex to catch more links
example: LINK_PATTERN = r'<div class="link"><a href="(http://speedload.org/\w+)'
- TITLE_PATTERN: (optional) The group defined by 'title' should be the folder name or the webpage title
- example: TITLE_PATTERN = r'<title>Files of: (?P<title>[^<]+) folder</title>'
+ TITLE_PATTERN: (optional) group(1) should be the folder name or the webpage title
+ example: TITLE_PATTERN = r'<title>Files of: ([^<]+) folder</title>'
OFFLINE_PATTERN: (optional) Checks whether the file is still available online
example: OFFLINE_PATTERN = r'File (deleted|not found)'
@@ -39,8 +39,8 @@ class SimpleCrypter(Crypter):
If the links are split across multiple pages, you can define the PAGES_PATTERN regex:
- PAGES_PATTERN: (optional) The group defined by 'pages' should be the number of overall pages containing the links
- example: PAGES_PATTERN = r'Pages: (?P<pages>\d+)'
+ PAGES_PATTERN: (optional) group(1) should be the number of overall pages containing the links
+ example: PAGES_PATTERN = r'Pages: (\d+)'
and its loadPage method:
@@ -49,6 +49,7 @@ class SimpleCrypter(Crypter):
"""
+ TITLE_REPLACEMENTS = [("&#?\w+;", fixup)]
URL_REPLACEMENTS = []
TEXT_ENCODING = False #: Set to True or encoding name if encoding in http header is not correct
@@ -112,7 +113,8 @@ class SimpleCrypter(Crypter):
if hasattr(self, 'TITLE_PATTERN'):
m = re.search(self.TITLE_PATTERN, self.html)
if m:
- name = folder = html_unescape(m.group('title').strip())
+ name = replace_patterns(m.group(1).strip(), self.TITLE_REPLACEMENTS)
+ folder = html_unescape(name)
self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
return name, folder
@@ -125,7 +127,7 @@ class SimpleCrypter(Crypter):
def handleMultiPages(self):
pages = re.search(self.PAGES_PATTERN, self.html)
if pages:
- pages = int(pages.group('pages'))
+ pages = int(pages.group(1))
else:
pages = 1