diff options
Diffstat (limited to 'module/plugins/internal')
25 files changed, 1253 insertions, 956 deletions
diff --git a/module/plugins/internal/Account.py b/module/plugins/internal/Account.py index a6d2ffbf1..469e27cfa 100644 --- a/module/plugins/internal/Account.py +++ b/module/plugins/internal/Account.py @@ -2,18 +2,17 @@ import random import re -import time import threading +import time -from module.plugins.Plugin import SkipDownload as Skip -from module.plugins.internal.Plugin import Plugin, parse_size -from module.utils import compare_time, lock +from module.plugins.internal.Plugin import Plugin, Skip +from module.plugins.internal.utils import compare_time, isiterable, lock, parse_size class Account(Plugin): __name__ = "Account" __type__ = "account" - __version__ = "0.62" + __version__ = "0.63" __status__ = "testing" __description__ = """Base account plugin""" @@ -40,7 +39,7 @@ class Account(Plugin): #: Callback of periodical job task self.cb = None - self.interval = self.PERIODICAL_INTERVAL + self.interval = None self.init() @@ -144,7 +143,7 @@ class Account(Plugin): self.signin(self.user, self.info['login']['password'], self.info['data']) except Skip, e: - self.log_debug(e) + self.log_warning(_("Skipped login user `%s`"), e) self.info['login']['valid'] = True new_timeout = timestamp - self.info['login']['timestamp'] @@ -190,14 +189,13 @@ class Account(Plugin): u.update(self.info['login']) else: - d = {'login': {'password' : u['password'], - 'timestamp': u['timestamp'], - 'valid' : u['valid']}, - 'data' : {'maxtraffic' : u['maxtraffic'], - 'options' : u['options'], - 'premium' : u['premium'], - 'trafficleft': u['trafficleft'], - 'validuntil' : u['validuntil']}} + d = {'login': {}, 'data': {}} + + for k, v in u.items(): + if k in ('password', 'timestamp', 'valid'): + d['login'][k] = v + else: + d['data'][k] = v self.info.update(d) @@ -209,13 +207,9 @@ class Account(Plugin): def reset(self): self.sync() - d = {'maxtraffic' : None, - 'options' : {'limitdl': ['0']}, - 'premium' : None, - 'trafficleft': None, - 'validuntil' : None} - - self.info['data'].update(d) + clear = lambda x: {} if isinstance(x, dict) else [] if isiterable(x) else None + self.info['data'] = dict((k, clear(v)) for k, v in self.info['data']) + self.info['data']['options'] = {'limitdl': ['0']} self.syncback() @@ -242,9 +236,7 @@ class Account(Plugin): self.syncback() - safe_info = dict(self.info) - safe_info['login']['password'] = "**********" - self.log_debug("Account info for user `%s`: %s" % (self.user, safe_info)) + self.log_debug("Account info for user `%s`: %s" % (self.user, self.info)) return self.info @@ -460,18 +452,9 @@ class Account(Plugin): ########################################################################### def parse_traffic(self, size, unit="byte"): #@NOTE: Returns kilobytes in 0.4.9 - unit = unit.lower().strip() #@TODO: Remove in 0.4.10 - size = re.search(r'(\d*[\.,]?\d+)', size).group(1) #@TODO: Recheck in 0.4.10 - - self.log_debug("Size: %s" % size, "Unit: %s" % unit) - - #@NOTE: Remove in 0.4.10 - if unit.startswith('t'): - traffic = float(size.replace(',', '.')) * 1 << 40 - else: - traffic = parse_size(size, unit) - - return traffic / 1024 #@TODO: Remove `/ 1024` in 0.4.10 + self.log_debug("Size: %s" % size, + "Unit: %s" % (unit if unit is not "byte" else "N/D")) + return parse_size(size, unit) / 1024 #@TODO: Remove `/ 1024` in 0.4.10 def fail_login(self, msg=_("Login handshake has failed")): diff --git a/module/plugins/internal/Addon.py b/module/plugins/internal/Addon.py index e41325a80..5f2e53bf0 100644 --- a/module/plugins/internal/Addon.py +++ b/module/plugins/internal/Addon.py @@ -13,7 +13,6 @@ class Expose(object): def threaded(fn): - def run(*args, **kwargs): hookManager.startThread(fn, *args, **kwargs) @@ -23,7 +22,7 @@ def threaded(fn): class Addon(Plugin): __name__ = "Addon" __type__ = "hook" #@TODO: Change to `addon` in 0.4.10 - __version__ = "0.13" + __version__ = "0.14" __status__ = "testing" __threaded__ = [] #@TODO: Remove in 0.4.10 @@ -53,12 +52,18 @@ class Addon(Plugin): #: Callback of periodical job task, used by HookManager self.cb = None - self.interval = self.PERIODICAL_INTERVAL + self.interval = None self.init() self.init_events() + #@TODO: Remove in 0.4.10 + def _log(self, level, plugintype, pluginname, messages): + plugintype = "addon" if plugintype is "hook" else plugintype + return super(Addon, self)._log(level, plugintype, pluginname, messages) + + def init_events(self): if self.event_map: for event, funcs in self.event_map.items(): @@ -126,14 +131,6 @@ class Addon(Plugin): raise NotImplementedError - def save_info(self): - self.store("info", self.info) - - - def restore_info(self): - self.retrieve("info", self.info) - - @property def activated(self): """ @@ -156,7 +153,7 @@ class Addon(Plugin): #: Deprecated method, use `deactivate` instead (Remove in 0.4.10) def unload(self, *args, **kwargs): - self.save_info() + self.store("info", self.info) return self.deactivate(*args, **kwargs) @@ -169,7 +166,11 @@ class Addon(Plugin): #: Deprecated method, use `activate` instead (Remove in 0.4.10) def coreReady(self, *args, **kwargs): - self.restore_info() + self.retrieve("info", self.info) + + if self.PERIODICAL_INTERVAL: + self.start_periodical(self.PERIODICAL_INTERVAL, delay=5) + return self.activate(*args, **kwargs) diff --git a/module/plugins/internal/Base.py b/module/plugins/internal/Base.py index 6f0a902f3..494d1a8ac 100644 --- a/module/plugins/internal/Base.py +++ b/module/plugins/internal/Base.py @@ -34,20 +34,10 @@ def create_getInfo(klass): return get_info -#@NOTE: `check_abort` decorator -def check_abort(fn): - - def wrapper(self, *args, **kwargs): - self.check_abort() - return fn(self, *args, **kwargs) - - return wrapper - - class Base(Plugin): __name__ = "Base" __type__ = "base" - __version__ = "0.11" + __version__ = "0.13" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -106,11 +96,11 @@ class Base(Plugin): def _log(self, level, plugintype, pluginname, messages): log = getattr(self.pyload.log, level) msg = u" | ".join(decode(a).strip() for a in messages if a) - log("%(plugintype)s %(pluginname)s[%(id)s]: %(msg)s" - % {'plugintype': plugintype.upper(), - 'pluginname': pluginname, - 'id' : self.pyfile.id, - 'msg' : msg}) + log("%(plugintype)s %(pluginname)s[%(id)s]: %(msg)s" % + {'plugintype': plugintype.upper(), + 'pluginname': pluginname, + 'id' : self.pyfile.id, + 'msg' : msg}) @classmethod @@ -176,6 +166,7 @@ class Base(Plugin): self.req = self.pyload.requestFactory.getRequest(self.classname) self.premium = False + self.grab_info() self.setup_base() self.setup() @@ -195,22 +186,101 @@ class Base(Plugin): self.account = False + def _update_name(self): + name = self.info.get('name') + + if name and name is not self.info.get('url'): + self.pyfile.name = name + else: + name = self.pyfile.name + + self.log_info(_("Link name: ") + name) + + + def _update_size(self): + size = self.info.get('size') + + if size > 0: + self.pyfile.size = int(self.info['size']) #@TODO: Fix int conversion in 0.4.10 + else: + size = self.pyfile.size + + if size: + self.log_info(_("Link size: %s bytes") % size) + else: + self.log_info(_("Link size: N/D")) + + + def _update_status(self): + self.pyfile.status = self.info.get('status', 14) + self.pyfile.sync() + + self.log_info(_("Link status: ") + self.pyfile.getStatusName()) + + + def sync_info(self): + self._update_name() + self._update_size() + self._update_status() + + + def grab_info(self): + self.log_info(_("Grabbing link info...")) + + old_info = dict(self.info) + new_info = self.get_info(self.pyfile.url, self.html) + + self.info.update(new_info) + + self.log_debug("Link info: %s" % self.info) + self.log_debug("Previous link info: %s" % old_info) + + self.sync_info() + + + def check_status(self): + status = self.pyfile.status + + if status is 1: + self.offline() + + elif status is 4: + self.skip(self.pyfile.statusname) + + elif status is 6: + self.temp_offline() + + elif status is 8: + self.fail() + + elif status is 9 or self.pyfile.abort: + self.abort() + + def _process(self, thread): """ Handles important things to do before starting """ - self.thread = thread + self.log_debug("Plugin version: " + self.__version__) + self.log_debug("Plugin status: " + self.__status__) + + if self.__status__ is "broken": + self.fail(_("Plugin is temporarily unavailable")) + elif self.__status__ is "testing": + self.log_warning(_("Plugin may be unstable")) + + self.thread = thread self._setup() # self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10 - self.check_abort() + self.check_status() self.pyfile.setStatus("starting") - self.log_debug("PROCESS URL " + self.pyfile.url, - "PLUGIN VERSION %s" % self.__version__) + self.log_info(_("Processing url: ") + self.pyfile.url) self.process(self.pyfile) + self.check_status() #: Deprecated method, use `_process` instead (Remove in 0.4.10) @@ -276,16 +346,16 @@ class Base(Plugin): if self.wantReconnect: self.log_info(_("Requiring reconnection...")) if self.account: - self.log_warning("Ignore reconnection due logged account") + self.log_warning("Reconnection ignored due logged account") if not self.wantReconnect or self.account: while pyfile.waitUntil > time.time(): - self.check_abort() + self.check_status() time.sleep(2) else: while pyfile.waitUntil > time.time(): - self.check_abort() + self.check_status() self.thread.m.reconnecting.wait(1) if self.thread.m.reconnecting.isSet(): @@ -309,7 +379,7 @@ class Base(Plugin): #@TODO: Remove in 0.4.10 - def fail(self, msg): + def fail(self, msg=""): """ Fail and give msg """ @@ -363,7 +433,7 @@ class Base(Plugin): if not premium: if self.premium: - self.rst_free = True + self.restart_free = True else: self.fail("%s | %s" % (msg, _("Url was already processed as free"))) @@ -405,10 +475,8 @@ class Base(Plugin): def fixurl(self, url, baseurl=None, unquote=True): - #url = fixurl(url, unquote=False) - - if not baseurl: - baseurl = fixurl(self.pyfile.url) + url = fixurl(url, unquote=True) + baseurl = fixurl(baseurl or self.pyfile.url, unquote=True) if not urlparse.urlparse(url).scheme: url_p = urlparse.urlparse(baseurl) @@ -418,114 +486,11 @@ class Base(Plugin): return fixurl(url, unquote) - @check_abort def load(self, *args, **kwargs): + self.check_status() return super(Base, self).load(*args, **kwargs) - def check_abort(self): - if not self.pyfile.abort: - return - - if self.pyfile.status is 8: - self.fail() - - elif self.pyfile.status is 4: - self.skip(self.pyfile.statusname) - - elif self.pyfile.status is 1: - self.offline() - - elif self.pyfile.status is 6: - self.temp_offline() - - else: - self.abort() - - - def direct_link(self, url, redirect=False): - link = "" - - if not redirect: - conn = 1 - - elif type(redirect) is int: - conn = max(redirect, 1) - - else: - conn = self.get_config("maxredirs", 5, plugin="UserAgentSwitcher") - - for i in xrange(conn): - try: - self.log_debug("Redirect #%d to: %s" % (i, url)) - header = self.load(url, just_header=True) - - except Exception: #: Bad bad bad... rewrite this part in 0.4.10 - res = self.load(url, - just_header=True, - req=self.pyload.requestFactory.getRequest(self.classname)) - - header = {'code': req.code} - for line in res.splitlines(): - line = line.strip() - if not line or ":" not in line: - continue - - key, none, value = line.partition(":") - key = key.lower().strip() - value = value.strip() - - if key in header: - header_key = header.get(key) - if type(header_key) is list: - header_key.append(value) - else: - header[key] = [header_key, value] - else: - header[key] = value - - if 'content-disposition' in header: - link = url - - elif header.get('location'): - location = self.fixurl(header.get('location'), url) - - if header.get('code') == 302: - link = location - - if redirect: - url = location - continue - - else: - extension = os.path.splitext(parse_name(url))[-1] - - if header.get('content-type'): - mimetype = header.get('content-type').split(';')[0].strip() - - elif extension: - mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" - - else: - mimetype = "" - - if mimetype and (link or 'html' not in mimetype): - link = url - else: - link = "" - - break - - else: - try: - self.log_error(_("Too many redirects")) - - except Exception: - pass - - return link - - def parse_html_form(self, attr_str="", input_names={}): return parse_html_form(attr_str, self.html, input_names) diff --git a/module/plugins/internal/Captcha.py b/module/plugins/internal/Captcha.py index a8f48b5e4..5993f0771 100644 --- a/module/plugins/internal/Captcha.py +++ b/module/plugins/internal/Captcha.py @@ -6,12 +6,13 @@ import os import time from module.plugins.internal.Plugin import Plugin +from module.plugins.internal.utils import encode class Captcha(Plugin): __name__ = "Captcha" __type__ = "captcha" - __version__ = "0.46" + __version__ = "0.47" __status__ = "testing" __description__ = """Base anti-captcha plugin""" @@ -36,10 +37,8 @@ class Captcha(Plugin): def _log(self, level, plugintype, pluginname, messages): - return self.plugin._log(level, - plugintype, - self.plugin.__name__, - (self.__name__,) + messages) + messages = (self.__name__,) + messages + return self.plugin._log(level, plugintype, self.plugin.__name__, messages) def recognize(self, image): @@ -75,7 +74,7 @@ class Captcha(Plugin): time_ref = ("%.2f" % time.time())[-6:].replace(".", "") with open(os.path.join("tmp", "captcha_image_%s_%s.%s" % (self.plugin.__name__, time_ref, input_type)), "wb") as tmp_img: - tmp_img.write(data) + tmp_img.write(encode(data)) if ocr: if isinstance(ocr, basestring): @@ -94,7 +93,7 @@ class Captcha(Plugin): self.task.setWaiting(max(timeout, 50)) #@TODO: Move to `CaptchaManager` in 0.4.10 while self.task.isWaiting(): - self.plugin.check_abort() + self.plugin.check_status() time.sleep(1) finally: @@ -124,7 +123,7 @@ class Captcha(Plugin): if not self.task: return - self.log_error(_("Invalid captcha")) + self.log_warning(_("Invalid captcha")) self.task.invalid() diff --git a/module/plugins/internal/CaptchaService.py b/module/plugins/internal/CaptchaService.py index 20dc60427..96195a6f2 100644 --- a/module/plugins/internal/CaptchaService.py +++ b/module/plugins/internal/CaptchaService.py @@ -6,7 +6,7 @@ from module.plugins.internal.Captcha import Captcha class CaptchaService(Captcha): __name__ = "CaptchaService" __type__ = "captcha" - __version__ = "0.32" + __version__ = "0.33" __status__ = "testing" __description__ = """Base anti-captcha service plugin""" diff --git a/module/plugins/internal/Container.py b/module/plugins/internal/Container.py index 96c7a450c..946953db5 100644 --- a/module/plugins/internal/Container.py +++ b/module/plugins/internal/Container.py @@ -6,14 +6,13 @@ import os import re from module.plugins.internal.Crypter import Crypter -from module.plugins.internal.Plugin import exists -from module.utils import save_join as fs_join +from module.plugins.internal.utils import encode, exists, fs_join class Container(Crypter): __name__ = "Container" __type__ = "container" - __version__ = "0.07" + __version__ = "0.08" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -55,7 +54,7 @@ class Container(Crypter): self.pyfile.url = fs_join(self.pyload.config.get("general", "download_folder"), self.pyfile.name) try: with open(self.pyfile.url, "wb") as f: - f.write(content) + f.write(encode(content)) except IOError, e: self.fail(e) diff --git a/module/plugins/internal/Crypter.py b/module/plugins/internal/Crypter.py index 77b5d74e9..3e5b1f59d 100644 --- a/module/plugins/internal/Crypter.py +++ b/module/plugins/internal/Crypter.py @@ -1,14 +1,13 @@ # -*- coding: utf-8 -*- -from module.plugins.internal.Base import Base, check_abort, create_getInfo, parse_fileInfo -from module.plugins.internal.Plugin import parse_name -from module.utils import save_path as safe_filename +from module.plugins.internal.Base import Base, create_getInfo, parse_fileInfo +from module.plugins.internal.utils import fixname, parse_name class Crypter(Base): __name__ = "Crypter" __type__ = "crypter" - __version__ = "0.13" + __version__ = "0.14" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -88,7 +87,7 @@ class Crypter(Base): self.pyload.api.setPackageData(pid, {'password': package_password}) #: Workaround to do not break API addPackage method - set_folder = lambda x="": self.pyload.api.setPackageData(pid, {'folder': safe_filename(x)}) + set_folder = lambda x="": self.pyload.api.setPackageData(pid, {'folder': fixname(x)}) if use_subfolder: if not subfolder_per_package: diff --git a/module/plugins/internal/DeadCrypter.py b/module/plugins/internal/DeadCrypter.py index 28ae6ffdb..5618667ba 100644 --- a/module/plugins/internal/DeadCrypter.py +++ b/module/plugins/internal/DeadCrypter.py @@ -6,8 +6,8 @@ from module.plugins.internal.Crypter import Crypter, create_getInfo class DeadCrypter(Crypter): __name__ = "DeadCrypter" __type__ = "crypter" - __version__ = "0.09" - __status__ = "testing" + __version__ = "0.10" + __status__ = "stable" __pattern__ = r'^unmatchable$' __config__ = [("activated", "bool", "Activated", True)] diff --git a/module/plugins/internal/DeadHoster.py b/module/plugins/internal/DeadHoster.py index b7c90ffee..329f2fdea 100644 --- a/module/plugins/internal/DeadHoster.py +++ b/module/plugins/internal/DeadHoster.py @@ -6,8 +6,8 @@ from module.plugins.internal.Hoster import Hoster, create_getInfo class DeadHoster(Hoster): __name__ = "DeadHoster" __type__ = "hoster" - __version__ = "0.19" - __status__ = "testing" + __version__ = "0.20" + __status__ = "stable" __pattern__ = r'^unmatchable$' __config__ = [("activated", "bool", "Activated", True)] diff --git a/module/plugins/internal/Extractor.py b/module/plugins/internal/Extractor.py index 3ab5d6a0d..6629b0652 100644 --- a/module/plugins/internal/Extractor.py +++ b/module/plugins/internal/Extractor.py @@ -5,7 +5,19 @@ import re from module.PyFile import PyFile from module.plugins.internal.Plugin import Plugin -from module.utils import fs_encode + + +def renice(pid, value): + if not value or os.name is "nt": + return + + try: + subprocess.Popen(["renice", str(value), str(pid)], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + bufsize=-1) + except Exception: + pass class ArchiveError(Exception): @@ -23,7 +35,7 @@ class PasswordError(Exception): class Extractor(Plugin): __name__ = "Extractor" __type__ = "extractor" - __version__ = "0.35" + __version__ = "0.36" __status__ = "testing" __description__ = """Base extractor plugin""" @@ -73,15 +85,14 @@ class Extractor(Plugin): @property def target(self): - return fs_encode(self.filename) + return encode(self.filename) def __init__(self, plugin, filename, out, fullpath=True, overwrite=False, excludefiles=[], - renice=0, - delete='No', + renice=False, keepbroken=False, fid=None): """ @@ -95,8 +106,7 @@ class Extractor(Plugin): self.fullpath = fullpath self.overwrite = overwrite self.excludefiles = excludefiles - self.renice = renice - self.delete = delete + self.priority = int(priority) self.keepbroken = keepbroken self.files = [] #: Store extracted files here @@ -114,10 +124,8 @@ class Extractor(Plugin): def _log(self, level, plugintype, pluginname, messages): - return self.plugin._log(level, - plugintype, - self.plugin.__name__, - (self.__name__,) + messages) + messages = (self.__name__,) + messages + return self.plugin._log(level, plugintype, self.plugin.__name__, messages) def verify(self, password=None): diff --git a/module/plugins/internal/Hoster.py b/module/plugins/internal/Hoster.py index b233f8755..f042fb257 100644 --- a/module/plugins/internal/Hoster.py +++ b/module/plugins/internal/Hoster.py @@ -5,22 +5,23 @@ from __future__ import with_statement import os import re -from module.plugins.internal.Base import Base, check_abort, create_getInfo, parse_fileInfo -from module.plugins.internal.Plugin import Fail, Retry, encode, exists, fixurl, parse_name -from module.utils import fs_decode, fs_encode, save_join as fs_join, save_path as safe_filename +from module.network.HTTPRequest import BadHeader +from module.plugins.internal.Base import Base, create_getInfo, parse_fileInfo +from module.plugins.internal.Plugin import Fail, Retry +from module.plugins.internal.utils import encode, exists, fixurl, fs_join, parse_name class Hoster(Base): __name__ = "Hoster" __type__ = "hoster" - __version__ = "0.37" + __version__ = "0.38" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("activated" , "bool", "Activated" , True), - ("use_premium" , "bool", "Use premium account if available" , True), - ("fallback_premium", "bool", "Fallback to free download if premium fails", True), - ("chk_filesize" , "bool", "Check file size" , True)] + __config__ = [("activated" , "bool", "Activated" , True), + ("use_premium" , "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails", True), + ("chk_filesize", "bool", "Check file size" , True)] __description__ = """Base hoster plugin""" __license__ = "GPLv3" @@ -44,13 +45,13 @@ class Hoster(Base): self.last_check = None #: Restart flag - self.rst_free = False #@TODO: Recheck in 0.4.10 + self.restart_free = False #@TODO: Recheck in 0.4.10 def setup_base(self): self.last_download = None self.last_check = None - self.rst_free = False + self.restart_free = False if self.account: self.chunk_limit = -1 #: -1 for unlimited @@ -61,39 +62,39 @@ class Hoster(Base): def load_account(self): - if self.rst_free: + if self.restart_free: self.account = False self.user = None #@TODO: Remove in 0.4.10 else: super(Hoster, self).load_account() - # self.rst_free = False + # self.restart_free = False def _process(self, thread): - """ - Handles important things to do before starting - """ - self.thread = thread + self.log_debug("Plugin version: " + self.__version__) + self.log_debug("Plugin status: " + self.__status__) + if self.__status__ is "broken": + self.fail(_("Plugin is temporarily unavailable")) + + elif self.__status__ is "testing": + self.log_warning(_("Plugin may be unstable")) + + self.thread = thread self._setup() # self.pyload.hookManager.downloadPreparing(self.pyfile) #@TODO: Recheck in 0.4.10 - self.check_abort() + self.check_status() self.pyfile.setStatus("starting") try: - self.log_debug("PROCESS URL " + self.pyfile.url, - "PLUGIN VERSION %s" % self.__version__) #@TODO: Remove in 0.4.10 self.process(self.pyfile) - - self.check_abort() - - self.log_debug("CHECK DOWNLOAD") #@TODO: Recheck in 0.4.10 + self.check_status() self._check_download() except Fail, e: #@TODO: Move to PluginThread in 0.4.10 - if self.get_config('fallback_premium', True) and self.premium: + if self.get_config('fallback', True) and self.premium: self.log_warning(_("Premium download failed"), e) self.restart(premium=False) @@ -101,7 +102,62 @@ class Hoster(Base): raise Fail(e) - @check_abort + def is_download(self, url, resume=None, redirect=True): + link = False + maxredirs = 10 + + if resume is None: + resume = self.resume_download + + if type(redirect) is int: + maxredirs = max(redirect, 1) + + elif redirect: + maxredirs = self.get_config("maxredirs", default=maxredirs, plugin="UserAgentSwitcher") + + for i in xrange(maxredirs): + self.log_debug("Redirect #%d to: %s" % (i, url)) + + header = self.load(url, just_header=True) + + if 'content-disposition' in header: + link = url + + elif header.get('location'): + location = self.fixurl(header.get('location'), url) + code = header.get('code') + + if code is 302: + link = location + + elif code is 301: + url = location + if redirect: + continue + + if resume: + url = location + continue + + else: + mimetype = "" + contenttype = header.get('content-type') + extension = os.path.splitext(parse_name(url))[-1] + + if contenttype: + mimetype = contenttype.split(';')[0].strip() + + elif extension: + mimetype = mimetypes.guess_type(extension, False)[0] or "application/octet-stream" + + if mimetype and (link or 'html' not in mimetype): + link = url + else: + link = False + + return link + + def download(self, url, get={}, post={}, ref=True, cookies=True, disposition=True, resume=None, chunks=None): """ Downloads the content at url to download folder @@ -115,13 +171,19 @@ class Hoster(Base): the filename will be changed if needed :return: The location where the file was saved """ + self.check_status() + if self.pyload.debug: self.log_debug("DOWNLOAD URL " + url, *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url", "_[1]")]) - url = self.fixurl(url) + dl_url = self.is_download(url, resume) + dl_basename = parse_name(self.pyfile.name) + + self.pyfile.name = dl_basename - self.pyfile.name = parse_name(self.pyfile.name) #: Safe check + if not dl_url: + self.error("Invalid download url") self.captcha.correct() @@ -130,63 +192,75 @@ class Hoster(Base): self.pyfile.setStatus("downloading") - download_folder = self.pyload.config.get("general", "download_folder") - download_location = fs_join(download_folder, self.pyfile.package().folder) + dl_folder = self.pyload.config.get("general", "download_folder") + dl_dirname = os.path.join(dl_folder, self.pyfile.package().folder) + dl_filename = os.path.join(dl_dirname, dl_basename) - if not exists(download_location): + dl_dir = encode(dl_dirname) + dl_file = encode(dl_filename) #@TODO: Move safe-filename check to HTTPDownload in 0.4.10 + + if not exists(dl_dir): try: - os.makedirs(download_location) + os.makedirs(dl_dir) except Exception, e: self.fail(e) - self.set_permissions(download_location) - - location = fs_decode(download_location) - filename = os.path.join(location, safe_filename(self.pyfile.name)) #@TODO: Move `safe_filename` check to HTTPDownload in 0.4.10 + self.set_permissions(dl_dir) - self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, url, filename) + self.pyload.hookManager.dispatchEvent("download_start", self.pyfile, dl_url, dl_filename) + self.check_status() - self.check_abort() + dl_chunks = self.pyload.config.get("download", "chunks") + chunk_limit = chunks or self.chunk_limit or -1 - chunks = min(self.pyload.config.get("download", "chunks"), chunks or self.chunk_limit or -1) + if dl_chunks is -1 or chunk_limit is -1: + chunks = max(dl_chunks, chunk_limit) + else: + chunks = min(dl_chunks, chunk_limit) - if resume is None: - resume = self.resume_download + resume = self.resume_download if resume is None else bool(resume) try: - newname = self.req.httpDownload(url, filename, get=get, post=post, ref=ref, + newname = self.req.httpDownload(dl_url, dl_file, get=get, post=post, ref=ref, cookies=cookies, chunks=chunks, resume=resume, progressNotify=self.pyfile.setProgress, disposition=disposition) + except BadHeader, e: + if e.code in (404, 410): + self.pyfile.setStatus("offline") + raise BadHeader(e) + finally: self.pyfile.size = self.req.size #@TODO: Recheck in 0.4.10 if disposition and newname: - finalname = parse_name(newname).split(' filename*=')[0] + safename = parse_name(newname.split(' filename*=')[0]) - if finalname != newname: + if safename != newname: try: - oldname_enc = fs_join(download_location, newname) - newname_enc = fs_join(download_location, finalname) - os.rename(oldname_enc, newname_enc) + old_file = fs_join(dl_dirname, newname) + new_file = fs_join(dl_dirname, safename) + os.rename(old_file, new_file) except OSError, e: self.log_warning(_("Error renaming `%s` to `%s`") - % (newname, finalname), e) - finalname = newname + % (newname, safename), e) + safename = newname + + self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, safename)) - self.log_info(_("`%s` saved as `%s`") % (self.pyfile.name, finalname)) + self.pyfile.name = safename - self.pyfile.name = finalname - filename = os.path.join(location, finalname) + dl_filename = os.path.join(dl_dirname, safename) + dl_file = encode(dl_filename) - self.set_permissions(fs_encode(filename)) + self.set_permissions(dl_file) - self.last_download = filename + self.last_download = dl_filename - return filename + return dl_filename def check_filesize(self, file_size, size_tolerance=1024): @@ -199,18 +273,18 @@ class Hoster(Base): if not self.last_download: return - download_location = fs_encode(self.last_download) - download_size = os.stat(download_location).st_size + dl_location = encode(self.last_download) + dl_size = os.stat(dl_location).st_size - if download_size < 1: + if dl_size < 1: self.fail(_("Empty file")) elif file_size > 0: - diff = abs(file_size - download_size) + diff = abs(file_size - dl_size) if diff > size_tolerance: self.fail(_("File size mismatch | Expected file size: %s | Downloaded file size: %s") - % (file_size, download_size)) + % (file_size, dl_size)) elif diff != 0: self.log_warning(_("File size is not equal to expected size")) @@ -228,7 +302,7 @@ class Hoster(Base): :return: dictionary key of the first rule that matched """ do_delete = False - last_download = fs_encode(self.last_download) #@TODO: Recheck in 0.4.10 + last_download = encode(self.last_download) #@TODO: Recheck in 0.4.10 if not self.last_download or not exists(last_download): self.fail(self.pyfile.error or _("No file downloaded")) @@ -267,6 +341,8 @@ class Hoster(Base): def _check_download(self): + self.log_info(_("Checking downloaded file...")) + if self.captcha.task and not self.last_download: self.retry_captcha() @@ -279,6 +355,9 @@ class Hoster(Base): # For example displayed size can be 1.46GB for example, but real size can be 1.4649853GB self.check_filesize(self.info['size'], size_tolerance=10485760) + else: + self.log_info(_("File is OK")) + def check_traffic(self): if not self.account: @@ -319,18 +398,18 @@ class Hoster(Base): if pyfile.status in (0, 5, 7, 12): #: (finished, waiting, starting, downloading) self.skip(pyfile.pluginname) - download_folder = self.pyload.config.get("general", "download_folder") - package_folder = pack.folder if self.pyload.config.get("general", "folder_per_package") else "" - download_location = fs_join(download_folder, package_folder, self.pyfile.name) + dl_folder = self.pyload.config.get("general", "download_folder") + package_folder = pack.folder if self.pyload.config.get("general", "folder_per_package") else "" + dl_location = fs_join(dl_folder, package_folder, self.pyfile.name) - if not exists(download_location): + if not exists(dl_location): return pyfile = self.pyload.db.findDuplicates(self.pyfile.id, package_folder, self.pyfile.name) if pyfile: self.skip(pyfile[0]) - size = os.stat(download_location).st_size + size = os.stat(dl_location).st_size if size >= self.pyfile.size: self.skip(_("File exists")) diff --git a/module/plugins/internal/MultiAccount.py b/module/plugins/internal/MultiAccount.py index b38670ce7..f9252cc10 100644 --- a/module/plugins/internal/MultiAccount.py +++ b/module/plugins/internal/MultiAccount.py @@ -4,27 +4,28 @@ import re import time from module.plugins.internal.Account import Account -from module.utils import decode, remove_chars +from module.plugins.internal.utils import decode, remove_chars, uniqify class MultiAccount(Account): __name__ = "MultiAccount" __type__ = "account" - __version__ = "0.02" - __status__ = "testing" + __version__ = "0.04" + __status__ = "broken" - __config__ = [("pluginmode" , "all;listed;unlisted", "Use for plugins" , "all"), - ("pluginlist" , "str" , "Plugin list (comma separated)", "" ), - ("reload" , "bool" , "Reload plugin list" , True ), - ("reloadinterval", "int" , "Reload interval in hours" , 12 )] + __config__ = [("activated" , "bool" , "Activated" , True ), + ("multi" , "bool" , "Multi-hoster" , True ), + ("multi_mode" , "all;listed;unlisted", "Hosters to use" , "all"), + ("multi_list" , "str" , "Hoster list (comma separated)", "" ), + ("multi_interval", "int" , "Reload interval in hours" , 12 )] - __description__ = """Multi hoster account plugin""" + __description__ = """Multi-hoster account plugin""" __license__ = "GPLv3" - __authors__ = [("pyLoad Team" , "admin@pyload.org" ), - ("Walter Purcaro", "vuolter@gmail.com")] + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] - REFRESH_INTERVAL = 1 * 60 * 60 #: 1 hour + # PERIODICAL_INTERVAL = 1 * 60 * 60 #: 1 hour + PERIODICAL_LOGIN = False DOMAIN_REPLACEMENTS = [(r'180upload\.com' , "hundredeightyupload.com"), (r'bayfiles\.net' , "bayfiles.com" ), @@ -54,153 +55,96 @@ class MultiAccount(Account): (r'^0' , "zero" )] - - - - - - - - - - - - - - - - - - - - - - - - - - - - def init(self): - self.plugins = [] - self.supported = [] - self.new_supported = [] + self.plugins = [] + self.supported = [] + self.newsupported = [] - self.account = None self.pluginclass = None self.pluginmodule = None - self.pluginname = None self.plugintype = None self.init_plugin() def init_plugin(self): - self.pluginname = self.__name__.rsplit("Hook", 1)[0] - plugin, self.plugintype = self.pyload.pluginManager.findPlugin(self.pluginname) + plugin, self.plugintype = self.pyload.pluginManager.findPlugin(self.classname) if plugin: - self.pluginmodule = self.pyload.pluginManager.loadModule(self.plugintype, self.pluginname) - self.pluginclass = getattr(self.pluginmodule, self.pluginname) + self.pluginmodule = self.pyload.pluginManager.loadModule(self.plugintype, self.classname) + self.pluginclass = self.pyload.pluginManager.loadClass(self.plugintype, self.classname) else: - self.log_warning(_("Hook plugin will be deactivated due missing plugin reference")) - self.set_config('activated', False) + self.log_warning(_("Multi-hoster feature will be deactivated due missing plugin reference")) + self.set_config('multi', False) - def load_account(self): - self.account = self.pyload.accountManager.getAccountPlugin(self.pluginname) + def activate(self): + interval = self.get_config('multi_interval') * 60 * 60 + self.start_periodical(interval, threaded=True) - if self.account and not self.account.select()[0]: - self.account = False - if not self.account and hasattr(self.pluginclass, "LOGIN_ACCOUNT") and self.pluginclass.LOGIN_ACCOUNT: - self.log_warning(_("Hook plugin will be deactivated due missing account reference")) - self.set_config('activated', False) + def replace_domains(self, list): + for r in self.DOMAIN_REPLACEMENTS: + pattern, repl = r + regex = re.compile(pattern, re.I | re.U) + domains = [regex.sub(repl, domain) if regex.match(domain) else domain for domain in list] + return domains - def activate(self): - self.init_periodical(threaded=True) + def parse_domains(self, list): + regexp = re.compile(r'^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:[\d.]+|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)', + re.I | re.U) - def plugins_cached(self): - if self.plugins: - return self.plugins + r'^(?:https?://)?(?:www\.)?(?:\w+\.)*((?:[\d.]+|[\w\-^_]{3,63}(?:\.[a-zA-Z]{2,}){1,2})(?:\:\d+)?)' - for _i in xrange(5): - try: - pluginset = self._plugin_set(self.grab_hosters()) - break + domains = [decode(domain).strip().lower() for url in list for domain in regexp.findall(url)] + return self.replace_domains(uniqify(domains)) - except Exception, e: - self.log_warning(e, _("Waiting 1 minute and retry")) - time.sleep(60) - else: - self.log_error(_("No hoster list retrieved")) - self.interval = self.REFRESH_INTERVAL - return list() + def _grab_hosters(self): try: - configmode = self.get_config('pluginmode', 'all') - if configmode in ("listed", "unlisted"): - pluginlist = self.get_config('pluginlist', '').replace('|', ',').replace(';', ',').split(',') - configset = self._plugin_set(pluginlist) + hosterlist = self.grab_hosters(self.user, self.info['login']['password'], self.info['data']) - if configmode == "listed": - pluginset &= configset - else: - pluginset -= configset + if hosterlist and isinstance(hosterlist, list): + domains = self.parse_domains(hosterlist) + self.info['data']['hosters'] = sorted(domains) except Exception, e: - self.log_error(e) + self.log_warning(_("Error loading hoster list for user `%s`") % self.user, e, trace=True) - self.plugins = list(pluginset) - - return self.plugins - - - def _plugin_set(self, plugins): - regexp = re.compile(r'^[\w\-.^_]{3,63}\.[a-zA-Z]{2,}$', re.U) - plugins = [decode(p.strip()).lower() for p in plugins if regexp.match(p.strip())] - - for r in self.DOMAIN_REPLACEMENTS: - rf, rt = r - repr = re.compile(rf, re.I|re.U) - plugins = [re.sub(rf, rt, p) if repr.match(p) else p for p in plugins] - - return set(plugins) + finally: + return self.info['data']['hosters'] def grab_hosters(self, user, password, data): """ Load list of supported hoster - :return: List of domain names """ raise NotImplementedError def periodical(self): - """ - Reload plugin list periodically - """ - self.load_account() - - if self.get_config('reload', True): - self.interval = max(self.get_config('reloadinterval', 12) * 60 * 60, self.REFRESH_INTERVAL) + if not self.info['data'].get('hosters'): + self.log_info(_("Loading hoster list for user `%s`...") % self.user) else: - self.pyload.scheduler.removeJob(self.cb) - self.cb = None + self.log_info(_("Reloading hoster list for user `%s`...") % self.user) + + if self.PERIODICAL_LOGIN and not self.logged: + self.relogin() - self.log_info(_("Reloading supported %s list") % self.plugintype) + hosters = self._grab_hosters() + + self.log_debug("Hoster list for user `%s`: %s" % (self.user, hosters)) old_supported = self.supported - self.supported = [] - self.new_supported = [] - self.plugins = [] + self.supported = [] + self.newsupported = [] + self.plugins = [] - self.override_plugins() + self._override() old_supported = [plugin for plugin in old_supported if plugin not in self.supported] @@ -209,8 +153,10 @@ class MultiAccount(Account): for plugin in old_supported: self.unload_plugin(plugin) + self.set_interval(self.get_config('multi_interval') * 60 * 60) + - def override_plugins(self): + def _override(self): excludedList = [] if self.plugintype == "hoster": @@ -229,9 +175,9 @@ class MultiAccount(Account): if name in pluginMap: self.supported.append(pluginMap[name]) else: - self.new_supported.append(plugin) + self.newsupported.append(plugin) - if not self.supported and not self.new_supported: + if not self.supported and not self.newsupported: self.log_error(_("No %s loaded") % self.plugintype) return @@ -241,13 +187,13 @@ class MultiAccount(Account): for plugin in self.supported: hdict = self.pyload.pluginManager.plugins[self.plugintype][plugin] hdict['new_module'] = self.pluginmodule - hdict['new_name'] = self.pluginname + hdict['new_name'] = self.classname if excludedList: self.log_info(_("%ss not overwritten: %s") % (self.plugintype.capitalize(), ", ".join(sorted(excludedList)))) - if self.new_supported: - plugins = sorted(self.new_supported) + if self.newsupported: + plugins = sorted(self.newsupported) self.log_debug("New %ss: %s" % (self.plugintype, ", ".join(plugins))) @@ -258,30 +204,66 @@ class MultiAccount(Account): self.log_debug("Regexp: %s" % regexp) - hdict = self.pyload.pluginManager.plugins[self.plugintype][self.pluginname] + hdict = self.pyload.pluginManager.plugins[self.plugintype][self.classname] hdict['pattern'] = regexp hdict['re'] = re.compile(regexp) - def unload_plugin(self, plugin): - hdict = self.pyload.pluginManager.plugins[self.plugintype][plugin] - if "module" in hdict: - hdict.pop('module', None) + def plugins_cached(self): + if self.plugins: + return self.plugins - if "new_module" in hdict: - hdict.pop('new_module', None) - hdict.pop('new_name', None) + for _i in xrange(5): + try: + pluginset = self._plugin_set(self.grab_hosters()) + break + except Exception, e: + self.log_warning(e, _("Waiting 1 minute and retry"), trace=True) + time.sleep(60) + else: + self.log_warning(_("No hoster list retrieved")) + self.interval = self.PERIODICAL_INTERVAL + return list() - def deactivate(self): - """ - Remove override for all plugins. Scheduler job is removed by hookmanager - """ - for plugin in self.supported: - self.unload_plugin(plugin) + try: + configmode = self.get_config('pluginmode', 'all') + if configmode in ("listed", "unlisted"): + pluginlist = self.get_config('pluginlist', '').replace('|', ',').replace(';', ',').split(',') + configset = self._plugin_set(pluginlist) + + if configmode == "listed": + pluginset &= configset + else: + pluginset -= configset + + except Exception, e: + self.log_error(e) + + self.plugins = list(pluginset) + + return self.plugins + + + # def unload_plugin(self, plugin): + # hdict = self.pyload.pluginManager.plugins[self.plugintype][plugin] + # if "module" in hdict: + # hdict.pop('module', None) + + # if "new_module" in hdict: + # hdict.pop('new_module', None) + # hdict.pop('new_name', None) + + + # def deactivate(self): + # """ + # Remove override for all plugins. Scheduler job is removed by hookmanager + # """ + # for plugin in self.supported: + # self.unload_plugin(plugin) #: Reset pattern - hdict = self.pyload.pluginManager.plugins[self.plugintype][self.pluginname] + # hdict = self.pyload.pluginManager.plugins[self.plugintype][self.classname] - hdict['pattern'] = getattr(self.pluginclass, "__pattern__", r'^unmatchable$') - hdict['re'] = re.compile(hdict['pattern']) + # hdict['pattern'] = getattr(self.pluginclass, "__pattern__", r'^unmatchable$') + # hdict['re'] = re.compile(hdict['pattern']) diff --git a/module/plugins/internal/MultiCrypter.py b/module/plugins/internal/MultiCrypter.py index 900f72589..916ac9cdd 100644 --- a/module/plugins/internal/MultiCrypter.py +++ b/module/plugins/internal/MultiCrypter.py @@ -6,7 +6,7 @@ from module.plugins.internal.SimpleCrypter import SimpleCrypter class MultiCrypter(SimpleCrypter): __name__ = "MultiCrypter" __type__ = "hoster" - __version__ = "0.04" + __version__ = "0.05" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -25,7 +25,5 @@ class MultiCrypter(SimpleCrypter): def _log(self, level, plugintype, pluginname, messages): - return super(MultiCrypter, self)._log(level, - plugintype, - pluginname, - (self.PLUGIN_NAME,) + messages) + messages = (self.PLUGIN_NAME,) + messages + return self.plugin._log(level, plugintype, pluginname, messages) diff --git a/module/plugins/internal/MultiHoster.py b/module/plugins/internal/MultiHoster.py index 5655571b8..8baa577f9 100644 --- a/module/plugins/internal/MultiHoster.py +++ b/module/plugins/internal/MultiHoster.py @@ -9,15 +9,15 @@ from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, r class MultiHoster(SimpleHoster): __name__ = "MultiHoster" __type__ = "hoster" - __version__ = "0.53" + __version__ = "0.54" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("activated" , "bool", "Activated" , True), - ("use_premium" , "bool", "Use premium account if available" , True), - ("fallback_premium", "bool", "Fallback to free download if premium fails", True), - ("chk_filesize" , "bool", "Check file size" , True), - ("revertfailed" , "bool", "Revert to standard download if fails" , True)] + __config__ = [("activated" , "bool", "Activated" , True), + ("use_premium" , "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails", True), + ("chk_filesize", "bool", "Check file size" , True), + ("revertfailed", "bool", "Revert to standard download if fails" , True)] __description__ = """Multi hoster plugin""" __license__ = "GPLv3" @@ -35,10 +35,8 @@ class MultiHoster(SimpleHoster): def _log(self, level, plugintype, pluginname, messages): - return super(MultiHoster, self)._log(level, - plugintype, - pluginname, - (self.PLUGIN_NAME,) + messages) + messages = (self.PLUGIN_NAME,) + messages + return self.plugin._log(level, plugintype, pluginname, messages) def setup(self): @@ -66,47 +64,13 @@ class MultiHoster(SimpleHoster): self.direct_dl = direct_dl - def process(self, pyfile): + def _process(self, thread): try: - self.prepare() - self.check_info() #@TODO: Remove in 0.4.10 + super(MultiHoster, self)._process(thread) - if self.direct_dl: - self.log_info(_("Looking for direct download link...")) - self.handle_direct(pyfile) - - if self.link or was_downloaded(): - self.log_info(_("Direct download link detected")) - else: - self.log_info(_("Direct download link not found")) - - if not self.link and not self.last_download: - self.preload() - - self.check_errors() - self.check_status(getinfo=False) - - if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic()): - self.log_info(_("Processing as premium download...")) - self.handle_premium(pyfile) - - elif not self.LOGIN_ACCOUNT or (not self.CHECK_TRAFFIC or self.check_traffic()): - self.log_info(_("Processing as free download...")) - self.handle_free(pyfile) - - if not self.last_download: - self.log_info(_("Downloading file...")) - self.download(self.link, disposition=self.DISPOSITION) - - self.check_download() - - except Fail, e: #@TODO: Move to PluginThread in 0.4.10 - if self.premium: - self.log_warning(_("Premium download failed")) - self.restart(premium=False) - - elif self.get_config("revertfailed", True) and \ - self.pyload.pluginManager.hosterPlugins[self.classname].get('new_module'): + except Fail, e: + if self.get_config("revertfailed", True) and \ + self.pyload.pluginManager.hosterPlugins[self.classname].get('new_module'): hdict = self.pyload.pluginManager.hosterPlugins[self.classname] tmp_module = hdict['new_module'] @@ -122,7 +86,7 @@ class MultiHoster(SimpleHoster): self.restart(_("Revert to original hoster plugin")) else: - raise Fail(encode(e)) #@TODO: Remove `encode` in 0.4.10 + raise Fail(e) def handle_premium(self, pyfile): diff --git a/module/plugins/internal/OCR.py b/module/plugins/internal/OCR.py index ccadbbcbc..78ad383ca 100644 --- a/module/plugins/internal/OCR.py +++ b/module/plugins/internal/OCR.py @@ -14,13 +14,13 @@ import subprocess # import tempfile from module.plugins.internal.Plugin import Plugin -from module.utils import save_join as fs_join +from module.plugins.internal.utils import fs_join class OCR(Plugin): __name__ = "OCR" __type__ = "ocr" - __version__ = "0.20" + __version__ = "0.21" __status__ = "testing" __description__ = """OCR base plugin""" @@ -42,10 +42,8 @@ class OCR(Plugin): def _log(self, level, plugintype, pluginname, messages): - return self.plugin._log(level, - plugintype, - self.plugin.__name__, - (self.__name__,) + messages) + messages = (self.__name__,) + messages + return self.plugin._log(level, plugintype, self.plugin.__name__, messages) def load_image(self, image): @@ -88,7 +86,7 @@ class OCR(Plugin): tmpTxt.close() except IOError, e: - self.log_error(e, trace=True) + self.log_error(e) return self.pyload.log_debug("Saving tiff...") diff --git a/module/plugins/internal/Plugin.py b/module/plugins/internal/Plugin.py index c1b994d02..ccecb8c47 100644 --- a/module/plugins/internal/Plugin.py +++ b/module/plugins/internal/Plugin.py @@ -2,289 +2,25 @@ from __future__ import with_statement -import datetime import inspect import os -import re -import sys -import time -import traceback -import urllib -import urlparse - -import pycurl if os.name is not "nt": import grp import pwd -from module.common.json_layer import json_dumps, json_loads -from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 -from module.utils import (fs_encode, fs_decode, get_console_encoding, html_unescape, - parseFileSize as parse_size, save_join as fs_join) - - -#@TODO: Move to utils in 0.4.10 -def isiterable(obj): - return hasattr(obj, "__iter__") - - -#@TODO: Move to utils in 0.4.10 -def decode(string, encoding=None): - """Encoded string (default to UTF-8) -> unicode string""" - if type(string) is str: - try: - res = unicode(string, encoding or "utf-8") - - except UnicodeDecodeError, e: - if encoding: - raise UnicodeDecodeError(e) - - encoding = get_console_encoding(sys.stdout.encoding) - res = unicode(string, encoding) - - elif type(string) is unicode: - res = string - - else: - res = unicode(string) - - return res - - -#@TODO: Remove in 0.4.10 -def _decode(*args, **kwargs): - return decode(*args, **kwargs) - - -#@TODO: Move to utils in 0.4.10 -def encode(string, encoding=None, decoding=None): - """Unicode or decoded string -> encoded string (default to UTF-8)""" - if type(string) is unicode: - res = string.encode(encoding or "utf-8") - - elif type(string) is str: - res = encode(decode(string, decoding), encoding) - - else: - res = str(string) - - return res - - -#@TODO: Move to utils in 0.4.10 -def exists(path): - if os.path.exists(path): - if os.name is "nt": - dir, name = os.path.split(path.rstrip(os.sep)) - return name in os.listdir(dir) - else: - return True - else: - return False - - -def fixurl(url, unquote=None): - newurl = urllib.unquote(url) - - if unquote is None: - unquote = newurl == url - - newurl = html_unescape(decode(newurl).decode('unicode-escape')) - newurl = re.sub(r'(?<!:)/{2,}', '/', newurl).strip().lstrip('.') - - if not unquote: - newurl = urllib.quote(newurl) - - return newurl - - -def parse_name(string): - path = fixurl(decode(string), unquote=False) - url_p = urlparse.urlparse(path.rstrip('/')) - name = (url_p.path.split('/')[-1] or - url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or - url_p.netloc.split('.', 1)[0]) - - return urllib.unquote(name) - - -#@TODO: Move to utils in 0.4.10 -def str2int(string): - try: - return int(string) - except: - pass - - ones = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", - "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", - "sixteen", "seventeen", "eighteen", "nineteen"] - tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", - "eighty", "ninety"] - - o_tuple = [(w, i) for i, w in enumerate(ones)] - t_tuple = [(w, i * 10) for i, w in enumerate(tens)] - - numwords = dict(o_tuple + t_tuple) - tokens = re.split(r"[\s\-]+", string.lower()) - - try: - return sum(numwords[word] for word in tokens) - except: - return 0 - - -def parse_time(string): - if re.search("da(il)?y|today", string): - seconds = seconds_to_midnight() - - else: - regex = re.compile(r'(\d+| (?:this|an?) )\s*(hr|hour|min|sec|)', re.I) - seconds = sum((int(v) if v.strip() not in ("this", "a", "an") else 1) * - {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, '': 1}[u.lower()] - for v, u in regex.findall(string)) - return seconds - - -#@TODO: Move to utils in 0.4.10 -def timestamp(): - return int(time.time() * 1000) - - -#@TODO: Move to utils in 0.4.10 -def which(program): - """ - Works exactly like the unix command which - Courtesy of http://stackoverflow.com/a/377028/675646 - """ - isExe = lambda x: os.path.isfile(x) and os.access(x, os.X_OK) - - fpath, fname = os.path.split(program) - - if fpath: - if isExe(program): - return program - else: - for path in os.environ['PATH'].split(os.pathsep): - exe_file = os.path.join(path.strip('"'), program) - if isExe(exe_file): - return exe_file - - -#@TODO: Move to utils in 0.4.10 -def format_exc(frame=None): - """ - Format call-stack and display exception information (if availible) - """ - exception_info = sys.exc_info() - callstack_list = traceback.extract_stack(frame) - callstack_list = callstack_list[:-1] - - exception_desc = "" - if exception_info[0] is not None: - exception_callstack_list = traceback.extract_tb(exception_info[2]) - if callstack_list[-1][0] == exception_callstack_list[0][0]: #Does this exception belongs to us? - callstack_list = callstack_list[:-1] - callstack_list.extend(exception_callstack_list) - exception_desc = "".join(traceback.format_exception_only(exception_info[0], exception_info[1])) - - traceback_str = "Traceback (most recent call last):\n" - traceback_str += "".join(traceback.format_list(callstack_list)) - traceback_str += exception_desc - traceback_str = traceback_str[:-1] #Removing the last '\n' - return traceback_str - -def seconds_to_nexthour(strict=False): - now = datetime.datetime.today() - nexthour = now.replace(minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(hours=1) - return (nexthour - now).seconds - - -def seconds_to_midnight(utc=None, strict=False): - if utc is None: - now = datetime.datetime.today() - else: - now = datetime.datetime.utcnow() + datetime.timedelta(hours=utc) - - midnight = now.replace(hour=0, minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(days=1) - - return (midnight - now).seconds - - -def replace_patterns(string, ruleslist): - for r in ruleslist: - rf, rt = r - string = re.sub(rf, rt, string) - return string - - -#@TODO: Remove in 0.4.10 and fix CookieJar.setCookie -def set_cookie(cj, domain, name, value): - return cj.setCookie(domain, name, encode(value)) - - -def set_cookies(cj, cookies): - for cookie in cookies: - if isinstance(cookie, tuple) and len(cookie) == 3: - set_cookie(cj, *cookie) - - -def parse_html_tag_attr_value(attr_name, tag): - m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) - return m.group(2) if m else None - - -def parse_html_form(attr_str, html, input_names={}): - for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, - html, re.I | re.S): - inputs = {} - action = parse_html_tag_attr_value("action", form.group('TAG')) - - for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.I | re.S): - name = parse_html_tag_attr_value("name", inputtag.group(1)) - if name: - value = parse_html_tag_attr_value("value", inputtag.group(1)) - if not value: - inputs[name] = inputtag.group(3) or "" - else: - inputs[name] = value - - if not input_names: - #: No attribute check - return action, inputs - else: - #: Check input attributes - for key, val in input_names.items(): - if key in inputs: - if isinstance(val, basestring) and inputs[key] is val: - continue - elif isinstance(val, tuple) and inputs[key] in val: - continue - elif hasattr(val, "search") and re.match(val, inputs[key]): - continue - else: - break #: Attibute value does not match - else: - break #: Attibute name does not match - else: - return action, inputs #: Passed attribute check - - return {}, None #: No matching form found +import pycurl +import module.plugins.internal.utils as utils -#@TODO: Move to utils in 0.4.10 -def chunks(iterable, size): - it = iter(iterable) - item = list(islice(it, size)) - while item: - yield item - item = list(islice(it, size)) +from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 +from module.plugins.internal.utils import * class Plugin(object): __name__ = "Plugin" __type__ = "plugin" - __version__ = "0.58" + __version__ = "0.59" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -327,46 +63,45 @@ class Plugin(object): def _log(self, level, plugintype, pluginname, messages): log = getattr(self.pyload.log, level) msg = u" | ".join(decode(a).strip() for a in messages if a) - log("%(plugintype)s %(pluginname)s: %(msg)s" - % {'plugintype': plugintype.upper(), - 'pluginname': pluginname, - 'msg' : msg}) + log("%(plugintype)s %(pluginname)s: %(msg)s" % + {'plugintype': plugintype.upper(), + 'pluginname': pluginname, + 'msg' : msg}) def log_debug(self, *args, **kwargs): self._log("debug", self.__type__, self.__name__, args) - if self.pyload.debug and kwargs.get('trace', False): - self.log_exc("debug") + if self.pyload.debug and kwargs.get('trace'): + self.print_exc() def log_info(self, *args, **kwargs): self._log("info", self.__type__, self.__name__, args) - if kwargs.get('trace', False): - self.log_exc("info") + if self.pyload.debug and kwargs.get('trace'): + self.print_exc() def log_warning(self, *args, **kwargs): self._log("warning", self.__type__, self.__name__, args) - if kwargs.get('trace', False): - self.log_exc("warning") + if self.pyload.debug and kwargs.get('trace'): + self.print_exc() def log_error(self, *args, **kwargs): self._log("error", self.__type__, self.__name__, args) - if kwargs.get('trace', False): - self.log_exc("error") + if self.pyload.debug and kwargs.get('trace', True): + self.print_exc() def log_critical(self, *args, **kwargs): self._log("critical", self.__type__, self.__name__, args) if kwargs.get('trace', True): - self.log_exc("critical") + self.print_exc() - def log_exc(self, level): + def print_exc(self): frame = inspect.currentframe() - log = getattr(self.pyload.log, level) - log(format_exc(frame.f_back)) + print format_exc(frame.f_back) del frame @@ -426,7 +161,7 @@ class Plugin(object): Saves a value persistently to the database """ value = map(decode, value) if isiterable(value) else decode(value) - entry = json_dumps(value).encode('base64') + entry = json.dumps(value).encode('base64') self.pyload.db.setStorage(self.classname, key, entry) @@ -440,12 +175,12 @@ class Plugin(object): if entry is None: value = default else: - value = json_loads(entry.decode('base64')) + value = json.loads(entry.decode('base64')) else: if not entry: value = default else: - value = dict((k, json_loads(v.decode('base64'))) for k, v in value.items()) + value = dict((k, json.loads(v.decode('base64'))) for k, v in value.items()) return value @@ -505,8 +240,8 @@ class Plugin(object): req.http.c.setopt(pycurl.FOLLOWLOCATION, 1) elif type(redirect) is int: - req.http.c.setopt(pycurl.MAXREDIRS, - self.get_config("maxredirs", 5, plugin="UserAgentSwitcher")) + maxredirs = self.get_config("maxredirs", default=5, plugin="UserAgentSwitcher") + req.http.c.setopt(pycurl.MAXREDIRS, maxredirs) #@TODO: Move to network in 0.4.10 if decode: @@ -514,7 +249,7 @@ class Plugin(object): #@TODO: Move to network in 0.4.10 if isinstance(decode, basestring): - html = _decode(html, decode) #@NOTE: Use `utils.decode()` in 0.4.10 + html = utils.decode(html, decode) self.last_html = html @@ -543,13 +278,15 @@ class Plugin(object): else: #@TODO: Move to network in 0.4.10 header = {'code': req.code} + for line in html.splitlines(): line = line.strip() if not line or ":" not in line: continue key, none, value = line.partition(":") - key = key.strip().lower() + + key = key.strip().lower() value = value.strip() if key in header: diff --git a/module/plugins/internal/SevenZip.py b/module/plugins/internal/SevenZip.py index f73e935e8..abf413fb7 100644 --- a/module/plugins/internal/SevenZip.py +++ b/module/plugins/internal/SevenZip.py @@ -5,12 +5,12 @@ import re import subprocess from module.plugins.internal.UnRar import ArchiveError, CRCError, PasswordError, UnRar, renice -from module.utils import save_join as fs_join +from module.plugins.internal.utils import fs_join class SevenZip(UnRar): __name__ = "SevenZip" - __version__ = "0.16" + __version__ = "0.17" __status__ = "testing" __description__ = """7-Zip extractor plugin""" @@ -78,8 +78,6 @@ class SevenZip(UnRar): p = self.call_cmd(command, '-o' + self.out, self.target, password=password) - renice(p.pid, self.renice) - #: Communicate and retrieve stderr self._progress(p) err = p.stderr.read().strip() @@ -139,4 +137,7 @@ class SevenZip(UnRar): self.log_debug(" ".join(call)) p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + renice(p.pid, self.priority) + return p diff --git a/module/plugins/internal/SimpleCrypter.py b/module/plugins/internal/SimpleCrypter.py index ba4235072..1457c6fe5 100644 --- a/module/plugins/internal/SimpleCrypter.py +++ b/module/plugins/internal/SimpleCrypter.py @@ -2,21 +2,22 @@ import re -from module.plugins.internal.Crypter import Crypter -from module.plugins.internal.Plugin import replace_patterns, set_cookie, set_cookies -from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo +from module.plugins.internal.Crypter import Crypter, create_getInfo, parse_fileInfo +from module.plugins.internal.utils import replace_patterns, set_cookie, set_cookies -class SimpleCrypter(Crypter, SimpleHoster): +class SimpleCrypter(Crypter): __name__ = "SimpleCrypter" __type__ = "crypter" - __version__ = "0.67" + __version__ = "0.71" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("use_premium" , "bool", "Use premium account if available" , True), - ("use_subfolder" , "bool", "Save package to subfolder" , True), - ("subfolder_per_package", "bool", "Create a subfolder for each package", True)] + __config__ = [("activated" , "bool", "Activated" , True), + ("use_premium" , "bool", "Use premium account if available" , True), + ("use_subfolder" , "bool", "Save package to subfolder" , True), + ("subfolder_per_package", "bool", "Create a subfolder for each package" , True), + ("max_wait" , "int" , "Reconnect if waiting time is greater than minutes", 10 )] __description__ = """Simple decrypter plugin""" __license__ = "GPLv3" @@ -59,14 +60,14 @@ class SimpleCrypter(Crypter, SimpleHoster): DIRECT_LINK = True #: Set to True to looking for direct link (as defined in handle_direct method), set to None to do it if self.account is True else False LOGIN_ACCOUNT = False #: Set to True to require account login LOGIN_PREMIUM = False #: Set to True to require premium account login - # LEECH_HOSTER = False #: Set to True to leech other hoster link (as defined in handle_multi method) TEXT_ENCODING = True #: Set to encoding name if encoding value in http header is not correct - PAGES_PATTERN = None LINK_PATTERN = None + LINK_FREE_PATTERN = None + LINK_PREMIUM_PATTERN = None + PAGES_PATTERN = None NAME_PATTERN = None - HASHSUM_PATTERN = None OFFLINE_PATTERN = None TEMP_OFFLINE_PATTERN = None @@ -79,68 +80,170 @@ class SimpleCrypter(Crypter, SimpleHoster): ERROR_PATTERN = None + @classmethod + def api_info(cls, url): + return {} + + + @classmethod + def get_info(cls, url="", html=""): + info = super(SimpleHoster, cls).get_info(url) + + info.update(cls.api_info(url)) + + if not html and info['status'] is not 2: + if not url: + info['error'] = "missing url" + info['status'] = 1 + + elif info['status'] is 3: + try: + html = get_url(url, cookies=cls.COOKIES, decode=cls.TEXT_ENCODING) + + except BadHeader, e: + info['error'] = "%d: %s" % (e.code, e.content) + + if e.code in (404, 410): + info['status'] = 1 + + elif e.code is 503: + info['status'] = 6 + + except Exception: + pass + + if html: + if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None: + info['status'] = 1 + + elif cls.TEMP_OFFLINE_PATTERN and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None: + info['status'] = 6 + + elif cls.NAME_PATTERN: + m = re.search(cls.NAME_PATTERN, html) + if m is not None: + info['status'] = 2 + info['pattern'].update(m.groupdict()) + + if 'N' in info['pattern']: + name = replace_patterns(info['pattern']['N'], cls.NAME_REPLACEMENTS) + info['name'] = parse_name(name) + + return info + + #@TODO: Remove in 0.4.10 def _setup(self): - orig_name = self.__name__ - self.__name__ = re.sub(r'Folder$', "", self.__name__) + orig_name = self.classname + self.classname = orig_name.rstrip("Folder") super(SimpleCrypter, self)._setup() - self.__name__ = orig_name + self.classname = orig_name #@TODO: Remove in 0.4.10 def load_account(self): - orig_name = self.__name__ - self.__name__ = re.sub(r'Folder$', "", self.__name__) + orig_name = self.classname + self.classname = orig_name.rstrip("Folder") super(SimpleCrypter, self).load_account() - self.__name__ = orig_name + self.classname = orig_name def handle_direct(self, pyfile): - for i in xrange(self.get_config("maxredirs", plugin="UserAgentSwitcher")): - redirect = self.link or pyfile.url - self.log_debug("Redirect #%d to: %s" % (i, redirect)) + link = None + maxredirs = self.get_config("maxredirs", default=10, plugin="UserAgentSwitcher") - header = self.load(redirect, just_header=True) - if header.get('location'): - self.link = header.get('location') - else: - break + for i in xrange(maxredirs): + url = link or pyfile.url + self.log_debug("Redirect #%d to: %s" % (i, url)) + + header = self.load(url, just_header=True) + location = header.get('location') + + if location: + link = location + + elif link: + self.urls.append(link) + return else: - self.log_error(_("Too many redirects")) + self.log_warning(_("Too many redirects")) + + + def preload(self): + self.html = self.load(self.pyfile.url, + cookies=self.COOKIES, + ref=False, + decode=self.TEXT_ENCODING) def prepare(self): - self.links = [] - return super(SimpleCrypter, self).prepare() + self.direct_dl = False + + if self.LOGIN_PREMIUM and not self.premium: + self.fail(_("Required premium account not found")) + + if self.LOGIN_ACCOUNT and not self.account: + self.fail(_("Required account not found")) + + self.req.setOption("timeout", 120) + + if self.LINK_PATTERN: + if self.LINK_FREE_PATTERN is None: + self.LINK_FREE_PATTERN = self.LINK_PATTERN + + if self.LINK_PREMIUM_PATTERN is None: + self.LINK_PREMIUM_PATTERN = self.LINK_PATTERN + + if self.DIRECT_LINK is None: + self.direct_dl = bool(self.account) + else: + self.direct_dl = self.DIRECT_LINK + + self.pyfile.url = replace_patterns(self.pyfile.url, self.URL_REPLACEMENTS) def decrypt(self, pyfile): self.prepare() - self.check_info() #@TODO: Remove in 0.4.10 if self.direct_dl: - self.log_debug(_("Looking for direct download link...")) + self.log_info(_("Looking for direct link...")) self.handle_direct(pyfile) - if self.link or self.links or self.urls or self.packages: - self.log_info(_("Direct download link detected")) + if self.urls or self.packages: + self.log_info(_("Direct link detected")) else: - self.log_info(_("Direct download link not found")) + self.log_info(_("Direct link not found")) - if not (self.link or self.links or self.urls or self.packages): + if not self.urls and not self.packages: self.preload() - self.links = self.get_links() or list() + self.urls.extend(self.get_links()) if self.PAGES_PATTERN: self.handle_pages(pyfile) - if self.link: - self.urls.append(self.link) - if self.links: - name = folder = pyfile.name - self.packages.append((name, self.links, folder)) + def handle_free(self, pyfile): + if not self.LINK_FREE_PATTERN: + self.log_warning(_("Free decrypting not implemented")) + + links = re.findall(self.LINK_FREE_PATTERN, self.html) + if not links: + self.error(_("Free decrypted link not found")) + else: + self.urls.extend(links) + + + def handle_premium(self, pyfile): + if not self.LINK_PREMIUM_PATTERN: + self.log_warning(_("Premium decrypting not implemented")) + self.restart(premium=False) + + links = re.findall(self.LINK_PREMIUM_PATTERN, self.html) + if not links: + self.error(_("Premium decrypted link found")) + else: + self.urls.extend(links) def get_links(self): @@ -148,7 +251,15 @@ class SimpleCrypter(Crypter, SimpleHoster): Returns the links extracted from self.html You should override this only if it's impossible to extract links using only the LINK_PATTERN. """ - return re.findall(self.LINK_PATTERN, self.html) + if self.premium: + self.log_info(_("Decrypting as premium link...")) + self.handle_premium(pyfile) + + elif not self.LOGIN_ACCOUNT: + self.log_info(_("Decrypting as free link...")) + self.handle_free(pyfile) + + return self.urls def load_page(self, number): @@ -164,4 +275,103 @@ class SimpleCrypter(Crypter, SimpleHoster): for p in xrange(2, pages + 1): self.html = self.load_page(p) - self.links += self.get_links() + self.urls.append(self.get_links()) + + + def check_errors(self): + if not self.html: + self.log_warning(_("No html code to check")) + return + + if self.IP_BLOCKED_PATTERN and re.search(self.IP_BLOCKED_PATTERN, self.html): + self.fail(_("Connection from your current IP address is not allowed")) + + elif not self.premium: + if self.PREMIUM_ONLY_PATTERN and re.search(self.PREMIUM_ONLY_PATTERN, self.html): + self.fail(_("Link can be decrypted by premium users only")) + + elif self.SIZE_LIMIT_PATTERN and re.search(self.SIZE_LIMIT_PATTERN, self.html): + self.fail(_("Link list too large for free decrypt")) + + elif self.DL_LIMIT_PATTERN and re.search(self.DL_LIMIT_PATTERN, self.html): + m = re.search(self.DL_LIMIT_PATTERN, self.html) + try: + errmsg = m.group(1).strip() + + except (AttributeError, IndexError): + errmsg = m.group(0).strip() + + finally: + errmsg = re.sub(r'<.*?>', " ", errmsg) + + self.info['error'] = errmsg + self.log_warning(errmsg) + + wait_time = parse_time(errmsg) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) + self.restart(_("Download limit exceeded")) + + if self.HAPPY_HOUR_PATTERN and re.search(self.HAPPY_HOUR_PATTERN, self.html): + self.multiDL = True + + if self.ERROR_PATTERN: + m = re.search(self.ERROR_PATTERN, self.html) + if m is not None: + try: + errmsg = m.group(1) + + except (AttributeError, IndexError): + errmsg = m.group(0) + + finally: + errmsg = re.sub(r'<.*?>', " ", errmsg.strip()) + + self.info['error'] = errmsg + self.log_warning(errmsg) + + if re.search('limit|wait|slot', errmsg, re.I): + wait_time = parse_time(errmsg) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) + self.restart(_("Download limit exceeded")) + + elif re.search('country|ip|region|nation', errmsg, re.I): + self.fail(_("Connection from your current IP address is not allowed")) + + elif re.search('captcha|code', errmsg, re.I): + self.retry_captcha() + + elif re.search('countdown|expired', errmsg, re.I): + self.retry(10, 60, _("Link expired")) + + elif re.search('maint(e|ai)nance|temp', errmsg, re.I): + self.temp_offline() + + elif re.search('up to|size', errmsg, re.I): + self.fail(_("Link list too large for free decrypt")) + + elif re.search('offline|delet|remov|not? (found|(longer)? available)', errmsg, re.I): + self.offline() + + elif re.search('filename', errmsg, re.I): + self.fail(_("Invalid url")) + + elif re.search('premium', errmsg, re.I): + self.fail(_("Link can be decrypted by premium users only")) + + else: + self.wait(60, reconnect=True) + self.restart(errmsg) + + elif self.WAIT_PATTERN: + m = re.search(self.WAIT_PATTERN, self.html) + if m is not None: + try: + waitmsg = m.group(1).strip() + + except (AttributeError, IndexError): + waitmsg = m.group(0).strip() + + wait_time = parse_time(waitmsg) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) + + self.info.pop('error', None) diff --git a/module/plugins/internal/SimpleHoster.py b/module/plugins/internal/SimpleHoster.py index 744a1f686..8307c6236 100644 --- a/module/plugins/internal/SimpleHoster.py +++ b/module/plugins/internal/SimpleHoster.py @@ -9,21 +9,24 @@ import time from module.network.HTTPRequest import BadHeader from module.network.RequestFactory import getURL as get_url from module.plugins.internal.Hoster import Hoster, create_getInfo, parse_fileInfo -from module.plugins.internal.Plugin import Fail, encode, parse_name, parse_size, parse_time, replace_patterns, seconds_to_midnight, set_cookie, set_cookies -from module.utils import fixup, fs_encode +from module.plugins.internal.Plugin import Fail +from module.plugins.internal.utils import (encode, fixup, parse_name, parse_size, + parse_time, replace_patterns, seconds_to_midnight, + set_cookie, set_cookies) class SimpleHoster(Hoster): __name__ = "SimpleHoster" __type__ = "hoster" - __version__ = "1.98" + __version__ = "1.99" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("activated" , "bool", "Activated" , True), - ("use_premium" , "bool", "Use premium account if available" , True), - ("fallback_premium", "bool", "Fallback to free download if premium fails", True), - ("chk_filesize" , "bool", "Check file size" , True)] + __config__ = [("activated" , "bool", "Activated" , True), + ("use_premium" , "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails" , True), + ("chk_filesize", "bool", "Check file size" , True), + ("max_wait" , "int" , "Reconnect if waiting time is greater than minutes", 10 )] __description__ = """Simple hoster plugin""" __license__ = "GPLv3" @@ -133,7 +136,6 @@ class SimpleHoster(Hoster): @classmethod def get_info(cls, url="", html=""): info = super(SimpleHoster, cls).get_info(url) - info.update(cls.api_info(url)) if not html and info['status'] is not 2: @@ -148,7 +150,7 @@ class SimpleHoster(Hoster): except BadHeader, e: info['error'] = "%d: %s" % (e.code, e.content) - if e.code is 404: + if e.code in (404, 410): info['status'] = 1 elif e.code is 503: @@ -158,10 +160,10 @@ class SimpleHoster(Hoster): pass if html: - if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html): + if cls.OFFLINE_PATTERN and re.search(cls.OFFLINE_PATTERN, html) is not None: info['status'] = 1 - elif cls.TEMP_OFFLINE_PATTERN and re.search(cls.TEMP_OFFLINE_PATTERN, html): + elif cls.TEMP_OFFLINE_PATTERN and re.search(cls.TEMP_OFFLINE_PATTERN, html) is not None: info['status'] = 6 else: @@ -200,7 +202,8 @@ class SimpleHoster(Hoster): def setup(self): - self.resume_download = self.multiDL = self.premium + self.multiDL = self.premium + self.resume_download = self.premium def prepare(self): @@ -249,7 +252,6 @@ class SimpleHoster(Hoster): def process(self, pyfile): self.prepare() - self.check_info() #@TODO: Remove in 0.4.10 if self.leech_dl: self.log_info(_("Processing as debrid download...")) @@ -271,8 +273,8 @@ class SimpleHoster(Hoster): if not self.link and not self.last_download: self.preload() - if self.info.get('status', 3) is 3: #@TODO: Recheck in 0.4.10 - self.check_info() + if self.info.get('status', 3) is not 2: + self.grab_info() if self.premium and (not self.CHECK_TRAFFIC or self.check_traffic()): self.log_info(_("Processing as premium download...")) @@ -286,12 +288,15 @@ class SimpleHoster(Hoster): self.log_info(_("Downloading file...")) self.download(self.link, disposition=self.DISPOSITION) + + def _check_download(self): + super(SimpleHoster, self)._check_download() self.check_download() def check_download(self): - self.log_info(_("Checking downloaded file...")) - self.log_debug("Using default check rules...") + self.log_debug("Performing default check rules...") + for r, p in self.FILE_ERRORS: errmsg = self.check_file({r: re.compile(p)}) if errmsg is not None: @@ -308,12 +313,12 @@ class SimpleHoster(Hoster): self.restart(errmsg) else: if self.CHECK_FILE: - self.log_debug("Using custom check rules...") - with open(fs_encode(self.last_download), "rb") as f: + self.log_debug("Performing custom check rules...") + + with open(encode(self.last_download), "rb") as f: self.html = f.read(1048576) #@TODO: Recheck in 0.4.10 - self.check_errors() - self.log_info(_("No errors found")) + self.check_errors() def check_errors(self): @@ -346,7 +351,7 @@ class SimpleHoster(Hoster): self.log_warning(errmsg) wait_time = parse_time(errmsg) - self.wait(wait_time, reconnect=wait_time > 300) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) self.restart(_("Download limit exceeded")) if self.HAPPY_HOUR_PATTERN and re.search(self.HAPPY_HOUR_PATTERN, self.html): @@ -369,7 +374,7 @@ class SimpleHoster(Hoster): if re.search('limit|wait|slot', errmsg, re.I): wait_time = parse_time(errmsg) - self.wait(wait_time, reconnect=wait_time > 300) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) self.restart(_("Download limit exceeded")) elif re.search('country|ip|region|nation', errmsg, re.I): @@ -410,86 +415,20 @@ class SimpleHoster(Hoster): waitmsg = m.group(0).strip() wait_time = parse_time(waitmsg) - self.wait(wait_time, reconnect=wait_time > 300) + self.wait(wait_time, reconnect=wait_time > self.get_config("max_wait", 10) * 60) self.info.pop('error', None) - def check_status(self, getinfo=True): - if not self.info or getinfo: - self.log_info(_("Updating file info...")) - old_info = self.info.copy() - self.info.update(self.get_info(self.pyfile.url, self.html)) - self.log_debug("File info: %s" % self.info) - self.log_debug("Previous file info: %s" % old_info) - - try: - status = self.info['status'] or 14 - - if status is 1: - self.offline() - - elif status is 6: - self.temp_offline() - - elif status is 8: - self.fail() - - finally: - self.log_info(_("File status: ") + self.pyfile.getStatusName()) - - - def check_name_size(self, getinfo=True): - if not self.info or getinfo: - self.log_info(_("Updating file info...")) - old_info = self.info.copy() - self.info.update(self.get_info(self.pyfile.url, self.html)) - self.log_debug("File info: %s" % self.info) - self.log_debug("Previous file info: %s" % old_info) - - try: - url = self.info['url'] - name = self.info['name'] - - except KeyError: - pass - - else: - if name and name is not url: - self.pyfile.name = name - - if self.info.get('size') > 0: - self.pyfile.size = int(self.info['size']) #@TODO: Fix int conversion in 0.4.10 - - # self.pyfile.sync() - - name = self.pyfile.name - size = self.pyfile.size - - self.log_info(_("File name: ") + name) - self.log_info(_("File size: %s bytes") % size if size > 0 else _("File size: Unknown")) - - - #@TODO: Rewrite in 0.4.10 - def check_info(self): - self.check_name_size() - - if self.html: - self.check_errors() - self.check_name_size() - - self.check_status(getinfo=False) - - #: Deprecated method (Remove in 0.4.10) def get_fileInfo(self): - self.info = {} - self.check_info() + self.info.clear() + self.grab_info() return self.info def handle_direct(self, pyfile): - self.link = self.direct_link(pyfile.url, self.resume_download) + self.link = self.is_download(pyfile.url) def handle_multi(self, pyfile): #: Multi-hoster handler @@ -498,7 +437,7 @@ class SimpleHoster(Hoster): def handle_free(self, pyfile): if not self.LINK_FREE_PATTERN: - self.log_error(_("Free download not implemented")) + self.log_warning(_("Free download not implemented")) m = re.search(self.LINK_FREE_PATTERN, self.html) if m is None: @@ -509,7 +448,7 @@ class SimpleHoster(Hoster): def handle_premium(self, pyfile): if not self.LINK_PREMIUM_PATTERN: - self.log_error(_("Premium download not implemented")) + self.log_warning(_("Premium download not implemented")) self.restart(premium=False) m = re.search(self.LINK_PREMIUM_PATTERN, self.html) diff --git a/module/plugins/internal/UnRar.py b/module/plugins/internal/UnRar.py index 6f85c286a..c1ecccf40 100644 --- a/module/plugins/internal/UnRar.py +++ b/module/plugins/internal/UnRar.py @@ -2,27 +2,16 @@ import os import re +import string import subprocess -from glob import glob -from string import digits - -from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError -from module.utils import fs_decode, save_join as fs_join - - -def renice(pid, value): - if value and os.name is not "nt": - try: - subprocess.Popen(["renice", str(value), str(pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1) - - except Exception: - pass +from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, PasswordError, renice +from module.plugins.internal.utils import decode, fs_join class UnRar(Extractor): __name__ = "UnRar" - __version__ = "1.27" + __version__ = "1.28" __status__ = "testing" __description__ = """Rar extractor plugin""" @@ -138,7 +127,7 @@ class UnRar(Extractor): self.notify_progress(int(s)) s = "" #: Not reading a digit -> therefore restart - elif c not in digits: + elif c not in string.digits: s = "" #: Add digit to progressstring else: @@ -150,8 +139,6 @@ class UnRar(Extractor): p = self.call_cmd(command, self.target, self.out, password=password) - renice(p.pid, self.renice) - #: Communicate and retrieve stderr self._progress(p) err = p.stderr.read().strip() @@ -200,12 +187,12 @@ class UnRar(Extractor): result = set() if not self.fullpath and self.VERSION.startswith('5'): #@NOTE: Unrar 5 always list full path - for f in fs_decode(out).splitlines(): + for f in decode(out).splitlines(): f = fs_join(self.out, os.path.basename(f.strip())) if os.path.isfile(f): result.add(fs_join(self.out, os.path.basename(f))) else: - for f in fs_decode(out).splitlines(): + for f in decode(out).splitlines(): result.add(fs_join(self.out, f.strip())) return list(result) @@ -219,8 +206,7 @@ class UnRar(Extractor): args.append("-o+") else: args.append("-o-") - if self.delete != 'No': - args.append("-or") + args.append("-or") for word in self.excludefiles: args.append("-x'%s'" % word.strip()) @@ -243,4 +229,7 @@ class UnRar(Extractor): self.log_debug(" ".join(call)) p = subprocess.Popen(call, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + renice(p.pid, self.priority) + return p diff --git a/module/plugins/internal/UnZip.py b/module/plugins/internal/UnZip.py index 87cbd568a..d26a39f44 100644 --- a/module/plugins/internal/UnZip.py +++ b/module/plugins/internal/UnZip.py @@ -11,7 +11,7 @@ from module.plugins.internal.Extractor import Extractor, ArchiveError, CRCError, class UnZip(Extractor): __name__ = "UnZip" - __version__ = "1.16" + __version__ = "1.17" __status__ = "testing" __description__ = """Zip extractor plugin""" diff --git a/module/plugins/internal/XFSAccount.py b/module/plugins/internal/XFSAccount.py index 5a6f0ee9c..e09f7dddc 100644 --- a/module/plugins/internal/XFSAccount.py +++ b/module/plugins/internal/XFSAccount.py @@ -4,17 +4,22 @@ import re import time import urlparse -from module.plugins.internal.Account import Account -# from module.plugins.internal.MultiAccount import MultiAccount -from module.plugins.internal.Plugin import parse_html_form, parse_time, set_cookie +from module.plugins.internal.MultiAccount import MultiAccount +from module.plugins.internal.utils import parse_html_form, parse_time, set_cookie -class XFSAccount(Account): +class XFSAccount(MultiAccount): __name__ = "XFSAccount" __type__ = "account" - __version__ = "0.52" + __version__ = "0.54" __status__ = "testing" + __config__ = [("activated" , "bool" , "Activated" , True ), + ("multi" , "bool" , "Multi-hoster" , True ), + ("multi_mode" , "all;listed;unlisted", "Hosters to use" , "all"), + ("multi_list" , "str" , "Hoster list (comma separated)", "" ), + ("multi_interval", "int" , "Reload interval in hours" , 12 )] + __description__ = """XFileSharing account plugin""" __license__ = "GPLv3" __authors__ = [("zoidberg" , "zoidberg@mujmail.cz"), @@ -44,7 +49,7 @@ class XFSAccount(Account): def set_xfs_cookie(self): if not self.PLUGIN_DOMAIN: - self.log_error(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) + self.log_warning(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) return cookie = (self.PLUGIN_DOMAIN, "lang", "english") @@ -55,6 +60,10 @@ class XFSAccount(Account): set_cookie(self.req.cj, *cookie) + def grab_hosters(self, user, password, data): + pass + + def grab_info(self, user, password, data): validuntil = None trafficleft = None @@ -207,7 +216,10 @@ class XFSAccount(Account): finally: errmsg = re.sub(r'<.*?>', " ", errmsg.strip()) - self.timeout = parse_time(errmsg) + new_timeout = parse_time(errmsg) + if new_timeout > self.timeout: + self.timeout = new_timeout + self.fail_login(errmsg) m = re.search(self.LOGIN_FAIL_PATTERN, self.html) diff --git a/module/plugins/internal/XFSCrypter.py b/module/plugins/internal/XFSCrypter.py index da9c9cff7..327834dab 100644 --- a/module/plugins/internal/XFSCrypter.py +++ b/module/plugins/internal/XFSCrypter.py @@ -1,13 +1,13 @@ # -*- coding: utf-8 -*- -from module.plugins.internal.Plugin import set_cookie from module.plugins.internal.SimpleCrypter import SimpleCrypter, create_getInfo +from module.plugins.internal.utils import set_cookie class XFSCrypter(SimpleCrypter): __name__ = "XFSCrypter" __type__ = "crypter" - __version__ = "0.18" + __version__ = "0.19" __status__ = "testing" __pattern__ = r'^unmatchable$' @@ -28,13 +28,13 @@ class XFSCrypter(SimpleCrypter): NAME_PATTERN = r'<[Tt]itle>.*?\: (?P<N>.+) folder</[Tt]itle>' LINK_PATTERN = r'<(?:td|TD).*?>\s*(?:<.+>\s*)?<a href="(.+?)".*?>.+?(?:</a>)?\s*(?:<.+>\s*)?</(?:td|TD)>' - OFFLINE_PATTERN = r'>\s*(No such user|\w+ (Not Found|file (was|has been) removed|no longer available)' + OFFLINE_PATTERN = r'>\s*(No such user|\w+ (Not Found|file (was|has been) removed|no longer available))' TEMP_OFFLINE_PATTERN = r'>\s*\w+ server (is in )?(maintenance|maintainance)' def set_xfs_cookie(self): if not self.PLUGIN_DOMAIN: - self.log_error(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) + self.log_warning(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) return cookie = (self.PLUGIN_DOMAIN, "lang", "english") diff --git a/module/plugins/internal/XFSHoster.py b/module/plugins/internal/XFSHoster.py index 63b53be41..30191a7e7 100644 --- a/module/plugins/internal/XFSHoster.py +++ b/module/plugins/internal/XFSHoster.py @@ -6,21 +6,21 @@ import re from module.plugins.captcha.ReCaptcha import ReCaptcha from module.plugins.captcha.SolveMedia import SolveMedia from module.plugins.internal.Plugin import set_cookie -from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo, seconds_to_midnight -from module.utils import html_unescape +from module.plugins.internal.SimpleHoster import SimpleHoster, create_getInfo +from module.plugins.internal.utils import html_unescape, seconds_to_midnight class XFSHoster(SimpleHoster): __name__ = "XFSHoster" __type__ = "hoster" - __version__ = "0.65" + __version__ = "0.66" __status__ = "testing" __pattern__ = r'^unmatchable$' - __config__ = [("activated" , "bool", "Activated" , True), - ("use_premium" , "bool", "Use premium account if available" , True), - ("fallback_premium", "bool", "Fallback to free download if premium fails", True), - ("chk_filesize" , "bool", "Check file size" , True)] + __config__ = [("activated" , "bool", "Activated" , True), + ("use_premium" , "bool", "Use premium account if available" , True), + ("fallback" , "bool", "Fallback to free download if premium fails", True), + ("chk_filesize", "bool", "Check file size" , True)] __description__ = """XFileSharing hoster plugin""" __license__ = "GPLv3" @@ -31,7 +31,7 @@ class XFSHoster(SimpleHoster): PLUGIN_DOMAIN = None - LEECH_HOSTER = True #@NOTE: Should be default to False for safe, but I'm lazy... + LEECH_HOSTER = True #@NOTE: hould be set to `False` by default for safe, but I am lazy... NAME_PATTERN = r'(Filename[ ]*:[ ]*</b>(</td><td nowrap>)?|name="fname"[ ]+value="|<[\w^_]+ class="(file)?name">)\s*(?P<N>.+?)(\s*<|")' SIZE_PATTERN = r'(Size[ ]*:[ ]*</b>(</td><td>)?|File:.*>|</font>\s*\(|<[\w^_]+ class="size">)\s*(?P<S>[\d.,]+)\s*(?P<U>[\w^_]+)' @@ -52,17 +52,18 @@ class XFSHoster(SimpleHoster): SOLVEMEDIA_PATTERN = None FORM_PATTERN = None - FORM_INPUTS_MAP = None #: Dict passed as input_names to parse_html_form + FORM_INPUTS_MAP = None #: Dict passed as `input_names` to `parse_html_form` def setup(self): self.chunk_limit = -1 if self.premium else 1 - self.resume_download = self.multiDL = self.premium + self.multiDL = self.premium + self.resume_download = self.premium def set_xfs_cookie(self): if not self.PLUGIN_DOMAIN: - self.log_error(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) + self.log_warning(_("Unable to set xfs cookie due missing PLUGIN_DOMAIN")) return cookie = (self.PLUGIN_DOMAIN, "lang", "english") diff --git a/module/plugins/internal/utils.py b/module/plugins/internal/utils.py new file mode 100644 index 000000000..f7244f2df --- /dev/null +++ b/module/plugins/internal/utils.py @@ -0,0 +1,433 @@ +# -*- coding: utf-8 -*- +# +#@TODO: Move to utils directory 0.4.10 + +import datetime +import htmlentitydefs +import itertools +import os +import re +import string +import sys +import time +import traceback +import urllib +import urlparse + +try: + import simplejson as json + +except ImportError: + import json + + +class utils(object): + __name__ = "utils" + __type__ = "plugin" + __version__ = "0.02" + __status__ = "testing" + + __pattern__ = r'^unmatchable$' + __config__ = [] + + __description__ = """Dummy utils class""" + __license__ = "GPLv3" + __authors__ = [("Walter Purcaro", "vuolter@gmail.com")] + + +def lock(fn): + def new(*args): + # print "Handler: %s args: %s" % (fn, args[1:]) + args[0].lock.acquire() + try: + return fn(*args) + + finally: + args[0].lock.release() + + return new + + +def compare_time(start, end): + start = map(int, start) + end = map(int, end) + + if start == end: + return True + + now = list(time.localtime()[3:5]) + + if start < end: + if now < end: + return True + + elif now > start or now < end: + return True + + return False + + +def uniqify(seq): + """ + Remove duplicates from list preserving order + Originally by Dave Kirby + """ + seen = set() + seen_add = seen.add + return [x for x in seq if x not in seen and not seen_add(x)] + + +def parse_size(value, unit=""): #: returns bytes + m = re.match(r"([\d.,]+)\s*([\w^_]*)", value.lower()) + + if m is None: + return 0 + + traffic = float(m.group(1).replace(',', '.')) + unit = (unit.strip().lower() or m.group(2) or "byte")[0] + + if unit is "b": + return int(traffic) + + sizes = ['b', 'k', 'm', 'g', 't', 'p', 'e'] + sizemap = dict((u, i * 10) for i, u in enumerate(sizes)) + + increment = sizemap[unit] + integer, decimal = map(int, ("%.3f" % traffic).split('.')) + + return (integer << increment) + (decimal << increment - 10) + + +def fixup(m): + text = m.group(0) + if text[:2] == "&#": + # character reference + try: + if text[:3] == "&#x": + return unichr(int(text[3:-1], 16)) + else: + return unichr(int(text[2:-1])) + except ValueError: + pass + else: + # named entity + try: + name = text[1:-1] + text = unichr(htmlentitydefs.name2codepoint[name]) + except KeyError: + pass + + return text #: leave as is + + +def has_method(obj, name): + """ + Check if name was defined in obj (return false if inhereted) + """ + return hasattr(obj, '__dict__') and name in obj.__dict__ + + +def html_unescape(text): + """ + Removes HTML or XML character references and entities from a text string + """ + return re.sub("&#?\w+;", fixup, text) + + +def isiterable(obj): + return hasattr(obj, "__iter__") + + +def get_console_encoding(enc): + if os.name is "nt": + if enc is "cp65001": #: aka UTF-8 + enc = "cp850" + print "WARNING: Windows codepage 65001 (UTF-8) is not supported, used `%s` instead" % enc + else: + enc = "utf8" + + return enc + + +#@NOTE: Revert to `decode` in Python 3 +def decode(value, encoding=None): + """ + Encoded string (default to UTF-8) -> unicode string + """ + if type(value) is str: + try: + # res = value.decode(encoding or 'utf-8') + res = unicode(value, encoding or 'utf-8') + + except UnicodeDecodeError, e: + if encoding: + raise UnicodeDecodeError(e) + + encoding = get_console_encoding(sys.stdout.encoding) + # res = value.decode(encoding) + res = unicode(value, encoding) + + elif type(value) is unicode: + res = value + + else: + res = unicode(value) + + return res + + +def encode(value, encoding=None, decoding=None): + """ + Unicode or decoded string -> encoded string (default to UTF-8) + """ + if type(value) is unicode: + res = value.encode(encoding or "utf-8") + + elif type(value) is str: + res = encode(decode(value, decoding), encoding) + + else: + res = str(value) + + return res + + +def fs_join(*args): + """ + Like os.path.join, but encoding aware + """ + return os.path.join(*map(encode, args)) + + +def exists(path): + if os.path.exists(path): + if os.name is "nt": + dir, name = os.path.split(path.rstrip(os.sep)) + return name in os.listdir(dir) + else: + return True + else: + return False + + +def remove_chars(value, repl): + """ + Remove all chars in repl from string + """ + if type(repl) is unicode: + for badc in list(repl): + value = value.replace(badc, "") + return value + + elif type(value) is unicode: + return value.translate(dict((ord(s), None) for s in repl)) + + elif type(value) is str: + return value.translate(string.maketrans("", ""), repl) + + +def fixurl(url, unquote=None): + old = url + url = urllib.unquote(url) + + if unquote is None: + unquote = url is old + + url = html_unescape(decode(url).decode('unicode-escape')) + url = re.sub(r'(?<!:)/{2,}', '/', url).strip().lstrip('.') + + if not unquote: + url = urllib.quote(url) + + return url + + +def fixname(value): + repl = '<>:"/\\|?*' if os.name is "nt" else '\0/\\"' + return remove_chars(value, repl) + + +def parse_name(value, safechar=True): + path = fixurl(decode(value), unquote=False) + url_p = urlparse.urlparse(path.rstrip('/')) + name = (url_p.path.split('/')[-1] or + url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or + url_p.netloc.split('.', 1)[0]) + + name = urllib.unquote(name) + return fixname(name) if safechar else name + + +def str2int(value): + try: + return int(value) + except: + pass + + ones = ("zero", "one", "two", "three", "four", "five", "six", "seven", "eight", + "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", + "sixteen", "seventeen", "eighteen", "nineteen") + tens = ("", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", + "eighty", "ninety") + + o_tuple = [(w, i) for i, w in enumerate(ones)] + t_tuple = [(w, i * 10) for i, w in enumerate(tens)] + + numwords = dict(o_tuple + t_tuple) + tokens = re.split(r"[\s\-]+", value.lower()) + + try: + return sum(numwords[word] for word in tokens) + except: + return 0 + + +def parse_time(value): + if re.search("da(il)?y|today", value): + seconds = seconds_to_midnight() + + else: + regex = re.compile(r'(\d+| (?:this|an?) )\s*(hr|hour|min|sec|)', re.I) + seconds = sum((int(v) if v.strip() not in ("this", "a", "an") else 1) * + {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, '': 1}[u.lower()] + for v, u in regex.findall(value)) + return seconds + + +def timestamp(): + return int(time.time() * 1000) + + +def which(program): + """ + Works exactly like the unix command which + Courtesy of http://stackoverflow.com/a/377028/675646 + """ + isExe = lambda x: os.path.isfile(x) and os.access(x, os.X_OK) + + fpath, fname = os.path.split(program) + + if fpath: + if isExe(program): + return program + else: + for path in os.environ['PATH'].split(os.pathsep): + exe_file = os.path.join(path.strip('"'), program) + if isExe(exe_file): + return exe_file + + +def format_exc(frame=None): + """ + Format call-stack and display exception information (if availible) + """ + exception_info = sys.exc_info() + callstack_list = traceback.extract_stack(frame) + callstack_list = callstack_list[:-1] + + exception_desc = "" + if exception_info[0] is not None: + exception_callstack_list = traceback.extract_tb(exception_info[2]) + if callstack_list[-1][0] == exception_callstack_list[0][0]: #Does this exception belongs to us? + callstack_list = callstack_list[:-1] + callstack_list.extend(exception_callstack_list) + exception_desc = "".join(traceback.format_exception_only(exception_info[0], exception_info[1])) + + traceback_str = "Traceback (most recent call last):\n" + traceback_str += "".join(traceback.format_list(callstack_list)) + traceback_str += exception_desc + + return traceback_str + + +def seconds_to_nexthour(strict=False): + now = datetime.datetime.today() + nexthour = now.replace(minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(hours=1) + return (nexthour - now).seconds + + +def seconds_to_midnight(utc=None, strict=False): + if utc is None: + now = datetime.datetime.today() + else: + now = datetime.datetime.utcnow() + datetime.timedelta(hours=utc) + + midnight = now.replace(hour=0, minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(days=1) + + return (midnight - now).seconds + + +def replace_patterns(value, rules): + for r in rules: + try: + pattern, repl, flags = r + + except ValueError: + pattern, repl = r + flags = 0 + + value = re.sub(pattern, repl, value, flags) + + return value + + +#@TODO: Remove in 0.4.10 and fix CookieJar.setCookie +def set_cookie(cj, domain, name, value): + return cj.setCookie(domain, name, encode(value)) + + +def set_cookies(cj, cookies): + for cookie in cookies: + if isinstance(cookie, tuple) and len(cookie) == 3: + set_cookie(cj, *cookie) + + +def parse_html_tag_attr_value(attr_name, tag): + m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) + return m.group(2) if m else None + + +def parse_html_form(attr_str, html, input_names={}): + for form in re.finditer(r"(?P<TAG><form[^>]*%s[^>]*>)(?P<CONTENT>.*?)</?(form|body|html)[^>]*>" % attr_str, + html, re.I | re.S): + inputs = {} + action = parse_html_tag_attr_value("action", form.group('TAG')) + + for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=</\2)|)', form.group('CONTENT'), re.I | re.S): + name = parse_html_tag_attr_value("name", inputtag.group(1)) + if name: + value = parse_html_tag_attr_value("value", inputtag.group(1)) + if not value: + inputs[name] = inputtag.group(3) or "" + else: + inputs[name] = value + + if not input_names: + #: No attribute check + return action, inputs + else: + #: Check input attributes + for key, val in input_names.items(): + if key in inputs: + if isinstance(val, basestring) and inputs[key] is val: + continue + elif isinstance(val, tuple) and inputs[key] in val: + continue + elif hasattr(val, "search") and re.match(val, inputs[key]): + continue + else: + break #: Attibute value does not match + else: + break #: Attibute name does not match + else: + return action, inputs #: Passed attribute check + + return {}, None #: No matching form found + + +def chunks(iterable, size): + it = iter(iterable) + item = list(itertools.islice(it, size)) + while item: + yield item + item = list(itertools.islice(it, size)) |