# -*- coding: utf-8 -*- from __future__ import with_statement import inspect import os import re import urllib from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 from module.utils import fs_encode, fs_decode, html_unescape, save_join as fs_join #@TODO: Move to utils in 0.4.10 def decode(string, encoding='utf8'): """ Decode string to unicode with utf8 """ if type(string) is str: return string.decode(encoding, "replace") else: return string #@TODO: Move to utils in 0.4.10 def encode(string, encoding='utf8'): """ Decode string to utf8 """ if type(string) is unicode: return string.encode(encoding, "replace") else: return string #@TODO: Move to utils in 0.4.10 def fixurl(url): return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip() #@TODO: Move to utils in 0.4.10 def timestamp(): return int(time.time() * 1000) def seconds_to_midnight(gmt=0): now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) if now.hour == 0 and now.minute < 10: midnight = now else: midnight = now + datetime.timedelta(days=1) td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now if hasattr(td, 'total_seconds'): res = td.total_seconds() else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 return int(res) def replace_patterns(string, ruleslist): for r in ruleslist: rf, rt = r string = re.sub(rf, rt, string) return string def set_cookies(cj, cookies): for cookie in cookies: if isinstance(cookie, tuple) and len(cookie) == 3: domain, name, value = cookie cj.setCookie(domain, name, value) def parse_html_tag_attr_value(attr_name, tag): m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) return m.group(2) if m else None def parse_html_form(attr_str, html, input_names={}): for form in re.finditer(r"(?P]*%s[^>]*>)(?P.*?)]*>" % attr_str, html, re.S | re.I): inputs = {} action = parseHtmlTagAttrValue("action", form.group('TAG')) for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?= `encode` in 0.4.10 log("%(plugin)s%(id)s: %(msg)s" % {'plugin': self.__name__, 'id' : ("[%s]" % self.pyfile.id) if hasattr(self, 'pyfile') else "", 'msg' : msg or _(level.upper() + " MARK")}) def log_debug(self, *args): if self.pyload.debug: return self._log("debug", args) def log_info(self, *args): return self._log("info", args) def log_warning(self, *args): return self._log("warning", args) def log_error(self, *args): return self._log("error", args) def log_critical(self, *args): return self._log("critical", args) def set_config(self, option, value): """ Set config value for current plugin :param option: :param value: :return: """ self.pyload.config.setPlugin(self.__name__, option, value) def get_config(self, option, default="", plugin=None): """ Returns config value for current plugin :param option: :return: """ try: return self.pyload.config.getPlugin(plugin or self.__name__, option) except KeyError: self.log_warning(_("Config option or plugin not found")) return default def store(self, key, value): """ Saves a value persistently to the database """ self.pyload.db.setStorage(self.__name__, key, value) def retrieve(self, key, default=None): """ Retrieves saved value or dict of all saved entries if key is None """ return self.pyload.db.getStorage(self.__name__, key) or default def delete(self, key): """ Delete entry in db """ self.pyload.db.delStorage(self.__name__, key) def fail(self, reason): """ Fail and give reason """ raise Fail(encode(reason)) #: Move `encode(reason)` to manager in 0.4.10 def error(self, reason="", type=_("Parse")): if not reason: type = _("Unknown") msg = _("%s error") % type.strip().capitalize() if type else _("Error") msg += (": %s" % reason.strip()) if reason else "" msg += _(" | Plugin may be out of date") raise Fail(msg) def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, req=None): """ Load content at url and returns it :param url: :param get: :param post: :param ref: :param cookies: :param just_header: If True only the header will be retrieved and returned as dict :param decode: Wether to decode the output according to http header, should be True in most cases :return: Loaded content """ if hasattr(self, 'pyfile') and self.pyfile.abort: self.abort() url = fixurl(url) if not url or not isinstance(url, basestring): self.fail(_("No url given")) if self.pyload.debug: self.log_debug("LOAD URL " + url, *["%s=%s" % (key, val) for key, val in locals().iteritems() if key not in ("self", "url")]) if req is None: if hasattr(self, "req"): req = self.req else: req = self.pyload.requestFactory.getRequest(self.__name__) res = req.load(url, get, post, ref, cookies, just_header, isinstance(post, dict), decode is True) #@TODO: Fix network multipart in 0.4.10 if decode: #@TODO: Move to network in 0.4.10 res = html_unescape(res) if isinstance(decode, basestring): #@TODO: Move to network in 0.4.10 res = decode(res, decode) if self.pyload.debug: frame = inspect.currentframe() framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno)) try: if not os.path.exists(os.path.join("tmp", self.__name__)): os.makedirs(os.path.join("tmp", self.__name__)) with open(framefile, "wb") as f: del frame #: Delete the frame or it wont be cleaned f.write(encode(res)) except IOError, e: self.log_error(e) if just_header: #: Parse header header = {'code': req.code} for line in res.splitlines(): line = line.strip() if not line or ":" not in line: continue key, none, value = line.partition(":") key = key.strip().lower() value = value.strip() if key in header: if type(header[key]) is list: header[key].append(value) else: header[key] = [header[key], value] else: header[key] = value res = header return res