# -*- coding: utf-8 -*- from __future__ import with_statement import datetime import inspect import os import re import urllib from module.plugins.Plugin import Abort, Fail, Reconnect, Retry, SkipDownload as Skip #@TODO: Remove in 0.4.10 from module.utils import fs_encode, fs_decode, html_unescape, save_join as fs_join #@TODO: Move to utils in 0.4.10 def decode(string, encoding='utf8'): """ Decode string to unicode with utf8 """ if type(string) is str: return string.decode(encoding, "replace") else: return string #@TODO: Move to utils in 0.4.10 def encode(string, encoding='utf8'): """ Decode string to utf8 """ if type(string) is unicode: return string.encode(encoding, "replace") else: return string #@TODO: Move to utils in 0.4.10 def exists(path): if os.path.exists(path): if os.name == "nt": dir, name = os.path.split(path) return name in os.listdir(dir) else: return True else: return False #@TODO: Move to utils in 0.4.10 def fixurl(url): return html_unescape(urllib.unquote(url.decode('unicode-escape'))).strip() #@TODO: Move to utils in 0.4.10 def timestamp(): return int(time.time() * 1000) def seconds_to_midnight(gmt=0): now = datetime.datetime.utcnow() + datetime.timedelta(hours=gmt) if now.hour == 0 and now.minute < 10: midnight = now else: midnight = now + datetime.timedelta(days=1) td = midnight.replace(hour=0, minute=10, second=0, microsecond=0) - now if hasattr(td, 'total_seconds'): res = td.total_seconds() else: #@NOTE: work-around for python 2.5 and 2.6 missing datetime.timedelta.total_seconds res = (td.microseconds + (td.seconds + td.days * 24 * 3600) * 10**6) / 10**6 return int(res) def replace_patterns(string, ruleslist): for r in ruleslist: rf, rt = r string = re.sub(rf, rt, string) return string def set_cookies(cj, cookies): for cookie in cookies: if isinstance(cookie, tuple) and len(cookie) == 3: domain, name, value = cookie cj.setCookie(domain, name, encode(value)) #@TODO: Remove `encode` in 0.4.10 def parse_html_tag_attr_value(attr_name, tag): m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) return m.group(2) if m else None def parse_html_form(attr_str, html, input_names={}): for form in re.finditer(r"(?P]*%s[^>]*>)(?P.*?)]*>" % attr_str, html, re.S | re.I): inputs = {} action = parse_html_tag_attr_value("action", form.group('TAG')) for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=