# -*- coding: utf-8 -*-
#
#@TODO: Move to utils directory 0.4.10

import datetime
import functools
import itertools
import os
import re
import string
import sys
import time
import traceback
import urllib

try:
    import htmlentitydefs
except ImportError:
    #: Python 3 name of the same entity table (keeps the module importable there)
    import html.entities as htmlentitydefs

try:
    import urlparse
except ImportError:
    #: Python 3 location of the same functions
    import urllib.parse as urlparse

try:
    import simplejson as json
except ImportError:
    import json


class utils(object):
    #: Plugin metadata only; the helpers themselves are the module-level functions below
    __name__    = "utils"
    __type__    = "plugin"
    __version__ = "0.02"
    __status__  = "testing"

    __pattern__ = r'^unmatchable$'
    __config__  = []

    __description__ = """Dummy utils class"""
    __license__     = "GPLv3"
    __authors__     = [("Walter Purcaro", "vuolter@gmail.com")]


def lock(fn):
    """
    Decorator running `fn` while holding the `lock` attribute of its first argument

    The first positional argument (normally `self`) must expose a `lock` object
    providing `acquire()` and `release()`. The lock is released even if `fn` raises.
    """
    @functools.wraps(fn)
    def new(*args, **kwargs):
        #: Keep a reference so the lock that was acquired is the one released
        owner_lock = args[0].lock
        owner_lock.acquire()
        try:
            return fn(*args, **kwargs)
        finally:
            owner_lock.release()

    return new


def compare_time(start, end):
    """
    Check whether the current local time lies inside the `start`..`end` window

    `start` and `end` are (hour, minute) pairs of ints or numeric strings.
    Equal bounds mean "always"; a start later than the end wraps around midnight.
    """
    start = [int(part) for part in start]
    end = [int(part) for part in end]

    if start == end:
        return True

    now = list(time.localtime()[3:5])  #: [hour, minute]

    if start < end:
        #: Same-day window: being before `end` is not enough, `start` must have passed too
        return start < now < end

    #: Window wrapping around midnight
    return now > start or now < end


def uniqify(seq):
    """
    Remove duplicates from list preserving order
    Originally by Dave Kirby
    """
    seen = set()
    seen_add = seen.add
    #: `set.add` returns None, so `not seen_add(x)` is always true and only records x
    return [x for x in seq if x not in seen and not seen_add(x)]


def parse_size(value, unit=""):  #: returns bytes
    """
    Convert a textual size such as "1.5 GB" to a number of bytes

    `unit`, when given, overrides the unit found in `value`; only its first
    letter is used (b, k, m, g, t, p, e). Returns 0 if `value` does not start
    with a number. Raises KeyError for an unknown unit letter.
    """
    m = re.match(r"([\d.,]+)\s*([\w^_]*)", value.lower())

    if m is None:
        return 0

    traffic = float(m.group(1).replace(',', '.'))
    unit = (unit.strip().lower() or m.group(2) or "byte")[0]

    #: Compare by value: identity of equal strings is an implementation detail
    if unit == "b":
        return int(traffic)

    sizes = ['b', 'k', 'm', 'g', 't', 'p', 'e']
    sizemap = dict((u, i * 10) for i, u in enumerate(sizes))

    increment = sizemap[unit]
    integer, decimal = map(int, ("%.3f" % traffic).split('.'))

    #: The three decimal digits are scaled by 1/1024 rather than 1/1000,
    #: so the fractional part is a close approximation, not an exact value
    return (integer << increment) + (decimal << (increment - 10))


def fixup(m):
    """
    `re.sub` callback replacing one HTML/XML character reference or named entity

    Returns the matched text untouched when it cannot be resolved.
    """
    text = m.group(0)

    if text[:2] == "&#":
        #: Character reference, decimal or hexadecimal ("&#x..;" / "&#X..;")
        try:
            if text[:3].lower() == "&#x":
                codepoint = int(text[3:-1], 16)
            else:
                codepoint = int(text[2:-1])
            return unichr(codepoint)

        except (ValueError, OverflowError):
            return text  #: leave as is

    #: Named entity
    try:
        codepoint = htmlentitydefs.name2codepoint[text[1:-1]]

    except KeyError:
        return text  #: leave as is

    return unichr(codepoint)


def has_method(obj, name):
    """
    Check if name was defined in obj (return false if inherited)
    """
    return hasattr(obj, '__dict__') and name in obj.__dict__


def html_unescape(text):
    """
    Removes HTML or XML character references and entities from a text string
    """
    return re.sub(r"&#?\w+;", fixup, text)


def isiterable(obj):
    """
    Tell whether obj exposes `__iter__`
    """
    return hasattr(obj, "__iter__")


def get_console_encoding(enc):
    """
    Pick the encoding to use for console text

    On Windows `enc` is returned as given, except that codepage 65001 is
    replaced by "cp850" (with a warning). Everywhere else "utf8" is returned.
    """
    if os.name == "nt":
        if enc == "cp65001":  #: aka UTF-8
            enc = "cp850"
            print("WARNING: Windows codepage 65001 (UTF-8) is not supported, used `%s` instead" % enc)
    else:
        enc = "utf8"

    return enc


#@NOTE: Revert to `decode` in Python 3
def decode(value, encoding=None):
    """
    Encoded string (default to UTF-8) -> unicode string

    If no `encoding` was requested and UTF-8 fails, the console encoding is
    tried. With an explicit `encoding` the UnicodeDecodeError is propagated.
    """
    if type(value) is str:
        try:
            res = unicode(value, encoding or 'utf-8')

        except UnicodeDecodeError:
            if encoding:
                #: Re-raise the original error; building a new UnicodeDecodeError
                #: from a single argument would fail with TypeError instead
                raise

            encoding = get_console_encoding(sys.stdout.encoding)
            res = unicode(value, encoding)

    elif type(value) is unicode:
        res = value

    else:
        res = unicode(value)

    return res


def encode(value, encoding=None, decoding=None):
    """
    Unicode or decoded string -> encoded string (default to UTF-8)

    Byte strings are first decoded with `decoding`, then re-encoded; any other
    object is converted with `str()`.
    """
    if type(value) is unicode:
        res = value.encode(encoding or "utf-8")

    elif type(value) is str:
        res = encode(decode(value, decoding), encoding)

    else:
        res = str(value)

    return res


def fs_join(*args):
    """
    Like os.path.join, but encoding aware
    """
    return os.path.join(*map(encode, args))


def exists(path):
    """
    Like os.path.exists, but on Windows the last path component must also
    appear with that exact spelling in its parent directory listing
    """
    if not os.path.exists(path):
        return False

    if os.name != "nt":
        return True

    #: abspath drops trailing separators and gives bare names a real parent directory
    dirname, name = os.path.split(os.path.abspath(path))

    if not name:
        #: Drive or share root: there is no parent listing to check against
        return True

    return name in os.listdir(dirname)


def remove_chars(value, repl):
    """
    Remove all chars in repl from string

    Returns None when neither `repl` is unicode nor `value` is a str/unicode.
    """
    if type(repl) is unicode:
        for badc in list(repl):
            value = value.replace(badc, "")
        return value

    elif type(value) is unicode:
        return value.translate(dict((ord(s), None) for s in repl))

    elif type(value) is str:
        return value.translate(string.maketrans("", ""), repl)

    return None
def fixurl(url, unquote=None): old = url url = urllib.unquote(url) if unquote is None: unquote = url is old url = html_unescape(decode(url).decode('unicode-escape')) url = re.sub(r'(?:"/\\|?*' if os.name is "nt" else '\0/\\"' return remove_chars(value, repl) def parse_name(value, safechar=True): path = fixurl(decode(value), unquote=False) url_p = urlparse.urlparse(path.rstrip('/')) name = (url_p.path.split('/')[-1] or url_p.query.split('=', 1)[::-1][0].split('&', 1)[0] or url_p.netloc.split('.', 1)[0]) name = urllib.unquote(name) return fixname(name) if safechar else name def str2int(value): try: return int(value) except: pass ones = ("zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen") tens = ("", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety") o_tuple = [(w, i) for i, w in enumerate(ones)] t_tuple = [(w, i * 10) for i, w in enumerate(tens)] numwords = dict(o_tuple + t_tuple) tokens = re.split(r"[\s\-]+", value.lower()) try: return sum(numwords[word] for word in tokens) except: return 0 def parse_time(value): if re.search("da(il)?y|today", value): seconds = seconds_to_midnight() else: regex = re.compile(r'(\d+| (?:this|an?) 
)\s*(hr|hour|min|sec|)', re.I) seconds = sum((int(v) if v.strip() not in ("this", "a", "an") else 1) * {'hr': 3600, 'hour': 3600, 'min': 60, 'sec': 1, '': 1}[u.lower()] for v, u in regex.findall(value)) return seconds def timestamp(): return int(time.time() * 1000) def which(program): """ Works exactly like the unix command which Courtesy of http://stackoverflow.com/a/377028/675646 """ isExe = lambda x: os.path.isfile(x) and os.access(x, os.X_OK) fpath, fname = os.path.split(program) if fpath: if isExe(program): return program else: for path in os.environ['PATH'].split(os.pathsep): exe_file = os.path.join(path.strip('"'), program) if isExe(exe_file): return exe_file def format_exc(frame=None): """ Format call-stack and display exception information (if availible) """ exception_info = sys.exc_info() callstack_list = traceback.extract_stack(frame) callstack_list = callstack_list[:-1] exception_desc = "" if exception_info[0] is not None: exception_callstack_list = traceback.extract_tb(exception_info[2]) if callstack_list[-1][0] == exception_callstack_list[0][0]: #Does this exception belongs to us? 
callstack_list = callstack_list[:-1] callstack_list.extend(exception_callstack_list) exception_desc = "".join(traceback.format_exception_only(exception_info[0], exception_info[1])) traceback_str = "Traceback (most recent call last):\n" traceback_str += "".join(traceback.format_list(callstack_list)) traceback_str += exception_desc return traceback_str def seconds_to_nexthour(strict=False): now = datetime.datetime.today() nexthour = now.replace(minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(hours=1) return (nexthour - now).seconds def seconds_to_midnight(utc=None, strict=False): if utc is None: now = datetime.datetime.today() else: now = datetime.datetime.utcnow() + datetime.timedelta(hours=utc) midnight = now.replace(hour=0, minute=0 if strict else 1, second=0, microsecond=0) + datetime.timedelta(days=1) return (midnight - now).seconds def replace_patterns(value, rules): for r in rules: try: pattern, repl, flags = r except ValueError: pattern, repl = r flags = 0 value = re.sub(pattern, repl, value, flags) return value #@TODO: Remove in 0.4.10 and fix CookieJar.setCookie def set_cookie(cj, domain, name, value): return cj.setCookie(domain, name, encode(value)) def set_cookies(cj, cookies): for cookie in cookies: if isinstance(cookie, tuple) and len(cookie) == 3: set_cookie(cj, *cookie) def parse_html_tag_attr_value(attr_name, tag): m = re.search(r"%s\s*=\s*([\"']?)((?<=\")[^\"]+|(?<=')[^']+|[^>\s\"'][^>\s]*)\1" % attr_name, tag, re.I) return m.group(2) if m else None def parse_html_form(attr_str, html, input_names={}): for form in re.finditer(r"(?P]*%s[^>]*>)(?P.*?)]*>" % attr_str, html, re.I | re.S): inputs = {} action = parse_html_tag_attr_value("action", form.group('TAG')) for inputtag in re.finditer(r'(<(input|textarea)[^>]*>)([^<]*(?=