# -*- coding: utf-8 -*-

import operator
import os
import re
import subprocess
import time
import urllib

from module.plugins.Plugin import Abort
from module.network.HTTPRequest import HTTPRequest
from module.network.CookieJar import CookieJar

from module.plugins.internal.Hoster import Hoster
from module.plugins.internal.misc import html_unescape, json, replace_patterns, which


class BIGHTTPRequest(HTTPRequest):
    """
    Overcome HTTPRequest's load() size limit to allow
    loading very big web pages by overriding HTTPRequest's write() function
    """

    #@TODO: Add 'limit' parameter to HTTPRequest in v0.4.10
    def __init__(self, cookies=None, options=None, limit=1000000):
        """
        :param cookies: forwarded unchanged to HTTPRequest.__init__
        :param options: forwarded unchanged to HTTPRequest.__init__
        :param limit: maximum response size checked in write();
                      a falsy value disables the size check
        """
        self.limit = limit
        HTTPRequest.__init__(self, cookies=cookies, options=options)

    def write(self, buf):
        """
        Append `buf` to the response buffer.

        Raises Abort when the request was aborted. When the buffered response
        already exceeds `self.limit`, the response is dumped to
        "response.dump" and a generic Exception is raised.
        """
        if self.limit and self.rep.tell() > self.limit or self.abort:
            #: getResponse() is called before the abort check, as in the original code
            rep = self.getResponse()
            if self.abort:
                raise Abort()

            #: 'with' guarantees the dump file is closed even if the write fails
            with open("response.dump", "wb") as f:
                f.write(rep)

            raise Exception("Loaded Url exceeded limit")

        self.rep.write(buf)


class YoutubeCom(Hoster):
    __name__    = "YoutubeCom"
    __type__    = "hoster"
    __version__ = "0.55"
    __status__  = "testing"

    __pattern__ = r'https?://(?:[^/]*\.)?(?:youtu\.be/|youtube\.com/watch\?(?:.*&)?v=)[\w\-]+'
    __config__  = [("activated", "bool", "Activated", True),
                   ("quality", "sd;hd;fullhd;240p;360p;480p;720p;1080p;3072p", "Quality Setting", "hd"),
                   ("fmt"    , "int" , "FMT/ITAG Number (0 for auto)", 0    ),
                   (".mp4"   , "bool", "Allow .mp4"                  , True ),
                   (".flv"   , "bool", "Allow .flv"                  , True ),
                   (".webm"  , "bool", "Allow .webm"                 , False),
                   (".3gp"   , "bool", "Allow .3gp"                  , False),
                   ("3d"     , "bool", "Prefer 3D"                   , False)]

    __description__ = """Youtube.com hoster plugin"""
    __license__     = "GPLv3"
    __authors__     = [("spoob", "spoob@pyload.org"),
                       ("zoidberg", "zoidberg@mujmail.cz"),
                       ("GammaC0de", "nitzo2001[AT]yahoo[DOT]com")]

    URL_REPLACEMENTS = [(r'youtu\.be/', 'youtube.com/watch?v=')]

    #: Invalid characters that must be removed from the file name
    invalid_chars = u'\u2605:?><"|\\'

    #: itag -> (name, width, height, quality ranking, 3D)
    formats = {5  : (".flv" , 400 , 240 , 1 , False),
               6  : (".flv" , 640 , 400 , 4 , False),
               17 : (".3gp" , 176 , 144 , 0 , False),
               18 : (".mp4" , 480 , 360 , 2 , False),
               22 : (".mp4" , 1280, 720 , 8 , False),
               43 : (".webm", 640 , 360 , 3 , False),
               34 : (".flv" , 640 , 360 , 4 , False),
               35 : (".flv" , 854 , 480 , 6 , False),
               36 : (".3gp" , 400 , 240 , 1 , False),
               37 : (".mp4" , 1920, 1080, 9 , False),
               38 : (".mp4" , 4096, 3072, 10, False),
               44 : (".webm", 854 , 480 , 5 , False),
               45 : (".webm", 1280, 720 , 7 , False),
               46 : (".webm", 1920, 1080, 9 , False),
               82 : (".mp4" , 640 , 360 , 3 , True ),
               83 : (".mp4" , 400 , 240 , 1 , True ),
               84 : (".mp4" , 1280, 720 , 8 , True ),
               85 : (".mp4" , 1920, 1080, 9 , True ),
               100: (".webm", 640 , 360 , 3 , True ),
               101: (".webm", 640 , 360 , 4 , True ),
               102: (".webm", 1280, 720 , 8 , True )}

    def _decrypt_signature(self, encrypted_sig):
        """
        Turn the encrypted 's' field into a working signature

        The player script URL is read from `self.data`; the decode function is
        extracted from that script with JSInterpreter. The resulting character
        permutation is cached in `self.db` under the key "cache" for 24 hours
        per player URL, and the whole cache is reset when the plugin version
        changes.

        :param encrypted_sig: value of the stream's 's' field
        :return: the decrypted signature string
        """
        try:
            player_url = json.loads(re.search(r'"assets":.+?"js":\s*("[^"]+")', self.data).group(1))

        except (AttributeError, IndexError):
            self.fail(_("Player URL not found"))

        if player_url.startswith("//"):
            player_url = 'https:' + player_url

        if not player_url.endswith(".js"):
            self.fail(_("Unsupported player type %s") % player_url)

        cache_info = self.db.retrieve("cache")
        cache_dirty = False

        #: Start with an empty cache when none is stored or it belongs to another plugin version
        if cache_info is None or 'version' not in cache_info or cache_info['version'] != self.__version__:
            cache_info = {'version': self.__version__,
                          'cache'  : {}}
            cache_dirty = True

        if player_url in cache_info['cache'] and time.time() < cache_info['cache'][player_url]['time'] + 24 * 60 * 60:
            self.log_debug("Using cached decode function to decrypt the URL")
            decrypt_func = lambda s: ''.join(s[_i] for _i in cache_info['cache'][player_url]['decrypt_map'])
            decrypted_sig = decrypt_func(encrypted_sig)

        else:
            player_data = self.load(player_url)
            try:
                function_name = re.search(r'\.sig\|\|([a-zA-Z0-9$]+)\(', player_data).group(1)

            except (AttributeError, IndexError):
                self.fail(_("Signature decode function name not found"))

            try:
                jsi = JSInterpreter(player_data)
                decrypt_func = lambda s: jsi.extract_function(function_name)([s])

                #: Since Youtube just scrambles the order of the characters in the signature
                #: and does not change any byte value, we can store just a transformation map as a cached function
                decrypt_map = [ord(c) for c in decrypt_func(''.join(map(unichr, xrange(len(encrypted_sig)))))]
                cache_info['cache'][player_url] = {'decrypt_map': decrypt_map,
                                                   'time'       : time.time()}
                cache_dirty = True

                decrypted_sig = decrypt_func(encrypted_sig)

            except (JSInterpreterError, AssertionError) as e:
                self.log_error(_("Signature decode failed"), e)
                self.fail(e.message)

        #: Remove old records from cache
        for _k in list(cache_info['cache'].iterkeys()):
            if time.time() >= cache_info['cache'][_k]['time'] + 24 * 60 * 60:
                cache_info['cache'].pop(_k, None)
                cache_dirty = True

        if cache_dirty:
            self.db.store("cache", cache_info)

        return decrypted_sig

    def setup(self):
        """Enable resume/multi-download and swap in a request object with a 2000000 size limit"""
        self.resume_download = True
        self.multiDL = True

        #: Best effort: the old request object is replaced below whether or not closing it succeeds
        try:
            self.req.http.close()
        except Exception:
            pass

        self.req.http = BIGHTTPRequest(cookies=CookieJar(None),
                                       options=self.pyload.requestFactory.getOptions(),
                                       limit=2000000)

    def process(self, pyfile):
        """
        Load the watch page, pick the stream closest to the configured
        quality/format, download it and, when the URL carries a 't=..s' start
        offset and ffmpeg is available, cut the file at that offset.
        """
        pyfile.url = replace_patterns(pyfile.url, self.URL_REPLACEMENTS)
        self.data = self.load(pyfile.url)

        #: NOTE(review): the markup inside this pattern was stripped from the reviewed copy of
        #: the file (the literal was empty and broken across two lines); it is restored here
        #: from the upstream plugin - confirm against version control
        if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', self.data):
            self.offline()

        if "We have been receiving a large volume of requests from your network." in self.data:
            self.temp_offline()

        #: Get config
        use3d = self.config.get('3d')

        if use3d:
            quality = {'sd': 82, 'hd': 84, 'fullhd': 85, '240p': 83, '360p': 82,
                       '480p': 82, '720p': 84, '1080p': 85, '3072p': 85}
        else:
            quality = {'sd': 18, 'hd': 22, 'fullhd': 37, '240p': 5, '360p': 18,
                       '480p': 35, '720p': 22, '1080p': 37, '3072p': 38}

        desired_fmt = self.config.get('fmt')

        if desired_fmt and desired_fmt not in self.formats:
            self.log_warning(_("FMT %d unknown, using default") % desired_fmt)
            desired_fmt = 0

        #: 0 (auto or unknown) is not a key of self.formats, so always resolve it to a
        #: real itag here; leaving it at 0 made the self.formats[desired_fmt] lookups below raise KeyError
        if not desired_fmt:
            desired_fmt = quality.get(self.config.get('quality'), 18)

        #: Parse available streams
        streams_m = re.search(r'"url_encoded_fmt_stream_map":"(.+?)",', self.data)
        if streams_m is None:
            self.fail(_("Stream map not found"))

        #: Fields are separated by the literal six-character sequence \u0026 in the page source
        streams = [x.split(r'\u0026') for x in streams_m.group(1).split(',')]
        streams = [dict((y.split('=', 1)) for y in x) for x in streams]
        #: (itag, url, signature or None, signature is encrypted)
        streams = [(int(x['itag']),
                    urllib.unquote(x['url']),
                    x.get('s', x.get('sig', None)),
                    's' in x)
                   for x in streams]

        # self.log_debug("Found links: %s" % streams)

        self.log_debug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

        #: Build dictionary of supported itags (3D/2D)
        allowed = lambda x: self.config.get(self.formats[x][0])
        streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]

        if not streams:
            self.fail(_("No available stream meets your preferences"))

        #: Prefer streams matching the 3D setting; otherwise use every allowed stream.
        #: The fallback must also be (key, value) pairs - passing the raw 4-tuples to dict() raised ValueError
        fmt_dict = dict([(x[0], x[1:]) for x in streams if self.formats[x[0]][4] == use3d] or
                        [(x[0], x[1:]) for x in streams])

        self.log_debug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                       (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                        "" if desired_fmt in fmt_dict else "NOT ",
                        "" if allowed(desired_fmt) else "NOT "))

        #: Return fmt nearest to quality index
        if desired_fmt in fmt_dict and allowed(desired_fmt):
            chosen_fmt = desired_fmt

        else:
            sel  = lambda x: self.formats[x][3]  #: Select quality index
            comp = lambda x, y: abs(sel(x) - sel(y))

            self.log_debug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])

            chosen_fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                                sel(x) > sel(y) else y, fmt_dict.keys())

        self.log_debug("Chosen fmt: %s" % chosen_fmt)

        url, signature, encrypted = fmt_dict[chosen_fmt]

        if signature:
            if encrypted:
                signature = self._decrypt_signature(signature)

            url += "&signature=" + signature

        if "&ratebypass=" not in url:
            url += "&ratebypass=yes"

        #: Set file name
        file_suffix = self.formats[chosen_fmt][0] if chosen_fmt in self.formats else ".flv"

        #: NOTE(review): this pattern was an empty string in the reviewed copy of the file
        #: (markup stripped) although group(1) is read from it; restored from the upstream
        #: plugin - confirm against version control
        file_name_pattern = '<meta name="title" content="(.+?)">'
        name_m = re.search(file_name_pattern, self.data)
        if name_m is None:
            self.fail(_("File name not found"))

        name = name_m.group(1).replace("/", "")

        #: Cleaning invalid characters from the file name
        name = name.encode('ascii', 'replace')
        for c in self.invalid_chars:
            name = name.replace(c, '_')

        pyfile.name = html_unescape(name)

        #: Named start_m so the local does not shadow the imported 'time' module
        start_m = re.search(r't=((\d+)m)?(\d+)s', pyfile.url)
        ffmpeg = which("ffmpeg")
        if ffmpeg and start_m:
            m, s = start_m.groups()[1:]
            if m is None:
                m = "0"

            pyfile.name += " (starting at %s:%s)" % (m, s)

        pyfile.name += file_suffix
        filename = self.download(url)

        if ffmpeg and start_m:
            #: Re-mux from the start offset into the final name, then drop the full download
            inputfile = filename + "_"
            os.rename(filename, inputfile)

            subprocess.call([
                ffmpeg,
                "-ss", "00:%s:%s" % (m, s),
                "-i", inputfile,
                "-vcodec", "copy",
                "-acodec", "copy",
                filename])

            self.remove(inputfile, trash=False)


"""Credit to this awesome piece of code below goes to the 'youtube_dl' project, kudos!"""


class JSInterpreterError(Exception):
    """Raised when the JS snippet cannot be located or interpreted"""
    pass


class JSInterpreter(object):
    """Minimal interpreter for the subset of JavaScript handled by the methods below"""

    def __init__(self, code, objects=None):
        """
        :param code: JavaScript source to search for functions and objects
        :param objects: optional dict of already extracted objects (name -> dict of functions)
        """
        #: Binary operators, tried in this order by interpret_expression()
        self._OPERATORS = [
            ('|', operator.or_),
            ('^', operator.xor),
            ('&', operator.and_),
            ('>>', operator.rshift),
            ('<<', operator.lshift),
            ('-', operator.sub),
            ('+', operator.add),
            ('%', operator.mod),
            ('/', operator.truediv),
            ('*', operator.mul),
        ]
        self._ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in self._OPERATORS]
        self._ASSIGN_OPERATORS.append(('=', lambda cur, right: right))

        self._VARNAME_PATTERN = r'[a-zA-Z_$][a-zA-Z_$0-9]*'

        if objects is None:
            objects = {}

        self.code = code
        self._functions = {}
        self._objects = objects

    def interpret_statement(self, stmt, local_vars, allow_recursion=100):
        """
        Evaluate a single statement.

        :return: tuple (value, should_abort); should_abort is True for a 'return' statement
        :raises JSInterpreterError: when the recursion budget is exhausted
        """
        if allow_recursion < 0:
            raise JSInterpreterError('Recursion limit reached')

        should_abort = False
        stmt = stmt.lstrip()
        stmt_m = re.match(r'var\s', stmt)

        if stmt_m:
            expr = stmt[len(stmt_m.group(0)):]

        else:
            return_m = re.match(r'return(?:\s+|$)', stmt)
            if return_m:
                expr = stmt[len(return_m.group(0)):]
                should_abort = True
            else:
                # Try interpreting it as an expression
                expr = stmt

        v = self.interpret_expression(expr, local_vars, allow_recursion)
        return v, should_abort

    def interpret_expression(self, expr, local_vars, allow_recursion):
        """
        Evaluate an expression against `local_vars` (which may be mutated by assignments).

        :raises JSInterpreterError: for unsupported or malformed expressions
        """
        expr = expr.strip()

        if expr == '':  # Empty expression
            return None

        if expr.startswith('('):
            parens_count = 0
            for m in re.finditer(r'[()]', expr):
                if m.group(0) == '(':
                    parens_count += 1
                else:
                    parens_count -= 1
                    if parens_count == 0:
                        sub_expr = expr[1:m.start()]
                        sub_result = self.interpret_expression(sub_expr, local_vars, allow_recursion)
                        remaining_expr = expr[m.end():].strip()
                        if not remaining_expr:
                            return sub_result
                        else:
                            #: Substitute the evaluated group as a literal and keep parsing the rest
                            expr = json.dumps(sub_result) + remaining_expr
                        break
            else:
                raise JSInterpreterError('Premature end of parens in %r' % expr)

        for op, opfunc in self._ASSIGN_OPERATORS:
            m = re.match(r'(?x)(?P<out>%s)(?:\[(?P<index>[^\]]+?)\])?\s*%s(?P<expr>.*)$' %
                         (self._VARNAME_PATTERN, re.escape(op)), expr)
            if not m:
                continue

            right_val = self.interpret_expression(m.group('expr'), local_vars, allow_recursion - 1)

            if m.groupdict().get('index'):
                lvar = local_vars[m.group('out')]
                idx = self.interpret_expression(m.group('index'), local_vars, allow_recursion)
                assert isinstance(idx, int)

                cur = lvar[idx]
                val = opfunc(cur, right_val)
                lvar[idx] = val
                return val

            else:
                cur = local_vars.get(m.group('out'))
                val = opfunc(cur, right_val)
                local_vars[m.group('out')] = val
                return val

        if expr.isdigit():
            return int(expr)

        var_m = re.match(r'(?!if|return|true|false)(?P<name>%s)$' % self._VARNAME_PATTERN, expr)
        if var_m:
            return local_vars[var_m.group('name')]

        try:
            return json.loads(expr)

        except ValueError:
            pass

        m = re.match(r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % self._VARNAME_PATTERN, expr)
        if m:
            variable = m.group('var')
            member = m.group('member')
            arg_str = m.group('args')

            if variable in local_vars:
                obj = local_vars[variable]
            else:
                if variable not in self._objects:
                    self._objects[variable] = self.extract_object(variable)
                obj = self._objects[variable]

            if arg_str is None:
                # Member access
                if member == 'length':
                    return len(obj)
                return obj[member]

            assert expr.endswith(')')

            # Function call
            if arg_str == '':
                argvals = tuple()
            else:
                argvals = tuple([self.interpret_expression(v, local_vars, allow_recursion)
                                 for v in arg_str.split(',')])

            if member == 'split':
                assert argvals == ('',)
                return list(obj)

            if member == 'join':
                assert len(argvals) == 1
                return argvals[0].join(obj)

            if member == 'reverse':
                assert len(argvals) == 0
                obj.reverse()
                return obj

            if member == 'slice':
                assert len(argvals) == 1
                return obj[argvals[0]:]

            if member == 'splice':
                assert isinstance(obj, list)
                index, howMany = argvals
                res = []
                #: Always pop at 'index': the list shifts left after each removal
                for i in range(index, min(index + howMany, len(obj))):
                    res.append(obj.pop(index))
                return res

            return obj[member](argvals)

        m = re.match(r'(?P<in>%s)\[(?P<idx>.+)\]$' % self._VARNAME_PATTERN, expr)
        if m:
            val = local_vars[m.group('in')]
            idx = self.interpret_expression(m.group('idx'), local_vars, allow_recursion - 1)
            return val[idx]

        for op, opfunc in self._OPERATORS:
            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr)
            if not m:
                continue

            x, abort = self.interpret_statement(m.group('x'), local_vars, allow_recursion - 1)
            if abort:
                raise JSInterpreterError('Premature left-side return of %s in %r' % (op, expr))

            y, abort = self.interpret_statement(m.group('y'), local_vars, allow_recursion - 1)
            if abort:
                raise JSInterpreterError('Premature right-side return of %s in %r' % (op, expr))

            return opfunc(x, y)

        m = re.match(r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % self._VARNAME_PATTERN, expr)
        if m:
            fname = m.group('func')
            argvals = tuple([int(v) if v.isdigit() else local_vars[v]
                             for v in m.group('args').split(',')])

            if fname not in self._functions:
                self._functions[fname] = self.extract_function(fname)

            return self._functions[fname](argvals)

        raise JSInterpreterError('Unsupported JS expression %r' % expr)

    def extract_object(self, objname):
        """
        Build a dict (member name -> callable) from the object literal `objname` in self.code.

        :raises JSInterpreterError: when no matching object definition is found
        """
        obj = {}
        obj_m = re.search(r'(?:var\s+)?%s\s*=\s*\{\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)\}\s*;'
                          % re.escape(objname), self.code)
        if obj_m is None:
            #: Raise the interpreter's own error type instead of an AttributeError on None
            raise JSInterpreterError('Could not find JS object %r' % objname)

        fields = obj_m.group('fields')

        # Currently, it only supports function definitions
        fields_m = re.finditer(r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj

    def extract_function(self, function_name):
        """
        Locate `function_name` in self.code and return it as a Python callable
        taking one sequence of argument values.

        :raises JSInterpreterError: when the function definition is not found
        """
        func_m = re.search(r'(?x)(?:function\s+%s|[{;,]\s*%s\s*=\s*function|var\s+%s\s*=\s*function)\s*\((?P<args>[^)]*)\)\s*\{(?P<code>[^}]+)\}'
                           % (re.escape(function_name), re.escape(function_name), re.escape(function_name)), self.code)
        if func_m is None:
            raise JSInterpreterError('Could not find JS function %r' % function_name)

        argnames = func_m.group('args').split(',')

        return self.build_function(argnames, func_m.group('code'))

    def call_function(self, function_name, *args):
        """Extract `function_name` from self.code and call it with `args`"""
        f = self.extract_function(function_name)
        return f(args)

    def build_function(self, argnames, code):
        """
        Wrap a JS function body in a Python callable.

        The callable takes one sequence of argument values, runs the
        ';'-separated statements of `code` in order and returns the value of
        the first 'return' statement (or of the last statement if none returns).
        """
        def resf(argvals):
            local_vars = dict(zip(argnames, argvals))
            for stmt in code.split(';'):
                res, abort = self.interpret_statement(stmt, local_vars)
                if abort:
                    break
            return res

        return resf