# -*- coding: utf-8 -*-
"""

    Copyright (C) 2014-2016 bromix (plugin.video.youtube)
    Copyright (C) 2016-2018 plugin.video.youtube

    SPDX-License-Identifier: GPL-2.0-only
    See LICENSES/GPL-2.0-only for more information.
"""

from six.moves import range  # noqa: F401

import re

import requests

from ....kodion.utils import FunctionCache
from .json_script_engine import JsonScriptEngine


class Cipher(object):
    """Translate the signature function of a player javascript into a 'json script'.

    The javascript is downloaded, the signature function is located with
    regular expressions and each recognised statement is converted into an
    action dictionary of the form ``{'func': <name>, 'params': [...]}``.
    The resulting ``{'actions': [...]}`` dictionary is handed to
    ``JsonScriptEngine`` to process a signature.
    """

    # Seconds to wait for the javascript download before requests gives up;
    # without a timeout a stalled connection would block indefinitely.
    _REQUEST_TIMEOUT = 30

    def __init__(self, context, javascript_url):
        """Store the context, the SSL verification setting and the javascript url.

        :param context: object providing ``get_settings()`` and ``get_function_cache()``
        :param javascript_url: location of the javascript to analyse
        """
        self._context = context
        self._verify = context.get_settings().verify_ssl()
        self._javascript_url = javascript_url

        # object name -> {function name -> {'name', 'body', 'params'}}
        self._object_cache = {}

    def get_signature(self, signature):
        """Run the json script for this javascript url against ``signature``.

        The json script is requested from the function cache first
        (``get_cached_only``) and, if that yields nothing, via
        ``get(FunctionCache.ONE_DAY, ...)``.

        :param signature: the signature to process
        :return: the result of ``JsonScriptEngine.execute``, or ``u''`` if no
                 json script could be obtained
        """
        function_cache = self._context.get_function_cache()
        json_script = function_cache.get_cached_only(self._load_json_script, self._javascript_url)
        if not json_script:
            json_script = function_cache.get(FunctionCache.ONE_DAY, self._load_json_script, self._javascript_url)

        if json_script:
            json_script_engine = JsonScriptEngine(json_script)
            return json_script_engine.execute(signature)

        return u''

    def _load_json_script(self, javascript_url):
        """Download the javascript and convert it into a json script.

        :param javascript_url: url of the javascript; a scheme is prepended
                               if the url does not start with ``http``
        :return: dictionary as produced by ``_load_javascript``
        :raises Exception: if no signature function is found in the javascript
        """
        headers = {'Connection': 'keep-alive',
                   'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                                 'Chrome/39.0.2171.36 Safari/537.36',
                   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                   'DNT': '1',
                   'Accept-Encoding': 'gzip, deflate',
                   'Accept-Language': 'en-US,en;q=0.8,de;q=0.6'}

        url = javascript_url
        if not url.startswith('http'):
            # strip leading slashes so a protocol-relative url ("//host/path")
            # does not end up as "http:////host/path"
            url = ''.join(['http://', url.lstrip('/')])

        result = requests.get(url, headers=headers, verify=self._verify, allow_redirects=True,
                              timeout=self._REQUEST_TIMEOUT)
        return self._load_javascript(result.text)

    def _load_javascript(self, javascript):
        """Convert the signature function found in ``javascript`` into a json script.

        Each ';'-separated statement of the signature function is checked for
        a split of the signature into characters ('list'), a join of the
        characters ('join') and a call of an object function, which is
        translated by ``_actions_for_helper``.

        :param javascript: javascript source as text
        :return: ``{'actions': [...]}``
        :raises Exception: if the name of the signature function is not found
        :raises KeyError: if a called object function can not be resolved
        :raises ValueError: if a call argument after the first is not an integer
        """
        signature_function = self._find_signature_function_name(javascript)
        if not signature_function:
            raise Exception('Signature function not found')

        raw_parameter, raw_body = self._find_function_body(signature_function, javascript)
        function_parameter = raw_parameter.replace('\n', '').split(',')
        function_body = raw_body.replace('\n', '').split(';')

        # the first parameter of the signature function holds the signature;
        # escape it because javascript identifiers may contain '$'
        sig_name = re.escape(function_parameter[0])
        split_re = re.compile(r'%s\s?=\s?%s.split\(""\)' % (sig_name, sig_name))
        return_re = re.compile(r'return\s+%s.join\(""\)' % sig_name)
        cipher_re = re.compile(
            r'(?P<object_name>[$a-zA-Z0-9]+)\.?\[?"?(?P<function_name>[$a-zA-Z0-9]+)"?\]?\((?P<parameter>[^)]+)\)')

        actions = []
        for line in function_body:
            # list of characters
            if split_re.match(line):
                actions.append({'func': 'list', 'params': ['%SIG%']})

            # return
            if return_re.match(line):
                actions.append({'func': 'join', 'params': ['%SIG%']})

            # real object functions
            cipher_match = cipher_re.match(line)
            if not cipher_match:
                continue

            object_name = cipher_match.group('object_name')
            helper_name = cipher_match.group('function_name')
            arguments = cipher_match.group('parameter').split(',')
            # the first argument is always the signature itself, the others are integers
            parameter = ['%SIG%'] + [int(argument.strip()) for argument in arguments[1:]]

            # get function from object
            helper = self._get_object_function(object_name, helper_name, javascript)

            # try to find known functions and convert them to our json_script
            actions.extend(self._actions_for_helper(helper['body'][0], parameter))

        return {'actions': actions}

    @staticmethod
    def _actions_for_helper(statement, parameter):
        """Translate the first statement of an object function into actions.

        :param statement: first statement of the object function body
        :param parameter: call parameters, ``parameter[0]`` being ``'%SIG%'``
        :return: list of action dictionaries ('slice', 'splice', 'swap', 'reverse');
                 empty if the statement is not recognised
        """
        actions = []

        slice_match = re.match(r'[a-zA-Z]+.slice\((?P<a>\d+),[a-zA-Z]+\)', statement)
        if slice_match:
            params = ['%SIG%', int(slice_match.group('a')), parameter[1]]
            actions.append({'func': 'slice', 'params': params})

        splice_match = re.match(r'[a-zA-Z]+.splice\((?P<a>\d+),[a-zA-Z]+\)', statement)
        if splice_match:
            params = ['%SIG%', int(splice_match.group('a')), parameter[1]]
            actions.append({'func': 'splice', 'params': params})

        if re.match(r'var\s?[a-zA-Z]+=\s?[a-zA-Z]+\[0\]', statement):
            actions.append({'func': 'swap', 'params': ['%SIG%', parameter[1]]})

        if re.match(r'[a-zA-Z].reverse\(\)', statement):
            actions.append({'func': 'reverse', 'params': ['%SIG%']})

        return actions

    @staticmethod
    def _find_signature_function_name(javascript):
        """Return the name of the signature function, or ``''`` if none is found.

        The patterns are tried in order and the ``name`` group of the first
        pattern that matches is returned.

        :param javascript: javascript source as text
        """
        # match_patterns source is youtube-dl
        # https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/extractor/youtube.py#L1344
        # LICENSE: The Unlicense
        match_patterns = [
            r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'(?:\b|[^a-zA-Z0-9$])(?P<name>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
            r'(?P<name>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
            r'(["\'])signature\1\s*,\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'\.sig\|\|(?P<name>[a-zA-Z0-9$]+)\(',
            r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*'
            r'(?P<name>[a-zA-Z0-9$]+)\(',
            r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<name>[a-zA-Z0-9$]+)\(',
            r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<name>[a-zA-Z0-9$]+)\(',
        ]

        for pattern in match_patterns:
            match = re.search(pattern, javascript)
            if match:
                return match.group('name')

        return ''

    @staticmethod
    def _find_function_body(function_name, javascript):
        """Return ``(parameter, body)`` of ``<function_name>=function(...){...}``.

        :param function_name: name of the javascript function
        :param javascript: javascript source as text
        :return: tuple of the raw parameter list and the raw body (up to the
                 first ``}``), or ``('', '')`` if the function is not found
        """
        # normalize function name ('$' is valid in javascript identifiers)
        function_name = re.escape(function_name)
        match = re.search(r'\s?%s=function\((?P<parameter>[^)]+)\)\s?{\s?(?P<body>[^}]+)\s?\}' % function_name,
                          javascript)
        if match:
            return match.group('parameter'), match.group('body')

        return '', ''

    @staticmethod
    def _find_object_body(object_name, javascript):
        """Return the text between the braces of ``var <object_name>={...};``.

        :param object_name: name of the javascript object
        :param javascript: javascript source as text
        :return: the object body, or ``''`` if the object is not found
        """
        object_name = re.escape(object_name)
        match = re.search(r'var %s={(?P<object_body>.*?})};' % object_name, javascript, re.S)
        if match:
            return match.group('object_body')
        return ''

    def _get_object_function(self, object_name, function_name, javascript):
        """Return the description of ``function_name`` defined in ``object_name``.

        All functions of the object are parsed and stored in the object cache
        of this instance; a cached function is returned without parsing again.

        :param object_name: name of the javascript object
        :param function_name: name of the function inside the object
        :param javascript: javascript source as text
        :return: ``{'name': ..., 'body': [statements], 'params': ...}``
        :raises KeyError: if the object does not define ``function_name``
        """
        functions = self._object_cache.setdefault(object_name, {})
        if function_name in functions:
            return functions[function_name]

        object_body = self._find_object_body(object_name, javascript)
        for definition in object_body.split('},'):
            # splitting on '},' removes the closing brace of all but the last function
            if not definition.endswith('}'):
                definition = ''.join([definition, '}'])
            definition = definition.strip()

            match = re.match(r'(?P<name>[^:]*):function\((?P<parameter>[^)]*)\){(?P<body>[^}]+)}', definition)
            if match:
                name = match.group('name').replace('"', '')
                functions[name] = {'name': name,
                                   'body': match.group('body').split(';'),
                                   'params': match.group('parameter')}

        return functions[function_name]