astrXbian/.install/.kodi/addons/plugin.video.youtube/resources/lib/youtube_plugin/youtube/helper/subtitles.py

246 lines
9.7 KiB
Python
Raw Normal View History

2020-12-17 21:52:17 +01:00
# -*- coding: utf-8 -*-
"""
Copyright (C) 2017-2018 plugin.video.youtube
SPDX-License-Identifier: GPL-2.0-only
See LICENSES/GPL-2.0-only for more information.
"""
import xbmcvfs
import requests
from ...kodion.utils import make_dirs
from six import PY2
# Pick an HTML entity unescape function that works on both Python 2 and 3.
try:
    from six.moves import html_parser

    unescape = html_parser.HTMLParser().unescape
except (ImportError, AttributeError):
    # HTMLParser.unescape was removed in Python 3.9, which surfaces here as an
    # AttributeError; html.unescape is the replacement on Python 3.
    import html

    unescape = html.unescape
class Subtitles(object):
    """Resolve, download and cache subtitle tracks for a single video.

    The caption metadata passed to the constructor is searched for tracks (or
    translations) matching the requested language codes. Depending on the
    add-on settings the result is either the remote subtitle URL or the path
    of a file written below ``BASE_PATH``.
    """

    # Values returned by ``settings.subtitle_languages()``.
    LANG_NONE = 0           # never fetch subtitles
    LANG_PROMPT = 1         # let the user pick from a dialog
    LANG_CURR_FALLBACK = 2  # configured language, then English variants
    LANG_CURR = 3           # configured language only
    LANG_CURR_NO_ASR = 4    # configured language, skipping tracks of kind 'asr'

    BASE_PATH = 'special://temp/plugin.video.youtube/'
    SRT_FILE = ''.join([BASE_PATH, '%s.%s.srt'])

    def __init__(self, context, video_id, captions):
        """Collect caption metadata for ``video_id``.

        :param context: plugin context giving access to settings, UI and logging
        :param video_id: id of the video the subtitles belong to
        :param captions: dict that may contain a 'playerCaptionsTracklistRenderer'
                         entry; ``None`` is treated as "no captions"
        """
        self.context = context
        settings = context.get_settings()
        self._verify = settings.verify_ssl()
        self.video_id = video_id
        self.language = settings.get_string('youtube.language', 'en_US').replace('_', '-')

        self.headers = {'Host': 'www.youtube.com',
                        'Connection': 'keep-alive',
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                                      '(KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
                        'Accept': '*/*',
                        'DNT': '1',
                        'Referer': 'https://www.youtube.com/tv',
                        'Accept-Encoding': 'gzip, deflate',
                        'Accept-Language': 'en-US,en;q=0.8,de;q=0.6'}

        # A missing captions payload simply means there is nothing to offer.
        self.renderer = (captions or {}).get('playerCaptionsTracklistRenderer', {})
        self.caption_tracks = self.renderer.get('captionTracks', [])
        self.translation_langs = self.renderer.get('translationLanguages', [])
        # Default track of the video; its base url is used for translations.
        self.caption_track = self._find_default_caption_track()

        # The prompt request is one-shot: read it, then clear it.
        ui = self.context.get_ui()
        self.prompt_override = ui.get_home_window_property('prompt_for_subtitles') == video_id
        ui.clear_home_window_property('prompt_for_subtitles')

    def _find_default_caption_track(self):
        """Return the caption track of the default audio track, or ``{}``."""
        default_audio = self.renderer.get('defaultAudioTrackIndex')
        if default_audio is None:
            return {}

        audio_tracks = self.renderer.get('audioTracks', [])
        try:
            audio_track = audio_tracks[default_audio]
        except (IndexError, KeyError, TypeError):
            return {}
        if not audio_track:
            return {}

        default_caption = audio_track.get('defaultCaptionTrackIndex')
        if default_caption is None:
            default_caption = audio_track.get('captionTrackIndices')
            if isinstance(default_caption, list):
                # An empty index list means there is no default caption track.
                default_caption = default_caption[0] if default_caption else None
        if default_caption is None:
            return {}

        try:
            return self.caption_tracks[default_caption]
        except (IndexError, KeyError, TypeError):
            return {}

    def srt_filename(self, sub_language):
        """Return the cache path used for this video and ``sub_language``."""
        return self.SRT_FILE % (self.video_id, sub_language)

    def _write_file(self, _file, contents):
        """Write ``contents`` to ``_file``.

        :return: True if the directory exists and the write succeeded,
                 otherwise False
        """
        if not make_dirs(self.BASE_PATH):
            self.context.log_debug('Failed to create directories: %s' % self.BASE_PATH)
            return False

        self.context.log_debug('Writing subtitle file: %s' % _file)
        try:
            f = xbmcvfs.File(_file, 'w')
            try:
                written = f.write(contents)
            finally:
                # Always release the handle, even when the write raises.
                f.close()
        except Exception:
            written = False

        if not written:
            self.context.log_debug('File write failed for: %s' % _file)
            return False
        return True

    def _unescape(self, text):
        """Return ``text`` as unicode with HTML entities replaced.

        Best effort: on failure the text is returned as far as it was processed.
        """
        # Only byte strings need decoding; unicode text has no usable decode().
        if isinstance(text, bytes):
            try:
                text = text.decode('utf8', 'ignore')
            except UnicodeDecodeError:
                self.context.log_debug('Subtitle unescape: failed to decode utf-8')

        try:
            text = unescape(text)
        except Exception:
            self.context.log_debug('Subtitle unescape: failed to unescape text')
        return text

    @staticmethod
    def _unique(items):
        """Return ``items`` without duplicates, keeping first-seen order."""
        seen = set()
        unique = []
        for item in items:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    def _get_for_languages(self, languages, no_asr=False):
        """Fetch subtitles for each language code, in order of preference."""
        list_of_subs = []
        for language in self._unique(languages):
            list_of_subs.extend(self._get(language=language, no_asr=no_asr))
        return self._unique(list_of_subs)

    def get_subtitles(self):
        """Return subtitle paths/urls according to the subtitle setting.

        :return: list of subtitle locations, most preferred first; empty when
                 subtitles are disabled, unavailable or the setting is unknown
        """
        if self.prompt_override:
            languages = self.LANG_PROMPT
        else:
            languages = self.context.get_settings().subtitle_languages()
        self.context.log_debug('Subtitle get_subtitles: for setting |%s|' % str(languages))

        if languages == self.LANG_NONE:
            return []
        if languages == self.LANG_PROMPT:
            return self._prompt()

        # e.g. 'en-US' -> also try the bare 'en'
        current = [self.language, self.language.split('-')[0]]
        if languages == self.LANG_CURR:
            return self._get_for_languages(current)
        if languages == self.LANG_CURR_NO_ASR:
            return self._get_for_languages(current, no_asr=True)
        if languages == self.LANG_CURR_FALLBACK:
            return self._get_for_languages(current + ['en', 'en-US', 'en-GB'])

        self.context.log_debug('Unknown language_enum: %s for subtitles' % str(languages))
        return []

    def _get_all(self):
        """Fetch subtitles for every available translation language."""
        return self._get_for_languages(
            [language.get('languageCode') for language in self.translation_langs])

    def _prompt(self):
        """Ask the user to choose a subtitle language and fetch it.

        :return: list with the chosen subtitle, or an empty list if nothing is
                 available or the dialog was cancelled
        """
        tracks = [(track.get('languageCode'), self._get_language_name(track))
                  for track in self.caption_tracks]
        translations = [(track.get('languageCode'), self._get_language_name(track))
                        for track in self.translation_langs]
        languages = tracks + translations

        if not languages:
            self.context.log_debug('No subtitles found for prompt')
            return []

        choice = self.context.get_ui().on_select(
            self.context.localize(30560),
            [language_name for language, language_name in languages])
        if choice == -1:
            self.context.log_debug('Subtitle selection cancelled')
            return []
        return self._get(language=languages[choice][0], language_name=languages[choice][1])

    def _find_caption_track(self, language, language_name=None, no_asr=False):
        """Return the caption track matching ``language``, or ``None``.

        With ``language_name`` only a track carrying that exact name matches.
        Otherwise the last track whose kind is not 'asr' wins, and a track of
        kind 'asr' is used only as a fallback, unless ``no_asr`` is set.
        """
        caption_track = None
        asr_track = None
        for track in self.caption_tracks:
            if language != track.get('languageCode'):
                continue
            if language_name is not None:
                if language_name == self._get_language_name(track):
                    return track
                continue
            if track.get('kind') == 'asr':
                if not no_asr:
                    asr_track = track
            else:
                caption_track = track

        if caption_track is None:
            return asr_track
        return caption_track

    def _get(self, language='en', language_name=None, no_asr=False):
        """Return the subtitle for ``language`` as a one element list.

        :param language: language code to look up
        :param language_name: optional track name that must match exactly
        :param no_asr: ignore tracks of kind 'asr'
        :return: ``[path]`` for a cached or freshly written file, ``[url]`` when
                 downloading is disabled, or ``[]`` when nothing is available
        """
        fname = self.srt_filename(language)
        if xbmcvfs.exists(fname):
            self.context.log_debug('Subtitle exists for: %s, filename: %s' % (language, fname))
            return [fname]

        caption_track = self._find_caption_track(language, language_name, no_asr)
        has_translation = any(language == lang.get('languageCode')
                              for lang in self.translation_langs)

        if ((self.caption_track.get('languageCode') != language)
                and (not has_translation) and (caption_track is None)):
            self.context.log_debug('No subtitles found for: %s' % language)
            return []

        subtitle_url = None
        if caption_track is not None:
            base_url = caption_track.get('baseUrl')
            if base_url:
                subtitle_url = ''.join([base_url, '&fmt=vtt&type=track'])
        elif has_translation:
            # No native track: request a translation of the default track.
            base_url = self.caption_track.get('baseUrl')
            if base_url:
                subtitle_url = ''.join([base_url, '&fmt=vtt&type=track&tlang=', language])

        if not subtitle_url:
            self.context.log_debug('No subtitles found for: %s' % language)
            return []

        self.context.log_debug('Subtitle url: %s' % subtitle_url)
        if not self.context.get_settings().subtitle_download():
            return [subtitle_url]

        try:
            result_auto = requests.get(subtitle_url, headers=self.headers,
                                       verify=self._verify, allow_redirects=True)
            # Do not cache the body of an HTTP error response as a subtitle.
            result_auto.raise_for_status()
        except requests.exceptions.RequestException as error:
            self.context.log_debug('Subtitle request failed for: %s, error: %s' % (language, error))
            return []

        if not result_auto.text:
            self.context.log_debug('Failed to retrieve subtitles for: %s' % language)
            return []

        self.context.log_debug('Subtitle found for: %s' % language)
        contents = bytearray(self._unescape(result_auto.text), encoding='utf8', errors='ignore')
        # Only hand out the path if the file was actually written.
        if not self._write_file(fname, contents):
            return []
        return [fname]

    def _get_language_name(self, track):
        """Return the display name of ``track``, or ``None`` if it has none.

        The name is read from 'languageName' when present, otherwise 'name',
        using either its 'simpleText' or the text of its first 'runs' entry.
        """
        key = 'languageName' if 'languageName' in track else 'name'
        name_field = track.get(key) or {}

        lang_name = name_field.get('simpleText')
        if not lang_name:
            track_name = name_field.get('runs', [{}])
            if isinstance(track_name, list) and len(track_name) >= 1:
                lang_name = track_name[0].get('text')

        if lang_name:
            return self._decode_language_name(lang_name)
        return None

    @staticmethod
    def _decode_language_name(language_name):
        """Round-trip ``language_name`` through 'raw_unicode_escape'.

        The encoded bytes are decoded as utf-8 on Python 2 and as
        'raw_unicode_escape' on Python 3.
        """
        language_name = language_name.encode('raw_unicode_escape')
        if PY2:
            language_name = language_name.decode('utf-8')
        else:
            language_name = language_name.decode('raw_unicode_escape')
        return language_name