[youtube] Add alternative automatic captions extraction approach (Closes #8667)
This commit is contained in:
parent
efbd6fb8bb
commit
b78b292f0c
1 changed files with 55 additions and 28 deletions
|
@ -975,10 +975,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
return {}
|
||||
try:
|
||||
args = player_config['args']
|
||||
caption_url = args['ttsurl']
|
||||
if not caption_url:
|
||||
self._downloader.report_warning(err_msg)
|
||||
return {}
|
||||
caption_url = args.get('ttsurl')
|
||||
if caption_url:
|
||||
timestamp = args['timestamp']
|
||||
# We get the available subtitles
|
||||
list_params = compat_urllib_parse.urlencode({
|
||||
|
@ -1013,6 +1011,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||
})
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
|
||||
# Some videos don't provide ttsurl but rather caption_tracks and
|
||||
# caption_translation_languages (e.g. 20LmZk1hakA)
|
||||
caption_tracks = args['caption_tracks']
|
||||
caption_translation_languages = args['caption_translation_languages']
|
||||
caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
|
||||
parsed_caption_url = compat_urlparse.urlparse(caption_url)
|
||||
caption_qs = compat_parse_qs(parsed_caption_url.query)
|
||||
|
||||
sub_lang_list = {}
|
||||
for lang in caption_translation_languages.split(','):
|
||||
lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
|
||||
sub_lang = lang_qs.get('lc', [None])[0]
|
||||
if not sub_lang:
|
||||
continue
|
||||
sub_formats = []
|
||||
for ext in self._SUBTITLE_FORMATS:
|
||||
caption_qs.update({
|
||||
'tlang': [sub_lang],
|
||||
'fmt': [ext],
|
||||
})
|
||||
sub_url = compat_urlparse.urlunparse(parsed_caption_url._replace(
|
||||
query=compat_urllib_parse.urlencode(caption_qs, True)))
|
||||
sub_formats.append({
|
||||
'url': sub_url,
|
||||
'ext': ext,
|
||||
})
|
||||
sub_lang_list[sub_lang] = sub_formats
|
||||
return sub_lang_list
|
||||
# An extractor error can be raise by the download process if there are
|
||||
# no automatic captions but there are subtitles
|
||||
except (KeyError, ExtractorError):
|
||||
|
|
Loading…
Reference in a new issue