[youtube] improve m3u8 format extraction
This commit is contained in:
parent
e109f1ff43
commit
89beedd31f
1 changed file with 15 additions and 32 deletions
|
@@ -1253,21 +1253,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
video_id = mobj.group(2)
|
video_id = mobj.group(2)
|
||||||
return video_id
|
return video_id
|
||||||
|
|
||||||
def _extract_from_m3u8(self, manifest_url, video_id):
|
|
||||||
url_map = {}
|
|
||||||
|
|
||||||
def _get_urls(_manifest):
|
|
||||||
lines = _manifest.split('\n')
|
|
||||||
urls = filter(lambda l: l and not l.startswith('#'),
|
|
||||||
lines)
|
|
||||||
return urls
|
|
||||||
manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest')
|
|
||||||
formats_urls = _get_urls(manifest)
|
|
||||||
for format_url in formats_urls:
|
|
||||||
itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag')
|
|
||||||
url_map[itag] = format_url
|
|
||||||
return url_map
|
|
||||||
|
|
||||||
def _extract_annotations(self, video_id):
|
def _extract_annotations(self, video_id):
|
||||||
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id
|
||||||
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.')
|
||||||
|
@@ -1573,19 +1558,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
if self._downloader.params.get('writeannotations', False):
|
if self._downloader.params.get('writeannotations', False):
|
||||||
video_annotations = self._extract_annotations(video_id)
|
video_annotations = self._extract_annotations(video_id)
|
||||||
|
|
||||||
def _map_to_format_list(urlmap):
|
|
||||||
formats = []
|
|
||||||
for itag, video_real_url in urlmap.items():
|
|
||||||
dct = {
|
|
||||||
'format_id': itag,
|
|
||||||
'url': video_real_url,
|
|
||||||
'player_url': player_url,
|
|
||||||
}
|
|
||||||
if itag in self._formats:
|
|
||||||
dct.update(self._formats[itag])
|
|
||||||
formats.append(dct)
|
|
||||||
return formats
|
|
||||||
|
|
||||||
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
||||||
self.report_rtmp_download()
|
self.report_rtmp_download()
|
||||||
formats = [{
|
formats = [{
|
||||||
|
@@ -1718,11 +1690,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||||
formats.append(dct)
|
formats.append(dct)
|
||||||
elif video_info.get('hlsvp'):
|
elif video_info.get('hlsvp'):
|
||||||
manifest_url = video_info['hlsvp'][0]
|
manifest_url = video_info['hlsvp'][0]
|
||||||
url_map = self._extract_from_m3u8(manifest_url, video_id)
|
formats = []
|
||||||
formats = _map_to_format_list(url_map)
|
m3u8_formats = self._extract_m3u8_formats(
|
||||||
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
manifest_url, video_id, 'mp4', fatal=False)
|
||||||
for a_format in formats:
|
for a_format in m3u8_formats:
|
||||||
|
itag = self._search_regex(
|
||||||
|
r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
|
||||||
|
if itag:
|
||||||
|
a_format['format_id'] = itag
|
||||||
|
if itag in self._formats:
|
||||||
|
dct = self._formats[itag].copy()
|
||||||
|
dct.update(a_format)
|
||||||
|
a_format = dct
|
||||||
|
a_format['player_url'] = player_url
|
||||||
|
# Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
|
||||||
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
|
||||||
|
formats.append(a_format)
|
||||||
else:
|
else:
|
||||||
unavailable_message = self._html_search_regex(
|
unavailable_message = self._html_search_regex(
|
||||||
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
|
r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
|
||||||
|
|
Loading…
Reference in a new issue