[youtube] Improve extraction robustness

Fail on missing token only when no formats found
This commit is contained in:
Sergey M․ 2019-04-30 04:32:55 +07:00
parent 6e07b5a6d5
commit 026fbedc85
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D

View file

@ -27,6 +27,7 @@ from ..compat import (
) )
from ..utils import ( from ..utils import (
clean_html, clean_html,
dict_get,
error_to_compat_str, error_to_compat_str,
ExtractorError, ExtractorError,
float_or_none, float_or_none,
@ -1652,6 +1653,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def extract_view_count(v_info):
    """Return the view count stored in *v_info*, converted by int_or_none.

    The value read is the first element of ``v_info['view_count']``; the
    lookup is wrapped in try_get rather than performed directly.
    NOTE(review): presumably try_get yields None when the key or index is
    missing, so the result would be None in that case -- confirm against
    the helper's definition.
    """
    raw_count = try_get(v_info, lambda info: info['view_count'][0])
    return int_or_none(raw_count)
def extract_token(v_info):
    """Look up the playback token in *v_info*.

    The token may be stored under any of several key spellings; all of
    them are handed to dict_get in the order listed below.
    NOTE(review): presumably dict_get returns the value of the first key
    that holds a usable entry and None otherwise -- confirm against the
    helper's definition.
    """
    token_keys = ('account_playback_token', 'accountPlaybackToken', 'token')
    return dict_get(v_info, token_keys)
player_response = {} player_response = {}
# Get video info # Get video info
@ -1741,7 +1745,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
view_count = extract_view_count(get_video_info) view_count = extract_view_count(get_video_info)
if not video_info: if not video_info:
video_info = get_video_info video_info = get_video_info
get_token = get_video_info.get('token') or get_video_info.get('account_playback_token') get_token = extract_token(get_video_info)
if get_token: if get_token:
# Different get_video_info requests may report different results, e.g. # Different get_video_info requests may report different results, e.g.
# some may report video unavailability, but some may serve it without # some may report video unavailability, but some may serve it without
@ -1752,7 +1756,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# due to YouTube measures against IP ranges of hosting providers. # due to YouTube measures against IP ranges of hosting providers.
# Working around by preferring the first succeeded video_info containing # Working around by preferring the first succeeded video_info containing
# the token if no such video_info yet was found. # the token if no such video_info yet was found.
token = video_info.get('token') or video_info.get('account_playback_token') token = extract_token(video_info)
if not token: if not token:
video_info = get_video_info video_info = get_video_info
break break
@ -1769,28 +1773,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
raise ExtractorError( raise ExtractorError(
'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id) 'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
token = video_info.get('token') or video_info.get('account_playback_token')
if not token:
if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']:
regions_allowed = self._html_search_meta(
'regionsAllowed', video_webpage, default=None)
countries = regions_allowed.split(',') if regions_allowed else None
self.raise_geo_restricted(
msg=video_info['reason'][0], countries=countries)
reason = video_info['reason'][0]
if 'Invalid parameters' in reason:
unavailable_message = extract_unavailable_message()
if unavailable_message:
reason = unavailable_message
raise ExtractorError(
'YouTube said: %s' % reason,
expected=True, video_id=video_id)
else:
raise ExtractorError(
'"token" parameter not in video info for unknown reason',
video_id=video_id)
if video_info.get('license_info'): if video_info.get('license_info'):
raise ExtractorError('This video is DRM protected.', expected=True) raise ExtractorError('This video is DRM protected.', expected=True)
@ -2296,6 +2278,29 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if f.get('vcodec') != 'none': if f.get('vcodec') != 'none':
f['stretched_ratio'] = ratio f['stretched_ratio'] = ratio
if not formats:
token = extract_token(video_info)
if not token:
if 'reason' in video_info:
if 'The uploader has not made this video available in your country.' in video_info['reason']:
regions_allowed = self._html_search_meta(
'regionsAllowed', video_webpage, default=None)
countries = regions_allowed.split(',') if regions_allowed else None
self.raise_geo_restricted(
msg=video_info['reason'][0], countries=countries)
reason = video_info['reason'][0]
if 'Invalid parameters' in reason:
unavailable_message = extract_unavailable_message()
if unavailable_message:
reason = unavailable_message
raise ExtractorError(
'YouTube said: %s' % reason,
expected=True, video_id=video_id)
else:
raise ExtractorError(
'"token" parameter not in video info for unknown reason',
video_id=video_id)
self._sort_formats(formats) self._sort_formats(formats)
self.mark_watched(video_id, video_info, player_response) self.mark_watched(video_id, video_info, player_response)