[vice] update tests and add support for Ooyala embeds in article pages

This commit is contained in:
Remita Amine 2017-05-05 16:12:40 +01:00
parent 7ad53cb7ff
commit 1d9e0a4f40

View file

@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
'VICELAND', title, video_id, 'VICELAND', title, video_id,
watch_hub_data.get('video-rating')) watch_hub_data.get('video-rating'))
query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) query['tvetoken'] = self._extract_mvpd_auth(
url, video_id, 'VICELAND', resource)
# signature generation algorithm is reverse engineered from signatureGenerator in # signature generation algorithm is reverse engineered from signatureGenerator in
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in # webpack:///../shared/~/vice-player/dist/js/vice-player.js in
@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE):
try: try:
host = 'www.viceland' if is_locked else self._PREPLAY_HOST host = 'www.viceland' if is_locked else self._PREPLAY_HOST
preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query) preplay = self._download_json(
'https://%s.com/%s/preplay/%s' % (host, locale, video_id),
video_id, query=query)
except ExtractorError as e: except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400:
error = json.loads(e.cause.read().decode()) error = json.loads(e.cause.read().decode())
raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) raise ExtractorError('%s said: %s' % (
self.IE_NAME, error['details']), expected=True)
raise raise
video_data = preplay['video'] video_data = preplay['video']
@ -88,16 +92,17 @@ class ViceBaseIE(AdobePassIE):
class ViceIE(ViceBaseIE): class ViceIE(ViceBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P<locale>[^/]+)/(?:[^/]+/)?videos?/(?P<id>[^/?#&]+)' IE_NAME = 'vice'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P<locale>[^/]+)/)?videos?/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab',
'md5': 'e9d77741f9e42ba583e683cd170660f7', 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2',
'info_dict': { 'info_dict': {
'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj',
'ext': 'flv', 'ext': 'flv',
'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', 'title': 'Monkey Labs of Holland',
'duration': 725.983, 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149',
}, },
'add_ie': ['Ooyala'], 'add_ie': ['Ooyala'],
}, { }, {
@ -136,22 +141,13 @@ class ViceIE(ViceBaseIE):
}, },
'add_ie': ['UplynkPreplay'], 'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4',
'only_matching': True,
}, {
'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229',
'only_matching': True,
}, {
'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show',
'only_matching': True, 'only_matching': True,
}] }]
_PREPLAY_HOST = 'video.vice' _PREPLAY_HOST = 'video.vice'
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) locale, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
locale = mobj.group('locale')
video_id = self._match_id(url)
webpage, urlh = self._download_webpage_handle(url, video_id) webpage, urlh = self._download_webpage_handle(url, video_id)
embed_code = self._search_regex( embed_code = self._search_regex(
r'embedCode=([^&\'"]+)', webpage, r'embedCode=([^&\'"]+)', webpage,
@ -166,6 +162,7 @@ class ViceIE(ViceBaseIE):
class ViceShowIE(InfoExtractor): class ViceShowIE(InfoExtractor):
IE_NAME = 'vice:show'
_VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)' _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P<id>[^/?#&]+)'
_TEST = { _TEST = {
@ -192,12 +189,14 @@ class ViceShowIE(InfoExtractor):
r'<title>(.+?)</title>', webpage, 'title', default=None) r'<title>(.+?)</title>', webpage, 'title', default=None)
if title: if title:
title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip() title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip()
description = self._html_search_meta('description', webpage, 'description') description = self._html_search_meta(
'description', webpage, 'description')
return self.playlist_result(entries, show_id, title, description) return self.playlist_result(entries, show_id, title, description)
class ViceArticleIE(InfoExtractor): class ViceArticleIE(InfoExtractor):
IE_NAME = 'vice:article'
_VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)' _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P<id>[^?#]+)'
_TESTS = [{ _TESTS = [{
@ -216,8 +215,9 @@ class ViceArticleIE(InfoExtractor):
# AES-encrypted m3u8 # AES-encrypted m3u8
'skip_download': True, 'skip_download': True,
}, },
'add_ie': ['UplynkPreplay'],
}, { }, {
'url': 'http://www.vice.com/video/how-to-hack-a-car', 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car',
'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2',
'info_dict': { 'info_dict': {
'id': '3jstaBeXgAs', 'id': '3jstaBeXgAs',
@ -229,6 +229,12 @@ class ViceArticleIE(InfoExtractor):
'upload_date': '20140529', 'upload_date': '20140529',
}, },
'add_ie': ['Youtube'], 'add_ie': ['Youtube'],
}, {
'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1',
'only_matching': True,
}, {
'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -240,22 +246,29 @@ class ViceArticleIE(InfoExtractor):
r'window\.__PREFETCH_DATA\s*=\s*({.*});', r'window\.__PREFETCH_DATA\s*=\s*({.*});',
webpage, 'prefetch data'), display_id) webpage, 'prefetch data'), display_id)
body = prefetch_data['body'] body = prefetch_data['body']
youtube_url = self._html_search_regex(
r'<iframe[^>]+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None) def _url_res(video_url, ie_key):
if youtube_url:
return { return {
'_type': 'url_transparent', '_type': 'url_transparent',
'url': youtube_url, 'url': video_url,
'display_id': display_id, 'display_id': display_id,
'ie_key': 'Youtube', 'ie_key': ie_key,
} }
video_url = self._html_search_regex( embed_code = self._search_regex(
r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL') r'embedCode=([^&\'"]+)', body,
'ooyala embed code', default=None)
if embed_code:
return _url_res('ooyala:%s' % embed_code, 'Ooyala')
return { youtube_url = self._html_search_regex(
'_type': 'url_transparent', r'<iframe[^>]+src="(.*youtube\.com/.*)"',
'url': video_url, body, 'YouTube URL', default=None)
'display_id': display_id, if youtube_url:
'ie_key': ViceIE.ie_key(), return _url_res(youtube_url, 'Youtube')
}
video_url = self._html_search_regex(
r'data-video-url="([^"]+)"',
prefetch_data['embed_code'], 'video URL')
return _url_res(video_url, ViceIE.ie_key())