[cbsnews] add support for live videos(fixes )

This commit is contained in:
remitamine 2016-02-02 23:02:18 +01:00
parent 87de7069b9
commit fd3a1f3d60
2 changed files with 48 additions and 9 deletions
youtube_dl/extractor

View file

@ -90,7 +90,10 @@ from .canalplus import CanalplusIE
from .canalc2 import Canalc2IE from .canalc2 import Canalc2IE
from .canvas import CanvasIE from .canvas import CanvasIE
from .cbs import CBSIE from .cbs import CBSIE
from .cbsnews import CBSNewsIE from .cbsnews import (
CBSNewsIE,
CBSNewsLiveVideoIE,
)
from .cbssports import CBSSportsIE from .cbssports import CBSSportsIE
from .ccc import CCCIE from .ccc import CCCIE
from .ceskatelevize import CeskaTelevizeIE from .ceskatelevize import CeskaTelevizeIE

View file

@ -1,15 +1,14 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re from .common import InfoExtractor
import json
from .theplatform import ThePlatformIE from .theplatform import ThePlatformIE
from ..utils import parse_duration
class CBSNewsIE(ThePlatformIE): class CBSNewsIE(ThePlatformIE):
IE_DESC = 'CBS News' IE_DESC = 'CBS News'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:[^/]+/)+(?P<id>[\da-z_-]+)' _VALID_URL = r'http://(?:www\.)?cbsnews\.com/(?:news|videos)/(?P<id>[\da-z_-]+)'
_TESTS = [ _TESTS = [
{ {
@ -48,14 +47,13 @@ class CBSNewsIE(ThePlatformIE):
] ]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) video_id = self._match_id(url)
video_id = mobj.group('id')
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
video_info = json.loads(self._html_search_regex( video_info = self._parse_json(self._html_search_regex(
r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'', r'(?:<ul class="media-list items" id="media-related-items"><li data-video-info|<div id="cbsNewsVideoPlayer" data-video-player-options)=\'({.+?})\'',
webpage, 'video JSON info')) webpage, 'video JSON info'), video_id)
item = video_info['item'] if 'item' in video_info else video_info item = video_info['item'] if 'item' in video_info else video_info
title = item.get('articleTitle') or item.get('hed') title = item.get('articleTitle') or item.get('hed')
@ -88,3 +86,41 @@ class CBSNewsIE(ThePlatformIE):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
} }
class CBSNewsLiveVideoIE(InfoExtractor):
IE_DESC = 'CBS News Live Videos'
_VALID_URL = r'http://(?:www\.)?cbsnews\.com/live/video/(?P<id>[\da-z_-]+)'
_TEST = {
'url': 'http://www.cbsnews.com/live/video/clinton-sanders-prepare-to-face-off-in-nh/',
'info_dict': {
'id': 'clinton-sanders-prepare-to-face-off-in-nh',
'ext': 'flv',
'title': 'Clinton, Sanders Prepare To Face Off In NH',
'duration': 334,
},
}
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
video_info = self._parse_json(self._html_search_regex(
r'data-story-obj=\'({.+?})\'', webpage, 'video JSON info'), video_id)['story']
hdcore_sign = 'hdcore=3.3.1'
f4m_formats = self._extract_f4m_formats(video_info['url'] + '&' + hdcore_sign, video_id)
if f4m_formats:
for entry in f4m_formats:
# URLs without the extra param induce an 404 error
entry.update({'extra_param_to_segment_url': hdcore_sign})
return {
'id': video_id,
'title': video_info['headline'],
'thumbnail': video_info.get('thumbnail_url_hd') or video_info.get('thumbnail_url_sd'),
'duration': parse_duration(video_info.get('segmentDur')),
'formats': f4m_formats,
}