[nba] extract all video formats and extract more info

This commit is contained in:
remitamine 2015-10-02 17:24:30 +01:00
parent 59a9efe85b
commit 8fc226ef99
2 changed files with 74 additions and 33 deletions

View file

@ -351,7 +351,10 @@ from .myvideo import MyVideoIE
from .myvidster import MyVidsterIE from .myvidster import MyVidsterIE
from .nationalgeographic import NationalGeographicIE from .nationalgeographic import NationalGeographicIE
from .naver import NaverIE from .naver import NaverIE
from .nba import NBAIE from .nba import (
NBAIE,
NBAWatchIE,
)
from .nbc import ( from .nbc import (
NBCIE, NBCIE,
NBCNewsIE, NBCNewsIE,

View file

@ -2,62 +2,100 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
remove_end,
parse_duration, parse_duration,
parse_iso8601,
int_or_none,
) )
class NBABaseIE(InfoExtractor):
    """Shared extraction flow for the nba.com extractors.

    Subclasses provide ``_VALID_URL`` with an ``id`` group and an
    ``_extract_metadata(webpage, video_id)`` method returning a dict of
    info fields; this base class adds ``id`` and ``formats`` to that dict.
    """

    def _get_formats(self, video_id):
        """Return the list of fixed MP4 renditions for *video_id*.

        Each URL is built by appending a rendition suffix to a CDN base
        path derived from *video_id*. The URLs are not probed here, and the
        entries are listed from the smallest to the largest frame size.
        """
        base_url = 'http://nba.cdn.turner.com/nba/big%s' % video_id
        return [{
            'url': base_url + '_nba_android_high.mp4',
            'width': 480,
            'height': 320,
            'format_id': '320p',
        }, {
            'url': base_url + '_640x360_664b.mp4',
            'width': 640,
            'height': 360,
            'format_id': '360p',
        }, {
            'url': base_url + '_768x432_1404.mp4',
            'width': 768,
            'height': 432,
            'format_id': '432p',
        }, {
            'url': base_url + '_1280x720.mp4',
            'width': 1280,
            'height': 720,
            'format_id': '720p',
        }]

    def _real_extract(self, url):
        """Download the page at *url* and assemble the resulting info dict."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        ret = self._extract_metadata(webpage, video_id)
        # The matched id is a slash-separated path; only the text after the
        # last slash is reported as the id, while the full path is still
        # used to build the format URLs.
        ret['id'] = video_id.rpartition('/')[2]
        ret['formats'] = self._get_formats(video_id)
        return ret
class NBAIE(NBABaseIE):
    """Extractor for video pages on nba.com (optionally under www.)."""

    IE_NAME = 'nba'
    _VALID_URL = r'https?://(?:www\.)?nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
    _TESTS = [{
        'url': 'http://www.nba.com/video/games/nets/2012/12/04/0021200253-okc-bkn-recap.nba/index.html',
        'md5': '9d902940d2a127af3f7f9d2f3dc79c96',
        'info_dict': {
            'id': '0021200253-okc-bkn-recap.nba',
            'ext': 'mp4',
            'title': 'Thunder vs. Nets',
            'description': 'Kevin Durant scores 32 points and dishes out six assists as the Thunder beat the Nets in Brooklyn.',
            'duration': 181,
            'timestamp': 1354680189,
            'upload_date': '20121205',
        },
    }, {
        'url': 'http://www.nba.com/video/games/hornets/2014/12/05/0021400276-nyk-cha-play5.nba/',
        'only_matching': True,
    }]

    def _extract_metadata(self, webpage, video_id):
        """Build the metadata fields from the page's meta values.

        Reads the ``name``, ``description``, ``duration``, ``thumbnailUrl``
        and ``uploadDate`` entries from *webpage*; *video_id* is not used.
        """
        return {
            'title': self._html_search_meta('name', webpage),
            'description': self._html_search_meta('description', webpage),
            'duration': parse_duration(self._html_search_meta('duration', webpage)),
            'thumbnail': self._html_search_meta('thumbnailUrl', webpage),
            'timestamp': parse_iso8601(self._html_search_meta('uploadDate', webpage)),
        }
class NBAWatchIE(NBABaseIE):
    """Extractor for video pages on watch.nba.com."""

    IE_NAME = 'nba:watch'
    # The dot after "watch" is escaped so that only the literal host
    # watch.nba.com matches; an unescaped dot would accept any character.
    _VALID_URL = r'https?://watch\.nba\.com/(?:nba/)?video(?P<id>/[^?]*?)/?(?:/index\.html)?(?:\?.*)?$'
    _TESTS = [{
        'url': 'http://watch.nba.com/nba/video/channels/playoffs/2015/05/20/0041400301-cle-atl-recap.nba',
        'md5': 'b2b39b81cf28615ae0c3360a3f9668c4',
        'info_dict': {
            'id': '0041400301-cle-atl-recap.nba',
            'ext': 'mp4',
            'title': 'Hawks vs. Cavaliers Game 1',
            'description': 'md5:8094c3498d35a9bd6b1a8c396a071b4d',
            'duration': 228,
            'timestamp': 1432094400,
            'upload_date': '20150520',
        }
    }]

    def _extract_metadata(self, webpage, video_id):
        """Fetch the metadata fields from the JSON lookup service.

        The numeric ``programId`` assigned in a script on *webpage* is used
        as the query key; the first document of the JSON response supplies
        the fields. ``name`` is required, the other fields are optional.
        """
        program_id = self._search_regex(r'var\s+programId\s*=\s*(\d+);', webpage, 'program id')
        # NOTE(review): an empty 'docs' list raises IndexError here --
        # confirm whether the service can return no matches.
        metadata = self._download_json(
            'http://smbsolr.cdnak.neulion.com/solr_nbav6/nba/nba/mlt/?wt=json&fl=name,description,image,runtime,releaseDate&q=sequence%3A' + program_id, video_id)['match']['docs'][0]

        return {
            'title': metadata['name'],
            'description': metadata.get('description'),
            'duration': int_or_none(metadata.get('runtime')),
            'thumbnail': metadata.get('image'),
            'timestamp': parse_iso8601(metadata.get('releaseDate')),
        }