[Yahoo/NBCSports] Generalize NBC sports info extractor
This commit is contained in:
parent
a28ccbabc6
commit
a2a4d5fa31
3 changed files with 37 additions and 5 deletions
|
@ -310,6 +310,7 @@ from .naver import NaverIE
|
||||||
from .nba import NBAIE
|
from .nba import NBAIE
|
||||||
from .nbc import (
|
from .nbc import (
|
||||||
NBCIE,
|
NBCIE,
|
||||||
|
NBCSportsVPlayerIE,
|
||||||
NBCSportsIE,
|
NBCSportsIE,
|
||||||
NBCNewsIE,
|
NBCNewsIE,
|
||||||
)
|
)
|
||||||
|
|
|
@ -50,7 +50,7 @@ class NBCIE(InfoExtractor):
|
||||||
return self.url_result(theplatform_url)
|
return self.url_result(theplatform_url)
|
||||||
|
|
||||||
|
|
||||||
class NBCSportsIE(InfoExtractor):
|
class NBCSportsVPlayerIE(InfoExtractor):
|
||||||
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
|
_VALID_URL = r'https?://vplayer\.nbcsports\.com/(?:[^/]+/)+(?P<id>[0-9a-zA-Z]+)'
|
||||||
|
|
||||||
_TEST = {
|
_TEST = {
|
||||||
|
@ -64,6 +64,13 @@ class NBCSportsIE(InfoExtractor):
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _extract_url(webpage):
|
||||||
|
iframe_m = re.search(
|
||||||
|
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
||||||
|
if iframe_m:
|
||||||
|
return iframe_m.group('url')
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
video_id = self._match_id(url)
|
video_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, video_id)
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
@ -71,6 +78,28 @@ class NBCSportsIE(InfoExtractor):
|
||||||
return self.url_result(theplatform_url, 'ThePlatform')
|
return self.url_result(theplatform_url, 'ThePlatform')
|
||||||
|
|
||||||
|
|
||||||
|
class NBCSportsIE(InfoExtractor):
|
||||||
|
# Does not include https because its certificate is invalid
|
||||||
|
_VALID_URL = r'http://www\.nbcsports\.com//?(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||||
|
|
||||||
|
_TEST = {
|
||||||
|
'url': 'http://www.nbcsports.com//college-basketball/ncaab/tom-izzo-michigan-st-has-so-much-respect-duke',
|
||||||
|
'md5': 'ba6c93f96b67bf05344f78bd523dac0f',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'PHJSaFWbrTY9',
|
||||||
|
'ext': 'flv',
|
||||||
|
'title': 'Tom Izzo, Michigan St. has \'so much respect\' for Duke',
|
||||||
|
'description': 'md5:ecb459c9d59e0766ac9c7d5d0eda8113',
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
video_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, video_id)
|
||||||
|
return self.url_result(
|
||||||
|
NBCSportsVPlayerIE._extract_url(webpage), 'NBCSportsVPlayer')
|
||||||
|
|
||||||
|
|
||||||
class NBCNewsIE(InfoExtractor):
|
class NBCNewsIE(InfoExtractor):
|
||||||
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
_VALID_URL = r'''(?x)https?://(?:www\.)?nbcnews\.com/
|
||||||
(?:video/.+?/(?P<id>\d+)|
|
(?:video/.+?/(?P<id>\d+)|
|
||||||
|
|
|
@ -17,6 +17,8 @@ from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from .nbc import NBCSportsVPlayerIE
|
||||||
|
|
||||||
|
|
||||||
class YahooIE(InfoExtractor):
|
class YahooIE(InfoExtractor):
|
||||||
IE_DESC = 'Yahoo screen and movies'
|
IE_DESC = 'Yahoo screen and movies'
|
||||||
|
@ -132,6 +134,7 @@ class YahooIE(InfoExtractor):
|
||||||
}, {
|
}, {
|
||||||
'note': 'NBC Sports embeds',
|
'note': 'NBC Sports embeds',
|
||||||
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
|
'url': 'http://sports.yahoo.com/blogs/ncaab-the-dagger/tyler-kalinoski-s-buzzer-beater-caps-davidson-s-comeback-win-185609842.html?guid=nbc_cbk_davidsonbuzzerbeater_150313',
|
||||||
|
'md5': 'ceae8dced5c14a1c1ffcb7a32194cca5',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'id': '9CsDKds0kvHI',
|
'id': '9CsDKds0kvHI',
|
||||||
'ext': 'flv',
|
'ext': 'flv',
|
||||||
|
@ -161,10 +164,9 @@ class YahooIE(InfoExtractor):
|
||||||
video_id = items[0]['id']
|
video_id = items[0]['id']
|
||||||
return self._get_info(video_id, display_id, webpage)
|
return self._get_info(video_id, display_id, webpage)
|
||||||
# Look for NBCSports iframes
|
# Look for NBCSports iframes
|
||||||
iframe_m = re.search(
|
nbc_sports_url = NBCSportsVPlayerIE._extract_url(webpage)
|
||||||
r'<iframe[^>]+src="(?P<url>https?://vplayer\.nbcsports\.com/[^"]+)"', webpage)
|
if nbc_sports_url:
|
||||||
if iframe_m:
|
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
|
||||||
return self.url_result(iframe_m.group('url'), 'NBCSports')
|
|
||||||
|
|
||||||
items_json = self._search_regex(
|
items_json = self._search_regex(
|
||||||
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
r'mediaItems: ({.*?})$', webpage, 'items', flags=re.MULTILINE,
|
||||||
|
|
Loading…
Reference in a new issue