[foxnews] Add support for iframe embeds (closes two issues; the issue numbers are missing from this copy)

This commit is contained in:
Sergey M․ 2018-06-20 23:51:14 +07:00
parent c9b983ff82
commit f51f526b0a
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 41 additions and 7 deletions
youtube_dl/extractor

View file

@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE):
}, },
] ]
@staticmethod
def _extract_urls(webpage):
    """Return the Fox News video embed URLs found in ``webpage``.

    Scans the HTML for ``<iframe>`` / ``<amp-iframe>`` tags whose ``src``
    attribute points at ``video.foxnews.com/v/video-embed.html`` and carries
    a numeric ``video_id`` query parameter.

    :param webpage: HTML source of the page to scan (a string).
    :return: list of the matched ``src`` values in document order. Values
        are returned exactly as written in the page, so they may be
        protocol-relative (start with ``//``). Empty list when nothing
        matches.
    """
    return [
        mobj.group('url')
        for mobj in re.finditer(
            # Group 1 captures the opening quote.  ``(?:(?!\1).)`` consumes
            # one character that is NOT that quote, so the URL can never run
            # past the end of the src attribute into a following attribute.
            # ``\?`` is escaped so it matches the literal query delimiter,
            # and ``\s*=\s*`` tolerates whitespace around the equals sign.
            r'<(?:amp-)?iframe[^>]+\bsrc\s*=\s*(["\'])'
            r'(?P<url>(?:https?:)?//video\.foxnews\.com/v/video-embed\.html\?'
            r'(?:(?!\1).)*?\bvideo_id=\d+(?:(?!\1).)*)\1',
            webpage)]
def _real_extract(self, url): def _real_extract(self, url):
host, video_id = re.match(self._VALID_URL, url).groups() host, video_id = re.match(self._VALID_URL, url).groups()
@ -71,18 +79,35 @@ class FoxNewsArticleIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)' _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P<id>[a-z-]+)'
IE_NAME = 'foxnews:article' IE_NAME = 'foxnews:article'
_TEST = { _TESTS = [{
# data-video-id
'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html',
'md5': '62aa5a781b308fdee212ebb6f33ae7ef', 'md5': '83d44e1aff1433e7a29a7b537d1700b5',
'info_dict': { 'info_dict': {
'id': '5116295019001', 'id': '5116295019001',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Trump and Clinton asked to defend positions on Iraq War', 'title': 'Trump and Clinton asked to defend positions on Iraq War',
'description': 'Veterans react on \'The Kelly File\'', 'description': 'Veterans react on \'The Kelly File\'',
'timestamp': 1473299755, 'timestamp': 1473301045,
'upload_date': '20160908', 'upload_date': '20160908',
}, },
} }, {
# iframe embed
'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true',
'info_dict': {
'id': '5748266721001',
'ext': 'flv',
'title': 'Kyle Kashuv has a positive message for the Trump White House',
'description': 'Marjory Stoneman Douglas student disagrees with classmates.',
'thumbnail': r're:^https?://.*\.jpg$',
'duration': 229,
'timestamp': 1520594670,
'upload_date': '20180309',
},
'params': {
'skip_download': True,
},
}]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
@ -90,10 +115,13 @@ class FoxNewsArticleIE(InfoExtractor):
video_id = self._html_search_regex( video_id = self._html_search_regex(
r'data-video-id=([\'"])(?P<id>[^\'"]+)\1', r'data-video-id=([\'"])(?P<id>[^\'"]+)\1',
webpage, 'video ID', group='id') webpage, 'video ID', group='id', default=None)
if video_id:
return self.url_result(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key())
return self.url_result( return self.url_result(
'http://video.foxnews.com/v/' + video_id, FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())
FoxNewsIE.ie_key())
class FoxNewsInsiderIE(InfoExtractor): class FoxNewsInsiderIE(InfoExtractor):

View file

@ -111,6 +111,7 @@ from .cloudflarestream import CloudflareStreamIE
from .peertube import PeerTubeIE from .peertube import PeerTubeIE
from .indavideo import IndavideoEmbedIE from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -3091,6 +3092,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
apa_urls, video_id, video_title, ie=APAIE.ie_key()) apa_urls, video_id, video_title, ie=APAIE.ie_key())
foxnews_urls = FoxNewsIE._extract_urls(webpage)
if foxnews_urls:
return self.playlist_from_matches(
foxnews_urls, video_id, video_title, ie=FoxNewsIE.ie_key())
sharevideos_urls = [mobj.group('url') for mobj in re.finditer( sharevideos_urls = [mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1', r'<iframe[^>]+?\bsrc\s*=\s*(["\'])(?P<url>(?:https?:)?//embed\.share-videos\.se/auto/embed/\d+\?.*?\buid=\d+.*?)\1',
webpage)] webpage)]