[mediaset] Improve embed support (closes )

This commit is contained in:
Sergey M․ 2018-09-26 05:38:41 +07:00
parent 60ce0c67fd
commit 8fd12a0831
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 33 additions and 7 deletions
youtube_dl/extractor

View file

@ -3023,7 +3023,7 @@ class GenericIE(InfoExtractor):
wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key()) wapo_urls, video_id, video_title, ie=WashingtonPostIE.ie_key())
# Look for Mediaset embeds # Look for Mediaset embeds
mediaset_urls = MediasetIE._extract_urls(webpage) mediaset_urls = MediasetIE._extract_urls(self, webpage)
if mediaset_urls: if mediaset_urls:
return self.playlist_from_matches( return self.playlist_from_matches(
mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key()) mediaset_urls, video_id, video_title, ie=MediasetIE.ie_key())

View file

@ -4,6 +4,11 @@ from __future__ import unicode_literals
import re import re
from .theplatform import ThePlatformBaseIE from .theplatform import ThePlatformBaseIE
from ..compat import (
compat_parse_qs,
compat_str,
compat_urllib_parse_urlparse,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
@ -76,12 +81,33 @@ class MediasetIE(ThePlatformBaseIE):
}] }]
@staticmethod
def _extract_urls(ie, webpage):
    """Collect Mediaset player embed URLs found in ``webpage``.

    Scans ``webpage`` for ``<iframe>`` tags whose ``src`` points at a
    ``video.mediaset.it/player/playerIFrame[Twitter].shtml`` player (the
    URL scheme is optional).  An embed URL is kept as-is when its query
    string carries a ``programGuid`` parameter.  An embed that only has
    an ``id`` parameter is requested through ``ie._request_webpage`` and
    the final URL reported by the response is kept instead, provided
    that URL carries a ``programGuid``.  Embeds with neither parameter
    are ignored.

    ``ie`` is the calling extractor instance; it is passed explicitly
    because this is a static method invoked from other extractors as
    ``MediasetIE._extract_urls(self, webpage)``, and it is used here
    only for its ``_request_webpage`` method.

    Returns a list of URL strings in the order the embeds appear.
    """
    def _qs(url):
        # Parsed query string: maps each parameter name to a list of values.
        return compat_parse_qs(compat_urllib_parse_urlparse(url).query)

    def _program_guid(qs):
        # First ``programGuid`` value, or None when the parameter is absent.
        return qs.get('programGuid', [None])[0]

    entries = []
    for mobj in re.finditer(
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?video\.mediaset\.it/player/playerIFrame(?:Twitter)?\.shtml.*?)\1',
            webpage):
        embed_url = mobj.group('url')
        embed_qs = _qs(embed_url)
        program_guid = _program_guid(embed_qs)
        if program_guid:
            # The embed already names the programme; no request needed.
            entries.append(embed_url)
            continue
        video_id = embed_qs.get('id', [None])[0]
        if not video_id:
            # Neither programGuid nor id: nothing to resolve.
            continue
        # Only an id is available: fetch the embed and inspect the URL the
        # response ends up at, which may carry the programGuid.
        urlh = ie._request_webpage(
            embed_url, video_id, note='Following embed URL redirect')
        embed_url = compat_str(urlh.geturl())
        program_guid = _program_guid(_qs(embed_url))
        if program_guid:
            entries.append(embed_url)
    return entries
def _real_extract(self, url): def _real_extract(self, url):
guid = self._match_id(url) guid = self._match_id(url)