[generic] Add support for expressen embeds

This commit is contained in:
Sergey M․ 2018-08-14 22:51:44 +07:00
parent 24e0cd709f
commit 57c68ec4c3
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 28 additions and 1 deletions

View file

@ -1,6 +1,8 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
@ -11,7 +13,13 @@ from ..utils import (
class ExpressenIE(InfoExtractor): class ExpressenIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?expressen\.se/tv/(?:[^/]+/)*(?P<id>[^/?#&]+)' _VALID_URL = r'''(?x)
https?://
(?:www\.)?expressen\.se/
(?:(?:tvspelare/video|videoplayer/embed)/)?
tv/(?:[^/]+/)*
(?P<id>[^/?#&]+)
'''
_TESTS = [{ _TESTS = [{
'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/', 'url': 'https://www.expressen.se/tv/ledare/ledarsnack/ledarsnack-om-arbetslosheten-bland-kvinnor-i-speciellt-utsatta-omraden/',
'md5': '2fbbe3ca14392a6b1b36941858d33a45', 'md5': '2fbbe3ca14392a6b1b36941858d33a45',
@ -28,8 +36,21 @@ class ExpressenIE(InfoExtractor):
}, { }, {
'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/', 'url': 'https://www.expressen.se/tv/kultur/kulturdebatt-med-expressens-karin-olsson/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.expressen.se/tvspelare/video/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}, {
'url': 'https://www.expressen.se/videoplayer/embed/tv/ditv/ekonomistudion/experterna-har-ar-fragorna-som-avgor-valet/?embed=true&external=true&autoplay=true&startVolume=0&partnerId=di',
'only_matching': True,
}] }]
@staticmethod
def _extract_urls(webpage):
return [
mobj.group('url') for mobj in re.finditer(
r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?expressen\.se/(?:tvspelare/video|videoplayer/embed)/tv/.+?)\1',
webpage)]
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)

View file

@ -114,6 +114,7 @@ from .indavideo import IndavideoEmbedIE
from .apa import APAIE from .apa import APAIE
from .foxnews import FoxNewsIE from .foxnews import FoxNewsIE
from .viqeo import ViqeoIE from .viqeo import ViqeoIE
from .expressen import ExpressenIE
class GenericIE(InfoExtractor): class GenericIE(InfoExtractor):
@ -3109,6 +3110,11 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches( return self.playlist_from_matches(
viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key()) viqeo_urls, video_id, video_title, ie=ViqeoIE.ie_key())
expressen_urls = ExpressenIE._extract_urls(webpage)
if expressen_urls:
return self.playlist_from_matches(
expressen_urls, video_id, video_title, ie=ExpressenIE.ie_key())
# Look for HTML5 media # Look for HTML5 media
entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls') entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
if entries: if entries: