[francetv] Add an extractor for francetvinfo.fr (closes #1317)

It uses the same system as Pluzz, so create a base class for both extractors.
This commit is contained in:
Jaime Marquínez Ferrándiz 2013-09-10 15:50:34 +02:00
parent df3e61003a
commit 648d25d43d
2 changed files with 48 additions and 18 deletions

View file

@ -29,7 +29,10 @@ from .escapist import EscapistIE
from .exfm import ExfmIE from .exfm import ExfmIE
from .facebook import FacebookIE from .facebook import FacebookIE
from .flickr import FlickrIE from .flickr import FlickrIE
from .francetv import PluzzIE from .francetv import (
PluzzIE,
FranceTvInfoIE,
)
from .freesound import FreesoundIE from .freesound import FreesoundIE
from .funnyordie import FunnyOrDieIE from .funnyordie import FunnyOrDieIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE

View file

@ -8,7 +8,29 @@ from ..utils import (
) )
class PluzzIE(InfoExtractor): class FranceTVBaseInfoExtractor(InfoExtractor):
def _extract_video(self, video_id):
    """Build the info dictionary for the video identified by *video_id*.

    Downloads the ``getInfosOeuvre.php`` XML description for *video_id*
    from www.francetvinfo.fr and derives the stream URL from the manifest
    URL found under ``videos/video/url``.

    Returns a dict with the keys ``id``, ``ext``, ``url``, ``title``,
    ``thumbnail`` and ``description``.  ``thumbnail`` and ``description``
    are ``None`` when the XML carries no ``image`` / ``synopsis`` text.
    """
    xml_desc = self._download_webpage(
        'http://www.francetvinfo.fr/appftv/webservices/video/'
        'getInfosOeuvre.php?id-diffusion='
        + video_id, video_id, 'Downloading XML config')
    info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))

    # Turn the manifest URL (manifest.f4m below /z/) into the matching
    # index_2_av.m3u8 URL below /i/.
    manifest_url = info.find('videos/video/url').text
    video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8')
    video_url = video_url.replace('/z/', '/i/')

    # findtext() returns None for a missing element instead of failing
    # with AttributeError on ``None.text``; these two fields are optional.
    thumbnail_path = info.findtext('image')
    thumbnail = None
    if thumbnail_path:
        thumbnail = compat_urlparse.urljoin(
            'http://pluzz.francetv.fr', thumbnail_path)
    description = info.findtext('synopsis') or None

    return {'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': info.find('titre').text,
            'thumbnail': thumbnail,
            'description': description,
            }
class PluzzIE(FranceTVBaseInfoExtractor):
IE_NAME = u'pluzz.francetv.fr' IE_NAME = u'pluzz.francetv.fr'
_VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html' _VALID_URL = r'https?://pluzz\.francetv\.fr/videos/(.*?)\.html'
@ -29,22 +51,27 @@ class PluzzIE(InfoExtractor):
webpage = self._download_webpage(url, title) webpage = self._download_webpage(url, title)
video_id = self._search_regex( video_id = self._search_regex(
r'data-diffusion="(\d+)"', webpage, 'ID') r'data-diffusion="(\d+)"', webpage, 'ID')
return self._extract_video(video_id)
xml_desc = self._download_webpage(
'http://www.pluzz.fr/appftv/webservices/video/'
'getInfosOeuvre.php?id-diffusion='
+ video_id, title, 'Downloading XML config')
info = xml.etree.ElementTree.fromstring(xml_desc.encode('utf-8'))
manifest_url = info.find('videos/video/url').text class FranceTvInfoIE(FranceTVBaseInfoExtractor):
video_url = manifest_url.replace('manifest.f4m', 'index_2_av.m3u8') IE_NAME = u'francetvinfo.fr'
video_url = video_url.replace('/z/', '/i/') _VALID_URL = r'https?://www\.francetvinfo\.fr/replay.*/(?P<title>.+).html'
thumbnail_path = info.find('image').text
return {'id': video_id, _TEST = {
'ext': 'mp4', u'url': u'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
'url': video_url, u'file': u'84981923.mp4',
'title': info.find('titre').text, u'info_dict': {
'thumbnail': compat_urlparse.urljoin(url, thumbnail_path), u'title': u'Soir 3',
'description': info.find('synopsis').text, },
u'params': {
u'skip_download': True,
},
} }
def _real_extract(self, url):
    """Extract the video referenced by a francetvinfo.fr replay page.

    Downloads the page at *url*, searches it for the digits following
    ``id-video=`` and returns the result of ``_extract_video`` for
    that id.

    Raises ValueError if *url* does not match ``_VALID_URL``.
    """
    mobj = re.match(self._VALID_URL, url)
    if mobj is None:
        # Fail with a readable message rather than AttributeError on None.
        raise ValueError(u'Invalid URL: %s' % url)
    # The title captured from the URL is passed as the second argument
    # of the page download.
    page_title = mobj.group('title')
    webpage = self._download_webpage(url, page_title)
    video_id = self._search_regex(r'id-video=(\d+?)"', webpage, u'video id')
    return self._extract_video(video_id)