youtube-dl/youtube_dl/extractor/radiocanada.py

# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import (
    determine_ext,
    ExtractorError,
    int_or_none,
    unified_strdate,
)


class RadioCanadaIE(InfoExtractor):
    IE_NAME = 'radiocanada'
    _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'
    _TESTS = [
        {
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',
            'info_dict': {
                'id': '7184272',
                'ext': 'mp4',
                'title': 'Le parcours du tireur capté sur vidéo',
                'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',
                'upload_date': '20141023',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            }
        },
        {
            # empty Title
            'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',
            'info_dict': {
                'id': '7754998',
                'ext': 'mp4',
                'title': 'letelejournal22h',
                'description': 'INTEGRALE WEB 22H-TJ',
                'upload_date': '20170720',
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            # with protectionType but not actually DRM protected
            'url': 'radiocanada:toutv:140872',
            'info_dict': {
                'id': '140872',
                'title': 'Épisode 1',
                'series': 'District 31',
            },
            'only_matching': True,
        }
    ]
    _GEO_COUNTRIES = ['CA']
    _access_token = None
    _claims = None

    def _call_api(self, path, video_id=None, app_code=None, query=None):
        if not query:
            query = {}
        query.update({
            'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',
            'output': 'json',
        })
        if video_id:
            query.update({
                'appCode': app_code,
                'idMedia': video_id,
            })
        if self._access_token:
            query['access_token'] = self._access_token
        try:
            return self._download_json(
                'https://services.radio-canada.ca/media/' + path, video_id, query=query)
        except ExtractorError as e:
            if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):
                data = self._parse_json(e.cause.read().decode(), None)
                error = data.get('error_description') or data['errorMessage']['text']
                raise ExtractorError(error, expected=True)
            raise

    def _extract_info(self, app_code, video_id):
        metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']

        def get_meta(name):
            for meta in metas:
                if meta.get('name') == name:
                    text = meta.get('text')
                    if text:
                        return text

        # protectionType does not necessarily mean the video is DRM protected (see
        # https://github.com/rg3/youtube-dl/pull/18609).
        if get_meta('protectionType'):
            self.report_warning('This video is probably DRM protected.')

        query = {
            'connectionType': 'hd',
            'deviceType': 'ipad',
            'multibitrate': 'true',
        }
        if self._claims:
            query['claims'] = self._claims
        v_data = self._call_api('validation/v2/', video_id, app_code, query)
        v_url = v_data.get('url')
        if not v_url:
            error = v_data['message']
            if error == "Le contenu sélectionné n'est pas disponible dans votre pays":
                raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)
            if error == 'Le contenu sélectionné est disponible seulement en premium':
                self.raise_login_required(error)
            raise ExtractorError(
                '%s said: %s' % (self.IE_NAME, error), expected=True)
        formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')
        self._sort_formats(formats)

        subtitles = {}
        closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')
        if closed_caption_url:
            subtitles['fr'] = [{
                'url': closed_caption_url,
                'ext': determine_ext(closed_caption_url, 'vtt'),
            }]

        return {
            'id': video_id,
            'title': get_meta('Title') or get_meta('AV-nomEmission'),
            'description': get_meta('Description') or get_meta('ShortDescription'),
            'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),
            'duration': int_or_none(get_meta('length')),
            'series': get_meta('Emission'),
            'season_number': int_or_none('SrcSaison'),
            'episode_number': int_or_none('SrcEpisode'),
            'upload_date': unified_strdate(get_meta('Date')),
            'subtitles': subtitles,
            'formats': formats,
        }

    def _real_extract(self, url):
        return self._extract_info(*re.match(self._VALID_URL, url).groups())


class RadioCanadaAudioVideoIE(InfoExtractor):
    'radiocanada:audiovideo'
    _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',
        'info_dict': {
            'id': '7527184',
            'ext': 'mp4',
            'title': 'Barack Obama au Vietnam',
            'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',
            'upload_date': '20160523',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        return self.url_result('radiocanada:medianet:%s' % self._match_id(url))
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`# coding: utf-8`
			`from __future__ import unicode_literals`

			`import re`

			`from .common import InfoExtractor`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`from ..compat import compat_HTTPError`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`from ..utils import (`
			`determine_ext,`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`ExtractorError,`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`int_or_none,`
			`unified_strdate,`
			`)`


			`class RadioCanadaIE(InfoExtractor):`
			`IE_NAME = 'radiocanada'`
			`_VALID_URL = r'(?:radiocanada:\|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)'`
[radiocanada] Add fallback for title extraction 2017-09-08 14:53:24 +00:00			`_TESTS = [`
			`{`
			`'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272',`
			`'info_dict': {`
			`'id': '7184272',`
			`'ext': 'mp4',`
			`'title': 'Le parcours du tireur capté sur vidéo',`
			`'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa',`
			`'upload_date': '20141023',`
			`},`
			`'params': {`
			`# m3u8 download`
			`'skip_download': True,`
			`}`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`},`
[radiocanada] Add fallback for title extraction 2017-09-08 14:53:24 +00:00			`{`
			`# empty Title`
			`'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/',`
			`'info_dict': {`
			`'id': '7754998',`
			`'ext': 'mp4',`
			`'title': 'letelejournal22h',`
			`'description': 'INTEGRALE WEB 22H-TJ',`
			`'upload_date': '20170720',`
			`},`
			`'params': {`
			`# m3u8 download`
			`'skip_download': True,`
			`},`
[radiocanada] Relax DRM check 2019-01-20 11:33:09 +00:00			`},`
			`{`
			`# with protectionType but not actually DRM protected`
			`'url': 'radiocanada:toutv:140872',`
			`'info_dict': {`
			`'id': '140872',`
			`'title': 'Épisode 1',`
			`'series': 'District 31',`
			`},`
			`'only_matching': True,`
[radiocanada] Add fallback for title extraction 2017-09-08 14:53:24 +00:00			`}`
			`]`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`_GEO_COUNTRIES = ['CA']`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`_access_token = None`
			`_claims = None`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`def _call_api(self, path, video_id=None, app_code=None, query=None):`
			`if not query:`
			`query = {}`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`query.update({`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb',`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`'output': 'json',`
			`})`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`if video_id:`
			`query.update({`
			`'appCode': app_code,`
			`'idMedia': video_id,`
			`})`
			`if self._access_token:`
			`query['access_token'] = self._access_token`
			`try:`
			`return self._download_json(`
			`'https://services.radio-canada.ca/media/' + path, video_id, query=query)`
			`except ExtractorError as e:`
			`if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 422):`
			`data = self._parse_json(e.cause.read().decode(), None)`
			`error = data.get('error_description') or data['errorMessage']['text']`
			`raise ExtractorError(error, expected=True)`
			`raise`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`def _extract_info(self, app_code, video_id):`
			`metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas']`
[toutv] add support for authentication(closes #10669) 2016-09-16 16:36:22 +00:00
			`def get_meta(name):`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`for meta in metas:`
			`if meta.get('name') == name:`
			`text = meta.get('text')`
			`if text:`
			`return text`
[toutv] add support for authentication(closes #10669) 2016-09-16 16:36:22 +00:00
[radiocanada] Relax DRM check 2019-01-20 11:33:09 +00:00			`# protectionType does not necessarily mean the video is DRM protected (see`
			`# https://github.com/rg3/youtube-dl/pull/18609).`
[toutv] add support for authentication(closes #10669) 2016-09-16 16:36:22 +00:00			`if get_meta('protectionType'):`
[radiocanada] Relax DRM check 2019-01-20 11:33:09 +00:00			`self.report_warning('This video is probably DRM protected.')`
[toutv] add support for authentication(closes #10669) 2016-09-16 16:36:22 +00:00
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`query = {`
			`'connectionType': 'hd',`
			`'deviceType': 'ipad',`
			`'multibitrate': 'true',`
			`}`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`if self._claims:`
			`query['claims'] = self._claims`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`v_data = self._call_api('validation/v2/', video_id, app_code, query)`
			`v_url = v_data.get('url')`
			`if not v_url:`
			`error = v_data['message']`
			`if error == "Le contenu sélectionné n'est pas disponible dans votre pays":`
			`raise self.raise_geo_restricted(error, self._GEO_COUNTRIES)`
[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`if error == 'Le contenu sélectionné est disponible seulement en premium':`
			`self.raise_login_required(error)`
[radiocanada] Skip unsupported platforms (closes #14100) 2017-09-02 08:33:54 +00:00			`raise ExtractorError(`
			`'%s said: %s' % (self.IE_NAME, error), expected=True)`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`formats = self._extract_m3u8_formats(v_url, video_id, 'mp4')`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`self._sort_formats(formats)`

[radiocanada] extract subtitle(closes #11096) 2016-11-02 12:55:40 +00:00			`subtitles = {}`
			`closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5')`
			`if closed_caption_url:`
			`subtitles['fr'] = [{`
			`'url': closed_caption_url,`
			`'ext': determine_ext(closed_caption_url, 'vtt'),`
			`}]`

[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`return {`
			`'id': video_id,`
[radiocanada] Add fallback for title extraction 2017-09-08 14:53:24 +00:00			`'title': get_meta('Title') or get_meta('AV-nomEmission'),`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`'description': get_meta('Description') or get_meta('ShortDescription'),`
			`'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'),`
			`'duration': int_or_none(get_meta('length')),`
			`'series': get_meta('Emission'),`
			`'season_number': int_or_none('SrcSaison'),`
			`'episode_number': int_or_none('SrcEpisode'),`
			`'upload_date': unified_strdate(get_meta('Date')),`
[radiocanada] extract subtitle(closes #11096) 2016-11-02 12:55:40 +00:00			`'subtitles': subtitles,`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`'formats': formats,`
			`}`

[toutv] fix authentication(closes #16398)(closes #18700) 2019-02-06 12:59:12 +00:00			`def _real_extract(self, url):`
			`return self._extract_info(*re.match(self._VALID_URL, url).groups())`

[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00
			`class RadioCanadaAudioVideoIE(InfoExtractor):`
			`'radiocanada:audiovideo'`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`_VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)'`
			`_TESTS = [{`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam',`
			`'info_dict': {`
			`'id': '7527184',`
[toutv] fix info extraction(closes #1792)(closes #2082) 2016-05-24 15:06:02 +00:00			`'ext': 'mp4',`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`'title': 'Barack Obama au Vietnam',`
			`'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam',`
			`'upload_date': '20160523',`
			`},`
			`'params': {`
[toutv] fix info extraction(closes #1792)(closes #2082) 2016-05-24 15:06:02 +00:00			`# m3u8 download`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00			`'skip_download': True,`
			`},`
[radiocanada] switch to the new media requests(closes #19115) 2019-02-03 11:10:41 +00:00			`}, {`
			`'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam',`
			`'only_matching': True,`
			`}]`
[radiocanada] Add new extractor(#4020) 2016-05-24 14:58:27 +00:00
			`def _real_extract(self, url):`
			`return self.url_result('radiocanada:medianet:%s' % self._match_id(url))`