[mdr] Modernize and include kika.de

This commit is contained in:
Sergey M․ 2015-10-31 22:17:09 +06:00
parent c3040bd00a
commit 2b1b2d83ca
3 changed files with 131 additions and 143 deletions

View file

@ -274,7 +274,6 @@ from .karrierevideos import KarriereVideosIE
from .keezmovies import KeezMoviesIE from .keezmovies import KeezMoviesIE
from .khanacademy import KhanAcademyIE from .khanacademy import KhanAcademyIE
from .kickstarter import KickStarterIE from .kickstarter import KickStarterIE
from .kika import KikaIE
from .keek import KeekIE from .keek import KeekIE
from .kontrtube import KontrTubeIE from .kontrtube import KontrTubeIE
from .krasview import KrasViewIE from .krasview import KrasViewIE

View file

@ -1,101 +0,0 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import ExtractorError
class KikaIE(InfoExtractor):
    """Information extractor for videos published on kika.de.

    The page HTML only carries a reference to a ``video<ID>-avCustom.xml``
    document; that XML holds the title, optional description, duration and
    the list of progressive-download assets that become the formats.
    """

    _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'

    _TESTS = [
        {
            'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None,
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
        {
            'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
            'md5': '4930515e36b06c111213e80d1e4aad0e',
            'info_dict': {
                'id': '19636',
                'ext': 'mp4',
                'title': 'Baumhaus vom 30. Oktober 2015',
                'description': None,
            },
        },
        {
            'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
            'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
            'info_dict': {
                'id': '8182',
                'ext': 'mp4',
                'title': 'Beutolomäus und der geheime Weihnachtswunsch',
                'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            },
        },
    ]

    @staticmethod
    def _xml_child_text(node, tag):
        """Return the text of ``node``'s child ``tag``, or None if absent."""
        if node is None:
            return None
        child = node.find(tag)
        return None if child is None else child.text

    @classmethod
    def _xml_child_int(cls, node, tag, scale=1):
        """Return child ``tag`` as an int floor-divided by ``scale``.

        Yields None instead of raising when the element is missing, empty
        or not numeric, so one incomplete asset cannot abort extraction.
        """
        try:
            return int(cls._xml_child_text(node, tag)) // scale
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _parse_minutes_seconds(text):
        """Convert an ``mm:ss`` string to seconds, or None if unparsable.

        The minutes part may exceed 59 (e.g. ``78:42``).
        """
        try:
            minutes, seconds = text.split(':')[:2]
            return int(minutes) * 60 + int(seconds)
        except (AttributeError, TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Resolve ``url`` to an info dict for the video embedded in the page.

        Raises ExtractorError (expected) when the page references no
        avCustom.xml document, and ExtractorError when the XML has no title.
        """
        # broadcast_id may be the same as the video_id
        broadcast_id = self._match_id(url)
        webpage = self._download_webpage(url, broadcast_id)

        xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'
        video_id = self._search_regex(xml_re, webpage, "xml_url", default=None)
        if not video_id:
            err_msg = 'Video %s is not available online' % broadcast_id
            raise ExtractorError(err_msg, expected=True)

        xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id)
        xml_tree = self._download_xml(xml_url, video_id)

        title = self._xml_child_text(xml_tree, 'title')
        if not title:
            raise ExtractorError('Unable to extract title')

        # The description is not available for all videos.
        description = self._xml_child_text(
            xml_tree.find('broadcast'), 'broadcastDescription')

        # duration string format is mm:ss (even if it is >= 1 hour, e.g. 78:42)
        duration = self._parse_minutes_seconds(
            self._xml_child_text(xml_tree, 'duration'))

        formats = []
        assets = xml_tree.find('assets')
        for elem in (assets if assets is not None else []):
            format_url = self._xml_child_text(elem, 'progressiveDownloadUrl')
            if not format_url:
                # An asset without a download URL cannot be turned into a format.
                continue
            media_type = self._xml_child_text(elem, 'mediaType')
            formats.append({
                'url': format_url,
                'ext': media_type.lower() if media_type else None,
                'format': self._xml_child_text(elem, 'profileName'),
                'width': self._xml_child_int(elem, 'frameWidth'),
                'height': self._xml_child_int(elem, 'frameHeight'),
                # MDRIE divides these same avCustom.xml bitrates by 1000;
                # presumably the raw values are bit/s while format dicts
                # expect kbit/s -- do the same here for consistency.
                'abr': self._xml_child_int(elem, 'bitrateAudio', 1000),
                'vbr': self._xml_child_int(elem, 'bitrateVideo', 1000),
                'filesize': self._xml_child_int(elem, 'fileSize'),
            })
        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
            'duration': duration,
        }
        # Only report a webpage URL when the XML actually provides one.
        webpage_url = self._xml_child_text(xml_tree, 'htmlUrl')
        if webpage_url:
            info['webpage_url'] = webpage_url
        return info

View file

@ -1,64 +1,154 @@
# coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
determine_ext,
int_or_none,
parse_duration,
parse_iso8601,
xpath_text,
)
class MDRIE(InfoExtractor):
    """Information extractor for mdr.de and kika.de media pages.

    Both sites embed a ``dataURL`` pointing at a ``(video|audio)<N>-avCustom.xml``
    document; that XML supplies the metadata and the asset list from which
    progressive, HLS (m3u8) and HDS (f4m) formats are built.
    """

    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z]+(?P<id>\d+)(?:_.+?)?\.html'

    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
        'md5': '4930515e36b06c111213e80d1e4aad0e',
        'info_dict': {
            'id': '19636',
            'ext': 'mp4',
            'title': 'Baumhaus vom 30. Oktober 2015',
            'duration': 134,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
        'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
        'info_dict': {
            'id': '8182',
            'ext': 'mp4',
            'title': 'Beutolomäus und der geheime Weihnachtswunsch',
            'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
            'timestamp': 1419047100,
            'upload_date': '20141220',
            'duration': 4628,
            'uploader': 'KIKA',
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
    }, {
        'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the media item embedded in ``url``.

        Downloads the page, follows its ``dataURL`` to the avCustom.xml
        document and turns every distinct asset URL into one or more formats.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        data_url = self._search_regex(
            r'dataURL\s*:\s*(["\'])(?P<url>/.+/(?:video|audio)[0-9]+-avCustom\.xml)\1',
            webpage, 'data url', group='url')

        # data_url is site-relative, so resolve it against the page URL.
        doc = self._download_xml(
            compat_urlparse.urljoin(url, data_url), video_id)

        # The title is mandatory: make the last-resort lookup fatal rather
        # than silently returning an info dict without a title.
        title = (xpath_text(doc, './title', 'title', default=None) or
                 xpath_text(doc, './broadcast/broadcastName', 'title', fatal=True))

        formats = []
        processed_urls = set()
        for asset in doc.findall('./assets/asset'):
            # Bitrates are per asset, so read them once for all of its URLs;
            # the value is scaled down by 1000 (presumably bit/s -> kbit/s).
            asset_vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
            asset_abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)

            for source in (
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
                url_el = asset.find('./%sUrl' % source)
                if url_el is None:
                    continue

                video_url = url_el.text
                # Skip empty URL elements as well as URLs already handled
                # through another asset or source.
                if not video_url or video_url in processed_urls:
                    continue

                processed_urls.add(video_url)

                vbr = asset_vbr
                abr = asset_abr

                ext = determine_ext(video_url)
                if ext == 'm3u8':
                    # With fatal=False the helper may hand back a falsy
                    # non-list on failure; normalise so it stays iterable.
                    url_formats = self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        preference=0, m3u8_id='HLS', fatal=False) or []
                elif ext == 'f4m':
                    url_formats = self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                        preference=0, f4m_id='HDS', fatal=False) or []
                else:
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))

                    # '%d' cannot format None, so fall back to the bare media
                    # type when neither bitrate is known.
                    bitrate = vbr or abr
                    format_id = '%s-%d' % (media_type, bitrate) if bitrate else media_type

                    f = {
                        'url': video_url,
                        'format_id': format_id,
                        'filesize': filesize,
                        'abr': abr,
                        'preference': 1,
                    }

                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
                            'vbr': vbr,
                            'width': width,
                            'height': height,
                        })

                    url_formats = [f]

                if not vbr:
                    # No video bitrate: treat the asset as audio-only and move
                    # any manifest-reported total bitrate into 'abr'.
                    for f in url_formats:
                        abr = f.get('tbr') or abr
                        if 'tbr' in f:
                            del f['tbr']
                        f.update({
                            'abr': abr,
                            'vcodec': 'none',
                        })

                formats.extend(url_formats)

        self._sort_formats(formats)

        description = xpath_text(doc, './broadcast/broadcastDescription', 'description')
        timestamp = parse_iso8601(
            xpath_text(doc, './broadcast/broadcastDate', 'timestamp', default=None) or
            xpath_text(doc, './broadcast/broadcastStartDate', 'timestamp', default=None))
        duration = parse_duration(xpath_text(doc, './duration', 'duration'))
        uploader = xpath_text(doc, './rights', 'uploader')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'duration': duration,
            'uploader': uploader,
            'formats': formats,
        }