[bbc] Extract legacy playlist embedded media

This commit is contained in:
Sergey M․ 2015-10-10 23:01:20 +06:00
parent f790c43f6e
commit e6174ee975

View file

@ -29,6 +29,14 @@ class BBCCoUkIE(InfoExtractor):
'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s', 'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
] ]
# XML namespace used when looking up the <error> element of a media selection
# document (see _extract_medias).
_MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
# XML namespace of the legacy EMP playlist elements queried by this extractor
# (item, noItems, title, summary, mediator).
_EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'
# Namespaces that _findall_ns substitutes into its xpath template, in this
# order, when collecting media/connection elements.
_NAMESPACES = (
_MEDIASELECTION_NS,
_EMP_PLAYLIST_NS,
)
_TESTS = [ _TESTS = [
{ {
'url': 'http://www.bbc.co.uk/programmes/b039g8p7', 'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
@ -194,6 +202,7 @@ class BBCCoUkIE(InfoExtractor):
def _extract_connection(self, connection, programme_id): def _extract_connection(self, connection, programme_id):
formats = [] formats = []
kind = connection.get('kind')
protocol = connection.get('protocol') protocol = connection.get('protocol')
supplier = connection.get('supplier') supplier = connection.get('supplier')
if protocol == 'http': if protocol == 'http':
@ -219,7 +228,7 @@ class BBCCoUkIE(InfoExtractor):
else: else:
formats.append({ formats.append({
'url': href, 'url': href,
'format_id': supplier, 'format_id': supplier or kind or protocol,
}) })
elif protocol == 'rtmp': elif protocol == 'rtmp':
application = connection.get('application', 'ondemand') application = connection.get('application', 'ondemand')
@ -239,16 +248,24 @@ class BBCCoUkIE(InfoExtractor):
return formats return formats
def _extract_items(self, playlist):
    """Return the direct ``item`` children of *playlist* in the EMP playlist namespace."""
    item_path = './{%s}item' % self._EMP_PLAYLIST_NS
    return playlist.findall(item_path)
def _findall_ns(self, element, xpath):
    """Collect every match of *xpath* under *element* across all known namespaces.

    *xpath* is a template with a single ``%s`` placeholder; it is filled with
    each entry of ``self._NAMESPACES`` in turn and the matches are concatenated
    in namespace order.
    """
    return [
        match
        for namespace in self._NAMESPACES
        for match in element.findall(xpath % namespace)
    ]
def _extract_medias(self, media_selection):
    """Return the ``media`` children of *media_selection*, or raise on an error element.

    An ``error`` child is looked up first in the media selection namespace and
    then in the EMP playlist namespace. If either is present,
    ``BBCCoUkIE.MediaSelectionError`` is raised with the element's ``id``
    attribute. Otherwise the ``media`` children found in any namespace listed
    in ``self._NAMESPACES`` are returned (via ``self._findall_ns``).
    """
    error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
    if error is None:
        # The fallback lookup must be assigned: previously its result was
        # discarded, so errors in the EMP playlist namespace were ignored.
        error = media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
    if error is not None:
        raise BBCCoUkIE.MediaSelectionError(error.get('id'))
    return self._findall_ns(media_selection, './{%s}media')
def _extract_connections(self, media):
    """Return the ``connection`` children of *media* found in any known namespace."""
    connection_path = './{%s}connection'
    return self._findall_ns(media, connection_path)
def _extract_video(self, media, programme_id): def _extract_video(self, media, programme_id):
formats = [] formats = []
@ -262,13 +279,14 @@ class BBCCoUkIE(InfoExtractor):
conn_formats = self._extract_connection(connection, programme_id) conn_formats = self._extract_connection(connection, programme_id)
for format in conn_formats: for format in conn_formats:
format.update({ format.update({
'format_id': '%s_%s' % (service, format['format_id']),
'width': width, 'width': width,
'height': height, 'height': height,
'vbr': vbr, 'vbr': vbr,
'vcodec': vcodec, 'vcodec': vcodec,
'filesize': file_size, 'filesize': file_size,
}) })
if service:
format['format_id'] = '%s_%s' % (service, format['format_id'])
formats.extend(conn_formats) formats.extend(conn_formats)
return formats return formats
@ -383,7 +401,7 @@ class BBCCoUkIE(InfoExtractor):
url, playlist_id, 'Downloading legacy playlist XML') url, playlist_id, 'Downloading legacy playlist XML')
def _extract_from_legacy_playlist(self, playlist, playlist_id): def _extract_from_legacy_playlist(self, playlist, playlist_id):
no_items = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}noItems') no_items = playlist.find('./{%s}noItems' % self._EMP_PLAYLIST_NS)
if no_items is not None: if no_items is not None:
reason = no_items.get('reason') reason = no_items.get('reason')
if reason == 'preAvailability': if reason == 'preAvailability':
@ -400,8 +418,8 @@ class BBCCoUkIE(InfoExtractor):
kind = item.get('kind') kind = item.get('kind')
if kind != 'programme' and kind != 'radioProgramme': if kind != 'programme' and kind != 'radioProgramme':
continue continue
title = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}title').text title = playlist.find('./{%s}title' % self._EMP_PLAYLIST_NS).text
description_el = playlist.find('./{http://bbc.co.uk/2008/emp/playlist}summary') description_el = playlist.find('./{%s}summary' % self._EMP_PLAYLIST_NS)
description = description_el.text if description_el else None description = description_el.text if description_el else None
def get_programme_id(item): def get_programme_id(item):
@ -411,16 +429,18 @@ class BBCCoUkIE(InfoExtractor):
if value and re.match(r'^[pb][\da-z]{7}$', value): if value and re.match(r'^[pb][\da-z]{7}$', value):
return value return value
get_from_attributes(item) get_from_attributes(item)
mediator = item.find('./{http://bbc.co.uk/2008/emp/playlist}mediator') mediator = item.find('./{%s}mediator' % self._EMP_PLAYLIST_NS)
if mediator is not None: if mediator is not None:
return get_from_attributes(mediator) return get_from_attributes(mediator)
programme_id = get_programme_id(item) programme_id = get_programme_id(item)
duration = int_or_none(item.get('duration')) duration = int_or_none(item.get('duration'))
# TODO: programme_id can be None and media items can be incorporated right inside
# playlist's item (e.g. http://www.bbc.com/turkce/haberler/2015/06/150615_telabyad_kentin_cogu) if programme_id:
# as f4m and m3u8 formats, subtitles = self._download_media_selector(programme_id)
formats, subtitles = self._download_media_selector(programme_id) else:
formats, subtitles = self._process_media_selector(item, playlist_id)
programme_id = playlist_id
return programme_id, title, description, duration, formats, subtitles return programme_id, title, description, duration, formats, subtitles