[generic] Parse RSS enclosure URLs (Fixes #5091)
This commit is contained in:
parent
c75f0b361a
commit
76c73715fb
1 changed file with 29 additions and 5 deletions
|
@@ -26,6 +26,7 @@ from ..utils import (
|
||||||
unsmuggle_url,
|
unsmuggle_url,
|
||||||
UnsupportedError,
|
UnsupportedError,
|
||||||
url_basename,
|
url_basename,
|
||||||
|
xpath_text,
|
||||||
)
|
)
|
||||||
from .brightcove import BrightcoveIE
|
from .brightcove import BrightcoveIE
|
||||||
from .ooyala import OoyalaIE
|
from .ooyala import OoyalaIE
|
||||||
|
@@ -569,6 +570,16 @@ class GenericIE(InfoExtractor):
|
||||||
'title': 'John Carlson Postgame 2/25/15',
|
'title': 'John Carlson Postgame 2/25/15',
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
# RSS feed with enclosure
|
||||||
|
{
|
||||||
|
'url': 'http://podcastfeeds.nbcnews.com/audio/podcast/MSNBC-MADDOW-NETCAST-M4V.xml',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
'ext': 'm4v',
|
||||||
|
'upload_date': '20150228',
|
||||||
|
'title': 'pdv_maddow_netcast_m4v-02-27-2015-201624',
|
||||||
|
}
|
||||||
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def report_following_redirect(self, new_url):
|
def report_following_redirect(self, new_url):
|
||||||
|
@@ -580,11 +591,24 @@ class GenericIE(InfoExtractor):
|
||||||
playlist_desc_el = doc.find('./channel/description')
|
playlist_desc_el = doc.find('./channel/description')
|
||||||
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
playlist_desc = None if playlist_desc_el is None else playlist_desc_el.text
|
||||||
|
|
||||||
entries = [{
|
entries = []
|
||||||
|
for it in doc.findall('./channel/item'):
|
||||||
|
next_url = xpath_text(it, 'link', fatal=False)
|
||||||
|
if not next_url:
|
||||||
|
enclosure_nodes = it.findall('./enclosure')
|
||||||
|
for e in enclosure_nodes:
|
||||||
|
next_url = e.attrib.get('url')
|
||||||
|
if next_url:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not next_url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
entries.append({
|
||||||
'_type': 'url',
|
'_type': 'url',
|
||||||
'url': e.find('link').text,
|
'url': next_url,
|
||||||
'title': e.find('title').text,
|
'title': it.find('title').text,
|
||||||
} for e in doc.findall('./channel/item')]
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'_type': 'playlist',
|
'_type': 'playlist',
|
||||||
|
|
Loading…
Reference in a new issue