[generic] Add support for RSS feeds (Fixes #667)
This commit is contained in:
parent
280bc5dad6
commit
4fc946b546
2 changed files with 37 additions and 0 deletions
|
@ -250,5 +250,14 @@ class TestPlaylists(unittest.TestCase):
|
||||||
self.assertEqual(result['title'], 'python language')
|
self.assertEqual(result['title'], 'python language')
|
||||||
self.assertTrue(len(result['entries']) == 15)
|
self.assertTrue(len(result['entries']) == 15)
|
||||||
|
|
||||||
|
def test_generic_rss_feed(self):
    """The generic extractor turns an RSS feed URL into a playlist result."""
    feed_url = 'http://www.escapistmagazine.com/rss/videos/list/1.xml'
    extractor = GenericIE(FakeYDL())
    playlist = extractor.extract(feed_url)
    self.assertIsPlaylist(playlist)
    # The feed URL itself is used as the playlist id.
    self.assertEqual(playlist['id'], feed_url)
    self.assertEqual(playlist['title'], 'Zero Punctuation')
    # The live feed changes over time, so only a lower bound is checked.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count > 10)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -4,6 +4,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
from .common import InfoExtractor
|
from .common import InfoExtractor
|
||||||
from .youtube import YoutubeIE
|
from .youtube import YoutubeIE
|
||||||
|
@ -159,6 +160,25 @@ class GenericIE(InfoExtractor):
|
||||||
raise ExtractorError('Invalid URL protocol')
|
raise ExtractorError('Invalid URL protocol')
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
def _extract_rss(self, url, video_id, doc):
    """Build a playlist result from a parsed RSS document.

    Args:
        url: URL the feed was fetched from; returned as the playlist 'id'.
        video_id: Unused here; kept so the call signature stays stable.
        doc: Root element of the parsed feed (the <rss> element).

    Returns:
        A dict with '_type' 'playlist', the channel title and description
        (each None when the element is absent), and one '_type' 'url'
        entry per <item> that carries a non-empty <link>.
    """
    def _text(node, path):
        # Compare with None explicitly: an Element without children is
        # falsy, so a plain truth test would misreport a present element.
        found = node.find(path)
        return None if found is None else found.text

    entries = []
    for item in doc.findall('./channel/item'):
        link = _text(item, 'link')
        if not link:
            # Every <item> sub-element is optional in RSS 2.0; an item
            # without a link has no URL to hand on, so skip it instead of
            # failing the whole feed with an AttributeError.
            continue
        entries.append({
            '_type': 'url',
            'url': link,
            'title': _text(item, 'title'),
        })

    return {
        '_type': 'playlist',
        'id': url,
        'title': _text(doc, './channel/title'),
        'description': _text(doc, './channel/description'),
        'entries': entries,
    }
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
parsed_url = compat_urlparse.urlparse(url)
|
parsed_url = compat_urlparse.urlparse(url)
|
||||||
if not parsed_url.scheme:
|
if not parsed_url.scheme:
|
||||||
|
@ -219,6 +239,14 @@ class GenericIE(InfoExtractor):
|
||||||
|
|
||||||
self.report_extraction(video_id)
|
self.report_extraction(video_id)
|
||||||
|
|
||||||
|
# Is it an RSS feed?
|
||||||
|
try:
|
||||||
|
doc = xml.etree.ElementTree.fromstring(webpage)
|
||||||
|
if doc.tag == 'rss':
|
||||||
|
return self._extract_rss(url, video_id, doc)
|
||||||
|
except xml.etree.ElementTree.ParseError:
|
||||||
|
pass
|
||||||
|
|
||||||
# it's tempting to parse this further, but you would
|
# it's tempting to parse this further, but you would
|
||||||
# have to take into account all the variations like
|
# have to take into account all the variations like
|
||||||
# Video Title - Site Name
|
# Video Title - Site Name
|
||||||
|
|
Loading…
Reference in a new issue