Generalize XML manifest processing code and improve XSPF parsing (closes #15794)
This commit is contained in:
		
							parent
							
								
									e0d198c18d
								
							
						
					
					
						commit
						47a5cb7734
					
				
					 3 changed files with 52 additions and 36 deletions
				
			
		| 
						 | 
					@ -698,40 +698,47 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
 | 
				
			||||||
        _TEST_CASES = [
 | 
					        _TEST_CASES = [
 | 
				
			||||||
            (
 | 
					            (
 | 
				
			||||||
                'foo_xspf',
 | 
					                'foo_xspf',
 | 
				
			||||||
                'https://example.org/src/',
 | 
					                'https://example.org/src/foo_xspf.xspf',
 | 
				
			||||||
                [{
 | 
					                [{
 | 
				
			||||||
 | 
					                    'id': 'foo_xspf',
 | 
				
			||||||
 | 
					                    'title': 'Pandemonium',
 | 
				
			||||||
                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
					                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
				
			||||||
                    'duration': 202.416,
 | 
					                    'duration': 202.416,
 | 
				
			||||||
                    'formats': [{'url': 'https://example.org/src/cd1/track%201.mp3'}],
 | 
					                    'formats': [{
 | 
				
			||||||
 | 
					                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
 | 
				
			||||||
 | 
					                        'url': 'https://example.org/src/cd1/track%201.mp3',
 | 
				
			||||||
 | 
					                    }],
 | 
				
			||||||
 | 
					                }, {
 | 
				
			||||||
                    'id': 'foo_xspf',
 | 
					                    'id': 'foo_xspf',
 | 
				
			||||||
                    'title': 'Pandemonium'
 | 
					                    'title': 'Final Cartridge (Nichico Twelve Remix)',
 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
					                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
				
			||||||
                    'duration': 255.857,
 | 
					                    'duration': 255.857,
 | 
				
			||||||
                    'formats': [{'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3'}],
 | 
					                    'formats': [{
 | 
				
			||||||
 | 
					                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
 | 
				
			||||||
 | 
					                        'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
 | 
				
			||||||
 | 
					                    }],
 | 
				
			||||||
 | 
					                }, {
 | 
				
			||||||
                    'id': 'foo_xspf',
 | 
					                    'id': 'foo_xspf',
 | 
				
			||||||
                    'title': 'Final Cartridge (Nichico Twelve Remix)'
 | 
					                    'title': 'Rebuilding Nightingale',
 | 
				
			||||||
                },
 | 
					 | 
				
			||||||
                {
 | 
					 | 
				
			||||||
                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
					                    'description': 'Visit http://bigbrother404.bandcamp.com',
 | 
				
			||||||
                    'duration': 287.915,
 | 
					                    'duration': 287.915,
 | 
				
			||||||
                    'formats': [
 | 
					                    'formats': [{
 | 
				
			||||||
                        {'url': 'https://example.org/src/track3.mp3'},
 | 
					                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
 | 
				
			||||||
                        {'url': 'https://example.com/track3.mp3'}
 | 
					                        'url': 'https://example.org/src/track3.mp3',
 | 
				
			||||||
                    ],
 | 
					                    }, {
 | 
				
			||||||
                    'id': 'foo_xspf',
 | 
					                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
 | 
				
			||||||
                    'title': 'Rebuilding Nightingale'
 | 
					                        'url': 'https://example.com/track3.mp3',
 | 
				
			||||||
 | 
					                    }]
 | 
				
			||||||
                }]
 | 
					                }]
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        ]
 | 
					        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        for xspf_file, xspf_base_url, expected_entries in _TEST_CASES:
 | 
					        for xspf_file, xspf_url, expected_entries in _TEST_CASES:
 | 
				
			||||||
            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
 | 
					            with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
 | 
				
			||||||
                         mode='r', encoding='utf-8') as f:
 | 
					                         mode='r', encoding='utf-8') as f:
 | 
				
			||||||
                entries = self.ie._parse_xspf(
 | 
					                entries = self.ie._parse_xspf(
 | 
				
			||||||
                    compat_etree_fromstring(f.read().encode('utf-8')),
 | 
					                    compat_etree_fromstring(f.read().encode('utf-8')),
 | 
				
			||||||
                        xspf_file, xspf_base_url)
 | 
					                    xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
 | 
				
			||||||
                expect_value(self, entries, expected_entries, None)
 | 
					                expect_value(self, entries, expected_entries, None)
 | 
				
			||||||
                for i in range(len(entries)):
 | 
					                for i in range(len(entries)):
 | 
				
			||||||
                    expect_dict(self, entries[i], expected_entries[i])
 | 
					                    expect_dict(self, entries[i], expected_entries[i])
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1706,22 +1706,24 @@ class InfoExtractor(object):
 | 
				
			||||||
            })
 | 
					            })
 | 
				
			||||||
        return subtitles
 | 
					        return subtitles
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_xspf_playlist(self, playlist_url, playlist_id, fatal=True):
 | 
					    def _extract_xspf_playlist(self, xspf_url, playlist_id, fatal=True):
 | 
				
			||||||
        xspf = self._download_xml(
 | 
					        xspf = self._download_xml(
 | 
				
			||||||
            playlist_url, playlist_id, 'Downloading xpsf playlist',
 | 
					            xspf_url, playlist_id, 'Downloading xpsf playlist',
 | 
				
			||||||
            'Unable to download xspf manifest', fatal=fatal)
 | 
					            'Unable to download xspf manifest', fatal=fatal)
 | 
				
			||||||
        if xspf is False:
 | 
					        if xspf is False:
 | 
				
			||||||
            return []
 | 
					            return []
 | 
				
			||||||
        return self._parse_xspf(xspf, playlist_id, base_url(playlist_url))
 | 
					        return self._parse_xspf(
 | 
				
			||||||
 | 
					            xspf, playlist_id, xspf_url=xspf_url,
 | 
				
			||||||
 | 
					            xspf_base_url=base_url(xspf_url))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _parse_xspf(self, playlist, playlist_id, playlist_base_url=''):
 | 
					    def _parse_xspf(self, xspf_doc, playlist_id, xspf_url=None, xspf_base_url=None):
 | 
				
			||||||
        NS_MAP = {
 | 
					        NS_MAP = {
 | 
				
			||||||
            'xspf': 'http://xspf.org/ns/0/',
 | 
					            'xspf': 'http://xspf.org/ns/0/',
 | 
				
			||||||
            's1': 'http://static.streamone.nl/player/ns/0',
 | 
					            's1': 'http://static.streamone.nl/player/ns/0',
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        entries = []
 | 
					        entries = []
 | 
				
			||||||
        for track in playlist.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
 | 
					        for track in xspf_doc.findall(xpath_with_ns('./xspf:trackList/xspf:track', NS_MAP)):
 | 
				
			||||||
            title = xpath_text(
 | 
					            title = xpath_text(
 | 
				
			||||||
                track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
 | 
					                track, xpath_with_ns('./xspf:title', NS_MAP), 'title', default=playlist_id)
 | 
				
			||||||
            description = xpath_text(
 | 
					            description = xpath_text(
 | 
				
			||||||
| 
						 | 
					@ -1731,12 +1733,18 @@ class InfoExtractor(object):
 | 
				
			||||||
            duration = float_or_none(
 | 
					            duration = float_or_none(
 | 
				
			||||||
                xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
 | 
					                xpath_text(track, xpath_with_ns('./xspf:duration', NS_MAP), 'duration'), 1000)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            formats = [{
 | 
					            formats = []
 | 
				
			||||||
                'url': urljoin(playlist_base_url, location.text),
 | 
					            for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP)):
 | 
				
			||||||
                'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
 | 
					                format_url = urljoin(xspf_base_url, location.text)
 | 
				
			||||||
                'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
 | 
					                if not format_url:
 | 
				
			||||||
                'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
 | 
					                    continue
 | 
				
			||||||
            } for location in track.findall(xpath_with_ns('./xspf:location', NS_MAP))]
 | 
					                formats.append({
 | 
				
			||||||
 | 
					                    'url': format_url,
 | 
				
			||||||
 | 
					                    'manifest_url': xspf_url,
 | 
				
			||||||
 | 
					                    'format_id': location.get(xpath_with_ns('s1:label', NS_MAP)),
 | 
				
			||||||
 | 
					                    'width': int_or_none(location.get(xpath_with_ns('s1:width', NS_MAP))),
 | 
				
			||||||
 | 
					                    'height': int_or_none(location.get(xpath_with_ns('s1:height', NS_MAP))),
 | 
				
			||||||
 | 
					                })
 | 
				
			||||||
            self._sort_formats(formats)
 | 
					            self._sort_formats(formats)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            entries.append({
 | 
					            entries.append({
 | 
				
			||||||
| 
						 | 
					@ -1750,18 +1758,18 @@ class InfoExtractor(object):
 | 
				
			||||||
        return entries
 | 
					        return entries
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
 | 
					    def _extract_mpd_formats(self, mpd_url, video_id, mpd_id=None, note=None, errnote=None, fatal=True, formats_dict={}):
 | 
				
			||||||
        res = self._download_webpage_handle(
 | 
					        res = self._download_xml_handle(
 | 
				
			||||||
            mpd_url, video_id,
 | 
					            mpd_url, video_id,
 | 
				
			||||||
            note=note or 'Downloading MPD manifest',
 | 
					            note=note or 'Downloading MPD manifest',
 | 
				
			||||||
            errnote=errnote or 'Failed to download MPD manifest',
 | 
					            errnote=errnote or 'Failed to download MPD manifest',
 | 
				
			||||||
            fatal=fatal)
 | 
					            fatal=fatal)
 | 
				
			||||||
        if res is False:
 | 
					        if res is False:
 | 
				
			||||||
            return []
 | 
					            return []
 | 
				
			||||||
        mpd, urlh = res
 | 
					        mpd_doc, urlh = res
 | 
				
			||||||
        mpd_base_url = base_url(urlh.geturl())
 | 
					        mpd_base_url = base_url(urlh.geturl())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return self._parse_mpd_formats(
 | 
					        return self._parse_mpd_formats(
 | 
				
			||||||
            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url,
 | 
					            mpd_doc, mpd_id=mpd_id, mpd_base_url=mpd_base_url,
 | 
				
			||||||
            formats_dict=formats_dict, mpd_url=mpd_url)
 | 
					            formats_dict=formats_dict, mpd_url=mpd_url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
 | 
					    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):
 | 
				
			||||||
| 
						 | 
					@ -2035,17 +2043,16 @@ class InfoExtractor(object):
 | 
				
			||||||
        return formats
 | 
					        return formats
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
 | 
					    def _extract_ism_formats(self, ism_url, video_id, ism_id=None, note=None, errnote=None, fatal=True):
 | 
				
			||||||
        res = self._download_webpage_handle(
 | 
					        res = self._download_xml_handle(
 | 
				
			||||||
            ism_url, video_id,
 | 
					            ism_url, video_id,
 | 
				
			||||||
            note=note or 'Downloading ISM manifest',
 | 
					            note=note or 'Downloading ISM manifest',
 | 
				
			||||||
            errnote=errnote or 'Failed to download ISM manifest',
 | 
					            errnote=errnote or 'Failed to download ISM manifest',
 | 
				
			||||||
            fatal=fatal)
 | 
					            fatal=fatal)
 | 
				
			||||||
        if res is False:
 | 
					        if res is False:
 | 
				
			||||||
            return []
 | 
					            return []
 | 
				
			||||||
        ism, urlh = res
 | 
					        ism_doc, urlh = res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return self._parse_ism_formats(
 | 
					        return self._parse_ism_formats(ism_doc, urlh.geturl(), ism_id)
 | 
				
			||||||
            compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
 | 
					    def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2233,7 +2233,9 @@ class GenericIE(InfoExtractor):
 | 
				
			||||||
                return smil
 | 
					                return smil
 | 
				
			||||||
            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
 | 
					            elif doc.tag == '{http://xspf.org/ns/0/}playlist':
 | 
				
			||||||
                return self.playlist_result(
 | 
					                return self.playlist_result(
 | 
				
			||||||
                    self._parse_xspf(doc, video_id, compat_str(full_response.geturl())),
 | 
					                    self._parse_xspf(
 | 
				
			||||||
 | 
					                        doc, video_id, xspf_url=url,
 | 
				
			||||||
 | 
					                        xspf_base_url=compat_str(full_response.geturl())),
 | 
				
			||||||
                    video_id)
 | 
					                    video_id)
 | 
				
			||||||
            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
 | 
					            elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
 | 
				
			||||||
                info_dict['formats'] = self._parse_mpd_formats(
 | 
					                info_dict['formats'] = self._parse_mpd_formats(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue