[extractor/common] Fix url meta field for unfragmented DASH formats (closes #20346)
This commit is contained in:
		
							parent
							
								
									6db03a29d1
								
							
						
					
					
						commit
						79d2077edc
					
				
					 1 changed files with 17 additions and 8 deletions
				
			
		| 
						 | 
				
			
			@ -108,10 +108,13 @@ class InfoExtractor(object):
 | 
			
		|||
                                   for RTMP - RTMP URL,
 | 
			
		||||
                                   for HLS - URL of the M3U8 media playlist,
 | 
			
		||||
                                   for HDS - URL of the F4M manifest,
 | 
			
		||||
                                   for DASH - URL of the MPD manifest or
 | 
			
		||||
                                              base URL representing the media
 | 
			
		||||
                                              if MPD manifest is parsed from
 | 
			
		||||
                                              a string,
 | 
			
		||||
                                   for DASH
 | 
			
		||||
                                     - HTTP URL to plain file media (in case of
 | 
			
		||||
                                       unfragmented media)
 | 
			
		||||
                                     - URL of the MPD manifest or base URL
 | 
			
		||||
                                       representing the media if MPD manifest
 | 
			
		||||
                                       is parsed from a string (in case of
 | 
			
		||||
                                       fragmented media)
 | 
			
		||||
                                   for MSS - URL of the ISM manifest.
 | 
			
		||||
                    * manifest_url
 | 
			
		||||
                                 The URL of the manifest file in case of
 | 
			
		||||
| 
						 | 
				
			
			@ -2137,8 +2140,6 @@ class InfoExtractor(object):
 | 
			
		|||
                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
 | 
			
		||||
                        f = {
 | 
			
		||||
                            'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
 | 
			
		||||
                            # NB: mpd_url may be empty when MPD manifest is parsed from a string
 | 
			
		||||
                            'url': mpd_url or base_url,
 | 
			
		||||
                            'manifest_url': mpd_url,
 | 
			
		||||
                            'ext': mimetype2ext(mime_type),
 | 
			
		||||
                            'width': int_or_none(representation_attrib.get('width')),
 | 
			
		||||
| 
						 | 
				
			
			@ -2277,10 +2278,14 @@ class InfoExtractor(object):
 | 
			
		|||
                                    fragment['duration'] = segment_duration
 | 
			
		||||
                                fragments.append(fragment)
 | 
			
		||||
                            representation_ms_info['fragments'] = fragments
 | 
			
		||||
                        # NB: MPD manifest may contain direct URLs to unfragmented media.
 | 
			
		||||
                        # No fragments key is present in this case.
 | 
			
		||||
                        # If there is a fragments key available then we correctly recognized fragmented media.
 | 
			
		||||
                        # Otherwise we will assume unfragmented media with direct access. Technically, such
 | 
			
		||||
                        # assumption is not necessarily correct since we may simply have no support for
 | 
			
		||||
                        # some forms of fragmented media renditions yet, but for now we'll use this fallback.
 | 
			
		||||
                        if 'fragments' in representation_ms_info:
 | 
			
		||||
                            f.update({
 | 
			
		||||
                                # NB: mpd_url may be empty when MPD manifest is parsed from a string
 | 
			
		||||
                                'url': mpd_url or base_url,
 | 
			
		||||
                                'fragment_base_url': base_url,
 | 
			
		||||
                                'fragments': [],
 | 
			
		||||
                                'protocol': 'http_dash_segments',
 | 
			
		||||
| 
						 | 
				
			
			@ -2291,6 +2296,10 @@ class InfoExtractor(object):
 | 
			
		|||
                                    f['url'] = initialization_url
 | 
			
		||||
                                f['fragments'].append({location_key(initialization_url): initialization_url})
 | 
			
		||||
                            f['fragments'].extend(representation_ms_info['fragments'])
 | 
			
		||||
                        else:
 | 
			
		||||
                            # Assuming direct URL to unfragmented media.
 | 
			
		||||
                            f['url'] = base_url
 | 
			
		||||
 | 
			
		||||
                        # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation
 | 
			
		||||
                        # is not necessarily unique within a Period thus formats with
 | 
			
		||||
                        # the same `format_id` are quite possible. There are numerous examples
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue