[bloomberg] Extract the available formats (closes #2776)
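It uses a new helper method, `_extract_f4m_formats`, in the InfoExtractor class. The f4m downloader picks the requested format by matching the bitrate (`tbr`) in the info dict, falling back to the highest bitrate when none is requested.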
This commit is contained in:
		
							parent
							
								
									4958ae2058
								
							
						
					
					
						commit
						31bb8d3f51
					
				
					 3 changed files with 28 additions and 5 deletions
				
			
		| 
						 | 
				
			
			@ -220,6 +220,7 @@ class F4mFD(FileDownloader):
 | 
			
		|||
 | 
			
		||||
    def real_download(self, filename, info_dict):
 | 
			
		||||
        man_url = info_dict['url']
 | 
			
		||||
        requested_bitrate = info_dict.get('tbr')
 | 
			
		||||
        self.to_screen('[download] Downloading f4m manifest')
 | 
			
		||||
        manifest = self.ydl.urlopen(man_url).read()
 | 
			
		||||
        self.report_destination(filename)
 | 
			
		||||
| 
						 | 
				
			
			@ -233,8 +234,14 @@ class F4mFD(FileDownloader):
 | 
			
		|||
 | 
			
		||||
        doc = etree.fromstring(manifest)
 | 
			
		||||
        formats = [(int(f.attrib.get('bitrate', -1)), f) for f in doc.findall(_add_ns('media'))]
 | 
			
		||||
        formats = sorted(formats, key=lambda f: f[0])
 | 
			
		||||
        rate, media = formats[-1]
 | 
			
		||||
        if requested_bitrate is None:
 | 
			
		||||
            # get the best format
 | 
			
		||||
            formats = sorted(formats, key=lambda f: f[0])
 | 
			
		||||
            rate, media = formats[-1]
 | 
			
		||||
        else:
 | 
			
		||||
            rate, media = list(filter(
 | 
			
		||||
                lambda f: int(f[0]) == requested_bitrate, formats))[0]
 | 
			
		||||
 | 
			
		||||
        base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
 | 
			
		||||
        bootstrap = base64.b64decode(doc.find(_add_ns('bootstrapInfo')).text)
 | 
			
		||||
        metadata = base64.b64decode(media.find(_add_ns('metadata')).text)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,7 +10,7 @@ class BloombergIE(InfoExtractor):
 | 
			
		|||
 | 
			
		||||
    _TEST = {
 | 
			
		||||
        'url': 'http://www.bloomberg.com/video/shah-s-presentation-on-foreign-exchange-strategies-qurhIVlJSB6hzkVi229d8g.html',
 | 
			
		||||
        'md5': '7bf08858ff7c203c870e8a6190e221e5',
 | 
			
		||||
        # The md5 checksum changes
 | 
			
		||||
        'info_dict': {
 | 
			
		||||
            'id': 'qurhIVlJSB6hzkVi229d8g',
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
| 
						 | 
				
			
			@ -31,8 +31,7 @@ class BloombergIE(InfoExtractor):
 | 
			
		|||
        return {
 | 
			
		||||
            'id': name.split('-')[-1],
 | 
			
		||||
            'title': title,
 | 
			
		||||
            'url': f4m_url,
 | 
			
		||||
            'ext': 'flv',
 | 
			
		||||
            'formats': self._extract_f4m_formats(f4m_url, name),
 | 
			
		||||
            'description': self._og_search_description(webpage),
 | 
			
		||||
            'thumbnail': self._og_search_thumbnail(webpage),
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,6 +18,7 @@ from ..utils import (
 | 
			
		|||
    clean_html,
 | 
			
		||||
    compiled_regex_type,
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    RegexNotFoundError,
 | 
			
		||||
    sanitize_filename,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
| 
						 | 
				
			
			@ -590,6 +591,22 @@ class InfoExtractor(object):
 | 
			
		|||
        self.to_screen(msg)
 | 
			
		||||
        time.sleep(timeout)
 | 
			
		||||
 | 
			
		||||
    def _extract_f4m_formats(self, manifest_url, video_id):
        """Build the list of format dicts advertised by an f4m manifest.

        The manifest at ``manifest_url`` is fetched with
        ``self._download_xml`` and one entry is created per ``media``
        element in the f4m 1.0 namespace.  Every entry keeps the manifest
        URL itself as its 'url'; the entries differ only in 'tbr', 'width'
        and 'height', each read from the element's attributes through
        ``int_or_none``.

        The list is handed to ``self._sort_formats`` before it is returned.
        """
        doc = self._download_xml(manifest_url, video_id)
        media_tag = '{http://ns.adobe.com/f4m/1.0}media'

        formats = [{
            'url': manifest_url,
            'ext': 'flv',
            'tbr': int_or_none(node.attrib.get('bitrate')),
            'width': int_or_none(node.attrib.get('width')),
            'height': int_or_none(node.attrib.get('height')),
        } for node in doc.findall(media_tag)]
        self._sort_formats(formats)

        return formats
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SearchInfoExtractor(InfoExtractor):
 | 
			
		||||
    """
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue