[nova:embed] Fix extraction (closes #24700)
This commit is contained in:
		
							parent
							
								
									dcc8522fdb
								
							
						
					
					
						commit
						5caf88ccb4
					
				
					 1 changed files with 71 additions and 35 deletions
				
			
		| 
						 | 
				
			
			@ -6,6 +6,7 @@ import re
 | 
			
		|||
from .common import InfoExtractor
 | 
			
		||||
from ..utils import (
 | 
			
		||||
    clean_html,
 | 
			
		||||
    determine_ext,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    qualities,
 | 
			
		||||
| 
						 | 
				
			
			@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor):
 | 
			
		|||
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        bitrates = self._parse_json(
 | 
			
		||||
            self._search_regex(
 | 
			
		||||
                r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
 | 
			
		||||
            video_id, transform_source=js_to_json)
 | 
			
		||||
 | 
			
		||||
        QUALITIES = ('lq', 'mq', 'hq', 'hd')
 | 
			
		||||
        quality_key = qualities(QUALITIES)
 | 
			
		||||
 | 
			
		||||
        duration = None
 | 
			
		||||
        formats = []
 | 
			
		||||
        for format_id, format_list in bitrates.items():
 | 
			
		||||
            if not isinstance(format_list, list):
 | 
			
		||||
                format_list = [format_list]
 | 
			
		||||
            for format_url in format_list:
 | 
			
		||||
                format_url = url_or_none(format_url)
 | 
			
		||||
                if not format_url:
 | 
			
		||||
                    continue
 | 
			
		||||
                if format_id == 'hls':
 | 
			
		||||
                    formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                        format_url, video_id, ext='mp4',
 | 
			
		||||
                        entry_protocol='m3u8_native', m3u8_id='hls',
 | 
			
		||||
                        fatal=False))
 | 
			
		||||
                    continue
 | 
			
		||||
                f = {
 | 
			
		||||
                    'url': format_url,
 | 
			
		||||
                }
 | 
			
		||||
                f_id = format_id
 | 
			
		||||
                for quality in QUALITIES:
 | 
			
		||||
                    if '%s.mp4' % quality in format_url:
 | 
			
		||||
                        f_id += '-%s' % quality
 | 
			
		||||
                        f.update({
 | 
			
		||||
                            'quality': quality_key(quality),
 | 
			
		||||
                            'format_note': quality.upper(),
 | 
			
		||||
 | 
			
		||||
        player = self._parse_json(
 | 
			
		||||
            self._search_regex(
 | 
			
		||||
                r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
 | 
			
		||||
                webpage, 'player', default='{}'), video_id, fatal=False)
 | 
			
		||||
        if player:
 | 
			
		||||
            for format_id, format_list in player['tracks'].items():
 | 
			
		||||
                if not isinstance(format_list, list):
 | 
			
		||||
                    format_list = [format_list]
 | 
			
		||||
                for format_dict in format_list:
 | 
			
		||||
                    if not isinstance(format_dict, dict):
 | 
			
		||||
                        continue
 | 
			
		||||
                    format_url = url_or_none(format_dict.get('src'))
 | 
			
		||||
                    format_type = format_dict.get('type')
 | 
			
		||||
                    ext = determine_ext(format_url)
 | 
			
		||||
                    if (format_type == 'application/x-mpegURL'
 | 
			
		||||
                            or format_id == 'HLS' or ext == 'm3u8'):
 | 
			
		||||
                        formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                            format_url, video_id, 'mp4',
 | 
			
		||||
                            entry_protocol='m3u8_native', m3u8_id='hls',
 | 
			
		||||
                            fatal=False))
 | 
			
		||||
                    elif (format_type == 'application/dash+xml'
 | 
			
		||||
                          or format_id == 'DASH' or ext == 'mpd'):
 | 
			
		||||
                        formats.extend(self._extract_mpd_formats(
 | 
			
		||||
                            format_url, video_id, mpd_id='dash', fatal=False))
 | 
			
		||||
                    else:
 | 
			
		||||
                        formats.append({
 | 
			
		||||
                            'url': format_url,
 | 
			
		||||
                        })
 | 
			
		||||
                        break
 | 
			
		||||
                f['format_id'] = f_id
 | 
			
		||||
                formats.append(f)
 | 
			
		||||
            duration = int_or_none(player.get('duration'))
 | 
			
		||||
        else:
 | 
			
		||||
            # Old path, not actual as of 08.04.2020
 | 
			
		||||
            bitrates = self._parse_json(
 | 
			
		||||
                self._search_regex(
 | 
			
		||||
                    r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
 | 
			
		||||
                video_id, transform_source=js_to_json)
 | 
			
		||||
 | 
			
		||||
            QUALITIES = ('lq', 'mq', 'hq', 'hd')
 | 
			
		||||
            quality_key = qualities(QUALITIES)
 | 
			
		||||
 | 
			
		||||
            for format_id, format_list in bitrates.items():
 | 
			
		||||
                if not isinstance(format_list, list):
 | 
			
		||||
                    format_list = [format_list]
 | 
			
		||||
                for format_url in format_list:
 | 
			
		||||
                    format_url = url_or_none(format_url)
 | 
			
		||||
                    if not format_url:
 | 
			
		||||
                        continue
 | 
			
		||||
                    if format_id == 'hls':
 | 
			
		||||
                        formats.extend(self._extract_m3u8_formats(
 | 
			
		||||
                            format_url, video_id, ext='mp4',
 | 
			
		||||
                            entry_protocol='m3u8_native', m3u8_id='hls',
 | 
			
		||||
                            fatal=False))
 | 
			
		||||
                        continue
 | 
			
		||||
                    f = {
 | 
			
		||||
                        'url': format_url,
 | 
			
		||||
                    }
 | 
			
		||||
                    f_id = format_id
 | 
			
		||||
                    for quality in QUALITIES:
 | 
			
		||||
                        if '%s.mp4' % quality in format_url:
 | 
			
		||||
                            f_id += '-%s' % quality
 | 
			
		||||
                            f.update({
 | 
			
		||||
                                'quality': quality_key(quality),
 | 
			
		||||
                                'format_note': quality.upper(),
 | 
			
		||||
                            })
 | 
			
		||||
                            break
 | 
			
		||||
                    f['format_id'] = f_id
 | 
			
		||||
                    formats.append(f)
 | 
			
		||||
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        title = self._og_search_title(
 | 
			
		||||
| 
						 | 
				
			
			@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
 | 
			
		|||
            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
 | 
			
		||||
            'thumbnail', fatal=False, group='value')
 | 
			
		||||
        duration = int_or_none(self._search_regex(
 | 
			
		||||
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
 | 
			
		||||
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
 | 
			
		||||
            default=duration))
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue