[nova:embed] Fix extraction (closes #24700)
This commit is contained in:
		
							parent
							
								
									dcc8522fdb
								
							
						
					
					
						commit
						5caf88ccb4
					
				
					 1 changed file with 71 additions and 35 deletions
				
			
		| 
						 | 
					@ -6,6 +6,7 @@ import re
 | 
				
			||||||
from .common import InfoExtractor
 | 
					from .common import InfoExtractor
 | 
				
			||||||
from ..utils import (
 | 
					from ..utils import (
 | 
				
			||||||
    clean_html,
 | 
					    clean_html,
 | 
				
			||||||
 | 
					    determine_ext,
 | 
				
			||||||
    int_or_none,
 | 
					    int_or_none,
 | 
				
			||||||
    js_to_json,
 | 
					    js_to_json,
 | 
				
			||||||
    qualities,
 | 
					    qualities,
 | 
				
			||||||
| 
						 | 
					@ -33,6 +34,40 @@ class NovaEmbedIE(InfoExtractor):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        webpage = self._download_webpage(url, video_id)
 | 
					        webpage = self._download_webpage(url, video_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        duration = None
 | 
				
			||||||
 | 
					        formats = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        player = self._parse_json(
 | 
				
			||||||
 | 
					            self._search_regex(
 | 
				
			||||||
 | 
					                r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
 | 
				
			||||||
 | 
					                webpage, 'player', default='{}'), video_id, fatal=False)
 | 
				
			||||||
 | 
					        if player:
 | 
				
			||||||
 | 
					            for format_id, format_list in player['tracks'].items():
 | 
				
			||||||
 | 
					                if not isinstance(format_list, list):
 | 
				
			||||||
 | 
					                    format_list = [format_list]
 | 
				
			||||||
 | 
					                for format_dict in format_list:
 | 
				
			||||||
 | 
					                    if not isinstance(format_dict, dict):
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
 | 
					                    format_url = url_or_none(format_dict.get('src'))
 | 
				
			||||||
 | 
					                    format_type = format_dict.get('type')
 | 
				
			||||||
 | 
					                    ext = determine_ext(format_url)
 | 
				
			||||||
 | 
					                    if (format_type == 'application/x-mpegURL'
 | 
				
			||||||
 | 
					                            or format_id == 'HLS' or ext == 'm3u8'):
 | 
				
			||||||
 | 
					                        formats.extend(self._extract_m3u8_formats(
 | 
				
			||||||
 | 
					                            format_url, video_id, 'mp4',
 | 
				
			||||||
 | 
					                            entry_protocol='m3u8_native', m3u8_id='hls',
 | 
				
			||||||
 | 
					                            fatal=False))
 | 
				
			||||||
 | 
					                    elif (format_type == 'application/dash+xml'
 | 
				
			||||||
 | 
					                          or format_id == 'DASH' or ext == 'mpd'):
 | 
				
			||||||
 | 
					                        formats.extend(self._extract_mpd_formats(
 | 
				
			||||||
 | 
					                            format_url, video_id, mpd_id='dash', fatal=False))
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        formats.append({
 | 
				
			||||||
 | 
					                            'url': format_url,
 | 
				
			||||||
 | 
					                        })
 | 
				
			||||||
 | 
					            duration = int_or_none(player.get('duration'))
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            # Old path, no longer in use as of 08.04.2020
 | 
				
			||||||
            bitrates = self._parse_json(
 | 
					            bitrates = self._parse_json(
 | 
				
			||||||
                self._search_regex(
 | 
					                self._search_regex(
 | 
				
			||||||
                    r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
 | 
					                    r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
 | 
				
			||||||
| 
						 | 
					@ -41,7 +76,6 @@ class NovaEmbedIE(InfoExtractor):
 | 
				
			||||||
            QUALITIES = ('lq', 'mq', 'hq', 'hd')
 | 
					            QUALITIES = ('lq', 'mq', 'hq', 'hd')
 | 
				
			||||||
            quality_key = qualities(QUALITIES)
 | 
					            quality_key = qualities(QUALITIES)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        formats = []
 | 
					 | 
				
			||||||
            for format_id, format_list in bitrates.items():
 | 
					            for format_id, format_list in bitrates.items():
 | 
				
			||||||
                if not isinstance(format_list, list):
 | 
					                if not isinstance(format_list, list):
 | 
				
			||||||
                    format_list = [format_list]
 | 
					                    format_list = [format_list]
 | 
				
			||||||
| 
						 | 
					@ -69,6 +103,7 @@ class NovaEmbedIE(InfoExtractor):
 | 
				
			||||||
                            break
 | 
					                            break
 | 
				
			||||||
                    f['format_id'] = f_id
 | 
					                    f['format_id'] = f_id
 | 
				
			||||||
                    formats.append(f)
 | 
					                    formats.append(f)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self._sort_formats(formats)
 | 
					        self._sort_formats(formats)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        title = self._og_search_title(
 | 
					        title = self._og_search_title(
 | 
				
			||||||
| 
						 | 
					@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
 | 
				
			||||||
            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
 | 
					            r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
 | 
				
			||||||
            'thumbnail', fatal=False, group='value')
 | 
					            'thumbnail', fatal=False, group='value')
 | 
				
			||||||
        duration = int_or_none(self._search_regex(
 | 
					        duration = int_or_none(self._search_regex(
 | 
				
			||||||
            r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
 | 
					            r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
 | 
				
			||||||
 | 
					            default=duration))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue