[YoutubeDL] Add negation support for string comparisons in format selection expressions (closes #18600, closes #18805)
This commit is contained in:
		
							parent
							
								
									379306ef55
								
							
						
					
					
						commit
						2cc779f497
					
				
					 3 changed files with 54 additions and 4 deletions
				
			
		| 
						 | 
				
			
			@ -667,13 +667,14 @@ The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `
 | 
			
		|||
 - `asr`: Audio sampling rate in Hertz
 | 
			
		||||
 - `fps`: Frame rate
 | 
			
		||||
 | 
			
		||||
Also filtering work for comparisons `=` (equals), `!=` (not equals), `^=` (begins with), `$=` (ends with), `*=` (contains) and following string meta fields:
 | 
			
		||||
Filtering also works for the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains) and the following string meta fields:
 | 
			
		||||
 - `ext`: File extension
 | 
			
		||||
 - `acodec`: Name of the audio codec in use
 | 
			
		||||
 - `vcodec`: Name of the video codec in use
 | 
			
		||||
 - `container`: Name of the container format
 | 
			
		||||
 - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
 | 
			
		||||
 - `format_id`: A short description of the format
 | 
			
		||||
Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).
 | 
			
		||||
 | 
			
		||||
Note that none of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the video hoster.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -239,6 +239,52 @@ class TestFormatSelection(unittest.TestCase):
 | 
			
		|||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
 | 
			
		||||
 | 
			
		||||
    def test_format_selection_string_ops(self):
 | 
			
		||||
        formats = [
 | 
			
		||||
            {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL},
 | 
			
		||||
        ]
 | 
			
		||||
        info_dict = _make_result(formats)
 | 
			
		||||
 | 
			
		||||
        # equals (=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id=abc-cba]'})
 | 
			
		||||
        ydl.process_ie_result(info_dict.copy())
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'abc-cba')
 | 
			
		||||
 | 
			
		||||
        # does not equal (!=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id!=abc-cba]'})
 | 
			
		||||
        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 | 
			
		||||
 | 
			
		||||
        # starts with (^=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id^=abc]'})
 | 
			
		||||
        ydl.process_ie_result(info_dict.copy())
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'abc-cba')
 | 
			
		||||
 | 
			
		||||
        # does not start with (!^=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id!^=abc-cba]'})
 | 
			
		||||
        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 | 
			
		||||
 | 
			
		||||
        # ends with ($=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id$=cba]'})
 | 
			
		||||
        ydl.process_ie_result(info_dict.copy())
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'abc-cba')
 | 
			
		||||
 | 
			
		||||
        # does not end with (!$=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id!$=abc-cba]'})
 | 
			
		||||
        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 | 
			
		||||
 | 
			
		||||
        # contains (*=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id*=-]'})
 | 
			
		||||
        ydl.process_ie_result(info_dict.copy())
 | 
			
		||||
        downloaded = ydl.downloaded_info_dicts[0]
 | 
			
		||||
        self.assertEqual(downloaded['format_id'], 'abc-cba')
 | 
			
		||||
 | 
			
		||||
        # does not contain (!*=)
 | 
			
		||||
        ydl = YDL({'format': '[format_id!*=-]'})
 | 
			
		||||
        self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy())
 | 
			
		||||
 | 
			
		||||
    def test_youtube_format_selection(self):
 | 
			
		||||
        order = [
 | 
			
		||||
            '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1063,21 +1063,24 @@ class YoutubeDL(object):
 | 
			
		|||
        if not m:
 | 
			
		||||
            STR_OPERATORS = {
 | 
			
		||||
                '=': operator.eq,
 | 
			
		||||
                '!=': operator.ne,
 | 
			
		||||
                '^=': lambda attr, value: attr.startswith(value),
 | 
			
		||||
                '$=': lambda attr, value: attr.endswith(value),
 | 
			
		||||
                '*=': lambda attr, value: value in attr,
 | 
			
		||||
            }
 | 
			
		||||
            str_operator_rex = re.compile(r'''(?x)
 | 
			
		||||
                \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
 | 
			
		||||
                \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
 | 
			
		||||
                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
 | 
			
		||||
                \s*(?P<value>[a-zA-Z0-9._-]+)
 | 
			
		||||
                \s*$
 | 
			
		||||
                ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 | 
			
		||||
            m = str_operator_rex.search(filter_spec)
 | 
			
		||||
            if m:
 | 
			
		||||
                comparison_value = m.group('value')
 | 
			
		||||
                op = STR_OPERATORS[m.group('op')]
 | 
			
		||||
                str_op = STR_OPERATORS[m.group('op')]
 | 
			
		||||
                if m.group('negation'):
 | 
			
		||||
                    # Negation must call the wrapped operator: `not str_op` would
                    # negate the function object itself, which is always False.
                    op = lambda attr, value: not str_op(attr, value)
 | 
			
		||||
                else:
 | 
			
		||||
                    op = str_op
 | 
			
		||||
 | 
			
		||||
        if not m:
 | 
			
		||||
            raise ValueError('Invalid filter specification %r' % filter_spec)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue