[youtube] Improve player id extraction and add tests
This commit is contained in:
		
							parent
							
								
									011e75e641
								
							
						
					
					
						commit
						e40c758c2a
					
				
					 2 changed files with 39 additions and 21 deletions
				
			
		| 
						 | 
					@ -74,6 +74,28 @@ _TESTS = [
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class TestPlayerInfo(unittest.TestCase):
 | 
				
			||||||
 | 
					    def test_youtube_extract_player_info(self):
 | 
				
			||||||
 | 
					        PLAYER_URLS = (
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'),
 | 
				
			||||||
 | 
					            # obsolete
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'),
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'),
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'),
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'),
 | 
				
			||||||
 | 
					            ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'),
 | 
				
			||||||
 | 
					            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'),
 | 
				
			||||||
 | 
					            ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'),
 | 
				
			||||||
 | 
					            ('http://s.ytimg.com/yt/swfbin/watch_as3-vflrEm9Nq.swf', 'vflrEm9Nq'),
 | 
				
			||||||
 | 
					            ('https://s.ytimg.com/yts/swfbin/player-vflenCdZL/watch_as3.swf', 'vflenCdZL'),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        for player_url, expected_player_id in PLAYER_URLS:
 | 
				
			||||||
 | 
					            expected_player_type = player_url.split('.')[-1]
 | 
				
			||||||
 | 
					            player_type, player_id = YoutubeIE._extract_player_info(player_url)
 | 
				
			||||||
 | 
					            self.assertEqual(player_type, expected_player_type)
 | 
				
			||||||
 | 
					            self.assertEqual(player_id, expected_player_id)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestSignature(unittest.TestCase):
 | 
					class TestSignature(unittest.TestCase):
 | 
				
			||||||
    def setUp(self):
 | 
					    def setUp(self):
 | 
				
			||||||
        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
					        TEST_DIR = os.path.dirname(os.path.abspath(__file__))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -426,6 +426,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			||||||
                     (?(1).+)?                                                # if we found the ID, everything can follow
 | 
					                     (?(1).+)?                                                # if we found the ID, everything can follow
 | 
				
			||||||
                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 | 
					                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
 | 
				
			||||||
    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 | 
					    _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
 | 
				
			||||||
 | 
					    _PLAYER_INFO_RE = (
 | 
				
			||||||
 | 
					        r'/(?P<id>[a-zA-Z0-9_-]{8,})/player_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?/base\.(?P<ext>[a-z]+)$',
 | 
				
			||||||
 | 
					        r'\b(?P<id>vfl[a-zA-Z0-9_-]+)\b.*?\.(?P<ext>[a-z]+)$',
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
    _formats = {
 | 
					    _formats = {
 | 
				
			||||||
        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 | 
					        '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 | 
				
			||||||
        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 | 
					        '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
 | 
				
			||||||
| 
						 | 
					@ -1273,14 +1277,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			||||||
        """ Return a string representation of a signature """
 | 
					        """ Return a string representation of a signature """
 | 
				
			||||||
        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 | 
					        return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _extract_signature_function(self, video_id, player_url, example_sig):
 | 
					    @classmethod
 | 
				
			||||||
        id_m = re.match(
 | 
					    def _extract_player_info(cls, player_url):
 | 
				
			||||||
            r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
 | 
					        for player_re in cls._PLAYER_INFO_RE:
 | 
				
			||||||
            player_url)
 | 
					            id_m = re.search(player_re, player_url)
 | 
				
			||||||
        if not id_m:
 | 
					            if id_m:
 | 
				
			||||||
 | 
					                break
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
            raise ExtractorError('Cannot identify player %r' % player_url)
 | 
					            raise ExtractorError('Cannot identify player %r' % player_url)
 | 
				
			||||||
        player_type = id_m.group('ext')
 | 
					        return id_m.group('ext'), id_m.group('id')
 | 
				
			||||||
        player_id = id_m.group('id')
 | 
					
 | 
				
			||||||
 | 
					    def _extract_signature_function(self, video_id, player_url, example_sig):
 | 
				
			||||||
 | 
					        player_type, player_id = self._extract_player_info(player_url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Read from filesystem cache
 | 
					        # Read from filesystem cache
 | 
				
			||||||
        func_id = '%s_%s_%s' % (
 | 
					        func_id = '%s_%s_%s' % (
 | 
				
			||||||
| 
						 | 
					@ -2009,22 +2017,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        if self._downloader.params.get('verbose'):
 | 
					                        if self._downloader.params.get('verbose'):
 | 
				
			||||||
                            if player_url is None:
 | 
					                            if player_url is None:
 | 
				
			||||||
                                player_version = 'unknown'
 | 
					 | 
				
			||||||
                                player_desc = 'unknown'
 | 
					                                player_desc = 'unknown'
 | 
				
			||||||
                            else:
 | 
					                            else:
 | 
				
			||||||
                                if player_url.endswith('swf'):
 | 
					                                player_type, player_version = self._extract_player_info(player_url)
 | 
				
			||||||
                                    player_version = self._search_regex(
 | 
					                                player_desc = '%s player %s' % ('flash' if player_type == 'swf' else 'html5', player_version)
 | 
				
			||||||
                                        r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
 | 
					 | 
				
			||||||
                                        'flash player', fatal=False)
 | 
					 | 
				
			||||||
                                    player_desc = 'flash player %s' % player_version
 | 
					 | 
				
			||||||
                                else:
 | 
					 | 
				
			||||||
                                    player_version = self._search_regex(
 | 
					 | 
				
			||||||
                                        [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
 | 
					 | 
				
			||||||
                                         r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
 | 
					 | 
				
			||||||
                                        player_url,
 | 
					 | 
				
			||||||
                                        'html5 player', fatal=False)
 | 
					 | 
				
			||||||
                                    player_desc = 'html5 player %s' % player_version
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            parts_sizes = self._signature_cache_id(encrypted_sig)
 | 
					                            parts_sizes = self._signature_cache_id(encrypted_sig)
 | 
				
			||||||
                            self.to_screen('{%s} signature length %s, %s' %
 | 
					                            self.to_screen('{%s} signature length %s, %s' %
 | 
				
			||||||
                                           (format_id, parts_sizes, player_desc))
 | 
					                                           (format_id, parts_sizes, player_desc))
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue