[pornhub] Extract metadata from JSON-LD (closes #26614)
This commit is contained in:
		
							parent
							
								
									ce5b904050
								
							
						
					
					
						commit
						cd85a1bb8b
					
				
					 1 changed file with 12 additions and 5 deletions
				
			
		| 
						 | 
					@ -17,6 +17,7 @@ from ..utils import (
 | 
				
			||||||
    determine_ext,
 | 
					    determine_ext,
 | 
				
			||||||
    ExtractorError,
 | 
					    ExtractorError,
 | 
				
			||||||
    int_or_none,
 | 
					    int_or_none,
 | 
				
			||||||
 | 
					    merge_dicts,
 | 
				
			||||||
    NO_DEFAULT,
 | 
					    NO_DEFAULT,
 | 
				
			||||||
    orderedSet,
 | 
					    orderedSet,
 | 
				
			||||||
    remove_quotes,
 | 
					    remove_quotes,
 | 
				
			||||||
| 
						 | 
					@ -59,13 +60,14 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
                    '''
 | 
					                    '''
 | 
				
			||||||
    _TESTS = [{
 | 
					    _TESTS = [{
 | 
				
			||||||
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
 | 
					        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
 | 
				
			||||||
        'md5': '1e19b41231a02eba417839222ac9d58e',
 | 
					        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
 | 
				
			||||||
        'info_dict': {
 | 
					        'info_dict': {
 | 
				
			||||||
            'id': '648719015',
 | 
					            'id': '648719015',
 | 
				
			||||||
            'ext': 'mp4',
 | 
					            'ext': 'mp4',
 | 
				
			||||||
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
 | 
					            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
 | 
				
			||||||
            'uploader': 'Babes',
 | 
					            'uploader': 'Babes',
 | 
				
			||||||
            'upload_date': '20130628',
 | 
					            'upload_date': '20130628',
 | 
				
			||||||
 | 
					            'timestamp': 1372447216,
 | 
				
			||||||
            'duration': 361,
 | 
					            'duration': 361,
 | 
				
			||||||
            'view_count': int,
 | 
					            'view_count': int,
 | 
				
			||||||
            'like_count': int,
 | 
					            'like_count': int,
 | 
				
			||||||
| 
						 | 
					@ -82,8 +84,8 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
            'id': '1331683002',
 | 
					            'id': '1331683002',
 | 
				
			||||||
            'ext': 'mp4',
 | 
					            'ext': 'mp4',
 | 
				
			||||||
            'title': '重庆婷婷女王足交',
 | 
					            'title': '重庆婷婷女王足交',
 | 
				
			||||||
            'uploader': 'Unknown',
 | 
					 | 
				
			||||||
            'upload_date': '20150213',
 | 
					            'upload_date': '20150213',
 | 
				
			||||||
 | 
					            'timestamp': 1423804862,
 | 
				
			||||||
            'duration': 1753,
 | 
					            'duration': 1753,
 | 
				
			||||||
            'view_count': int,
 | 
					            'view_count': int,
 | 
				
			||||||
            'like_count': int,
 | 
					            'like_count': int,
 | 
				
			||||||
| 
						 | 
					@ -121,6 +123,7 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
        'params': {
 | 
					        'params': {
 | 
				
			||||||
            'skip_download': True,
 | 
					            'skip_download': True,
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
 | 
					        'skip': 'This video has been disabled',
 | 
				
			||||||
    }, {
 | 
					    }, {
 | 
				
			||||||
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
 | 
					        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
 | 
				
			||||||
        'only_matching': True,
 | 
					        'only_matching': True,
 | 
				
			||||||
| 
						 | 
					@ -338,7 +341,7 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        video_uploader = self._html_search_regex(
 | 
					        video_uploader = self._html_search_regex(
 | 
				
			||||||
            r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
 | 
					            r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
 | 
				
			||||||
            webpage, 'uploader', fatal=False)
 | 
					            webpage, 'uploader', default=None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        view_count = self._extract_count(
 | 
					        view_count = self._extract_count(
 | 
				
			||||||
            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
 | 
					            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
 | 
				
			||||||
| 
						 | 
					@ -356,7 +359,11 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
            if div:
 | 
					            if div:
 | 
				
			||||||
                return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
 | 
					                return re.findall(r'<a[^>]+\bhref=[^>]+>([^<]+)', div)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        info = self._search_json_ld(webpage, video_id, default={})
 | 
				
			||||||
 | 
					        # description provided in JSON-LD is irrelevant
 | 
				
			||||||
 | 
					        info['description'] = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return merge_dicts({
 | 
				
			||||||
            'id': video_id,
 | 
					            'id': video_id,
 | 
				
			||||||
            'uploader': video_uploader,
 | 
					            'uploader': video_uploader,
 | 
				
			||||||
            'upload_date': upload_date,
 | 
					            'upload_date': upload_date,
 | 
				
			||||||
| 
						 | 
					@ -372,7 +379,7 @@ class PornHubIE(PornHubBaseIE):
 | 
				
			||||||
            'tags': extract_list('tags'),
 | 
					            'tags': extract_list('tags'),
 | 
				
			||||||
            'categories': extract_list('categories'),
 | 
					            'categories': extract_list('categories'),
 | 
				
			||||||
            'subtitles': subtitles,
 | 
					            'subtitles': subtitles,
 | 
				
			||||||
        }
 | 
					        }, info)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class PornHubPlaylistBaseIE(PornHubBaseIE):
 | 
					class PornHubPlaylistBaseIE(PornHubBaseIE):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue