[pornhub] Extract categories and tags (closes #10499)
This commit is contained in:
		
							parent
							
								
									fea74acad8
								
							
						
					
					
						commit
						6bb05b32a9
					
				
					 2 changed files with 17 additions and 0 deletions
				
			
		| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
version <unreleased>
 | 
			
		||||
 | 
			
		||||
Extractors
 | 
			
		||||
+ [pornhub] Extract categories and tags (#10499)
 | 
			
		||||
+ [foxnews] Support Fox News articles (#10598)
 | 
			
		||||
* [iwara] Fix extraction after relaunch (#10462, #3215)
 | 
			
		||||
* [newgrounds] Fix uploader extraction (#10584)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,6 +15,7 @@ from ..compat import (
 | 
			
		|||
from ..utils import (
 | 
			
		||||
    ExtractorError,
 | 
			
		||||
    int_or_none,
 | 
			
		||||
    js_to_json,
 | 
			
		||||
    orderedSet,
 | 
			
		||||
    sanitized_Request,
 | 
			
		||||
    str_to_int,
 | 
			
		||||
| 
						 | 
				
			
			@ -48,6 +49,8 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
            'dislike_count': int,
 | 
			
		||||
            'comment_count': int,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
            'tags': list,
 | 
			
		||||
            'categories': list,
 | 
			
		||||
        },
 | 
			
		||||
    }, {
 | 
			
		||||
        # non-ASCII title
 | 
			
		||||
| 
						 | 
				
			
			@ -63,6 +66,8 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
            'dislike_count': int,
 | 
			
		||||
            'comment_count': int,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
            'tags': list,
 | 
			
		||||
            'categories': list,
 | 
			
		||||
        },
 | 
			
		||||
        'params': {
 | 
			
		||||
            'skip_download': True,
 | 
			
		||||
| 
						 | 
				
			
			@ -183,6 +188,15 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
            })
 | 
			
		||||
        self._sort_formats(formats)
 | 
			
		||||
 | 
			
		||||
        page_params = self._parse_json(self._search_regex(
 | 
			
		||||
            r'page_params\.zoneDetails\[([\'"])[^\'"]+\1\]\s*=\s*(?P<data>{[^}]+})',
 | 
			
		||||
            webpage, 'page parameters', group='data', default='{}'),
 | 
			
		||||
            video_id, transform_source=js_to_json, fatal=False)
 | 
			
		||||
        tags = categories = None
 | 
			
		||||
        if page_params:
 | 
			
		||||
            tags = page_params.get('tags', '').split(',')
 | 
			
		||||
            categories = page_params.get('categories', '').split(',')
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
| 
						 | 
				
			
			@ -195,6 +209,8 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
            'comment_count': comment_count,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
            'tags': tags,
 | 
			
		||||
            'categories': categories,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue