Add the missing age_limit tags; add a devscript that superficially checks for porn sites whose tests lack the age_limit tag
This commit is contained in:
		
							parent
							
								
									82f0ac657c
								
							
						
					
					
						commit
						750e9833b8
					
				
					 7 changed files with 59 additions and 4 deletions
				
			
		
							
								
								
									
										39
									
								
								devscripts/check-porn.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										39
									
								
								devscripts/check-porn.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,39 @@
 | 
			
		|||
#!/usr/bin/env python

"""
Superficially check that porn sites carry an 'age_limit' tag in their tests.

The script downloads the URL of every test case and applies a VERY basic
heuristic ('porn' in webpage.lower()). It reports:

* test cases whose page mentions porn but whose info_dict does not expect
  age_limit == 18, and
* test cases that expect age_limit == 18 although the page does not
  mention porn.

Every other test case is shown as a single '.' progress mark.
"""

# Allow direct execution
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import get_testcases
from youtube_dl.utils import compat_urllib_request

for test in get_testcases():
    try:
        webpage = compat_urllib_request.urlopen(test['url'], timeout=10).read()
    except Exception:
        # Best effort: any download problem is only reported, never fatal.
        # Catch Exception instead of using a bare except so that
        # KeyboardInterrupt/SystemExit still abort the run.
        print('\nFail: {0}'.format(test['name']))
        continue

    webpage = webpage.decode('utf8', 'replace')

    # Evaluate both sides of the heuristic once.
    mentions_porn = 'porn' in webpage.lower()
    tagged_adult = ('info_dict' in test
                    and 'age_limit' in test['info_dict']
                    and test['info_dict']['age_limit'] == 18)

    if mentions_porn and not tagged_adult:
        print('\nPotential missing age_limit check: {0}'.format(test['name']))

    elif not mentions_porn and tagged_adult:
        print('\nPotential false negative: {0}'.format(test['name']))

    else:
        sys.stdout.write('.')
    sys.stdout.flush()

# Terminate the line of progress dots. A bare print() would output "()" on
# Python 2 because this file does not import print_function.
sys.stdout.write('\n')
 | 
			
		||||
| 
						 | 
				
			
			@ -6,7 +6,6 @@ from ..utils import (
 | 
			
		|||
    compat_urllib_parse_urlparse,
 | 
			
		||||
    compat_urllib_request,
 | 
			
		||||
    compat_urllib_parse,
 | 
			
		||||
    unescapeHTML,
 | 
			
		||||
)
 | 
			
		||||
from ..aes import (
 | 
			
		||||
    aes_decrypt_text
 | 
			
		||||
| 
						 | 
				
			
			@ -20,6 +19,7 @@ class KeezMoviesIE(InfoExtractor):
 | 
			
		|||
        u'md5': u'6e297b7e789329923fcf83abb67c9289',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"title": u"Petite Asian Lady Mai Playing In Bathtub",
 | 
			
		||||
            u"age_limit": 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -48,6 +48,8 @@ class KeezMoviesIE(InfoExtractor):
 | 
			
		|||
        format = path.split('/')[4].split('_')[:2]
 | 
			
		||||
        format = "-".join( format )
 | 
			
		||||
 | 
			
		||||
        age_limit = self._rta_search(webpage)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'title': video_title,
 | 
			
		||||
| 
						 | 
				
			
			@ -55,4 +57,5 @@ class KeezMoviesIE(InfoExtractor):
 | 
			
		|||
            'ext': extension,
 | 
			
		||||
            'format': format,
 | 
			
		||||
            'format_id': format,
 | 
			
		||||
            'age_limit': age_limit,
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -21,6 +21,7 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
        u'info_dict': {
 | 
			
		||||
            u"uploader": u"BABES-COM", 
 | 
			
		||||
            u"title": u"Seductive Indian beauty strips down and fingers her pink pussy",
 | 
			
		||||
            u"age_limit": 18
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -64,4 +65,5 @@ class PornHubIE(InfoExtractor):
 | 
			
		|||
            'title': video_title,
 | 
			
		||||
            'thumbnail': thumbnail,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -16,7 +16,8 @@ class PornotubeIE(InfoExtractor):
 | 
			
		|||
        u'md5': u'374dd6dcedd24234453b295209aa69b6',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"upload_date": u"20090708", 
 | 
			
		||||
            u"title": u"Marilyn-Monroe-Bathing"
 | 
			
		||||
            u"title": u"Marilyn-Monroe-Bathing",
 | 
			
		||||
            u"age_limit": 18
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,6 +22,7 @@ class SpankwireIE(InfoExtractor):
 | 
			
		|||
            u"uploader": u"oreusz", 
 | 
			
		||||
            u"title": u"Buckcherry`s X Rated Music Video Crazy Bitch",
 | 
			
		||||
            u"description": u"Crazy Bitch X rated music video.",
 | 
			
		||||
            u"age_limit": 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -60,6 +61,8 @@ class SpankwireIE(InfoExtractor):
 | 
			
		|||
            })
 | 
			
		||||
        formats.sort(key=lambda format: list(map(lambda s: s.zfill(6), format['format'].split('-'))))
 | 
			
		||||
 | 
			
		||||
        age_limit = self._rta_search(webpage)
 | 
			
		||||
 | 
			
		||||
        return {
 | 
			
		||||
            'id': video_id,
 | 
			
		||||
            'uploader': video_uploader,
 | 
			
		||||
| 
						 | 
				
			
			@ -67,4 +70,5 @@ class SpankwireIE(InfoExtractor):
 | 
			
		|||
            'thumbnail': thumbnail,
 | 
			
		||||
            'description': description,
 | 
			
		||||
            'formats': formats,
 | 
			
		||||
            'age_limit': age_limit,
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -22,6 +22,7 @@ class Tube8IE(InfoExtractor):
 | 
			
		|||
            u"description": u"hot teen Kasia grinding", 
 | 
			
		||||
            u"uploader": u"unknown", 
 | 
			
		||||
            u"title": u"Kasia music video",
 | 
			
		||||
            u"age_limit": 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -60,4 +61,5 @@ class Tube8IE(InfoExtractor):
 | 
			
		|||
            'ext': extension,
 | 
			
		||||
            'format': format,
 | 
			
		||||
            'format_id': format,
 | 
			
		||||
            'age_limit': 18,
 | 
			
		||||
        }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,7 +13,8 @@ class YouJizzIE(InfoExtractor):
 | 
			
		|||
        u'file': u'2189178.flv',
 | 
			
		||||
        u'md5': u'07e15fa469ba384c7693fd246905547c',
 | 
			
		||||
        u'info_dict': {
 | 
			
		||||
            u"title": u"Zeichentrick 1"
 | 
			
		||||
            u"title": u"Zeichentrick 1",
 | 
			
		||||
            u"age_limit": 18,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -25,6 +26,8 @@ class YouJizzIE(InfoExtractor):
 | 
			
		|||
        # Get webpage content
 | 
			
		||||
        webpage = self._download_webpage(url, video_id)
 | 
			
		||||
 | 
			
		||||
        age_limit = self._rta_search(webpage)
 | 
			
		||||
 | 
			
		||||
        # Get the video title
 | 
			
		||||
        video_title = self._html_search_regex(r'<title>(?P<title>.*)</title>',
 | 
			
		||||
            webpage, u'title').strip()
 | 
			
		||||
| 
						 | 
				
			
			@ -60,6 +63,7 @@ class YouJizzIE(InfoExtractor):
 | 
			
		|||
                'title': video_title,
 | 
			
		||||
                'ext': 'flv',
 | 
			
		||||
                'format': 'flv',
 | 
			
		||||
                'player_url': embed_page_url}
 | 
			
		||||
                'player_url': embed_page_url,
 | 
			
		||||
                'age_limit': age_limit}
 | 
			
		||||
 | 
			
		||||
        return [info]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue