]+\bhref=[^>]+>([^<]+)', div)
return {
'id': video_id,
'uploader': video_uploader,
'upload_date': upload_date,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'like_count': like_count,
'dislike_count': dislike_count,
'comment_count': comment_count,
'formats': formats,
'age_limit': 18,
'tags': extract_list('tags'),
'categories': extract_list('categories'),
'subtitles': subtitles,
}
class PornHubPlaylistBaseIE(PornHubBaseIE):
    """Common base for extractors that turn a listing page into a playlist.

    Subclasses are expected to define ``_VALID_URL`` with the named groups
    ``host`` and ``id``; both are read by name in ``_real_extract``.
    """

    def _extract_entries(self, webpage, host):
        """Build one ``url_result`` entry per video link found in *webpage*.

        webpage -- HTML of the listing page as a string.
        host    -- host name used to build each entry URL
                   (``http://www.<host>/<relative video url>``).

        Returns a list of ``url_result`` entries, one per distinct
        (url, title) pair in the order ``orderedSet`` yields them.
        """
        # Only process container div with main playlist content skipping
        # drop-down menu that uses similar pattern for videos (see
        # https://github.com/ytdl-org/youtube-dl/issues/11594).
        # ``default=webpage`` is supplied so that the whole page is searched
        # when no container div is found.
        container = self._search_regex(
            r'(?s)(<div[^>]+class=["\']container.+)', webpage,
            'container', default=webpage)

        return [
            self.url_result(
                'http://www.%s/%s' % (host, video_url),
                PornHubIE.ie_key(), video_title=title)
            for video_url, title in orderedSet(re.findall(
                r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"',
                container))
        ]

    def _real_extract(self, url):
        """Download the playlist page at *url* and return a playlist result.

        The title is taken from the embedded playlist JSON object when
        present, otherwise from the "Videos in ... Playlist" heading; the
        description comes from the JSON object only.
        """
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        playlist_id = mobj.group('id')

        webpage = self._download_webpage(url, playlist_id)

        entries = self._extract_entries(webpage, host)

        # The JSON lookup is best-effort (default='{}', fatal=False).
        # NOTE(review): with fatal=False the parser presumably yields None on
        # malformed JSON -- fall back to an empty dict so the ``.get`` calls
        # below cannot fail on a non-dict value.
        playlist = self._parse_json(
            self._search_regex(
                r'(?:playlistObject|PLAYLIST_VIEW)\s*=\s*({.+?});', webpage,
                'playlist', default='{}'),
            playlist_id, fatal=False) or {}

        title = playlist.get('title') or self._search_regex(
            r'>Videos\s+in\s+(.+?)\s+[Pp]laylist<', webpage, 'title', fatal=False)

        return self.playlist_result(
            entries, playlist_id, title, playlist.get('description'))
class PornHubPlaylistIE(PornHubPlaylistBaseIE):
    """Extractor for playlist pages addressed as ``/playlist/<numeric id>``."""

    # The named groups ``host`` and ``id`` are required: the inherited
    # _real_extract reads them via mobj.group('host') / mobj.group('id').
    # An optional subdomain (e.g. ``www.`` or ``de.``) is accepted before
    # the host.
    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.pornhub.com/playlist/4667351',
        'info_dict': {
            'id': '4667351',
            'title': 'Nataly Hot',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://de.pornhub.com/playlist/4667351',
        'only_matching': True,
    }]
class PornHubUserVideosIE(PornHubPlaylistBaseIE):
    """Extractor for the paginated ``/videos`` listing of a user, channel,
    model or pornstar page."""

    # ``host`` and ``id`` must be named groups: _real_extract below reads
    # them via mobj.group('host') / mobj.group('id').
    _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos'
    _TESTS = [{
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'info_dict': {
            'id': 'zoe_ph',
        },
        'playlist_mincount': 171,
    }, {
        'url': 'http://www.pornhub.com/users/rushandlia/videos',
        'only_matching': True,
    }, {
        # default sorting as Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos',
        'info_dict': {
            'id': 'povd',
        },
        'playlist_mincount': 293,
    }, {
        # Top Rated Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=ra',
        'only_matching': True,
    }, {
        # Most Recent Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=da',
        'only_matching': True,
    }, {
        # Most Viewed Videos
        'url': 'https://www.pornhub.com/channels/povd/videos?o=vi',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/users/zoe_ph/videos/public',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/model/jayndrea/videos/upload',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Walk the listing page by page and return all videos as a playlist.

        Pages are requested with an increasing ``page`` query parameter
        starting at 1. Iteration stops at the first page that either
        answers with HTTP 404 or yields no entries; any other download
        error is re-raised unchanged.
        """
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host')
        user_id = mobj.group('id')

        entries = []
        for page_num in itertools.count(1):
            try:
                webpage = self._download_webpage(
                    url, user_id, 'Downloading page %d' % page_num,
                    query={'page': page_num})
            except ExtractorError as e:
                # A 404 is treated as "ran past the last page", not as a
                # failure.
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                    break
                raise
            page_entries = self._extract_entries(webpage, host)
            if not page_entries:
                break
            entries.extend(page_entries)

        return self.playlist_result(entries, user_id)