[porncom] Add extractor (Closes #2251, closes #10251)

This commit is contained in:
Sergey M․ 2016-08-18 23:52:41 +07:00
parent 13585d7682
commit 850837b67a
No known key found for this signature in database
GPG key ID: 2C393E0F18A9236D
2 changed files with 90 additions and 0 deletions

View file

@ -642,6 +642,7 @@ from .podomatic import PodomaticIE
from .pokemon import PokemonIE from .pokemon import PokemonIE
from .polskieradio import PolskieRadioIE from .polskieradio import PolskieRadioIE
from .porn91 import Porn91IE from .porn91 import Porn91IE
from .porncom import PornComIE
from .pornhd import PornHdIE from .pornhd import PornHdIE
from .pornhub import ( from .pornhub import (
PornHubIE, PornHubIE,

View file

@ -0,0 +1,89 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_urlparse
from ..utils import (
int_or_none,
js_to_json,
parse_filesize,
str_to_int,
)
class PornComIE(InfoExtractor):
_VALID_URL = r'https?://(?:[a-zA-Z]+\.)?porn\.com/videos/(?:(?P<display_id>[^/]+)-)?(?P<id>\d+)'
_TESTS = [{
'url': 'http://www.porn.com/videos/teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec-2603339',
'md5': '3f30ce76267533cd12ba999263156de7',
'info_dict': {
'id': '2603339',
'display_id': 'teen-grabs-a-dildo-and-fucks-her-pussy-live-on-1hottie-i-rec',
'ext': 'mp4',
'title': 'Teen grabs a dildo and fucks her pussy live on 1hottie, I rec',
'thumbnail': 're:^https?://.*\.jpg$',
'duration': 551,
'view_count': int,
'age_limit': 18,
},
}, {
'url': 'http://se.porn.com/videos/marsha-may-rides-seth-on-top-of-his-thick-cock-2658067',
'only_matching': True,
}]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
display_id = mobj.group('display_id') or video_id
webpage = self._download_webpage(url, display_id)
config = self._parse_json(
self._search_regex(
r'=\s*({.+?})\s*,\s*[\da-zA-Z_]+\s*=',
webpage, 'config', default='{}'),
display_id, transform_source=js_to_json, fatal=False)
if config:
title = config['title']
formats = [{
'url': stream['url'],
'format_id': stream.get('id'),
'height': int_or_none(self._search_regex(
r'^(\d+)[pP]', stream.get('id') or '', 'height', default=None))
} for stream in config['streams'] if stream.get('url')]
thumbnail = (compat_urlparse.urljoin(
config['thumbCDN'], config['poster'])
if config.get('thumbCDN') and config.get('poster') else None)
duration = int_or_none(config.get('length'))
else:
title = self._search_regex(
(r'<title>([^<]+)</title>', r'<h1[^>]*>([^<]+)</h1>'),
webpage, 'title')
formats = [{
'url': compat_urlparse.urljoin(url, format_url),
'format_id': '%sp' % height,
'height': int(height),
'filesize_approx': parse_filesize(filesize),
} for format_url, height, filesize in re.findall(
r'<a[^>]+href="(/download/[^"]+)">MPEG4 (\d+)p<span[^>]*>(\d+\s+[a-zA-Z]+)<',
webpage)]
thumbnail = None
duration = None
self._sort_formats(formats)
view_count = str_to_int(self._search_regex(
r'class=["\']views["\'][^>]*><p>([\d,.]+)', webpage, 'view count'))
return {
'id': video_id,
'display_id': display_id,
'title': title,
'thumbnail': thumbnail,
'duration': duration,
'view_count': view_count,
'formats': formats,
'age_limit': 18,
}