[pornhub] Bypass scrape detection (closes #5930)
This commit is contained in:
parent
503b604a31
commit
278d061a0c
1 changed files with 22 additions and 0 deletions
|
@ -10,7 +10,9 @@ from .common import InfoExtractor
|
|||
from ..compat import (
|
||||
compat_HTTPError,
|
||||
compat_str,
|
||||
compat_urllib_request,
|
||||
)
|
||||
from .openload import PhantomJSwrapper
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
|
@ -126,6 +128,26 @@ class PornHubIE(InfoExtractor):
|
|||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _download_webpage_handle(self, *args, **kwargs):
|
||||
def dl(*args, **kwargs):
|
||||
return super(PornHubIE, self)._download_webpage_handle(*args, **kwargs)
|
||||
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
|
||||
if any(re.search(p, webpage) for p in (
|
||||
r'<body\b[^>]+\bonload=["\']go\(\)',
|
||||
r'document\.cookie\s*=\s*["\']RNKEY=',
|
||||
r'document\.location\.reload\(true\)')):
|
||||
url_or_request = args[0]
|
||||
url = (url_or_request.get_full_url()
|
||||
if isinstance(url_or_request, compat_urllib_request.Request)
|
||||
else url_or_request)
|
||||
phantom = PhantomJSwrapper(self, required_version='2.0')
|
||||
phantom.get(url, html=webpage)
|
||||
webpage, urlh = dl(*args, **kwargs)
|
||||
|
||||
return webpage, urlh
|
||||
|
||||
@staticmethod
|
||||
def _extract_urls(webpage):
|
||||
return re.findall(
|
||||
|
|
Loading…
Reference in a new issue