Merge pull request #211 from mauriciocolli/improve-recaptcha-detection
[YouTube] Improve reCAPTCHA detection
This commit is contained in:
commit
514ed7bdc1
8 changed files with 60 additions and 35 deletions
|
@ -62,6 +62,9 @@ public interface Downloader {
|
||||||
|
|
||||||
DownloadResponse head(String siteUrl) throws IOException, ReCaptchaException;
|
DownloadResponse head(String siteUrl) throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
|
DownloadResponse get(String siteUrl, Localization localization)
|
||||||
|
throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
DownloadResponse get(String siteUrl, DownloadRequest request)
|
DownloadResponse get(String siteUrl, DownloadRequest request)
|
||||||
throws IOException, ReCaptchaException;
|
throws IOException, ReCaptchaException;
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.Downloader;
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
import org.schabi.newpipe.extractor.NewPipe;
|
import org.schabi.newpipe.extractor.NewPipe;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
@ -14,6 +15,7 @@ import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
||||||
|
@ -60,8 +62,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||||
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
||||||
String pageContent = downloader.download(channelUrl);
|
final DownloadResponse response = downloader.get(channelUrl);
|
||||||
doc = Jsoup.parse(pageContent, channelUrl);
|
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParserException;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.Downloader;
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
@ -35,8 +36,9 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||||
String pageContent = downloader.download(getUrl());
|
final String url = getUrl();
|
||||||
doc = Jsoup.parse(pageContent, getUrl());
|
final DownloadResponse response = downloader.get(url);
|
||||||
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.Downloader;
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
import org.schabi.newpipe.extractor.InfoItem;
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
|
@ -12,6 +13,7 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
||||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
|
@ -52,13 +54,9 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||||
final String site;
|
|
||||||
final String url = getUrl();
|
final String url = getUrl();
|
||||||
//String url = builder.build().toString();
|
final DownloadResponse response = downloader.get(url, getLocalization());
|
||||||
//if we've been passed a valid language code, append it to the URL
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||||
site = downloader.download(url, getLocalization());
|
|
||||||
|
|
||||||
doc = Jsoup.parse(site, url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,6 +18,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.*;
|
import org.schabi.newpipe.extractor.stream.*;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
|
@ -611,23 +612,16 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
|
|
||||||
private String pageHtml = null;
|
private String pageHtml = null;
|
||||||
|
|
||||||
private String getPageHtml(Downloader downloader) throws IOException, ExtractionException {
|
|
||||||
final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS;
|
|
||||||
if (pageHtml == null) {
|
|
||||||
pageHtml = downloader.download(verifiedUrl);
|
|
||||||
}
|
|
||||||
return pageHtml;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
public void onFetchPage(@Nonnull Downloader downloader) throws IOException, ExtractionException {
|
||||||
final String pageContent = getPageHtml(downloader);
|
final String verifiedUrl = getUrl() + VERIFIED_URL_PARAMS;
|
||||||
doc = Jsoup.parse(pageContent, getUrl());
|
final DownloadResponse response = downloader.get(verifiedUrl);
|
||||||
|
pageHtml = response.getResponseBody();
|
||||||
|
doc = YoutubeParsingHelper.parseAndCheckPage(verifiedUrl, response);
|
||||||
|
|
||||||
final String playerUrl;
|
final String playerUrl;
|
||||||
// Check if the video is age restricted
|
// Check if the video is age restricted
|
||||||
if (pageContent.contains("<meta property=\"og:restrictions:age")) {
|
if (!doc.select("meta[property=\"og:restrictions:age\"").isEmpty()) {
|
||||||
// do this if it is age gated
|
|
||||||
final EmbeddedInfo info = getEmbeddedInfo();
|
final EmbeddedInfo info = getEmbeddedInfo();
|
||||||
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
|
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
|
||||||
final String infoPageResponse = downloader.download(videoInfoUrl);
|
final String infoPageResponse = downloader.download(videoInfoUrl);
|
||||||
|
@ -635,7 +629,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
playerUrl = info.url;
|
playerUrl = info.url;
|
||||||
isAgeRestricted = true;
|
isAgeRestricted = true;
|
||||||
} else {
|
} else {
|
||||||
final JsonObject ytPlayerConfig = getPlayerConfig(pageContent);
|
final JsonObject ytPlayerConfig = getPlayerConfig();
|
||||||
playerArgs = getPlayerArgs(ytPlayerConfig);
|
playerArgs = getPlayerArgs(ytPlayerConfig);
|
||||||
playerUrl = getPlayerUrl(ytPlayerConfig);
|
playerUrl = getPlayerUrl(ytPlayerConfig);
|
||||||
isAgeRestricted = false;
|
isAgeRestricted = false;
|
||||||
|
@ -651,9 +645,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private JsonObject getPlayerConfig(String pageContent) throws ParsingException {
|
private JsonObject getPlayerConfig() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
String ytPlayerConfigRaw = Parser.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageContent);
|
String ytPlayerConfigRaw = Parser.matchGroup1("ytplayer.config\\s*=\\s*(\\{.*?\\});", pageHtml);
|
||||||
return JsonParser.object().from(ytPlayerConfigRaw);
|
return JsonParser.object().from(ytPlayerConfigRaw);
|
||||||
} catch (Parser.RegexException e) {
|
} catch (Parser.RegexException e) {
|
||||||
String errorReason = getErrorMessage();
|
String errorReason = getErrorMessage();
|
||||||
|
@ -823,13 +817,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
// If the video is age restricted getPlayerConfig will fail
|
// If the video is age restricted getPlayerConfig will fail
|
||||||
if(isAgeRestricted) return Collections.emptyList();
|
if(isAgeRestricted) return Collections.emptyList();
|
||||||
|
|
||||||
final JsonObject playerConfig;
|
|
||||||
try {
|
|
||||||
playerConfig = getPlayerConfig(getPageHtml(NewPipe.getDownloader()));
|
|
||||||
} catch (IOException | ExtractionException e) {
|
|
||||||
throw new SubtitlesException("Unable to download player configs", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
final JsonObject captions;
|
final JsonObject captions;
|
||||||
if (!playerResponse.has("captions")) {
|
if (!playerResponse.has("captions")) {
|
||||||
// Captions does not exist
|
// Captions does not exist
|
||||||
|
|
|
@ -24,12 +24,14 @@ import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.Downloader;
|
import org.schabi.newpipe.extractor.Downloader;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
import org.schabi.newpipe.extractor.kiosk.KioskExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
|
@ -56,8 +58,8 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||||
url += "?gl=" + contentCountry;
|
url += "?gl=" + contentCountry;
|
||||||
}
|
}
|
||||||
|
|
||||||
String pageContent = downloader.download(url);
|
final DownloadResponse response = downloader.get(url);
|
||||||
doc = Jsoup.parse(pageContent, url);
|
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,7 +1,11 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||||
|
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
|
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
|
||||||
|
@ -30,6 +34,23 @@ public class YoutubeParsingHelper {
|
||||||
private YoutubeParsingHelper() {
|
private YoutubeParsingHelper() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final String[] RECAPTCHA_DETECTION_SELECTORS = {
|
||||||
|
"form[action*=\"/das_captcha\"]",
|
||||||
|
"input[name*=\"action_recaptcha_verify\"]"
|
||||||
|
};
|
||||||
|
|
||||||
|
public static Document parseAndCheckPage(final String url, final DownloadResponse response) throws ReCaptchaException {
|
||||||
|
final Document document = Jsoup.parse(response.getResponseBody(), url);
|
||||||
|
|
||||||
|
for (String detectionSelector : RECAPTCHA_DETECTION_SELECTORS) {
|
||||||
|
if (!document.select(detectionSelector).isEmpty()) {
|
||||||
|
throw new ReCaptchaException("reCAPTCHA challenge requested (detected with selector: \"" + detectionSelector + "\")", url);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return document;
|
||||||
|
}
|
||||||
|
|
||||||
public static boolean isYoutubeURL(URL url) {
|
public static boolean isYoutubeURL(URL url) {
|
||||||
String host = url.getHost();
|
String host = url.getHost();
|
||||||
return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com")
|
return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com")
|
||||||
|
|
|
@ -16,6 +16,8 @@ import org.schabi.newpipe.extractor.DownloadResponse;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
import org.schabi.newpipe.extractor.utils.Localization;
|
import org.schabi.newpipe.extractor.utils.Localization;
|
||||||
|
|
||||||
|
import static java.util.Collections.singletonList;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Created by Christian Schabesberger on 28.01.16.
|
* Created by Christian Schabesberger on 28.01.16.
|
||||||
*
|
*
|
||||||
|
@ -194,6 +196,14 @@ public class Downloader implements org.schabi.newpipe.extractor.Downloader {
|
||||||
return new DownloadResponse(con.getResponseCode(), null, con.getHeaderFields());
|
return new DownloadResponse(con.getResponseCode(), null, con.getHeaderFields());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DownloadResponse get(String siteUrl, Localization localization) throws IOException, ReCaptchaException {
|
||||||
|
final Map<String, List<String>> requestHeaders = new HashMap<>();
|
||||||
|
requestHeaders.put("Accept-Language", singletonList(localization.getLanguage()));
|
||||||
|
|
||||||
|
return get(siteUrl, new DownloadRequest(null, requestHeaders));
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DownloadResponse get(String siteUrl, DownloadRequest request)
|
public DownloadResponse get(String siteUrl, DownloadRequest request)
|
||||||
throws IOException, ReCaptchaException {
|
throws IOException, ReCaptchaException {
|
||||||
|
|
Loading…
Reference in a new issue