Implement pagination in YoutubeSearchExtractor
parent c0a8e01889
commit 5d883d100c
2 changed files with 62 additions and 42 deletions
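
In short: the extractor now pages with YouTube's continuation tokens instead of a "&page=N" query parameter, and getPage() downloads the continuation as JSON (pbj) with the X-YouTube-Client-Name/Version headers before feeding the items into the collector. Below is a minimal sketch of how a caller walks the result pages through this API; it assumes the ListExtractor.InfoItemsPage helpers of that era of NewPipeExtractor (getItems, hasNextPage, getNextPageUrl) and is not part of the commit:

    import java.io.IOException;

    import org.schabi.newpipe.extractor.InfoItem;
    import org.schabi.newpipe.extractor.ListExtractor;
    import org.schabi.newpipe.extractor.exceptions.ExtractionException;
    import org.schabi.newpipe.extractor.search.SearchExtractor;

    // Illustrative only, not part of the commit: page through all search results.
    final class SearchPagingSketch {
        static void printAllResults(SearchExtractor extractor) throws IOException, ExtractionException {
            extractor.fetchPage(); // downloads the first results page and its initial data

            ListExtractor.InfoItemsPage<InfoItem> page = extractor.getInitialPage();
            while (true) {
                for (InfoItem item : page.getItems()) {
                    System.out.println(item.getName() + " -> " + item.getUrl());
                }
                if (!page.hasNextPage()) {
                    break; // getNextPageUrlFrom() returned an empty string
                }
                page = extractor.getPage(page.getNextPageUrl()); // continuation URL built by this commit
            }
        }
    }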
YoutubeSearchExtractor.java
@@ -5,9 +5,7 @@ import com.grack.nanojson.JsonObject;
 import com.grack.nanojson.JsonParser;
 import com.grack.nanojson.JsonParserException;

 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
 import org.schabi.newpipe.extractor.InfoItem;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.downloader.Downloader;
@@ -19,12 +17,12 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
 import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
 import org.schabi.newpipe.extractor.search.SearchExtractor;
 import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
-import org.schabi.newpipe.extractor.utils.Parser;

 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
-import java.net.URL;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;

 import javax.annotation.Nonnull;

@@ -73,58 +71,70 @@ public class YoutubeSearchExtractor extends SearchExtractor {

     @Override
     public String getSearchSuggestion() {
-        final Element el = doc.select("div[class*=\"spell-correction\"]").first();
-        if (el != null) {
-            return el.select("a").first().text();
-        } else {
+        JsonObject showingResultsForRenderer = initialData.getObject("contents")
+                .getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
+                .getObject("sectionListRenderer").getArray("contents").getObject(0)
+                .getObject("itemSectionRenderer").getArray("contents").getObject(0)
+                .getObject("showingResultsForRenderer");
+        if (showingResultsForRenderer == null) {
             return "";
+        } else {
+            return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
+                    .getObject(0).getString("text");
         }
     }

     @Nonnull
     @Override
     public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
-        return new InfoItemsPage<>(collectItems(doc), getNextPageUrl());
+        InfoItemsSearchCollector collector = getInfoItemSearchCollector();
+        JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
+                .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
+                .getObject(0).getObject("itemSectionRenderer").getArray("contents");
+
+        collectStreamsFrom(collector, videos);
+        return new InfoItemsPage<>(collector, getNextPageUrl());
     }

     @Override
     public String getNextPageUrl() throws ExtractionException {
-        return getUrl() + "&page=" + 2;
+        return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
+                .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
+                .getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
     }

     @Override
     public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
-        // TODO: Get extracting next pages working
-        final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
-        doc = Jsoup.parse(response, pageUrl);
-
-        return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
-    }
-
-    private String getNextPageUrlFromCurrentUrl(String currentUrl)
-            throws MalformedURLException, UnsupportedEncodingException {
-        final int pageNr = Integer.parseInt(
-                Parser.compatParseMap(
-                        new URL(currentUrl)
-                                .getQuery())
-                        .get("page"));
-
-        return currentUrl.replace("&page=" + pageNr,
-                "&page=" + Integer.toString(pageNr + 1));
-    }
-
-    private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException, ParsingException {
-        InfoItemsSearchCollector collector = getInfoItemSearchCollector();
-
-        final TimeAgoParser timeAgoParser = getTimeAgoParser();
-
-        if (initialData == null) initialData = YoutubeParsingHelper.getInitialData(doc.toString());
-        JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
-                .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
-                .getObject(0).getObject("itemSectionRenderer").getArray("contents");
-
-        for (Object item : list) {
+        if (pageUrl == null || pageUrl.isEmpty()) {
+            throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
+        }
+
+        InfoItemsSearchCollector collector = getInfoItemSearchCollector();
+        JsonArray ajaxJson;
+        try {
+            Map<String, List<String>> headers = new HashMap<>();
+            headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
+            headers.put("X-YouTube-Client-Version", Collections.singletonList("2.20200221.03.00")); // TODO: Automatically get YouTube client version somehow
+            final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
+            ajaxJson = JsonParser.array().from(response);
+        } catch (JsonParserException pe) {
+            throw new ParsingException("Could not parse json data for next streams", pe);
+        }
+
+        JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
+                .getObject("continuationContents").getObject("itemSectionContinuation");
+
+        collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
+
+        return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
+    }
+
+    private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException, ParsingException {
+        collector.reset();
+
+        final TimeAgoParser timeAgoParser = getTimeAgoParser();
+
+        for (Object item : videos) {
             if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
                 throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
                         .getObject("bodyText").getArray("runs").getObject(0).getString("text"));
@@ -136,7 +146,17 @@ public class YoutubeSearchExtractor extends SearchExtractor {
                 collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
             }
         }
-        return collector;
     }

+    private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
+        if (continuations == null) {
+            return "";
+        }
+
+        JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
+        String continuation = nextContinuationData.getString("continuation");
+        String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
+        return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
+                + "&itct=" + clickTrackingParams;
+    }
 }
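For reference, the URL and JSON shapes involved, as far as they can be inferred from the accessor chains above (an assumption based on this diff, not on any documented YouTube API):

    // Next-page URL built by getNextPageUrlFrom():
    //   <search URL>&pbj=1&ctoken=<continuation>&continuation=<continuation>&itct=<clickTrackingParams>
    // where the token is read from the search response:
    //   continuations[0].nextContinuationData.{continuation, clickTrackingParams}
    // The continuation response parsed by getPage() is a JSON array; index 1 carries the payload:
    //   ajaxJson[1].response.continuationContents.itemSectionContinuation.contents       -> result renderers
    //   ajaxJson[1].response.continuationContents.itemSectionContinuation.continuations  -> data for the next page URL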
YoutubeSearchQueryHandlerFactory.java
@@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
     public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
         try {
             final String url = "https://www.youtube.com/results"
-                    + "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
+                    + "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);

             if (contentFilters.size() > 0) {
                 switch (contentFilters.get(0)) {
-                    case VIDEOS: return url + "&sp=EgIQAVAU";
-                    case CHANNELS: return url + "&sp=EgIQAlAU";
-                    case PLAYLISTS: return url + "&sp=EgIQA1AU";
+                    case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
+                    case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
+                    case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
                     case ALL:
                     default:
                 }
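
The new sp values replace the old HTML-frontend filter strings. They look like double URL-encoded, base64-wrapped protobuf filter parameters (an assumption, not something the commit states); a small self-contained sketch unwrapping the VIDEOS value:

    import java.io.UnsupportedEncodingException;
    import java.net.URLDecoder;
    import java.util.Base64;

    // Illustrative only, not part of the commit: unwrap the "sp" value used for VIDEOS above.
    final class SpParamSketch {
        public static void main(String[] args) throws UnsupportedEncodingException {
            String sp = "EgIQAQ%253D%253D";                    // as embedded in the search URL
            String once = URLDecoder.decode(sp, "UTF-8");      // "EgIQAQ%3D%3D"
            String twice = URLDecoder.decode(once, "UTF-8");   // "EgIQAQ=="
            byte[] filter = Base64.getDecoder().decode(twice); // 4 bytes, presumably a protobuf filter message
            System.out.println(filter.length + " filter bytes");
        }
    }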