From 4c987a530292bc4b5a43540efcdf4b262c19de0d Mon Sep 17 00:00:00 2001 From: wb9688 Date: Sun, 26 Jul 2020 10:01:03 +0200 Subject: [PATCH] Support YouTube's new continuations for search --- .../org/schabi/newpipe/extractor/Page.java | 20 +++- .../extractors/YoutubeSearchExtractor.java | 94 ++++++++++++++++--- 2 files changed, 98 insertions(+), 16 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java index 6b8b4247..e4faae77 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/Page.java @@ -8,35 +8,45 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; public class Page implements Serializable { private final String url; + private final String id; private final List ids; private final Map cookies; - public Page(final String url, final List ids, final Map cookies) { + public Page(final String url, final String id, final List ids, final Map cookies) { this.url = url; + this.id = id; this.ids = ids; this.cookies = cookies; } public Page(final String url) { - this(url, null, null); + this(url, null, null, null); + } + + public Page(final String url, final String id) { + this(url, id, null, null); } public Page(final String url, final Map cookies) { - this(url, null, cookies); + this(url, null, null, cookies); } public Page(final List ids) { - this(null, ids, null); + this(null, null, ids, null); } public Page(final List ids, final Map cookies) { - this(null, ids, cookies); + this(null, null, ids, cookies); } public String getUrl() { return url; } + public String getId() { + return id; + } + public List getIds() { return ids; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java index f02fc2bd..df86b5d3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java @@ -2,6 +2,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonParser; +import com.grack.nanojson.JsonParserException; +import com.grack.nanojson.JsonWriter; + import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.StreamingService; @@ -14,11 +18,18 @@ import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.utils.JsonUtils; -import javax.annotation.Nonnull; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import javax.annotation.Nonnull; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getClientVersion; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; /* @@ -104,12 +115,16 @@ public class YoutubeSearchExtractor extends SearchExtractor { Page nextPage = null; - for (Object section : sections) { - final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer"); + for (final Object section : sections) { + if (((JsonObject) section).has("itemSectionRenderer")) { + final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer"); - collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); + collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); - nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations")); + nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations")); + } else if (((JsonObject) section).has("continuationItemRenderer")) { + nextPage = getNewNextPageFrom(((JsonObject) section).getObject("continuationItemRenderer")); + } } return new InfoItemsPage<>(collector, nextPage); @@ -122,15 +137,58 @@ public class YoutubeSearchExtractor extends SearchExtractor { } final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId()); - final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization()); - final JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response") - .getObject("continuationContents").getObject("itemSectionContinuation"); + if (page.getId() == null) { + final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization()); - collectStreamsFrom(collector, itemSectionRenderer.getArray("contents")); - final JsonArray continuations = itemSectionRenderer.getArray("continuations"); + final JsonObject itemSectionContinuation = ajaxJson.getObject(1).getObject("response") + .getObject("continuationContents").getObject("itemSectionContinuation"); - return new InfoItemsPage<>(collector, getNextPageFrom(continuations)); + collectStreamsFrom(collector, itemSectionContinuation.getArray("contents")); + final JsonArray continuations = itemSectionContinuation.getArray("continuations"); + + return new InfoItemsPage<>(collector, getNextPageFrom(continuations)); + } else { + // @formatter:off + final byte[] json = JsonWriter.string() + .object() + .object("context") + .object("client") + .value("hl", "en") + .value("gl", getExtractorContentCountry().getCountryCode()) + .value("clientName", "WEB") + .value("clientVersion", getClientVersion()) + .value("utcOffsetMinutes", 0) + .end() + .object("request").end() + .object("user").end() + .end() + .value("continuation", page.getId()) + .end().done().getBytes("UTF-8"); + // @formatter:on + + final Map> headers = new HashMap<>(); + headers.put("Origin", Collections.singletonList("https://www.youtube.com")); + headers.put("Referer", Collections.singletonList(this.getUrl())); + headers.put("Content-Type", Collections.singletonList("application/json")); + + final String responseBody = getValidJsonResponseBody(getDownloader().post(page.getUrl(), headers, json)); + + final JsonObject ajaxJson; + try { + ajaxJson = JsonParser.object().from(responseBody); + } catch (JsonParserException e) { + throw new ParsingException("Could not parse JSON", e); + } + + final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands") + .getObject(0).getObject("appendContinuationItemsAction").getArray("continuationItems"); + + final JsonArray contents = continuationItems.getObject(0).getObject("itemSectionRenderer").getArray("contents"); + collectStreamsFrom(collector, contents); + + return new InfoItemsPage<>(collector, getNewNextPageFrom(continuationItems.getObject(1).getObject("continuationItemRenderer"))); + } } private void collectStreamsFrom(final InfoItemsSearchCollector collector, final JsonArray videos) throws NothingFoundException, ParsingException { @@ -162,4 +220,18 @@ public class YoutubeSearchExtractor extends SearchExtractor { return new Page(getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation + "&itct=" + clickTrackingParams); } + + private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) { + if (isNullOrEmpty(continuationItemRenderer)) { + return null; + } + + final String token = continuationItemRenderer.getObject("continuationEndpoint") + .getObject("continuationCommand").getString("token"); + + // FIXME: Key needs to be extracted + final String url = "https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; + + return new Page(url, token); + } }