Merge pull request #799 from litetex/imporve-yt-comments-extractor

Refactored YoutubeCommentsExtractor
This commit is contained in:
Stypox 2022-02-23 17:58:00 +01:00 committed by GitHub
commit 69e18c80cb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -2,10 +2,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;
@ -17,7 +17,6 @@ import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem; import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector; import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -38,7 +37,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
* Caching mechanism and holder of the commentsDisabled value. * Caching mechanism and holder of the commentsDisabled value.
* <br/> * <br/>
* Initial value = empty -> unknown if comments are disabled or not<br/> * Initial value = empty -> unknown if comments are disabled or not<br/>
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()} * Some method calls {@link #findInitialCommentsToken()}
* -> value is set<br/> * -> value is set<br/>
* If the method or another one that is depending on disabled comments * If the method or another one that is depending on disabled comments
* is now called again, the method execution can avoid unnecessary calls * is now called again, the method execution can avoid unnecessary calls
@ -74,45 +73,45 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
/** /**
* Finds the initial comments token and initializes commentsDisabled. * Finds the initial comments token and initializes commentsDisabled.
* <br/>
* Also sets {@link #optCommentsDisabled}.
* *
* @return the continuation token or null if none was found * @return the continuation token or null if none was found
*/ */
@Nullable @Nullable
private String findInitialCommentsToken() throws ExtractionException { private String findInitialCommentsToken() throws ExtractionException {
final String token = JsonUtils.getArray(nextResponse,
"contents.twoColumnWatchNextResults.results.results.contents")
.stream()
// Only use JsonObjects
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
// Check if the comment-section is present
.filter(jObj -> {
try {
return "comments-section".equals(
JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
} catch (final ParsingException ignored) {
return false;
}
})
.findFirst()
// Extract the token (or null in case of error)
.map(itemSectionRenderer -> {
try {
return JsonUtils.getString(
itemSectionRenderer
.getObject("itemSectionRenderer")
.getArray("contents").getObject(0),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
} catch (final ParsingException ignored) {
return null;
}
})
.orElse(null);
final JsonArray jArray = JsonUtils.getArray(nextResponse, // The comments are disabled if we couldn't get a token
"contents.twoColumnWatchNextResults.results.results.contents"); optCommentsDisabled = Optional.of(token == null);
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
JsonObject jObj = (JsonObject) o;
if (jObj.has("itemSectionRenderer")) {
try {
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
.equals("comments-section");
} catch (final ParsingException ignored) {
}
}
return false;
}).findFirst();
final String token;
if (itemSectionRenderer.isPresent()) {
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
} else {
token = null;
}
if (token == null) {
optCommentsDisabled = Optional.of(true);
return null;
}
optCommentsDisabled = Optional.of(false);
return token; return token;
} }
@ -124,25 +123,37 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
@Nullable @Nullable
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException { private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
final JsonArray jsonArray; final JsonArray onResponseReceivedEndpoints =
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray( ajaxJson.getArray("onResponseReceivedEndpoints");
"onResponseReceivedEndpoints");
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
onResponseReceivedEndpoints.size() - 1);
// Prevent ArrayIndexOutOfBoundsException
if (onResponseReceivedEndpoints.isEmpty()) {
return null;
}
final JsonArray continuationItemsArray;
try { try {
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject( final JsonObject endpoint = onResponseReceivedEndpoints
"appendContinuationItemsAction")).getArray("continuationItems"); .getObject(onResponseReceivedEndpoints.size() - 1);
continuationItemsArray = endpoint
.getObject("reloadContinuationItemsCommand",
endpoint.getObject("appendContinuationItemsAction"))
.getArray("continuationItems");
} catch (final Exception e) { } catch (final Exception e) {
return null; return null;
} }
if (jsonArray.isEmpty()) { // Prevent ArrayIndexOutOfBoundsException
if (continuationItemsArray.isEmpty()) {
return null; return null;
} }
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer"); final JsonObject continuationItemRenderer = continuationItemsArray
.getObject(continuationItemsArray.size() - 1)
.getObject("continuationItemRenderer");
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token"; final String jsonPath = continuationItemRenderer.has("button")
? "button.buttonRenderer.command.continuationCommand.token"
: "continuationEndpoint.continuationCommand.token";
final String continuation; final String continuation;
try { try {
@ -169,11 +180,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
} }
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization, final byte[] body = JsonWriter.string(
getExtractorContentCountry()) prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", page.getId()) .value("continuation", page.getId())
.done()) .done())
.getBytes(UTF_8); .getBytes(StandardCharsets.UTF_8);
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization); final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
@ -186,10 +197,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject ajaxJson) throws ParsingException { @Nonnull final JsonObject ajaxJson) throws ParsingException {
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray( final JsonArray onResponseReceivedEndpoints =
"onResponseReceivedEndpoints"); ajaxJson.getArray("onResponseReceivedEndpoints");
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject( // Prevent ArrayIndexOutOfBoundsException
onResponseReceivedEndpoints.size() - 1); if (onResponseReceivedEndpoints.isEmpty()) {
return;
}
final JsonObject commentsEndpoint =
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);
final String path; final String path;
@ -204,18 +219,20 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
final JsonArray contents; final JsonArray contents;
try { try {
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone(); contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
} catch (final Exception e) { } catch (final Exception e) {
// No comments // No comments
return; return;
} }
final int index = contents.size() - 1; final int index = contents.size() - 1;
if (contents.getObject(index).has("continuationItemRenderer")) { if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
contents.remove(index); contents.remove(index);
} }
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer"; final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
? "commentThreadRenderer"
: "commentRenderer";
final List<Object> comments; final List<Object> comments;
try { try {
@ -224,24 +241,23 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
throw new ParsingException("Unable to get parse youtube comments", e); throw new ParsingException("Unable to get parse youtube comments", e);
} }
for (final Object c : comments) { final String url = getUrl();
if (c instanceof JsonObject) { comments.stream()
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor( .filter(JsonObject.class::isInstance)
(JsonObject) c, getUrl(), getTimeAgoParser()); .map(JsonObject.class::cast)
collector.commit(extractor); .map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
} .forEach(collector::commit);
}
} }
@Override @Override
public void onFetchPage(@Nonnull final Downloader downloader) public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException { throws IOException, ExtractionException {
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization, final byte[] body = JsonWriter.string(
getExtractorContentCountry()) prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("videoId", getId()) .value("videoId", getId())
.done()) .done())
.getBytes(UTF_8); .getBytes(StandardCharsets.UTF_8);
nextResponse = getJsonPostResponse("next", body, localization); nextResponse = getJsonPostResponse("next", body, localization);
} }