Refactored YoutubeCommentsExtractor
* Use the Java Stream API * Use StandardCharsets * Prevent several NPEs/ArrayIndexOutOfBoundsExceptions * Reformat some code so that it is easier to read
This commit is contained in:
parent
dfe8716f5f
commit
f79ce1f52a
1 changed files with 87 additions and 70 deletions
|
@ -2,10 +2,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
@ -17,7 +17,6 @@ import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.StreamingService;
|
import org.schabi.newpipe.extractor.StreamingService;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
|
||||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||||
|
@ -38,7 +37,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
* Caching mechanism and holder of the commentsDisabled value.
|
* Caching mechanism and holder of the commentsDisabled value.
|
||||||
* <br/>
|
* <br/>
|
||||||
* Initial value = empty -> unknown if comments are disabled or not<br/>
|
* Initial value = empty -> unknown if comments are disabled or not<br/>
|
||||||
* Some method calls {@link YoutubeCommentsExtractor#findInitialCommentsToken()}
|
* Some method calls {@link #findInitialCommentsToken()}
|
||||||
* -> value is set<br/>
|
* -> value is set<br/>
|
||||||
* If the method or another one that is depending on disabled comments
|
* If the method or another one that is depending on disabled comments
|
||||||
* is now called again, the method execution can avoid unnecessary calls
|
* is now called again, the method execution can avoid unnecessary calls
|
||||||
|
@ -74,45 +73,46 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the initial comments token and initializes commentsDisabled.
|
* Finds the initial comments token and initializes commentsDisabled.
|
||||||
|
* Also sets {@code optCommentsDisabled}: {@code true} if no token was found, {@code false} otherwise.
|
||||||
*
|
*
|
||||||
* @return the continuation token or null if none was found
|
* @return the continuation token or null if none was found
|
||||||
*/
|
*/
|
||||||
@Nullable
|
@Nullable
|
||||||
private String findInitialCommentsToken() throws ExtractionException {
|
private String findInitialCommentsToken() throws ExtractionException {
|
||||||
|
final String token = JsonUtils.getArray(nextResponse,
|
||||||
|
"contents.twoColumnWatchNextResults.results.results.contents")
|
||||||
|
.stream()
|
||||||
|
// Only use JsonObjects
|
||||||
|
.filter(JsonObject.class::isInstance)
|
||||||
|
.map(JsonObject.class::cast)
|
||||||
|
// Only process JsonObjects that have an itemSectionRenderer
|
||||||
|
.filter(jObj -> jObj.has("itemSectionRenderer"))
|
||||||
|
// Check if the comment-section is present
|
||||||
|
.filter(jObj -> {
|
||||||
|
try {
|
||||||
|
return "comments-section".equals(
|
||||||
|
JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
|
||||||
|
} catch (final ParsingException ex) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.findFirst()
|
||||||
|
// Extract the token (or null in case of error)
|
||||||
|
.map(itemSectionRenderer -> {
|
||||||
|
try {
|
||||||
|
return JsonUtils.getString(
|
||||||
|
itemSectionRenderer
|
||||||
|
.getObject("itemSectionRenderer")
|
||||||
|
.getArray("contents").getObject(0),
|
||||||
|
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||||
|
} catch (final ParsingException ex) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.orElse(null);
|
||||||
|
|
||||||
final JsonArray jArray = JsonUtils.getArray(nextResponse,
|
// The comments are disabled if we couldn't get a token
|
||||||
"contents.twoColumnWatchNextResults.results.results.contents");
|
optCommentsDisabled = Optional.of(token == null);
|
||||||
|
|
||||||
final Optional<Object> itemSectionRenderer = jArray.stream().filter(o -> {
|
|
||||||
JsonObject jObj = (JsonObject) o;
|
|
||||||
|
|
||||||
if (jObj.has("itemSectionRenderer")) {
|
|
||||||
try {
|
|
||||||
return JsonUtils.getString(jObj, "itemSectionRenderer.targetId")
|
|
||||||
.equals("comments-section");
|
|
||||||
} catch (final ParsingException ignored) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}).findFirst();
|
|
||||||
|
|
||||||
final String token;
|
|
||||||
|
|
||||||
if (itemSectionRenderer.isPresent()) {
|
|
||||||
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
|
|
||||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
|
||||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
|
||||||
} else {
|
|
||||||
token = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (token == null) {
|
|
||||||
optCommentsDisabled = Optional.of(true);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
optCommentsDisabled = Optional.of(false);
|
|
||||||
|
|
||||||
return token;
|
return token;
|
||||||
}
|
}
|
||||||
|
@ -124,25 +124,37 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
|
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
|
||||||
final JsonArray jsonArray;
|
final JsonArray onResponseReceivedEndpoints =
|
||||||
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
|
ajaxJson.getArray("onResponseReceivedEndpoints");
|
||||||
"onResponseReceivedEndpoints");
|
|
||||||
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
|
|
||||||
onResponseReceivedEndpoints.size() - 1);
|
|
||||||
|
|
||||||
|
// Prevent ArrayIndexOutOfBoundsException
|
||||||
|
if (onResponseReceivedEndpoints.isEmpty()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
final JsonArray continuationItemsArray;
|
||||||
try {
|
try {
|
||||||
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
|
final JsonObject endpoint = onResponseReceivedEndpoints
|
||||||
"appendContinuationItemsAction")).getArray("continuationItems");
|
.getObject(onResponseReceivedEndpoints.size() - 1);
|
||||||
|
continuationItemsArray = endpoint
|
||||||
|
.getObject("reloadContinuationItemsCommand",
|
||||||
|
endpoint.getObject("appendContinuationItemsAction"))
|
||||||
|
.getArray("continuationItems");
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (jsonArray.isEmpty()) {
|
// Prevent ArrayIndexOutOfBoundsException
|
||||||
|
if (continuationItemsArray.isEmpty()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
|
final JsonObject continuationItemRenderer = continuationItemsArray
|
||||||
|
.getObject(continuationItemsArray.size() - 1)
|
||||||
|
.getObject("continuationItemRenderer");
|
||||||
|
|
||||||
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
|
final String jsonPath = continuationItemRenderer.has("button")
|
||||||
|
? "button.buttonRenderer.command.continuationCommand.token"
|
||||||
|
: "continuationEndpoint.continuationCommand.token";
|
||||||
|
|
||||||
final String continuation;
|
final String continuation;
|
||||||
try {
|
try {
|
||||||
|
@ -169,11 +181,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
final byte[] body = JsonWriter.string(
|
||||||
getExtractorContentCountry())
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||||
.value("continuation", page.getId())
|
.value("continuation", page.getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(UTF_8);
|
.getBytes(StandardCharsets.UTF_8);
|
||||||
|
|
||||||
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
|
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
|
||||||
|
|
||||||
|
@ -186,10 +198,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
||||||
@Nonnull final JsonObject ajaxJson) throws ParsingException {
|
@Nonnull final JsonObject ajaxJson) throws ParsingException {
|
||||||
|
|
||||||
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
|
final JsonArray onResponseReceivedEndpoints =
|
||||||
"onResponseReceivedEndpoints");
|
ajaxJson.getArray("onResponseReceivedEndpoints");
|
||||||
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
|
// Prevent ArrayIndexOutOfBoundsException
|
||||||
onResponseReceivedEndpoints.size() - 1);
|
if (onResponseReceivedEndpoints.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
final JsonObject commentsEndpoint =
|
||||||
|
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);
|
||||||
|
|
||||||
final String path;
|
final String path;
|
||||||
|
|
||||||
|
@ -204,18 +220,20 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
final JsonArray contents;
|
final JsonArray contents;
|
||||||
try {
|
try {
|
||||||
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
|
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
// No comments
|
// No comments
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int index = contents.size() - 1;
|
final int index = contents.size() - 1;
|
||||||
if (contents.getObject(index).has("continuationItemRenderer")) {
|
if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
|
||||||
contents.remove(index);
|
contents.remove(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
|
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
|
||||||
|
? "commentThreadRenderer"
|
||||||
|
: "commentRenderer";
|
||||||
|
|
||||||
final List<Object> comments;
|
final List<Object> comments;
|
||||||
try {
|
try {
|
||||||
|
@ -224,24 +242,23 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
throw new ParsingException("Unable to get parse youtube comments", e);
|
throw new ParsingException("Unable to get parse youtube comments", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (final Object c : comments) {
|
final String url = getUrl();
|
||||||
if (c instanceof JsonObject) {
|
comments.stream()
|
||||||
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
|
.filter(JsonObject.class::isInstance)
|
||||||
(JsonObject) c, getUrl(), getTimeAgoParser());
|
.map(JsonObject.class::cast)
|
||||||
collector.commit(extractor);
|
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
|
||||||
}
|
.forEach(collector::commit);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFetchPage(@Nonnull final Downloader downloader)
|
public void onFetchPage(@Nonnull final Downloader downloader)
|
||||||
throws IOException, ExtractionException {
|
throws IOException, ExtractionException {
|
||||||
final Localization localization = getExtractorLocalization();
|
final Localization localization = getExtractorLocalization();
|
||||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
final byte[] body = JsonWriter.string(
|
||||||
getExtractorContentCountry())
|
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||||
.value("videoId", getId())
|
.value("videoId", getId())
|
||||||
.done())
|
.done())
|
||||||
.getBytes(UTF_8);
|
.getBytes(StandardCharsets.UTF_8);
|
||||||
|
|
||||||
nextResponse = getJsonPostResponse("next", body, localization);
|
nextResponse = getJsonPostResponse("next", body, localization);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue