Merge pull request #794 from FireMasterK/comments-count
[YouTube] Add support to extract total comment count
This commit is contained in:
commit
c1040bccac
4 changed files with 126 additions and 59 deletions
|
@ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem>
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the total number of comments; this base implementation always returns {@code -1}, extractors that can determine the count override it
|
||||
*/
|
||||
public int getCommentsCount() throws ExtractionException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
|
|
|
@ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
|||
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
|
||||
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
|
||||
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
|
||||
try {
|
||||
commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
|
||||
} catch (final Exception e) {
|
||||
commentsInfo.addError(e);
|
||||
}
|
||||
commentsInfo.setNextPage(initialCommentsPage.getNextPage());
|
||||
|
||||
return commentsInfo;
|
||||
|
@ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
|||
|
||||
private transient CommentsExtractor commentsExtractor;
|
||||
private boolean commentsDisabled = false;
|
||||
private int commentsCount;
|
||||
|
||||
public CommentsExtractor getCommentsExtractor() {
|
||||
return commentsExtractor;
|
||||
|
@ -86,7 +92,6 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
|||
}
|
||||
|
||||
/**
|
||||
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||
* @return {@code true} if the comments are disabled otherwise {@code false} (default)
|
||||
* @see CommentsExtractor#isCommentsDisabled()
|
||||
*/
|
||||
|
@ -95,10 +100,27 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
|
|||
}
|
||||
|
||||
/**
|
||||
* @apiNote Warning: This method is experimental and may get removed in a future release.
|
||||
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
|
||||
* @see CommentsExtractor#isCommentsDisabled()
*/
|
||||
public void setCommentsDisabled(final boolean commentsDisabled) {
|
||||
this.commentsDisabled = commentsDisabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of comments.
|
||||
*
|
||||
* @return the value last stored via {@link #setCommentsCount(int)}: {@code -1} when the extractor reported no count, or {@code 0} if the count was never set
|
||||
*/
|
||||
public int getCommentsCount() {
|
||||
return commentsCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the total number of comments.
|
||||
*
|
||||
* @param commentsCount the total number of comments to store; the value is stored as given, without validation
|
||||
*/
|
||||
public void setCommentsCount(final int commentsCount) {
|
||||
this.commentsCount = commentsCount;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,18 +1,8 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonWriter;
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
|
||||
|
@ -24,26 +14,31 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.localization.Localization;
|
||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonWriter;
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
|
||||
public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||
|
||||
private JsonObject nextResponse;
|
||||
/**
|
||||
* Whether comments are disabled on video.
|
||||
*/
|
||||
private boolean commentsDisabled;
|
||||
|
||||
/**
|
||||
* Caching mechanism and holder of the commentsDisabled value.
|
||||
* <br/>
|
||||
* Initial value = empty -> unknown if comments are disabled or not<br/>
|
||||
* Some method calls {@link #findInitialCommentsToken()}
|
||||
* -> value is set<br/>
|
||||
* If the method or another one that is depending on disabled comments
|
||||
* is now called again, the method execution can avoid unnecessary calls
|
||||
* The second ajax <b>/next</b> response.
|
||||
*/
|
||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||
private Optional<Boolean> optCommentsDisabled = Optional.empty();
|
||||
private JsonObject ajaxJson;
|
||||
|
||||
public YoutubeCommentsExtractor(
|
||||
final StreamingService service,
|
||||
|
@ -56,32 +51,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
public InfoItemsPage<CommentsInfoItem> getInitialPage()
|
||||
throws IOException, ExtractionException {
|
||||
|
||||
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
|
||||
if (optCommentsDisabled.orElse(false)) {
|
||||
if (commentsDisabled) {
|
||||
return getInfoItemsPageForDisabledComments();
|
||||
}
|
||||
|
||||
// Get the token
|
||||
final String commentsToken = findInitialCommentsToken();
|
||||
// Check if the comments have been disabled
|
||||
if (optCommentsDisabled.get()) {
|
||||
return getInfoItemsPageForDisabledComments();
|
||||
}
|
||||
|
||||
return getPage(getNextPage(commentsToken));
|
||||
return extractComments(ajaxJson);
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the initial comments token and initializes commentsDisabled.
|
||||
* <br/>
|
||||
* Also sets {@link #optCommentsDisabled}.
|
||||
* Also sets {@link #commentsDisabled}.
|
||||
*
|
||||
* @return the continuation token or null if none was found
|
||||
*/
|
||||
@Nullable
|
||||
private String findInitialCommentsToken() throws ExtractionException {
|
||||
private String findInitialCommentsToken(final JsonObject nextResponse)
|
||||
throws ExtractionException {
|
||||
final String token = JsonUtils.getArray(nextResponse,
|
||||
"contents.twoColumnWatchNextResults.results.results.contents")
|
||||
"contents.twoColumnWatchNextResults.results.results.contents")
|
||||
.stream()
|
||||
// Only use JsonObjects
|
||||
.filter(JsonObject.class::isInstance)
|
||||
|
@ -112,7 +100,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
.orElse(null);
|
||||
|
||||
// The comments are disabled if we couldn't get a token
|
||||
optCommentsDisabled = Optional.of(token == null);
|
||||
commentsDisabled = token == null;
|
||||
|
||||
return token;
|
||||
}
|
||||
|
@ -123,9 +111,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
}
|
||||
|
||||
@Nullable
|
||||
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
|
||||
private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
|
||||
final JsonArray onResponseReceivedEndpoints =
|
||||
ajaxJson.getArray("onResponseReceivedEndpoints");
|
||||
jsonObject.getArray("onResponseReceivedEndpoints");
|
||||
|
||||
// Prevent ArrayIndexOutOfBoundsException
|
||||
if (onResponseReceivedEndpoints.isEmpty()) {
|
||||
|
@ -173,30 +161,39 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
@Override
|
||||
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
|
||||
throws IOException, ExtractionException {
|
||||
if (optCommentsDisabled.orElse(false)) {
|
||||
|
||||
if (commentsDisabled) {
|
||||
return getInfoItemsPageForDisabledComments();
|
||||
}
|
||||
|
||||
if (page == null || isNullOrEmpty(page.getId())) {
|
||||
throw new IllegalArgumentException("Page doesn't have the continuation.");
|
||||
}
|
||||
|
||||
final Localization localization = getExtractorLocalization();
|
||||
// @formatter:off
|
||||
final byte[] body = JsonWriter.string(
|
||||
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||
.value("continuation", page.getId())
|
||||
.done())
|
||||
.getBytes(StandardCharsets.UTF_8);
|
||||
// @formatter:on
|
||||
|
||||
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
|
||||
final var jsonObject = getJsonPostResponse("next", body, localization);
|
||||
|
||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||
getServiceId());
|
||||
collectCommentsFrom(collector, ajaxJson);
|
||||
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
||||
return extractComments(jsonObject);
|
||||
}
|
||||
|
||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
|
||||
@Nonnull final JsonObject ajaxJson) throws ParsingException {
|
||||
/**
 * Builds one page of comments: fills a fresh collector through
 * {@code collectCommentsFrom} and derives the follow-up page from {@code jsonObject}.
 *
 * NOTE(review): {@code collectCommentsFrom(collector)} is not given {@code jsonObject};
 * its visible body reads the {@code ajaxJson} field instead. The items therefore appear
 * to come from the field while the next page comes from the parameter, so pages fetched
 * through {@code getPage} may repeat the initial page's comments. Confirm, and pass
 * {@code jsonObject} through if so.
 *
 * @param jsonObject the "next" response used to compute the following page
 * @return the collected comments together with the next page
 * @throws ExtractionException if collecting the comments or resolving the next page fails
 */
private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
|
||||
throws ExtractionException {
|
||||
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
|
||||
getServiceId());
|
||||
// NOTE(review): no JSON is handed over here, unlike getNextPage(jsonObject) below.
collectCommentsFrom(collector);
|
||||
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
|
||||
}
|
||||
|
||||
private void collectCommentsFrom(final CommentsInfoItemsCollector collector)
|
||||
throws ParsingException {
|
||||
|
||||
final JsonArray onResponseReceivedEndpoints =
|
||||
ajaxJson.getArray("onResponseReceivedEndpoints");
|
||||
|
@ -254,24 +251,59 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
public void onFetchPage(@Nonnull final Downloader downloader)
|
||||
throws IOException, ExtractionException {
|
||||
final Localization localization = getExtractorLocalization();
|
||||
// @formatter:off
|
||||
final byte[] body = JsonWriter.string(
|
||||
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||
.value("videoId", getId())
|
||||
.done())
|
||||
.getBytes(StandardCharsets.UTF_8);
|
||||
// @formatter:on
|
||||
|
||||
nextResponse = getJsonPostResponse("next", body, localization);
|
||||
final String initialToken =
|
||||
findInitialCommentsToken(getJsonPostResponse("next", body, localization));
|
||||
|
||||
if (initialToken == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
// @formatter:off
|
||||
final byte[] ajaxBody = JsonWriter.string(
|
||||
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
|
||||
.value("continuation", initialToken)
|
||||
.done())
|
||||
.getBytes(StandardCharsets.UTF_8);
|
||||
// @formatter:on
|
||||
|
||||
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isCommentsDisabled() throws ExtractionException {
|
||||
// Check if commentsDisabled has to be initialized
|
||||
if (!optCommentsDisabled.isPresent()) {
|
||||
// Initialize commentsDisabled
|
||||
this.findInitialCommentsToken();
|
||||
public boolean isCommentsDisabled() {
|
||||
return commentsDisabled;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCommentsCount() throws ExtractionException {
|
||||
assertPageFetched();
|
||||
|
||||
if (commentsDisabled) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return optCommentsDisabled.get();
|
||||
final JsonObject countText = ajaxJson
|
||||
.getArray("onResponseReceivedEndpoints").getObject(0)
|
||||
.getObject("reloadContinuationItemsCommand")
|
||||
.getArray("continuationItems").getObject(0)
|
||||
.getObject("commentsHeaderRenderer")
|
||||
.getObject("countText");
|
||||
|
||||
try {
|
||||
return Integer.parseInt(
|
||||
Utils.removeNonDigitCharacters(getTextFromObject(countText))
|
||||
);
|
||||
} catch (final Exception e) {
|
||||
throw new ExtractionException("Unable to get comments count", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest {
|
|||
@Test
|
||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
assertTrue(extractor.getCommentsCount() > 5); // at least 5 comments
|
||||
|
||||
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||
for (CommentsInfoItem c : comments.getItems()) {
|
||||
|
@ -344,6 +345,11 @@ public class YoutubeCommentsExtractorTest {
|
|||
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
|
||||
assertGreater(300, firstComment.getReplyCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCommentsCount() throws IOException, ExtractionException {
|
||||
// The reported total must be strictly greater than 18800.
// NOTE(review): the threshold presumably reflects the test video's comment count when the test was written; confirm it stays valid if comments get removed.
assertTrue(extractor.getCommentsCount() > 18800);
|
||||
}
|
||||
}
|
||||
|
||||
public static class FormattingTest {
|
||||
|
|
Loading…
Reference in a new issue