Merge pull request #794 from FireMasterK/comments-count

[YouTube] Add support to extract total comment count
This commit is contained in:
Stypox 2023-01-11 15:32:19 +01:00 committed by GitHub
commit c1040bccac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 126 additions and 59 deletions

View File

@ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem>
return false; return false;
} }
/**
* Returns the total number of comments.
* <p>
* This base implementation always returns {@code -1}; extractors that are able to
* determine the count override this method.
* NOTE(review): {@code -1} looks like the "count not available" marker (the YouTube
* extractor also returns it when comments are disabled) - confirm with callers.
*
* @return the total number of comments; always {@code -1} in this base implementation
* @throws ExtractionException not thrown by this base implementation; declared so that
*                             overriding extractors can report extraction failures
*/
public int getCommentsCount() throws ExtractionException {
return -1;
}
@Nonnull @Nonnull
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {

View File

@ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor); ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled()); commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
commentsInfo.setRelatedItems(initialCommentsPage.getItems()); commentsInfo.setRelatedItems(initialCommentsPage.getItems());
try {
commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
} catch (final Exception e) {
commentsInfo.addError(e);
}
commentsInfo.setNextPage(initialCommentsPage.getNextPage()); commentsInfo.setNextPage(initialCommentsPage.getNextPage());
return commentsInfo; return commentsInfo;
@ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
private transient CommentsExtractor commentsExtractor; private transient CommentsExtractor commentsExtractor;
private boolean commentsDisabled = false; private boolean commentsDisabled = false;
private int commentsCount;
public CommentsExtractor getCommentsExtractor() { public CommentsExtractor getCommentsExtractor() {
return commentsExtractor; return commentsExtractor;
@ -86,7 +92,6 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
} }
/** /**
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @return {@code true} if the comments are disabled otherwise {@code false} (default) * @return {@code true} if the comments are disabled otherwise {@code false} (default)
* @see CommentsExtractor#isCommentsDisabled() * @see CommentsExtractor#isCommentsDisabled()
*/ */
@ -95,10 +100,27 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
} }
/** /**
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false} * @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
*/ */
public void setCommentsDisabled(final boolean commentsDisabled) { public void setCommentsDisabled(final boolean commentsDisabled) {
this.commentsDisabled = commentsDisabled; this.commentsDisabled = commentsDisabled;
} }
/**
* Returns the total number of comments stored in this info object.
* <p>
* The value is whatever was last passed to {@link #setCommentsCount(int)}. The backing
* field has no initializer, so it is {@code 0} until the setter is called. When this
* info is populated from a {@link CommentsExtractor} and
* {@link CommentsExtractor#getCommentsCount()} throws, the exception is recorded via
* {@code addError} and the setter is not called, so the count stays at its previous
* value.
*
* @return the total number of comments
* @see CommentsExtractor#getCommentsCount()
*/
public int getCommentsCount() {
return commentsCount;
}
/**
* Sets the total number of comments.
* <p>
* The value is stored as given; no validation or normalization is performed here.
*
* @param commentsCount the total number of comments to store, later returned by
*                      {@link #getCommentsCount()}
*/
public void setCommentsCount(final int commentsCount) {
this.commentsCount = commentsCount;
}
} }

View File

@ -1,18 +1,8 @@
package org.schabi.newpipe.extractor.services.youtube.extractors; package org.schabi.newpipe.extractor.services.youtube.extractors;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; import com.grack.nanojson.JsonArray;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import com.grack.nanojson.JsonObject;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import com.grack.nanojson.JsonWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor; import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@ -24,26 +14,31 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;
import com.grack.nanojson.JsonArray; import javax.annotation.Nonnull;
import com.grack.nanojson.JsonObject; import javax.annotation.Nullable;
import com.grack.nanojson.JsonWriter; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
public class YoutubeCommentsExtractor extends CommentsExtractor { public class YoutubeCommentsExtractor extends CommentsExtractor {
private JsonObject nextResponse; /**
* Whether comments are disabled on video.
*/
private boolean commentsDisabled;
/** /**
* Caching mechanism and holder of the commentsDisabled value. * The second ajax <b>/next</b> response.
* <br/>
* Initial value = empty -> unknown if comments are disabled or not<br/>
* Some method calls {@link #findInitialCommentsToken()}
* -> value is set<br/>
* If the method or another one that is depending on disabled comments
* is now called again, the method execution can avoid unnecessary calls
*/ */
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") private JsonObject ajaxJson;
private Optional<Boolean> optCommentsDisabled = Optional.empty();
public YoutubeCommentsExtractor( public YoutubeCommentsExtractor(
final StreamingService service, final StreamingService service,
@ -56,32 +51,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
public InfoItemsPage<CommentsInfoItem> getInitialPage() public InfoItemsPage<CommentsInfoItem> getInitialPage()
throws IOException, ExtractionException { throws IOException, ExtractionException {
// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized if (commentsDisabled) {
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments(); return getInfoItemsPageForDisabledComments();
} }
// Get the token return extractComments(ajaxJson);
final String commentsToken = findInitialCommentsToken();
// Check if the comments have been disabled
if (optCommentsDisabled.get()) {
return getInfoItemsPageForDisabledComments();
}
return getPage(getNextPage(commentsToken));
} }
/** /**
* Finds the initial comments token and initializes commentsDisabled. * Finds the initial comments token and initializes commentsDisabled.
* <br/> * <br/>
* Also sets {@link #optCommentsDisabled}. * Also sets {@link #commentsDisabled}.
* *
* @return the continuation token or null if none was found * @return the continuation token or null if none was found
*/ */
@Nullable @Nullable
private String findInitialCommentsToken() throws ExtractionException { private String findInitialCommentsToken(final JsonObject nextResponse)
throws ExtractionException {
final String token = JsonUtils.getArray(nextResponse, final String token = JsonUtils.getArray(nextResponse,
"contents.twoColumnWatchNextResults.results.results.contents") "contents.twoColumnWatchNextResults.results.results.contents")
.stream() .stream()
// Only use JsonObjects // Only use JsonObjects
.filter(JsonObject.class::isInstance) .filter(JsonObject.class::isInstance)
@ -112,7 +100,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
.orElse(null); .orElse(null);
// The comments are disabled if we couldn't get a token // The comments are disabled if we couldn't get a token
optCommentsDisabled = Optional.of(token == null); commentsDisabled = token == null;
return token; return token;
} }
@ -123,9 +111,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
} }
@Nullable @Nullable
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException { private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
final JsonArray onResponseReceivedEndpoints = final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints"); jsonObject.getArray("onResponseReceivedEndpoints");
// Prevent ArrayIndexOutOfBoundsException // Prevent ArrayIndexOutOfBoundsException
if (onResponseReceivedEndpoints.isEmpty()) { if (onResponseReceivedEndpoints.isEmpty()) {
@ -173,30 +161,39 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
@Override @Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws IOException, ExtractionException { throws IOException, ExtractionException {
if (optCommentsDisabled.orElse(false)) {
if (commentsDisabled) {
return getInfoItemsPageForDisabledComments(); return getInfoItemsPageForDisabledComments();
} }
if (page == null || isNullOrEmpty(page.getId())) { if (page == null || isNullOrEmpty(page.getId())) {
throw new IllegalArgumentException("Page doesn't have the continuation."); throw new IllegalArgumentException("Page doesn't have the continuation.");
} }
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
// @formatter:off
final byte[] body = JsonWriter.string( final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", page.getId()) .value("continuation", page.getId())
.done()) .done())
.getBytes(StandardCharsets.UTF_8); .getBytes(StandardCharsets.UTF_8);
// @formatter:on
final JsonObject ajaxJson = getJsonPostResponse("next", body, localization); final var jsonObject = getJsonPostResponse("next", body, localization);
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( return extractComments(jsonObject);
getServiceId());
collectCommentsFrom(collector, ajaxJson);
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
} }
private void collectCommentsFrom(final CommentsInfoItemsCollector collector, private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
@Nonnull final JsonObject ajaxJson) throws ParsingException { throws ExtractionException {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());
collectCommentsFrom(collector);
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
}
private void collectCommentsFrom(final CommentsInfoItemsCollector collector)
throws ParsingException {
final JsonArray onResponseReceivedEndpoints = final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints"); ajaxJson.getArray("onResponseReceivedEndpoints");
@ -254,24 +251,59 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
public void onFetchPage(@Nonnull final Downloader downloader) public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException { throws IOException, ExtractionException {
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
// @formatter:off
final byte[] body = JsonWriter.string( final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("videoId", getId()) .value("videoId", getId())
.done()) .done())
.getBytes(StandardCharsets.UTF_8); .getBytes(StandardCharsets.UTF_8);
// @formatter:on
nextResponse = getJsonPostResponse("next", body, localization); final String initialToken =
findInitialCommentsToken(getJsonPostResponse("next", body, localization));
if (initialToken == null) {
return;
}
// @formatter:off
final byte[] ajaxBody = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", initialToken)
.done())
.getBytes(StandardCharsets.UTF_8);
// @formatter:on
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
} }
@Override @Override
public boolean isCommentsDisabled() throws ExtractionException { public boolean isCommentsDisabled() {
// Check if commentsDisabled has to be initialized return commentsDisabled;
if (!optCommentsDisabled.isPresent()) { }
// Initialize commentsDisabled
this.findInitialCommentsToken(); @Override
public int getCommentsCount() throws ExtractionException {
assertPageFetched();
if (commentsDisabled) {
return -1;
} }
return optCommentsDisabled.get(); final JsonObject countText = ajaxJson
.getArray("onResponseReceivedEndpoints").getObject(0)
.getObject("reloadContinuationItemsCommand")
.getArray("continuationItems").getObject(0)
.getObject("commentsHeaderRenderer")
.getObject("countText");
try {
return Integer.parseInt(
Utils.removeNonDigitCharacters(getTextFromObject(countText))
);
} catch (final Exception e) {
throw new ExtractionException("Unable to get comments count", e);
}
} }
} }

View File

@ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest {
@Test @Test
public void testGetCommentsAllData() throws IOException, ExtractionException { public void testGetCommentsAllData() throws IOException, ExtractionException {
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
assertTrue(extractor.getCommentsCount() > 5); // at least 5 comments
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
for (CommentsInfoItem c : comments.getItems()) { for (CommentsInfoItem c : comments.getItems()) {
@ -344,6 +345,11 @@ public class YoutubeCommentsExtractorTest {
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment"); assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
assertGreater(300, firstComment.getReplyCount()); assertGreater(300, firstComment.getReplyCount());
} }
@Test
public void testCommentsCount() throws IOException, ExtractionException {
assertTrue(extractor.getCommentsCount() > 18800);
}
} }
public static class FormattingTest { public static class FormattingTest {