diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java
index ac4792fc..1402b1d2 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java
@@ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> {
         return false;
     }
 
+    /**
+     * @return the total number of comments, or {@code -1} if the count is not available
+     */
+    public int getCommentsCount() throws ExtractionException {
+        return -1;
+    }
+
     @Nonnull
     @Override
     public String getName() throws ParsingException {
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java
index 98ec136e..f50d6bd9 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java
@@ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
                 ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
         commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
         commentsInfo.setRelatedItems(initialCommentsPage.getItems());
+        try {
+            commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
+        } catch (final Exception e) {
+            commentsInfo.addError(e);
+        }
         commentsInfo.setNextPage(initialCommentsPage.getNextPage());
 
         return commentsInfo;
@@ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
 
     private transient CommentsExtractor commentsExtractor;
     private boolean commentsDisabled = false;
+    private int commentsCount = -1;
 
     public CommentsExtractor getCommentsExtractor() {
         return commentsExtractor;
@@ -86,7 +92,6 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
     }
 
     /**
-     * @apiNote Warning: This method is experimental and may get removed in a future release.
      * @return {@code true} if the comments are disabled otherwise {@code false} (default)
      * @see CommentsExtractor#isCommentsDisabled()
      */
@@ -95,10 +100,27 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> {
     }
 
     /**
-     * @apiNote Warning: This method is experimental and may get removed in a future release.
      * @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
      */
     public void setCommentsDisabled(final boolean commentsDisabled) {
         this.commentsDisabled = commentsDisabled;
     }
+
+    /**
+     * Returns the total number of comments.
+     *
+     * @return the total number of comments, or {@code -1} (default) if it is not available
+     */
+    public int getCommentsCount() {
+        return commentsCount;
+    }
+
+    /**
+     * Sets the total number of comments.
+     *
+     * @param commentsCount the total number of comments, or {@code -1} if it is not available
+     */
+    public void setCommentsCount(final int commentsCount) {
+        this.commentsCount = commentsCount;
+    }
 }
diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
index e2481952..4581afcb 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java
@@ -1,18 +1,8 @@
 package org.schabi.newpipe.extractor.services.youtube.extractors;
 
-import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
-import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
-import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
-
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.List;
-import java.util.Optional;
-
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-
+import com.grack.nanojson.JsonArray;
+import com.grack.nanojson.JsonObject;
+import com.grack.nanojson.JsonWriter;
 import org.schabi.newpipe.extractor.Page;
 import org.schabi.newpipe.extractor.StreamingService;
 import org.schabi.newpipe.extractor.comments.CommentsExtractor;
@@ -24,26 +14,31 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
 import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
 import org.schabi.newpipe.extractor.localization.Localization;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
+import org.schabi.newpipe.extractor.utils.Utils;
 
-import com.grack.nanojson.JsonArray;
-import com.grack.nanojson.JsonObject;
-import com.grack.nanojson.JsonWriter;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+
+import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
+import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
+import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
+import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
 
 public class YoutubeCommentsExtractor extends CommentsExtractor {
 
-    private JsonObject nextResponse;
+    /**
+     * Whether comments are disabled on video.
+     */
+    private boolean commentsDisabled;
 
     /**
-     * Caching mechanism and holder of the commentsDisabled value.
-     * <br/>
-     * Initial value = empty -> unknown if comments are disabled or not<br/>
-     * Some method calls {@link #findInitialCommentsToken()}
-     * -> value is set<br/>
-     * If the method or another one that is depending on disabled comments
-     * is now called again, the method execution can avoid unnecessary calls
+     * The second ajax /next response.
      */
-    @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
-    private Optional<Boolean> optCommentsDisabled = Optional.empty();
+    private JsonObject ajaxJson;
 
     public YoutubeCommentsExtractor(
             final StreamingService service,
@@ -56,32 +51,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
     public InfoItemsPage<CommentsInfoItem> getInitialPage()
             throws IOException, ExtractionException {
 
-        // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
-        if (optCommentsDisabled.orElse(false)) {
+        if (commentsDisabled) {
             return getInfoItemsPageForDisabledComments();
         }
 
-        // Get the token
-        final String commentsToken = findInitialCommentsToken();
-        // Check if the comments have been disabled
-        if (optCommentsDisabled.get()) {
-            return getInfoItemsPageForDisabledComments();
-        }
-
-        return getPage(getNextPage(commentsToken));
+        return extractComments(ajaxJson);
     }
 
     /**
      * Finds the initial comments token and initializes commentsDisabled.
      * <br/>
-     * Also sets {@link #optCommentsDisabled}.
+     * Also sets {@link #commentsDisabled}.
      *
      * @return the continuation token or null if none was found
      */
     @Nullable
-    private String findInitialCommentsToken() throws ExtractionException {
+    private String findInitialCommentsToken(final JsonObject nextResponse)
+            throws ExtractionException {
         final String token = JsonUtils.getArray(nextResponse,
-                        "contents.twoColumnWatchNextResults.results.results.contents")
+                "contents.twoColumnWatchNextResults.results.results.contents")
                 .stream()
                 // Only use JsonObjects
                 .filter(JsonObject.class::isInstance)
@@ -112,7 +100,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
                 .orElse(null);
 
         // The comments are disabled if we couldn't get a token
-        optCommentsDisabled = Optional.of(token == null);
+        commentsDisabled = token == null;
 
         return token;
     }
@@ -123,9 +111,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
     }
 
     @Nullable
-    private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
+    private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
         final JsonArray onResponseReceivedEndpoints =
-                ajaxJson.getArray("onResponseReceivedEndpoints");
+                jsonObject.getArray("onResponseReceivedEndpoints");
 
         // Prevent ArrayIndexOutOfBoundsException
         if (onResponseReceivedEndpoints.isEmpty()) {
@@ -173,30 +161,39 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
     @Override
     public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
             throws IOException, ExtractionException {
-        if (optCommentsDisabled.orElse(false)) {
+
+        if (commentsDisabled) {
             return getInfoItemsPageForDisabledComments();
         }
+
         if (page == null || isNullOrEmpty(page.getId())) {
             throw new IllegalArgumentException("Page doesn't have the continuation.");
         }
 
         final Localization localization = getExtractorLocalization();
+        // @formatter:off
         final byte[] body = JsonWriter.string(
                 prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                         .value("continuation", page.getId())
                         .done())
                 .getBytes(StandardCharsets.UTF_8);
+        // @formatter:on
 
-        final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
+        final JsonObject jsonObject = getJsonPostResponse("next", body, localization);
 
-        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
-                getServiceId());
-        collectCommentsFrom(collector, ajaxJson);
-        return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
+        return extractComments(jsonObject);
     }
 
-    private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
-                                     @Nonnull final JsonObject ajaxJson) throws ParsingException {
+    private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
+            throws ExtractionException {
+        final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
+                getServiceId());
+        collectCommentsFrom(collector, jsonObject);
+        return new InfoItemsPage<>(collector, getNextPage(jsonObject));
+    }
+
+    private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
+            @Nonnull final JsonObject jsonObject) throws ParsingException {
 
         final JsonArray onResponseReceivedEndpoints =
-                ajaxJson.getArray("onResponseReceivedEndpoints");
+                jsonObject.getArray("onResponseReceivedEndpoints");
@@ -254,24 +251,59 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
     public void onFetchPage(@Nonnull final Downloader downloader)
             throws IOException, ExtractionException {
         final Localization localization = getExtractorLocalization();
+        // @formatter:off
         final byte[] body = JsonWriter.string(
                 prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                         .value("videoId", getId())
                         .done())
                 .getBytes(StandardCharsets.UTF_8);
+        // @formatter:on
 
-        nextResponse = getJsonPostResponse("next", body, localization);
+        final String initialToken =
+                findInitialCommentsToken(getJsonPostResponse("next", body, localization));
+
+        if (initialToken == null) {
+            return;
+        }
+
+        // @formatter:off
+        final byte[] ajaxBody = JsonWriter.string(
+                prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
+                        .value("continuation", initialToken)
+                        .done())
+                .getBytes(StandardCharsets.UTF_8);
+        // @formatter:on
+
+        ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
     }
 
 
     @Override
-    public boolean isCommentsDisabled() throws ExtractionException {
-        // Check if commentsDisabled has to be initialized
-        if (!optCommentsDisabled.isPresent()) {
-            // Initialize commentsDisabled
-            this.findInitialCommentsToken();
+    public boolean isCommentsDisabled() {
+        return commentsDisabled;
+    }
+
+    @Override
+    public int getCommentsCount() throws ExtractionException {
+        assertPageFetched();
+
+        if (commentsDisabled) {
+            return -1;
         }
 
-        return optCommentsDisabled.get();
+        final JsonObject countText = ajaxJson
+                .getArray("onResponseReceivedEndpoints").getObject(0)
+                .getObject("reloadContinuationItemsCommand")
+                .getArray("continuationItems").getObject(0)
+                .getObject("commentsHeaderRenderer")
+                .getObject("countText");
+
+        try {
+            return Integer.parseInt(
+                    Utils.removeNonDigitCharacters(getTextFromObject(countText))
+            );
+        } catch (final Exception e) {
+            throw new ExtractionException("Unable to get comments count", e);
+        }
     }
 }
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
index 7f7a8551..b05502a8 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java
@@ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest {
         @Test
         public void testGetCommentsAllData() throws IOException, ExtractionException {
             InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
+            assertGreater(5, extractor.getCommentsCount()); // more than 5 comments
 
             DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
             for (CommentsInfoItem c : comments.getItems()) {
@@ -344,6 +345,11 @@ public class YoutubeCommentsExtractorTest {
             assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
             assertGreater(300, firstComment.getReplyCount());
         }
+
+        @Test
+        public void testCommentsCount() throws IOException, ExtractionException {
+            assertGreater(18800, extractor.getCommentsCount());
+        }
     }
 
     public static class FormattingTest {