From 981aee40927ae03442f16856185ac5726ea836c8 Mon Sep 17 00:00:00 2001 From: FireMasterK <20838718+FireMasterK@users.noreply.github.com> Date: Tue, 8 Feb 2022 10:44:55 +0000 Subject: [PATCH 1/8] Add support to extract total comment count. --- .../extractor/comments/CommentsExtractor.java | 7 +++++ .../extractor/comments/CommentsInfo.java | 26 +++++++++++++++++++ .../extractors/YoutubeCommentsExtractor.java | 23 +++++++++++++--- .../youtube/YoutubeCommentsExtractorTest.java | 7 +++++ 4 files changed, 59 insertions(+), 4 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java index ac4792fc..5f34ffbf 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor return false; } + /** + * @return total number of comments. + */ + public int getCommentsCount() throws ExtractionException { + return -1; + } + @Nonnull @Override public String getName() throws ParsingException { diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 98ec136e..73dfa8ab 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo { ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor); commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled()); commentsInfo.setRelatedItems(initialCommentsPage.getItems()); + try { + commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount()); + } catch (Exception e) { + commentsInfo.addError(e); + } commentsInfo.setNextPage(initialCommentsPage.getNextPage()); return commentsInfo; @@ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo { private transient CommentsExtractor commentsExtractor; private boolean commentsDisabled = false; + private int commentsCount; public CommentsExtractor getCommentsExtractor() { return commentsExtractor; @@ -86,6 +92,7 @@ public final class CommentsInfo extends ListInfo { } /** + * @return true if the comments are disabled otherwise false (default) * @apiNote Warning: This method is experimental and may get removed in a future release. * @return {@code true} if the comments are disabled otherwise {@code false} (default) * @see CommentsExtractor#isCommentsDisabled() @@ -95,10 +102,29 @@ public final class CommentsInfo extends ListInfo { } /** + * @param commentsDisabled true if the comments are disabled otherwise false * @apiNote Warning: This method is experimental and may get removed in a future release. * @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false} */ public void setCommentsDisabled(final boolean commentsDisabled) { this.commentsDisabled = commentsDisabled; } + + /** + * Returns the total number of comments. + * + * @return totalComments + */ + public int getCommentsCount() { + return commentsCount; + } + + /** + * Sets the total number of comments. + * + * @param commentsCount + */ + public void setCommentsCount(int commentsCount) { + this.commentsCount = commentsCount; + } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index e2481952..6d0c38cb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -1,6 +1,7 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; @@ -28,6 +29,7 @@ import org.schabi.newpipe.extractor.utils.JsonUtils; import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonWriter; +import org.schabi.newpipe.extractor.utils.Utils; public class YoutubeCommentsExtractor extends CommentsExtractor { @@ -44,6 +46,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { */ @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private Optional optCommentsDisabled = Optional.empty(); + private JsonObject ajaxJson; public YoutubeCommentsExtractor( final StreamingService service, @@ -187,16 +190,15 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { .done()) .getBytes(StandardCharsets.UTF_8); - final JsonObject ajaxJson = getJsonPostResponse("next", body, localization); + this.ajaxJson = getJsonPostResponse("next", body, localization); final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( getServiceId()); - collectCommentsFrom(collector, ajaxJson); + collectCommentsFrom(collector); return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); } - private void collectCommentsFrom(final CommentsInfoItemsCollector collector, - @Nonnull final JsonObject ajaxJson) throws ParsingException { + private void collectCommentsFrom(final CommentsInfoItemsCollector collector) throws ParsingException { final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray("onResponseReceivedEndpoints"); @@ -274,4 +276,17 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return optCommentsDisabled.get(); } + + @Override + public int getCommentsCount() throws ExtractionException { + final JsonObject commentsHeaderRenderer = ajaxJson + .getArray("onResponseReceivedEndpoints").getObject(0) + .getObject("reloadContinuationItemsCommand") + .getArray("continuationItems").getObject(0) + .getObject("commentsHeaderRenderer"); + + final String text = getTextFromObject(commentsHeaderRenderer.getObject("countText")); + + return Integer.parseInt(Utils.removeNonDigitCharacters(text)); + } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 7f7a8551..951fe958 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest { @Test public void testGetCommentsAllData() throws IOException, ExtractionException { InfoItemsPage comments = extractor.getInitialPage(); + assertTrue(extractor.getCommentsCount() > 5); // at least 5 comments DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); for (CommentsInfoItem c : comments.getItems()) { @@ -344,6 +345,12 @@ public class YoutubeCommentsExtractorTest { assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment"); assertGreater(300, firstComment.getReplyCount()); } + + @Test + public void testCommentsCount() throws IOException, ExtractionException { + extractor.getInitialPage(); // Needs to be called first + assertTrue(extractor.getCommentsCount() > 18800); + } } public static class FormattingTest { From 656b7c1cd922851b9c04d2493e298da6790a2a66 Mon Sep 17 00:00:00 2001 From: FireMasterK <20838718+FireMasterK@users.noreply.github.com> Date: Tue, 8 Feb 2022 12:06:19 +0000 Subject: [PATCH 2/8] Improve method documentation. --- .../org/schabi/newpipe/extractor/comments/CommentsInfo.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 73dfa8ab..7386716c 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -113,7 +113,7 @@ public final class CommentsInfo extends ListInfo { /** * Returns the total number of comments. * - * @return totalComments + * @return commentsCount the total number of comments. */ public int getCommentsCount() { return commentsCount; @@ -122,7 +122,7 @@ public final class CommentsInfo extends ListInfo { /** * Sets the total number of comments. * - * @param commentsCount + * @param commentsCount the commentsCount to set. */ public void setCommentsCount(int commentsCount) { this.commentsCount = commentsCount; From 22f71b010c20936cab9034f77c02afbfe44c1d5f Mon Sep 17 00:00:00 2001 From: FireMasterK <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 10 Feb 2022 15:43:02 +0000 Subject: [PATCH 3/8] Fix for requested changes. --- .../extractors/YoutubeCommentsExtractor.java | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 6d0c38cb..40a22fe5 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -46,6 +46,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { */ @SuppressWarnings("OptionalUsedAsFieldOrParameterType") private Optional optCommentsDisabled = Optional.empty(); + /** + * The second ajax /next response. + */ private JsonObject ajaxJson; public YoutubeCommentsExtractor( @@ -279,14 +282,17 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public int getCommentsCount() throws ExtractionException { - final JsonObject commentsHeaderRenderer = ajaxJson + final JsonObject countText = ajaxJson .getArray("onResponseReceivedEndpoints").getObject(0) .getObject("reloadContinuationItemsCommand") .getArray("continuationItems").getObject(0) - .getObject("commentsHeaderRenderer"); + .getObject("commentsHeaderRenderer") + .getObject("countText"); - final String text = getTextFromObject(commentsHeaderRenderer.getObject("countText")); - - return Integer.parseInt(Utils.removeNonDigitCharacters(text)); + try { + return Integer.parseInt(Utils.removeNonDigitCharacters(getTextFromObject(countText))); + } catch (final Exception e) { + throw new ExtractionException("Unable to get comments count", e); + } } } From 67ef4f4c30678c5309e061f363321506240c5ab5 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 8 Dec 2022 00:41:49 +0000 Subject: [PATCH 4/8] Cleanup and remove optional. --- .../extractor/comments/CommentsInfo.java | 8 +- .../extractors/YoutubeCommentsExtractor.java | 98 +++++++++---------- 2 files changed, 46 insertions(+), 60 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index 7386716c..aac965db 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -50,7 +50,7 @@ public final class CommentsInfo extends ListInfo { commentsInfo.setRelatedItems(initialCommentsPage.getItems()); try { commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount()); - } catch (Exception e) { + } catch (final Exception e) { commentsInfo.addError(e); } commentsInfo.setNextPage(initialCommentsPage.getNextPage()); @@ -92,8 +92,6 @@ public final class CommentsInfo extends ListInfo { } /** - * @return true if the comments are disabled otherwise false (default) - * @apiNote Warning: This method is experimental and may get removed in a future release. * @return {@code true} if the comments are disabled otherwise {@code false} (default) * @see CommentsExtractor#isCommentsDisabled() */ @@ -102,8 +100,6 @@ public final class CommentsInfo extends ListInfo { } /** - * @param commentsDisabled true if the comments are disabled otherwise false - * @apiNote Warning: This method is experimental and may get removed in a future release. * @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false} */ public void setCommentsDisabled(final boolean commentsDisabled) { @@ -124,7 +120,7 @@ public final class CommentsInfo extends ListInfo { * * @param commentsCount the commentsCount to set. */ - public void setCommentsCount(int commentsCount) { + public void setCommentsCount(final int commentsCount) { this.commentsCount = commentsCount; } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 40a22fe5..208f0d00 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -1,19 +1,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; -import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Collections; -import java.util.List; -import java.util.Optional; - -import javax.annotation.Nonnull; -import javax.annotation.Nullable; - +import com.grack.nanojson.JsonArray; +import com.grack.nanojson.JsonObject; +import com.grack.nanojson.JsonWriter; import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.comments.CommentsExtractor; @@ -25,27 +14,33 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; import org.schabi.newpipe.extractor.localization.Localization; import org.schabi.newpipe.extractor.utils.JsonUtils; - -import com.grack.nanojson.JsonArray; -import com.grack.nanojson.JsonObject; -import com.grack.nanojson.JsonWriter; import org.schabi.newpipe.extractor.utils.Utils; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; + +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; +import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; + public class YoutubeCommentsExtractor extends CommentsExtractor { - private JsonObject nextResponse; + /** + * The initial request's continuation token. + * Since we need to make two requests to get the comments, + */ + private String initialToken; /** - * Caching mechanism and holder of the commentsDisabled value. - *
- * Initial value = empty -> unknown if comments are disabled or not
- * Some method calls {@link #findInitialCommentsToken()} - * -> value is set
- * If the method or another one that is depending on disabled comments - * is now called again, the method execution can avoid unnecessary calls + * Whether comments are disabled on video. */ - @SuppressWarnings("OptionalUsedAsFieldOrParameterType") - private Optional optCommentsDisabled = Optional.empty(); + private boolean commentsDisabled = true; + /** * The second ajax /next response. */ @@ -63,31 +58,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { throws IOException, ExtractionException { // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized - if (optCommentsDisabled.orElse(false)) { + if (commentsDisabled) { return getInfoItemsPageForDisabledComments(); } - // Get the token - final String commentsToken = findInitialCommentsToken(); - // Check if the comments have been disabled - if (optCommentsDisabled.get()) { - return getInfoItemsPageForDisabledComments(); - } - - return getPage(getNextPage(commentsToken)); + return getPage(getNextPage(this.initialToken)); } /** * Finds the initial comments token and initializes commentsDisabled. *
- * Also sets {@link #optCommentsDisabled}. + * Also sets {@link #commentsDisabled}. * * @return the continuation token or null if none was found */ @Nullable - private String findInitialCommentsToken() throws ExtractionException { + private String findInitialCommentsToken(final JsonObject nextResponse) + throws ExtractionException { final String token = JsonUtils.getArray(nextResponse, - "contents.twoColumnWatchNextResults.results.results.contents") + "contents.twoColumnWatchNextResults.results.results.contents") .stream() // Only use JsonObjects .filter(JsonObject.class::isInstance) @@ -118,7 +107,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { .orElse(null); // The comments are disabled if we couldn't get a token - optCommentsDisabled = Optional.of(token == null); + commentsDisabled = token == null; return token; } @@ -129,9 +118,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { } @Nullable - private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException { + private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException { final JsonArray onResponseReceivedEndpoints = - ajaxJson.getArray("onResponseReceivedEndpoints"); + jsonObject.getArray("onResponseReceivedEndpoints"); // Prevent ArrayIndexOutOfBoundsException if (onResponseReceivedEndpoints.isEmpty()) { @@ -179,19 +168,23 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public InfoItemsPage getPage(final Page page) throws IOException, ExtractionException { - if (optCommentsDisabled.orElse(false)) { + + if (commentsDisabled) { return getInfoItemsPageForDisabledComments(); } + if (page == null || isNullOrEmpty(page.getId())) { throw new IllegalArgumentException("Page doesn't have the continuation."); } final Localization localization = getExtractorLocalization(); + // @formatter:off final byte[] body = JsonWriter.string( prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) .value("continuation", page.getId()) .done()) .getBytes(StandardCharsets.UTF_8); + // @formatter:on this.ajaxJson = getJsonPostResponse("next", body, localization); @@ -201,7 +194,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); } - private void collectCommentsFrom(final CommentsInfoItemsCollector collector) throws ParsingException { + private void collectCommentsFrom(final CommentsInfoItemsCollector collector) + throws ParsingException { final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray("onResponseReceivedEndpoints"); @@ -259,25 +253,21 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException { final Localization localization = getExtractorLocalization(); + // @formatter:off final byte[] body = JsonWriter.string( prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) .value("videoId", getId()) .done()) .getBytes(StandardCharsets.UTF_8); + // @formatter:on - nextResponse = getJsonPostResponse("next", body, localization); + initialToken = findInitialCommentsToken(getJsonPostResponse("next", body, localization)); } @Override - public boolean isCommentsDisabled() throws ExtractionException { - // Check if commentsDisabled has to be initialized - if (!optCommentsDisabled.isPresent()) { - // Initialize commentsDisabled - this.findInitialCommentsToken(); - } - - return optCommentsDisabled.get(); + public boolean isCommentsDisabled() { + return commentsDisabled; } @Override From 64d24aa09ee64f5b6a14e57cd2cb6af292e2cae6 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:44:02 +0000 Subject: [PATCH 5/8] Fix request changes. --- .../schabi/newpipe/extractor/comments/CommentsExtractor.java | 2 +- .../org/schabi/newpipe/extractor/comments/CommentsInfo.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java index 5f34ffbf..1402b1d2 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsExtractor.java @@ -23,7 +23,7 @@ public abstract class CommentsExtractor extends ListExtractor } /** - * @return total number of comments. + * @return the total number of comments */ public int getCommentsCount() throws ExtractionException { return -1; diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java index aac965db..f50d6bd9 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfo.java @@ -109,7 +109,7 @@ public final class CommentsInfo extends ListInfo { /** * Returns the total number of comments. * - * @return commentsCount the total number of comments. + * @return the total number of comments */ public int getCommentsCount() { return commentsCount; From 2974dfaa4858e08ce7d2cede005f7fd2c8cbf783 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 8 Dec 2022 11:44:47 +0000 Subject: [PATCH 6/8] Only store ajaxJson for initial page and eager fetch the initial continuation. --- .../extractors/YoutubeCommentsExtractor.java | 63 +++++++++++++------ .../youtube/YoutubeCommentsExtractorTest.java | 1 - 2 files changed, 43 insertions(+), 21 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 208f0d00..92d46118 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -30,17 +30,16 @@ import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; public class YoutubeCommentsExtractor extends CommentsExtractor { - /** - * The initial request's continuation token. - * Since we need to make two requests to get the comments, - */ - private String initialToken; - /** * Whether comments are disabled on video. */ private boolean commentsDisabled = true; + /** + * The total number of comments on video. + */ + private int commentsCount = (int) ITEM_COUNT_UNKNOWN; + /** * The second ajax /next response. */ @@ -62,7 +61,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { return getInfoItemsPageForDisabledComments(); } - return getPage(getNextPage(this.initialToken)); + return extractComments(ajaxJson); } /** @@ -186,12 +185,17 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { .getBytes(StandardCharsets.UTF_8); // @formatter:on - this.ajaxJson = getJsonPostResponse("next", body, localization); + final var jsonObject = getJsonPostResponse("next", body, localization); + return extractComments(jsonObject); + } + + private InfoItemsPage extractComments(final JsonObject jsonObject) + throws ExtractionException { final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( getServiceId()); collectCommentsFrom(collector); - return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); + return new InfoItemsPage<>(collector, getNextPage(jsonObject)); } private void collectCommentsFrom(final CommentsInfoItemsCollector collector) @@ -261,7 +265,18 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { .getBytes(StandardCharsets.UTF_8); // @formatter:on - initialToken = findInitialCommentsToken(getJsonPostResponse("next", body, localization)); + final String initialToken = + findInitialCommentsToken(getJsonPostResponse("next", body, localization)); + + // @formatter:off + final byte[] ajaxBody = JsonWriter.string( + prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) + .value("continuation", initialToken) + .done()) + .getBytes(StandardCharsets.UTF_8); + // @formatter:on + + ajaxJson = getJsonPostResponse("next", ajaxBody, localization); } @@ -272,17 +287,25 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { @Override public int getCommentsCount() throws ExtractionException { - final JsonObject countText = ajaxJson - .getArray("onResponseReceivedEndpoints").getObject(0) - .getObject("reloadContinuationItemsCommand") - .getArray("continuationItems").getObject(0) - .getObject("commentsHeaderRenderer") - .getObject("countText"); + assertPageFetched(); - try { - return Integer.parseInt(Utils.removeNonDigitCharacters(getTextFromObject(countText))); - } catch (final Exception e) { - throw new ExtractionException("Unable to get comments count", e); + if (commentsCount == ITEM_COUNT_UNKNOWN) { + final JsonObject countText = ajaxJson + .getArray("onResponseReceivedEndpoints").getObject(0) + .getObject("reloadContinuationItemsCommand") + .getArray("continuationItems").getObject(0) + .getObject("commentsHeaderRenderer") + .getObject("countText"); + + try { + commentsCount = Integer.parseInt( + Utils.removeNonDigitCharacters(getTextFromObject(countText)) + ); + } catch (final Exception e) { + throw new ExtractionException("Unable to get comments count", e); + } } + + return commentsCount; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 951fe958..b05502a8 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -348,7 +348,6 @@ public class YoutubeCommentsExtractorTest { @Test public void testCommentsCount() throws IOException, ExtractionException { - extractor.getInitialPage(); // Needs to be called first assertTrue(extractor.getCommentsCount() > 18800); } } From 98a90fd9c8fa02c11bb2a8a04cd2b5cf3b7c3028 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Thu, 8 Dec 2022 12:10:33 +0000 Subject: [PATCH 7/8] Don't cache comments count and return early on page fetch if no token. --- .../extractors/YoutubeCommentsExtractor.java | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 92d46118..176317a3 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -33,12 +33,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { /** * Whether comments are disabled on video. */ - private boolean commentsDisabled = true; - - /** - * The total number of comments on video. - */ - private int commentsCount = (int) ITEM_COUNT_UNKNOWN; + private boolean commentsDisabled; /** * The second ajax /next response. @@ -268,6 +263,10 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { final String initialToken = findInitialCommentsToken(getJsonPostResponse("next", body, localization)); + if (initialToken == null) { + return; + } + // @formatter:off final byte[] ajaxBody = JsonWriter.string( prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) @@ -289,23 +288,23 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { public int getCommentsCount() throws ExtractionException { assertPageFetched(); - if (commentsCount == ITEM_COUNT_UNKNOWN) { - final JsonObject countText = ajaxJson - .getArray("onResponseReceivedEndpoints").getObject(0) - .getObject("reloadContinuationItemsCommand") - .getArray("continuationItems").getObject(0) - .getObject("commentsHeaderRenderer") - .getObject("countText"); - - try { - commentsCount = Integer.parseInt( - Utils.removeNonDigitCharacters(getTextFromObject(countText)) - ); - } catch (final Exception e) { - throw new ExtractionException("Unable to get comments count", e); - } + if (commentsDisabled) { + return -1; } - return commentsCount; + final JsonObject countText = ajaxJson + .getArray("onResponseReceivedEndpoints").getObject(0) + .getObject("reloadContinuationItemsCommand") + .getArray("continuationItems").getObject(0) + .getObject("commentsHeaderRenderer") + .getObject("countText"); + + try { + return Integer.parseInt( + Utils.removeNonDigitCharacters(getTextFromObject(countText)) + ); + } catch (final Exception e) { + throw new ExtractionException("Unable to get comments count", e); + } } } From 22a47da8c7ca9bd62ea3e2a2c1e00491f8ddbca4 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Mon, 2 Jan 2023 20:42:32 +0000 Subject: [PATCH 8/8] Fix requested change and remove outdated comment. --- .../services/youtube/extractors/YoutubeCommentsExtractor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java index 176317a3..4581afcb 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsExtractor.java @@ -51,7 +51,6 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { public InfoItemsPage getInitialPage() throws IOException, ExtractionException { - // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized if (commentsDisabled) { return getInfoItemsPageForDisabledComments(); }