Merge pull request #794 from FireMasterK/comments-count
[YouTube] Add support to extract total comment count
This commit is contained in:
		
						commit
						c1040bccac
					
				
					 4 changed files with 126 additions and 59 deletions
				
			
		|  | @ -22,6 +22,13 @@ public abstract class CommentsExtractor extends ListExtractor<CommentsInfoItem> | |||
|         return false; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Returns the total number of comments. | ||||
|      * <p> | ||||
|      * This base implementation always returns {@code -1}; extractors that are able to | ||||
|      * determine the count override this method. | ||||
|      * | ||||
|      * @return the total number of comments; always {@code -1} in this base implementation | ||||
|      * @throws ExtractionException declared for overriding implementations; this base | ||||
|      *                             implementation does not throw it | ||||
|      */ | ||||
|     public int getCommentsCount() throws ExtractionException { | ||||
|         return -1; | ||||
|     } | ||||
| 
 | ||||
|     @Nonnull | ||||
|     @Override | ||||
|     public String getName() throws ParsingException { | ||||
|  |  | |||
|  | @ -48,6 +48,11 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> { | |||
|                 ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor); | ||||
|         commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled()); | ||||
|         commentsInfo.setRelatedItems(initialCommentsPage.getItems()); | ||||
|         try { | ||||
|             commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount()); | ||||
|         } catch (final Exception e) { | ||||
|             commentsInfo.addError(e); | ||||
|         } | ||||
|         commentsInfo.setNextPage(initialCommentsPage.getNextPage()); | ||||
| 
 | ||||
|         return commentsInfo; | ||||
|  | @ -76,6 +81,7 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> { | |||
| 
 | ||||
|     private transient CommentsExtractor commentsExtractor; | ||||
|     private boolean commentsDisabled = false; | ||||
|     private int commentsCount; | ||||
| 
 | ||||
|     public CommentsExtractor getCommentsExtractor() { | ||||
|         return commentsExtractor; | ||||
|  | @ -86,7 +92,6 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> { | |||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * @apiNote Warning: This method is experimental and may get removed in a future release. | ||||
|      * @return {@code true} if the comments are disabled otherwise {@code false} (default) | ||||
|      * @see CommentsExtractor#isCommentsDisabled() | ||||
|      */ | ||||
|  | @ -95,10 +100,27 @@ public final class CommentsInfo extends ListInfo<CommentsInfoItem> { | |||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * @apiNote Warning: This method is experimental and may get removed in a future release. | ||||
|      * @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false} | ||||
|      */ | ||||
|     public void setCommentsDisabled(final boolean commentsDisabled) { | ||||
|         this.commentsDisabled = commentsDisabled; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Returns the total number of comments stored in this info object. | ||||
|      * <p> | ||||
|      * The value is whatever was last passed to {@link #setCommentsCount(int)}. If the setter | ||||
|      * was never called, the field still holds its Java default of {@code 0}. | ||||
|      * <p> | ||||
|      * NOTE(review): extractors may report {@code -1} for their count, so both {@code 0} and | ||||
|      * {@code -1} can occur here -- confirm how callers are expected to treat each of them. | ||||
|      * | ||||
|      * @return the stored total number of comments | ||||
|      */ | ||||
|     public int getCommentsCount() { | ||||
|         return commentsCount; | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Sets the total number of comments. | ||||
|      * <p> | ||||
|      * The value is stored as given; no validation is performed. | ||||
|      * | ||||
|      * @param commentsCount the total number of comments to store | ||||
|      */ | ||||
|     public void setCommentsCount(final int commentsCount) { | ||||
|         this.commentsCount = commentsCount; | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -1,18 +1,8 @@ | |||
| package org.schabi.newpipe.extractor.services.youtube.extractors; | ||||
| 
 | ||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; | ||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; | ||||
| import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.nio.charset.StandardCharsets; | ||||
| import java.util.Collections; | ||||
| import java.util.List; | ||||
| import java.util.Optional; | ||||
| 
 | ||||
| import javax.annotation.Nonnull; | ||||
| import javax.annotation.Nullable; | ||||
| 
 | ||||
| import com.grack.nanojson.JsonArray; | ||||
| import com.grack.nanojson.JsonObject; | ||||
| import com.grack.nanojson.JsonWriter; | ||||
| import org.schabi.newpipe.extractor.Page; | ||||
| import org.schabi.newpipe.extractor.StreamingService; | ||||
| import org.schabi.newpipe.extractor.comments.CommentsExtractor; | ||||
|  | @ -24,26 +14,31 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; | |||
| import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler; | ||||
| import org.schabi.newpipe.extractor.localization.Localization; | ||||
| import org.schabi.newpipe.extractor.utils.JsonUtils; | ||||
| import org.schabi.newpipe.extractor.utils.Utils; | ||||
| 
 | ||||
| import com.grack.nanojson.JsonArray; | ||||
| import com.grack.nanojson.JsonObject; | ||||
| import com.grack.nanojson.JsonWriter; | ||||
| import javax.annotation.Nonnull; | ||||
| import javax.annotation.Nullable; | ||||
| import java.io.IOException; | ||||
| import java.nio.charset.StandardCharsets; | ||||
| import java.util.Collections; | ||||
| import java.util.List; | ||||
| 
 | ||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse; | ||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; | ||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder; | ||||
| import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; | ||||
| 
 | ||||
| public class YoutubeCommentsExtractor extends CommentsExtractor { | ||||
| 
 | ||||
|     private JsonObject nextResponse; | ||||
|     /** | ||||
|      * Whether comments are disabled on video. | ||||
|      */ | ||||
|     private boolean commentsDisabled; | ||||
| 
 | ||||
|     /** | ||||
|      * Caching mechanism and holder of the commentsDisabled value. | ||||
|      * <br/> | ||||
|      * Initial value = empty -> unknown if comments are disabled or not<br/> | ||||
|      * Some method calls {@link #findInitialCommentsToken()} | ||||
|      * -> value is set<br/> | ||||
|      * If the method or another one that is depending on disabled comments | ||||
|      * is now called again, the method execution can avoid unnecessary calls | ||||
|      * The second ajax <b>/next</b> response. | ||||
|      */ | ||||
|     @SuppressWarnings("OptionalUsedAsFieldOrParameterType") | ||||
|     private Optional<Boolean> optCommentsDisabled = Optional.empty(); | ||||
|     private JsonObject ajaxJson; | ||||
| 
 | ||||
|     public YoutubeCommentsExtractor( | ||||
|             final StreamingService service, | ||||
|  | @ -56,30 +51,23 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|     public InfoItemsPage<CommentsInfoItem> getInitialPage() | ||||
|             throws IOException, ExtractionException { | ||||
| 
 | ||||
|         // Check if findInitialCommentsToken was already called and optCommentsDisabled initialized | ||||
|         if (optCommentsDisabled.orElse(false)) { | ||||
|         if (commentsDisabled) { | ||||
|             return getInfoItemsPageForDisabledComments(); | ||||
|         } | ||||
| 
 | ||||
|         // Get the token | ||||
|         final String commentsToken = findInitialCommentsToken(); | ||||
|         // Check if the comments have been disabled | ||||
|         if (optCommentsDisabled.get()) { | ||||
|             return getInfoItemsPageForDisabledComments(); | ||||
|         } | ||||
| 
 | ||||
|         return getPage(getNextPage(commentsToken)); | ||||
|         return extractComments(ajaxJson); | ||||
|     } | ||||
| 
 | ||||
|     /** | ||||
|      * Finds the initial comments token and initializes commentsDisabled. | ||||
|      * <br/> | ||||
|      * Also sets {@link #optCommentsDisabled}. | ||||
|      * Also sets {@link #commentsDisabled}. | ||||
|      * | ||||
|      * @return the continuation token or null if none was found | ||||
|      */ | ||||
|     @Nullable | ||||
|     private String findInitialCommentsToken() throws ExtractionException { | ||||
|     private String findInitialCommentsToken(final JsonObject nextResponse) | ||||
|             throws ExtractionException { | ||||
|         final String token = JsonUtils.getArray(nextResponse, | ||||
|                         "contents.twoColumnWatchNextResults.results.results.contents") | ||||
|                 .stream() | ||||
|  | @ -112,7 +100,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|                 .orElse(null); | ||||
| 
 | ||||
|         // The comments are disabled if we couldn't get a token | ||||
|         optCommentsDisabled = Optional.of(token == null); | ||||
|         commentsDisabled = token == null; | ||||
| 
 | ||||
|         return token; | ||||
|     } | ||||
|  | @ -123,9 +111,9 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|     } | ||||
| 
 | ||||
|     @Nullable | ||||
|     private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException { | ||||
|     private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException { | ||||
|         final JsonArray onResponseReceivedEndpoints = | ||||
|                 ajaxJson.getArray("onResponseReceivedEndpoints"); | ||||
|                 jsonObject.getArray("onResponseReceivedEndpoints"); | ||||
| 
 | ||||
|         // Prevent ArrayIndexOutOfBoundsException | ||||
|         if (onResponseReceivedEndpoints.isEmpty()) { | ||||
|  | @ -173,30 +161,39 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|     @Override | ||||
|     public InfoItemsPage<CommentsInfoItem> getPage(final Page page) | ||||
|             throws IOException, ExtractionException { | ||||
|         if (optCommentsDisabled.orElse(false)) { | ||||
| 
 | ||||
|         if (commentsDisabled) { | ||||
|             return getInfoItemsPageForDisabledComments(); | ||||
|         } | ||||
| 
 | ||||
|         if (page == null || isNullOrEmpty(page.getId())) { | ||||
|             throw new IllegalArgumentException("Page doesn't have the continuation."); | ||||
|         } | ||||
| 
 | ||||
|         final Localization localization = getExtractorLocalization(); | ||||
|         // @formatter:off | ||||
|         final byte[] body = JsonWriter.string( | ||||
|                 prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) | ||||
|                     .value("continuation", page.getId()) | ||||
|                     .done()) | ||||
|                 .getBytes(StandardCharsets.UTF_8); | ||||
|         // @formatter:on | ||||
| 
 | ||||
|         final JsonObject ajaxJson = getJsonPostResponse("next", body, localization); | ||||
|         final var jsonObject = getJsonPostResponse("next", body, localization); | ||||
| 
 | ||||
|         final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( | ||||
|                 getServiceId()); | ||||
|         collectCommentsFrom(collector, ajaxJson); | ||||
|         return new InfoItemsPage<>(collector, getNextPage(ajaxJson)); | ||||
|         return extractComments(jsonObject); | ||||
|     } | ||||
| 
 | ||||
|     private void collectCommentsFrom(final CommentsInfoItemsCollector collector, | ||||
|                                      @Nonnull final JsonObject ajaxJson) throws ParsingException { | ||||
|     /** | ||||
|      * Builds an items page: creates a collector for this service, fills it through | ||||
|      * {@code collectCommentsFrom(collector)} and pairs it with the next page returned by | ||||
|      * {@code getNextPage(jsonObject)}. | ||||
|      * <p> | ||||
|      * NOTE(review): {@code jsonObject} is only used for the next-page lookup here; | ||||
|      * {@code collectCommentsFrom} does not receive it and appears to read the | ||||
|      * {@code ajaxJson} field instead. Verify that pages requested through {@code getPage} | ||||
|      * collect comments from their own response rather than from the initial one. | ||||
|      * | ||||
|      * @param jsonObject the response that is handed to {@code getNextPage} | ||||
|      * @return a page made of the filled collector and the next page | ||||
|      * @throws ExtractionException if collecting the comments or looking up the next page fails | ||||
|      */ private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject) | ||||
|             throws ExtractionException { | ||||
|         final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector( | ||||
|                 getServiceId()); | ||||
|         collectCommentsFrom(collector); | ||||
|         return new InfoItemsPage<>(collector, getNextPage(jsonObject)); | ||||
|     } | ||||
| 
 | ||||
|     private void collectCommentsFrom(final CommentsInfoItemsCollector collector) | ||||
|             throws ParsingException { | ||||
| 
 | ||||
|         final JsonArray onResponseReceivedEndpoints = | ||||
|                 ajaxJson.getArray("onResponseReceivedEndpoints"); | ||||
|  | @ -254,24 +251,59 @@ public class YoutubeCommentsExtractor extends CommentsExtractor { | |||
|     public void onFetchPage(@Nonnull final Downloader downloader) | ||||
|             throws IOException, ExtractionException { | ||||
|         final Localization localization = getExtractorLocalization(); | ||||
|         // @formatter:off | ||||
|         final byte[] body = JsonWriter.string( | ||||
|                 prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) | ||||
|                     .value("videoId", getId()) | ||||
|                     .done()) | ||||
|                 .getBytes(StandardCharsets.UTF_8); | ||||
|         // @formatter:on | ||||
| 
 | ||||
|         nextResponse = getJsonPostResponse("next", body, localization); | ||||
|         final String initialToken = | ||||
|                 findInitialCommentsToken(getJsonPostResponse("next", body, localization)); | ||||
| 
 | ||||
|         if (initialToken == null) { | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|         // @formatter:off | ||||
|         final byte[] ajaxBody = JsonWriter.string( | ||||
|                         prepareDesktopJsonBuilder(localization, getExtractorContentCountry()) | ||||
|                                 .value("continuation", initialToken) | ||||
|                                 .done()) | ||||
|                 .getBytes(StandardCharsets.UTF_8); | ||||
|         // @formatter:on | ||||
| 
 | ||||
|         ajaxJson = getJsonPostResponse("next", ajaxBody, localization); | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     @Override | ||||
|     public boolean isCommentsDisabled() throws ExtractionException { | ||||
|         // Check if commentsDisabled has to be initialized | ||||
|         if (!optCommentsDisabled.isPresent()) { | ||||
|             // Initialize commentsDisabled | ||||
|             this.findInitialCommentsToken(); | ||||
|     public boolean isCommentsDisabled() { | ||||
|         return commentsDisabled; | ||||
|     } | ||||
| 
 | ||||
|         return optCommentsDisabled.get(); | ||||
|     /** | ||||
|      * Reads the total comment count from the header of the comments response held in | ||||
|      * {@code ajaxJson}. | ||||
|      * <p> | ||||
|      * {@code assertPageFetched()} is called first (presumably it fails when the page has not | ||||
|      * been fetched yet -- confirm). If comments are disabled, {@code -1} is returned without | ||||
|      * touching the response. Otherwise the {@code countText} object is looked up under | ||||
|      * {@code onResponseReceivedEndpoints[0].reloadContinuationItemsCommand | ||||
|      * .continuationItems[0].commentsHeaderRenderer}, its text is passed through | ||||
|      * {@code Utils.removeNonDigitCharacters} and the result is parsed with | ||||
|      * {@code Integer.parseInt}. | ||||
|      * <p> | ||||
|      * NOTE(review): this assumes the text contains the complete number (digits plus | ||||
|      * separators only) and that it fits into an {@code int}; an abbreviated count text would | ||||
|      * yield a wrong value -- confirm against real responses. | ||||
|      * | ||||
|      * @return the number of comments, or {@code -1} if comments are disabled | ||||
|      * @throws ExtractionException if reading the text or parsing it as an int fails; the | ||||
|      *                             original exception is attached as the cause | ||||
|      */ @Override | ||||
|     public int getCommentsCount() throws ExtractionException { | ||||
|         assertPageFetched(); | ||||
| 
 | ||||
|         if (commentsDisabled) { | ||||
|             return -1; | ||||
|         } | ||||
| 
 | ||||
|         final JsonObject countText = ajaxJson | ||||
|                 .getArray("onResponseReceivedEndpoints").getObject(0) | ||||
|                 .getObject("reloadContinuationItemsCommand") | ||||
|                 .getArray("continuationItems").getObject(0) | ||||
|                 .getObject("commentsHeaderRenderer") | ||||
|                 .getObject("countText"); | ||||
| 
 | ||||
|         try { | ||||
|             return Integer.parseInt( | ||||
|                     Utils.removeNonDigitCharacters(getTextFromObject(countText)) | ||||
|             ); | ||||
|         } catch (final Exception e) { | ||||
|             throw new ExtractionException("Unable to get comments count", e); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -89,6 +89,7 @@ public class YoutubeCommentsExtractorTest { | |||
|         @Test | ||||
|         public void testGetCommentsAllData() throws IOException, ExtractionException { | ||||
|             InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage(); | ||||
|             assertTrue(extractor.getCommentsCount() > 5); // at least 5 comments | ||||
| 
 | ||||
|             DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors()); | ||||
|             for (CommentsInfoItem c : comments.getItems()) { | ||||
|  | @ -344,6 +345,11 @@ public class YoutubeCommentsExtractorTest { | |||
|             assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment"); | ||||
|             assertGreater(300, firstComment.getReplyCount()); | ||||
|         } | ||||
| 
 | ||||
|         @Test | ||||
|         public void testCommentsCount() throws IOException, ExtractionException { | ||||
|             assertTrue(extractor.getCommentsCount() > 18800); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     public static class FormattingTest { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue