Merge pull request #703 from FireMasterK/comment-replies

Add support for extracting comment replies continuation
This commit is contained in:
Tobi 2021-09-14 23:58:14 +02:00 committed by GitHub
commit a9d214478d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 1275 additions and 30 deletions

View file

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.comments; package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -21,6 +22,8 @@ public class CommentsInfoItem extends InfoItem {
private boolean heartedByUploader; private boolean heartedByUploader;
private boolean pinned; private boolean pinned;
private int streamPosition; private int streamPosition;
@Nullable
private Page replies;
public static final int NO_LIKE_COUNT = -1; public static final int NO_LIKE_COUNT = -1;
public static final int NO_STREAM_POSITION = -1; public static final int NO_STREAM_POSITION = -1;
@ -142,4 +145,8 @@ public class CommentsInfoItem extends InfoItem {
public int getStreamPosition() { public int getStreamPosition() {
return streamPosition; return streamPosition;
} }
/**
 * Stores the page from which the replies to this comment can be loaded.
 *
 * @param replies the replies page, or {@code null} when there is none
 */
public void setReplies(@Nullable final Page replies) {
    this.replies = replies;
}
/**
 * Returns the page from which the replies to this comment can be loaded.
 *
 * @return the replies page previously stored via {@link #setReplies(Page)},
 *         or {@code null} if none was set
 */
@Nullable
public Page getReplies() {
    return this.replies;
}
} }

View file

@ -1,6 +1,7 @@
package org.schabi.newpipe.extractor.comments; package org.schabi.newpipe.extractor.comments;
import org.schabi.newpipe.extractor.InfoItemExtractor; import org.schabi.newpipe.extractor.InfoItemExtractor;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
@ -107,4 +108,13 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
default int getStreamPosition() throws ParsingException { default int getStreamPosition() throws ParsingException {
return CommentsInfoItem.NO_STREAM_POSITION; return CommentsInfoItem.NO_STREAM_POSITION;
} }
/**
 * Provides the continuation {@link Page} from which the replies to this comment
 * can be fetched.
 *
 * <p>This default implementation always returns {@code null}; extractors that
 * can supply a replies continuation override it.</p>
 *
 * @return the continuation page for the replies, or {@code null} if replies are
 *         not supported
 * @throws ParsingException declared so that overriding implementations may report
 *                          parsing failures; never thrown by this default
 */
@Nullable
default Page getReplies() throws ParsingException {
    return null;
}
} }

View file

@ -93,6 +93,12 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
addError(e); addError(e);
} }
try {
resultItem.setReplies(extractor.getReplies());
} catch (Exception e) {
addError(e);
}
return resultItem; return resultItem;
} }
@ -106,12 +112,6 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
} }
public List<CommentsInfoItem> getCommentsInfoItemList() { public List<CommentsInfoItem> getCommentsInfoItemList() {
List<CommentsInfoItem> siiList = new ArrayList<>(); return new ArrayList<>(super.getItems());
for (InfoItem ii : super.getItems()) {
if (ii instanceof CommentsInfoItem) {
siiList.add((CommentsInfoItem) ii);
}
}
return siiList;
} }
} }

View file

@ -101,7 +101,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
if (itemSectionRenderer.isPresent()) { if (itemSectionRenderer.isPresent()) {
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get()) token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
.getObject("itemSectionRenderer").getArray("contents").getObject(0), .getObject("itemSectionRenderer").getArray("contents").getObject(0),
"continuationItemRenderer.continuationEndpoint.continuationCommand.token"); "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
} else { } else {
token = null; token = null;
@ -140,10 +140,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
return null; return null;
} }
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
final String continuation; final String continuation;
try { try {
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1), continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
} catch (final Exception e) { } catch (final Exception e) {
return null; return null;
} }
@ -167,7 +170,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization, final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry()) getExtractorContentCountry())
.value("continuation", page.getId()) .value("continuation", page.getId())
.done()) .done())
.getBytes(UTF_8); .getBytes(UTF_8);
@ -212,10 +215,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
contents.remove(index); contents.remove(index);
} }
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
final List<Object> comments; final List<Object> comments;
try { try {
comments = JsonUtils.getValues(contents, comments = JsonUtils.getValues(contents, jsonKey);
"commentThreadRenderer.comment.commentRenderer");
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Unable to get parse youtube comments", e); throw new ParsingException("Unable to get parse youtube comments", e);
} }
@ -234,7 +238,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
throws IOException, ExtractionException { throws IOException, ExtractionException {
final Localization localization = getExtractorLocalization(); final Localization localization = getExtractorLocalization();
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization, final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry()) getExtractorContentCountry())
.value("videoId", getId()) .value("videoId", getId())
.done()) .done())
.getBytes(UTF_8); .getBytes(UTF_8);

View file

@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
@ -18,6 +20,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
private final JsonObject json; private final JsonObject json;
private JsonObject commentRenderer;
private final String url; private final String url;
private final TimeAgoParser timeAgoParser; private final TimeAgoParser timeAgoParser;
@ -29,6 +32,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
this.timeAgoParser = timeAgoParser; this.timeAgoParser = timeAgoParser;
} }
/**
 * Resolves, and caches on first successful use, the JSON object that holds the
 * comment fields.
 *
 * <p>When {@code json} has a {@code "comment"} key the renderer is looked up at
 * {@code comment.commentRenderer}; otherwise {@code json} itself is used as the
 * renderer.</p>
 *
 * @return the cached comment renderer object
 * @throws ParsingException if the {@code comment.commentRenderer} lookup fails
 */
private JsonObject getCommentRenderer() throws ParsingException {
    if (commentRenderer != null) {
        return commentRenderer;
    }

    // The field is only assigned once the lookup succeeded, so a failed
    // lookup is retried on the next call.
    commentRenderer = json.has("comment")
            ? JsonUtils.getObject(json, "comment.commentRenderer")
            : json;
    return commentRenderer;
}
@Override @Override
public String getUrl() throws ParsingException { public String getUrl() throws ParsingException {
return url; return url;
@ -37,7 +50,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getThumbnailUrl() throws ParsingException { public String getThumbnailUrl() throws ParsingException {
try { try {
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails"); final JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
return JsonUtils.getString(arr.getObject(2), "url"); return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get thumbnail url", e); throw new ParsingException("Could not get thumbnail url", e);
@ -47,7 +60,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getName() throws ParsingException { public String getName() throws ParsingException {
try { try {
return getTextFromObject(JsonUtils.getObject(json, "authorText")); return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) { } catch (final Exception e) {
return EMPTY_STRING; return EMPTY_STRING;
} }
@ -56,7 +69,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getTextualUploadDate() throws ParsingException { public String getTextualUploadDate() throws ParsingException {
try { try {
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText")); return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "publishedTimeText"));
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get publishedTimeText", e); throw new ParsingException("Could not get publishedTimeText", e);
} }
@ -94,7 +107,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
// Try first to get the exact like count by using the accessibility data // Try first to get the exact like count by using the accessibility data
final String likeCount; final String likeCount;
try { try {
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json, likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label")); "actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
} catch (final Exception e) { } catch (final Exception e) {
// Use the approximate like count returned into the voteCount object // Use the approximate like count returned into the voteCount object
@ -145,11 +158,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
*/ */
try { try {
// If a comment has no likes voteCount is not set // If a comment has no likes voteCount is not set
if (!json.has("voteCount")) { if (!getCommentRenderer().has("voteCount")) {
return EMPTY_STRING; return EMPTY_STRING;
} }
final JsonObject voteCountObj = JsonUtils.getObject(json, "voteCount"); final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
if (voteCountObj.isEmpty()) { if (voteCountObj.isEmpty()) {
return EMPTY_STRING; return EMPTY_STRING;
} }
@ -162,7 +175,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getCommentText() throws ParsingException { public String getCommentText() throws ParsingException {
try { try {
final JsonObject contentText = JsonUtils.getObject(json, "contentText"); final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) { if (contentText.isEmpty()) {
// completely empty comments as described in // completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584 // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
@ -180,7 +193,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getCommentId() throws ParsingException { public String getCommentId() throws ParsingException {
try { try {
return JsonUtils.getString(json, "commentId"); return JsonUtils.getString(getCommentRenderer(), "commentId");
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get comment id", e); throw new ParsingException("Could not get comment id", e);
} }
@ -189,7 +202,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getUploaderAvatarUrl() throws ParsingException { public String getUploaderAvatarUrl() throws ParsingException {
try { try {
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails"); JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
return JsonUtils.getString(arr.getObject(2), "url"); return JsonUtils.getString(arr.getObject(2), "url");
} catch (final Exception e) { } catch (final Exception e) {
throw new ParsingException("Could not get author thumbnail", e); throw new ParsingException("Could not get author thumbnail", e);
@ -198,24 +211,24 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public boolean isHeartedByUploader() throws ParsingException { public boolean isHeartedByUploader() throws ParsingException {
final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons") final JsonObject commentActionButtonsRenderer = getCommentRenderer().getObject("actionButtons")
.getObject("commentActionButtonsRenderer"); .getObject("commentActionButtonsRenderer");
return commentActionButtonsRenderer.has("creatorHeart"); return commentActionButtonsRenderer.has("creatorHeart");
} }
@Override @Override
public boolean isPinned() { public boolean isPinned() throws ParsingException {
return json.has("pinnedCommentBadge"); return getCommentRenderer().has("pinnedCommentBadge");
} }
public boolean isUploaderVerified() { public boolean isUploaderVerified() throws ParsingException {
return json.has("authorCommentBadge"); return getCommentRenderer().has("authorCommentBadge");
} }
@Override @Override
public String getUploaderName() throws ParsingException { public String getUploaderName() throws ParsingException {
try { try {
return getTextFromObject(JsonUtils.getObject(json, "authorText")); return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
} catch (final Exception e) { } catch (final Exception e) {
return EMPTY_STRING; return EMPTY_STRING;
} }
@ -224,10 +237,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
@Override @Override
public String getUploaderUrl() throws ParsingException { public String getUploaderUrl() throws ParsingException {
try { try {
return "https://www.youtube.com/channel/" + JsonUtils.getString(json, return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
"authorEndpoint.browseEndpoint.browseId"); "authorEndpoint.browseEndpoint.browseId");
} catch (final Exception e) { } catch (final Exception e) {
return EMPTY_STRING; return EMPTY_STRING;
} }
} }
/**
 * Builds the continuation page used to fetch the replies to this comment.
 *
 * <p>The continuation token is read from the first entry of
 * {@code replies.commentRepliesRenderer.contents}.</p>
 *
 * @return a {@link Page} made of this item's URL and the continuation token, or
 *         {@code null} if the token could not be read
 */
@Override
public Page getReplies() throws ParsingException {
    try {
        final JsonArray replyContents =
                JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents");
        final String continuationToken = JsonUtils.getString(replyContents.getObject(0),
                "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
        return new Page(url, continuationToken);
    } catch (final Exception e) {
        // Any failure means "no replies page". This is the expected outcome for
        // comment replies, since YouTube does not support nested replies.
        return null;
    }
}
} }

View file

@ -306,4 +306,32 @@ public class YoutubeCommentsExtractorTest {
assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount())); assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
} }
} }
/**
 * Verifies that the replies continuation of the first comment can be followed
 * and yields the expected first reply.
 */
public static class RepliesTest {
    private static final String url = "https://www.youtube.com/watch?v=--yeOvJGZQk";
    private static YoutubeCommentsExtractor extractor;

    @BeforeClass
    public static void setUp() throws Exception {
        YoutubeParsingHelper.resetClientVersionAndKey();
        // Fixed seed for the number generator used by the parsing helper.
        YoutubeParsingHelper.setNumberGenerator(new Random(1));
        NewPipe.init(new DownloaderFactory().getDownloader(RESOURCE_PATH + "replies"));
        extractor = (YoutubeCommentsExtractor) YouTube.getCommentsExtractor(url);
        extractor.fetchPage();
    }

    @Test
    public void testGetCommentsFirstReplies() throws IOException, ExtractionException {
        final InfoItemsPage<CommentsInfoItem> initialPage = extractor.getInitialPage();
        DefaultTests.defaultTestListOfItems(YouTube, initialPage.getItems(),
                initialPage.getErrors());

        // Follow the replies continuation attached to the first comment.
        final CommentsInfoItem topComment = initialPage.getItems().get(0);
        final InfoItemsPage<CommentsInfoItem> replyPage =
                extractor.getPage(topComment.getReplies());

        final String firstReplyText = replyPage.getItems().get(0).getCommentText();
        assertEquals("First reply comment did not match", "Lol", firstReplyText);
    }
}
} }