Merge pull request #703 from FireMasterK/comment-replies
Add support for extracting comment replies continuation
This commit is contained in:
commit
a9d214478d
10 changed files with 1275 additions and 30 deletions
|
@ -1,6 +1,7 @@
|
|||
package org.schabi.newpipe.extractor.comments;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
|
@ -21,6 +22,8 @@ public class CommentsInfoItem extends InfoItem {
|
|||
private boolean heartedByUploader;
|
||||
private boolean pinned;
|
||||
private int streamPosition;
|
||||
@Nullable
|
||||
private Page replies;
|
||||
|
||||
public static final int NO_LIKE_COUNT = -1;
|
||||
public static final int NO_STREAM_POSITION = -1;
|
||||
|
@ -142,4 +145,8 @@ public class CommentsInfoItem extends InfoItem {
|
|||
public int getStreamPosition() {
|
||||
return streamPosition;
|
||||
}
|
||||
|
||||
public void setReplies(@Nullable Page replies) { this.replies = replies; }
|
||||
|
||||
public Page getReplies() { return this.replies; }
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.schabi.newpipe.extractor.comments;
|
||||
|
||||
import org.schabi.newpipe.extractor.InfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
|
||||
|
@ -107,4 +108,13 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
|
|||
default int getStreamPosition() throws ParsingException {
|
||||
return CommentsInfoItem.NO_STREAM_POSITION;
|
||||
}
|
||||
|
||||
/**
|
||||
* The continuation page which is used to get comment replies from.
|
||||
* @return the continuation Page for the replies, or null if replies are not supported
|
||||
*/
|
||||
@Nullable
|
||||
default Page getReplies() throws ParsingException {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -93,6 +93,12 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
|
|||
addError(e);
|
||||
}
|
||||
|
||||
try {
|
||||
resultItem.setReplies(extractor.getReplies());
|
||||
} catch (Exception e) {
|
||||
addError(e);
|
||||
}
|
||||
|
||||
return resultItem;
|
||||
}
|
||||
|
||||
|
@ -106,12 +112,6 @@ public class CommentsInfoItemsCollector extends InfoItemsCollector<CommentsInfoI
|
|||
}
|
||||
|
||||
public List<CommentsInfoItem> getCommentsInfoItemList() {
|
||||
List<CommentsInfoItem> siiList = new ArrayList<>();
|
||||
for (InfoItem ii : super.getItems()) {
|
||||
if (ii instanceof CommentsInfoItem) {
|
||||
siiList.add((CommentsInfoItem) ii);
|
||||
}
|
||||
}
|
||||
return siiList;
|
||||
return new ArrayList<>(super.getItems());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -101,7 +101,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
|
||||
if (itemSectionRenderer.isPresent()) {
|
||||
token = JsonUtils.getString(((JsonObject) itemSectionRenderer.get())
|
||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0),
|
||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||
} else {
|
||||
token = null;
|
||||
|
@ -140,10 +140,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
return null;
|
||||
}
|
||||
|
||||
final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
|
||||
|
||||
final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
|
||||
|
||||
final String continuation;
|
||||
try {
|
||||
continuation = JsonUtils.getString(jsonArray.getObject(jsonArray.size() - 1),
|
||||
"continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||
continuation = JsonUtils.getString(continuationItemRenderer, jsonPath);
|
||||
} catch (final Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
@ -167,7 +170,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
|
||||
final Localization localization = getExtractorLocalization();
|
||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||
getExtractorContentCountry())
|
||||
getExtractorContentCountry())
|
||||
.value("continuation", page.getId())
|
||||
.done())
|
||||
.getBytes(UTF_8);
|
||||
|
@ -212,10 +215,11 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
contents.remove(index);
|
||||
}
|
||||
|
||||
final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
|
||||
|
||||
final List<Object> comments;
|
||||
try {
|
||||
comments = JsonUtils.getValues(contents,
|
||||
"commentThreadRenderer.comment.commentRenderer");
|
||||
comments = JsonUtils.getValues(contents, jsonKey);
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Unable to get parse youtube comments", e);
|
||||
}
|
||||
|
@ -234,7 +238,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
|||
throws IOException, ExtractionException {
|
||||
final Localization localization = getExtractorLocalization();
|
||||
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
|
||||
getExtractorContentCountry())
|
||||
getExtractorContentCountry())
|
||||
.value("videoId", getId())
|
||||
.done())
|
||||
.getBytes(UTF_8);
|
||||
|
|
|
@ -3,6 +3,8 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import com.grack.nanojson.JsonWriter;
|
||||
import org.schabi.newpipe.extractor.Page;
|
||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
|
@ -18,6 +20,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
|||
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||
|
||||
private final JsonObject json;
|
||||
private JsonObject commentRenderer;
|
||||
private final String url;
|
||||
private final TimeAgoParser timeAgoParser;
|
||||
|
||||
|
@ -29,6 +32,16 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
this.timeAgoParser = timeAgoParser;
|
||||
}
|
||||
|
||||
private JsonObject getCommentRenderer() throws ParsingException {
|
||||
if(commentRenderer == null) {
|
||||
if(!json.has("comment"))
|
||||
commentRenderer = json;
|
||||
else
|
||||
commentRenderer = JsonUtils.getObject(json, "comment.commentRenderer");
|
||||
}
|
||||
return commentRenderer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
return url;
|
||||
|
@ -37,7 +50,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
||||
final JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
|
||||
return JsonUtils.getString(arr.getObject(2), "url");
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
|
@ -47,7 +60,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
||||
} catch (final Exception e) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
|
@ -56,7 +69,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
try {
|
||||
return getTextFromObject(JsonUtils.getObject(json, "publishedTimeText"));
|
||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "publishedTimeText"));
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get publishedTimeText", e);
|
||||
}
|
||||
|
@ -94,7 +107,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
// Try first to get the exact like count by using the accessibility data
|
||||
final String likeCount;
|
||||
try {
|
||||
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(json,
|
||||
likeCount = Utils.removeNonDigitCharacters(JsonUtils.getString(getCommentRenderer(),
|
||||
"actionButtons.commentActionButtonsRenderer.likeButton.toggleButtonRenderer.accessibilityData.accessibilityData.label"));
|
||||
} catch (final Exception e) {
|
||||
// Use the approximate like count returned in the voteCount object
|
||||
|
@ -145,11 +158,11 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
*/
|
||||
try {
|
||||
// If a comment has no likes, voteCount is not set
|
||||
if (!json.has("voteCount")) {
|
||||
if (!getCommentRenderer().has("voteCount")) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
|
||||
final JsonObject voteCountObj = JsonUtils.getObject(json, "voteCount");
|
||||
final JsonObject voteCountObj = JsonUtils.getObject(getCommentRenderer(), "voteCount");
|
||||
if (voteCountObj.isEmpty()) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
|
@ -162,7 +175,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getCommentText() throws ParsingException {
|
||||
try {
|
||||
final JsonObject contentText = JsonUtils.getObject(json, "contentText");
|
||||
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
||||
if (contentText.isEmpty()) {
|
||||
// completely empty comments as described in
|
||||
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
||||
|
@ -180,7 +193,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getCommentId() throws ParsingException {
|
||||
try {
|
||||
return JsonUtils.getString(json, "commentId");
|
||||
return JsonUtils.getString(getCommentRenderer(), "commentId");
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get comment id", e);
|
||||
}
|
||||
|
@ -189,7 +202,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getUploaderAvatarUrl() throws ParsingException {
|
||||
try {
|
||||
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
||||
JsonArray arr = JsonUtils.getArray(getCommentRenderer(), "authorThumbnail.thumbnails");
|
||||
return JsonUtils.getString(arr.getObject(2), "url");
|
||||
} catch (final Exception e) {
|
||||
throw new ParsingException("Could not get author thumbnail", e);
|
||||
|
@ -198,24 +211,24 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
|
||||
@Override
|
||||
public boolean isHeartedByUploader() throws ParsingException {
|
||||
final JsonObject commentActionButtonsRenderer = json.getObject("actionButtons")
|
||||
final JsonObject commentActionButtonsRenderer = getCommentRenderer().getObject("actionButtons")
|
||||
.getObject("commentActionButtonsRenderer");
|
||||
return commentActionButtonsRenderer.has("creatorHeart");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPinned() {
|
||||
return json.has("pinnedCommentBadge");
|
||||
public boolean isPinned() throws ParsingException {
|
||||
return getCommentRenderer().has("pinnedCommentBadge");
|
||||
}
|
||||
|
||||
public boolean isUploaderVerified() {
|
||||
return json.has("authorCommentBadge");
|
||||
public boolean isUploaderVerified() throws ParsingException {
|
||||
return getCommentRenderer().has("authorCommentBadge");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
try {
|
||||
return getTextFromObject(JsonUtils.getObject(json, "authorText"));
|
||||
return getTextFromObject(JsonUtils.getObject(getCommentRenderer(), "authorText"));
|
||||
} catch (final Exception e) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
|
@ -224,10 +237,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
|||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
try {
|
||||
return "https://www.youtube.com/channel/" + JsonUtils.getString(json,
|
||||
return "https://www.youtube.com/channel/" + JsonUtils.getString(getCommentRenderer(),
|
||||
"authorEndpoint.browseEndpoint.browseId");
|
||||
} catch (final Exception e) {
|
||||
return EMPTY_STRING;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Page getReplies() throws ParsingException {
|
||||
try {
|
||||
final String id = JsonUtils.getString(JsonUtils.getArray(json, "replies.commentRepliesRenderer.contents").getObject(0), "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
|
||||
return new Page(url, id);
|
||||
} catch (final Exception e) {
|
||||
return null; // Would return null for Comment Replies, since YouTube does not support nested replies.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -306,4 +306,32 @@ public class YoutubeCommentsExtractorTest {
|
|||
assertTrue("The first pinned comment has no vote count", !Utils.isBlank(pinnedComment.getTextualLikeCount()));
|
||||
}
|
||||
}
|
||||
|
||||
public static class RepliesTest {
|
||||
private final static String url = "https://www.youtube.com/watch?v=--yeOvJGZQk";
|
||||
private static YoutubeCommentsExtractor extractor;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
YoutubeParsingHelper.resetClientVersionAndKey();
|
||||
YoutubeParsingHelper.setNumberGenerator(new Random(1));
|
||||
NewPipe.init(new DownloaderFactory().getDownloader(RESOURCE_PATH + "replies"));
|
||||
extractor = (YoutubeCommentsExtractor) YouTube
|
||||
.getCommentsExtractor(url);
|
||||
extractor.fetchPage();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetCommentsFirstReplies() throws IOException, ExtractionException {
|
||||
final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||
|
||||
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||
|
||||
CommentsInfoItem firstComment = comments.getItems().get(0);
|
||||
|
||||
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
|
||||
|
||||
assertEquals("First reply comment did not match", "Lol", replies.getItems().get(0).getCommentText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue