Use Description object for comments text.
This commit is contained in:
parent
40f1ec4a54
commit
b566084cac
9 changed files with 41 additions and 30 deletions
|
@ -3,13 +3,14 @@ package org.schabi.newpipe.extractor.comments;
|
||||||
import org.schabi.newpipe.extractor.InfoItem;
|
import org.schabi.newpipe.extractor.InfoItem;
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
public class CommentsInfoItem extends InfoItem {
|
public class CommentsInfoItem extends InfoItem {
|
||||||
|
|
||||||
private String commentId;
|
private String commentId;
|
||||||
private String commentText;
|
private Description commentText;
|
||||||
private String uploaderName;
|
private String uploaderName;
|
||||||
private String uploaderAvatarUrl;
|
private String uploaderAvatarUrl;
|
||||||
private String uploaderUrl;
|
private String uploaderUrl;
|
||||||
|
@ -43,11 +44,11 @@ public class CommentsInfoItem extends InfoItem {
|
||||||
this.commentId = commentId;
|
this.commentId = commentId;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCommentText() {
|
public Description getCommentText() {
|
||||||
return commentText;
|
return commentText;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCommentText(final String commentText) {
|
public void setCommentText(final Description commentText) {
|
||||||
this.commentText = commentText;
|
this.commentText = commentText;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
@ -41,8 +42,8 @@ public interface CommentsInfoItemExtractor extends InfoItemExtractor {
|
||||||
/**
|
/**
|
||||||
* The text of the comment
|
* The text of the comment
|
||||||
*/
|
*/
|
||||||
default String getCommentText() throws ParsingException {
|
default Description getCommentText() throws ParsingException {
|
||||||
return "";
|
return Description.EMPTY_DESCRIPTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.bandcamp.extractors;
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
@ -18,7 +19,7 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getName() throws ParsingException {
|
public String getName() throws ParsingException {
|
||||||
return getCommentText();
|
return getCommentText().getContent();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -32,12 +33,14 @@ public class BandcampCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getCommentText() throws ParsingException {
|
public Description getCommentText() throws ParsingException {
|
||||||
return writing.getElementsByClass("text").stream()
|
final var text = writing.getElementsByClass("text").stream()
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.map(Element::ownText)
|
.map(Element::ownText)
|
||||||
.findFirst()
|
.findFirst()
|
||||||
.orElseThrow(() -> new ParsingException("Could not get comment text"));
|
.orElseThrow(() -> new ParsingException("Could not get comment text"));
|
||||||
|
|
||||||
|
return new Description(text, Description.PLAIN_TEXT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
@ -59,13 +60,15 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getCommentText() throws ParsingException {
|
public Description getCommentText() throws ParsingException {
|
||||||
final String htmlText = JsonUtils.getString(item, "text");
|
final String htmlText = JsonUtils.getString(item, "text");
|
||||||
try {
|
try {
|
||||||
final Document doc = Jsoup.parse(htmlText);
|
final Document doc = Jsoup.parse(htmlText);
|
||||||
return doc.body().text();
|
final var text = doc.body().text();
|
||||||
|
return new Description(text, Description.PLAIN_TEXT);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
|
final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
|
||||||
|
return new Description(text, Description.PLAIN_TEXT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
|
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
@ -24,8 +25,8 @@ public class SoundcloudCommentsInfoItemExtractor implements CommentsInfoItemExtr
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getCommentText() {
|
public Description getCommentText() {
|
||||||
return json.getString("body");
|
return new Description(json.getString("body"), Description.PLAIN_TEXT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -1,21 +1,21 @@
|
||||||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
|
||||||
|
|
||||||
import com.grack.nanojson.JsonArray;
|
import com.grack.nanojson.JsonArray;
|
||||||
import com.grack.nanojson.JsonObject;
|
import com.grack.nanojson.JsonObject;
|
||||||
|
|
||||||
import org.schabi.newpipe.extractor.Page;
|
import org.schabi.newpipe.extractor.Page;
|
||||||
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
|
||||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
|
import org.schabi.newpipe.extractor.stream.Description;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
|
import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
|
||||||
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
|
||||||
|
|
||||||
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {
|
||||||
|
|
||||||
private final JsonObject json;
|
private final JsonObject json;
|
||||||
|
@ -176,18 +176,20 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getCommentText() throws ParsingException {
|
public Description getCommentText() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
|
||||||
if (contentText.isEmpty()) {
|
if (contentText.isEmpty()) {
|
||||||
// completely empty comments as described in
|
// completely empty comments as described in
|
||||||
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
||||||
return "";
|
return Description.EMPTY_DESCRIPTION;
|
||||||
}
|
}
|
||||||
final String commentText = getTextFromObject(contentText, true);
|
final String commentText = getTextFromObject(contentText, true);
|
||||||
// YouTube adds U+FEFF in some comments.
|
// YouTube adds U+FEFF in some comments.
|
||||||
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
||||||
return Utils.removeUTF8BOM(commentText);
|
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);
|
||||||
|
|
||||||
|
return new Description(commentTextBomRemoved, Description.HTML);
|
||||||
} catch (final Exception e) {
|
} catch (final Exception e) {
|
||||||
throw new ParsingException("Could not get comment text", e);
|
throw new ParsingException("Could not get comment text", e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ public class BandcampCommentsExtractorTest {
|
||||||
for (CommentsInfoItem c : comments.getItems()) {
|
for (CommentsInfoItem c : comments.getItems()) {
|
||||||
assertFalse(Utils.isBlank(c.getUploaderName()));
|
assertFalse(Utils.isBlank(c.getUploaderName()));
|
||||||
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
assertFalse(Utils.isBlank(c.getName()));
|
assertFalse(Utils.isBlank(c.getName()));
|
||||||
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getUrl()));
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
|
|
|
@ -75,7 +75,7 @@ public class PeertubeCommentsExtractorTest {
|
||||||
assertFalse(Utils.isBlank(c.getUploaderName()));
|
assertFalse(Utils.isBlank(c.getUploaderName()));
|
||||||
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getCommentId()));
|
assertFalse(Utils.isBlank(c.getCommentId()));
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
assertFalse(Utils.isBlank(c.getName()));
|
assertFalse(Utils.isBlank(c.getName()));
|
||||||
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
||||||
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
|
@ -91,7 +91,7 @@ public class PeertubeCommentsExtractorTest {
|
||||||
|
|
||||||
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
||||||
for (CommentsInfoItem c : comments) {
|
for (CommentsInfoItem c : comments) {
|
||||||
if (c.getCommentText().contains(comment)) {
|
if (c.getCommentText().getContent().contains(comment)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -95,7 +95,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
assertFalse(Utils.isBlank(c.getUploaderName()));
|
assertFalse(Utils.isBlank(c.getUploaderName()));
|
||||||
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getCommentId()));
|
assertFalse(Utils.isBlank(c.getCommentId()));
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
assertFalse(Utils.isBlank(c.getName()));
|
assertFalse(Utils.isBlank(c.getName()));
|
||||||
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
||||||
assertNotNull(c.getUploadDate());
|
assertNotNull(c.getUploadDate());
|
||||||
|
@ -111,7 +111,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
|
|
||||||
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
||||||
for (CommentsInfoItem c : comments) {
|
for (CommentsInfoItem c : comments) {
|
||||||
if (c.getCommentText().contains(comment)) {
|
if (c.getCommentText().getContent().contains(comment)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -152,9 +152,9 @@ public class YoutubeCommentsExtractorTest {
|
||||||
assertFalse(Utils.isBlank(c.getUrl()));
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
assertTrue(c.getLikeCount() >= 0);
|
assertTrue(c.getLikeCount() >= 0);
|
||||||
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
|
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
|
||||||
assertTrue(Utils.isBlank(c.getCommentText()));
|
assertTrue(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
} else {
|
} else {
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -193,7 +193,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getUrl()));
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
assertTrue(c.getLikeCount() >= 0);
|
assertTrue(c.getLikeCount() >= 0);
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
if (c.isHeartedByUploader()) {
|
if (c.isHeartedByUploader()) {
|
||||||
heartedByUploader = true;
|
heartedByUploader = true;
|
||||||
}
|
}
|
||||||
|
@ -233,7 +233,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
assertFalse(Utils.isBlank(c.getUrl()));
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
assertTrue(c.getLikeCount() >= 0);
|
assertTrue(c.getLikeCount() >= 0);
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
|
assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
|
||||||
|
@ -328,7 +328,7 @@ public class YoutubeCommentsExtractorTest {
|
||||||
|
|
||||||
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
|
InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());
|
||||||
|
|
||||||
assertEquals("First", replies.getItems().get(0).getCommentText(),
|
assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
|
||||||
"First reply comment did not match");
|
"First reply comment did not match");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue