parent
5ac80624a4
commit
0fb73301e3
3 changed files with 139 additions and 84 deletions
|
@ -47,13 +47,13 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
|
||||||
String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
|
final String commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
|
||||||
String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
|
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
|
||||||
return getPage(getNextPage(commentsToken));
|
return getPage(getNextPage(commentsToken));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
|
private Page getNextPage(JsonObject ajaxJson) throws ParsingException {
|
||||||
JsonArray arr;
|
final JsonArray arr;
|
||||||
try {
|
try {
|
||||||
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
|
arr = JsonUtils.getArray(ajaxJson, "response.continuationContents.commentSectionContinuation.continuations");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -89,14 +89,14 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
throw new IllegalArgumentException("Page doesn't contain an URL");
|
throw new IllegalArgumentException("Page doesn't contain an URL");
|
||||||
}
|
}
|
||||||
|
|
||||||
String ajaxResponse = makeAjaxRequest(page.getUrl());
|
final String ajaxResponse = makeAjaxRequest(page.getUrl());
|
||||||
JsonObject ajaxJson;
|
final JsonObject ajaxJson;
|
||||||
try {
|
try {
|
||||||
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
|
ajaxJson = JsonParser.array().from(ajaxResponse).getObject(1);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not parse json data for comments", e);
|
throw new ParsingException("Could not parse json data for comments", e);
|
||||||
}
|
}
|
||||||
CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(getServiceId());
|
||||||
collectCommentsFrom(collector, ajaxJson);
|
collectCommentsFrom(collector, ajaxJson);
|
||||||
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
|
||||||
}
|
}
|
||||||
|
@ -160,8 +160,8 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String findValue(String doc, String start, String end) {
|
private String findValue(String doc, String start, String end) {
|
||||||
int beginIndex = doc.indexOf(start) + start.length();
|
final int beginIndex = doc.indexOf(start) + start.length();
|
||||||
int endIndex = doc.indexOf(end, beginIndex);
|
final int endIndex = doc.indexOf(end, beginIndex);
|
||||||
return doc.substring(beginIndex, endIndex);
|
return doc.substring(beginIndex, endIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,7 +34,7 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
@Override
|
@Override
|
||||||
public String getThumbnailUrl() throws ParsingException {
|
public String getThumbnailUrl() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
final JsonArray arr = JsonUtils.getArray(json, "authorThumbnail.thumbnails");
|
||||||
return JsonUtils.getString(arr.getObject(2), "url");
|
return JsonUtils.getString(arr.getObject(2), "url");
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ParsingException("Could not get thumbnail url", e);
|
throw new ParsingException("Could not get thumbnail url", e);
|
||||||
|
@ -82,7 +82,13 @@ public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtract
|
||||||
@Override
|
@Override
|
||||||
public String getCommentText() throws ParsingException {
|
public String getCommentText() throws ParsingException {
|
||||||
try {
|
try {
|
||||||
String commentText = getTextFromObject(JsonUtils.getObject(json, "contentText"));
|
final JsonObject contentText = JsonUtils.getObject(json, "contentText");
|
||||||
|
if (contentText.isEmpty()) {
|
||||||
|
// completely empty comments as described in
|
||||||
|
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
final String commentText = getTextFromObject(contentText);
|
||||||
// youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
// youtube adds U+FEFF in some comments. eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
|
||||||
return Utils.removeUTF8BOM(commentText);
|
return Utils.removeUTF8BOM(commentText);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -23,91 +23,140 @@ import static org.junit.Assert.assertTrue;
|
||||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||||
|
|
||||||
public class YoutubeCommentsExtractorTest {
|
public class YoutubeCommentsExtractorTest {
|
||||||
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
|
/**
|
||||||
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
|
* Test a "normal" YouTube and Invidious page
|
||||||
private static YoutubeCommentsExtractor extractorYT;
|
*/
|
||||||
private static YoutubeCommentsExtractor extractorInvidious;
|
public static class Thomas {
|
||||||
|
private static final String urlYT = "https://www.youtube.com/watch?v=D00Au7k3i6o";
|
||||||
|
private static final String urlInvidious = "https://invidio.us/watch?v=D00Au7k3i6o";
|
||||||
|
private static YoutubeCommentsExtractor extractorYT;
|
||||||
|
private static YoutubeCommentsExtractor extractorInvidious;
|
||||||
|
|
||||||
@BeforeClass
|
private static final String commentContent = "sub 4 sub";
|
||||||
public static void setUp() throws Exception {
|
|
||||||
NewPipe.init(DownloaderTestImpl.getInstance());
|
|
||||||
extractorYT = (YoutubeCommentsExtractor) YouTube
|
|
||||||
.getCommentsExtractor(urlYT);
|
|
||||||
extractorYT.fetchPage();
|
|
||||||
extractorInvidious = (YoutubeCommentsExtractor) YouTube
|
|
||||||
.getCommentsExtractor(urlInvidious);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@BeforeClass
|
||||||
public void testGetComments() throws IOException, ExtractionException {
|
public static void setUp() throws Exception {
|
||||||
assertTrue(getCommentsHelper(extractorYT));
|
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||||
assertTrue(getCommentsHelper(extractorInvidious));
|
extractorYT = (YoutubeCommentsExtractor) YouTube
|
||||||
}
|
.getCommentsExtractor(urlYT);
|
||||||
|
extractorYT.fetchPage();
|
||||||
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
|
extractorInvidious = (YoutubeCommentsExtractor) YouTube
|
||||||
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
.getCommentsExtractor(urlInvidious);
|
||||||
boolean result = findInComments(comments, "s1ck m3m3");
|
extractorInvidious.fetchPage();
|
||||||
|
|
||||||
while (comments.hasNextPage() && !result) {
|
|
||||||
comments = extractor.getPage(comments.getNextPage());
|
|
||||||
result = findInComments(comments, "s1ck m3m3");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
@Test
|
||||||
}
|
public void testGetComments() throws IOException, ExtractionException {
|
||||||
|
assertTrue(getCommentsHelper(extractorYT));
|
||||||
@Test
|
assertTrue(getCommentsHelper(extractorInvidious));
|
||||||
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
|
|
||||||
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
|
|
||||||
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
|
|
||||||
CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
|
|
||||||
|
|
||||||
assertEquals("Comments", commentsInfo.getName());
|
|
||||||
boolean result = findInComments(commentsInfo.getRelatedItems(), "s1ck m3m3");
|
|
||||||
|
|
||||||
Page nextPage = commentsInfo.getNextPage();
|
|
||||||
InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
|
|
||||||
while (moreItems.hasNextPage() && !result) {
|
|
||||||
moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
|
|
||||||
result = findInComments(moreItems.getItems(), "s1ck m3m3");
|
|
||||||
nextPage = moreItems.getNextPage();
|
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
private boolean getCommentsHelper(YoutubeCommentsExtractor extractor) throws IOException, ExtractionException {
|
||||||
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
|
boolean result = findInComments(comments, commentContent);
|
||||||
|
|
||||||
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
while (comments.hasNextPage() && !result) {
|
||||||
for (CommentsInfoItem c : comments.getItems()) {
|
comments = extractor.getPage(comments.getNextPage());
|
||||||
assertFalse(Utils.isBlank(c.getUploaderUrl()));
|
result = findInComments(comments, commentContent);
|
||||||
assertFalse(Utils.isBlank(c.getUploaderName()));
|
}
|
||||||
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
|
||||||
assertFalse(Utils.isBlank(c.getCommentId()));
|
return result;
|
||||||
assertFalse(Utils.isBlank(c.getCommentText()));
|
|
||||||
assertFalse(Utils.isBlank(c.getName()));
|
|
||||||
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
|
||||||
assertNotNull(c.getUploadDate());
|
|
||||||
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
|
||||||
assertFalse(Utils.isBlank(c.getUrl()));
|
|
||||||
assertFalse(c.getLikeCount() < 0);
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
@Test
|
||||||
return findInComments(comments.getItems(), comment);
|
public void testGetCommentsFromCommentsInfo() throws IOException, ExtractionException {
|
||||||
}
|
assertTrue(getCommentsFromCommentsInfoHelper(urlYT));
|
||||||
|
assertTrue(getCommentsFromCommentsInfoHelper(urlInvidious));
|
||||||
|
}
|
||||||
|
|
||||||
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException, ExtractionException {
|
||||||
for (CommentsInfoItem c : comments) {
|
final CommentsInfo commentsInfo = CommentsInfo.getInfo(url);
|
||||||
if (c.getCommentText().contains(comment)) {
|
|
||||||
return true;
|
assertEquals("Comments", commentsInfo.getName());
|
||||||
|
boolean result = findInComments(commentsInfo.getRelatedItems(), commentContent);
|
||||||
|
|
||||||
|
Page nextPage = commentsInfo.getNextPage();
|
||||||
|
InfoItemsPage<CommentsInfoItem> moreItems = new InfoItemsPage<>(null, nextPage, null);
|
||||||
|
while (moreItems.hasNextPage() && !result) {
|
||||||
|
moreItems = CommentsInfo.getMoreItems(YouTube, commentsInfo, nextPage);
|
||||||
|
result = findInComments(moreItems.getItems(), commentContent);
|
||||||
|
nextPage = moreItems.getNextPage();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||||
|
InfoItemsPage<CommentsInfoItem> comments = extractorYT.getInitialPage();
|
||||||
|
|
||||||
|
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||||
|
for (CommentsInfoItem c : comments.getItems()) {
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderName()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getCommentId()));
|
||||||
|
assertFalse(Utils.isBlank(c.getCommentText()));
|
||||||
|
assertFalse(Utils.isBlank(c.getName()));
|
||||||
|
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
||||||
|
assertNotNull(c.getUploadDate());
|
||||||
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
|
assertFalse(c.getLikeCount() < 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String comment) {
|
||||||
|
return findInComments(comments.getItems(), comment);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
|
||||||
|
for (CommentsInfoItem c : comments) {
|
||||||
|
if (c.getCommentText().contains(comment)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test a video with an empty comment
|
||||||
|
*/
|
||||||
|
public static class EmptyComment {
|
||||||
|
private static YoutubeCommentsExtractor extractor;
|
||||||
|
private final static String url = "https://www.youtube.com/watch?v=VM_6n762j6M";
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
NewPipe.init(DownloaderTestImpl.getInstance());
|
||||||
|
extractor = (YoutubeCommentsExtractor) YouTube
|
||||||
|
.getCommentsExtractor(url);
|
||||||
|
extractor.fetchPage();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetCommentsAllData() throws IOException, ExtractionException {
|
||||||
|
final InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
|
||||||
|
|
||||||
|
DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
|
||||||
|
for (CommentsInfoItem c : comments.getItems()) {
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderName()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getCommentId()));
|
||||||
|
assertFalse(Utils.isBlank(c.getName()));
|
||||||
|
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
|
||||||
|
assertNotNull(c.getUploadDate());
|
||||||
|
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
|
||||||
|
assertFalse(Utils.isBlank(c.getUrl()));
|
||||||
|
assertFalse(c.getLikeCount() < 0);
|
||||||
|
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
|
||||||
|
assertTrue(Utils.isBlank(c.getCommentText()));
|
||||||
|
} else {
|
||||||
|
assertFalse(Utils.isBlank(c.getCommentText()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue