From bb5ad49face622b301739ccba9161a86d5adb32f Mon Sep 17 00:00:00 2001 From: yausername <13ritvik@gmail.com> Date: Sat, 16 Nov 2019 03:20:35 +0530 Subject: [PATCH] fix html comments --- .../extractors/PeertubeCommentsInfoItemExtractor.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java index 1e6df770..cfe3e6aa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java @@ -5,6 +5,8 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Locale; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; import org.schabi.newpipe.extractor.ServiceList; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; @@ -59,7 +61,12 @@ public class PeertubeCommentsInfoItemExtractor implements CommentsInfoItemExtrac @Override public String getCommentText() throws ParsingException { String htmlText = JsonUtils.getString(item, "text"); - return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); + try { + Document doc = Jsoup.parse(htmlText); + return doc.body().text(); + }catch(Exception e) { + return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); + } } @Override