From 389a87fc897aa44ac6347de41deb02b0fe9a5e1d Mon Sep 17 00:00:00 2001
From: Christian Schabesberger
Date: Thu, 16 Aug 2018 17:11:18 +0200
Subject: [PATCH] add link parsing for youtube description

---
Note: this revision only rewrites anchors that carry a "q" query parameter
and leaves every other anchor untouched. The blob ids on the "index" lines
predate that change and are stale.

 .../extractors/YoutubeStreamExtractor.java | 38 ++++++++++++++++++++--
 .../YoutubeStreamExtractorDefaultTest.java |  7 ++++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
index bd44ecf3..4709f257 100644
--- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
+++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -18,13 +18,15 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
 import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
 import org.schabi.newpipe.extractor.services.youtube.ItagItem;
 import org.schabi.newpipe.extractor.stream.*;
-import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
 import org.schabi.newpipe.extractor.utils.Parser;
 import org.schabi.newpipe.extractor.utils.Utils;
 
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
 import java.util.*;
 
 /*
@@ -152,12 +154,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
     public String getDescription() throws ParsingException {
         assertPageFetched();
         try {
-            return doc.select("p[id=\"eow-description\"]").first().html();
+            return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
         } catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
             throw new ParsingException("Could not get the description", e);
         }
     }
 
+    /**
+     * Parses the description HTML and replaces the text of every redirect link
+     * with the full target URL stored in the redirect's "q" query parameter.
+     * Anchors without an absolute href, without a query string or without a
+     * "q" parameter are left untouched.
+     *
+     * @param descriptionHtml the inner HTML of the description element
+     * @return the inner HTML of the parsed body with the link texts replaced
+     */
+    private String parseHtmlAndGetFullLinks(String descriptionHtml)
+            throws MalformedURLException, UnsupportedEncodingException, ParsingException {
+        final Document description = Jsoup.parse(descriptionHtml, getUrl());
+        for(Element a : description.select("a")) {
+            final String href = a.attr("abs:href");
+            if (href.isEmpty()) {
+                // no resolvable href, so there is nothing to expand
+                continue;
+            }
+            final String query = new URL(href).getQuery();
+            if (query == null) {
+                // no query string, so this link cannot carry a "q" target
+                continue;
+            }
+            final String link = Parser.compatParseMap(query).get("q");
+            if (link != null) {
+                // only rewrite the anchor text when a redirect target is present
+                a.text(link);
+            }
+        }
+        return description.select("body").first().html();
+    }
+
     @Override
     public int getAgeLimit() throws ParsingException {
         assertPageFetched();
diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java
index 982ceb98..669032ec 100644
--- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java
+++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeStreamExtractorDefaultTest.java
@@ -73,6 +73,13 @@ public class YoutubeStreamExtractorDefaultTest {
         assertFalse(extractor.getDescription().isEmpty());
     }
 
+    @Test
+    public void testGetFullLinksInDescription() throws ParsingException {
+        final String description = extractor.getDescription();
+        assertTrue(description.contains("http://smarturl.it/SubscribeAdele?IQid=yt"));
+        assertFalse(description.contains("http://smarturl.it/SubscribeAdele?IQi..."));
+    }
+
     @Test
     public void testGetUploaderName() throws ParsingException {
         assertNotNull(extractor.getUploaderName());