add link parsing for youtube description

This commit is contained in:
Christian Schabesberger 2018-08-16 17:11:18 +02:00
parent aeb813840d
commit 389a87fc89
2 changed files with 23 additions and 2 deletions

View file

@ -18,13 +18,15 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler; import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*; import java.util.*;
/* /*
@ -152,12 +154,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
assertPageFetched(); assertPageFetched();
try { try {
return doc.select("p[id=\"eow-description\"]").first().html(); return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
} catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know } catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
throw new ParsingException("Could not get the description", e); throw new ParsingException("Could not get the description", e);
} }
} }
/**
 * Rewrites the visible text of the links in the description so that it shows the
 * full link target instead of the text found in the page.
 * <p>
 * Every {@code <a>} element is resolved against {@link #getUrl()}. If the resolved
 * link has a query string with a {@code q} parameter, the value of that parameter
 * becomes the new text of the element (presumably the real target of a redirect
 * link; verify against the pages this is run on). All other anchors are left
 * exactly as they are.
 *
 * @param descriptionHtml the HTML of the description element
 * @return the inner HTML of the parsed description body with the link texts replaced
 * @throws MalformedURLException        if a non-empty resolved link is not a valid URL
 * @throws UnsupportedEncodingException declared for the query string parsing helper
 * @throws ParsingException             declared for {@link #getUrl()} and the parsing helper
 */
private String parseHtmlAndGetFullLinks(String descriptionHtml)
        throws MalformedURLException, UnsupportedEncodingException, ParsingException {
    final Document description = Jsoup.parse(descriptionHtml, getUrl());
    for (Element a : description.select("a")) {
        final String absoluteHref = a.attr("abs:href");
        if (absoluteHref.isEmpty()) {
            // The anchor has no href, or it could not be made absolute.
            // new URL("") would throw and abort the whole description.
            continue;
        }

        final String queryString = new URL(absoluteHref).getQuery();
        if (queryString == null) {
            // Without a query string there is no "q" parameter to read,
            // so the original link text is kept.
            continue;
        }

        final String link = Parser.compatParseMap(queryString).get("q");
        if (link != null) {
            // Only overwrite the text when a target was found; jsoup rejects a
            // null text, which would otherwise fail the whole description.
            a.text(link);
        }
    }
    return description.select("body").first().html();
}
@Override @Override
public int getAgeLimit() throws ParsingException { public int getAgeLimit() throws ParsingException {
assertPageFetched(); assertPageFetched();

View file

@ -73,6 +73,13 @@ public class YoutubeStreamExtractorDefaultTest {
assertFalse(extractor.getDescription().isEmpty()); assertFalse(extractor.getDescription().isEmpty());
} }
/**
 * Checks that links in the description are given in full rather than in the
 * shortened form that ends in "...".
 */
@Test
public void testGetFullLinksInDescriptlion() throws ParsingException {
    // Extract once and reuse the result for both assertions.
    final String description = extractor.getDescription();
    assertTrue(description.contains("http://smarturl.it/SubscribeAdele?IQid=yt"));
    assertFalse(description.contains("http://smarturl.it/SubscribeAdele?IQi..."));
}
@Test @Test
public void testGetUploaderName() throws ParsingException { public void testGetUploaderName() throws ParsingException {
assertNotNull(extractor.getUploaderName()); assertNotNull(extractor.getUploaderName());