add link parsing for youtube description
This commit is contained in:
parent
aeb813840d
commit
389a87fc89
2 changed files with 23 additions and 2 deletions
|
@ -18,13 +18,15 @@ import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||||
import org.schabi.newpipe.extractor.stream.*;
|
import org.schabi.newpipe.extractor.stream.*;
|
||||||
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
|
|
||||||
import org.schabi.newpipe.extractor.utils.Parser;
|
import org.schabi.newpipe.extractor.utils.Parser;
|
||||||
import org.schabi.newpipe.extractor.utils.Utils;
|
import org.schabi.newpipe.extractor.utils.Utils;
|
||||||
|
|
||||||
import javax.annotation.Nonnull;
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -152,12 +154,24 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public String getDescription() throws ParsingException {
|
public String getDescription() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
try {
|
try {
|
||||||
return doc.select("p[id=\"eow-description\"]").first().html();
|
return parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html());
|
||||||
} catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
|
} catch (Exception e) {//todo: add fallback method <-- there is no ... as long as i know
|
||||||
throw new ParsingException("Could not get the description", e);
|
throw new ParsingException("Could not get the description", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String parseHtmlAndGetFullLinks(String descriptionHtml)
|
||||||
|
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
|
||||||
|
final Document description = Jsoup.parse(descriptionHtml, getUrl());
|
||||||
|
for(Element a : description.select("a")) {
|
||||||
|
final URL redirectLink = new URL(
|
||||||
|
a.attr("abs:href"));
|
||||||
|
final String link = Parser.compatParseMap(redirectLink.getQuery()).get("q");
|
||||||
|
a.text(link);
|
||||||
|
}
|
||||||
|
return description.select("body").first().html();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getAgeLimit() throws ParsingException {
|
public int getAgeLimit() throws ParsingException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
|
|
|
@ -73,6 +73,13 @@ public class YoutubeStreamExtractorDefaultTest {
|
||||||
assertFalse(extractor.getDescription().isEmpty());
|
assertFalse(extractor.getDescription().isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetFullLinksInDescriptlion() throws ParsingException {
|
||||||
|
assertTrue(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQid=yt"));
|
||||||
|
assertFalse(extractor.getDescription().contains("http://smarturl.it/SubscribeAdele?IQi..."));
|
||||||
|
System.out.println(extractor.getDescription());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetUploaderName() throws ParsingException {
|
public void testGetUploaderName() throws ParsingException {
|
||||||
assertNotNull(extractor.getUploaderName());
|
assertNotNull(extractor.getUploaderName());
|
||||||
|
|
Loading…
Reference in a new issue