Extract stream and search meta info for YouTube

Add method to extract Google webcache URLs.
This commit is contained in:
TobiGr 2020-12-20 19:54:12 +01:00
parent 853a65a1a6
commit bc6de14952
20 changed files with 526 additions and 29 deletions

View file

@ -0,0 +1,76 @@
package org.schabi.newpipe.extractor;
import org.schabi.newpipe.extractor.stream.Description;
import java.io.Serializable;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nonnull;
/**
 * Additional information a service attaches to a stream or to a search query,
 * for example an info box with links to external sources.
 * <p>
 * An instance holds a title, a {@link Description} as content, a list of URLs and a list of
 * texts for those URLs. Nothing in this class ties a URL to a text.
 * NOTE(review): callers appear to add URL and text pairwise so that both lists are
 * index-aligned; confirm before relying on it.
 * </p>
 */
public class MetaInfo implements Serializable {
    private String title = "";
    // Start with an empty description so that getContent() honours its @Nonnull contract
    // even when the instance was created with the no-arg constructor.
    private Description content = new Description("", Description.PLAIN_TEXT);
    private List<URL> urls = new ArrayList<>();
    private List<String> urlTexts = new ArrayList<>();

    /**
     * Creates a fully populated meta info.
     *
     * @param title    title of the info, may be empty
     * @param content  the text of the info
     * @param urls     links belonging to the info; the list is copied
     * @param urlTexts texts for the links; the list is copied
     */
    public MetaInfo(@Nonnull final String title, @Nonnull final Description content,
                    @Nonnull final List<URL> urls, @Nonnull final List<String> urlTexts) {
        this.title = title;
        this.content = content;
        // Copy the lists: callers may pass immutable lists (e.g. Collections.emptyList()),
        // on which a later addUrl()/addUrlText() would throw UnsupportedOperationException.
        this.urls = new ArrayList<>(urls);
        this.urlTexts = new ArrayList<>(urlTexts);
    }

    /**
     * Creates an empty meta info: empty title, empty plain-text content, no URLs, no URL texts.
     */
    public MetaInfo() {
    }

    /**
     * @return Title of the info. Can be empty.
     */
    @Nonnull
    public String getTitle() {
        return title;
    }

    public void setTitle(@Nonnull final String title) {
        this.title = title;
    }

    /**
     * @return the content of the info; an empty plain-text description if none has been set
     */
    @Nonnull
    public Description getContent() {
        return content;
    }

    public void setContent(@Nonnull final Description content) {
        this.content = content;
    }

    /**
     * @return the internal, mutable list of URLs
     */
    @Nonnull
    public List<URL> getUrls() {
        return urls;
    }

    /**
     * Replaces all URLs with a copy of the given list.
     *
     * @param urls the new URLs
     */
    public void setUrls(@Nonnull final List<URL> urls) {
        this.urls = new ArrayList<>(urls);
    }

    public void addUrl(@Nonnull final URL url) {
        urls.add(url);
    }

    /**
     * @return the internal, mutable list of URL texts
     */
    @Nonnull
    public List<String> getUrlTexts() {
        return urlTexts;
    }

    /**
     * Replaces all URL texts with a copy of the given list.
     *
     * @param urlTexts the new URL texts
     */
    public void setUrlTexts(@Nonnull final List<String> urlTexts) {
        this.urlTexts = new ArrayList<>(urlTexts);
    }

    public void addUrlText(@Nonnull final String urlText) {
        urlTexts.add(urlText);
    }
}

View file

@ -2,12 +2,14 @@ package org.schabi.newpipe.extractor.search;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.ListExtractor; import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import java.util.List;
public abstract class SearchExtractor extends ListExtractor<InfoItem> { public abstract class SearchExtractor extends ListExtractor<InfoItem> {
@ -57,4 +59,15 @@ public abstract class SearchExtractor extends ListExtractor<InfoItem> {
* @return whether the results comes from a corrected query or not. * @return whether the results comes from a corrected query or not.
*/ */
public abstract boolean isCorrectedSearch() throws ParsingException; public abstract boolean isCorrectedSearch() throws ParsingException;
/**
 * Meta information about the search query.
 * <p>
 * Example: on YouTube, if you search for "Covid-19",
 * there is a box with information from the WHO about Covid-19 and a link to the WHO's website.
 * </p>
 *
 * @return additional meta information about the search query; declared {@code @Nonnull},
 *         so implementations must not return {@code null}
 * @throws ParsingException if the meta information could not be parsed
 */
@Nonnull
public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
} }

View file

@ -1,20 +1,20 @@
package org.schabi.newpipe.extractor.search; package org.schabi.newpipe.extractor.search;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.*;
import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.ListInfo;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler; import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.utils.ExtractorHelper; import org.schabi.newpipe.extractor.utils.ExtractorHelper;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import javax.annotation.Nonnull;
public class SearchInfo extends ListInfo<InfoItem> { public class SearchInfo extends ListInfo<InfoItem> {
private String searchString; private String searchString;
private String searchSuggestion; private String searchSuggestion;
private boolean isCorrectedSearch; private boolean isCorrectedSearch;
private List<MetaInfo> metaInfo;
public SearchInfo(int serviceId, public SearchInfo(int serviceId,
SearchQueryHandler qIHandler, SearchQueryHandler qIHandler,
@ -51,6 +51,11 @@ public class SearchInfo extends ListInfo<InfoItem> {
} catch (Exception e) { } catch (Exception e) {
info.addError(e); info.addError(e);
} }
try {
info.setMetaInfo(extractor.getMetaInfo());
} catch (Exception e) {
info.addError(e);
}
ListExtractor.InfoItemsPage<InfoItem> page = ExtractorHelper.getItemsPageOrLogError(info, extractor); ListExtractor.InfoItemsPage<InfoItem> page = ExtractorHelper.getItemsPageOrLogError(info, extractor);
info.setRelatedItems(page.getItems()); info.setRelatedItems(page.getItems());
@ -87,4 +92,13 @@ public class SearchInfo extends ListInfo<InfoItem> {
public void setSearchSuggestion(String searchSuggestion) { public void setSearchSuggestion(String searchSuggestion) {
this.searchSuggestion = searchSuggestion; this.searchSuggestion = searchSuggestion;
} }
/**
 * @return the meta info of the search, or an empty list if none has been set;
 *         never {@code null}
 */
@Nonnull
public List<MetaInfo> getMetaInfo() {
    // The field has no initial value and stays unset when the extractor call fails
    // (the exception is only recorded as an error), so fall back to an empty list
    // instead of breaking the @Nonnull contract.
    return metaInfo == null ? java.util.Collections.<MetaInfo>emptyList() : metaInfo;
}

/**
 * @param metaInfo the meta info of the search; stored by reference
 */
public void setMetaInfo(@Nonnull List<MetaInfo> metaInfo) {
    this.metaInfo = metaInfo;
}
} }

View file

@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelInfoItem; import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
@ -20,6 +21,7 @@ import org.schabi.newpipe.extractor.services.media_ccc.extractors.infoItems.Medi
import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferencesListLinkHandlerFactory; import org.schabi.newpipe.extractor.services.media_ccc.linkHandler.MediaCCCConferencesListLinkHandlerFactory;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.List; import java.util.List;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
@ -55,6 +57,12 @@ public class MediaCCCSearchExtractor extends SearchExtractor {
return false; return false;
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() { public InfoItemsPage<InfoItem> getInitialPage() {

View file

@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -301,4 +302,10 @@ public class MediaCCCStreamExtractor extends StreamExtractor {
public List<StreamSegment> getStreamSegments() { public List<StreamSegment> getStreamSegments() {
return Collections.emptyList(); return Collections.emptyList();
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
} }

View file

@ -4,6 +4,7 @@ import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -17,6 +18,8 @@ import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import java.io.IOException; import java.io.IOException;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
@ -42,6 +45,12 @@ public class PeertubeSearchExtractor extends SearchExtractor {
return false; return false;
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
final String pageUrl = getUrl() + "&" + START_KEY + "=0&" + COUNT_KEY + "=" + ITEMS_PER_PAGE; final String pageUrl = getUrl() + "&" + START_KEY + "=0&" + COUNT_KEY + "=" + ITEMS_PER_PAGE;

View file

@ -5,6 +5,7 @@ import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -309,6 +310,12 @@ public class PeertubeStreamExtractor extends StreamExtractor {
return Collections.emptyList(); return Collections.emptyList();
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
private String getRelatedStreamsUrl(final List<String> tags) throws UnsupportedEncodingException { private String getRelatedStreamsUrl(final List<String> tags) throws UnsupportedEncodingException {
final String url = baseUrl + PeertubeSearchQueryHandlerFactory.SEARCH_ENDPOINT; final String url = baseUrl + PeertubeSearchQueryHandlerFactory.SEARCH_ENDPOINT;
final StringBuilder params = new StringBuilder(); final StringBuilder params = new StringBuilder();

View file

@ -8,6 +8,7 @@ import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.InfoItemExtractor; import org.schabi.newpipe.extractor.InfoItemExtractor;
import org.schabi.newpipe.extractor.InfoItemsCollector; import org.schabi.newpipe.extractor.InfoItemsCollector;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -22,6 +23,8 @@ import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
@ -47,6 +50,12 @@ public class SoundcloudSearchExtractor extends SearchExtractor {
return false; return false;
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {

View file

@ -6,6 +6,7 @@ import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException; import com.grack.nanojson.JsonParserException;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -327,4 +328,10 @@ public class SoundcloudStreamExtractor extends StreamExtractor {
public List<StreamSegment> getStreamSegments() { public List<StreamSegment> getStreamSegments() {
return Collections.emptyList(); return Collections.emptyList();
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
} }

View file

@ -76,19 +76,16 @@ public class YoutubeParsingHelper {
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id="; private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user="; private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
private static final String[] RECAPTCHA_DETECTION_SELECTORS = { private static boolean isGoogleURL(String url) {
"form[action*=\"/das_captcha\"]", url = extractCachedUrlIfNeeded(url);
"input[name*=\"action_recaptcha_verify\"]" try {
}; final URL u = new URL(url);
final String host = u.getHost();
public static Document parseAndCheckPage(final String url, final Response response) throws ReCaptchaException { return host.startsWith("google.") || host.startsWith("m.google.");
final Document document = Jsoup.parse(response.responseBody(), url); } catch (MalformedURLException e) {
return false;
for (String detectionSelector : RECAPTCHA_DETECTION_SELECTORS) {
if (!document.select(detectionSelector).isEmpty()) {
throw new ReCaptchaException("reCAPTCHA challenge requested (detected with selector: \"" + detectionSelector + "\")", url);
}
} }
}
return document; return document;
} }
@ -650,4 +647,124 @@ public class YoutubeParsingHelper {
} }
} }
} }
/**
 * Collects the meta info contained in a list of section renderers.
 * <p>
 * Only entries carrying an {@code itemSectionRenderer} are inspected. Inside such a section,
 * every {@code infoPanelContentRenderer} and every {@code clarificationRenderer} is converted
 * into one {@link MetaInfo}; an item carrying both yields two entries.
 * </p>
 *
 * @param contents the JSON array to scan; every element is cast to {@link JsonObject}
 * @return the extracted meta info in encounter order, possibly empty
 * @throws ParsingException if one of the renderers could not be parsed
 */
@Nonnull
public static List<MetaInfo> getMetaInfo(final JsonArray contents) throws ParsingException {
    final List<MetaInfo> collected = new ArrayList<>();

    for (final Object entry : contents) {
        final JsonObject section = (JsonObject) entry;
        if (!section.has("itemSectionRenderer")) {
            continue;
        }

        final JsonArray sectionItems = section.getObject("itemSectionRenderer").getArray("contents");
        for (final Object item : sectionItems) {
            final JsonObject renderer = (JsonObject) item;
            if (renderer.has("infoPanelContentRenderer")) {
                collected.add(getInfoPanelContent(renderer.getObject("infoPanelContentRenderer")));
            }
            if (renderer.has("clarificationRenderer")) {
                collected.add(getClarificationRendererContent(
                        renderer.getObject("clarificationRenderer")));
            }
        }
    }

    return collected;
}
/**
 * Converts an {@code infoPanelContentRenderer} into a {@link MetaInfo}.
 * <p>
 * The texts of all {@code paragraphs} are joined with {@code <br>} and stored as HTML content.
 * If the renderer has a {@code sourceEndpoint}, its URL (unwrapped from Google's webcache if
 * needed) and the text of {@code inlineSource} are added as link and link text.
 * </p>
 *
 * @param infoPanelContentRenderer the renderer object to convert
 * @return the meta info built from the renderer
 * @throws ParsingException if the source URL is missing or malformed,
 *                          or if the link text is missing or empty
 */
@Nonnull
private static MetaInfo getInfoPanelContent(final JsonObject infoPanelContentRenderer)
        throws ParsingException {
    final MetaInfo metaInfo = new MetaInfo();

    final StringBuilder sb = new StringBuilder();
    for (final Object paragraph : infoPanelContentRenderer.getArray("paragraphs")) {
        final String paragraphText = YoutubeParsingHelper.getTextFromObject((JsonObject) paragraph);
        if (paragraphText == null) {
            // StringBuilder.append(null) would put the literal "null" into the content
            continue;
        }
        if (sb.length() != 0) {
            sb.append("<br>");
        }
        sb.append(paragraphText);
    }
    metaInfo.setContent(new Description(sb.toString(), Description.HTML));

    if (infoPanelContentRenderer.has("sourceEndpoint")) {
        final String metaInfoLinkUrl = extractCachedUrlIfNeeded(
                YoutubeParsingHelper.getUrlFromNavigationEndpoint(
                        infoPanelContentRenderer.getObject("sourceEndpoint")));
        // Explicit check instead of provoking and catching a NullPointerException
        if (metaInfoLinkUrl == null) {
            throw new ParsingException("Could not get metadata info URL");
        }
        try {
            metaInfo.addUrl(new URL(metaInfoLinkUrl));
        } catch (final MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
                infoPanelContentRenderer.getObject("inlineSource"));
        if (isNullOrEmpty(metaInfoLinkText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        metaInfo.addUrlText(metaInfoLinkText);
    }

    return metaInfo;
}
/**
 * Converts a {@code clarificationRenderer} into a {@link MetaInfo}.
 * <p>
 * {@code contentTitle} becomes the title and {@code text} the plain-text content. The
 * {@code actionButton}, if present, contributes a link and its text. A
 * {@code secondaryEndpoint} with a {@code secondarySource} contributes a second link, unless
 * it points to Google. All URLs are unwrapped from Google's webcache if needed.
 * </p>
 *
 * @param clarificationRenderer the renderer object to convert
 * @return the meta info built from the renderer
 * @throws ParsingException if title or text are missing, if a URL is missing or malformed,
 *                          or if the action button's link text is missing or empty
 */
@Nonnull
private static MetaInfo getClarificationRendererContent(final JsonObject clarificationRenderer)
        throws ParsingException {
    final MetaInfo metaInfo = new MetaInfo();

    final String title = YoutubeParsingHelper.getTextFromObject(clarificationRenderer.getObject("contentTitle"));
    final String text = YoutubeParsingHelper.getTextFromObject(clarificationRenderer.getObject("text"));
    if (title == null || text == null) {
        throw new ParsingException("Could not extract clarification renderer content");
    }
    metaInfo.setTitle(title);
    metaInfo.setContent(new Description(text, Description.PLAIN_TEXT));

    if (clarificationRenderer.has("actionButton")) {
        final JsonObject actionButton = clarificationRenderer.getObject("actionButton")
                .getObject("buttonRenderer");
        final String actionUrl = extractCachedUrlIfNeeded(
                YoutubeParsingHelper.getUrlFromNavigationEndpoint(actionButton.getObject("command")));
        // Explicit check instead of provoking and catching a NullPointerException
        if (actionUrl == null) {
            throw new ParsingException("Could not get metadata info URL");
        }
        try {
            metaInfo.addUrl(new URL(actionUrl));
        } catch (final MalformedURLException e) {
            throw new ParsingException("Could not get metadata info URL", e);
        }

        final String metaInfoLinkText = YoutubeParsingHelper.getTextFromObject(
                actionButton.getObject("text"));
        if (isNullOrEmpty(metaInfoLinkText)) {
            throw new ParsingException("Could not get metadata info link text.");
        }
        metaInfo.addUrlText(metaInfoLinkText);
    }

    if (clarificationRenderer.has("secondaryEndpoint") && clarificationRenderer.has("secondarySource")) {
        // Unwrap webcache URLs here as well, so that the stored link is the original site,
        // consistent with the action button link above.
        final String url = extractCachedUrlIfNeeded(
                getUrlFromNavigationEndpoint(clarificationRenderer.getObject("secondaryEndpoint")));
        // ignore Google URLs, because those point to a Google search about "Covid-19"
        if (url != null && !isGoogleURL(url)) {
            try {
                metaInfo.addUrl(new URL(url));
                final String description = getTextFromObject(clarificationRenderer.getObject("secondarySource"));
                // fall back to the URL itself when the source has no text
                metaInfo.addUrlText(description == null ? url : description);
            } catch (final MalformedURLException e) {
                throw new ParsingException("Could not get metadata info secondary URL", e);
            }
        }
    }

    return metaInfo;
}
/**
 * Sometimes, YouTube provides URLs which use Google's cache. They look like
 * {@code https://webcache.googleusercontent.com/search?q=cache:CACHED_URL}
 * <p>
 * Everything after the first {@code cache:} marker is treated as the cached URL. A webcache URL
 * without that marker, or with nothing after it, is returned unchanged.
 * </p>
 *
 * @param url the URL which might refer to the Google's webcache, may be {@code null}
 * @return the URL which is referring to the original site, the unchanged {@code url} if it
 *         is not a (well-formed) webcache URL, or {@code null} if {@code url} is {@code null}
 */
public static String extractCachedUrlIfNeeded(final String url) {
    if (url == null) {
        return null;
    }
    if (url.contains("webcache.googleusercontent.com")) {
        final String marker = "cache:";
        final int markerIndex = url.indexOf(marker);
        // Do not index blindly into a split() result: the marker may be missing
        // or may be the last thing in the URL.
        if (markerIndex >= 0) {
            final String cachedUrl = url.substring(markerIndex + marker.length());
            if (!cachedUrl.isEmpty()) {
                return cachedUrl;
            }
        }
    }
    return url;
}
} }

View file

@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter; import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -163,6 +164,12 @@ public class YoutubeMusicSearchExtractor extends SearchExtractor {
return !showingResultsForRenderer.isEmpty(); return !showingResultsForRenderer.isEmpty();
} }
/**
 * Always returns an empty list: this extractor does not extract any meta info.
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() {
return Collections.emptyList();
}
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException, IOException { public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException, IOException {

View file

@ -7,6 +7,7 @@ import com.grack.nanojson.JsonParserException;
import com.grack.nanojson.JsonWriter; import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -16,13 +17,11 @@ import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser; import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector; import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
import java.io.IOException; import java.io.IOException;
import java.util.Collections; import java.util.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
@ -106,6 +105,13 @@ public class YoutubeSearchExtractor extends SearchExtractor {
return !showingResultsForRenderer.isEmpty(); return !showingResultsForRenderer.isEmpty();
} }
/**
 * Extracts the meta info from the section list of the search response held in
 * {@code initialData}, delegating the parsing to
 * {@link YoutubeParsingHelper#getMetaInfo(com.grack.nanojson.JsonArray)}.
 *
 * @return the meta info found in the search response, possibly empty
 * @throws ParsingException if a meta info entry could not be parsed
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
    return YoutubeParsingHelper.getMetaInfo(
            initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
                    .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents"));
}
@Nonnull @Nonnull
@Override @Override
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException { public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {

View file

@ -13,6 +13,7 @@ import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader; import org.schabi.newpipe.extractor.downloader.Downloader;
@ -45,6 +46,9 @@ import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull; import javax.annotation.Nonnull;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
@ -1118,4 +1122,12 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
return segments; return segments;
} }
/**
 * Extracts the meta info from the {@code twoColumnWatchNextResults} part of
 * {@code initialData}; the parsing itself is done by {@code YoutubeParsingHelper.getMetaInfo}.
 *
 * @return the meta info returned by the helper
 * @throws ParsingException propagated from the helper
 */
@Nonnull
@Override
public List<MetaInfo> getMetaInfo() throws ParsingException {
return YoutubeParsingHelper.getMetaInfo(
initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("results").getObject("results").getArray("contents"));
}
} }

View file

@ -22,6 +22,7 @@ package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.Extractor; import org.schabi.newpipe.extractor.Extractor;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor; import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -486,4 +487,18 @@ public abstract class StreamExtractor extends Extractor {
*/ */
@Nonnull @Nonnull
public abstract List<StreamSegment> getStreamSegments() throws ParsingException; public abstract List<StreamSegment> getStreamSegments() throws ParsingException;
/**
 * Meta information about the stream.
 * <p>
 * This can be information about the stream creator (e.g. if the creator is a public broadcaster)
 * or further information on the topic (e.g. hints that the video might contain conspiracy theories
 * or contains information about a current health situation like the Covid-19 pandemic).
 * </p>
 * <p>
 * The meta information often contains links to external sources like Wikipedia or the WHO.
 * </p>
 *
 * @return The meta info of the stream or an empty List if not provided.
 * @throws ParsingException if the meta information could not be parsed
 */
@Nonnull
public abstract List<MetaInfo> getMetaInfo() throws ParsingException;
} }

View file

@ -1,9 +1,6 @@
package org.schabi.newpipe.extractor.stream; package org.schabi.newpipe.extractor.stream;
import org.schabi.newpipe.extractor.Info; import org.schabi.newpipe.extractor.*;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException; import org.schabi.newpipe.extractor.exceptions.ContentNotSupportedException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException; import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -13,9 +10,12 @@ import org.schabi.newpipe.extractor.utils.ExtractorHelper;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
/* /*
@ -329,6 +329,11 @@ public class StreamInfo extends Info {
} catch (Exception e) { } catch (Exception e) {
streamInfo.addError(e); streamInfo.addError(e);
} }
try {
streamInfo.setMetaInfo(extractor.getMetaInfo());
} catch (Exception e) {
streamInfo.addError(e);
}
streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor)); streamInfo.setRelatedStreams(ExtractorHelper.getRelatedVideosOrLogError(streamInfo, extractor));
@ -379,6 +384,7 @@ public class StreamInfo extends Info {
private Locale language = null; private Locale language = null;
private List<String> tags = new ArrayList<>(); private List<String> tags = new ArrayList<>();
private List<StreamSegment> streamSegments = new ArrayList<>(); private List<StreamSegment> streamSegments = new ArrayList<>();
private List<MetaInfo> metaInfo = new ArrayList<>();
/** /**
* Get the stream type * Get the stream type
@ -684,4 +690,13 @@ public class StreamInfo extends Info {
public void setStreamSegments(List<StreamSegment> streamSegments) { public void setStreamSegments(List<StreamSegment> streamSegments) {
this.streamSegments = streamSegments; this.streamSegments = streamSegments;
} }
/**
 * @param metaInfo the meta info of the stream; must not be {@code null} because
 *                 {@link #getMetaInfo()} hands it out as {@code @Nonnull}. Stored by reference.
 */
public void setMetaInfo(@Nonnull final List<MetaInfo> metaInfo) {
    this.metaInfo = metaInfo;
}

/**
 * @return the meta info of the stream; an empty list unless {@link #setMetaInfo(List)}
 *         has been called
 */
@Nonnull
public List<MetaInfo> getMetaInfo() {
    return this.metaInfo;
}
} }

View file

@ -1,12 +1,20 @@
package org.schabi.newpipe.extractor.services; package org.schabi.newpipe.extractor.services;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.schabi.newpipe.extractor.ExtractorAsserts.assertEmpty; import static org.schabi.newpipe.extractor.ExtractorAsserts.assertEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
@ -20,6 +28,10 @@ public abstract class DefaultSearchExtractorTest extends DefaultListExtractorTes
return false; return false;
} }
/**
 * The meta info which {@link #testMetaInfo()} looks for in the extractor's result.
 * The default is an empty list, in which case that test asserts nothing.
 * NOTE(review): the {@code throws} clause presumably exists so that overriding tests can
 * construct {@link URL} objects inline; confirm against the subclasses.
 */
public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
return Collections.emptyList();
}
@Test @Test
@Override @Override
public void testSearchString() throws Exception { public void testSearchString() throws Exception {
@ -41,4 +53,34 @@ public abstract class DefaultSearchExtractorTest extends DefaultListExtractorTes
public void testSearchCorrected() throws Exception { public void testSearchCorrected() throws Exception {
assertEquals(isCorrectedSearch(), extractor().isCorrectedSearch()); assertEquals(isCorrectedSearch(), extractor().isCorrectedSearch());
} }
/**
 * Checks that every expected meta info is found in the extracted meta info.
 * <p>
 * The comparison is done per attribute over all extracted entries: content text, title,
 * URL texts and URLs of each expected entry only have to occur somewhere in the extracted
 * list, not necessarily within the same entry.
 * </p>
 *
 * @see DefaultStreamExtractorTest#testMetaInfo()
 */
@Test
public void testMetaInfo() throws Exception {
    final List<MetaInfo> metaInfoList = extractor().getMetaInfo();
    final List<MetaInfo> expectedMetaInfoList = expectedMetaInfo();

    // The flattened views of the extracted data do not depend on the expected entry,
    // so build them once instead of once per loop iteration.
    final List<String> texts = metaInfoList.stream()
            .map(metaInfo -> metaInfo.getContent().getContent())
            .collect(Collectors.toList());
    final List<String> titles = metaInfoList.stream()
            .map(MetaInfo::getTitle)
            .collect(Collectors.toList());
    final List<URL> urls = metaInfoList.stream()
            .flatMap(info -> info.getUrls().stream())
            .collect(Collectors.toList());
    final List<String> urlTexts = metaInfoList.stream()
            .flatMap(info -> info.getUrlTexts().stream())
            .collect(Collectors.toList());

    for (final MetaInfo expectedMetaInfo : expectedMetaInfoList) {
        assertTrue("Missing meta info content: " + expectedMetaInfo.getContent().getContent(),
                texts.contains(expectedMetaInfo.getContent().getContent()));
        assertTrue("Missing meta info title: " + expectedMetaInfo.getTitle(),
                titles.contains(expectedMetaInfo.getTitle()));

        for (final String expectedUrlText : expectedMetaInfo.getUrlTexts()) {
            assertTrue("Missing meta info URL text: " + expectedUrlText,
                    urlTexts.contains(expectedUrlText));
        }
        for (final URL expectedUrl : expectedMetaInfo.getUrls()) {
            assertTrue("Missing meta info URL: " + expectedUrl, urls.contains(expectedUrl));
        }
    }
}
} }

View file

@ -2,6 +2,7 @@ package org.schabi.newpipe.extractor.services;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.extractor.MediaFormat; import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.stream.AudioStream; import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.stream.Description;
@ -15,9 +16,12 @@ import org.schabi.newpipe.extractor.stream.VideoStream;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.stream.Collectors;
import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
@ -67,6 +71,7 @@ public abstract class DefaultStreamExtractorTest extends DefaultExtractorTest<St
public List<String> expectedTags() { return Collections.emptyList(); } // default: no tags public List<String> expectedTags() { return Collections.emptyList(); } // default: no tags
public String expectedSupportInfo() { return ""; } // default: no support info available public String expectedSupportInfo() { return ""; } // default: no support info available
public int expectedStreamSegmentsCount() { return -1; } // return 0 or greater to test (default is -1 to ignore) public int expectedStreamSegmentsCount() { return -1; } // return 0 or greater to test (default is -1 to ignore)
/**
 * Meta info the extractor is expected to provide; compared by {@code testMetaInfo()}.
 *
 * @return an empty list unless a subclass overrides this method
 * @throws MalformedURLException declared so that overriding tests may construct URLs
 */
public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
    // default: no metadata info available
    return Collections.emptyList();
}
@Test @Test
@Override @Override
@ -387,4 +392,35 @@ public abstract class DefaultStreamExtractorTest extends DefaultExtractorTest<St
assertEquals(expectedStreamSegmentsCount(), extractor().getStreamSegments().size()); assertEquals(expectedStreamSegmentsCount(), extractor().getStreamSegments().size());
} }
} }
/**
 * Verifies that everything listed by {@code expectedMetaInfo()} shows up in the meta info
 * returned by the extractor.
 * <p>
 * The comparison is field-wise over the whole extracted list: the content text, the title,
 * every URL and every URL text of each expected {@link MetaInfo} must occur somewhere in the
 * extracted meta info, not necessarily within one and the same extracted item.
 *
 * @see DefaultSearchExtractorTest#testMetaInfo()
 */
@Test
public void testMetaInfo() throws Exception {
    final List<MetaInfo> metaInfoList = extractor().getMetaInfo();
    final List<MetaInfo> expectedMetaInfoList = expectedMetaInfo();
    if (expectedMetaInfoList.isEmpty()) {
        // Nothing is expected, so the extracted items are not inspected at all.
        return;
    }

    // The extracted values do not depend on the expected item, so collect them only once.
    final List<String> texts = metaInfoList.stream()
            .map(metaInfo -> metaInfo.getContent().getContent())
            .collect(Collectors.toList());
    final List<String> titles = metaInfoList.stream()
            .map(MetaInfo::getTitle)
            .collect(Collectors.toList());
    final List<URL> urls = metaInfoList.stream()
            .flatMap(info -> info.getUrls().stream())
            .collect(Collectors.toList());
    final List<String> urlTexts = metaInfoList.stream()
            .flatMap(info -> info.getUrlTexts().stream())
            .collect(Collectors.toList());

    for (final MetaInfo expectedMetaInfo : expectedMetaInfoList) {
        final String expectedText = expectedMetaInfo.getContent().getContent();
        assertTrue("Missing meta info content: " + expectedText, texts.contains(expectedText));
        assertTrue("Missing meta info title: " + expectedMetaInfo.getTitle(),
                titles.contains(expectedMetaInfo.getTitle()));
        for (final String expectedUrlText : expectedMetaInfo.getUrlTexts()) {
            assertTrue("Missing meta info URL text: " + expectedUrlText,
                    urlTexts.contains(expectedUrlText));
        }
        for (final URL expectedUrl : expectedMetaInfo.getUrls()) {
            assertTrue("Missing meta info URL: " + expectedUrl, urls.contains(expectedUrl));
        }
    }
}
} }

View file

@ -36,4 +36,12 @@ public class YoutubeParsingHelperTest {
assertEquals(4445767, YoutubeParsingHelper.parseDurationString("1,234:56:07")); assertEquals(4445767, YoutubeParsingHelper.parseDurationString("1,234:56:07"));
assertEquals(754, YoutubeParsingHelper.parseDurationString("12:34 ")); assertEquals(754, YoutubeParsingHelper.parseDurationString("12:34 "));
} }
/**
 * Checks {@code YoutubeParsingHelper.extractCachedUrlIfNeeded} with a Google webcache URL,
 * which must be reduced to the cached target URL, and with an ordinary URL, which must come
 * back unchanged.
 */
@Test
public void testConvertFromGoogleCacheUrl() throws ParsingException {
    final String webcacheUrl =
            "https://webcache.googleusercontent.com/search?q=cache:https://mohfw.gov.in/";
    final String ordinaryUrl = "https://www.infektionsschutz.de/coronavirus-sars-cov-2.html";

    final String fromWebcache = YoutubeParsingHelper.extractCachedUrlIfNeeded(webcacheUrl);
    assertEquals("https://mohfw.gov.in/", fromWebcache);

    final String fromOrdinary = YoutubeParsingHelper.extractCachedUrlIfNeeded(ordinaryUrl);
    assertEquals("https://www.infektionsschutz.de/coronavirus-sars-cov-2.html", fromOrdinary);
}
} }

View file

@ -3,15 +3,21 @@ package org.schabi.newpipe.extractor.services.youtube.search;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.*;
import org.schabi.newpipe.extractor.ListExtractor;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.search.SearchExtractor; import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.DefaultSearchExtractorTest; import org.schabi.newpipe.extractor.services.DefaultSearchExtractorTest;
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
import org.schabi.newpipe.extractor.stream.Description;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static java.util.Collections.singletonList; import static java.util.Collections.singletonList;
import static junit.framework.TestCase.assertFalse; import static junit.framework.TestCase.assertFalse;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
@ -211,4 +217,39 @@ public class YoutubeSearchExtractorTest {
assertNoDuplicatedItems(YouTube, page1, page2); assertNoDuplicatedItems(YouTube, page1, page2);
} }
} }
/**
 * Runs the default search extractor tests against a YouTube video search for {@code "Covid"}
 * and declares the meta info (title, text, links and link texts) expected for that query.
 */
public static class MetaInfoTest extends DefaultSearchExtractorTest {
    private static SearchExtractor extractor;
    private static final String QUERY = "Covid";

    /**
     * Creates the extractor and fetches the page once, before any test of this class runs.
     * <p>
     * This has to be a {@code @BeforeClass} fixture rather than a {@code @Test}: the inherited
     * tests read the static {@code extractor} through {@link #extractor()}, and JUnit gives no
     * guarantee that one particular test method runs before the others.
     */
    @BeforeClass
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        extractor = YouTube.getSearchExtractor(QUERY, singletonList(VIDEOS), "");
        extractor.fetchPage();
    }

    @Override public String expectedSearchString() { return QUERY; }
    @Nullable @Override public String expectedSearchSuggestion() { return null; }

    /**
     * @return the single meta info expected for the query, with two links and their texts
     */
    @Override
    public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
        final List<URL> urls = new ArrayList<>();
        urls.add(new URL("https://www.who.int/emergencies/diseases/novel-coronavirus-2019"));
        urls.add(new URL("https://www.who.int/emergencies/diseases/novel-coronavirus-2019/covid-19-vaccines"));

        final List<String> urlTexts = new ArrayList<>();
        urlTexts.add("LEARN MORE");
        urlTexts.add("Learn about vaccine progress from the WHO");

        return Collections.singletonList(new MetaInfo(
                "COVID-19",
                new Description("Get the latest information from the WHO about coronavirus.", Description.PLAIN_TEXT),
                urls,
                urlTexts
        ));
    }

    @Override public SearchExtractor extractor() { return extractor; }
    @Override public StreamingService expectedService() { return YouTube; }
    @Override public String expectedName() { return QUERY; }
    @Override public String expectedId() { return QUERY; }
    @Override public String expectedUrlContains() { return "youtube.com/results?search_query=" + QUERY; }
    @Override public String expectedOriginalUrlContains() throws Exception { return "youtube.com/results?search_query=" + QUERY; }
}
} }

View file

@ -3,16 +3,22 @@ package org.schabi.newpipe.extractor.services.youtube.stream;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.schabi.newpipe.DownloaderTestImpl; import org.schabi.newpipe.DownloaderTestImpl;
import org.schabi.newpipe.extractor.MetaInfo;
import org.schabi.newpipe.extractor.NewPipe; import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService; import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException; import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.DefaultStreamExtractorTest; import org.schabi.newpipe.extractor.services.DefaultStreamExtractorTest;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamExtractor; import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamSegment; import org.schabi.newpipe.extractor.stream.StreamSegment;
import org.schabi.newpipe.extractor.stream.StreamType; import org.schabi.newpipe.extractor.stream.StreamType;
import javax.annotation.Nullable;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
import javax.annotation.Nullable; import javax.annotation.Nullable;
@ -258,4 +264,46 @@ public class YoutubeStreamExtractorDefaultTest {
assertNotNull(segment.getPreviewUrl()); assertNotNull(segment.getPreviewUrl());
} }
} }
/**
 * Runs the default stream extractor tests against the video {@code q6fgbYWsMgw} and declares
 * the single meta info entry (text, link and link text, empty title) expected for it.
 */
public static class PublicBroadcasterTest extends DefaultStreamExtractorTest {
    private static final String ID = "q6fgbYWsMgw";
    private static final int TIMESTAMP = 0;
    private static final String URL = BASE_URL + ID;
    private static StreamExtractor extractor;

    /** Initialises NewPipe and fetches the stream page once for all tests of this class. */
    @BeforeClass
    public static void setUp() throws Exception {
        NewPipe.init(DownloaderTestImpl.getInstance());
        extractor = YouTube.getStreamExtractor(URL);
        extractor.fetchPage();
    }

    @Override
    public StreamExtractor extractor() {
        return extractor;
    }

    @Override
    public StreamingService expectedService() {
        return YouTube;
    }

    @Override
    public String expectedName() {
        return "Was verbirgt sich am tiefsten Punkt des Ozeans?";
    }

    @Override
    public String expectedId() {
        return ID;
    }

    @Override
    public String expectedUrlContains() {
        return BASE_URL + ID;
    }

    @Override
    public String expectedOriginalUrlContains() {
        return URL;
    }

    @Override
    public StreamType expectedStreamType() {
        return StreamType.VIDEO_STREAM;
    }

    @Override
    public String expectedUploaderName() {
        return "Dinge Erklärt Kurzgesagt";
    }

    @Override
    public String expectedUploaderUrl() {
        return "https://www.youtube.com/channel/UCwRH985XgMYXQ6NxXDo8npw";
    }

    @Override
    public List<String> expectedDescriptionContains() {
        return Arrays.asList("Lasst uns abtauchen!", "Angebot von funk", "Dinge");
    }

    @Override
    public long expectedLength() {
        return 631;
    }

    @Override
    public long expectedTimestamp() {
        return TIMESTAMP;
    }

    @Override
    public long expectedViewCountAtLeast() {
        return 1_600_000;
    }

    @Nullable
    @Override
    public String expectedUploadDate() {
        return "2019-06-12 00:00:00.000";
    }

    @Nullable
    @Override
    public String expectedTextualUploadDate() {
        return "2019-06-12";
    }

    @Override
    public long expectedLikeCountAtLeast() {
        return 70000;
    }

    @Override
    public long expectedDislikeCountAtLeast() {
        return 500;
    }

    /**
     * @return one meta info with an empty title, a plain-text description, one link and
     *         one link text
     */
    @Override
    public List<MetaInfo> expectedMetaInfo() throws MalformedURLException {
        final Description text =
                new Description("Funk is a German public broadcast service.", Description.PLAIN_TEXT);
        final List<URL> links = Collections.singletonList(
                new URL("https://de.wikipedia.org/wiki/Funk_(Medienangebot)?wprov=yicw1"));
        final List<String> linkTexts = Collections.singletonList("Wikipedia (German)");
        return Collections.singletonList(new MetaInfo("", text, links, linkTexts));
    }
}
} }