Merge pull request #261 from TeamNewPipe/yt_new

Update YouTube to material version
This commit is contained in:
Tobias Groza 2020-02-25 21:57:18 +01:00 committed by GitHub
commit 8838e2d136
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 871 additions and 1036 deletions

View file

@ -1,11 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
import org.schabi.newpipe.extractor.downloader.Downloader;
@ -17,11 +17,18 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
/*
* Created by Christian Schabesberger on 25.07.16.
@ -49,6 +56,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
private Document doc;
private JsonObject initialData;
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler);
@ -59,11 +67,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
final Response response = downloader.get(channelUrl, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
}
@Override
public String getNextPageUrl() throws ExtractionException {
return getNextPageUrlFrom(doc);
return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
}
@Nonnull
@ -80,15 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getId() throws ParsingException {
try {
return doc.select("meta[itemprop=\"channelId\"]").first().attr("content");
} catch (Exception ignored) {}
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
try {
Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
return element.attr("data-channel-external-id");
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId");
} catch (Exception e) {
throw new ParsingException("Could not get channel id", e);
}
@ -98,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getName() throws ParsingException {
try {
return doc.select("meta[property=\"og:title\"]").first().attr("content");
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title");
} catch (Exception e) {
throw new ParsingException("Could not get channel name", e);
}
@ -107,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getAvatarUrl() throws ParsingException {
try {
return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Could not get avatar", e);
}
@ -116,13 +119,27 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getBannerUrl() throws ParsingException {
try {
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
String cssContent = el.html();
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
String url = null;
try {
url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
return null;
}
// the first characters of the banner URLs are different for each channel and some are not even valid URLs
if (url.startsWith("//")) {
url = url.substring(2);
}
if (url.startsWith(HTTP)) {
url = Utils.replaceHttpWithHttps(url);
} else if (!url.startsWith(HTTPS)) {
url = HTTPS + url;
}
return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
return url;
} catch (Exception e) {
throw new ParsingException("Could not get Banner", e);
throw new ParsingException("Could not get banner", e);
}
}
@ -137,12 +154,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public long getSubscriberCount() throws ParsingException {
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
if (el != null) {
String elTitle = el.attr("title");
final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
if (subscriberInfo != null) {
try {
return Utils.mixedNumberWordToLong(elTitle);
return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text"));
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
@ -155,7 +170,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public String getDescription() throws ParsingException {
try {
return doc.select("meta[name=\"description\"]").first().attr("content");
return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
} catch (Exception e) {
throw new ParsingException("Could not get channel description", e);
}
@ -165,8 +180,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
collectStreamsFrom(collector, ul);
JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl());
}
@ -181,106 +198,98 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
fetchPage();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonObject ajaxJson;
JsonArray ajaxJson;
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
try {
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
ajaxJson = JsonParser.object().from(response);
} catch (JsonParserException pe) {
throw new ParsingException("Could not parse json data for next streams", pe);
// Use the hardcoded client version first to get JSON with a structure we know
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
}
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
collectStreamsFrom(collector, ajaxHtml.select("body").first());
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("sectionListContinuation");
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
}
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
throws ParsingException {
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
if (!loadMoreHtmlDataRaw.isEmpty()) {
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
} else {
/**
 * Builds the {@code browse_ajax} URL of the next page from a {@code continuations} array.
 * Only the first entry of the array is used.
 *
 * @param continuations the {@code continuations} array of a renderer; may be {@code null}
 * @return the URL of the next page, or an empty string if there is no usable continuation
 */
private String getNextPageUrlFrom(JsonArray continuations) {
    // No (or an empty) continuations array means there is no further page.
    if (continuations == null || continuations.isEmpty()) {
        return "";
    }

    final JsonObject firstContinuation = continuations.getObject(0);
    if (firstContinuation == null) {
        return "";
    }

    final JsonObject nextContinuationData = firstContinuation.getObject("nextContinuationData");
    if (nextContinuationData == null) {
        return "";
    }

    final String continuation = nextContinuationData.getString("continuation");
    if (continuation == null) {
        // Without a token the URL would contain the literal text "null".
        return "";
    }
    final String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");

    // The continuation token is passed twice: as "ctoken" and as "continuation".
    return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
            + "&itct=" + clickTrackingParams;
}
private String getNextPageUrlFrom(Document d) throws ParsingException {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if (button != null) {
return button.attr("abs:data-uix-load-more-href");
} else {
// Sometimes channels are simply so small, they don't have a more streams/videos
return "";
}
} catch (Exception e) {
throw new ParsingException("Could not get next page url", e);
}
}
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
collector.reset();
final String uploaderName = getName();
final String uploaderUrl = getUrl();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (final Element li : element.children()) {
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
for (Object video : videos) {
JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
.getArray("contents").getObject(0);
if (videoInfo.getObject("videoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
try {
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.text();
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
public String getUploaderName() {
return uploaderName;
}
@Override
public String getUploaderUrl() throws ParsingException {
public String getUploaderUrl() {
return uploaderUrl;
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
String url;
Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
}
}
}
/**
 * Looks up the {@code tabRenderer} of the tab titled "Videos" in the channel's initial data.
 * <p>
 * NOTE(review): the tab is matched by its title text; presumably this text depends on the
 * language the page was served in, so verify this against the localization in use.
 *
 * @return the {@code tabRenderer} object of the "Videos" tab
 * @throws ParsingException if the tab list is missing or contains no tab titled "Videos"
 */
private JsonObject getVideoTab() throws ParsingException {
    final JsonArray tabs;
    try {
        tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
                .getArray("tabs");
    } catch (Exception e) {
        // A missing intermediate object is a parsing problem, not a programming error.
        throw new ParsingException("Could not find Videos tab", e);
    }
    if (tabs == null) {
        throw new ParsingException("Could not find Videos tab");
    }

    for (final Object tab : tabs) {
        if (!(tab instanceof JsonObject)) {
            continue;
        }
        final JsonObject tabRenderer = ((JsonObject) tab).getObject("tabRenderer");
        // Constant-first equals: a tab without a title must not cause a NullPointerException.
        if (tabRenderer != null && "Videos".equals(tabRenderer.getString("title"))) {
            return tabRenderer;
        }
    }

    throw new ParsingException("Could not find Videos tab");
}
}

View file

@ -1,12 +1,14 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
/*
* Created by Christian Schabesberger on 12.02.17.
@ -29,87 +31,75 @@ import java.util.regex.Pattern;
*/
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
private final Element el;
private JsonObject channelInfoItem;
public YoutubeChannelInfoItemExtractor(Element el) {
this.el = el;
public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) {
this.channelInfoItem = channelInfoItem;
}
@Override
public String getThumbnailUrl() throws ParsingException {
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first()
.select("img").first();
String url = img.attr("abs:src");
if (url.contains("gif")) {
url = img.attr("abs:data-thumb");
try {
String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
if (url.startsWith("//")) {
url = url.substring(2);
}
if (url.startsWith(HTTP)) {
url = Utils.replaceHttpWithHttps(url);
} else if (!url.startsWith(HTTPS)) {
url = HTTPS + url;
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
return url;
}
@Override
public String getName() throws ParsingException {
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
.text();
try {
return channelInfoItem.getObject("title").getString("simpleText");
} catch (Exception e) {
throw new ParsingException("Could not get name", e);
}
}
@Override
public String getUrl() throws ParsingException {
try {
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
.attr("abs:data-href");
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
if (match.matches()) {
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
}
} catch(Exception ignored) {}
// fallback method for channels without "Subscribe" button (or just in case yt changes things)
// provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail
try {
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
.attr("abs:href");
String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work?
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
} catch (Exception e) {
throw new ParsingException("Could not get channel url", e);
throw new ParsingException("Could not get url", e);
}
}
@Override
public long getSubscriberCount() throws ParsingException {
final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first();
if (subsEl != null) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text()));
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
} else {
// If the element is null, the channel have the subscriber count disabled
return -1;
try {
String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0];
return Utils.mixedNumberWordToLong(subscribers);
} catch (Exception e) {
throw new ParsingException("Could not get subscriber count", e);
}
}
@Override
public long getStreamCount() throws ParsingException {
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first();
if (metaEl == null) {
return 0;
} else {
return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text()));
try {
return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText")
.getArray("runs").getObject(0).getString("text")));
} catch (Exception e) {
throw new ParsingException("Could not get stream count", e);
}
}
@Override
public String getDescription() throws ParsingException {
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first();
if (desEl == null) {
return "";
} else {
return desEl.text();
try {
return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text");
} catch (Exception e) {
throw new ParsingException("Could not get description", e);
}
}
}

View file

@ -1,34 +1,39 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
@SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor {
private Document doc;
private JsonObject initialData;
private JsonObject uploaderInfo;
private JsonObject playlistInfo;
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler);
@ -39,18 +44,61 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
uploaderInfo = getUploaderInfo();
playlistInfo = getPlaylistInfo();
}
/**
 * Finds the {@code videoOwnerRenderer} object describing the playlist uploader.
 * <p>
 * All sidebar items are searched for a {@code playlistSidebarSecondaryInfoRenderer} that
 * contains {@code videoOwner.videoOwnerRenderer}; the first match is returned. This replaces
 * the previous fixed lookups at index 1 and at {@code items.size()}, the latter being one past
 * the last element and therefore never able to match.
 *
 * @return the {@code videoOwnerRenderer} object of the uploader
 * @throws ParsingException if the sidebar items are missing or none of them carries uploader info
 */
private JsonObject getUploaderInfo() throws ParsingException {
    final JsonArray items;
    try {
        items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items");
    } catch (Exception e) {
        throw new ParsingException("Could not get uploader info", e);
    }
    if (items == null) {
        throw new ParsingException("Could not get uploader info");
    }

    for (final Object item : items) {
        if (!(item instanceof JsonObject)) {
            continue;
        }
        final JsonObject secondaryInfo = ((JsonObject) item).getObject("playlistSidebarSecondaryInfoRenderer");
        if (secondaryInfo == null) {
            continue;
        }
        final JsonObject videoOwner = secondaryInfo.getObject("videoOwner");
        if (videoOwner == null) {
            continue;
        }
        final JsonObject ownerRenderer = videoOwner.getObject("videoOwnerRenderer");
        if (ownerRenderer != null) {
            return ownerRenderer;
        }
    }

    throw new ParsingException("Could not get uploader info");
}
/**
 * Reads the {@code playlistSidebarPrimaryInfoRenderer} of the first sidebar item
 * from the initial data.
 *
 * @return the primary info renderer object as returned by the JSON lookup
 * @throws ParsingException if any step of the lookup throws
 */
private JsonObject getPlaylistInfo() throws ParsingException {
    try {
        final JsonObject sidebarRenderer = initialData.getObject("sidebar")
                .getObject("playlistSidebarRenderer");
        final JsonObject firstItem = sidebarRenderer.getArray("items").getObject(0);
        return firstItem.getObject("playlistSidebarPrimaryInfoRenderer");
    } catch (Exception e) {
        throw new ParsingException("Could not get PlaylistInfo", e);
    }
}
@Override
public String getNextPageUrl() throws ExtractionException {
return getNextPageUrlFrom(doc);
public String getNextPageUrl() {
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("playlistVideoListRenderer").getArray("continuations"));
}
@Nonnull
@Override
public String getName() throws ParsingException {
try {
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text");
if (name != null) return name;
} catch (Exception ignored) {}
try {
return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title");
} catch (Exception e) {
throw new ParsingException("Could not get playlist name", e);
}
@ -59,7 +107,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public String getThumbnailUrl() throws ParsingException {
try {
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer")
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
try {
return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail")
.getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Could not get playlist thumbnail", e);
}
@ -75,8 +128,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
public String getUploaderUrl() throws ParsingException {
try {
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
doc.select("button[class*=\"yt-uix-subscription-button\"]")
.first().attr("data-channel-external-id");
uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader url", e);
}
@ -85,7 +137,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public String getUploaderName() throws ParsingException {
try {
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader name", e);
}
@ -94,7 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public String getUploaderAvatarUrl() throws ParsingException {
try {
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Could not get playlist uploader avatar", e);
}
@ -102,33 +154,26 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
@Override
public long getStreamCount() throws ParsingException {
String input;
try {
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
} catch (IndexOutOfBoundsException e) {
String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text");
return Long.parseLong(Utils.removeNonDigitCharacters(viewsText));
} catch (Exception e) {
throw new ParsingException("Could not get video count from playlist", e);
}
try {
return Long.parseLong(Utils.removeNonDigitCharacters(input));
} catch (NumberFormatException e) {
// When there's no videos in a playlist, there's no number in the "innerHtml",
// all characters that is not a number is removed, so we try to parse a empty string
if (!input.isEmpty()) {
return 0;
} else {
throw new ParsingException("Could not handle input: " + input, e);
}
}
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
public InfoItemsPage<StreamInfoItem> getInitialPage() {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
collectStreamsFrom(collector, tbody);
JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("playlistVideoListRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl());
}
@ -139,156 +184,67 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
}
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonObject pageJson;
JsonArray ajaxJson;
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
try {
final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
pageJson = JsonParser.object().from(responseBody);
} catch (JsonParserException pe) {
throw new ParsingException("Could not parse ajax json", pe);
// Use the hardcoded client version first to get JSON with a structure we know
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
}
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
+ pageJson.getString("content_html")
+ "</tbody></table>", pageUrl);
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("playlistVideoListContinuation");
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
}
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
throws ParsingException {
String nextPageHtml = pageJson.getString("load_more_widget_html");
if (!nextPageHtml.isEmpty()) {
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
} else {
/**
 * Builds the {@code browse_ajax} URL of the next playlist page from a {@code continuations}
 * array. Only the first entry of the array is used.
 *
 * @param continuations the {@code continuations} array of a renderer; may be {@code null}
 * @return the URL of the next page, or an empty string if there is no usable continuation
 */
private String getNextPageUrlFrom(JsonArray continuations) {
    // No (or an empty) continuations array means there is no further page.
    if (continuations == null || continuations.isEmpty()) {
        return "";
    }

    final JsonObject firstContinuation = continuations.getObject(0);
    if (firstContinuation == null) {
        return "";
    }

    final JsonObject nextContinuationData = firstContinuation.getObject("nextContinuationData");
    if (nextContinuationData == null) {
        return "";
    }

    final String continuation = nextContinuationData.getString("continuation");
    if (continuation == null) {
        // Without a token the URL would contain the literal text "null".
        return "";
    }
    final String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");

    // The continuation token is passed twice: as "ctoken" and as "continuation".
    return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
            + "&itct=" + clickTrackingParams;
}
private String getNextPageUrlFrom(Document d) throws ParsingException {
try {
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
if (button != null) {
return button.attr("abs:data-uix-load-more-href");
} else {
// Sometimes playlists are simply so small, they don't have a more streams/videos
return "";
}
} catch (Exception e) {
throw new ParsingException("could not get next streams' url", e);
}
}
/**
 * Resets the collector and commits one stream item extractor per playlist entry.
 *
 * NOTE(review): two versions of this method are interleaved in this span (it reads like a diff
 * hunk without +/- markers). One version walks the children of an HTML {@code Element} and
 * overrides most getters of the item extractor; the other walks a {@code JsonArray} and commits
 * only entries that carry a "playlistVideoRenderer" object.
 */
private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) {
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) {
collector.reset();
// HTML variant: nothing to collect when no list element was found
if (element == null) {
return;
}
final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (final Element li : element.children()) {
if (isDeletedItem(li)) {
continue;
// JSON variant: entries without a "playlistVideoRenderer" object are skipped
for (Object video : videos) {
if (((JsonObject) video).getObject("playlistVideoRenderer") != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {
@Override
public long getViewCount() {
// the view count of a playlist entry is always reported as -1 here
return -1;
}
});
}
// HTML variant: every getter below reads from the <li> element of the playlist entry
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
// cached result of getUploaderLink()
public Element uploaderLink;
@Override
public boolean isAd() {
return false;
}
@Override
public String getUrl() throws ParsingException {
try {
return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
return li.attr("data-title");
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
}
}
@Override
public long getDuration() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
Element first = li.select("div[class=\"timestamp\"] span").first();
if (first == null) {
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
// because in other cases, those videos don't even show up
return -1;
}
return YoutubeParsingHelper.parseDurationString(first.text());
} catch (Exception e) {
throw new ParsingException("Could not get duration" + getUrl(), e);
}
}
private Element getUploaderLink() {
// should always be present since we filter deleted items
if (uploaderLink == null) {
uploaderLink = li.select("div[class=pl-video-owner] a").first();
}
return uploaderLink;
}
@Override
public String getUploaderName() throws ParsingException {
return getUploaderLink().text();
}
@Override
public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
return getUploaderLink().attr("abs:href");
}
@Override
public String getTextualUploadDate() throws ParsingException {
return "";
}
@Override
public long getViewCount() throws ParsingException {
return -1;
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
// the thumbnail url is built from the video id taken out of the item's own url
return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg";
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
}
}
/**
 * Decides whether a playlist entry counts as deleted.
 * An entry is treated as deleted when it contains no uploader link,
 * i.e. no anchor inside the "pl-video-owner" div.
 *
 * @param li the list item of the playlist entry
 * @return true if no uploader link was found in the item
 */
private boolean isDeletedItem(Element li) {
    final boolean hasUploaderLink = !li.select("div[class=pl-video-owner] a").isEmpty();
    return !hasUploaderLink;
}
}

View file

@ -1,97 +1,63 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
import org.schabi.newpipe.extractor.utils.Utils;
/**
 * Extracts the fields of a single playlist item (thumbnail, name, url, uploader, stream count).
 *
 * NOTE(review): this span shows two versions of the class interleaved (it reads like a diff hunk
 * without +/- markers): one reads from an HTML {@code Element} ({@code el}), the other from a
 * {@code JsonObject} ({@code playlistInfoItem}). Every getter wraps any failure in a
 * {@link ParsingException}.
 */
public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor {
private final Element el;
private JsonObject playlistInfoItem;
public YoutubePlaylistInfoItemExtractor(Element el) {
this.el = el;
public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) {
this.playlistInfoItem = playlistInfoItem;
}
@Override
public String getThumbnailUrl() throws ParsingException {
String url;
try {
Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first()
.select("img").first();
url = te.attr("abs:src");
// HTML variant: when "src" points at a .gif, the url is taken from "data-thumb" instead
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
// JSON variant: first url of the first entry of the "thumbnails" array
return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails")
.getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Failed to extract playlist thumbnail url", e);
throw new ParsingException("Could not get thumbnail url", e);
}
return url;
}
@Override
public String getName() throws ParsingException {
String name;
try {
final Element title = el.select("[class=\"yt-lockup-title\"]").first()
.select("a").first();
name = title == null ? "" : title.text();
return playlistInfoItem.getObject("title").getString("simpleText");
} catch (Exception e) {
throw new ParsingException("Failed to extract playlist name", e);
throw new ParsingException("Could not get name", e);
}
return name;
}
@Override
public String getUrl() throws ParsingException {
try {
final Element a = el.select("div[class=\"yt-lockup-meta\"]")
.select("ul[class=\"yt-lockup-meta-info\"]")
.select("li").select("a").first();
if (a != null) {
return a.attr("abs:href");
}
// this is for yt premium playlists
// NOTE(review): the attribute selector below has no closing ']' - confirm it parses as intended
return el.select("h3[class=\"yt-lockup-title\"").first()
.select("a").first()
.attr("abs:href");
// JSON variant: the url is built from the "playlistId" field by the playlist link handler factory
String id = playlistInfoItem.getString("playlistId");
return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id);
} catch (Exception e) {
throw new ParsingException("Failed to extract playlist url", e);
throw new ParsingException("Could not get url", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
String name;
try {
final Element div = el.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first();
name = div.text();
// JSON variant: text of the first run of "longBylineText"
return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text");
} catch (Exception e) {
throw new ParsingException("Failed to extract playlist uploader", e);
throw new ParsingException("Could not get uploader name", e);
}
return name;
}
@Override
public long getStreamCount() throws ParsingException {
try {
final Element count = el.select("span[class=\"formatted-video-count-label\"]").first()
.select("b").first();
// HTML variant: a missing count label is reported as 0
return count == null ? 0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text()));
// JSON variant: a missing or digit-free "videoCount" ends up in the catch block below
return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount")));
} catch (Exception e) {
throw new ParsingException("Failed to extract playlist stream count", e);
throw new ParsingException("Could not get stream count", e);
}
}
}

View file

@ -1,8 +1,11 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.Jsoup;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
@ -14,13 +17,14 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.Parser;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
/*
* Created by Christian Schabesberger on 22.07.2018
@ -45,6 +49,7 @@ import java.net.URL;
public class YoutubeSearchExtractor extends SearchExtractor {
private Document doc;
private JsonObject initialData;
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
super(service, linkHandler);
@ -55,6 +60,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
}
@Nonnull
@ -65,80 +71,109 @@ public class YoutubeSearchExtractor extends SearchExtractor {
/**
 * Returns the corrected search query offered for the current search, or an empty string
 * when there is none.
 *
 * NOTE(review): the HTML lookup ("spell-correction" div) and the JSON lookup
 * ("showingResultsForRenderer") look like the removed and added sides of a diff hunk shown together.
 */
@Override
public String getSearchSuggestion() {
final Element el = doc.select("div[class*=\"spell-correction\"]").first();
if (el != null) {
return el.select("a").first().text();
} else {
// JSON variant: the renderer is looked up in the first item of the first result section
JsonObject showingResultsForRenderer = initialData.getObject("contents")
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
.getObject("sectionListRenderer").getArray("contents").getObject(0)
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
.getObject("showingResultsForRenderer");
if (showingResultsForRenderer == null) {
return "";
} else {
// text of the first run of the corrected query
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
.getObject(0).getString("text");
}
}
/**
 * Builds the first page of search results together with the url of the next page.
 *
 * NOTE(review): the first return statement (based on collectItems(doc)) and the JSON-based
 * statements after it look like the removed and added sides of a diff hunk shown together.
 */
@Nonnull
@Override
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
return new InfoItemsPage<>(collectItems(doc), getNextPageUrl());
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
// result items are read from the first section of the sectionListRenderer
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
collectStreamsFrom(collector, videos);
return new InfoItemsPage<>(collector, getNextPageUrl());
}
/**
 * Returns the url of the page following the initial one.
 *
 * NOTE(review): the "&page=2" return and the continuation-based return look like the removed and
 * added sides of a diff hunk shown together. The continuation-based one hands the "continuations"
 * array of the first result section to getNextPageUrlFrom.
 */
@Override
public String getNextPageUrl() throws ExtractionException {
return getUrl() + "&page=" + 2;
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
}
/**
 * Loads one further page of search results from the given page url.
 *
 * NOTE(review): this span interleaves several things, as in a diff hunk without +/- markers:
 * an HTML-based getPage together with its helpers getNextPageUrlFromCurrentUrl and collectItems,
 * and a JSON-based getPage that requests the page with X-YouTube-Client-* headers, parses the
 * response as a JSON array and reads the items from "itemSectionContinuation".
 */
@Override
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
doc = Jsoup.parse(response, pageUrl);
if (pageUrl == null || pageUrl.isEmpty()) {
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
}
return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
}
// HTML variant: the next page url is the current url with its "page" query parameter incremented by one
private String getNextPageUrlFromCurrentUrl(String currentUrl)
throws MalformedURLException, UnsupportedEncodingException {
final int pageNr = Integer.parseInt(
Parser.compatParseMap(
new URL(currentUrl)
.getQuery())
.get("page"));
return currentUrl.replace("&page=" + pageNr,
"&page=" + Integer.toString(pageNr + 1));
}
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
collector.reset();
JsonArray ajaxJson;
Element list = doc.select("ol[class=\"item-section\"]").first();
final TimeAgoParser timeAgoParser = getTimeAgoParser();
Map<String, List<String>> headers = new HashMap<>();
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
for (Element item : list.children()) {
/* First we need to determine which kind of item we are working with.
Youtube depicts five different kinds of items on its search result page. These are
regular videos, playlists, channels, two types of video suggestions, and a "no video
found" item. Since we only want videos, we need to filter out all the others.
An example for this can be seen here:
https://www.youtube.com/results?search_query=asdf&page=1
We already applied a filter to the url, so we don't need to care about channels and
playlists now.
*/
Element el;
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
throw new NothingFoundException(el.text());
// video item type
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(el));
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
item.select(".yt-pl-icon-mix").isEmpty()) {
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
try {
// Use the hardcoded client version first to get JSON with a structure we know
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (Exception e) {
// second attempt: same request, but with the client version obtained from the fetched page
// NOTE(review): only JsonParserException is translated below; any other failure of this
// retry (including the ParsingException for a short response) propagates unchanged
try {
headers.put("X-YouTube-Client-Version",
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
if (response.length() < 50) { // ensure to have a valid response
throw new ParsingException("Could not parse json data for next streams");
}
ajaxJson = JsonParser.array().from(response);
} catch (JsonParserException ignored) {
throw new ParsingException("Could not parse json data for next streams", e);
}
}
return collector;
// the continuation contents are read from the second element (index 1) of the response array
JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
.getObject("continuationContents").getObject("itemSectionContinuation");
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
}
/**
 * Resets the collector and commits one item extractor for every recognised search result.
 * <p>
 * Each element of {@code videos} is expected to be a JSON object holding exactly one renderer.
 * "videoRenderer", "channelRenderer" and "playlistRenderer" entries are committed as stream,
 * channel and playlist items respectively; any other entry is skipped.
 *
 * @param collector the collector to reset and fill
 * @param videos    the "contents" array of a result section; {@code null} yields an empty collector
 * @throws NothingFoundException if a "backgroundPromoRenderer" entry is present; its message is
 *                               the text of the first run of the renderer's "bodyText"
 */
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException {
    collector.reset();

    // The array comes straight from getArray(), which callers in this class already treat as
    // possibly null (see the null check on "continuations"), so guard instead of failing with an NPE.
    if (videos == null) {
        return;
    }

    final TimeAgoParser timeAgoParser = getTimeAgoParser();

    for (Object item : videos) {
        if (!(item instanceof JsonObject)) {
            // not a renderer holder; nothing to extract
            continue;
        }
        final JsonObject rendererHolder = (JsonObject) item;

        final JsonObject backgroundPromo = rendererHolder.getObject("backgroundPromoRenderer");
        if (backgroundPromo != null) {
            throw new NothingFoundException(backgroundPromo
                    .getObject("bodyText").getArray("runs").getObject(0).getString("text"));
        }

        final JsonObject videoRenderer = rendererHolder.getObject("videoRenderer");
        if (videoRenderer != null) {
            collector.commit(new YoutubeStreamInfoItemExtractor(videoRenderer, timeAgoParser));
            continue;
        }

        final JsonObject channelRenderer = rendererHolder.getObject("channelRenderer");
        if (channelRenderer != null) {
            collector.commit(new YoutubeChannelInfoItemExtractor(channelRenderer));
            continue;
        }

        final JsonObject playlistRenderer = rendererHolder.getObject("playlistRenderer");
        if (playlistRenderer != null) {
            collector.commit(new YoutubePlaylistInfoItemExtractor(playlistRenderer));
        }
    }
}
/**
 * Builds the url of the next result page from a "continuations" array.
 * <p>
 * The continuation token and click tracking parameters are read from the
 * "nextContinuationData" object of the first array element.
 *
 * @param continuations the "continuations" array of a result section, may be {@code null}
 * @return the next page url, or an empty string when no continuation token is available
 * @throws ParsingException if the url of this extractor cannot be obtained
 */
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
    if (continuations == null || continuations.isEmpty()) {
        return "";
    }

    final JsonObject firstContinuation = continuations.getObject(0);
    final JsonObject nextContinuationData = firstContinuation == null
            ? null
            : firstContinuation.getObject("nextContinuationData");
    if (nextContinuationData == null) {
        // no further page advertised; avoid dereferencing a missing object
        return "";
    }

    final String continuation = nextContinuationData.getString("continuation");
    if (continuation == null || continuation.isEmpty()) {
        // without a token the url would contain the literal text "null"
        return "";
    }

    final StringBuilder nextPageUrl = new StringBuilder(getUrl())
            .append("&pbj=1&ctoken=").append(continuation)
            .append("&continuation=").append(continuation);

    final String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
    if (clickTrackingParams != null) {
        nextPageUrl.append("&itct=").append(clickTrackingParams);
    }
    return nextPageUrl.toString();
}
}

View file

@ -3,11 +3,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -15,7 +13,6 @@ import org.schabi.newpipe.extractor.MediaFormat;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Request;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
@ -23,23 +20,41 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.stream.AudioStream;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.Frameset;
import org.schabi.newpipe.extractor.stream.Stream;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/*
* Created by Christian Schabesberger on 06.08.15.
@ -62,8 +77,6 @@ import java.util.regex.Pattern;
*/
public class YoutubeStreamExtractor extends StreamExtractor {
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
/*//////////////////////////////////////////////////////////////////////////
// Exceptions
//////////////////////////////////////////////////////////////////////////*/
@ -74,12 +87,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
/**
 * A {@link ContentNotAvailableException} that carries a message and the underlying cause.
 * The constructor is package-private.
 */
public class SubtitlesException extends ContentNotAvailableException {
SubtitlesException(String message, Throwable cause) {
super(message, cause);
}
}
/*//////////////////////////////////////////////////////////////////////////*/
private Document doc;
@ -88,6 +95,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonObject playerResponse;
private JsonObject initialData;
@Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@ -106,22 +114,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/**
 * Returns the title of the video.
 *
 * NOTE(review): two versions are interleaved in this span, as in a diff hunk without +/- markers.
 * One returns the "videoDetails" title of the player response and falls back to the HTML
 * "meta[name=title]" element. The other first reads the title from the video primary info
 * renderer and only then falls back to the "videoDetails" title of the player response.
 *
 * @throws ParsingException if no source yields a title
 */
@Override
public String getName() throws ParsingException {
assertPageFetched();
String title = null;
try {
return playerResponse.getObject("videoDetails").getString("title");
} catch (Exception e) {
// fallback HTML method
String name = null;
title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
if (title == null) {
try {
name = doc.select("meta[name=title]").attr(CONTENT);
} catch (Exception ignored) {
}
if (name == null) {
throw new ParsingException("Could not get name", e);
}
return name;
title = playerResponse.getObject("videoDetails").getString("title");
} catch (Exception ignored) {}
}
if (title != null) return title;
throw new ParsingException("Could not get name");
}
@Override
@ -131,19 +134,33 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
try {
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
} catch (Exception e) {
String uploadDate = null;
try {
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
} catch (Exception ignored) {
}
// return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
} catch (Exception ignored) {}
if (uploadDate == null) {
throw new ParsingException("Could not get upload date", e);
try {
if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) {
String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10);
try { // Premiered 20 hours ago
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
Calendar parsedTime = timeAgoParser.parse(time).date();
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
} catch (Exception ignored) {}
try { // e.g. "Premiered Feb 21, 2020"
// Use 'yyyy' (calendar year) here, not 'YYYY' (week year): when parsing, a week-year pattern
// does not set the calendar year, so the month and day of the text do not produce the
// intended date.
Date d = new SimpleDateFormat("MMM dd, yyyy", Locale.ENGLISH).parse(time);
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
} catch (Exception ignored) {}
}
return uploadDate;
}
} catch (Exception ignored) {}
try {
// TODO this parses English formatted dates only, we need a better approach to parse the textual date
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText"));
return new SimpleDateFormat("yyyy-MM-dd").format(d);
} catch (Exception ignored) {}
throw new ParsingException("Could not get upload date");
}
@Override
@ -167,15 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
} catch (Exception e) {
String url = null;
try {
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
} catch (Exception ignored) {}
if (url == null) {
throw new ParsingException("Could not get thumbnail url", e);
}
return url;
throw new ParsingException("Could not get thumbnail url");
}
}
@ -184,88 +193,65 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/**
 * Returns the description of the video, as HTML when links were found in it and as plain text
 * otherwise.
 *
 * NOTE(review): this span interleaves two versions, as in a diff hunk without +/- markers.
 * One parses the "eow-description" paragraph of the HTML page (with the helpers
 * parseHtmlAndGetFullLinks, coalesce and DESCRIPTION_TIMESTAMP_ONCLICK_REGEX). The other builds
 * the description from the "runs" of the video secondary info renderer and falls back to the
 * "shortDescription" of the player response.
 *
 * @throws ParsingException if no source yields a description
 */
@Override
public Description getDescription() throws ParsingException {
assertPageFetched();
// description with more info on links
try {
// first try to get html-formatted description
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML);
} catch (Exception e) {
try {
// fallback to raw non-html description
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
} catch (Exception ignored) {
throw new ParsingException("Could not get the description", e);
}
}
}
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
// :00 is NOT recognized as a timestamp in description or comments.
// 0:00 is recognized in both description and comments.
// https://www.youtube.com/watch?v=4cccfDXu1vA
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
"seekTo\\("
+ "(?:(\\d+)\\*3600\\+)?" // hours?
+ "(\\d+)\\*60\\+" // minutes
+ "(\\d+)" // seconds
+ "\\)");
// returns the first non-null argument; throws IllegalArgumentException when all are null
@SafeVarargs
private static <T> T coalesce(T... args) {
for (T arg : args) {
if (arg != null) return arg;
}
throw new IllegalArgumentException("all arguments to coalesce() were null");
}
private String parseHtmlAndGetFullLinks(String descriptionHtml)
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
final Document description = Jsoup.parse(descriptionHtml, getUrl());
for (Element a : description.select("a")) {
final String rawUrl = a.attr("abs:href");
final URL redirectLink = new URL(rawUrl);
final Matcher onClickTimestamp;
final String queryString;
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
.find()) {
a.removeAttr("onclick");
String hours = coalesce(onClickTimestamp.group(1), "0");
String minutes = onClickTimestamp.group(2);
String seconds = onClickTimestamp.group(3);
int timestamp = 0;
timestamp += Integer.parseInt(hours) * 3600;
timestamp += Integer.parseInt(minutes) * 60;
timestamp += Integer.parseInt(seconds);
String setTimestamp = "&t=" + timestamp;
// Even after clicking https://youtu.be/...?t=6,
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
a.attr("href", getUrl() + setTimestamp);
} else if ((queryString = redirectLink.getQuery()) != null) {
// if the query string is null we are not dealing with a redirect link,
// so we don't need to override it.
final String link =
Parser.compatParseMap(queryString).get("q");
if (link != null) {
// if link is null the a tag is a hashtag.
// They refer to the youtube search. We do not handle them.
a.text(link);
a.attr("href", link);
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
a.text(redirectLink.toString());
a.attr("href", redirectLink.toString());
// JSON variant starts here: becomes true as soon as one link is emitted as an <a> tag
boolean htmlConversionRequired = false;
JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs");
// NOTE(review): the initial capacity is the number of runs, not the expected number of characters
StringBuilder descriptionBuilder = new StringBuilder(descriptions.size());
for (Object textObjectHolder : descriptions) {
JsonObject textHolder = (JsonObject) textObjectHolder;
String text = textHolder.getString("text");
if (textHolder.getObject("navigationEndpoint") != null) {
// The text is a link. Get the URL it points to and generate a HTML link of it
if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) {
String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url");
if (internUrl.startsWith("/redirect?")) {
// q parameter can be the first parameter
internUrl = internUrl.substring(10);
String[] params = internUrl.split("&");
for (String param : params) {
// NOTE(review): a parameter without a value ("q=" or "q") makes split("=")[1] throw;
// the exception is swallowed by the outer catch and the plain-text fallback is used
if (param.split("=")[0].equals("q")) {
String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name());
if (url != null && !url.isEmpty()) {
// NOTE(review): url and text are inserted into the markup without HTML escaping
descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
htmlConversionRequired = true;
} else {
descriptionBuilder.append(text);
}
break;
}
}
} else if (internUrl.startsWith("http")) {
descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
htmlConversionRequired = true;
}
continue;
}
continue;
}
if (text != null) {
descriptionBuilder.append(text);
}
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
a.text(redirectLink.toString());
a.attr("href", redirectLink.toString());
}
String description = descriptionBuilder.toString();
if (!description.isEmpty()) {
if (htmlConversionRequired) {
description = description.replaceAll("\\n", "<br>");
// NOTE(review): as written, this pattern matches every single space, including the one
// inside the generated "<a href=" tags - confirm whether two spaces were intended
description = description.replaceAll(" ", " &nbsp;");
return new Description(description, Description.HTML);
}
return new Description(description, Description.PLAIN_TEXT);
}
} catch (Exception ignored) { }
// raw non-html description
try {
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
} catch (Exception ignored) {
throw new ParsingException("Could not get description");
}
return description.select("body").first().html();
}
@Override
@ -318,68 +304,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/**
 * Returns the view count of the video.
 *
 * NOTE(review): this span interleaves two versions, as in a diff hunk without +/- markers.
 * One reads "viewCount" from the player response (or asks getLiveStreamWatchingCount for live
 * streams) and falls back to the HTML "interactionCount" meta element. The other tries, in order,
 * the "runs" text and the "simpleText" of the videoViewCountRenderer and finally the player
 * response, then parses the digits of whichever text was found.
 *
 * @throws ParsingException if no source yields a view count
 */
@Override
public long getViewCount() throws ParsingException {
assertPageFetched();
String views = null;
try {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return getLiveStreamWatchingCount();
} else {
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
}
} catch (Exception e) {
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
.getObject("videoViewCountRenderer").getObject("viewCount")
.getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
if (views == null) {
try {
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
} catch (Exception ignored) {
throw new ParsingException("Could not get view count", e);
}
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
.getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
} catch (Exception ignored) {}
}
}
// Removed-side helper: posts the video id and client version to the updated_metadata endpoint
// and returns the digits of the first "updateViewershipAction" view count text in the response.
private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException {
// https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=
String innerTubeKey = null, clientVersion = null;
if (playerArgs != null && !playerArgs.isEmpty()) {
innerTubeKey = playerArgs.getString("innertube_api_key");
clientVersion = playerArgs.getString("innertube_context_client_version");
} else if (!videoInfoPage.isEmpty()) {
innerTubeKey = videoInfoPage.get("innertube_api_key");
clientVersion = videoInfoPage.get("innertube_context_client_version");
if (views == null) {
try {
views = playerResponse.getObject("videoDetails").getString("viewCount");
} catch (Exception ignored) {}
}
if (innerTubeKey == null || innerTubeKey.isEmpty()) {
throw new ExtractionException("Couldn't get innerTube key");
}
if (clientVersion == null || clientVersion.isEmpty()) {
throw new ExtractionException("Couldn't get innerTube client version");
}
final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey;
final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" +
",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8");
final Response response = getDownloader().execute(Request.newBuilder()
.post(metadataUrl, dataBody)
.addHeader("Content-Type", "application/json")
.build());
final JsonObject jsonObject = JsonParser.object().from(response.responseBody());
for (Object actionEntry : jsonObject.getArray("actions")) {
if (!(actionEntry instanceof JsonObject)) continue;
final JsonObject entry = (JsonObject) actionEntry;
final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null);
if (updateViewershipAction == null) continue;
final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs");
if (viewCountRuns.isEmpty()) continue;
final JsonObject textObject = viewCountRuns.getObject(0);
if (!textObject.has("text")) {
throw new ExtractionException("Response don't have \"text\" element");
}
return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text")));
}
throw new ExtractionException("Could not find correct results in response");
// NOTE(review): parseLong sits outside any try block here; presumably a text without digits
// ends in an unchecked NumberFormatException rather than a ParsingException - confirm
if (views != null) return Long.parseLong(Utils.removeNonDigitCharacters(views));
throw new ParsingException("Could not get view count");
}
@Override
@ -387,9 +330,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched();
String likesString = "";
try {
Element button = doc.select("button.like-button-renderer-like-button").first();
try {
likesString = button.select("span.yt-uix-button-content").first().text();
likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[0];
} catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -410,9 +353,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched();
String dislikesString = "";
try {
Element button = doc.select("button.like-button-renderer-dislike-button").first();
try {
dislikesString = button.select("span.yt-uix-button-content").first().text();
dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[1];
} catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -432,40 +375,36 @@ public class YoutubeStreamExtractor extends StreamExtractor {
/**
 * Returns the url of the uploader's channel.
 *
 * NOTE(review): two versions are interleaved in this span, as in a diff hunk without +/- markers.
 * One builds the url from the "channelId" of the player response and falls back to the
 * "yt-user-info" link of the HTML page. The other reads the "browseId" of the video owner
 * renderer first and falls back to the "channelId" of the player response.
 *
 * @throws ParsingException if no source yields a channel id or link
 */
@Override
public String getUploaderUrl() throws ParsingException {
assertPageFetched();
String uploaderId = null;
try {
return "https://www.youtube.com/channel/" +
playerResponse.getObject("videoDetails").getString("channelId");
} catch (Exception e) {
String uploaderUrl = null;
uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
if (uploaderId == null) {
try {
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
.select("a").first().attr("abs:href");
uploaderId = playerResponse.getObject("videoDetails").getString("channelId");
} catch (Exception ignored) {}
if (uploaderUrl == null) {
throw new ParsingException("Could not get channel link", e);
}
return uploaderUrl;
}
if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId;
throw new ParsingException("Could not get uploader url");
}
/**
 * Returns the name of the uploader.
 *
 * NOTE(review): two versions are interleaved in this span, as in a diff hunk without +/- markers.
 * One returns the "author" of the player response and falls back to the text of the
 * "yt-user-info" div of the HTML page. The other reads the first title run of the video owner
 * renderer first and falls back to the "author" of the player response.
 *
 * @throws ParsingException if no source yields a name
 */
@Nonnull
@Override
public String getUploaderName() throws ParsingException {
assertPageFetched();
String uploaderName = null;
try {
return playerResponse.getObject("videoDetails").getString("author");
} catch (Exception e) {
String name = null;
uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
if (uploaderName == null) {
try {
name = doc.select("div.yt-user-info").first().text();
uploaderName = playerResponse.getObject("videoDetails").getString("author");
} catch (Exception ignored) {}
if (name == null) {
throw new ParsingException("Could not get uploader name");
}
return name;
}
if (uploaderName != null) return uploaderName;
throw new ParsingException("Could not get uploader name");
}
@Nonnull
@ -475,12 +414,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String uploaderAvatarUrl = null;
try {
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
.select("img").first()
.attr("abs:data-thumb");
} catch (Exception e) {//todo: add fallback method
throw new ParsingException("Could not get uploader avatar url", e);
}
uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults")
.getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer")
.getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail")
.getArray("thumbnails").getObject(0).getString("url");
if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) {
return uploaderAvatarUrl;
}
} catch (Exception ignored) {}
try {
uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception ignored) {}
if (uploaderAvatarUrl == null) {
throw new ParsingException("Could not get uploader avatar url");
@ -594,13 +540,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Override
@Nonnull
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
public List<SubtitlesStream> getSubtitlesDefault() {
return getSubtitles(MediaFormat.TTML);
}
@Override
@Nonnull
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOException, ExtractionException {
public List<SubtitlesStream> getSubtitles(final MediaFormat format) {
assertPageFetched();
List<SubtitlesStream> subtitles = new ArrayList<>();
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
@ -624,18 +570,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Override
public StreamInfoItem getNextStream() throws IOException, ExtractionException {
public StreamInfoItem getNextStream() throws ExtractionException {
assertPageFetched();
if (isAgeRestricted) {
return null;
}
try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results")
.getObject(0).getObject("compactAutoplayRenderer").getArray("contents")
.getObject(0).getObject("compactVideoRenderer");
final TimeAgoParser timeAgoParser = getTimeAgoParser();
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Elements watch = doc.select("div[class=\"watch-sidebar-section\"]");
if (watch.size() < 1) {
return null;// prevent the snackbar notification "report error" on age-restricted videos
}
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser));
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
return collector.getItems().get(0);
} catch (Exception e) {
throw new ParsingException("Could not get next video", e);
@ -643,20 +591,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Override
public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException {
public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
assertPageFetched();
if (isAgeRestricted) {
return null;
}
try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results");
final TimeAgoParser timeAgoParser = getTimeAgoParser();
Element ul = doc.select("ul[id=\"watch-related\"]").first();
if (ul != null) {
for (Element li : ul.children()) {
// first check if we have a playlist. If so leave them out
if (li.select("a[class*=\"content-link\"]").first() != null) {
collector.commit(extractVideoPreviewInfo(li, timeAgoParser));
}
}
for (Object ul : results) {
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
}
return collector;
} catch (Exception e) {
@ -736,6 +686,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
isAgeRestricted = false;
}
playerResponse = getPlayerResponse();
initialData = YoutubeParsingHelper.getInitialData(pageHtml);
if (decryptionCode.isEmpty()) {
decryptionCode = loadDecryptionCode(playerUrl);
@ -752,12 +703,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return JsonParser.object().from(ytPlayerConfigRaw);
} catch (Parser.RegexException e) {
String errorReason = getErrorMessage();
switch (errorReason) {
case "":
throw new ContentNotAvailableException("Content not available: player config empty", e);
default:
throw new ContentNotAvailableException("Content not available", e);
if (errorReason.isEmpty()) {
throw new ContentNotAvailableException("Content not available: player config empty", e);
}
throw new ContentNotAvailableException("Content not available", e);
} catch (Exception e) {
throw new ParsingException("Could not parse yt player config", e);
}
@ -912,7 +861,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
@Nonnull
private List<SubtitlesInfo> getAvailableSubtitlesInfo() throws SubtitlesException {
private List<SubtitlesInfo> getAvailableSubtitlesInfo() {
// If the video is age restricted getPlayerConfig will fail
if (isAgeRestricted) return Collections.emptyList();
@ -926,7 +875,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject());
final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray());
// todo: use this to apply auto translation to different language from a source language
final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
// This check is necessary since there may be cases where subtitles metadata do not contain caption track info
// e.g. https://www.youtube.com/watch?v=-Vpwatutnko
@ -983,6 +932,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
// Utils
//////////////////////////////////////////////////////////////////////////*/
/**
 * Returns the {@code videoPrimaryInfoRenderer} object from the page's initial data.
 *
 * @return the first {@code videoPrimaryInfoRenderer} found in the results contents
 * @throws ParsingException if no entry of the contents array carries that renderer
 */
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
    return getRendererFromResultsContents("videoPrimaryInfoRenderer");
}

/**
 * Returns the {@code videoSecondaryInfoRenderer} object from the page's initial data.
 *
 * @return the first {@code videoSecondaryInfoRenderer} found in the results contents
 * @throws ParsingException if no entry of the contents array carries that renderer
 */
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
    return getRendererFromResultsContents("videoSecondaryInfoRenderer");
}

/**
 * Walks {@code contents.twoColumnWatchNextResults.results.results.contents} of
 * {@code initialData} and returns the first non-null object stored under
 * {@code rendererKey}.
 * <p>
 * The intermediate lookups are intentionally not null-checked, exactly as in the two
 * methods this helper was extracted from: a missing intermediate object still surfaces
 * as a {@link NullPointerException}, which callers in this class catch around their
 * calls to the renderer getters.
 *
 * @param rendererKey the key of the renderer object to look for in each contents entry
 * @return the first non-null renderer object found
 * @throws ParsingException if no entry contains an object under {@code rendererKey}
 */
private JsonObject getRendererFromResultsContents(final String rendererKey) throws ParsingException {
    final JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
            .getObject("results").getObject("results").getArray("contents");

    for (Object content : contents) {
        // look the key up once instead of once for the check and once for the assignment
        final JsonObject renderer = ((JsonObject) content).getObject(rendererKey);
        if (renderer != null) {
            return renderer;
        }
    }

    throw new ParsingException("Could not find " + rendererKey);
}
@Nonnull
private static String getVideoInfoUrl(final String id, final String sts) {
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
@ -1026,60 +1013,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
return urlAndItags;
}
/**
* Provides information about links to other videos on the video page, such as related videos.
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) {
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
return li.select("a.content-link").first().attr("abs:href");
}
@Override
public String getName() throws ParsingException {
//todo: check NullPointerException causing
return li.select("span.title").first().text();
//this page causes the NullPointerException, after finding it by searching for "tjvg":
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
}
@Override
public String getUploaderName() throws ParsingException {
return li.select("span[class*=\"attribution\"").first()
.select("span").first().text();
}
@Override
public String getUploaderUrl() throws ParsingException {
return ""; // The uploader is not linked
}
@Override
public String getTextualUploadDate() throws ParsingException {
return "";
}
@Override
public String getThumbnailUrl() throws ParsingException {
Element img = li.select("img").first();
String thumbnailUrl = img.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we caught such an item.
if (thumbnailUrl.contains(".gif")) {
thumbnailUrl = img.attr("data-thumb");
}
if (thumbnailUrl.startsWith("//")) {
thumbnailUrl = HTTPS + thumbnailUrl;
}
return thumbnailUrl;
}
};
}
@Nonnull
@Override
public List<Frameset> getFrames() throws ExtractionException {
@ -1137,40 +1070,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
@Nonnull
@Override
public String getHost() throws ParsingException {
public String getHost() {
return "";
}
@Nonnull
@Override
public String getPrivacy() {
return "";
}
@Nonnull
@Override
public String getCategory() {
return "";
}
@Nonnull
@Override
public String getLicence() {
return "";
}
@Override
public String getPrivacy() throws ParsingException {
return "";
}
@Override
public String getCategory() throws ParsingException {
return "";
}
@Override
public String getLicence() throws ParsingException {
return "";
}
@Override
public Locale getLanguageInfo() throws ParsingException {
public Locale getLanguageInfo() {
return null;
}
@Nonnull
@Override
public List<String> getTags() throws ParsingException {
public List<String> getTags() {
return new ArrayList<>();
}
@Nonnull
@Override
public String getSupportInfo() throws ParsingException {
public String getSupportInfo() {
return "";
}
}

View file

@ -1,19 +1,19 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
@ -35,263 +35,190 @@ import java.util.Date;
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
private final Element item;
private JsonObject videoInfo;
private final TimeAgoParser timeAgoParser;
private String cachedUploadDate;
/**
* Creates an extractor of StreamInfoItems from a YouTube page.
*
* @param item The page element
* @param videoInfoItem The JSON page element
* @param timeAgoParser A parser of the textual dates or {@code null}.
*/
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
this.item = item;
public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
this.videoInfo = videoInfoItem;
this.timeAgoParser = timeAgoParser;
}
@Override
public StreamType getStreamType() throws ParsingException {
if (isLiveStream(item)) {
return StreamType.LIVE_STREAM;
} else {
return StreamType.VIDEO_STREAM;
}
public StreamType getStreamType() {
try {
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
return StreamType.LIVE_STREAM;
}
} catch (Exception ignored) {}
return StreamType.VIDEO_STREAM;
}
@Override
public boolean isAd() throws ParsingException {
return !item.select("span[class*=\"icon-not-available\"]").isEmpty()
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|| isPremiumVideo();
}
private boolean isPremiumVideo() {
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
if (premiumSpan == null) return false;
// if this span has text it most likely says ("Free Video") so we can play this
if (premiumSpan.hasText()) return false;
return true;
return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]");
}
@Override
public String getUrl() throws ParsingException {
try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
String videoId = videoInfo.getString("videoId");
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
throw new ParsingException("Could not get url", e);
}
}
@Override
public String getName() throws ParsingException {
String name = null;
try {
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
Element dl = el.select("h3").first().select("a").first();
return dl.text();
} catch (Exception e) {
throw new ParsingException("Could not get title", e);
name = videoInfo.getObject("title").getString("simpleText");
} catch (Exception ignored) {}
if (name == null) {
try {
name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
} catch (Exception ignored) {}
}
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get name");
}
@Override
public long getDuration() throws ParsingException {
try {
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
final Element duration = item.select("span[class*=\"video-time\"]").first();
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
} catch (Exception e) {
throw new ParsingException("Could not get Duration: " + getUrl(), e);
throw new ParsingException("Could not get duration", e);
}
}
@Override
public String getUploaderName() throws ParsingException {
String name = null;
try {
return item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.text();
} catch (Exception e) {
throw new ParsingException("Could not get uploader", e);
name = videoInfo.getObject("longBylineText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception ignored) {}
if (name == null) {
try {
name = videoInfo.getObject("ownerText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception ignored) {}
}
if (name == null) {
try {
name = videoInfo.getObject("shortBylineText").getArray("runs")
.getObject(0).getString("text");
} catch (Exception ignored) {}
}
if (name != null && !name.isEmpty()) return name;
throw new ParsingException("Could not get uploader name");
}
@Override
public String getUploaderUrl() throws ParsingException {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
try {
String id = null;
try {
return item.select("div[class=\"yt-lockup-byline\"]").first()
.select("a").first()
.attr("abs:href");
} catch (Exception e){}
// try this if the first didn't work
return item.select("span[class=\"title\"")
.text().split(" - ")[0];
id = videoInfo.getObject("longBylineText").getArray("runs")
.getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
if (id == null) {
try {
id = videoInfo.getObject("ownerText").getArray("runs")
.getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
}
if (id == null) {
try {
id = videoInfo.getObject("shortBylineText").getArray("runs")
.getObject(0).getObject("navigationEndpoint")
.getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {}
}
if (id == null || id.isEmpty()) {
throw new IllegalArgumentException("is empty");
}
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
} catch (Exception e) {
System.out.println(item.html());
throw new ParsingException("Could not get uploader url", e);
throw new ParsingException("Could not get uploader url");
}
}
@Nullable
@Override
public String getTextualUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (cachedUploadDate != null) {
return cachedUploadDate;
}
public String getTextualUploadDate() {
try {
if (isVideoReminder()) {
final Calendar calendar = getDateFromReminder();
if (calendar != null) {
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
.format(calendar.getTime());
}
}
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
if (meta == null) return "";
final Elements li = meta.select("li");
if (li.isEmpty()) return "";
return cachedUploadDate = li.first().text();
return videoInfo.getObject("publishedTimeText").getString("simpleText");
} catch (Exception e) {
throw new ParsingException("Could not get upload date", e);
// upload date is not always available, e.g. in playlists
return null;
}
}
@Nullable
@Override
public DateWrapper getUploadDate() throws ParsingException {
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
return null;
}
if (isVideoReminder()) {
return new DateWrapper(getDateFromReminder());
}
String textualUploadDate = getTextualUploadDate();
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
return timeAgoParser.parse(textualUploadDate);
} else {
return null;
try {
return timeAgoParser.parse(textualUploadDate);
} catch (ParsingException e) {
throw new ParsingException("Could not get upload date", e);
}
}
return null;
}
@Override
public long getViewCount() throws ParsingException {
String input;
final Element spanViewCount = item.select("span.view-count").first();
if (spanViewCount != null) {
input = spanViewCount.text();
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
Element meta = item.select("ul.yt-lockup-meta-info").first();
if (meta == null) return 0;
final Elements li = meta.select("li");
if (li.isEmpty()) return 0;
input = li.first().text();
} else {
try {
Element meta = item.select("div.yt-lockup-meta").first();
if (meta == null) return -1;
// This case can happen if google releases a special video
if (meta.select("li").size() < 2) return -1;
input = meta.select("li").get(1).text();
} catch (IndexOutOfBoundsException e) {
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
}
}
if (input == null) {
throw new ParsingException("Input is null");
}
try {
return Long.parseLong(Utils.removeNonDigitCharacters(input));
} catch (NumberFormatException e) {
// if this happens the video probably has no views
if (!input.isEmpty()) {
return 0;
if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) {
return -1;
}
throw new ParsingException("Could not handle input: " + input, e);
String viewCount;
if (getStreamType() == StreamType.LIVE_STREAM) {
viewCount = videoInfo.getObject("viewCountText")
.getArray("runs").getObject(0).getString("text");
} else {
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
}
if (viewCount.equals("Recommended for you")) return -1;
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
} catch (Exception e) {
throw new ParsingException("Could not get view count", e);
}
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
String url;
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
return videoInfo.getObject("thumbnail").getArray("thumbnails")
.getObject(0).getString("url");
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
private boolean isVideoReminder() {
return !item.select("span.yt-uix-livereminder").isEmpty();
}
private Calendar getDateFromReminder() throws ParsingException {
final Element timeFuture = item.select("span.yt-badge.localized-date").first();
if (timeFuture == null) {
throw new ParsingException("Span timeFuture is null");
}
final String timestamp = timeFuture.attr("data-timestamp");
if (!timestamp.isEmpty()) {
try {
final Calendar calendar = Calendar.getInstance();
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
return calendar;
} catch (Exception e) {
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
private boolean isPremium() {
try {
JsonArray badges = videoInfo.getArray("badges");
for (Object badge : badges) {
if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) {
return true;
}
}
}
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
}
/**
* Generic method that checks if the element contains any clues that it's a livestream item
*/
protected static boolean isLiveStream(Element item) {
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
} catch (Exception ignored) {}
return false;
}
}

View file

@ -20,9 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.downloader.Response;
@ -35,12 +35,12 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import javax.annotation.Nonnull;
import java.io.IOException;
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
import javax.annotation.Nonnull;
private Document doc;
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
private JsonObject initialData;
public YoutubeTrendingExtractor(StreamingService service,
ListLinkHandler linkHandler,
@ -54,7 +54,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
"?gl=" + getExtractorContentCountry().getCountryCode();
final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
}
@Override
@ -70,99 +70,36 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
@Nonnull
@Override
public String getName() throws ParsingException {
String name;
try {
Element a = doc.select("a[href*=\"/feed/trending\"]").first();
Element span = a.select("span[class*=\"display-name\"]").first();
Element nameSpan = span.select("span").first();
return nameSpan.text();
name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title")
.getArray("runs").getObject(0).getString("text");
} catch (Exception e) {
throw new ParsingException("Could not get Trending name", e);
}
if (name != null && !name.isEmpty()) {
return name;
}
throw new ParsingException("Could not get Trending name");
}
@Nonnull
@Override
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
public InfoItemsPage<StreamInfoItem> getInitialPage() {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]");
JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
.getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer")
.getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content")
.getObject("expandedShelfContentsRenderer").getArray("items");
final TimeAgoParser timeAgoParser = getTimeAgoParser();
for (Element ul : uls) {
for (final Element li : ul.children()) {
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
try {
Element dl = el.select("h3").first().select("a").first();
return dl.attr("abs:href");
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getName() throws ParsingException {
try {
Element dl = el.select("h3").first().select("a").first();
return dl.text();
} catch (Exception e) {
throw new ParsingException("Could not get web page url for the video", e);
}
}
@Override
public String getUploaderUrl() throws ParsingException {
try {
String link = getUploaderLink().attr("abs:href");
if (link.isEmpty()) {
throw new IllegalArgumentException("is empty");
}
return link;
} catch (Exception e) {
throw new ParsingException("Could not get Uploader name");
}
}
private Element getUploaderLink() {
// this url is not always in the form "/channel/..."
// sometimes Youtube provides urls in the form "/user/..."
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
return uploaderEl.select("a").first();
}
@Override
public String getUploaderName() throws ParsingException {
try {
return getUploaderLink().text();
} catch (Exception e) {
throw new ParsingException("Could not get Uploader name");
}
}
@Override
public String getThumbnailUrl() throws ParsingException {
try {
String url;
Element te = li.select("span[class=\"yt-thumb-simple\"]").first()
.select("img").first();
url = te.attr("abs:src");
// Sometimes youtube sends links to gif files which somehow seem to not exist
// anymore. Items with such gif also offer a secondary image source. So we are going
// to use that if we've caught such an item.
if (url.contains(".gif")) {
url = te.attr("abs:data-thumb");
}
return url;
} catch (Exception e) {
throw new ParsingException("Could not get thumbnail url", e);
}
}
});
}
for (Object ul : firstPageElements) {
final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer");
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
}
return new InfoItemsPage<>(collector, getNextPageUrl());
}
}

View file

@ -1,11 +1,16 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser;
import java.net.URL;
import java.text.ParseException;
@ -38,6 +43,8 @@ public class YoutubeParsingHelper {
private YoutubeParsingHelper() {
}
public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
@ -143,4 +150,68 @@ public class YoutubeParsingHelper {
uploadDate.setTime(date);
return uploadDate;
}
/**
 * Extracts the object literal assigned to {@code window["ytInitialData"]} in the given
 * page and parses it as JSON.
 *
 * @param html the page HTML to search
 * @return the parsed initial data object
 * @throws ParsingException if the assignment cannot be matched or the matched text
 *                          cannot be parsed as a JSON object
 */
public static JsonObject getInitialData(String html) throws ParsingException {
    final String initialDataPattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";

    final String rawJson;
    try {
        rawJson = Parser.matchGroup1(initialDataPattern, html);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }

    try {
        return JsonParser.object().from(rawJson);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
/**
 * Get the client version from a page.
 * <p>
 * Sources are tried in this order:
 * <ol>
 *     <li>the {@code cver} param of the {@code CSI} service in
 *     {@code responseContext.serviceTrackingParams} of the initial data;</li>
 *     <li>a set of regular expressions applied to the page HTML;</li>
 *     <li>the shortened {@code client.version} param of the {@code ECATCHER} service.</li>
 * </ol>
 * Missing or malformed entries in the initial data are skipped so that the later
 * sources still get a chance.
 *
 * @param initialData the already parsed initial data of the page, or {@code null} to have
 *                    it extracted from {@code html}
 * @param html        The page HTML
 * @return the client version string
 * @throws ParsingException if the initial data has to be extracted from {@code html} and
 *                          that fails, or if none of the sources yields a version
 */
public static String getClientVersion(JsonObject initialData, String html) throws ParsingException {
    final JsonObject data = initialData == null ? getInitialData(html) : initialData;
    final JsonObject responseContext = data.getObject("responseContext");
    final JsonArray serviceTrackingParams = responseContext == null
            ? null : responseContext.getArray("serviceTrackingParams");
    String shortClientVersion = null;

    // try to get version from initial data first
    if (serviceTrackingParams != null) {
        for (Object service : serviceTrackingParams) {
            if (!(service instanceof JsonObject)) continue;
            final JsonObject s = (JsonObject) service;
            final String serviceName = s.getString("service");
            final JsonArray params = s.getArray("params");
            if (serviceName == null || params == null) continue;

            if (serviceName.equals("CSI")) {
                for (Object param : params) {
                    if (!(param instanceof JsonObject)) continue;
                    final JsonObject p = (JsonObject) param;
                    if ("cver".equals(p.getString("key"))) {
                        final String value = p.getString("value");
                        // a "cver" entry without a value is useless; keep looking
                        if (value != null) return value;
                    }
                }
            } else if (serviceName.equals("ECATCHER")) {
                // fallback to get a shortened client version which does not contain the last two digits
                for (Object param : params) {
                    if (!(param instanceof JsonObject)) continue;
                    final JsonObject p = (JsonObject) param;
                    if ("client.version".equals(p.getString("key"))) {
                        shortClientVersion = p.getString("value");
                    }
                }
            }
        }
    }

    final String[] patterns = {
            "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
            "innertube_context_client_version\":\"([0-9\\.]+?)\"",
            "client.version=([0-9\\.]+)"
    };
    for (String pattern : patterns) {
        try {
            final String clientVersion = Parser.matchGroup1(pattern, html);
            if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion;
        } catch (Exception ignored) {
            // this pattern did not match; try the next one
        }
    }

    // the full version is preferred, so the shortened one is only used as a last resort
    if (shortClientVersion != null) return shortClientVersion;

    throw new ParsingException("Could not get client version");
}
}

View file

@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
try {
final String url = "https://www.youtube.com/results"
+ "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
+ "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
if (contentFilters.size() > 0) {
switch (contentFilters.get(0)) {
case VIDEOS: return url + "&sp=EgIQAVAU";
case CHANNELS: return url + "&sp=EgIQAlAU";
case PLAYLISTS: return url + "&sp=EgIQA1AU";
case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
case ALL:
default:
}

View file

@ -10,6 +10,9 @@ import java.util.List;
public class Utils {
// URL scheme prefixes; public so that other classes can reference them (e.g. via static import).
public static final String HTTP = "http://";
public static final String HTTPS = "https://";
// Private constructor: code outside this class cannot create instances of it.
private Utils() {
//no instance
}
@ -83,9 +86,6 @@ public class Utils {
}
}
private static final String HTTP = "http://";
private static final String HTTPS = "https://";
public static String replaceHttpWithHttps(final String url) {
if (url == null) return null;

View file

@ -20,7 +20,7 @@ import java.util.Map;
public class DownloaderTestImpl extends Downloader {
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0";
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
private static DownloaderTestImpl instance = null;

View file

@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest {
@Test
public void testDescription() throws Exception {
assertTrue("What it actually was: " + extractor.getDescription(),
extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:"));
extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? Tweet me:"));
}
@Test

View file

@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
import java.util.regex.Pattern;
import static java.util.Arrays.asList;
import static org.junit.Assert.*;
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto
@Test
public void testGetSecondPageUrl() throws Exception {
assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl());
// check that ctoken, continuation and itct are longer than 5 characters
Pattern pattern = Pattern.compile(
"https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1"
+ "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?"
);
assertTrue(pattern.matcher(extractor.getNextPageUrl()).find());
}
@Ignore

View file

@ -28,13 +28,13 @@ public class YoutubeSearchQHTest {
@Test
public void testWithContentfilter() throws Exception {
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory()
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory()
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory()
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl());
assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory()
assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory()
.fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl());
}