Merge pull request #261 from TeamNewPipe/yt_new
Update YouTube to material version
This commit is contained in:
commit
8838e2d136
15 changed files with 871 additions and 1036 deletions
|
@ -1,11 +1,11 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.channel.ChannelExtractor;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
|
@ -17,11 +17,18 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
|||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 25.07.16.
|
||||
|
@ -49,6 +56,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
|
||||
|
||||
private Document doc;
|
||||
private JsonObject initialData;
|
||||
|
||||
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
|
@ -59,11 +67,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
|
||||
final Response response = downloader.get(channelUrl, getExtractorLocalization());
|
||||
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
|
||||
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() throws ExtractionException {
|
||||
return getNextPageUrlFrom(doc);
|
||||
return getNextPageUrlFrom(getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("continuations"));
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -80,15 +90,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public String getId() throws ParsingException {
|
||||
try {
|
||||
return doc.select("meta[itemprop=\"channelId\"]").first().attr("content");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
|
||||
try {
|
||||
Element element = doc.getElementsByClass("yt-uix-subscription-button").first();
|
||||
if (element == null) element = doc.getElementsByClass("yt-uix-subscription-preferences-button").first();
|
||||
|
||||
return element.attr("data-channel-external-id");
|
||||
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("channelId");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get channel id", e);
|
||||
}
|
||||
|
@ -98,7 +100,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return doc.select("meta[property=\"og:title\"]").first().attr("content");
|
||||
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getString("title");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get channel name", e);
|
||||
}
|
||||
|
@ -107,7 +109,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public String getAvatarUrl() throws ParsingException {
|
||||
try {
|
||||
return doc.select("img[class=\"channel-header-profile-image\"]").first().attr("abs:src");
|
||||
return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar")
|
||||
.getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get avatar", e);
|
||||
}
|
||||
|
@ -116,13 +119,27 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public String getBannerUrl() throws ParsingException {
|
||||
try {
|
||||
Element el = doc.select("div[id=\"gh-banner\"]").first().select("style").first();
|
||||
String cssContent = el.html();
|
||||
String url = "https:" + Parser.matchGroup1("url\\(([^)]+)\\)", cssContent);
|
||||
String url = null;
|
||||
try {
|
||||
url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner")
|
||||
.getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception ignored) {}
|
||||
if (url == null || url.contains("s.ytimg.com") || url.contains("default_banner")) {
|
||||
return null;
|
||||
}
|
||||
// the first characters of the banner URLs are different for each channel and some are not even valid URLs
|
||||
if (url.startsWith("//")) {
|
||||
url = url.substring(2);
|
||||
}
|
||||
if (url.startsWith(HTTP)) {
|
||||
url = Utils.replaceHttpWithHttps(url);
|
||||
} else if (!url.startsWith(HTTPS)) {
|
||||
url = HTTPS + url;
|
||||
}
|
||||
|
||||
return url.contains("s.ytimg.com") || url.contains("default_banner") ? null : url;
|
||||
return url;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Banner", e);
|
||||
throw new ParsingException("Could not get banner", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -137,12 +154,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
|
||||
@Override
|
||||
public long getSubscriberCount() throws ParsingException {
|
||||
|
||||
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
|
||||
if (el != null) {
|
||||
String elTitle = el.attr("title");
|
||||
final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
|
||||
if (subscriberInfo != null) {
|
||||
try {
|
||||
return Utils.mixedNumberWordToLong(elTitle);
|
||||
return Utils.mixedNumberWordToLong(subscriberInfo.getArray("runs").getObject(0).getString("text"));
|
||||
} catch (NumberFormatException e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
}
|
||||
|
@ -155,7 +170,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public String getDescription() throws ParsingException {
|
||||
try {
|
||||
return doc.select("meta[name=\"description\"]").first().attr("content");
|
||||
return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get channel description", e);
|
||||
}
|
||||
|
@ -165,8 +180,10 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
Element ul = doc.select("ul[id=\"browse-items-primary\"]").first();
|
||||
collectStreamsFrom(collector, ul);
|
||||
|
||||
JsonArray videos = getVideoTab().getObject("content").getObject("sectionListRenderer").getArray("contents");
|
||||
collectStreamsFrom(collector, videos);
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||
}
|
||||
|
||||
|
@ -181,106 +198,98 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
|
|||
fetchPage();
|
||||
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
JsonObject ajaxJson;
|
||||
JsonArray ajaxJson;
|
||||
|
||||
Map<String, List<String>> headers = new HashMap<>();
|
||||
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||
try {
|
||||
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
||||
ajaxJson = JsonParser.object().from(response);
|
||||
} catch (JsonParserException pe) {
|
||||
throw new ParsingException("Could not parse json data for next streams", pe);
|
||||
// Use the hardcoded client version first to get JSON with a structure we know
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (JsonParserException ignored) {
|
||||
throw new ParsingException("Could not parse json data for next streams", e);
|
||||
}
|
||||
}
|
||||
|
||||
final Document ajaxHtml = Jsoup.parse(ajaxJson.getString("content_html"), pageUrl);
|
||||
collectStreamsFrom(collector, ajaxHtml.select("body").first());
|
||||
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
|
||||
.getObject("continuationContents").getObject("sectionListContinuation");
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrlFromAjaxPage(ajaxJson, pageUrl));
|
||||
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
|
||||
}
|
||||
|
||||
private String getNextPageUrlFromAjaxPage(final JsonObject ajaxJson, final String pageUrl)
|
||||
throws ParsingException {
|
||||
String loadMoreHtmlDataRaw = ajaxJson.getString("load_more_widget_html");
|
||||
if (!loadMoreHtmlDataRaw.isEmpty()) {
|
||||
return getNextPageUrlFrom(Jsoup.parse(loadMoreHtmlDataRaw, pageUrl));
|
||||
} else {
|
||||
|
||||
private String getNextPageUrlFrom(JsonArray continuations) {
|
||||
if (continuations == null) {
|
||||
return "";
|
||||
}
|
||||
|
||||
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||
String continuation = nextContinuationData.getString("continuation");
|
||||
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
|
||||
+ "&itct=" + clickTrackingParams;
|
||||
}
|
||||
|
||||
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
||||
try {
|
||||
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
||||
if (button != null) {
|
||||
return button.attr("abs:data-uix-load-more-href");
|
||||
} else {
|
||||
// Sometimes channels are simply so small, they don't have a more streams/videos
|
||||
return "";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get next page url", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void collectStreamsFrom(StreamInfoItemsCollector collector, Element element) throws ParsingException {
|
||||
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) throws ParsingException {
|
||||
collector.reset();
|
||||
|
||||
final String uploaderName = getName();
|
||||
final String uploaderUrl = getUrl();
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
for (final Element li : element.children()) {
|
||||
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
for (Object video : videos) {
|
||||
JsonObject videoInfo = ((JsonObject) video).getObject("itemSectionRenderer")
|
||||
.getArray("contents").getObject(0);
|
||||
if (videoInfo.getObject("videoRenderer") != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo.getObject("videoRenderer"), timeAgoParser) {
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.attr("abs:href");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
Element el = li.select("div[class=\"feed-item-dismissable\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get title", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
public String getUploaderName() {
|
||||
return uploaderName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
public String getUploaderUrl() {
|
||||
return uploaderUrl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
String url;
|
||||
Element te = li.select("span[class=\"yt-thumb-clip\"]").first()
|
||||
.select("img").first();
|
||||
url = te.attr("abs:src");
|
||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
||||
// to use that if we've caught such an item.
|
||||
if (url.contains(".gif")) {
|
||||
url = te.attr("abs:data-thumb");
|
||||
}
|
||||
return url;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private JsonObject getVideoTab() throws ParsingException {
|
||||
JsonArray tabs = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||
.getArray("tabs");
|
||||
JsonObject videoTab = null;
|
||||
|
||||
for (Object tab : tabs) {
|
||||
if (((JsonObject) tab).getObject("tabRenderer") != null) {
|
||||
if (((JsonObject) tab).getObject("tabRenderer").getString("title").equals("Videos")) {
|
||||
videoTab = ((JsonObject) tab).getObject("tabRenderer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (videoTab == null) {
|
||||
throw new ParsingException("Could not find Videos tab");
|
||||
}
|
||||
|
||||
return videoTab;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.channel.ChannelInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 12.02.17.
|
||||
|
@ -29,87 +31,75 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
|
||||
public class YoutubeChannelInfoItemExtractor implements ChannelInfoItemExtractor {
|
||||
private final Element el;
|
||||
private JsonObject channelInfoItem;
|
||||
|
||||
public YoutubeChannelInfoItemExtractor(Element el) {
|
||||
this.el = el;
|
||||
public YoutubeChannelInfoItemExtractor(JsonObject channelInfoItem) {
|
||||
this.channelInfoItem = channelInfoItem;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
Element img = el.select("span[class*=\"yt-thumb-simple\"]").first()
|
||||
.select("img").first();
|
||||
|
||||
String url = img.attr("abs:src");
|
||||
|
||||
if (url.contains("gif")) {
|
||||
url = img.attr("abs:data-thumb");
|
||||
try {
|
||||
String url = channelInfoItem.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||
if (url.startsWith("//")) {
|
||||
url = url.substring(2);
|
||||
}
|
||||
if (url.startsWith(HTTP)) {
|
||||
url = Utils.replaceHttpWithHttps(url);
|
||||
} else if (!url.startsWith(HTTPS)) {
|
||||
url = HTTPS + url;
|
||||
}
|
||||
return url;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
return url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
|
||||
.text();
|
||||
try {
|
||||
return channelInfoItem.getObject("title").getString("simpleText");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get name", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
String buttonTrackingUrl = el.select("button[class*=\"yt-uix-button\"]").first()
|
||||
.attr("abs:data-href");
|
||||
|
||||
Pattern channelIdPattern = Pattern.compile("(?:.*?)\\%252Fchannel\\%252F([A-Za-z0-9\\-\\_]+)(?:.*)");
|
||||
Matcher match = channelIdPattern.matcher(buttonTrackingUrl);
|
||||
|
||||
if (match.matches()) {
|
||||
return YoutubeChannelExtractor.CHANNEL_URL_BASE + match.group(1);
|
||||
}
|
||||
} catch(Exception ignored) {}
|
||||
|
||||
// fallback method for channels without "Subscribe" button (or just in case yt changes things)
|
||||
// provides an url with "/user/NAME", inconsistent with stream and channel extractor: tests will fail
|
||||
try {
|
||||
return el.select("a[class*=\"yt-uix-tile-link\"]").first()
|
||||
.attr("abs:href");
|
||||
String id = "channel/" + channelInfoItem.getString("channelId"); // Does prepending 'channel/' always work?
|
||||
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get channel url", e);
|
||||
throw new ParsingException("Could not get url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getSubscriberCount() throws ParsingException {
|
||||
final Element subsEl = el.select("span[class*=\"yt-subscriber-count\"]").first();
|
||||
if (subsEl != null) {
|
||||
try {
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(subsEl.text()));
|
||||
} catch (NumberFormatException e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
}
|
||||
} else {
|
||||
// If the element is null, the channel have the subscriber count disabled
|
||||
return -1;
|
||||
try {
|
||||
String subscribers = channelInfoItem.getObject("subscriberCountText").getString("simpleText").split(" ")[0];
|
||||
return Utils.mixedNumberWordToLong(subscribers);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get subscriber count", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getStreamCount() throws ParsingException {
|
||||
Element metaEl = el.select("ul[class*=\"yt-lockup-meta-info\"]").first();
|
||||
if (metaEl == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(metaEl.text()));
|
||||
try {
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(channelInfoItem.getObject("videoCountText")
|
||||
.getArray("runs").getObject(0).getString("text")));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get stream count", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() throws ParsingException {
|
||||
Element desEl = el.select("div[class*=\"yt-lockup-description\"]").first();
|
||||
if (desEl == null) {
|
||||
return "";
|
||||
} else {
|
||||
return desEl.text();
|
||||
try {
|
||||
return channelInfoItem.getObject("descriptionSnippet").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get description", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,34 +1,39 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.downloader.Response;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
||||
|
||||
private Document doc;
|
||||
private JsonObject initialData;
|
||||
private JsonObject uploaderInfo;
|
||||
private JsonObject playlistInfo;
|
||||
|
||||
public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
|
@ -39,18 +44,61 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
final String url = getUrl();
|
||||
final Response response = downloader.get(url, getExtractorLocalization());
|
||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||
uploaderInfo = getUploaderInfo();
|
||||
playlistInfo = getPlaylistInfo();
|
||||
}
|
||||
|
||||
private JsonObject getUploaderInfo() throws ParsingException {
|
||||
JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items");
|
||||
try {
|
||||
JsonObject uploaderInfo = items.getObject(1).getObject("playlistSidebarSecondaryInfoRenderer")
|
||||
.getObject("videoOwner").getObject("videoOwnerRenderer");
|
||||
if (uploaderInfo != null) {
|
||||
return uploaderInfo;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
// we might want to create a loop here instead of using duplicated code
|
||||
try {
|
||||
JsonObject uploaderInfo = items.getObject(items.size()).getObject("playlistSidebarSecondaryInfoRenderer")
|
||||
.getObject("videoOwner").getObject("videoOwnerRenderer");
|
||||
if (uploaderInfo != null) {
|
||||
return uploaderInfo;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get uploader info", e);
|
||||
}
|
||||
throw new ParsingException("Could not get uploader info");
|
||||
}
|
||||
|
||||
private JsonObject getPlaylistInfo() throws ParsingException {
|
||||
try {
|
||||
return initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items")
|
||||
.getObject(0).getObject("playlistSidebarPrimaryInfoRenderer");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get PlaylistInfo", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() throws ExtractionException {
|
||||
return getNextPageUrlFrom(doc);
|
||||
public String getNextPageUrl() {
|
||||
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||
.getObject("playlistVideoListRenderer").getArray("continuations"));
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return doc.select("div[id=pl-header] h1[class=pl-header-title]").first().text();
|
||||
String name = playlistInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
if (name != null) return name;
|
||||
} catch (Exception ignored) {}
|
||||
try {
|
||||
return initialData.getObject("microformat").getObject("microformatDataRenderer").getString("title");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist name", e);
|
||||
}
|
||||
|
@ -59,7 +107,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return doc.select("div[id=pl-header] div[class=pl-header-thumb] img").first().attr("abs:src");
|
||||
return playlistInfo.getObject("thumbnailRenderer").getObject("playlistVideoThumbnailRenderer")
|
||||
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception ignored) {}
|
||||
try {
|
||||
return initialData.getObject("microformat").getObject("microformatDataRenderer").getObject("thumbnail")
|
||||
.getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist thumbnail", e);
|
||||
}
|
||||
|
@ -75,8 +128,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
public String getUploaderUrl() throws ParsingException {
|
||||
try {
|
||||
return YoutubeChannelExtractor.CHANNEL_URL_BASE +
|
||||
doc.select("button[class*=\"yt-uix-subscription-button\"]")
|
||||
.first().attr("data-channel-external-id");
|
||||
uploaderInfo.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist uploader url", e);
|
||||
}
|
||||
|
@ -85,7 +137,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
try {
|
||||
return doc.select("span[class=\"qualified-channel-title-text\"]").first().select("a").first().text();
|
||||
return uploaderInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist uploader name", e);
|
||||
}
|
||||
|
@ -94,7 +146,7 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
@Override
|
||||
public String getUploaderAvatarUrl() throws ParsingException {
|
||||
try {
|
||||
return doc.select("div[id=gh-banner] img[class=channel-header-profile-image]").first().attr("abs:src");
|
||||
return uploaderInfo.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get playlist uploader avatar", e);
|
||||
}
|
||||
|
@ -102,33 +154,26 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
|
||||
@Override
|
||||
public long getStreamCount() throws ParsingException {
|
||||
String input;
|
||||
|
||||
try {
|
||||
input = doc.select("ul[class=\"pl-header-details\"] li").get(1).text();
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
String viewsText = getPlaylistInfo().getArray("stats").getObject(0).getArray("runs").getObject(0).getString("text");
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewsText));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get video count from playlist", e);
|
||||
}
|
||||
|
||||
try {
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
||||
} catch (NumberFormatException e) {
|
||||
// When there's no videos in a playlist, there's no number in the "innerHtml",
|
||||
// all characters that is not a number is removed, so we try to parse a empty string
|
||||
if (!input.isEmpty()) {
|
||||
return 0;
|
||||
} else {
|
||||
throw new ParsingException("Could not handle input: " + input, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ExtractionException {
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() {
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
Element tbody = doc.select("tbody[id=\"pl-load-more-destination\"]").first();
|
||||
collectStreamsFrom(collector, tbody);
|
||||
|
||||
JsonArray videos = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||
.getObject("playlistVideoListRenderer").getArray("contents");
|
||||
|
||||
collectStreamsFrom(collector, videos);
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||
}
|
||||
|
||||
|
@ -139,156 +184,67 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
|
|||
}
|
||||
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
JsonObject pageJson;
|
||||
JsonArray ajaxJson;
|
||||
|
||||
Map<String, List<String>> headers = new HashMap<>();
|
||||
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||
try {
|
||||
final String responseBody = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
||||
pageJson = JsonParser.object().from(responseBody);
|
||||
} catch (JsonParserException pe) {
|
||||
throw new ParsingException("Could not parse ajax json", pe);
|
||||
// Use the hardcoded client version first to get JSON with a structure we know
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (JsonParserException ignored) {
|
||||
throw new ParsingException("Could not parse json data for next streams", e);
|
||||
}
|
||||
}
|
||||
|
||||
final Document pageHtml = Jsoup.parse("<table><tbody id=\"pl-load-more-destination\">"
|
||||
+ pageJson.getString("content_html")
|
||||
+ "</tbody></table>", pageUrl);
|
||||
JsonObject sectionListContinuation = ajaxJson.getObject(1).getObject("response")
|
||||
.getObject("continuationContents").getObject("playlistVideoListContinuation");
|
||||
|
||||
collectStreamsFrom(collector, pageHtml.select("tbody[id=\"pl-load-more-destination\"]").first());
|
||||
collectStreamsFrom(collector, sectionListContinuation.getArray("contents"));
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrlFromAjax(pageJson, pageUrl));
|
||||
return new InfoItemsPage<>(collector, getNextPageUrlFrom(sectionListContinuation.getArray("continuations")));
|
||||
}
|
||||
|
||||
private String getNextPageUrlFromAjax(final JsonObject pageJson, final String pageUrl)
|
||||
throws ParsingException {
|
||||
String nextPageHtml = pageJson.getString("load_more_widget_html");
|
||||
if (!nextPageHtml.isEmpty()) {
|
||||
return getNextPageUrlFrom(Jsoup.parse(nextPageHtml, pageUrl));
|
||||
} else {
|
||||
private String getNextPageUrlFrom(JsonArray continuations) {
|
||||
if (continuations == null) {
|
||||
return "";
|
||||
}
|
||||
|
||||
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||
String continuation = nextContinuationData.getString("continuation");
|
||||
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||
return "https://www.youtube.com/browse_ajax?ctoken=" + continuation + "&continuation=" + continuation
|
||||
+ "&itct=" + clickTrackingParams;
|
||||
}
|
||||
|
||||
private String getNextPageUrlFrom(Document d) throws ParsingException {
|
||||
try {
|
||||
Element button = d.select("button[class*=\"yt-uix-load-more\"]").first();
|
||||
if (button != null) {
|
||||
return button.attr("abs:data-uix-load-more-href");
|
||||
} else {
|
||||
// Sometimes playlists are simply so small that they don't have any more streams/videos
|
||||
return "";
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("could not get next streams' url", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nullable Element element) {
|
||||
private void collectStreamsFrom(StreamInfoItemsCollector collector, JsonArray videos) {
|
||||
collector.reset();
|
||||
|
||||
if (element == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
final LinkHandlerFactory streamLinkHandlerFactory = getService().getStreamLHFactory();
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
for (final Element li : element.children()) {
|
||||
if (isDeletedItem(li)) {
|
||||
continue;
|
||||
for (Object video : videos) {
|
||||
if (((JsonObject) video).getObject("playlistVideoRenderer") != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) video).getObject("playlistVideoRenderer"), timeAgoParser) {
|
||||
@Override
|
||||
public long getViewCount() {
|
||||
return -1;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
public Element uploaderLink;
|
||||
|
||||
@Override
|
||||
public boolean isAd() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
return streamLinkHandlerFactory.fromId(li.attr("data-video-id")).getUrl();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
return li.attr("data-title");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get title", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() throws ParsingException {
|
||||
try {
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
||||
|
||||
Element first = li.select("div[class=\"timestamp\"] span").first();
|
||||
if (first == null) {
|
||||
// Video unavailable (private, deleted, etc.), this is a thing that happens specifically with playlists,
|
||||
// because in other cases, those videos don't even show up
|
||||
return -1;
|
||||
}
|
||||
|
||||
return YoutubeParsingHelper.parseDurationString(first.text());
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get duration" + getUrl(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Element getUploaderLink() {
|
||||
// should always be present since we filter deleted items
|
||||
if (uploaderLink == null) {
|
||||
uploaderLink = li.select("div[class=pl-video-owner] a").first();
|
||||
}
|
||||
return uploaderLink;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
return getUploaderLink().text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
// this url is not always in the form "/channel/..."
|
||||
// sometimes YouTube provides urls in the form "/user/..."
|
||||
return getUploaderLink().attr("abs:href");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
return "https://i.ytimg.com/vi/" + streamLinkHandlerFactory.fromUrl(getUrl()).getId() + "/hqdefault.jpg";
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the playlist item is deleted
|
||||
*
|
||||
* @param li the list item
|
||||
* @return true if the item is deleted
|
||||
*/
|
||||
private boolean isDeletedItem(Element li) {
|
||||
return li.select("div[class=pl-video-owner] a").isEmpty();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,97 +1,63 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.playlist.PlaylistInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubePlaylistLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
public class YoutubePlaylistInfoItemExtractor implements PlaylistInfoItemExtractor {
|
||||
private final Element el;
|
||||
private JsonObject playlistInfoItem;
|
||||
|
||||
public YoutubePlaylistInfoItemExtractor(Element el) {
|
||||
this.el = el;
|
||||
public YoutubePlaylistInfoItemExtractor(JsonObject playlistInfoItem) {
|
||||
this.playlistInfoItem = playlistInfoItem;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
String url;
|
||||
|
||||
try {
|
||||
Element te = el.select("div[class=\"yt-thumb video-thumb\"]").first()
|
||||
.select("img").first();
|
||||
url = te.attr("abs:src");
|
||||
|
||||
if (url.contains(".gif")) {
|
||||
url = te.attr("abs:data-thumb");
|
||||
}
|
||||
return playlistInfoItem.getArray("thumbnails").getObject(0).getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Failed to extract playlist thumbnail url", e);
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
|
||||
return url;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
String name;
|
||||
try {
|
||||
final Element title = el.select("[class=\"yt-lockup-title\"]").first()
|
||||
.select("a").first();
|
||||
|
||||
name = title == null ? "" : title.text();
|
||||
return playlistInfoItem.getObject("title").getString("simpleText");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Failed to extract playlist name", e);
|
||||
throw new ParsingException("Could not get name", e);
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
final Element a = el.select("div[class=\"yt-lockup-meta\"]")
|
||||
.select("ul[class=\"yt-lockup-meta-info\"]")
|
||||
.select("li").select("a").first();
|
||||
|
||||
if (a != null) {
|
||||
return a.attr("abs:href");
|
||||
}
|
||||
|
||||
// this is for yt premium playlists
|
||||
return el.select("h3[class=\"yt-lockup-title\"").first()
|
||||
.select("a").first()
|
||||
.attr("abs:href");
|
||||
|
||||
String id = playlistInfoItem.getString("playlistId");
|
||||
return YoutubePlaylistLinkHandlerFactory.getInstance().getUrl(id);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Failed to extract playlist url", e);
|
||||
throw new ParsingException("Could not get url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
String name;
|
||||
|
||||
try {
|
||||
final Element div = el.select("div[class=\"yt-lockup-byline\"]").first()
|
||||
.select("a").first();
|
||||
|
||||
name = div.text();
|
||||
return playlistInfoItem.getObject("longBylineText").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Failed to extract playlist uploader", e);
|
||||
throw new ParsingException("Could not get uploader name", e);
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getStreamCount() throws ParsingException {
|
||||
try {
|
||||
final Element count = el.select("span[class=\"formatted-video-count-label\"]").first()
|
||||
.select("b").first();
|
||||
|
||||
return count == null ? 0 : Long.parseLong(Utils.removeNonDigitCharacters(count.text()));
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(playlistInfoItem.getString("videoCount")));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Failed to extract playlist stream count", e);
|
||||
throw new ParsingException("Could not get stream count", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.schabi.newpipe.extractor.InfoItem;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
|
@ -14,13 +17,14 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
|||
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
|
||||
import org.schabi.newpipe.extractor.search.SearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 22.07.2018
|
||||
|
@ -45,6 +49,7 @@ import java.net.URL;
|
|||
public class YoutubeSearchExtractor extends SearchExtractor {
|
||||
|
||||
private Document doc;
|
||||
private JsonObject initialData;
|
||||
|
||||
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
|
||||
super(service, linkHandler);
|
||||
|
@ -55,6 +60,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||
final String url = getUrl();
|
||||
final Response response = downloader.get(url, getExtractorLocalization());
|
||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -65,80 +71,109 @@ public class YoutubeSearchExtractor extends SearchExtractor {
|
|||
|
||||
@Override
|
||||
public String getSearchSuggestion() {
|
||||
final Element el = doc.select("div[class*=\"spell-correction\"]").first();
|
||||
if (el != null) {
|
||||
return el.select("a").first().text();
|
||||
} else {
|
||||
JsonObject showingResultsForRenderer = initialData.getObject("contents")
|
||||
.getObject("twoColumnSearchResultsRenderer").getObject("primaryContents")
|
||||
.getObject("sectionListRenderer").getArray("contents").getObject(0)
|
||||
.getObject("itemSectionRenderer").getArray("contents").getObject(0)
|
||||
.getObject("showingResultsForRenderer");
|
||||
if (showingResultsForRenderer == null) {
|
||||
return "";
|
||||
} else {
|
||||
return showingResultsForRenderer.getObject("correctedQuery").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
|
||||
return new InfoItemsPage<>(collectItems(doc), getNextPageUrl());
|
||||
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||
JsonArray videos = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
||||
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
||||
.getObject(0).getObject("itemSectionRenderer").getArray("contents");
|
||||
|
||||
collectStreamsFrom(collector, videos);
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getNextPageUrl() throws ExtractionException {
|
||||
return getUrl() + "&page=" + 2;
|
||||
return getNextPageUrlFrom(initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
|
||||
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
|
||||
.getObject(0).getObject("itemSectionRenderer").getArray("continuations"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public InfoItemsPage<InfoItem> getPage(String pageUrl) throws IOException, ExtractionException {
|
||||
final String response = getDownloader().get(pageUrl, getExtractorLocalization()).responseBody();
|
||||
doc = Jsoup.parse(response, pageUrl);
|
||||
if (pageUrl == null || pageUrl.isEmpty()) {
|
||||
throw new ExtractionException(new IllegalArgumentException("Page url is empty or null"));
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collectItems(doc), getNextPageUrlFromCurrentUrl(pageUrl));
|
||||
}
|
||||
|
||||
private String getNextPageUrlFromCurrentUrl(String currentUrl)
|
||||
throws MalformedURLException, UnsupportedEncodingException {
|
||||
final int pageNr = Integer.parseInt(
|
||||
Parser.compatParseMap(
|
||||
new URL(currentUrl)
|
||||
.getQuery())
|
||||
.get("page"));
|
||||
|
||||
return currentUrl.replace("&page=" + pageNr,
|
||||
"&page=" + Integer.toString(pageNr + 1));
|
||||
}
|
||||
|
||||
private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundException {
|
||||
InfoItemsSearchCollector collector = getInfoItemSearchCollector();
|
||||
collector.reset();
|
||||
JsonArray ajaxJson;
|
||||
|
||||
Element list = doc.select("ol[class=\"item-section\"]").first();
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
Map<String, List<String>> headers = new HashMap<>();
|
||||
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
||||
|
||||
for (Element item : list.children()) {
|
||||
/* First we need to determine which kind of item we are working with.
|
||||
Youtube depicts five different kinds of items on its search result page. These are
|
||||
regular videos, playlists, channels, two types of video suggestions, and a "no video
|
||||
found" item. Since we only want videos, we need to filter out all the others.
|
||||
An example for this can be seen here:
|
||||
https://www.youtube.com/results?search_query=asdf&page=1
|
||||
|
||||
We already applied a filter to the url, so we don't need to care about channels and
|
||||
playlists now.
|
||||
*/
|
||||
|
||||
Element el;
|
||||
|
||||
if ((el = item.select("div[class*=\"search-message\"]").first()) != null) {
|
||||
throw new NothingFoundException(el.text());
|
||||
|
||||
// video item type
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(el, timeAgoParser));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
|
||||
collector.commit(new YoutubeChannelInfoItemExtractor(el));
|
||||
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
|
||||
item.select(".yt-pl-icon-mix").isEmpty()) {
|
||||
collector.commit(new YoutubePlaylistInfoItemExtractor(el));
|
||||
try {
|
||||
// Use the hardcoded client version first to get JSON with a structure we know
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.HARDCODED_CLIENT_VERSION));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
headers.put("X-YouTube-Client-Version",
|
||||
Collections.singletonList(YoutubeParsingHelper.getClientVersion(initialData, doc.toString())));
|
||||
final String response = getDownloader().get(pageUrl, headers, getExtractorLocalization()).responseBody();
|
||||
if (response.length() < 50) { // ensure to have a valid response
|
||||
throw new ParsingException("Could not parse json data for next streams");
|
||||
}
|
||||
ajaxJson = JsonParser.array().from(response);
|
||||
} catch (JsonParserException ignored) {
|
||||
throw new ParsingException("Could not parse json data for next streams", e);
|
||||
}
|
||||
}
|
||||
|
||||
return collector;
|
||||
JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
|
||||
.getObject("continuationContents").getObject("itemSectionContinuation");
|
||||
|
||||
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrlFrom(itemSectionRenderer.getArray("continuations")));
|
||||
}
|
||||
|
||||
private void collectStreamsFrom(InfoItemsSearchCollector collector, JsonArray videos) throws NothingFoundException {
|
||||
collector.reset();
|
||||
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
for (Object item : videos) {
|
||||
if (((JsonObject) item).getObject("backgroundPromoRenderer") != null) {
|
||||
throw new NothingFoundException(((JsonObject) item).getObject("backgroundPromoRenderer")
|
||||
.getObject("bodyText").getArray("runs").getObject(0).getString("text"));
|
||||
} else if (((JsonObject) item).getObject("videoRenderer") != null) {
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(((JsonObject) item).getObject("videoRenderer"), timeAgoParser));
|
||||
} else if (((JsonObject) item).getObject("channelRenderer") != null) {
|
||||
collector.commit(new YoutubeChannelInfoItemExtractor(((JsonObject) item).getObject("channelRenderer")));
|
||||
} else if (((JsonObject) item).getObject("playlistRenderer") != null) {
|
||||
collector.commit(new YoutubePlaylistInfoItemExtractor(((JsonObject) item).getObject("playlistRenderer")));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String getNextPageUrlFrom(JsonArray continuations) throws ParsingException {
|
||||
if (continuations == null) {
|
||||
return "";
|
||||
}
|
||||
|
||||
JsonObject nextContinuationData = continuations.getObject(0).getObject("nextContinuationData");
|
||||
String continuation = nextContinuationData.getString("continuation");
|
||||
String clickTrackingParams = nextContinuationData.getString("clickTrackingParams");
|
||||
return getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
|
||||
+ "&itct=" + clickTrackingParams;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,11 +3,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
import org.jsoup.Jsoup;
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.mozilla.javascript.Context;
|
||||
import org.mozilla.javascript.Function;
|
||||
import org.mozilla.javascript.ScriptableObject;
|
||||
|
@ -15,7 +13,6 @@ import org.schabi.newpipe.extractor.MediaFormat;
|
|||
import org.schabi.newpipe.extractor.NewPipe;
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.downloader.Request;
|
||||
import org.schabi.newpipe.extractor.downloader.Response;
|
||||
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
||||
|
@ -23,23 +20,41 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
|||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.localization.Localization;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
|
||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.stream.*;
|
||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||
import org.schabi.newpipe.extractor.stream.AudioStream;
|
||||
import org.schabi.newpipe.extractor.stream.Description;
|
||||
import org.schabi.newpipe.extractor.stream.Frameset;
|
||||
import org.schabi.newpipe.extractor.stream.Stream;
|
||||
import org.schabi.newpipe.extractor.stream.StreamExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.stream.SubtitlesStream;
|
||||
import org.schabi.newpipe.extractor.stream.VideoStream;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import javax.annotation.Nullable;
|
||||
|
||||
/*
|
||||
* Created by Christian Schabesberger on 06.08.15.
|
||||
|
@ -62,8 +77,6 @@ import java.util.regex.Pattern;
|
|||
*/
|
||||
|
||||
public class YoutubeStreamExtractor extends StreamExtractor {
|
||||
private static final String TAG = YoutubeStreamExtractor.class.getSimpleName();
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////
|
||||
// Exceptions
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
@ -74,12 +87,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
public class SubtitlesException extends ContentNotAvailableException {
|
||||
SubtitlesException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
}
|
||||
|
||||
/*//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
private Document doc;
|
||||
|
@ -88,6 +95,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
@Nonnull
|
||||
private final Map<String, String> videoInfoPage = new HashMap<>();
|
||||
private JsonObject playerResponse;
|
||||
private JsonObject initialData;
|
||||
|
||||
@Nonnull
|
||||
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
|
||||
|
@ -106,22 +114,17 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
assertPageFetched();
|
||||
String title = null;
|
||||
try {
|
||||
return playerResponse.getObject("videoDetails").getString("title");
|
||||
|
||||
} catch (Exception e) {
|
||||
// fallback HTML method
|
||||
String name = null;
|
||||
title = getVideoPrimaryInfoRenderer().getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
if (title == null) {
|
||||
try {
|
||||
name = doc.select("meta[name=title]").attr(CONTENT);
|
||||
} catch (Exception ignored) {
|
||||
}
|
||||
|
||||
if (name == null) {
|
||||
throw new ParsingException("Could not get name", e);
|
||||
}
|
||||
return name;
|
||||
title = playerResponse.getObject("videoDetails").getString("title");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (title != null) return title;
|
||||
throw new ParsingException("Could not get name");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -131,19 +134,33 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
try {
|
||||
return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||
} catch (Exception e) {
|
||||
String uploadDate = null;
|
||||
try {
|
||||
uploadDate = doc.select("meta[itemprop=datePublished]").attr(CONTENT);
|
||||
} catch (Exception ignored) {
|
||||
}
|
||||
// return playerResponse.getObject("microformat").getObject("playerMicroformatRenderer").getString("publishDate");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
if (uploadDate == null) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
try {
|
||||
if (getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").startsWith("Premiered")) {
|
||||
String time = getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText").substring(10);
|
||||
|
||||
try { // Premiered 20 hours ago
|
||||
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
|
||||
Calendar parsedTime = timeAgoParser.parse(time).date();
|
||||
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
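try { // Premiered Feb 21, 2020
// NOTE(review): "YYYY" in the pattern below is the week-based year in SimpleDateFormat; the calendar year "yyyy" is probably intended - confirm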
|
||||
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
|
||||
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
return uploadDate;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
try {
|
||||
// TODO this parses English formatted dates only, we need a better approach to parse the textual date
|
||||
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
|
||||
getVideoPrimaryInfoRenderer().getObject("dateText").getString("simpleText"));
|
||||
return new SimpleDateFormat("yyyy-MM-dd").format(d);
|
||||
} catch (Exception ignored) {}
|
||||
throw new ParsingException("Could not get upload date");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -167,15 +184,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return thumbnails.getObject(thumbnails.size() - 1).getString("url");
|
||||
|
||||
} catch (Exception e) {
|
||||
String url = null;
|
||||
try {
|
||||
url = doc.select("link[itemprop=\"thumbnailUrl\"]").first().attr("abs:href");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
if (url == null) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
return url;
|
||||
throw new ParsingException("Could not get thumbnail url");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -184,88 +193,65 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
@Override
|
||||
public Description getDescription() throws ParsingException {
|
||||
assertPageFetched();
|
||||
// description with more info on links
|
||||
try {
|
||||
// first try to get html-formatted description
|
||||
return new Description(parseHtmlAndGetFullLinks(doc.select("p[id=\"eow-description\"]").first().html()), Description.HTML);
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
// fallback to raw non-html description
|
||||
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
||||
} catch (Exception ignored) {
|
||||
throw new ParsingException("Could not get the description", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// onclick="yt.www.watch.player.seekTo(0*3600+00*60+00);return false;"
|
||||
// :00 is NOT recognized as a timestamp in description or comments.
|
||||
// 0:00 is recognized in both description and comments.
|
||||
// https://www.youtube.com/watch?v=4cccfDXu1vA
|
||||
private final static Pattern DESCRIPTION_TIMESTAMP_ONCLICK_REGEX = Pattern.compile(
|
||||
"seekTo\\("
|
||||
+ "(?:(\\d+)\\*3600\\+)?" // hours?
|
||||
+ "(\\d+)\\*60\\+" // minutes
|
||||
+ "(\\d+)" // seconds
|
||||
+ "\\)");
|
||||
|
||||
@SafeVarargs
|
||||
private static <T> T coalesce(T... args) {
|
||||
for (T arg : args) {
|
||||
if (arg != null) return arg;
|
||||
}
|
||||
throw new IllegalArgumentException("all arguments to coalesce() were null");
|
||||
}
|
||||
|
||||
private String parseHtmlAndGetFullLinks(String descriptionHtml)
|
||||
throws MalformedURLException, UnsupportedEncodingException, ParsingException {
|
||||
final Document description = Jsoup.parse(descriptionHtml, getUrl());
|
||||
for (Element a : description.select("a")) {
|
||||
final String rawUrl = a.attr("abs:href");
|
||||
final URL redirectLink = new URL(rawUrl);
|
||||
|
||||
final Matcher onClickTimestamp;
|
||||
final String queryString;
|
||||
if ((onClickTimestamp = DESCRIPTION_TIMESTAMP_ONCLICK_REGEX.matcher(a.attr("onclick")))
|
||||
.find()) {
|
||||
a.removeAttr("onclick");
|
||||
|
||||
String hours = coalesce(onClickTimestamp.group(1), "0");
|
||||
String minutes = onClickTimestamp.group(2);
|
||||
String seconds = onClickTimestamp.group(3);
|
||||
|
||||
int timestamp = 0;
|
||||
timestamp += Integer.parseInt(hours) * 3600;
|
||||
timestamp += Integer.parseInt(minutes) * 60;
|
||||
timestamp += Integer.parseInt(seconds);
|
||||
|
||||
String setTimestamp = "&t=" + timestamp;
|
||||
|
||||
// Even after clicking https://youtu.be/...?t=6,
|
||||
// getUrl() is https://www.youtube.com/watch?v=..., never youtu.be, never &t=.
|
||||
a.attr("href", getUrl() + setTimestamp);
|
||||
|
||||
} else if ((queryString = redirectLink.getQuery()) != null) {
|
||||
// if the query string is null we are not dealing with a redirect link,
|
||||
// so we don't need to override it.
|
||||
final String link =
|
||||
Parser.compatParseMap(queryString).get("q");
|
||||
|
||||
if (link != null) {
|
||||
// if link is null the a tag is a hashtag.
|
||||
// They refer to the youtube search. We do not handle them.
|
||||
a.text(link);
|
||||
a.attr("href", link);
|
||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
||||
a.text(redirectLink.toString());
|
||||
a.attr("href", redirectLink.toString());
|
||||
boolean htmlConversionRequired = false;
|
||||
JsonArray descriptions = getVideoSecondaryInfoRenderer().getObject("description").getArray("runs");
|
||||
StringBuilder descriptionBuilder = new StringBuilder(descriptions.size());
|
||||
for (Object textObjectHolder : descriptions) {
|
||||
JsonObject textHolder = (JsonObject) textObjectHolder;
|
||||
String text = textHolder.getString("text");
|
||||
if (textHolder.getObject("navigationEndpoint") != null) {
|
||||
// The text is a link. Get the URL it points to and generate a HTML link of it
|
||||
if (textHolder.getObject("navigationEndpoint").getObject("urlEndpoint") != null) {
|
||||
String internUrl = textHolder.getObject("navigationEndpoint").getObject("urlEndpoint").getString("url");
|
||||
if (internUrl.startsWith("/redirect?")) {
|
||||
// q parameter can be the first parameter
|
||||
internUrl = internUrl.substring(10);
|
||||
String[] params = internUrl.split("&");
|
||||
for (String param : params) {
|
||||
if (param.split("=")[0].equals("q")) {
|
||||
String url = URLDecoder.decode(param.split("=")[1], StandardCharsets.UTF_8.name());
|
||||
if (url != null && !url.isEmpty()) {
|
||||
descriptionBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
|
||||
htmlConversionRequired = true;
|
||||
} else {
|
||||
descriptionBuilder.append(text);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (internUrl.startsWith("http")) {
|
||||
descriptionBuilder.append("<a href=\"").append(internUrl).append("\">").append(text).append("</a>");
|
||||
htmlConversionRequired = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (text != null) {
|
||||
descriptionBuilder.append(text);
|
||||
}
|
||||
} else if (redirectLink.toString().contains("https://www.youtube.com/")) {
|
||||
descriptionHtml = descriptionHtml.replace(rawUrl, redirectLink.toString());
|
||||
a.text(redirectLink.toString());
|
||||
a.attr("href", redirectLink.toString());
|
||||
}
|
||||
|
||||
String description = descriptionBuilder.toString();
|
||||
|
||||
if (!description.isEmpty()) {
|
||||
if (htmlConversionRequired) {
|
||||
description = description.replaceAll("\\n", "<br>");
|
||||
description = description.replaceAll(" ", " ");
|
||||
return new Description(description, Description.HTML);
|
||||
}
|
||||
return new Description(description, Description.PLAIN_TEXT);
|
||||
}
|
||||
} catch (Exception ignored) { }
|
||||
|
||||
// raw non-html description
|
||||
try {
|
||||
return new Description(playerResponse.getObject("videoDetails").getString("shortDescription"), Description.PLAIN_TEXT);
|
||||
} catch (Exception ignored) {
|
||||
throw new ParsingException("Could not get description");
|
||||
}
|
||||
return description.select("body").first().html();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -318,68 +304,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
assertPageFetched();
|
||||
String views = null;
|
||||
try {
|
||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
return getLiveStreamWatchingCount();
|
||||
} else {
|
||||
return Long.parseLong(playerResponse.getObject("videoDetails").getString("viewCount"));
|
||||
}
|
||||
} catch (Exception e) {
|
||||
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||
.getObject("videoViewCountRenderer").getObject("viewCount")
|
||||
.getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
if (views == null) {
|
||||
try {
|
||||
return Long.parseLong(doc.select("meta[itemprop=interactionCount]").attr(CONTENT));
|
||||
} catch (Exception ignored) {
|
||||
throw new ParsingException("Could not get view count", e);
|
||||
}
|
||||
views = getVideoPrimaryInfoRenderer().getObject("viewCount")
|
||||
.getObject("videoViewCountRenderer").getObject("viewCount").getString("simpleText");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
}
|
||||
|
||||
private long getLiveStreamWatchingCount() throws ExtractionException, IOException, JsonParserException {
|
||||
// https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=
|
||||
String innerTubeKey = null, clientVersion = null;
|
||||
if (playerArgs != null && !playerArgs.isEmpty()) {
|
||||
innerTubeKey = playerArgs.getString("innertube_api_key");
|
||||
clientVersion = playerArgs.getString("innertube_context_client_version");
|
||||
} else if (!videoInfoPage.isEmpty()) {
|
||||
innerTubeKey = videoInfoPage.get("innertube_api_key");
|
||||
clientVersion = videoInfoPage.get("innertube_context_client_version");
|
||||
if (views == null) {
|
||||
try {
|
||||
views = playerResponse.getObject("videoDetails").getString("viewCount");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
|
||||
if (innerTubeKey == null || innerTubeKey.isEmpty()) {
|
||||
throw new ExtractionException("Couldn't get innerTube key");
|
||||
}
|
||||
|
||||
if (clientVersion == null || clientVersion.isEmpty()) {
|
||||
throw new ExtractionException("Couldn't get innerTube client version");
|
||||
}
|
||||
|
||||
final String metadataUrl = "https://www.youtube.com/youtubei/v1/updated_metadata?alt=json&key=" + innerTubeKey;
|
||||
final byte[] dataBody = ("{\"context\":{\"client\":{\"clientName\":1,\"clientVersion\":\"" + clientVersion + "\"}}" +
|
||||
",\"videoId\":\"" + getId() + "\"}").getBytes("UTF-8");
|
||||
final Response response = getDownloader().execute(Request.newBuilder()
|
||||
.post(metadataUrl, dataBody)
|
||||
.addHeader("Content-Type", "application/json")
|
||||
.build());
|
||||
final JsonObject jsonObject = JsonParser.object().from(response.responseBody());
|
||||
|
||||
for (Object actionEntry : jsonObject.getArray("actions")) {
|
||||
if (!(actionEntry instanceof JsonObject)) continue;
|
||||
final JsonObject entry = (JsonObject) actionEntry;
|
||||
|
||||
final JsonObject updateViewershipAction = entry.getObject("updateViewershipAction", null);
|
||||
if (updateViewershipAction == null) continue;
|
||||
|
||||
final JsonArray viewCountRuns = JsonUtils.getArray(updateViewershipAction, "viewership.videoViewCountRenderer.viewCount.runs");
|
||||
if (viewCountRuns.isEmpty()) continue;
|
||||
|
||||
final JsonObject textObject = viewCountRuns.getObject(0);
|
||||
if (!textObject.has("text")) {
|
||||
throw new ExtractionException("Response don't have \"text\" element");
|
||||
}
|
||||
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(textObject.getString("text")));
|
||||
}
|
||||
|
||||
throw new ExtractionException("Could not find correct results in response");
|
||||
if (views != null) return Long.parseLong(Utils.removeNonDigitCharacters(views));
|
||||
throw new ParsingException("Could not get view count");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -387,9 +330,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
assertPageFetched();
|
||||
String likesString = "";
|
||||
try {
|
||||
Element button = doc.select("button.like-button-renderer-like-button").first();
|
||||
try {
|
||||
likesString = button.select("span.yt-uix-button-content").first().text();
|
||||
likesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
|
||||
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[0];
|
||||
} catch (NullPointerException e) {
|
||||
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||
|
@ -410,9 +353,9 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
assertPageFetched();
|
||||
String dislikesString = "";
|
||||
try {
|
||||
Element button = doc.select("button.like-button-renderer-dislike-button").first();
|
||||
try {
|
||||
dislikesString = button.select("span.yt-uix-button-content").first().text();
|
||||
dislikesString = getVideoPrimaryInfoRenderer().getObject("sentimentBar")
|
||||
.getObject("sentimentBarRenderer").getString("tooltip").split("/")[1];
|
||||
} catch (NullPointerException e) {
|
||||
//if this kicks in our button has no content and therefore ratings must be disabled
|
||||
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
|
||||
|
@ -432,40 +375,36 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
assertPageFetched();
|
||||
String uploaderId = null;
|
||||
try {
|
||||
return "https://www.youtube.com/channel/" +
|
||||
playerResponse.getObject("videoDetails").getString("channelId");
|
||||
} catch (Exception e) {
|
||||
String uploaderUrl = null;
|
||||
uploaderId = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||
.getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
|
||||
} catch (Exception ignored) {}
|
||||
if (uploaderId == null) {
|
||||
try {
|
||||
uploaderUrl = doc.select("div[class=\"yt-user-info\"]").first().children()
|
||||
.select("a").first().attr("abs:href");
|
||||
uploaderId = playerResponse.getObject("videoDetails").getString("channelId");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
if (uploaderUrl == null) {
|
||||
throw new ParsingException("Could not get channel link", e);
|
||||
}
|
||||
return uploaderUrl;
|
||||
}
|
||||
if (uploaderId != null) return "https://www.youtube.com/channel/" + uploaderId;
|
||||
throw new ParsingException("Could not get uploader url");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
assertPageFetched();
|
||||
String uploaderName = null;
|
||||
try {
|
||||
return playerResponse.getObject("videoDetails").getString("author");
|
||||
} catch (Exception e) {
|
||||
String name = null;
|
||||
uploaderName = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||
.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
if (uploaderName == null) {
|
||||
try {
|
||||
name = doc.select("div.yt-user-info").first().text();
|
||||
uploaderName = playerResponse.getObject("videoDetails").getString("author");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
if (name == null) {
|
||||
throw new ParsingException("Could not get uploader name");
|
||||
}
|
||||
return name;
|
||||
}
|
||||
if (uploaderName != null) return uploaderName;
|
||||
throw new ParsingException("Could not get uploader name");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
@ -475,12 +414,19 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
|
||||
String uploaderAvatarUrl = null;
|
||||
try {
|
||||
uploaderAvatarUrl = doc.select("a[class*=\"yt-user-photo\"]").first()
|
||||
.select("img").first()
|
||||
.attr("abs:data-thumb");
|
||||
} catch (Exception e) {//todo: add fallback method
|
||||
throw new ParsingException("Could not get uploader avatar url", e);
|
||||
}
|
||||
uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults")
|
||||
.getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer")
|
||||
.getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail")
|
||||
.getArray("thumbnails").getObject(0).getString("url");
|
||||
if (uploaderAvatarUrl != null && !uploaderAvatarUrl.isEmpty()) {
|
||||
return uploaderAvatarUrl;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
try {
|
||||
uploaderAvatarUrl = getVideoSecondaryInfoRenderer().getObject("owner").getObject("videoOwnerRenderer")
|
||||
.getObject("thumbnail").getArray("thumbnails").getObject(0).getString("url");
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
if (uploaderAvatarUrl == null) {
|
||||
throw new ParsingException("Could not get uploader avatar url");
|
||||
|
@ -594,13 +540,13 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
|
||||
@Override
|
||||
@Nonnull
|
||||
public List<SubtitlesStream> getSubtitlesDefault() throws IOException, ExtractionException {
|
||||
public List<SubtitlesStream> getSubtitlesDefault() {
|
||||
return getSubtitles(MediaFormat.TTML);
|
||||
}
|
||||
|
||||
@Override
|
||||
@Nonnull
|
||||
public List<SubtitlesStream> getSubtitles(final MediaFormat format) throws IOException, ExtractionException {
|
||||
public List<SubtitlesStream> getSubtitles(final MediaFormat format) {
|
||||
assertPageFetched();
|
||||
List<SubtitlesStream> subtitles = new ArrayList<>();
|
||||
for (final SubtitlesInfo subtitlesInfo : subtitlesInfos) {
|
||||
|
@ -624,18 +570,20 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public StreamInfoItem getNextStream() throws IOException, ExtractionException {
|
||||
public StreamInfoItem getNextStream() throws ExtractionException {
|
||||
assertPageFetched();
|
||||
if (isAgeRestricted) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||
.getObject("secondaryResults").getObject("secondaryResults").getArray("results")
|
||||
.getObject(0).getObject("compactAutoplayRenderer").getArray("contents")
|
||||
.getObject(0).getObject("compactVideoRenderer");
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
|
||||
Elements watch = doc.select("div[class=\"watch-sidebar-section\"]");
|
||||
if (watch.size() < 1) {
|
||||
return null;// prevent the snackbar notification "report error" on age-restricted videos
|
||||
}
|
||||
|
||||
collector.commit(extractVideoPreviewInfo(watch.first().select("li").first(), timeAgoParser));
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||
return collector.getItems().get(0);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get next video", e);
|
||||
|
@ -643,20 +591,22 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public StreamInfoItemsCollector getRelatedStreams() throws IOException, ExtractionException {
|
||||
public StreamInfoItemsCollector getRelatedStreams() throws ExtractionException {
|
||||
assertPageFetched();
|
||||
if (isAgeRestricted) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||
.getObject("secondaryResults").getObject("secondaryResults").getArray("results");
|
||||
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
Element ul = doc.select("ul[id=\"watch-related\"]").first();
|
||||
if (ul != null) {
|
||||
for (Element li : ul.children()) {
|
||||
// first check if we have a playlist. If so leave them out
|
||||
if (li.select("a[class*=\"content-link\"]").first() != null) {
|
||||
collector.commit(extractVideoPreviewInfo(li, timeAgoParser));
|
||||
}
|
||||
}
|
||||
for (Object ul : results) {
|
||||
final JsonObject videoInfo = ((JsonObject) ul).getObject("compactVideoRenderer");
|
||||
|
||||
if (videoInfo != null) collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||
}
|
||||
return collector;
|
||||
} catch (Exception e) {
|
||||
|
@ -736,6 +686,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
isAgeRestricted = false;
|
||||
}
|
||||
playerResponse = getPlayerResponse();
|
||||
initialData = YoutubeParsingHelper.getInitialData(pageHtml);
|
||||
|
||||
if (decryptionCode.isEmpty()) {
|
||||
decryptionCode = loadDecryptionCode(playerUrl);
|
||||
|
@ -752,12 +703,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return JsonParser.object().from(ytPlayerConfigRaw);
|
||||
} catch (Parser.RegexException e) {
|
||||
String errorReason = getErrorMessage();
|
||||
switch (errorReason) {
|
||||
case "":
|
||||
throw new ContentNotAvailableException("Content not available: player config empty", e);
|
||||
default:
|
||||
throw new ContentNotAvailableException("Content not available", e);
|
||||
if (errorReason.isEmpty()) {
|
||||
throw new ContentNotAvailableException("Content not available: player config empty", e);
|
||||
}
|
||||
throw new ContentNotAvailableException("Content not available", e);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not parse yt player config", e);
|
||||
}
|
||||
|
@ -912,7 +861,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
|
||||
@Nonnull
|
||||
private List<SubtitlesInfo> getAvailableSubtitlesInfo() throws SubtitlesException {
|
||||
private List<SubtitlesInfo> getAvailableSubtitlesInfo() {
|
||||
// If the video is age restricted getPlayerConfig will fail
|
||||
if (isAgeRestricted) return Collections.emptyList();
|
||||
|
||||
|
@ -926,7 +875,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
final JsonObject renderer = captions.getObject("playerCaptionsTracklistRenderer", new JsonObject());
|
||||
final JsonArray captionsArray = renderer.getArray("captionTracks", new JsonArray());
|
||||
// todo: use this to apply auto translation to different language from a source language
|
||||
final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
|
||||
// final JsonArray autoCaptionsArray = renderer.getArray("translationLanguages", new JsonArray());
|
||||
|
||||
// This check is necessary since there may be cases where subtitles metadata do not contain caption track info
|
||||
// e.g. https://www.youtube.com/watch?v=-Vpwatutnko
|
||||
|
@ -983,6 +932,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
// Utils
|
||||
//////////////////////////////////////////////////////////////////////////*/
|
||||
|
||||
private JsonObject getVideoPrimaryInfoRenderer() throws ParsingException {
|
||||
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||
.getObject("results").getObject("results").getArray("contents");
|
||||
JsonObject videoPrimaryInfoRenderer = null;
|
||||
|
||||
for (Object content : contents) {
|
||||
if (((JsonObject) content).getObject("videoPrimaryInfoRenderer") != null) {
|
||||
videoPrimaryInfoRenderer = ((JsonObject) content).getObject("videoPrimaryInfoRenderer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (videoPrimaryInfoRenderer == null) {
|
||||
throw new ParsingException("Could not find videoPrimaryInfoRenderer");
|
||||
}
|
||||
|
||||
return videoPrimaryInfoRenderer;
|
||||
}
|
||||
|
||||
private JsonObject getVideoSecondaryInfoRenderer() throws ParsingException {
|
||||
JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
|
||||
.getObject("results").getObject("results").getArray("contents");
|
||||
JsonObject videoSecondaryInfoRenderer = null;
|
||||
|
||||
for (Object content : contents) {
|
||||
if (((JsonObject) content).getObject("videoSecondaryInfoRenderer") != null) {
|
||||
videoSecondaryInfoRenderer = ((JsonObject) content).getObject("videoSecondaryInfoRenderer");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (videoSecondaryInfoRenderer == null) {
|
||||
throw new ParsingException("Could not find videoSecondaryInfoRenderer");
|
||||
}
|
||||
|
||||
return videoSecondaryInfoRenderer;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private static String getVideoInfoUrl(final String id, final String sts) {
|
||||
return "https://www.youtube.com/get_video_info?" + "video_id=" + id +
|
||||
|
@ -1026,60 +1013,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
return urlAndItags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides information about links to other videos on the video page, such as related videos.
|
||||
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
|
||||
*/
|
||||
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li, final TimeAgoParser timeAgoParser) {
|
||||
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
return li.select("a.content-link").first().attr("abs:href");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
//todo: check NullPointerException causing
|
||||
return li.select("span.title").first().text();
|
||||
//this page causes the NullPointerException, after finding it by searching for "tjvg":
|
||||
//https://www.youtube.com/watch?v=Uqg0aEhLFAg
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
return li.select("span[class*=\"attribution\"").first()
|
||||
.select("span").first().text();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
return ""; // The uploader is not linked
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
Element img = li.select("img").first();
|
||||
String thumbnailUrl = img.attr("abs:src");
|
||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
||||
// to use that if we caught such an item.
|
||||
if (thumbnailUrl.contains(".gif")) {
|
||||
thumbnailUrl = img.attr("data-thumb");
|
||||
}
|
||||
if (thumbnailUrl.startsWith("//")) {
|
||||
thumbnailUrl = HTTPS + thumbnailUrl;
|
||||
}
|
||||
return thumbnailUrl;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public List<Frameset> getFrames() throws ExtractionException {
|
||||
|
@ -1137,40 +1070,44 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getHost() throws ParsingException {
|
||||
public String getHost() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getPrivacy() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getCategory() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getLicence() {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPrivacy() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCategory() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLicence() throws ParsingException {
|
||||
return "";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Locale getLanguageInfo() throws ParsingException {
|
||||
public Locale getLanguageInfo() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public List<String> getTags() throws ParsingException {
|
||||
public List<String> getTags() {
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public String getSupportInfo() throws ParsingException {
|
||||
public String getSupportInfo() {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,19 +1,19 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.extractors;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.localization.DateWrapper;
|
||||
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
|
||||
import org.schabi.newpipe.extractor.stream.StreamType;
|
||||
import org.schabi.newpipe.extractor.utils.Utils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
||||
/*
|
||||
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
||||
|
@ -35,263 +35,190 @@ import java.util.Date;
|
|||
|
||||
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
|
||||
|
||||
private final Element item;
|
||||
private JsonObject videoInfo;
|
||||
private final TimeAgoParser timeAgoParser;
|
||||
|
||||
private String cachedUploadDate;
|
||||
|
||||
/**
|
||||
* Creates an extractor of StreamInfoItems from a YouTube page.
|
||||
*
|
||||
* @param item The page element
|
||||
* @param videoInfoItem The JSON page element
|
||||
* @param timeAgoParser A parser of the textual dates or {@code null}.
|
||||
*/
|
||||
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.item = item;
|
||||
public YoutubeStreamInfoItemExtractor(JsonObject videoInfoItem, @Nullable TimeAgoParser timeAgoParser) {
|
||||
this.videoInfo = videoInfoItem;
|
||||
this.timeAgoParser = timeAgoParser;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StreamType getStreamType() throws ParsingException {
|
||||
if (isLiveStream(item)) {
|
||||
return StreamType.LIVE_STREAM;
|
||||
} else {
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
public StreamType getStreamType() {
|
||||
try {
|
||||
if (videoInfo.getArray("badges").getObject(0).getObject("metadataBadgeRenderer").getString("label").equals("LIVE NOW")) {
|
||||
return StreamType.LIVE_STREAM;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
return StreamType.VIDEO_STREAM;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAd() throws ParsingException {
|
||||
return !item.select("span[class*=\"icon-not-available\"]").isEmpty()
|
||||
|| !item.select("span[class*=\"yt-badge-ad\"]").isEmpty()
|
||||
|| isPremiumVideo();
|
||||
}
|
||||
|
||||
private boolean isPremiumVideo() {
|
||||
Element premiumSpan = item.select("span[class=\"standalone-collection-badge-renderer-red-text\"]").first();
|
||||
if (premiumSpan == null) return false;
|
||||
|
||||
// if this span has text it most likely says ("Free Video") so we can play this
|
||||
if (premiumSpan.hasText()) return false;
|
||||
return true;
|
||||
return isPremium() || getName().equals("[Private video]") || getName().equals("[Deleted video]");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.attr("abs:href");
|
||||
String videoId = videoInfo.getString("videoId");
|
||||
return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
throw new ParsingException("Could not get url", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
String name = null;
|
||||
try {
|
||||
Element el = item.select("div[class*=\"yt-lockup-video\"]").first();
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get title", e);
|
||||
name = videoInfo.getObject("title").getString("simpleText");
|
||||
} catch (Exception ignored) {}
|
||||
if (name == null) {
|
||||
try {
|
||||
name = videoInfo.getObject("title").getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (name != null && !name.isEmpty()) return name;
|
||||
throw new ParsingException("Could not get name");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getDuration() throws ParsingException {
|
||||
try {
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) return -1;
|
||||
|
||||
final Element duration = item.select("span[class*=\"video-time\"]").first();
|
||||
// apparently on youtube, video-time element will not show up if the video has a duration of 00:00
|
||||
// see: https://www.youtube.com/results?sp=EgIQAVAU&q=asdfgf
|
||||
return duration == null ? 0 : YoutubeParsingHelper.parseDurationString(duration.text());
|
||||
return YoutubeParsingHelper.parseDurationString(videoInfo.getObject("lengthText").getString("simpleText"));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Duration: " + getUrl(), e);
|
||||
throw new ParsingException("Could not get duration", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
String name = null;
|
||||
try {
|
||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
||||
.select("a").first()
|
||||
.text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get uploader", e);
|
||||
name = videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
if (name == null) {
|
||||
try {
|
||||
name = videoInfo.getObject("ownerText").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (name == null) {
|
||||
try {
|
||||
name = videoInfo.getObject("shortBylineText").getArray("runs")
|
||||
.getObject(0).getString("text");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (name != null && !name.isEmpty()) return name;
|
||||
throw new ParsingException("Could not get uploader name");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
// this url is not always in the form "/channel/..."
|
||||
// sometimes Youtube provides urls in the form "/user/..."
|
||||
try {
|
||||
String id = null;
|
||||
try {
|
||||
return item.select("div[class=\"yt-lockup-byline\"]").first()
|
||||
.select("a").first()
|
||||
.attr("abs:href");
|
||||
} catch (Exception e){}
|
||||
|
||||
// try this if the first didn't work
|
||||
return item.select("span[class=\"title\"")
|
||||
.text().split(" - ")[0];
|
||||
id = videoInfo.getObject("longBylineText").getArray("runs")
|
||||
.getObject(0).getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint").getString("browseId");
|
||||
} catch (Exception ignored) {}
|
||||
if (id == null) {
|
||||
try {
|
||||
id = videoInfo.getObject("ownerText").getArray("runs")
|
||||
.getObject(0).getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint").getString("browseId");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (id == null) {
|
||||
try {
|
||||
id = videoInfo.getObject("shortBylineText").getArray("runs")
|
||||
.getObject(0).getObject("navigationEndpoint")
|
||||
.getObject("browseEndpoint").getString("browseId");
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
if (id == null || id.isEmpty()) {
|
||||
throw new IllegalArgumentException("is empty");
|
||||
}
|
||||
return YoutubeChannelLinkHandlerFactory.getInstance().getUrl(id);
|
||||
} catch (Exception e) {
|
||||
System.out.println(item.html());
|
||||
throw new ParsingException("Could not get uploader url", e);
|
||||
throw new ParsingException("Could not get uploader url");
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public String getTextualUploadDate() throws ParsingException {
|
||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (cachedUploadDate != null) {
|
||||
return cachedUploadDate;
|
||||
}
|
||||
|
||||
public String getTextualUploadDate() {
|
||||
try {
|
||||
if (isVideoReminder()) {
|
||||
final Calendar calendar = getDateFromReminder();
|
||||
if (calendar != null) {
|
||||
return cachedUploadDate = new SimpleDateFormat("yyyy-MM-dd HH:mm")
|
||||
.format(calendar.getTime());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
|
||||
if (meta == null) return "";
|
||||
|
||||
final Elements li = meta.select("li");
|
||||
if (li.isEmpty()) return "";
|
||||
|
||||
return cachedUploadDate = li.first().text();
|
||||
return videoInfo.getObject("publishedTimeText").getString("simpleText");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
// upload date is not always available, e.g. in playlists
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
@Override
|
||||
public DateWrapper getUploadDate() throws ParsingException {
|
||||
if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (isVideoReminder()) {
|
||||
return new DateWrapper(getDateFromReminder());
|
||||
}
|
||||
|
||||
String textualUploadDate = getTextualUploadDate();
|
||||
if (timeAgoParser != null && textualUploadDate != null && !textualUploadDate.isEmpty()) {
|
||||
return timeAgoParser.parse(textualUploadDate);
|
||||
} else {
|
||||
return null;
|
||||
try {
|
||||
return timeAgoParser.parse(textualUploadDate);
|
||||
} catch (ParsingException e) {
|
||||
throw new ParsingException("Could not get upload date", e);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getViewCount() throws ParsingException {
|
||||
String input;
|
||||
|
||||
final Element spanViewCount = item.select("span.view-count").first();
|
||||
if (spanViewCount != null) {
|
||||
input = spanViewCount.text();
|
||||
|
||||
} else if (getStreamType().equals(StreamType.LIVE_STREAM)) {
|
||||
Element meta = item.select("ul.yt-lockup-meta-info").first();
|
||||
if (meta == null) return 0;
|
||||
|
||||
final Elements li = meta.select("li");
|
||||
if (li.isEmpty()) return 0;
|
||||
|
||||
input = li.first().text();
|
||||
} else {
|
||||
try {
|
||||
Element meta = item.select("div.yt-lockup-meta").first();
|
||||
if (meta == null) return -1;
|
||||
|
||||
// This case can happen if google releases a special video
|
||||
if (meta.select("li").size() < 2) return -1;
|
||||
|
||||
input = meta.select("li").get(1).text();
|
||||
} catch (IndexOutOfBoundsException e) {
|
||||
throw new ParsingException("Could not parse yt-lockup-meta although available: " + getUrl(), e);
|
||||
}
|
||||
}
|
||||
|
||||
if (input == null) {
|
||||
throw new ParsingException("Input is null");
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(input));
|
||||
} catch (NumberFormatException e) {
|
||||
// if this happens the video probably has no views
|
||||
if (!input.isEmpty()) {
|
||||
return 0;
|
||||
if (videoInfo.getObject("topStandaloneBadge") != null || isPremium()) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not handle input: " + input, e);
|
||||
String viewCount;
|
||||
if (getStreamType() == StreamType.LIVE_STREAM) {
|
||||
viewCount = videoInfo.getObject("viewCountText")
|
||||
.getArray("runs").getObject(0).getString("text");
|
||||
} else {
|
||||
viewCount = videoInfo.getObject("viewCountText").getString("simpleText");
|
||||
}
|
||||
if (viewCount.equals("Recommended for you")) return -1;
|
||||
return Long.parseLong(Utils.removeNonDigitCharacters(viewCount));
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get view count", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
String url;
|
||||
Element te = item.select("div[class=\"yt-thumb video-thumb\"]").first()
|
||||
.select("img").first();
|
||||
url = te.attr("abs:src");
|
||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
||||
// to use that if we've caught such an item.
|
||||
if (url.contains(".gif")) {
|
||||
url = te.attr("abs:data-thumb");
|
||||
}
|
||||
return url;
|
||||
// TODO: Don't simply get the first item, but look at all thumbnails and their resolution
|
||||
return videoInfo.getObject("thumbnail").getArray("thumbnails")
|
||||
.getObject(0).getString("url");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private boolean isVideoReminder() {
|
||||
return !item.select("span.yt-uix-livereminder").isEmpty();
|
||||
}
|
||||
|
||||
private Calendar getDateFromReminder() throws ParsingException {
|
||||
final Element timeFuture = item.select("span.yt-badge.localized-date").first();
|
||||
|
||||
if (timeFuture == null) {
|
||||
throw new ParsingException("Span timeFuture is null");
|
||||
}
|
||||
|
||||
final String timestamp = timeFuture.attr("data-timestamp");
|
||||
if (!timestamp.isEmpty()) {
|
||||
try {
|
||||
final Calendar calendar = Calendar.getInstance();
|
||||
calendar.setTime(new Date(Long.parseLong(timestamp) * 1000L));
|
||||
return calendar;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not parse = \"" + timestamp + "\"");
|
||||
private boolean isPremium() {
|
||||
try {
|
||||
JsonArray badges = videoInfo.getArray("badges");
|
||||
for (Object badge : badges) {
|
||||
if (((JsonObject) badge).getObject("metadataBadgeRenderer").getString("label").equals("Premium")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw new ParsingException("Could not parse date from reminder element: \"" + timeFuture + "\"");
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic method that checks if the element contains any clues that it's a livestream item
|
||||
*/
|
||||
protected static boolean isLiveStream(Element item) {
|
||||
return !item.select("span[class*=\"yt-badge-live\"]").isEmpty()
|
||||
|| !item.select("span[class*=\"video-time-overlay-live\"]").isEmpty();
|
||||
} catch (Exception ignored) {}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,9 +20,9 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
|
|||
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
|
||||
import org.schabi.newpipe.extractor.StreamingService;
|
||||
import org.schabi.newpipe.extractor.downloader.Downloader;
|
||||
import org.schabi.newpipe.extractor.downloader.Response;
|
||||
|
@ -35,12 +35,12 @@ import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingH
|
|||
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
|
||||
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.IOException;
|
||||
|
||||
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||
import javax.annotation.Nonnull;
|
||||
|
||||
private Document doc;
|
||||
public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
||||
private JsonObject initialData;
|
||||
|
||||
public YoutubeTrendingExtractor(StreamingService service,
|
||||
ListLinkHandler linkHandler,
|
||||
|
@ -54,7 +54,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
|||
"?gl=" + getExtractorContentCountry().getCountryCode();
|
||||
|
||||
final Response response = downloader.get(url, getExtractorLocalization());
|
||||
doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
|
||||
initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -70,99 +70,36 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
|
|||
@Nonnull
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
String name;
|
||||
try {
|
||||
Element a = doc.select("a[href*=\"/feed/trending\"]").first();
|
||||
Element span = a.select("span[class*=\"display-name\"]").first();
|
||||
Element nameSpan = span.select("span").first();
|
||||
return nameSpan.text();
|
||||
name = initialData.getObject("header").getObject("feedTabbedHeaderRenderer").getObject("title")
|
||||
.getArray("runs").getObject(0).getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Trending name", e);
|
||||
}
|
||||
if (name != null && !name.isEmpty()) {
|
||||
return name;
|
||||
}
|
||||
throw new ParsingException("Could not get Trending name");
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
@Override
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
|
||||
public InfoItemsPage<StreamInfoItem> getInitialPage() {
|
||||
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
|
||||
Elements uls = doc.select("ul[class*=\"expanded-shelf-content-list\"]");
|
||||
JsonArray firstPageElements = initialData.getObject("contents").getObject("twoColumnBrowseResultsRenderer")
|
||||
.getArray("tabs").getObject(0).getObject("tabRenderer").getObject("content")
|
||||
.getObject("sectionListRenderer").getArray("contents").getObject(0).getObject("itemSectionRenderer")
|
||||
.getArray("contents").getObject(0).getObject("shelfRenderer").getObject("content")
|
||||
.getObject("expandedShelfContentsRenderer").getArray("items");
|
||||
|
||||
final TimeAgoParser timeAgoParser = getTimeAgoParser();
|
||||
|
||||
for (Element ul : uls) {
|
||||
for (final Element li : ul.children()) {
|
||||
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
|
||||
@Override
|
||||
public String getUrl() throws ParsingException {
|
||||
try {
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.attr("abs:href");
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() throws ParsingException {
|
||||
try {
|
||||
Element dl = el.select("h3").first().select("a").first();
|
||||
return dl.text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get web page url for the video", e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderUrl() throws ParsingException {
|
||||
try {
|
||||
String link = getUploaderLink().attr("abs:href");
|
||||
if (link.isEmpty()) {
|
||||
throw new IllegalArgumentException("is empty");
|
||||
}
|
||||
return link;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Uploader name");
|
||||
}
|
||||
}
|
||||
|
||||
private Element getUploaderLink() {
|
||||
// this url is not always in the form "/channel/..."
|
||||
// sometimes Youtube provides urls in the form "/user/..."
|
||||
Element uploaderEl = el.select("div[class*=\"yt-lockup-byline \"]").first();
|
||||
return uploaderEl.select("a").first();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getUploaderName() throws ParsingException {
|
||||
try {
|
||||
return getUploaderLink().text();
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get Uploader name");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getThumbnailUrl() throws ParsingException {
|
||||
try {
|
||||
String url;
|
||||
Element te = li.select("span[class=\"yt-thumb-simple\"]").first()
|
||||
.select("img").first();
|
||||
url = te.attr("abs:src");
|
||||
// Sometimes youtube sends links to gif files which somehow seem to not exist
|
||||
// anymore. Items with such gif also offer a secondary image source. So we are going
|
||||
// to use that if we've caught such an item.
|
||||
if (url.contains(".gif")) {
|
||||
url = te.attr("abs:data-thumb");
|
||||
}
|
||||
return url;
|
||||
} catch (Exception e) {
|
||||
throw new ParsingException("Could not get thumbnail url", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
for (Object ul : firstPageElements) {
|
||||
final JsonObject videoInfo = ((JsonObject) ul).getObject("videoRenderer");
|
||||
collector.commit(new YoutubeStreamInfoItemExtractor(videoInfo, timeAgoParser));
|
||||
}
|
||||
|
||||
return new InfoItemsPage<>(collector, getNextPageUrl());
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
|
||||
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonObject;
|
||||
import com.grack.nanojson.JsonParser;
|
||||
import com.grack.nanojson.JsonParserException;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.schabi.newpipe.extractor.downloader.Response;
|
||||
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
||||
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
||||
import org.schabi.newpipe.extractor.utils.Parser;
|
||||
|
||||
import java.net.URL;
|
||||
import java.text.ParseException;
|
||||
|
@ -38,6 +43,8 @@ public class YoutubeParsingHelper {
|
|||
private YoutubeParsingHelper() {
|
||||
}
|
||||
|
||||
public static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
|
||||
|
||||
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
||||
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
||||
|
||||
|
@ -143,4 +150,68 @@ public class YoutubeParsingHelper {
|
|||
uploadDate.setTime(date);
|
||||
return uploadDate;
|
||||
}
|
||||
|
||||
public static JsonObject getInitialData(String html) throws ParsingException {
|
||||
try {
|
||||
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
|
||||
return JsonParser.object().from(initialData);
|
||||
} catch (JsonParserException | Parser.RegexException e) {
|
||||
throw new ParsingException("Could not get ytInitialData", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the client version from a page
|
||||
* @param initialData
|
||||
* @param html The page HTML
|
||||
* @return
|
||||
* @throws ParsingException
|
||||
*/
|
||||
public static String getClientVersion(JsonObject initialData, String html) throws ParsingException {
|
||||
if (initialData == null) initialData = getInitialData(html);
|
||||
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
|
||||
String shortClientVersion = null;
|
||||
|
||||
// try to get version from initial data first
|
||||
for (Object service : serviceTrackingParams) {
|
||||
JsonObject s = (JsonObject) service;
|
||||
if (s.getString("service").equals("CSI")) {
|
||||
JsonArray params = s.getArray("params");
|
||||
for (Object param: params) {
|
||||
JsonObject p = (JsonObject) param;
|
||||
String key = p.getString("key");
|
||||
if (key != null && key.equals("cver")) {
|
||||
return p.getString("value");
|
||||
}
|
||||
}
|
||||
} else if (s.getString("service").equals("ECATCHER")) {
|
||||
// fallback to get a shortened client version which does not contain the last do digits
|
||||
JsonArray params = s.getArray("params");
|
||||
for (Object param: params) {
|
||||
JsonObject p = (JsonObject) param;
|
||||
String key = p.getString("key");
|
||||
if (key != null && key.equals("client.version")) {
|
||||
shortClientVersion = p.getString("value");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String clientVersion;
|
||||
String[] patterns = {
|
||||
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
||||
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
||||
"client.version=([0-9\\.]+)"
|
||||
};
|
||||
for (String pattern: patterns) {
|
||||
try {
|
||||
clientVersion = Parser.matchGroup1(pattern, html);
|
||||
if (clientVersion != null && !clientVersion.isEmpty()) return clientVersion;
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
|
||||
if (shortClientVersion != null) return shortClientVersion;
|
||||
|
||||
throw new ParsingException("Could not get client version");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,13 +24,13 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
|
|||
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
|
||||
try {
|
||||
final String url = "https://www.youtube.com/results"
|
||||
+ "?q=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
|
||||
+ "?search_query=" + URLEncoder.encode(searchString, CHARSET_UTF_8);
|
||||
|
||||
if (contentFilters.size() > 0) {
|
||||
switch (contentFilters.get(0)) {
|
||||
case VIDEOS: return url + "&sp=EgIQAVAU";
|
||||
case CHANNELS: return url + "&sp=EgIQAlAU";
|
||||
case PLAYLISTS: return url + "&sp=EgIQA1AU";
|
||||
case VIDEOS: return url + "&sp=EgIQAQ%253D%253D";
|
||||
case CHANNELS: return url + "&sp=EgIQAg%253D%253D";
|
||||
case PLAYLISTS: return url + "&sp=EgIQAw%253D%253D";
|
||||
case ALL:
|
||||
default:
|
||||
}
|
||||
|
|
|
@ -10,6 +10,9 @@ import java.util.List;
|
|||
|
||||
public class Utils {
|
||||
|
||||
public static final String HTTP = "http://";
|
||||
public static final String HTTPS = "https://";
|
||||
|
||||
private Utils() {
|
||||
//no instance
|
||||
}
|
||||
|
@ -83,9 +86,6 @@ public class Utils {
|
|||
}
|
||||
}
|
||||
|
||||
private static final String HTTP = "http://";
|
||||
private static final String HTTPS = "https://";
|
||||
|
||||
public static String replaceHttpWithHttps(final String url) {
|
||||
if (url == null) return null;
|
||||
|
||||
|
|
|
@ -20,7 +20,7 @@ import java.util.Map;
|
|||
|
||||
public class DownloaderTestImpl extends Downloader {
|
||||
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
|
||||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Firefox/68.0";
|
||||
private static final String DEFAULT_HTTP_ACCEPT_LANGUAGE = "en";
|
||||
|
||||
private static DownloaderTestImpl instance = null;
|
||||
|
|
|
@ -170,7 +170,7 @@ public class YoutubeChannelExtractorTest {
|
|||
@Test
|
||||
public void testDescription() throws Exception {
|
||||
assertTrue("What it actually was: " + extractor.getDescription(),
|
||||
extractor.getDescription().contains("Our World is Amazing. Questions? Ideas? Tweet me:"));
|
||||
extractor.getDescription().contains("Our World is Amazing. \n\nQuestions? Ideas? Tweet me:"));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -12,6 +12,8 @@ import org.schabi.newpipe.extractor.channel.ChannelInfoItem;
|
|||
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeSearchExtractor;
|
||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeSearchQueryHandlerFactory;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static java.util.Arrays.asList;
|
||||
import static org.junit.Assert.*;
|
||||
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
|
||||
|
@ -51,7 +53,12 @@ public class YoutubeSearchExtractorChannelOnlyTest extends YoutubeSearchExtracto
|
|||
|
||||
@Test
|
||||
public void testGetSecondPageUrl() throws Exception {
|
||||
assertEquals("https://www.youtube.com/results?q=pewdiepie&sp=EgIQAlAU&gl=GB&page=2", extractor.getNextPageUrl());
|
||||
// check that ctoken, continuation and itct are longer than 5 characters
|
||||
Pattern pattern = Pattern.compile(
|
||||
"https:\\/\\/www.youtube.com\\/results\\?search_query=pewdiepie&sp=EgIQAg%253D%253D&gl=GB&pbj=1"
|
||||
+ "&ctoken=[\\w%]{5,}?&continuation=[\\w%]{5,}?&itct=[\\w]{5,}?"
|
||||
);
|
||||
assertTrue(pattern.matcher(extractor.getNextPageUrl()).find());
|
||||
}
|
||||
|
||||
@Ignore
|
||||
|
|
|
@ -28,13 +28,13 @@ public class YoutubeSearchQHTest {
|
|||
|
||||
@Test
|
||||
public void testWithContentfilter() throws Exception {
|
||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAVAU", YouTube.getSearchQHFactory()
|
||||
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAQ%253D%253D", YouTube.getSearchQHFactory()
|
||||
.fromQuery("asdf", asList(new String[]{VIDEOS}), "").getUrl());
|
||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQAlAU", YouTube.getSearchQHFactory()
|
||||
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAg%253D%253D", YouTube.getSearchQHFactory()
|
||||
.fromQuery("asdf", asList(new String[]{CHANNELS}), "").getUrl());
|
||||
assertEquals("https://www.youtube.com/results?q=asdf&sp=EgIQA1AU", YouTube.getSearchQHFactory()
|
||||
assertEquals("https://www.youtube.com/results?search_query=asdf&sp=EgIQAw%253D%253D", YouTube.getSearchQHFactory()
|
||||
.fromQuery("asdf", asList(new String[]{PLAYLISTS}), "").getUrl());
|
||||
assertEquals("https://www.youtube.com/results?q=asdf", YouTube.getSearchQHFactory()
|
||||
assertEquals("https://www.youtube.com/results?search_query=asdf", YouTube.getSearchQHFactory()
|
||||
.fromQuery("asdf", asList(new String[]{"fjiijie"}), "").getUrl());
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue