Move getInitialData() method to YoutubeParsingHelper

Rename ytInitialData to initialData
This commit is contained in:
TobiGr 2020-02-22 23:51:02 +01:00
parent 38aabc6aca
commit 5816202cc7
6 changed files with 35 additions and 63 deletions

View File

@ -52,7 +52,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000"; private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
private Document doc; private Document doc;
private JsonObject ytInitialData; private JsonObject initialData;
public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) { public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -63,17 +63,9 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS; String channelUrl = super.getUrl() + CHANNEL_URL_PARAMETERS;
final Response response = downloader.get(channelUrl, getExtractorLocalization()); final Response response = downloader.get(channelUrl, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response); doc = YoutubeParsingHelper.parseAndCheckPage(channelUrl, response);
ytInitialData = getInitialData(); initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
} }
/**
 * Pulls the JSON object assigned to {@code window["ytInitialData"]} out of the
 * serialized form of {@code doc}.
 *
 * @return the parsed initial-data object
 * @throws ParsingException if the regex lookup or the JSON parsing fails
 */
private JsonObject getInitialData() throws ParsingException {
    // Non-greedy capture: group 1 ends at the earliest "};" the regex can match.
    final String pattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";
    try {
        final String json = Parser.matchGroup1(pattern, doc.toString());
        return JsonParser.object().from(json);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
@Override @Override
public String getNextPageUrl() throws ExtractionException { public String getNextPageUrl() throws ExtractionException {
@ -97,7 +89,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
return doc.select("meta[property=\"og:url\"]").first().attr("content").replace(CHANNEL_URL_BASE, ""); return doc.select("meta[property=\"og:url\"]").first().attr("content").replace(CHANNEL_URL_BASE, "");
} catch (Exception ignored) {} } catch (Exception ignored) {}
try { try {
return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId"); return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("navigationEndpoint").getObject("browseEndpoint").getString("browseId");
} catch (Exception ignored) {} } catch (Exception ignored) {}
// fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO) // fallback method; does not work with channels that have no "Subscribe" button (e.g. EminemVEVO)
@ -124,7 +116,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getAvatarUrl() throws ParsingException { public String getAvatarUrl() throws ParsingException {
try { try {
return ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url"); return initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("avatar").getArray("thumbnails").getObject(0).getString("url");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get avatar", e); throw new ParsingException("Could not get avatar", e);
} }
@ -133,7 +125,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getBannerUrl() throws ParsingException { public String getBannerUrl() throws ParsingException {
try { try {
String url = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url"); String url = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("banner").getArray("thumbnails").getObject(0).getString("url");
if (url.contains("s.ytimg.com") || url.contains("default_banner")) { if (url.contains("s.ytimg.com") || url.contains("default_banner")) {
return null; return null;
} }
@ -165,7 +157,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public long getSubscriberCount() throws ParsingException { public long getSubscriberCount() throws ParsingException {
final JsonObject subscriberInfo = ytInitialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText"); final JsonObject subscriberInfo = initialData.getObject("header").getObject("c4TabbedHeaderRenderer").getObject("subscriberCountText");
if (subscriberInfo != null) { if (subscriberInfo != null) {
try { try {
@ -182,7 +174,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override @Override
public String getDescription() throws ParsingException { public String getDescription() throws ParsingException {
try { try {
return ytInitialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description"); return initialData.getObject("metadata").getObject("channelMetadataRenderer").getString("description");
} catch (Exception e) { } catch (Exception e) {
throw new ParsingException("Could not get channel description", e); throw new ParsingException("Could not get channel description", e);
} }

View File

@ -47,21 +47,12 @@ public class YoutubePlaylistExtractor extends PlaylistExtractor {
final String url = getUrl(); final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = getInitialData(); initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
uploaderInfo = getUploaderInfo(); uploaderInfo = getUploaderInfo();
playlistInfo = getPlaylistInfo(); playlistInfo = getPlaylistInfo();
playlistVideos = getPlaylistVideos(); playlistVideos = getPlaylistVideos();
} }
/**
 * Pulls the JSON object assigned to {@code window["ytInitialData"]} out of the
 * serialized form of {@code doc}.
 *
 * @return the parsed initial-data object
 * @throws ParsingException if the regex lookup or the JSON parsing fails
 */
private JsonObject getInitialData() throws ParsingException {
    // Non-greedy capture: group 1 ends at the earliest "};" the regex can match.
    final String pattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";
    try {
        final String json = Parser.matchGroup1(pattern, doc.toString());
        return JsonParser.object().from(json);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
private JsonObject getUploaderInfo() throws ParsingException { private JsonObject getUploaderInfo() throws ParsingException {
JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items"); JsonArray items = initialData.getObject("sidebar").getObject("playlistSidebarRenderer").getArray("items");
try { try {

View File

@ -51,7 +51,7 @@ import javax.annotation.Nonnull;
public class YoutubeSearchExtractor extends SearchExtractor { public class YoutubeSearchExtractor extends SearchExtractor {
private Document doc; private Document doc;
private JsonObject ytInitialData; private JsonObject initialData;
public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) { public YoutubeSearchExtractor(StreamingService service, SearchQueryHandler linkHandler) {
super(service, linkHandler); super(service, linkHandler);
@ -62,7 +62,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
final String url = getUrl(); final String url = getUrl();
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
ytInitialData = getInitialData(); initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
} }
@Nonnull @Nonnull
@ -119,7 +119,7 @@ public class YoutubeSearchExtractor extends SearchExtractor {
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
JsonArray list = ytInitialData.getObject("contents").getObject("twoColumnSearchResultsRenderer") JsonArray list = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents") .getObject("primaryContents").getObject("sectionListRenderer").getArray("contents")
.getObject(0).getObject("itemSectionRenderer").getArray("contents"); .getObject(0).getObject("itemSectionRenderer").getArray("contents");
@ -138,12 +138,4 @@ public class YoutubeSearchExtractor extends SearchExtractor {
return collector; return collector;
} }
/**
 * Pulls the JSON object assigned to {@code window["ytInitialData"]} out of the
 * serialized form of {@code doc}.
 *
 * @return the parsed initial-data object
 * @throws ParsingException if the regex lookup or the JSON parsing fails
 */
private JsonObject getInitialData() throws ParsingException {
    // Non-greedy capture: group 1 ends at the earliest "};" the regex can match.
    final String pattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";
    try {
        final String json = Parser.matchGroup1(pattern, doc.toString());
        return JsonParser.object().from(json);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
} }

View File

@ -104,7 +104,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull @Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>(); private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonObject playerResponse; private JsonObject playerResponse;
private JsonObject ytInitialData; private JsonObject initialData;
@Nonnull @Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>(); private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@ -339,7 +339,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
if (getStreamType().equals(StreamType.LIVE_STREAM)) { if (getStreamType().equals(StreamType.LIVE_STREAM)) {
// The array index is variable, therefore we loop through the complete array. // The array index is variable, therefore we loop through the complete array.
// videoPrimaryInfoRenderer is often stored at index 1 // videoPrimaryInfoRenderer is often stored at index 1
JsonArray contents = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") JsonArray contents = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("results").getObject("results").getArray("contents"); .getObject("results").getObject("results").getArray("contents");
for (Object c : contents) { for (Object c : contents) {
try { try {
@ -421,7 +421,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String likesString = ""; String likesString = "";
try { try {
try { try {
likesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0]; likesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[0];
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -444,7 +444,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
Element button = doc.select("button.like-button-renderer-dislike-button").first(); Element button = doc.select("button.like-button-renderer-dislike-button").first();
try { try {
dislikesString = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1]; dislikesString = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results").getObject("results").getArray("contents").getObject(0).getObject("videoPrimaryInfoRenderer").getObject("sentimentBar").getObject("sentimentBarRenderer").getString("tooltip").split("/")[1];
} catch (NullPointerException e) { } catch (NullPointerException e) {
//if this kicks in our button has no content and therefore ratings must be disabled //if this kicks in our button has no content and therefore ratings must be disabled
if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) { if (playerResponse.getObject("videoDetails").getBoolean("allowRatings")) {
@ -507,7 +507,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
String uploaderAvatarUrl = null; String uploaderAvatarUrl = null;
try { try {
uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults") uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("secondaryResults")
.getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer") .getObject("secondaryResults").getArray("results").getObject(0).getObject("compactAutoplayRenderer")
.getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail") .getArray("contents").getObject(0).getObject("compactVideoRenderer").getObject("channelThumbnail")
.getArray("thumbnails").getObject(0).getString("url"); .getArray("thumbnails").getObject(0).getString("url");
@ -517,7 +517,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} catch (Exception ignored) {} } catch (Exception ignored) {}
try { try {
uploaderAvatarUrl = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results") uploaderAvatarUrl = initialData.getObject("contents").getObject("twoColumnWatchNextResults").getObject("results")
.getObject("results").getArray("contents").getObject(1).getObject("videoSecondaryInfoRenderer") .getObject("results").getArray("contents").getObject(1).getObject("videoSecondaryInfoRenderer")
.getObject("owner").getObject("videoOwnerRenderer").getObject("thumbnail").getArray("thumbnails") .getObject("owner").getObject("videoOwnerRenderer").getObject("thumbnail").getArray("thumbnails")
.getObject(0).getString("url"); .getObject(0).getString("url");
@ -670,7 +670,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public StreamInfoItem getNextStream() throws IOException, ExtractionException { public StreamInfoItem getNextStream() throws IOException, ExtractionException {
assertPageFetched(); assertPageFetched();
try { try {
final JsonObject videoInfo = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") final JsonObject videoInfo = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results") .getObject("secondaryResults").getObject("secondaryResults").getArray("results")
.getObject(0).getObject("compactAutoplayRenderer").getArray("contents") .getObject(0).getObject("compactAutoplayRenderer").getArray("contents")
.getObject(0).getObject("compactVideoRenderer"); .getObject(0).getObject("compactVideoRenderer");
@ -690,7 +690,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
assertPageFetched(); assertPageFetched();
try { try {
StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId()); StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
JsonArray results = ytInitialData.getObject("contents").getObject("twoColumnWatchNextResults") JsonArray results = initialData.getObject("contents").getObject("twoColumnWatchNextResults")
.getObject("secondaryResults").getObject("secondaryResults").getArray("results"); .getObject("secondaryResults").getObject("secondaryResults").getArray("results");
final TimeAgoParser timeAgoParser = getTimeAgoParser(); final TimeAgoParser timeAgoParser = getTimeAgoParser();
@ -778,7 +778,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
isAgeRestricted = false; isAgeRestricted = false;
} }
playerResponse = getPlayerResponse(); playerResponse = getPlayerResponse();
ytInitialData = getInitialData(); initialData = YoutubeParsingHelper.getInitialData(pageHtml);
if (decryptionCode.isEmpty()) { if (decryptionCode.isEmpty()) {
decryptionCode = loadDecryptionCode(playerUrl); decryptionCode = loadDecryptionCode(playerUrl);
@ -852,14 +852,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
/**
 * Pulls the JSON object assigned to {@code window["ytInitialData"]} out of the
 * serialized form of {@code doc}.
 *
 * @return the parsed initial-data object
 * @throws ParsingException if the regex lookup or the JSON parsing fails
 */
private JsonObject getInitialData() throws ParsingException {
    // Non-greedy capture: group 1 ends at the earliest "};" the regex can match.
    final String pattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";
    try {
        final String json = Parser.matchGroup1(pattern, doc.toString());
        return JsonParser.object().from(json);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
@Nonnull @Nonnull
private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException { private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaException {

View File

@ -61,16 +61,7 @@ public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {
final Response response = downloader.get(url, getExtractorLocalization()); final Response response = downloader.get(url, getExtractorLocalization());
doc = YoutubeParsingHelper.parseAndCheckPage(url, response); doc = YoutubeParsingHelper.parseAndCheckPage(url, response);
initialData = getInitialData(); initialData = YoutubeParsingHelper.getInitialData(response.responseBody());
}
private JsonObject getInitialData() throws ParsingException {
try {
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", doc.toString());
return JsonParser.object().from(initialData);
} catch (JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e);
}
} }
@Override @Override

View File

@ -1,11 +1,15 @@
package org.schabi.newpipe.extractor.services.youtube.linkHandler; package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.schabi.newpipe.extractor.downloader.Response; import org.schabi.newpipe.extractor.downloader.Response;
import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException; import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.utils.Parser;
import java.net.URL; import java.net.URL;
import java.text.ParseException; import java.text.ParseException;
@ -143,4 +147,14 @@ public class YoutubeParsingHelper {
uploadDate.setTime(date); uploadDate.setTime(date);
return uploadDate; return uploadDate;
} }
/**
 * Extracts the JSON object assigned to {@code window["ytInitialData"]} from
 * the given page markup and parses it.
 *
 * @param html the page text to search for the {@code ytInitialData} assignment
 * @return the parsed initial-data object
 * @throws ParsingException if the regex lookup or the JSON parsing fails
 */
public static JsonObject getInitialData(String html) throws ParsingException {
    // Non-greedy capture: group 1 ends at the earliest "};" the regex can match.
    final String pattern = "window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});";
    try {
        final String json = Parser.matchGroup1(pattern, html);
        return JsonParser.object().from(json);
    } catch (Parser.RegexException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    } catch (JsonParserException e) {
        throw new ParsingException("Could not get ytInitialData", e);
    }
}
} }