Merge pull request #441 from B0pol/fix-three-attempts

Support new YouTube JSON scheme
This commit is contained in:
Tobias Groza 2020-10-28 07:24:31 +01:00 committed by GitHub
commit 354426fe5d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -3,7 +3,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject; import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser; import com.grack.nanojson.JsonParser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context; import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function; import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject; import org.mozilla.javascript.ScriptableObject;
@ -36,6 +39,8 @@ import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Parser; import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils; import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException; import java.io.IOException;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
@ -49,9 +54,6 @@ import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
@ -102,6 +104,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject videoPrimaryInfoRenderer; private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer; private JsonObject videoSecondaryInfoRenderer;
private int ageLimit; private int ageLimit;
private boolean newJsonScheme;
@Nonnull @Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>(); private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@ -156,12 +159,14 @@ public class YoutubeStreamExtractor extends StreamExtractor {
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en")); TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
Calendar parsedTime = timeAgoParser.parse(time).date(); Calendar parsedTime = timeAgoParser.parse(time).date();
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime()); return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
} catch (Exception ignored) {} } catch (Exception ignored) {
}
try { // Premiered Feb 21, 2020 try { // Premiered Feb 21, 2020
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time); Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime()); return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
} catch (Exception ignored) {} } catch (Exception ignored) {
}
} }
try { try {
@ -169,7 +174,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse( Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText"))); getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")));
return new SimpleDateFormat("yyyy-MM-dd").format(d); return new SimpleDateFormat("yyyy-MM-dd").format(d);
} catch (Exception ignored) {} } catch (Exception ignored) {
}
throw new ParsingException("Could not get upload date"); throw new ParsingException("Could not get upload date");
} }
@ -360,7 +366,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
uploaderName = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("owner") uploaderName = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("owner")
.getObject("videoOwnerRenderer").getObject("title")); .getObject("videoOwnerRenderer").getObject("title"));
} catch (ParsingException ignored) { } } catch (ParsingException ignored) {
}
if (isNullOrEmpty(uploaderName)) { if (isNullOrEmpty(uploaderName)) {
uploaderName = playerResponse.getObject("videoDetails").getString("author"); uploaderName = playerResponse.getObject("videoDetails").getString("author");
@ -650,27 +657,23 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} else { } else {
ageLimit = NO_AGE_LIMIT; ageLimit = NO_AGE_LIMIT;
JsonObject playerConfig; JsonObject playerConfig;
// sometimes at random YouTube does not provide the player config,
// so just retry the same request three times
int attempts = 2;
while (true) {
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
if (playerConfig != null) {
break;
}
if (attempts <= 0) {
throw new ParsingException(
"YouTube did not provide player config even after three attempts");
}
initialAjaxJson = getJsonResponse(url, getExtractorLocalization());
--attempts;
}
initialData = initialAjaxJson.getObject(3).getObject("response"); initialData = initialAjaxJson.getObject(3).getObject("response");
playerArgs = getPlayerArgs(playerConfig); // sometimes at random YouTube does not provide the player config
playerUrl = getPlayerUrl(playerConfig); playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
if (playerConfig == null) {
newJsonScheme = true;
final EmbeddedInfo info = getEmbeddedInfo();
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
playerUrl = info.url;
} else {
playerArgs = getPlayerArgs(playerConfig);
playerUrl = getPlayerUrl(playerConfig);
}
} }
playerResponse = getPlayerResponse(); playerResponse = getPlayerResponse();
@ -718,6 +721,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject getPlayerResponse() throws ParsingException { private JsonObject getPlayerResponse() throws ParsingException {
try { try {
String playerResponseStr; String playerResponseStr;
if (newJsonScheme) {
return initialAjaxJson.getObject(2).getObject("playerResponse");
}
if (playerArgs != null) { if (playerArgs != null) {
playerResponseStr = playerArgs.getString("player_response"); playerResponseStr = playerArgs.getString("player_response");
} else { } else {
@ -737,11 +744,30 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final String embedPageContent = downloader.get(embedUrl, getExtractorLocalization()).responseBody(); final String embedPageContent = downloader.get(embedUrl, getExtractorLocalization()).responseBody();
// Get player url // Get player url
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; String playerUrl = null;
String playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent) try {
.replace("\\", "").replace("\"", ""); final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
} catch (Parser.RegexException ex) {
// playerUrl is still available in the file, just somewhere else
final Document doc = Jsoup.parse(embedPageContent);
final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (Element elem : elems) {
if (elem.attr("src").contains("base.js")) {
playerUrl = elem.attr("src");
}
}
if (playerUrl == null) {
throw new ParsingException("Could not get playerUrl");
}
}
if (playerUrl.startsWith("//")) { if (playerUrl.startsWith("//")) {
playerUrl = HTTPS + playerUrl; playerUrl = HTTPS + playerUrl;
} else if (playerUrl.startsWith("/")) {
playerUrl = HTTPS + "//youtube.com" + playerUrl;
} }
try { try {
@ -988,7 +1014,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
urlAndItags.put(streamUrl, itagItem); urlAndItags.put(streamUrl, itagItem);
} }
} catch (UnsupportedEncodingException ignored) {} } catch (UnsupportedEncodingException ignored) {
}
} }
} }