Merge pull request #441 from B0pol/fix-three-attempts

Support new YouTube JSON scheme
This commit is contained in:
Tobias Groza 2020-10-28 07:24:31 +01:00 committed by GitHub
commit 354426fe5d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 58 additions and 31 deletions

View File

@ -3,7 +3,10 @@ package org.schabi.newpipe.extractor.services.youtube.extractors;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -36,6 +39,8 @@ import org.schabi.newpipe.extractor.stream.VideoStream;
import org.schabi.newpipe.extractor.utils.Parser;
import org.schabi.newpipe.extractor.utils.Utils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.text.SimpleDateFormat;
@ -49,9 +54,6 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
@ -102,6 +104,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer;
private int ageLimit;
private boolean newJsonScheme;
@Nonnull
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@ -156,12 +159,14 @@ public class YoutubeStreamExtractor extends StreamExtractor {
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
Calendar parsedTime = timeAgoParser.parse(time).date();
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
} catch (Exception ignored) {}
} catch (Exception ignored) {
}
try { // Premiered Feb 21, 2020
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
} catch (Exception ignored) {}
} catch (Exception ignored) {
}
}
try {
@ -169,7 +174,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")));
return new SimpleDateFormat("yyyy-MM-dd").format(d);
} catch (Exception ignored) {}
} catch (Exception ignored) {
}
throw new ParsingException("Could not get upload date");
}
@ -360,7 +366,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try {
uploaderName = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("owner")
.getObject("videoOwnerRenderer").getObject("title"));
} catch (ParsingException ignored) { }
} catch (ParsingException ignored) {
}
if (isNullOrEmpty(uploaderName)) {
uploaderName = playerResponse.getObject("videoDetails").getString("author");
@ -650,27 +657,23 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} else {
ageLimit = NO_AGE_LIMIT;
JsonObject playerConfig;
// sometimes at random YouTube does not provide the player config,
// so just retry the same request three times
int attempts = 2;
while (true) {
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
if (playerConfig != null) {
break;
}
if (attempts <= 0) {
throw new ParsingException(
"YouTube did not provide player config even after three attempts");
}
initialAjaxJson = getJsonResponse(url, getExtractorLocalization());
--attempts;
}
initialData = initialAjaxJson.getObject(3).getObject("response");
playerArgs = getPlayerArgs(playerConfig);
playerUrl = getPlayerUrl(playerConfig);
// sometimes at random YouTube does not provide the player config
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
if (playerConfig == null) {
newJsonScheme = true;
final EmbeddedInfo info = getEmbeddedInfo();
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
playerUrl = info.url;
} else {
playerArgs = getPlayerArgs(playerConfig);
playerUrl = getPlayerUrl(playerConfig);
}
}
playerResponse = getPlayerResponse();
@ -718,6 +721,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
private JsonObject getPlayerResponse() throws ParsingException {
try {
String playerResponseStr;
if (newJsonScheme) {
return initialAjaxJson.getObject(2).getObject("playerResponse");
}
if (playerArgs != null) {
playerResponseStr = playerArgs.getString("player_response");
} else {
@ -737,11 +744,30 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final String embedPageContent = downloader.get(embedUrl, getExtractorLocalization()).responseBody();
// Get player url
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
String playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
String playerUrl = null;
try {
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
} catch (Parser.RegexException ex) {
// playerUrl is still available in the file, just somewhere else
final Document doc = Jsoup.parse(embedPageContent);
final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (Element elem : elems) {
if (elem.attr("src").contains("base.js")) {
playerUrl = elem.attr("src");
}
}
if (playerUrl == null) {
throw new ParsingException("Could not get playerUrl");
}
}
if (playerUrl.startsWith("//")) {
playerUrl = HTTPS + playerUrl;
} else if (playerUrl.startsWith("/")) {
playerUrl = HTTPS + "//youtube.com" + playerUrl;
}
try {
@ -988,7 +1014,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
urlAndItags.put(streamUrl, itagItem);
}
} catch (UnsupportedEncodingException ignored) {}
} catch (UnsupportedEncodingException ignored) {
}
}
}