Extract separate YoutubeThrottlingDecoder

This commit is contained in:
XiangRongLin 2021-07-12 20:06:19 +02:00
parent a86a30103f
commit 80cf8b3acd
3 changed files with 151 additions and 51 deletions

View file

@ -0,0 +1,101 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.Javascript;
import org.schabi.newpipe.extractor.utils.Parser;
import java.util.regex.Pattern;
/**
 * Decodes the {@code n} query parameter of YouTube stream URLs.
 *
 * <p>On construction the embed page of the given video is downloaded, the URL of the
 * player JavaScript file is located in it, the player code is downloaded, and the
 * JavaScript function that the player applies to the {@code n} parameter is extracted.
 * That function can then be run on arbitrary {@code n} values with
 * {@link #decodeNParam(String)}.
 *
 * <p>Typical usage:
 * <pre>
 * String oldN = decoder.parseNParam(url);
 * String newN = decoder.decodeNParam(oldN);
 * String newUrl = decoder.replaceNParam(url, newN);
 * </pre>
 */
public class YoutubeThrottlingDecoder {

    private static final String HTTPS = "https:";

    /** Locates the player js url inside the {@code "assets"} object of the embed page. */
    private static final String ASSETS_PATTERN = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";

    /**
     * Matches the player code fragment {@code b=a.get("n"))&&(b=NAME(b),a.set("n",b)}
     * and captures {@code NAME}, the function applied to the {@code n} value.
     */
    private static final Pattern FUNCTION_NAME_PATTERN = Pattern.compile(
            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");

    /** Matches an {@code n} query parameter; group 1 is its value. */
    private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");

    private final String functionName;
    private final String function;

    /**
     * Downloads the player code belonging to {@code videoId} and extracts the decode function.
     *
     * @param videoId      id of the video whose embed page is used to find the player js url
     * @param localization localization passed to the downloader for both requests
     * @throws ParsingException if the player js url cannot be found, the player code cannot
     *                          be downloaded, or the decode function cannot be located in it
     */
    public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException {
        final String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization));
        final String playerJsCode = downloadPlayerJsCode(localization, playerJsUrl);

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Fetches the embed page of the video and returns the (possibly relative) player js url.
     *
     * @throws ParsingException if the page cannot be fetched or contains no player js url
     */
    private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException {
        try {
            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
            final String embedPageContent = NewPipe.getDownloader()
                    .get(embedUrl, localization).responseBody();

            try {
                // The captured value is a quoted JSON string: strip escapes and quotes.
                return Parser.matchGroup1(ASSETS_PATTERN, embedPageContent)
                        .replace("\\", "").replace("\"", "");
            } catch (final Parser.RegexException ex) {
                // Not present in the "assets" object: fall back to scanning the <script>
                // tags of the page for one whose src points at base.js.
                final Document doc = Jsoup.parse(embedPageContent);
                final Elements elems = doc.select("script");
                for (final Element elem : elems) {
                    final String src = elem.attr("src");
                    if (src.contains("base.js")) {
                        return src;
                    }
                }
            }
        } catch (final Exception e) {
            // Keep the cause so download/parsing failures remain diagnosable.
            throw new ParsingException("Embedded info did not provide YouTube player js url", e);
        }
        throw new ParsingException("Embedded info did not provide YouTube player js url");
    }

    /**
     * Turns a protocol-relative ({@code //host/path}) or host-relative ({@code /path})
     * player js url into an absolute https url; other urls are returned unchanged.
     */
    private String cleanPlayerJsUrl(String playerJsUrl) {
        if (playerJsUrl.startsWith("//")) {
            return HTTPS + playerJsUrl;
        } else if (playerJsUrl.startsWith("/")) {
            // sometimes https://www.youtube.com part has to be added manually
            return HTTPS + "//www.youtube.com" + playerJsUrl;
        } else {
            return playerJsUrl;
        }
    }

    /**
     * Downloads the player JavaScript code.
     *
     * @throws ParsingException if the download fails for any reason
     */
    private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException {
        try {
            return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody();
        } catch (final Exception e) {
            throw new ParsingException("Could not get player js code from url: " + playerJsUrl, e);
        }
    }

    /**
     * Finds the name of the function the player applies to the {@code n} parameter.
     *
     * @throws Parser.RegexException if the expected code fragment is not in the player code
     */
    private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException {
        return Parser.matchGroup1(FUNCTION_NAME_PATTERN, playerJsCode);
    }

    /**
     * Extracts the source of {@code functionName} from the player code and rewrites it from
     * the {@code name=function(...){...};} form into a {@code function name(...){...};}
     * declaration.
     *
     * @throws Parser.RegexException if no such function assignment is found
     */
    private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException {
        // The name is quoted so it is always matched literally inside the regex.
        // The body is taken up to the first ';' that is directly followed by a newline.
        final Pattern functionPattern = Pattern.compile(
                Pattern.quote(functionName) + "=function(.*?;)\n", Pattern.DOTALL);
        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
    }

    /**
     * Returns the value of the first {@code n} query parameter of {@code url}.
     *
     * @throws Parser.RegexException if the url has no {@code n} parameter
     */
    public String parseNParam(String url) throws Parser.RegexException {
        return Parser.matchGroup1(N_PARAM_PATTERN, url);
    }

    /**
     * Runs the extracted decode function on {@code nParam} and returns its result as a string.
     */
    public String decodeNParam(String nParam) {
        final Javascript javascript = new Javascript();
        return javascript.run(function, functionName, nParam);
    }

    /**
     * Returns {@code url} with the value of its first {@code n} query parameter replaced by
     * {@code newValue}. Only the value is replaced: the leading {@code ?}/{@code &} and the
     * {@code n=} key are kept, and {@code newValue} is inserted literally (characters such as
     * {@code $} or {@code \} have no special meaning). If the url has no {@code n} parameter
     * it is returned unchanged.
     */
    public String replaceNParam(String url, String newValue) {
        final java.util.regex.Matcher matcher = N_PARAM_PATTERN.matcher(url);
        if (!matcher.find()) {
            return url;
        }
        return url.substring(0, matcher.start(1)) + newValue + url.substring(matcher.end(1));
    }
}

View file

@ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem; import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*; import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.JsonUtils;
@ -39,7 +40,6 @@ import java.time.LocalDate;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.*; import java.util.*;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*; import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING; import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
@ -80,13 +80,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nullable @Nullable
private static String cachedDeobfuscationCode = null; private static String cachedDeobfuscationCode = null;
@Nullable
private String playerJsUrl = null;
private JsonArray initialAjaxJson;
private JsonObject initialData;
@Nonnull @Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>(); private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonArray initialAjaxJson;
private JsonObject initialData;
private JsonObject playerResponse; private JsonObject playerResponse;
private JsonObject videoPrimaryInfoRenderer; private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer; private JsonObject videoSecondaryInfoRenderer;
@ -526,32 +523,18 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public List<VideoStream> getVideoStreams() throws ExtractionException { public List<VideoStream> getVideoStreams() throws ExtractionException {
assertPageFetched(); assertPageFetched();
final List<VideoStream> videoStreams = new ArrayList<>(); final List<VideoStream> videoStreams = new ArrayList<>();
YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization());
try { try {
getDeobfuscationCode();
final String playerCode = NewPipe.getDownloader()
.get(playerJsUrl, getExtractorLocalization()).responseBody();
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
String functionName = Parser.matchGroup1(pattern, playerCode);
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
String function = "function " + functionName + Parser.matchGroup1(functionPattern, playerCode);
Context context = Context.enter();
context.setOptimizationLevel(-1);
ScriptableObject scope = context.initSafeStandardObjects();
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) { for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
final ItagItem itag = entry.getValue(); final ItagItem itag = entry.getValue();
final String url = entry.getKey(); final String url = entry.getKey();
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
String nValue = Parser.matchGroup1(nValuePattern, url);
context.evaluateString(scope, function, functionName, 1, null); String oldNParam = throttlingDecoder.parseNParam(url);
final Function jsFunction = (Function) scope.get(functionName, scope); String newNParam = throttlingDecoder.decodeNParam(oldNParam);
Object result = jsFunction.call(context, scope, scope, new Object[]{nValue}); String newUrl = throttlingDecoder.replaceNParam(url, newNParam);
String newNValue = Objects.toString(result, nValue);
String newUrl = nValuePattern.matcher(url).replaceFirst(newNValue); System.out.println("aaaaaa " + oldNParam + " - " + newNParam);
System.out.println("aaaaaa " + nValue + " - " + newNValue);
final VideoStream videoStream = new VideoStream(newUrl, false, itag); final VideoStream videoStream = new VideoStream(newUrl, false, itag);
if (!Stream.containSimilarStream(videoStream, videoStreams)) { if (!Stream.containSimilarStream(videoStream, videoStreams)) {
videoStreams.add(videoStream); videoStreams.add(videoStream);
@ -820,8 +803,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
} }
} }
@Nonnull private String extractPlayerJsUrl() throws ParsingException {
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
try { try {
final String embedUrl = "https://www.youtube.com/embed/" + getId(); final String embedUrl = "https://www.youtube.com/embed/" + getId();
final String embedPageContent = NewPipe.getDownloader() final String embedPageContent = NewPipe.getDownloader()
@ -829,7 +811,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
try { try {
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent) return Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", ""); .replace("\\", "").replace("\"", "");
} catch (final Parser.RegexException ex) { } catch (final Parser.RegexException ex) {
// playerJsUrl is still available in the file, just somewhere else TODO // playerJsUrl is still available in the file, just somewhere else TODO
@ -838,17 +820,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
final Elements elems = doc.select("script").attr("name", "player_ias/base"); final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (final Element elem : elems) { for (final Element elem : elems) {
if (elem.attr("src").contains("base.js")) { if (elem.attr("src").contains("base.js")) {
playerJsUrl = elem.attr("src"); return elem.attr("src");
break;
} }
} }
} }
// Get embed sts
return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
} catch (final Exception i) { } catch (final Exception i) {
// if it fails we simply reply with no sts as then it does not seem to be necessary throw new ParsingException("Embedded info did not provide YouTube player js url");
return ""; }
throw new ParsingException("Embedded info did not provide YouTube player js url");
}
private String cleanPlayerJsUrl(String playerJsUrl) {
if (playerJsUrl.startsWith("//")) {
return HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
// sometimes https://www.youtube.com part has to be added manually
return HTTPS + "//www.youtube.com" + playerJsUrl;
} else {
return playerJsUrl;
} }
} }
@ -899,22 +889,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull @Nonnull
private String getDeobfuscationCode() throws ParsingException { private String getDeobfuscationCode() throws ParsingException {
if (cachedDeobfuscationCode == null) { if (cachedDeobfuscationCode == null) {
if (playerJsUrl == null) { String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl());
// the currentPlayerJsUrl was not found in any page fetched so far and there is
// nothing cached, so try fetching embedded info
getEmbeddedInfoStsAndStorePlayerJsUrl();
if (playerJsUrl == null) {
throw new ParsingException(
"Embedded info did not provide YouTube player js url");
}
}
if (playerJsUrl.startsWith("//")) {
playerJsUrl = HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
// sometimes https://www.youtube.com part has to be added manually
playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
}
cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl); cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
} }

View file

@ -0,0 +1,24 @@
package org.schabi.newpipe.extractor.utils;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
/**
 * Minimal helper for running a single JavaScript function with Rhino.
 */
public class Javascript {

    /**
     * Evaluates {@code function} in a fresh scope, looks up {@code functionName} in that
     * scope and calls it with {@code parameters}.
     *
     * @param function     JavaScript source that defines the function to call
     * @param functionName name under which the function is looked up after evaluation; it is
     *                     also passed to Rhino as the source name of the evaluated script
     * @param parameters   arguments passed to the function, in order
     * @return the {@code toString()} of the value returned by the function
     * @throws ClassCastException if the value found under {@code functionName} is not a
     *                            function
     */
    public String run(String function, String functionName, String... parameters) {
        // Enter before the try block so that Context.exit() in the finally clause is only
        // reached when a context was actually entered.
        final Context context = Context.enter();
        try {
            // -1 selects Rhino's interpreted mode (see the Rhino Context documentation).
            context.setOptimizationLevel(-1);
            final ScriptableObject scope = context.initSafeStandardObjects();

            context.evaluateString(scope, function, functionName, 1, null);
            final Function jsFunction = (Function) scope.get(functionName, scope);

            // Hand Rhino a real Object[]: a String[] passed as Object[] would throw
            // ArrayStoreException if anything stored a non-String element into it.
            final Object[] args = new Object[parameters.length];
            System.arraycopy(parameters, 0, args, 0, parameters.length);

            final Object result = jsFunction.call(context, scope, scope, args);
            return result.toString();
        } finally {
            Context.exit();
        }
    }
}