Extract separate YoutubeThrottlingDecoder
This commit is contained in:
parent
a86a30103f
commit
80cf8b3acd
3 changed files with 151 additions and 51 deletions
|
@ -0,0 +1,101 @@
|
||||||
|
package org.schabi.newpipe.extractor.services.youtube;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.Javascript;
import org.schabi.newpipe.extractor.utils.Parser;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class YoutubeThrottlingDecoder {
|
||||||
|
|
||||||
|
private static final String HTTPS = "https:";
|
||||||
|
|
||||||
|
private final String functionName;
|
||||||
|
private final String function;
|
||||||
|
|
||||||
|
public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException {
|
||||||
|
String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization));
|
||||||
|
String playerJsCode = downloadPlayerJsCode(localization, playerJsUrl);
|
||||||
|
|
||||||
|
functionName = parseDecodeFunctionName(playerJsCode);
|
||||||
|
function = parseDecodeFunction(playerJsCode, functionName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException {
|
||||||
|
try {
|
||||||
|
final String embedUrl = "https://www.youtube.com/embed/" + videoId;
|
||||||
|
final String embedPageContent = NewPipe.getDownloader()
|
||||||
|
.get(embedUrl, localization).responseBody();
|
||||||
|
|
||||||
|
try {
|
||||||
|
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
|
||||||
|
return Parser.matchGroup1(assetsPattern, embedPageContent)
|
||||||
|
.replace("\\", "").replace("\"", "");
|
||||||
|
} catch (final Parser.RegexException ex) {
|
||||||
|
// playerJsUrl is still available in the file, just somewhere else TODO
|
||||||
|
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
|
||||||
|
final Document doc = Jsoup.parse(embedPageContent);
|
||||||
|
final Elements elems = doc.select("script").attr("name", "player_ias/base");
|
||||||
|
for (final Element elem : elems) {
|
||||||
|
if (elem.attr("src").contains("base.js")) {
|
||||||
|
return elem.attr("src");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (final Exception i) {
|
||||||
|
throw new ParsingException("Embedded info did not provide YouTube player js url");
|
||||||
|
}
|
||||||
|
throw new ParsingException("Embedded info did not provide YouTube player js url");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String cleanPlayerJsUrl(String playerJsUrl) {
|
||||||
|
if (playerJsUrl.startsWith("//")) {
|
||||||
|
return HTTPS + playerJsUrl;
|
||||||
|
} else if (playerJsUrl.startsWith("/")) {
|
||||||
|
// sometimes https://www.youtube.com part has to be added manually
|
||||||
|
return HTTPS + "//www.youtube.com" + playerJsUrl;
|
||||||
|
} else {
|
||||||
|
return playerJsUrl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException {
|
||||||
|
try {
|
||||||
|
return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new ParsingException("Could not get player js code from url: " + playerJsUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException {
|
||||||
|
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
|
||||||
|
return Parser.matchGroup1(pattern, playerJsCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException {
|
||||||
|
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
|
||||||
|
return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String parseNParam(String url) throws Parser.RegexException {
|
||||||
|
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
|
||||||
|
return Parser.matchGroup1(nValuePattern, url);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String decodeNParam(String nParam) {
|
||||||
|
Javascript javascript = new Javascript();
|
||||||
|
return javascript.run(function, functionName, nParam);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String replaceNParam(String url, String newValue) {
|
||||||
|
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
|
||||||
|
return nValuePattern.matcher(url).replaceFirst(newValue);
|
||||||
|
}
|
||||||
|
}
|
|
@ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser;
|
||||||
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
|
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
|
||||||
|
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder;
|
||||||
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
|
||||||
import org.schabi.newpipe.extractor.stream.*;
|
import org.schabi.newpipe.extractor.stream.*;
|
||||||
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
import org.schabi.newpipe.extractor.utils.JsonUtils;
|
||||||
|
@ -39,7 +40,6 @@ import java.time.LocalDate;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
|
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*;
|
||||||
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
|
||||||
|
@ -80,13 +80,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private static String cachedDeobfuscationCode = null;
|
private static String cachedDeobfuscationCode = null;
|
||||||
@Nullable
|
|
||||||
private String playerJsUrl = null;
|
|
||||||
|
|
||||||
private JsonArray initialAjaxJson;
|
|
||||||
private JsonObject initialData;
|
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private final Map<String, String> videoInfoPage = new HashMap<>();
|
private final Map<String, String> videoInfoPage = new HashMap<>();
|
||||||
|
private JsonArray initialAjaxJson;
|
||||||
|
private JsonObject initialData;
|
||||||
private JsonObject playerResponse;
|
private JsonObject playerResponse;
|
||||||
private JsonObject videoPrimaryInfoRenderer;
|
private JsonObject videoPrimaryInfoRenderer;
|
||||||
private JsonObject videoSecondaryInfoRenderer;
|
private JsonObject videoSecondaryInfoRenderer;
|
||||||
|
@ -526,32 +523,18 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
public List<VideoStream> getVideoStreams() throws ExtractionException {
|
public List<VideoStream> getVideoStreams() throws ExtractionException {
|
||||||
assertPageFetched();
|
assertPageFetched();
|
||||||
final List<VideoStream> videoStreams = new ArrayList<>();
|
final List<VideoStream> videoStreams = new ArrayList<>();
|
||||||
|
YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
getDeobfuscationCode();
|
|
||||||
final String playerCode = NewPipe.getDownloader()
|
|
||||||
.get(playerJsUrl, getExtractorLocalization()).responseBody();
|
|
||||||
Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
|
|
||||||
String functionName = Parser.matchGroup1(pattern, playerCode);
|
|
||||||
Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL);
|
|
||||||
String function = "function " + functionName + Parser.matchGroup1(functionPattern, playerCode);
|
|
||||||
|
|
||||||
Context context = Context.enter();
|
|
||||||
context.setOptimizationLevel(-1);
|
|
||||||
ScriptableObject scope = context.initSafeStandardObjects();
|
|
||||||
|
|
||||||
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
|
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
|
||||||
final ItagItem itag = entry.getValue();
|
final ItagItem itag = entry.getValue();
|
||||||
final String url = entry.getKey();
|
final String url = entry.getKey();
|
||||||
Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)");
|
|
||||||
String nValue = Parser.matchGroup1(nValuePattern, url);
|
|
||||||
|
|
||||||
context.evaluateString(scope, function, functionName, 1, null);
|
String oldNParam = throttlingDecoder.parseNParam(url);
|
||||||
final Function jsFunction = (Function) scope.get(functionName, scope);
|
String newNParam = throttlingDecoder.decodeNParam(oldNParam);
|
||||||
Object result = jsFunction.call(context, scope, scope, new Object[]{nValue});
|
String newUrl = throttlingDecoder.replaceNParam(url, newNParam);
|
||||||
String newNValue = Objects.toString(result, nValue);
|
|
||||||
String newUrl = nValuePattern.matcher(url).replaceFirst(newNValue);
|
System.out.println("aaaaaa " + oldNParam + " - " + newNParam);
|
||||||
System.out.println("aaaaaa " + nValue + " - " + newNValue);
|
|
||||||
final VideoStream videoStream = new VideoStream(newUrl, false, itag);
|
final VideoStream videoStream = new VideoStream(newUrl, false, itag);
|
||||||
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
|
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
|
||||||
videoStreams.add(videoStream);
|
videoStreams.add(videoStream);
|
||||||
|
@ -820,8 +803,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Nonnull
|
private String extractPlayerJsUrl() throws ParsingException {
|
||||||
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
|
|
||||||
try {
|
try {
|
||||||
final String embedUrl = "https://www.youtube.com/embed/" + getId();
|
final String embedUrl = "https://www.youtube.com/embed/" + getId();
|
||||||
final String embedPageContent = NewPipe.getDownloader()
|
final String embedPageContent = NewPipe.getDownloader()
|
||||||
|
@ -829,7 +811,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
|
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
|
||||||
playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
|
return Parser.matchGroup1(assetsPattern, embedPageContent)
|
||||||
.replace("\\", "").replace("\"", "");
|
.replace("\\", "").replace("\"", "");
|
||||||
} catch (final Parser.RegexException ex) {
|
} catch (final Parser.RegexException ex) {
|
||||||
// playerJsUrl is still available in the file, just somewhere else TODO
|
// playerJsUrl is still available in the file, just somewhere else TODO
|
||||||
|
@ -838,17 +820,25 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
final Elements elems = doc.select("script").attr("name", "player_ias/base");
|
final Elements elems = doc.select("script").attr("name", "player_ias/base");
|
||||||
for (final Element elem : elems) {
|
for (final Element elem : elems) {
|
||||||
if (elem.attr("src").contains("base.js")) {
|
if (elem.attr("src").contains("base.js")) {
|
||||||
playerJsUrl = elem.attr("src");
|
return elem.attr("src");
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get embed sts
|
|
||||||
return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
|
|
||||||
} catch (final Exception i) {
|
} catch (final Exception i) {
|
||||||
// if it fails we simply reply with no sts as then it does not seem to be necessary
|
throw new ParsingException("Embedded info did not provide YouTube player js url");
|
||||||
return "";
|
}
|
||||||
|
throw new ParsingException("Embedded info did not provide YouTube player js url");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String cleanPlayerJsUrl(String playerJsUrl) {
|
||||||
|
if (playerJsUrl.startsWith("//")) {
|
||||||
|
return HTTPS + playerJsUrl;
|
||||||
|
} else if (playerJsUrl.startsWith("/")) {
|
||||||
|
// sometimes https://www.youtube.com part has to be added manually
|
||||||
|
return HTTPS + "//www.youtube.com" + playerJsUrl;
|
||||||
|
} else {
|
||||||
|
return playerJsUrl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -899,22 +889,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
|
||||||
@Nonnull
|
@Nonnull
|
||||||
private String getDeobfuscationCode() throws ParsingException {
|
private String getDeobfuscationCode() throws ParsingException {
|
||||||
if (cachedDeobfuscationCode == null) {
|
if (cachedDeobfuscationCode == null) {
|
||||||
if (playerJsUrl == null) {
|
String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl());
|
||||||
// the currentPlayerJsUrl was not found in any page fetched so far and there is
|
|
||||||
// nothing cached, so try fetching embedded info
|
|
||||||
getEmbeddedInfoStsAndStorePlayerJsUrl();
|
|
||||||
if (playerJsUrl == null) {
|
|
||||||
throw new ParsingException(
|
|
||||||
"Embedded info did not provide YouTube player js url");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (playerJsUrl.startsWith("//")) {
|
|
||||||
playerJsUrl = HTTPS + playerJsUrl;
|
|
||||||
} else if (playerJsUrl.startsWith("/")) {
|
|
||||||
// sometimes https://www.youtube.com part has to be added manually
|
|
||||||
playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
|
|
||||||
}
|
|
||||||
|
|
||||||
cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
|
cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
package org.schabi.newpipe.extractor.utils;
|
||||||
|
|
||||||
|
import org.mozilla.javascript.Context;
|
||||||
|
import org.mozilla.javascript.Function;
|
||||||
|
import org.mozilla.javascript.ScriptableObject;
|
||||||
|
|
||||||
|
public class Javascript {
|
||||||
|
|
||||||
|
public String run(String function, String functionName, String... parameters) {
|
||||||
|
try {
|
||||||
|
Context context = Context.enter();
|
||||||
|
context.setOptimizationLevel(-1);
|
||||||
|
ScriptableObject scope = context.initSafeStandardObjects();
|
||||||
|
|
||||||
|
context.evaluateString(scope, function, functionName, 1, null);
|
||||||
|
Function jsFunction = (Function) scope.get(functionName, scope);
|
||||||
|
Object result = jsFunction.call(context, scope, scope, parameters);
|
||||||
|
return result.toString();
|
||||||
|
} finally {
|
||||||
|
Context.exit();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in a new issue