Merge pull request #683 from XiangRongLin/yt_throttling

[YouTube] Fix buffering by decoding n parameter of stream urls
This commit is contained in:
Tobi 2021-07-28 18:01:57 +02:00 committed by GitHub
commit 394c02ad06
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 375 additions and 67 deletions

View file

@ -0,0 +1,112 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
/**
 * YouTube restricts streaming their media in multiple ways by requiring clients to apply a cipher
 * function on parameters of requests.
 * The cipher function is sent alongside as a JavaScript function.
 * <p>
 * This class handles fetching the JavaScript file in order to allow other classes to extract the
 * needed functions.
 * <p>
 * The downloaded code is kept in a plain static field without any synchronization.
 */
public final class YoutubeJavaScriptExtractor {

    private static final String HTTPS = "https:";
    private static String cachedJavaScriptCode;

    private YoutubeJavaScriptExtractor() {
    }

    /**
     * Extracts the JavaScript file. The result is cached, so subsequent calls use the result of
     * previous calls.
     *
     * @param videoId Does not influence the result, but a valid video id may help in the chance
     *                that YouTube tracks it.
     * @return The whole JavaScript file as a string.
     * @throws ParsingException If the extraction failed.
     */
    @Nonnull
    public static String extractJavaScriptCode(final String videoId) throws ParsingException {
        if (cachedJavaScriptCode == null) {
            final String playerJsUrl = cleanJavaScriptUrl(extractJavaScriptUrl(videoId));
            cachedJavaScriptCode = downloadJavaScriptCode(playerJsUrl);
        }

        return cachedJavaScriptCode;
    }

    /**
     * Same as {@link YoutubeJavaScriptExtractor#extractJavaScriptCode(String)} but with a constant
     * value for videoId.
     * Possible because the videoId has no influence on the result.
     * <p>
     * In the off chance that YouTube tracks with which video id the request is made, it may make
     * sense to pass in video ids.
     *
     * @return The whole JavaScript file as a string.
     * @throws ParsingException If the extraction failed.
     */
    @Nonnull
    public static String extractJavaScriptCode() throws ParsingException {
        return extractJavaScriptCode("d4IGg5dqeO8");
    }

    /**
     * Downloads the embed page of the given video and looks for the url of the player JavaScript
     * file in it.
     *
     * @param videoId the id used to build the embed page url
     * @return the player JavaScript url exactly as found in the page; it may be protocol-relative
     *         or host-relative, see {@link #cleanJavaScriptUrl(String)}
     * @throws ParsingException if the page could not be fetched or parsed, or if no url was found
     */
    private static String extractJavaScriptUrl(final String videoId) throws ParsingException {
        try {
            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
            final String embedPageContent = NewPipe.getDownloader()
                    .get(embedUrl, Localization.DEFAULT).responseBody();

            try {
                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
                // the captured group still contains the surrounding quotes and escaped slashes
                return Parser.matchGroup1(assetsPattern, embedPageContent)
                        .replace("\\", "").replace("\"", "");
            } catch (final Parser.RegexException ex) {
                // The url is not in the "assets" object: fall back to scanning the <script>
                // tags of the page for one whose src points to base.js.
                final Document doc = Jsoup.parse(embedPageContent);
                final Elements scripts = doc.select("script");
                for (final Element script : scripts) {
                    final String src = script.attr("src");
                    if (src.contains("base.js")) {
                        return src;
                    }
                }
            }
        } catch (final Exception e) {
            // keep the original failure as cause so that network and parsing errors stay
            // distinguishable from a page that simply does not contain the url
            throw new ParsingException(
                    "Embedded info did not provide YouTube player js url", e);
        }

        // neither the regex nor the <script> scan found anything
        throw new ParsingException("Embedded info did not provide YouTube player js url");
    }

    /**
     * Turns a protocol-relative ({@code //host/path}) or host-relative ({@code /path}) player
     * url into an absolute https url. Any other value is returned unchanged.
     *
     * @param playerJsUrl the url as found in the embed page
     * @return the absolute url
     */
    @Nonnull
    private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
        if (playerJsUrl.startsWith("//")) {
            return HTTPS + playerJsUrl;
        } else if (playerJsUrl.startsWith("/")) {
            // sometimes https://www.youtube.com part has to be added manually
            return HTTPS + "//www.youtube.com" + playerJsUrl;
        } else {
            return playerJsUrl;
        }
    }

    /**
     * Downloads the player JavaScript file.
     *
     * @param playerJsUrl absolute url of the player JavaScript file
     * @return the response body of the request
     * @throws ParsingException if the download failed; the original exception is set as cause
     */
    @Nonnull
    private static String downloadJavaScriptCode(final String playerJsUrl)
            throws ParsingException {
        try {
            return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
        } catch (final Exception e) {
            throw new ParsingException(
                    "Could not get player js code from url: " + playerJsUrl, e);
        }
    }
}

View file

@ -0,0 +1,126 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;
import javax.annotation.Nonnull;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
/**
 * <p>
 * YouTube's media is protected with a cipher,
 * which modifies the "n" query parameter of its video playback urls.
 * This class handles extracting that "n" query parameter,
 * applying the cipher on it and returning the resulting url which is not throttled.
 * </p>
 *
 * <p>
 * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
 * </p>
 * becomes
 * <p>
 * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
 * </p>
 * <br>
 * <p>
 * Decoding the "n" parameter is time intensive. For this reason, the results are cached.
 * The cache can be cleared using {@link #clearCache()}.
 * The cache is a plain static {@link HashMap} shared by all instances, without synchronization.
 * </p>
 *
 */
public class YoutubeThrottlingDecrypter {

    /** Matches the "n" query parameter; group 1 is its value. */
    private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");
    /** Matches the call site of the decode function in the player code; group 1 is its name. */
    private static final Pattern DECODE_FUNCTION_NAME_PATTERN = Pattern.compile(
            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");

    /** Maps already seen "n" values to their decoded counterpart. */
    private static final Map<String, String> nParams = new HashMap<>();

    private final String functionName;
    private final String function;

    /**
     * <p>
     * Use this if you care about the off chance that YouTube tracks with which videoId the cipher
     * is requested.
     * </p>
     * Otherwise use the no-arg constructor which uses a constant value.
     *
     * @param videoId the video id passed on when requesting the player JavaScript code
     * @throws ParsingException if the player code could not be obtained or the decode function
     *                          could not be found in it
     */
    public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(videoId);

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Creates a decrypter using the constant video id of
     * {@link YoutubeJavaScriptExtractor#extractJavaScriptCode()}.
     *
     * @throws ParsingException if the player code could not be obtained or the decode function
     *                          could not be found in it
     */
    public YoutubeThrottlingDecrypter() throws ParsingException {
        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode();

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Finds the name of the function the player applies to the "n" parameter.
     */
    private String parseDecodeFunctionName(final String playerJsCode)
            throws Parser.RegexException {
        return Parser.matchGroup1(DECODE_FUNCTION_NAME_PATTERN, playerJsCode);
    }

    /**
     * Cuts the definition {@code <name>=function(...){...};} out of the player code and rewrites
     * it as a function declaration {@code function <name>(...){...};}.
     * The definition is taken up to the first {@code ;} that is directly followed by a newline.
     */
    @Nonnull
    private String parseDecodeFunction(final String playerJsCode, final String functionName)
            throws Parser.RegexException {
        // quote the name so it is always matched literally inside the pattern
        final Pattern functionPattern = Pattern.compile(
                Pattern.quote(functionName) + "=function(.*?;)\n", Pattern.DOTALL);
        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
    }

    /**
     * Replaces the value of the "n" query parameter of the given url with its decoded value.
     * Only the parameter value itself is replaced; other parts of the url that happen to contain
     * the same text are left untouched.
     *
     * @param url the url to process
     * @return the url with the decoded "n" parameter, or the unchanged url if it has none
     * @throws Parser.RegexException kept in the signature for compatibility with existing callers
     */
    public String apply(final String url) throws Parser.RegexException {
        final java.util.regex.Matcher nParamMatcher = N_PARAM_PATTERN.matcher(url);
        if (!nParamMatcher.find()) {
            return url;
        }

        final String newNParam = decryptNParam(nParamMatcher.group(1));
        // splice the new value into the exact span of the old one
        return url.substring(0, nParamMatcher.start(1))
                + newNParam
                + url.substring(nParamMatcher.end(1));
    }

    /**
     * Returns the decoded value for the given "n" value, running the JavaScript decode function
     * only if the value is not cached yet.
     */
    private String decryptNParam(final String nParam) {
        final String cached = nParams.get(nParam);
        if (cached != null) {
            return cached;
        }

        final String decryptedNParam = JavaScript.run(function, functionName, nParam);
        nParams.put(nParam, decryptedNParam);
        return decryptedNParam;
    }

    /**
     * @return the number of the cached "n" query parameters.
     */
    public static int getCacheSize() {
        return nParams.size();
    }

    /**
     * Clears all stored "n" query parameters.
     */
    public static void clearCache() {
        nParams.clear();
    }
}

View file

@ -4,10 +4,6 @@ import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
import com.grack.nanojson.JsonParserException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
@ -24,7 +20,9 @@ import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.YoutubeJavaScriptExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils;
@ -79,13 +77,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nullable
private static String cachedDeobfuscationCode = null;
@Nullable
private String playerJsUrl = null;
private JsonArray initialAjaxJson;
private JsonObject initialData;
@Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonArray initialAjaxJson;
private JsonObject initialData;
private JsonObject playerResponse;
private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer;
@ -505,11 +500,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public List<AudioStream> getAudioStreams() throws ExtractionException {
assertPageFetched();
final List<AudioStream> audioStreams = new ArrayList<>();
final YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
try {
for (final Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FORMATS, ItagItem.ItagType.AUDIO).entrySet()) {
final ItagItem itag = entry.getValue();
final AudioStream audioStream = new AudioStream(entry.getKey(), itag);
String url = entry.getKey();
url = throttlingDecrypter.apply(url);
final AudioStream audioStream = new AudioStream(url, itag);
if (!Stream.containSimilarStream(audioStream, audioStreams)) {
audioStreams.add(audioStream);
}
@ -525,11 +524,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public List<VideoStream> getVideoStreams() throws ExtractionException {
assertPageFetched();
final List<VideoStream> videoStreams = new ArrayList<>();
final YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
try {
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
final ItagItem itag = entry.getValue();
final VideoStream videoStream = new VideoStream(entry.getKey(), false, itag);
String url = entry.getKey();
url = throttlingDecrypter.apply(url);
final VideoStream videoStream = new VideoStream(url, false, itag);
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
videoStreams.add(videoStream);
}
@ -545,11 +548,15 @@ public class YoutubeStreamExtractor extends StreamExtractor {
public List<VideoStream> getVideoOnlyStreams() throws ExtractionException {
assertPageFetched();
final List<VideoStream> videoOnlyStreams = new ArrayList<>();
final YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
try {
for (final Map.Entry<String, ItagItem> entry : getItags(ADAPTIVE_FORMATS, ItagItem.ItagType.VIDEO_ONLY).entrySet()) {
final ItagItem itag = entry.getValue();
String url = entry.getKey();
url = throttlingDecrypter.apply(url);
final VideoStream videoStream = new VideoStream(entry.getKey(), true, itag);
final VideoStream videoStream = new VideoStream(url, true, itag);
if (!Stream.containSimilarStream(videoStream, videoOnlyStreams)) {
videoOnlyStreams.add(videoStream);
}
@ -797,38 +804,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
@Nonnull
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
try {
final String embedUrl = "https://www.youtube.com/embed/" + getId();
final String embedPageContent = NewPipe.getDownloader()
.get(embedUrl, getExtractorLocalization()).responseBody();
try {
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
} catch (final Parser.RegexException ex) {
// playerJsUrl is still available in the file, just somewhere else TODO
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
final Document doc = Jsoup.parse(embedPageContent);
final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (final Element elem : elems) {
if (elem.attr("src").contains("base.js")) {
playerJsUrl = elem.attr("src");
break;
}
}
}
// Get embed sts
return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
} catch (final Exception i) {
// if it fails we simply reply with no sts as then it does not seem to be necessary
return "";
}
}
private String getDeobfuscationFuncName(final String playerCode) throws DeobfuscateException {
Parser.RegexException exception = null;
for (final String regex : REGEXES) {
@ -843,11 +818,10 @@ public class YoutubeStreamExtractor extends StreamExtractor {
throw new DeobfuscateException("Could not find deobfuscate function with any of the given patterns.", exception);
}
private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
private String loadDeobfuscationCode()
throws DeobfuscateException {
try {
final String playerCode = NewPipe.getDownloader()
.get(playerJsUrl, getExtractorLocalization()).responseBody();
final String playerCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(getId());
final String deobfuscationFunctionName = getDeobfuscationFuncName(playerCode);
final String functionPattern = "("
@ -866,8 +840,6 @@ public class YoutubeStreamExtractor extends StreamExtractor {
"function " + DEOBFUSCATION_FUNC_NAME + "(a){return " + deobfuscationFunctionName + "(a);}";
return helperObject + deobfuscateFunction + callerFunction;
} catch (final IOException ioe) {
throw new DeobfuscateException("Could not load deobfuscate function", ioe);
} catch (final Exception e) {
throw new DeobfuscateException("Could not parse deobfuscate function ", e);
}
@ -876,24 +848,7 @@ public class YoutubeStreamExtractor extends StreamExtractor {
@Nonnull
private String getDeobfuscationCode() throws ParsingException {
if (cachedDeobfuscationCode == null) {
if (playerJsUrl == null) {
// the currentPlayerJsUrl was not found in any page fetched so far and there is
// nothing cached, so try fetching embedded info
getEmbeddedInfoStsAndStorePlayerJsUrl();
if (playerJsUrl == null) {
throw new ParsingException(
"Embedded info did not provide YouTube player js url");
}
}
if (playerJsUrl.startsWith("//")) {
playerJsUrl = HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
// sometimes https://www.youtube.com part has to be added manually
playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
}
cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
cachedDeobfuscationCode = loadDeobfuscationCode();
}
return cachedDeobfuscationCode;
}

View file

@ -0,0 +1,29 @@
package org.schabi.newpipe.extractor.utils;
import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;
/**
 * Helper for running a single JavaScript function with Mozilla Rhino.
 */
public final class JavaScript {

    private JavaScript() {
    }

    /**
     * Evaluates the given JavaScript source and calls one of the functions it defines.
     *
     * @param function     JavaScript source code that defines {@code functionName}
     * @param functionName name of the function to call; also used as the source name passed to
     *                     Rhino when evaluating {@code function}
     * @param parameters   arguments passed to the function
     * @return the string representation of the value returned by the function
     */
    public static String run(final String function,
                             final String functionName,
                             final String... parameters) {
        // Enter the context before the try block, so that Context.exit() in the finally block
        // is only ever reached for a context that was actually entered.
        final Context context = Context.enter();
        try {
            // -1 selects Rhino's interpreted mode
            context.setOptimizationLevel(-1);
            final ScriptableObject scope = context.initSafeStandardObjects();

            context.evaluateString(scope, function, functionName, 1, null);
            final Function jsFunction = (Function) scope.get(functionName, scope);
            final Object result = jsFunction.call(context, scope, scope, parameters);
            return result.toString();
        } finally {
            Context.exit();
        }
    }
}

View file

@ -0,0 +1,47 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.junit.Before;
import org.junit.Test;
import org.schabi.newpipe.downloader.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.IOException;
import static org.hamcrest.CoreMatchers.allOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.MatcherAssert.assertThat;
/**
 * Checks that {@link YoutubeJavaScriptExtractor} returns the player JavaScript code.
 */
public class YoutubeJavaScriptExtractorTest {

    @Before
    public void setup() throws IOException {
        NewPipe.init(DownloaderTestImpl.getInstance());
    }

    /** The player code is returned both with an explicit video id and with the default one. */
    @Test
    public void testExtractJavaScript__success() throws ParsingException {
        assertPlayerJsCode(YoutubeJavaScriptExtractor.extractJavaScriptCode("d4IGg5dqeO8"));
        assertPlayerJsCode(YoutubeJavaScriptExtractor.extractJavaScriptCode());
    }

    /** The player code is also returned when the given video id is not a real one. */
    @Test
    public void testExtractJavaScript__invalidVideoId__success() throws ParsingException {
        assertPlayerJsCode(YoutubeJavaScriptExtractor.extractJavaScriptCode("not_a_video_id"));
        assertPlayerJsCode(YoutubeJavaScriptExtractor.extractJavaScriptCode("11-chars123"));
    }

    /**
     * Asserts that the given code contains the two markers expected in the player file.
     */
    private void assertPlayerJsCode(final String playerJsCode) {
        assertThat(
                playerJsCode,
                allOf(
                        containsString(" Copyright The Closure Library Authors.\n"
                                + " SPDX-License-Identifier: Apache-2.0"),
                        containsString("var _yt_player")
                )
        );
    }
}

View file

@ -0,0 +1,39 @@
package org.schabi.newpipe.extractor.services.youtube;
import org.junit.Before;
import org.junit.Test;
import org.schabi.newpipe.downloader.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import java.io.IOException;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertNotEquals;
/**
 * Checks {@link YoutubeThrottlingDecrypter#apply(String)} on urls with and without an "n"
 * query parameter.
 */
public class YoutubeThrottlingDecrypterTest {

    @Before
    public void setup() throws IOException {
        NewPipe.init(DownloaderTestImpl.getInstance());
    }

    @Test
    public void testDecode__success() throws ParsingException {
        // URL extracted from browser with the dev tools
        final String throttledUrl = "https://r6---sn-4g5ednek.googlevideo.com/videoplayback?expire=1626562120&ei=6AnzYO_YBpql1gLGkb_IBQ&ip=127.0.0.1&id=o-ANhBEf36Z5h-8U9DDddtPDqtS0ZNwf0XJAAigudKI2uI&itag=278&aitags=133%2C134%2C135%2C136%2C137%2C160%2C242%2C243%2C244%2C247%2C248%2C278&source=youtube&requiressl=yes&vprv=1&mime=video%2Fwebm&ns=TvecOReN0vPuXb3j_zq157IG&gir=yes&clen=2915100&dur=270.203&lmt=1608157174907785&keepalive=yes&fexp=24001373,24007246&c=WEB&txp=5535432&n=N9BWSTFT7vvBJrvQ&sparams=expire%2Cei%2Cip%2Cid%2Caitags%2Csource%2Crequiressl%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&alr=yes&sig=AOq0QJ8wRQIgW6XnUDKPDSxiT0_KE_tDDMpcaCJl2Un5p0Fu9qZNQGkCIQDWxsDHi_s2BEmRqIbd1C5g_gzfihB7RZLsScKWNMwzzA%3D%3D&cpn=9r2yt3BqcYmeb2Yu&cver=2.20210716.00.00&redirect_counter=1&cm2rm=sn-4g5ezy7s&cms_redirect=yes&mh=Y5&mm=34&mn=sn-4g5ednek&ms=ltu&mt=1626540524&mv=m&mvi=6&pl=43&lsparams=mh,mm,mn,ms,mv,mvi,pl&lsig=AG3C_xAwRQIhAIUzxTn9Vw1-vm-_7OQ5-0h1M6AZsY9Bx1FlCCTeMICzAiADtGggbn4Znsrh2EnvyOsGnYdRGcbxn4mW9JMOQiInDQ%3D%3D&range=259165-480735&rn=11&rbuf=20190";

        final YoutubeThrottlingDecrypter decrypter = new YoutubeThrottlingDecrypter();
        final String result = decrypter.apply(throttledUrl);

        // The cipher function changes over time, so we just check if the n param changed.
        assertNotEquals(throttledUrl, result);
    }

    @Test
    public void testDecode__noNParam__success() throws ParsingException {
        // without an "n" parameter the url is expected to come back unchanged
        final String urlWithoutNParam = "https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?expire=1626553257&ei=SefyYPmIFoKT1wLtqbjgCQ&ip=127.0.0.1&id=o-AIT5xGifsaEAdEOAb5vd06J9VNtm-KHHolnaZRGPjHZi&itag=140&source=youtube&requiressl=yes&mh=xO&mm=31%2C29&mn=sn-4g5ednsz%2Csn-4g5e6nsr&ms=au%2Crdu&mv=m&mvi=5&pl=24&initcwndbps=1322500&vprv=1&mime=audio%2Fmp4&ns=cA2SS5atEe0mH8tMwGTO4RIG&gir=yes&clen=3009275&dur=185.898&lmt=1626356984653961&mt=1626531173&fvip=5&keepalive=yes&fexp=24001373%2C24007246&beids=23886212&c=WEB&txp=6411222&sparams=expire%2Cei%2Cip%2Cid%2Citag%2Csource%2Crequiressl%2Cvprv%2Cmime%2Cns%2Cgir%2Cclen%2Cdur%2Clmt&lsparams=mh%2Cmm%2Cmn%2Cms%2Cmv%2Cmvi%2Cpl%2Cinitcwndbps&lsig=AG3C_xAwRgIhAPueRlTutSlzPafxrqBmgZz5m7-Zfbw3QweDp3j4XO9SAiEA5tF7_ZCJFKmS-D6I1jlUURjpjoiTbsYyKuarV4u6E8Y%3D&sig=AOq0QJ8wRQIgRD_4WwkPeTEKGVSQqPsznMJGqq4nVJ8o1ChGBCgi4Y0CIQCZT3tI40YLKBWJCh2Q7AlvuUIpN0ficzdSElLeQpJdrw==";

        final YoutubeThrottlingDecrypter decrypter = new YoutubeThrottlingDecrypter();
        final String result = decrypter.apply(urlWithoutNParam);

        assertThat(result, equalTo(urlWithoutNParam));
    }
}