[YouTube] Try to use lighter requests when extracting client version and key from YouTube and YouTube Music

This is done by fetching https://www.youtube.com/sw.js for YouTube and https://music.youtube.com/sw.js for YouTube Music.

Two new methods (each with and without an explicit group index) have been added to the Utils class. They try every regular expression of a String array, or of a Pattern array, against a given content and return the first result found, either for a specific group index or for group 0.
Some existing code in this class has also been refactored.
This commit is contained in:
TiA4f8R 2022-01-09 22:49:37 +01:00
parent 05b7fee23b
commit 7d07924de8
No known key found for this signature in database
GPG key ID: E6D3E7F5949450DD
3 changed files with 264 additions and 115 deletions

View file

@ -5,6 +5,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import com.grack.nanojson.JsonArray;
@ -57,20 +58,20 @@ import javax.annotation.Nullable;
* Created by Christian Schabesberger on 02.03.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* YoutubeParsingHelper.java is part of NewPipe.
* YoutubeParsingHelper.java is part of NewPipe Extractor.
*
* NewPipe is free software: you can redistribute it and/or modify
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
public final class YoutubeParsingHelper {
@ -98,6 +99,15 @@ public final class YoutubeParsingHelper {
private static boolean keyAndVersionExtracted = false;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
"client.version=([0-9\\.]+)"};
private static final String[] INNERTUBE_API_KEY_REGEXES =
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
private static final String INNERTUBE_CLIENT_NAME_REGEX =
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
private static final String CONTENT_PLAYBACK_NONCE_ALPHABET =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
@ -484,12 +494,33 @@ public final class YoutubeParsingHelper {
return hardcodedClientVersionAndKeyValid.get();
}
private static void extractClientVersionAndKey() throws IOException, ExtractionException {
private static void extractClientVersionAndKeyFromSwJs()
throws IOException, ExtractionException {
if (keyAndVersionExtracted) {
return;
}
final String url = "https://www.youtube.com/sw.js";
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
headers.put("Referer", Collections.singletonList("https://www.youtube.com"));
final String response = getDownloader().get(url, headers).responseBody();
try {
clientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
key = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) {
throw new ParsingException("Could not extract YouTube WEB InnerTube client version and API key from sw.js", e);
}
keyAndVersionExtracted = true;
}
private static void extractClientVersionAndKeyFromHtmlSearchResultsPage()
throws IOException, ExtractionException {
// Don't extract the client version and the InnerTube key if it has been already extracted
if (keyAndVersionExtracted) {
return;
}
// Don't provide a search term in order to have a smaller response
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
final Map<String, List<String>> headers = new HashMap<>();
@ -526,21 +557,10 @@ public final class YoutubeParsingHelper {
}
}
String contextClientVersion;
final String[] patterns = {
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
"client.version=([0-9\\.]+)"
};
for (final String pattern : patterns) {
try {
contextClientVersion = Parser.matchGroup1(pattern, html);
if (!isNullOrEmpty(contextClientVersion)) {
clientVersion = contextClientVersion;
break;
}
} catch (final Parser.RegexException ignored) {
}
try {
clientVersion = getStringResultFromRegexArray(html,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
} catch (final Parser.RegexException ignored) {
}
if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
@ -548,13 +568,10 @@ public final class YoutubeParsingHelper {
}
try {
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
} catch (final Parser.RegexException e1) {
try {
key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html);
} catch (final Parser.RegexException e2) {
throw new ParsingException("Could not extract client version and key");
}
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) {
throw new ParsingException(
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
}
keyAndVersionExtracted = true;
}
@ -567,7 +584,11 @@ public final class YoutubeParsingHelper {
return clientVersion;
}
extractClientVersionAndKey();
try {
extractClientVersionAndKeyFromSwJs();
} catch (final Exception e) {
extractClientVersionAndKeyFromHtmlSearchResultsPage();
}
if (keyAndVersionExtracted) {
return clientVersion;
@ -588,7 +609,11 @@ public final class YoutubeParsingHelper {
return key;
}
extractClientVersionAndKey();
try {
extractClientVersionAndKeyFromSwJs();
} catch (final Exception e) {
extractClientVersionAndKeyFromHtmlSearchResultsPage();
}
if (keyAndVersionExtracted) {
return key;
@ -682,8 +707,8 @@ public final class YoutubeParsingHelper {
return response.responseBody().length() > 500 && response.responseCode() == 200;
}
public static String[] getYoutubeMusicKey() throws IOException, ReCaptchaException,
Parser.RegexException {
public static String[] getYoutubeMusicKey()
throws IOException, ReCaptchaException, Parser.RegexException {
if (youtubeMusicKey != null && youtubeMusicKey.length == 3) {
return youtubeMusicKey;
}
@ -692,40 +717,34 @@ public final class YoutubeParsingHelper {
return youtubeMusicKey;
}
final String url = "https://music.youtube.com/";
final Map<String, List<String>> headers = new HashMap<>();
addCookieHeader(headers);
final String html = getDownloader().get(url, headers).responseBody();
String musicClientVersion = null;
String musicKey = null;
String musicClientName = null;
String innertubeApiKey;
try {
innertubeApiKey = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
} catch (final Parser.RegexException e) {
innertubeApiKey = Parser.matchGroup1("innertube_api_key\":\"([0-9a-zA-Z_-]+?)\"", html);
final String url = "https://music.youtube.com/sw.js";
final Map<String, List<String>> headers = new HashMap<>();
headers.put("Origin", Collections.singletonList("https://music.youtube.com"));
headers.put("Referer", Collections.singletonList("https://music.youtube.com"));
final String response = getDownloader().get(url, headers).responseBody();
musicClientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
musicKey = getStringResultFromRegexArray(response,
INNERTUBE_API_KEY_REGEXES, 1);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
} catch (final Exception e) {
final String url = "https://music.youtube.com/";
final Map<String, List<String>> headers = new HashMap<>();
addCookieHeader(headers);
final String html = getDownloader().get(url, headers).responseBody();
musicKey = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
musicClientVersion = getStringResultFromRegexArray(html,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, html);
}
final String innertubeClientName
= Parser.matchGroup1("INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),", html);
String innertubeClientVersion;
try {
innertubeClientVersion = Parser.matchGroup1(
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
} catch (final Parser.RegexException e) {
try {
innertubeClientVersion = Parser.matchGroup1(
"INNERTUBE_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
} catch (final Parser.RegexException ee) {
innertubeClientVersion = Parser.matchGroup1(
"innertube_context_client_version\":\"([0-9\\.]+?)\"", html);
}
}
youtubeMusicKey = new String[]{
innertubeApiKey,
innertubeClientName,
innertubeClientVersion
};
youtubeMusicKey = new String[] { musicKey, musicClientName, musicClientVersion };
return youtubeMusicKey;
}

View file

@ -1,3 +1,23 @@
/*
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* Parser.java is part of NewPipe Extractor.
*
* NewPipe Extractor is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe Extractor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe Extractor. If not, see <https://www.gnu.org/licenses/>.
*/
package org.schabi.newpipe.extractor.utils;
import org.nibor.autolink.LinkExtractor;
@ -5,39 +25,21 @@ import org.nibor.autolink.LinkSpan;
import org.nibor.autolink.LinkType;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import javax.annotation.Nonnull;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
/*
* Created by Christian Schabesberger on 02.02.16.
*
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
* Parser.java is part of NewPipe.
*
* NewPipe is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* NewPipe is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* avoid using regex !!!
* Avoid using regex !!!
*/
public final class Parser {
@ -66,8 +68,9 @@ public final class Parser {
return matchGroup(Pattern.compile(pattern), input, group);
}
public static String matchGroup(final Pattern pat, final String input, final int group)
throws RegexException {
public static String matchGroup(@Nonnull final Pattern pat,
final String input,
final int group) throws RegexException {
final Matcher matcher = pat.matcher(input);
final boolean foundMatch = matcher.find();
if (foundMatch) {
@ -75,9 +78,9 @@ public final class Parser {
} else {
// only pass input to exception message when it is not too long
if (input.length() > 1024) {
throw new RegexException("failed to find pattern \"" + pat.pattern() + "\"");
throw new RegexException("Failed to find pattern \"" + pat.pattern() + "\"");
} else {
throw new RegexException("failed to find pattern \"" + pat.pattern()
throw new RegexException("Failed to find pattern \"" + pat.pattern()
+ "\" inside of \"" + input + "\"");
}
}
@ -89,14 +92,15 @@ public final class Parser {
return mat.find();
}
public static boolean isMatch(final Pattern pattern, final String input) {
public static boolean isMatch(@Nonnull final Pattern pattern, final String input) {
final Matcher mat = pattern.matcher(input);
return mat.find();
}
public static Map<String, String> compatParseMap(final String input)
@Nonnull
public static Map<String, String> compatParseMap(@Nonnull final String input)
throws UnsupportedEncodingException {
final Map<String, String> map = new HashMap<>();
final Map<String, String> map = new HashMap<>();
for (final String arg : input.split("&")) {
final String[] splitArg = arg.split("=");
if (splitArg.length > 1) {
@ -108,9 +112,10 @@ public final class Parser {
return map;
}
@Nonnull
public static String[] getLinksFromString(final String txt) throws ParsingException {
try {
final ArrayList<String> links = new ArrayList<>();
final List<String> links = new ArrayList<>();
final LinkExtractor linkExtractor = LinkExtractor.builder()
.linkTypes(EnumSet.of(LinkType.URL, LinkType.WWW))
.build();

View file

@ -2,6 +2,8 @@ package org.schabi.newpipe.extractor.utils;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
@ -10,6 +12,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@ -36,7 +39,8 @@ public final class Utils {
* @param toRemove string to remove non-digit chars
* @return a string that contains only digits
*/
public static String removeNonDigitCharacters(final String toRemove) {
@Nonnull
public static String removeNonDigitCharacters(@Nonnull final String toRemove) {
return toRemove.replaceAll("\\D+", "");
}
@ -52,8 +56,8 @@ public final class Utils {
* @param numberWord string to be converted to a long
* @return a long
*/
public static long mixedNumberWordToLong(final String numberWord) throws NumberFormatException,
ParsingException {
public static long mixedNumberWordToLong(final String numberWord)
throws NumberFormatException, ParsingException {
String multiplier = "";
try {
multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2);
@ -94,7 +98,7 @@ public final class Utils {
return null;
}
if (!url.isEmpty() && url.startsWith(HTTP)) {
if (url.startsWith(HTTP)) {
return HTTPS + url.substring(HTTP.length());
}
return url;
@ -108,7 +112,9 @@ public final class Utils {
* @param parameterName the pattern that will be used to check the url
* @return a string that contains the value of the query parameter or null if nothing was found
*/
public static String getQueryValue(final URL url, final String parameterName) {
@Nullable
public static String getQueryValue(@Nonnull final URL url,
final String parameterName) {
final String urlQuery = url.getQuery();
if (urlQuery != null) {
@ -144,11 +150,12 @@ public final class Utils {
* @param url the string to be converted to a URL-Object
* @return a URL-Object containing the url
*/
@Nonnull
public static URL stringToURL(final String url) throws MalformedURLException {
try {
return new URL(url);
} catch (final MalformedURLException e) {
// if no protocol is given try prepending "https://"
// If no protocol is given try prepending "https://"
if (e.getMessage().equals("no protocol: " + url)) {
return new URL(HTTPS + url);
}
@ -157,8 +164,8 @@ public final class Utils {
}
}
public static boolean isHTTP(final URL url) {
// make sure its http or https
public static boolean isHTTP(@Nonnull final URL url) {
// Make sure it's HTTP or HTTPS
final String protocol = url.getProtocol();
if (!protocol.equals("http") && !protocol.equals("https")) {
return false;
@ -180,7 +187,7 @@ public final class Utils {
return url;
}
public static String removeUTF8BOM(final String s) {
public static String removeUTF8BOM(@Nonnull final String s) {
String result = s;
if (result.startsWith("\uFEFF")) {
result = result.substring(1);
@ -198,7 +205,7 @@ public final class Utils {
} catch (final MalformedURLException e) {
final String message = e.getMessage();
if (message.startsWith("unknown protocol: ")) {
// return just the protocol (e.g. vnd.youtube)
// Return just the protocol (e.g. vnd.youtube)
return message.substring("unknown protocol: ".length());
}
@ -214,17 +221,16 @@ public final class Utils {
* @return an url with no Google search redirects
*/
public static String followGoogleRedirectIfNeeded(final String url) {
// if the url is a redirect from a Google search, extract the actual url
// If the url is a redirect from a Google search, extract the actual URL
try {
final URL decoded = Utils.stringToURL(url);
if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url),
UTF_8);
return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url), UTF_8);
}
} catch (final Exception ignored) {
}
// url is not a google search redirect
// URL is not a Google search redirect
return url;
}
@ -232,13 +238,29 @@ public final class Utils {
return str == null || str.isEmpty();
}
// can be used for JsonArrays
/**
* Checks if a collection is null or empty.
*
* <p>
* This method can also be used for {@link com.grack.nanojson.JsonArray JsonArray}s.
* </p>
* @param collection the collection to check for being null or empty
* @return whether the collection is null or empty
*/
public static boolean isNullOrEmpty(final Collection<?> collection) {
return collection == null || collection.isEmpty();
}
// can be used for JsonObjects
public static boolean isNullOrEmpty(final Map<?, ?> map) {
/**
* Checks if a {@link Map map} is null or empty.
*
* <p>
* This method can also be used for {@link com.grack.nanojson.JsonObject JsonObject}s.
* </p>
* @param map the {@link Map map} to check for being null or empty
* @return whether the {@link Map map} is null or empty
*/
public static <K,V> boolean isNullOrEmpty(final Map<K, V> map) {
return map == null || map.isEmpty();
}
@ -261,8 +283,9 @@ public final class Utils {
return true;
}
@Nonnull
public static String join(final CharSequence delimiter,
final Iterable<? extends CharSequence> elements) {
@Nonnull final Iterable<? extends CharSequence> elements) {
final StringBuilder stringBuilder = new StringBuilder();
final Iterator<? extends CharSequence> iterator = elements.iterator();
while (iterator.hasNext()) {
@ -274,11 +297,14 @@ public final class Utils {
return stringBuilder.toString();
}
public static String join(final String delimiter, final String mapJoin,
final Map<? extends CharSequence, ? extends CharSequence> elements) {
@Nonnull
public static String join(
final String delimiter,
final String mapJoin,
@Nonnull final Map<? extends CharSequence, ? extends CharSequence> elements) {
final List<String> list = new LinkedList<>();
for (final Map.Entry<? extends CharSequence, ? extends CharSequence> entry : elements
.entrySet()) {
for (final Map.Entry<? extends CharSequence, ? extends CharSequence> entry
: elements.entrySet()) {
list.add(entry.getKey() + mapJoin + entry.getValue());
}
return join(delimiter, list);
@ -287,10 +313,109 @@ public final class Utils {
/**
* Concatenate all strings which are non-null, non-empty and not equal to <code>"null"</code>.
*/
@Nonnull
public static String nonEmptyAndNullJoin(final CharSequence delimiter,
final String[] elements) {
final List<String> list = new java.util.ArrayList<>(Arrays.asList(elements));
final List<String> list = new ArrayList<>(Arrays.asList(elements));
list.removeIf(s -> isNullOrEmpty(s) || s.equals("null"));
return join(delimiter, list);
}
/**
* Tries an array of string regular expressions against an input and returns the result for
* group {@code 0}.
*
* <p>
* This is a shorthand for the three-argument overload taking a string array, called with
* {@code 0} as group. Note that, assuming {@code Parser.matchGroup} forwards the group index
* to {@link java.util.regex.Matcher#group(int)}, group {@code 0} denotes the whole matched
* text and not the first capturing group; callers which need the first capturing group must
* use the three-argument overload with {@code 1}.
* </p>
*
* @param input the input on which the regular expressions are tried
* @param regexes the string array of regular expressions to try
* @return the result returned by the three-argument overload for group {@code 0}
* @throws Parser.RegexException if none of the regular expressions produced a result for the
* input
*/
@Nonnull
public static String getStringResultFromRegexArray(@Nonnull final String input,
@Nonnull final String[] regexes)
throws Parser.RegexException {
return getStringResultFromRegexArray(input, regexes, 0);
}
/**
* Tries an array of {@link Pattern}s against an input and returns the result for group
* {@code 0}.
*
* <p>
* This is a shorthand for the three-argument overload taking a {@link Pattern} array, called
* with {@code 0} as group. Note that, assuming {@code Parser.matchGroup} forwards the group
* index to {@link java.util.regex.Matcher#group(int)}, group {@code 0} denotes the whole
* matched text and not the first capturing group; callers which need the first capturing
* group must use the three-argument overload with {@code 1}.
* </p>
*
* @param input the input on which the {@link Pattern}s are tried
* @param regexes the {@link Pattern} array to try
* @return the result returned by the three-argument overload for group {@code 0}
* @throws Parser.RegexException if none of the {@link Pattern}s produced a result for the
* input
*/
@Nonnull
public static String getStringResultFromRegexArray(@Nonnull final String input,
@Nonnull final Pattern[] regexes)
throws Parser.RegexException {
return getStringResultFromRegexArray(input, regexes, 0);
}
/**
 * Tries each regular expression of a string array against an input, in array order, and
 * returns the requested group of the first one which yields a non-null result.
 *
 * <p>
 * A regular expression for which {@code Parser.matchGroup} throws a
 * {@link Parser.RegexException}, or returns {@code null}, is skipped and the next one is tried.
 * </p>
 *
 * @param input   the input on which the regular expressions are tried
 * @param regexes the string array of regular expressions, tried from first to last
 * @param group   the group index passed to {@code Parser.matchGroup}
 * @return the first non-null result obtained for the requested group
 * @throws Parser.RegexException if no regular expression produced a non-null result for the
 *                               requested group
 */
@Nonnull
public static String getStringResultFromRegexArray(@Nonnull final String input,
                                                   @Nonnull final String[] regexes,
                                                   final int group)
        throws Parser.RegexException {
    for (final String candidate : regexes) {
        final String match;
        try {
            match = Parser.matchGroup(candidate, input, group);
        } catch (final Parser.RegexException ignored) {
            // This regular expression did not match the input: try the next one
            continue;
        }

        // A null result is treated like a failed match, so only stop on a real value
        if (match != null) {
            return match;
        }
    }

    throw new Parser.RegexException("No regex matched the input on group " + group);
}
/**
 * Tries each {@link Pattern} of an array against an input, in array order, and returns the
 * requested group of the first one which yields a non-null result.
 *
 * <p>
 * A {@link Pattern} for which {@code Parser.matchGroup} throws a
 * {@link Parser.RegexException}, or returns {@code null}, is skipped and the next one is tried.
 * </p>
 *
 * @param input   the input on which the {@link Pattern}s are tried
 * @param regexes the {@link Pattern} array, tried from first to last
 * @param group   the group index passed to {@code Parser.matchGroup}
 * @return the first non-null result obtained for the requested group
 * @throws Parser.RegexException if no {@link Pattern} produced a non-null result for the
 *                               requested group
 */
@Nonnull
public static String getStringResultFromRegexArray(@Nonnull final String input,
                                                   @Nonnull final Pattern[] regexes,
                                                   final int group)
        throws Parser.RegexException {
    for (final Pattern candidate : regexes) {
        final String match;
        try {
            match = Parser.matchGroup(candidate, input, group);
        } catch (final Parser.RegexException ignored) {
            // This pattern did not match the input: try the next one
            continue;
        }

        // A null result is treated like a failed match, so only stop on a real value
        if (match != null) {
            return match;
        }
    }

    throw new Parser.RegexException("No regex matched the input on group " + group);
}
}