Address requested changes

This commit is contained in:
TiA4f8R 2022-02-05 22:05:07 +01:00
parent b6bc521f0d
commit d0d91e6690
No known key found for this signature in database
GPG key ID: E6D3E7F5949450DD
2 changed files with 124 additions and 46 deletions

View file

@ -7,6 +7,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8; import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray; import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty; import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.randomStringFromAlphabet;
import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder; import com.grack.nanojson.JsonBuilder;
@ -36,7 +37,6 @@ import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.time.LocalDate; import java.time.LocalDate;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.time.ZoneOffset; import java.time.ZoneOffset;
@ -83,6 +83,11 @@ public final class YoutubeParsingHelper {
public static final String CPN = "cpn"; public static final String CPN = "cpn";
public static final String VIDEO_ID = "videoId"; public static final String VIDEO_ID = "videoId";
/**
* Seed that will be used for video tests, in order to mock video requests.
*/
private static final long SEED_FOR_VIDEOS_TESTS = 3000;
private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00"; private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00";
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
@ -100,6 +105,7 @@ public final class YoutubeParsingHelper {
private static boolean keyAndVersionExtracted = false; private static boolean keyAndVersionExtracted = false;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType") @SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty(); private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES = private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", {"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"", "innertube_context_client_version\":\"([0-9\\.]+?)\"",
@ -107,6 +113,9 @@ public final class YoutubeParsingHelper {
private static final String[] INNERTUBE_API_KEY_REGEXES = private static final String[] INNERTUBE_API_KEY_REGEXES =
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", {"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""}; "innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
private static final String[] INITIAL_DATA_REGEXES =
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
private static final String INNERTUBE_CLIENT_NAME_REGEX = private static final String INNERTUBE_CLIENT_NAME_REGEX =
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),"; "INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
@ -116,13 +125,24 @@ public final class YoutubeParsingHelper {
private static Random numberGenerator = new Random(); private static Random numberGenerator = new Random();
/** /**
* <code>PENDING+</code> means that the user did not yet submit their choices. * {@code PENDING+} means that the user did not yet submit their choices.
*
* <p>
* Therefore, YouTube & Google should not track the user, because they did not give consent. * Therefore, YouTube & Google should not track the user, because they did not give consent.
* </p>
*
* <p>
* The three digits at the end can be random, but are required. * The three digits at the end can be random, but are required.
* </p>
*/ */
private static final String CONSENT_COOKIE_VALUE = "PENDING+"; private static final String CONSENT_COOKIE_VALUE = "PENDING+";
/** /**
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com * YouTube {@code CONSENT} cookie.
*
* <p>
* Should prevent redirect to {@code consent.youtube.com}.
* </p>
*/ */
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE; private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
@ -439,17 +459,10 @@ public final class YoutubeParsingHelper {
} }
} }
public static JsonObject getInitialData(final String html) throws ParsingException { private static JsonObject getInitialData(final String html) throws ParsingException {
try { try {
try { return JsonParser.object().from(getStringResultFromRegexArray(html,
final String initialData = Parser.matchGroup1( INITIAL_DATA_REGEXES, 1));
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
return JsonParser.object().from(initialData);
} catch (final Parser.RegexException e) {
final String initialData = Parser.matchGroup1(
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
return JsonParser.object().from(initialData);
}
} catch (final JsonParserException | Parser.RegexException e) { } catch (final JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e); throw new ParsingException("Could not get ytInitialData", e);
} }
@ -572,7 +585,7 @@ public final class YoutubeParsingHelper {
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1); key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) { } catch (final Parser.RegexException e) {
throw new ParsingException( throw new ParsingException(
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page"); "Could not extract YouTube WEB InnerTube client version and API key from HTML search results page", e);
} }
keyAndVersionExtracted = true; keyAndVersionExtracted = true;
} }
@ -730,8 +743,7 @@ public final class YoutubeParsingHelper {
final String response = getDownloader().get(url, headers).responseBody(); final String response = getDownloader().get(url, headers).responseBody();
musicClientVersion = getStringResultFromRegexArray(response, musicClientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1); INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
musicKey = getStringResultFromRegexArray(response, musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
INNERTUBE_API_KEY_REGEXES, 1);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response); musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
} catch (final Exception e) { } catch (final Exception e) {
final String url = "https://music.youtube.com/"; final String url = "https://music.youtube.com/";
@ -815,10 +827,11 @@ public final class YoutubeParsingHelper {
} }
/** /**
* Get the text from a JSON object that has either a simpleText or a runs array. * Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
* array.
* *
* @param textObject JSON object to get the text from * @param textObject JSON object to get the text from
* @param html whether to return HTML, by parsing the navigationEndpoint * @param html whether to return HTML, by parsing the {@code navigationEndpoint}
* @return text in the JSON object or {@code null} * @return text in the JSON object or {@code null}
*/ */
@Nullable @Nullable
@ -1495,15 +1508,7 @@ public final class YoutubeParsingHelper {
*/ */
@Nonnull @Nonnull
public static String generateContentPlaybackNonce() { public static String generateContentPlaybackNonce() {
final SecureRandom random = new SecureRandom(); return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 16);
final StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < 16; i++) {
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
(random.nextInt(128) + 1) & 63));
}
return stringBuilder.toString();
} }
/** /**
@ -1519,14 +1524,23 @@ public final class YoutubeParsingHelper {
*/ */
@Nonnull @Nonnull
public static String generateTParameter() { public static String generateTParameter() {
final SecureRandom random = new SecureRandom(); return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 12);
final StringBuilder stringBuilder = new StringBuilder(); }
for (int i = 0; i < 12; i++) { /**
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt( * Set the seed for video tests.
(random.nextInt(128) + 1) & 63)); *
} * <p>
* This seed will be used to generate the same {@code t} and {@code cpn} values between
return stringBuilder.toString(); * different executions of tests so mocks can be used for stream tests.
* </p>
*
* <p>
* This method will call {@link Utils#setSecureRandomSeed(long)} with the
* {@link #SEED_FOR_VIDEOS_TESTS value}.
* </p>
*/
public static void setSeedForVideoTests() {
Utils.setSecureRandomSeed(SEED_FOR_VIDEOS_TESTS);
} }
} }

View file

@ -15,6 +15,7 @@ import java.util.LinkedList;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.security.SecureRandom;
import java.util.regex.Pattern; import java.util.regex.Pattern;
public final class Utils { public final class Utils {
@ -25,16 +26,23 @@ public final class Utils {
public static final String EMPTY_STRING = ""; public static final String EMPTY_STRING = "";
private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\."); private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\.");
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\."); private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
private static final SecureRandom random = new SecureRandom();
private Utils() { private Utils() {
// no instance // no instance
} }
/** /**
* Remove all non-digit characters from a string.<p> * Remove all non-digit characters from a string.
* Examples:<p> *
* <ul><li>1 234 567 views -&gt; 1234567</li> * <p>
* <li>$31,133.124 -&gt; 31133124</li></ul> * Examples:
* </p>
*
* <ul>
* <li>1 234 567 views -&gt; 1234567</li>
* <li>$31,133.124 -&gt; 31133124</li>
* </ul>
* *
* @param toRemove string to remove non-digit chars * @param toRemove string to remove non-digit chars
* @return a string that contains only digits * @return a string that contains only digits
@ -45,8 +53,12 @@ public final class Utils {
} }
/** /**
* <p>Convert a mixed number word to a long.</p> * Convert a mixed number word to a long.
* <p>Examples:</p> *
* <p>
* Examples:
* </p>
*
* <ul> * <ul>
* <li>123 -&gt; 123</li> * <li>123 -&gt; 123</li>
* <li>1.23K -&gt; 1230</li> * <li>1.23K -&gt; 1230</li>
@ -106,11 +118,15 @@ public final class Utils {
/** /**
* Get the value of a URL-query by name. * Get the value of a URL-query by name.
* If a url-query is give multiple times, only the value of the first query is returned *
* <p>
* If a URL query is given multiple times, only the value of the first query is returned.
* </p>
* *
* @param url the url to be used * @param url the url to be used
* @param parameterName the pattern that will be used to check the url * @param parameterName the pattern that will be used to check the url
* @return a string that contains the value of the query parameter or null if nothing was found * @return a string that contains the value of the query parameter or {@code null} if nothing
* was found
*/ */
@Nullable @Nullable
public static String getQueryValue(@Nonnull final URL url, public static String getQueryValue(@Nonnull final URL url,
@ -144,11 +160,14 @@ public final class Utils {
} }
/** /**
* converts a string to a URL-Object. * Convert a string to a {@link URL URL object}.
* defaults to HTTP if no protocol is given *
* <p>
* Defaults to HTTP if no protocol is given.
* </p>
* *
* @param url the string to be converted to a URL-Object * @param url the string to be converted to a URL-Object
* @return a URL-Object containing the url * @return a {@link URL URL object} containing the url
*/ */
@Nonnull @Nonnull
public static URL stringToURL(final String url) throws MalformedURLException { public static URL stringToURL(final String url) throws MalformedURLException {
@ -187,6 +206,7 @@ public final class Utils {
return url; return url;
} }
@Nonnull
public static String removeUTF8BOM(@Nonnull final String s) { public static String removeUTF8BOM(@Nonnull final String s) {
String result = s; String result = s;
if (result.startsWith("\uFEFF")) { if (result.startsWith("\uFEFF")) {
@ -198,6 +218,7 @@ public final class Utils {
return result; return result;
} }
@Nonnull
public static String getBaseUrl(final String url) throws ParsingException { public static String getBaseUrl(final String url) throws ParsingException {
try { try {
final URL uri = stringToURL(url); final URL uri = stringToURL(url);
@ -244,6 +265,7 @@ public final class Utils {
* <p> * <p>
* This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s. * This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s.
* </p> * </p>
*
* @param collection the collection on which check if it's null or empty * @param collection the collection on which check if it's null or empty
* @return whether the collection is null or empty * @return whether the collection is null or empty
*/ */
@ -257,6 +279,7 @@ public final class Utils {
* <p> * <p>
* This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s. * This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s.
* </p> * </p>
*
* @param map the {@link Map map} on which check if it's null or empty * @param map the {@link Map map} on which check if it's null or empty
* @return whether the {@link Map map} is null or empty * @return whether the {@link Map map} is null or empty
*/ */
@ -380,6 +403,7 @@ public final class Utils {
} catch (final Parser.RegexException ignored) { } catch (final Parser.RegexException ignored) {
} }
} }
if (result == null) { if (result == null) {
throw new Parser.RegexException("No regex matched the input on group " + group); throw new Parser.RegexException("No regex matched the input on group " + group);
} }
@ -413,9 +437,49 @@ public final class Utils {
} catch (final Parser.RegexException ignored) { } catch (final Parser.RegexException ignored) {
} }
} }
if (result == null) { if (result == null) {
throw new Parser.RegexException("No regex matched the input on group " + group); throw new Parser.RegexException("No regex matched the input on group " + group);
} }
return result; return result;
} }
/**
 * Build a string of random characters picked from the given alphabet.
 *
 * <p>
 * Each character is chosen by drawing one index from the shared {@link #random} generator,
 * bounded by the length of the alphabet. A {@code length} of zero or less yields an empty
 * string.
 * </p>
 *
 * <p>
 * Tests that need to influence the generator can call {@link #setSecureRandomSeed(long)}
 * beforehand.
 * </p>
 *
 * @param alphabet the characters which may appear in the result
 * @param length   the number of characters to generate
 * @return a string of {@code length} characters, all taken from {@code alphabet}
 */
@Nonnull
public static String randomStringFromAlphabet(final String alphabet, final int length) {
    final StringBuilder generated = new StringBuilder();
    int remaining = length;
    while (remaining > 0) {
        // One draw per character, bounded by the alphabet size
        final int position = random.nextInt(alphabet.length());
        generated.append(alphabet.charAt(position));
        remaining--;
    }
    return generated.toString();
}
/**
* Pass a seed to the secure random device used by
* {@link #randomStringFromAlphabet(String, int)}.
*
* <p>
* This is meant to be called from tests which mock requests and therefore want the generated
* random strings to be reproducible between executions.
* </p>
*
* <p>
* NOTE(review): the {@link SecureRandom#setSeed(long)} documentation states that the given
* seed supplements, rather than replaces, the existing seed, so whether identical values are
* really generated on every run presumably depends on the underlying provider - to be
* confirmed.
* </p>
*
* <p>
* This is not intended to be used outside of tests.
* </p>
*
* @param seed the seed to pass to {@link SecureRandom#setSeed(long)}
*/
public static void setSecureRandomSeed(final long seed) {
random.setSeed(seed);
}
} }