Address requested changes

This commit is contained in:
TiA4f8R 2022-02-05 22:05:07 +01:00
parent b6bc521f0d
commit d0d91e6690
No known key found for this signature in database
GPG key ID: E6D3E7F5949450DD
2 changed files with 124 additions and 46 deletions

View file

@ -7,6 +7,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import static org.schabi.newpipe.extractor.utils.Utils.randomStringFromAlphabet;
import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonBuilder;
@ -36,7 +37,6 @@ import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
@ -83,6 +83,11 @@ public final class YoutubeParsingHelper {
public static final String CPN = "cpn";
public static final String VIDEO_ID = "videoId";
/**
* Seed that will be used for video tests, in order to mock video requests.
*/
private static final long SEED_FOR_VIDEOS_TESTS = 3000;
private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00";
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
@ -100,6 +105,7 @@ public final class YoutubeParsingHelper {
private static boolean keyAndVersionExtracted = false;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
@ -107,6 +113,9 @@ public final class YoutubeParsingHelper {
private static final String[] INNERTUBE_API_KEY_REGEXES =
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
private static final String[] INITIAL_DATA_REGEXES =
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
private static final String INNERTUBE_CLIENT_NAME_REGEX =
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
@ -116,13 +125,24 @@ public final class YoutubeParsingHelper {
private static Random numberGenerator = new Random();
/**
* <code>PENDING+</code> means that the user did not yet submit their choices.
* {@code PENDING+} means that the user did not yet submit their choices.
*
* <p>
* Therefore, YouTube &amp; Google should not track the user, because they did not give consent.
* </p>
*
* <p>
* The three digits at the end can be random, but are required.
* </p>
*/
private static final String CONSENT_COOKIE_VALUE = "PENDING+";
/**
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com
* YouTube {@code CONSENT} cookie.
*
* <p>
* Should prevent redirect to {@code consent.youtube.com}.
* </p>
*/
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
@ -439,17 +459,10 @@ public final class YoutubeParsingHelper {
}
}
public static JsonObject getInitialData(final String html) throws ParsingException {
private static JsonObject getInitialData(final String html) throws ParsingException {
try {
try {
final String initialData = Parser.matchGroup1(
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
return JsonParser.object().from(initialData);
} catch (final Parser.RegexException e) {
final String initialData = Parser.matchGroup1(
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
return JsonParser.object().from(initialData);
}
return JsonParser.object().from(getStringResultFromRegexArray(html,
INITIAL_DATA_REGEXES, 1));
} catch (final JsonParserException | Parser.RegexException e) {
throw new ParsingException("Could not get ytInitialData", e);
}
@ -572,7 +585,7 @@ public final class YoutubeParsingHelper {
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
} catch (final Parser.RegexException e) {
throw new ParsingException(
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page", e);
}
keyAndVersionExtracted = true;
}
@ -730,8 +743,7 @@ public final class YoutubeParsingHelper {
final String response = getDownloader().get(url, headers).responseBody();
musicClientVersion = getStringResultFromRegexArray(response,
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
musicKey = getStringResultFromRegexArray(response,
INNERTUBE_API_KEY_REGEXES, 1);
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
} catch (final Exception e) {
final String url = "https://music.youtube.com/";
@ -815,10 +827,11 @@ public final class YoutubeParsingHelper {
}
/**
* Get the text from a JSON object that has either a simpleText or a runs array.
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
* array.
*
* @param textObject JSON object to get the text from
* @param html whether to return HTML, by parsing the navigationEndpoint
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
* @return text in the JSON object or {@code null}
*/
@Nullable
@ -1495,15 +1508,7 @@ public final class YoutubeParsingHelper {
*/
@Nonnull
public static String generateContentPlaybackNonce() {
final SecureRandom random = new SecureRandom();
final StringBuilder stringBuilder = new StringBuilder();
for (int i = 0; i < 16; i++) {
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
(random.nextInt(128) + 1) & 63));
}
return stringBuilder.toString();
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 16);
}
/**
@ -1519,14 +1524,23 @@ public final class YoutubeParsingHelper {
*/
@Nonnull
public static String generateTParameter() {
final SecureRandom random = new SecureRandom();
final StringBuilder stringBuilder = new StringBuilder();
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 12);
}
for (int i = 0; i < 12; i++) {
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
(random.nextInt(128) + 1) & 63));
}
return stringBuilder.toString();
/**
* Set the seed for video tests.
*
* <p>
* This seed is meant to make the generated {@code t} and {@code cpn} values identical between
* different executions of the tests, so that mocks can be used for stream tests.
* </p>
*
* <p>
* This method calls {@link Utils#setSecureRandomSeed(long)} with the
* {@link #SEED_FOR_VIDEOS_TESTS value}; it takes no argument and returns nothing.
* </p>
*/
public static void setSeedForVideoTests() {
// Delegates to the shared helper; the seed is the fixed constant declared in this class.
Utils.setSecureRandomSeed(SEED_FOR_VIDEOS_TESTS);
}
}

View file

@ -15,6 +15,7 @@ import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.security.SecureRandom;
import java.util.regex.Pattern;
public final class Utils {
@ -25,16 +26,23 @@ public final class Utils {
public static final String EMPTY_STRING = "";
private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\.");
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
private static final SecureRandom random = new SecureRandom();
private Utils() {
// no instance
}
/**
* Remove all non-digit characters from a string.<p>
* Examples:<p>
* <ul><li>1 234 567 views -&gt; 1234567</li>
* <li>$31,133.124 -&gt; 31133124</li></ul>
* Remove all non-digit characters from a string.
*
* <p>
* Examples:
* </p>
*
* <ul>
* <li>1 234 567 views -&gt; 1234567</li>
* <li>$31,133.124 -&gt; 31133124</li>
* </ul>
*
* @param toRemove string to remove non-digit chars
* @return a string that contains only digits
@ -45,8 +53,12 @@ public final class Utils {
}
/**
* <p>Convert a mixed number word to a long.</p>
* <p>Examples:</p>
* Convert a mixed number word to a long.
*
* <p>
* Examples:
* </p>
*
* <ul>
* <li>123 -&gt; 123</li>
* <li>1.23K -&gt; 1230</li>
@ -106,11 +118,15 @@ public final class Utils {
/**
* Get the value of a URL-query by name.
* If a url-query is give multiple times, only the value of the first query is returned
*
* <p>
* If a URL query is given multiple times, only the value of the first query is returned.
* </p>
*
* @param url the url to be used
* @param parameterName the pattern that will be used to check the url
* @return a string that contains the value of the query parameter or null if nothing was found
* @return a string that contains the value of the query parameter or {@code null} if nothing
* was found
*/
@Nullable
public static String getQueryValue(@Nonnull final URL url,
@ -144,11 +160,14 @@ public final class Utils {
}
/**
* converts a string to a URL-Object.
* defaults to HTTP if no protocol is given
* Convert a string to a {@link URL URL object}.
*
* <p>
* Defaults to HTTP if no protocol is given.
* </p>
*
* @param url the string to be converted to a URL-Object
* @return a URL-Object containing the url
* @return a {@link URL URL object} containing the url
*/
@Nonnull
public static URL stringToURL(final String url) throws MalformedURLException {
@ -187,6 +206,7 @@ public final class Utils {
return url;
}
@Nonnull
public static String removeUTF8BOM(@Nonnull final String s) {
String result = s;
if (result.startsWith("\uFEFF")) {
@ -198,6 +218,7 @@ public final class Utils {
return result;
}
@Nonnull
public static String getBaseUrl(final String url) throws ParsingException {
try {
final URL uri = stringToURL(url);
@ -244,6 +265,7 @@ public final class Utils {
* <p>
* This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s.
* </p>
*
* @param collection the collection to check for being null or empty
* @return whether the collection is null or empty
*/
@ -257,6 +279,7 @@ public final class Utils {
* <p>
* This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s.
* </p>
*
* @param map the {@link Map map} to check for being null or empty
* @return whether the {@link Map map} is null or empty
*/
@ -380,6 +403,7 @@ public final class Utils {
} catch (final Parser.RegexException ignored) {
}
}
if (result == null) {
throw new Parser.RegexException("No regex matched the input on group " + group);
}
@ -413,9 +437,49 @@ public final class Utils {
} catch (final Parser.RegexException ignored) {
}
}
if (result == null) {
throw new Parser.RegexException("No regex matched the input on group " + group);
}
return result;
}
/**
 * Build a random string by drawing characters from an alphabet with the shared
 * {@link #random secure random generator}.
 *
 * <p>
 * Every character is picked at the index returned by
 * {@code random.nextInt(alphabet.length())}. A {@code length} of zero or less yields an empty
 * string.
 * </p>
 *
 * <p>
 * Tests which need mockable values may call {@link #setSecureRandomSeed(long)} beforehand.
 * </p>
 *
 * @param alphabet the characters which may appear in the result
 * @param length   the number of characters to generate
 * @return a string of the requested length made only of characters taken from the provided
 * alphabet
 */
@Nonnull
public static String randomStringFromAlphabet(final String alphabet, final int length) {
    final StringBuilder generated = new StringBuilder();
    int remaining = length;
    while (remaining > 0) {
        // One draw per output character, so the sequence of generator calls is unchanged.
        final int index = random.nextInt(alphabet.length());
        generated.append(alphabet.charAt(index));
        remaining--;
    }
    return generated.toString();
}
/**
* Seed the secure random device used for {@link #randomStringFromAlphabet(String, int)}.
*
* <p>
* Use this in tests so that they can be mocked, the intent being that the same random numbers
* are always generated for a given seed.
* </p>
*
* <p>
* This is not intended to be used outside of tests: the seed is applied to the single shared
* {@link #random} instance, so it affects every later call of
* {@link #randomStringFromAlphabet(String, int)}.
* </p>
*
* @param seed the seed to pass to {@link SecureRandom#setSeed(long)}
*/
public static void setSecureRandomSeed(final long seed) {
// NOTE(review): SecureRandom#setSeed(long) is documented as supplementing, rather than
// replacing, the existing seed. Whether the generated values become reproducible depends
// on the algorithm behind the default SecureRandom; confirm on the JVMs running the tests.
random.setSeed(seed);
}
}