Address requested changes
This commit is contained in:
parent
b6bc521f0d
commit
d0d91e6690
2 changed files with 124 additions and 46 deletions
|
@ -7,6 +7,7 @@ import static org.schabi.newpipe.extractor.utils.Utils.HTTPS;
|
|||
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.getStringResultFromRegexArray;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
|
||||
import static org.schabi.newpipe.extractor.utils.Utils.randomStringFromAlphabet;
|
||||
|
||||
import com.grack.nanojson.JsonArray;
|
||||
import com.grack.nanojson.JsonBuilder;
|
||||
|
@ -36,7 +37,6 @@ import java.net.MalformedURLException;
|
|||
import java.net.URL;
|
||||
import java.net.URLDecoder;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.security.SecureRandom;
|
||||
import java.time.LocalDate;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.ZoneOffset;
|
||||
|
@ -83,6 +83,11 @@ public final class YoutubeParsingHelper {
|
|||
public static final String CPN = "cpn";
|
||||
public static final String VIDEO_ID = "videoId";
|
||||
|
||||
/**
|
||||
* Seed that will be used for video tests, in order to mock video requests.
|
||||
*/
|
||||
private static final long SEED_FOR_VIDEOS_TESTS = 3000;
|
||||
|
||||
private static final String HARDCODED_CLIENT_VERSION = "2.20220114.01.00";
|
||||
private static final String HARDCODED_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
|
||||
|
||||
|
@ -100,6 +105,7 @@ public final class YoutubeParsingHelper {
|
|||
private static boolean keyAndVersionExtracted = false;
|
||||
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
|
||||
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
|
||||
|
||||
private static final String[] INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES =
|
||||
{"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
||||
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
||||
|
@ -107,6 +113,9 @@ public final class YoutubeParsingHelper {
|
|||
private static final String[] INNERTUBE_API_KEY_REGEXES =
|
||||
{"INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"",
|
||||
"innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\""};
|
||||
private static final String[] INITIAL_DATA_REGEXES =
|
||||
{"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});",
|
||||
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});"};
|
||||
private static final String INNERTUBE_CLIENT_NAME_REGEX =
|
||||
"INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),";
|
||||
|
||||
|
@ -116,13 +125,24 @@ public final class YoutubeParsingHelper {
|
|||
private static Random numberGenerator = new Random();
|
||||
|
||||
/**
|
||||
* <code>PENDING+</code> means that the user did not yet submit their choices.
|
||||
* {@code PENDING+} means that the user did not yet submit their choices.
|
||||
*
|
||||
* <p>
|
||||
* Therefore, YouTube & Google should not track the user, because they did not give consent.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* The three digits at the end can be random, but are required.
|
||||
* </p>
|
||||
*/
|
||||
private static final String CONSENT_COOKIE_VALUE = "PENDING+";
|
||||
|
||||
/**
|
||||
* Youtube <code>CONSENT</code> cookie. Should prevent redirect to consent.youtube.com
|
||||
* YouTube {@code CONSENT} cookie.
|
||||
*
|
||||
* <p>
|
||||
* Should prevent redirect to {@code consent.youtube.com}.
|
||||
* </p>
|
||||
*/
|
||||
private static final String CONSENT_COOKIE = "CONSENT=" + CONSENT_COOKIE_VALUE;
|
||||
|
||||
|
@ -439,17 +459,10 @@ public final class YoutubeParsingHelper {
|
|||
}
|
||||
}
|
||||
|
||||
public static JsonObject getInitialData(final String html) throws ParsingException {
|
||||
private static JsonObject getInitialData(final String html) throws ParsingException {
|
||||
try {
|
||||
try {
|
||||
final String initialData = Parser.matchGroup1(
|
||||
"window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
|
||||
return JsonParser.object().from(initialData);
|
||||
} catch (final Parser.RegexException e) {
|
||||
final String initialData = Parser.matchGroup1(
|
||||
"var\\s*ytInitialData\\s*=\\s*(\\{.*?\\});", html);
|
||||
return JsonParser.object().from(initialData);
|
||||
}
|
||||
return JsonParser.object().from(getStringResultFromRegexArray(html,
|
||||
INITIAL_DATA_REGEXES, 1));
|
||||
} catch (final JsonParserException | Parser.RegexException e) {
|
||||
throw new ParsingException("Could not get ytInitialData", e);
|
||||
}
|
||||
|
@ -572,7 +585,7 @@ public final class YoutubeParsingHelper {
|
|||
key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1);
|
||||
} catch (final Parser.RegexException e) {
|
||||
throw new ParsingException(
|
||||
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page");
|
||||
"Could not extract YouTube WEB InnerTube client version and API key from HTML search results page", e);
|
||||
}
|
||||
keyAndVersionExtracted = true;
|
||||
}
|
||||
|
@ -730,8 +743,7 @@ public final class YoutubeParsingHelper {
|
|||
final String response = getDownloader().get(url, headers).responseBody();
|
||||
musicClientVersion = getStringResultFromRegexArray(response,
|
||||
INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1);
|
||||
musicKey = getStringResultFromRegexArray(response,
|
||||
INNERTUBE_API_KEY_REGEXES, 1);
|
||||
musicKey = getStringResultFromRegexArray(response, INNERTUBE_API_KEY_REGEXES, 1);
|
||||
musicClientName = Parser.matchGroup1(INNERTUBE_CLIENT_NAME_REGEX, response);
|
||||
} catch (final Exception e) {
|
||||
final String url = "https://music.youtube.com/";
|
||||
|
@ -815,10 +827,11 @@ public final class YoutubeParsingHelper {
|
|||
}
|
||||
|
||||
/**
|
||||
* Get the text from a JSON object that has either a simpleText or a runs array.
|
||||
* Get the text from a JSON object that has either a {@code simpleText} or a {@code runs}
|
||||
* array.
|
||||
*
|
||||
* @param textObject JSON object to get the text from
|
||||
* @param html whether to return HTML, by parsing the navigationEndpoint
|
||||
* @param html whether to return HTML, by parsing the {@code navigationEndpoint}
|
||||
* @return text in the JSON object or {@code null}
|
||||
*/
|
||||
@Nullable
|
||||
|
@ -1495,15 +1508,7 @@ public final class YoutubeParsingHelper {
|
|||
*/
|
||||
@Nonnull
|
||||
public static String generateContentPlaybackNonce() {
|
||||
final SecureRandom random = new SecureRandom();
|
||||
final StringBuilder stringBuilder = new StringBuilder();
|
||||
|
||||
for (int i = 0; i < 16; i++) {
|
||||
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
|
||||
(random.nextInt(128) + 1) & 63));
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 16);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1519,14 +1524,23 @@ public final class YoutubeParsingHelper {
|
|||
*/
|
||||
@Nonnull
|
||||
public static String generateTParameter() {
|
||||
final SecureRandom random = new SecureRandom();
|
||||
final StringBuilder stringBuilder = new StringBuilder();
|
||||
return randomStringFromAlphabet(CONTENT_PLAYBACK_NONCE_ALPHABET, 12);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 12; i++) {
|
||||
stringBuilder.append(CONTENT_PLAYBACK_NONCE_ALPHABET.charAt(
|
||||
(random.nextInt(128) + 1) & 63));
|
||||
}
|
||||
|
||||
return stringBuilder.toString();
|
||||
/**
|
||||
* Set the seed for video tests.
|
||||
*
|
||||
* <p>
|
||||
* This seed will be used to generate the same {@code t} and {@code cpn} values between
|
||||
* different executions of tests so mocks can be used for stream tests.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* This method will call {@link Utils#setSecureRandomSeed(long)} with the
|
||||
* {@link #SEED_FOR_VIDEOS_TESTS value}.
|
||||
* </p>
|
||||
*/
|
||||
public static void setSeedForVideoTests() {
|
||||
Utils.setSecureRandomSeed(SEED_FOR_VIDEOS_TESTS);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ import java.util.LinkedList;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.security.SecureRandom;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public final class Utils {
|
||||
|
@ -25,16 +26,23 @@ public final class Utils {
|
|||
public static final String EMPTY_STRING = "";
|
||||
private static final Pattern M_PATTERN = Pattern.compile("(https?)?:\\/\\/m\\.");
|
||||
private static final Pattern WWW_PATTERN = Pattern.compile("(https?)?:\\/\\/www\\.");
|
||||
private static final SecureRandom random = new SecureRandom();
|
||||
|
||||
private Utils() {
|
||||
// no instance
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove all non-digit characters from a string.<p>
|
||||
* Examples:<p>
|
||||
* <ul><li>1 234 567 views -> 1234567</li>
|
||||
* <li>$31,133.124 -> 31133124</li></ul>
|
||||
* Remove all non-digit characters from a string.
|
||||
*
|
||||
* <p>
|
||||
* Examples:
|
||||
* </p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>1 234 567 views -> 1234567</li>
|
||||
* <li>$31,133.124 -> 31133124</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param toRemove string to remove non-digit chars
|
||||
* @return a string that contains only digits
|
||||
|
@ -45,8 +53,12 @@ public final class Utils {
|
|||
}
|
||||
|
||||
/**
|
||||
* <p>Convert a mixed number word to a long.</p>
|
||||
* <p>Examples:</p>
|
||||
* Convert a mixed number word to a long.
|
||||
*
|
||||
* <p>
|
||||
* Examples:
|
||||
* </p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>123 -> 123</li>
|
||||
* <li>1.23K -> 1230</li>
|
||||
|
@ -106,11 +118,15 @@ public final class Utils {
|
|||
|
||||
/**
|
||||
* Get the value of a URL-query by name.
|
||||
* If a url-query is give multiple times, only the value of the first query is returned
|
||||
*
|
||||
* <p>
|
||||
* If a url-query is given multiple times, only the value of the first query is returned.
|
||||
* </p>
|
||||
*
|
||||
* @param url the url to be used
|
||||
* @param parameterName the pattern that will be used to check the url
|
||||
* @return a string that contains the value of the query parameter or null if nothing was found
|
||||
* @return a string that contains the value of the query parameter or {@code null} if nothing
|
||||
* was found
|
||||
*/
|
||||
@Nullable
|
||||
public static String getQueryValue(@Nonnull final URL url,
|
||||
|
@ -144,11 +160,14 @@ public final class Utils {
|
|||
}
|
||||
|
||||
/**
|
||||
* converts a string to a URL-Object.
|
||||
* defaults to HTTP if no protocol is given
|
||||
* Convert a string to a {@link URL URL object}.
|
||||
*
|
||||
* <p>
|
||||
* Defaults to HTTP if no protocol is given.
|
||||
* </p>
|
||||
*
|
||||
* @param url the string to be converted to a URL-Object
|
||||
* @return a URL-Object containing the url
|
||||
* @return a {@link URL URL object} containing the url
|
||||
*/
|
||||
@Nonnull
|
||||
public static URL stringToURL(final String url) throws MalformedURLException {
|
||||
|
@ -187,6 +206,7 @@ public final class Utils {
|
|||
return url;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
public static String removeUTF8BOM(@Nonnull final String s) {
|
||||
String result = s;
|
||||
if (result.startsWith("\uFEFF")) {
|
||||
|
@ -198,6 +218,7 @@ public final class Utils {
|
|||
return result;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
public static String getBaseUrl(final String url) throws ParsingException {
|
||||
try {
|
||||
final URL uri = stringToURL(url);
|
||||
|
@ -244,6 +265,7 @@ public final class Utils {
|
|||
* <p>
|
||||
* This method can be also used for {@link com.grack.nanojson.JsonArray JsonArray}s.
|
||||
* </p>
|
||||
*
|
||||
* @param collection the collection to check for being null or empty
|
||||
* @return whether the collection is null or empty
|
||||
*/
|
||||
|
@ -257,6 +279,7 @@ public final class Utils {
|
|||
* <p>
|
||||
* This method can be also used for {@link com.grack.nanojson.JsonObject JsonObject}s.
|
||||
* </p>
|
||||
*
|
||||
* @param map the {@link Map map} to check for being null or empty
|
||||
* @return whether the {@link Map map} is null or empty
|
||||
*/
|
||||
|
@ -380,6 +403,7 @@ public final class Utils {
|
|||
} catch (final Parser.RegexException ignored) {
|
||||
}
|
||||
}
|
||||
|
||||
if (result == null) {
|
||||
throw new Parser.RegexException("No regex matched the input on group " + group);
|
||||
}
|
||||
|
@ -413,9 +437,49 @@ public final class Utils {
|
|||
} catch (final Parser.RegexException ignored) {
|
||||
}
|
||||
}
|
||||
|
||||
if (result == null) {
|
||||
throw new Parser.RegexException("No regex matched the input on group " + group);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a random string using the secure random device {@link #random}.
|
||||
*
|
||||
* <p>
|
||||
* {@link #setSecureRandomSeed(long)} might be useful when mocking tests.
|
||||
* </p>
|
||||
*
|
||||
* @param alphabet the characters' alphabet to use
|
||||
* @param length the length of the returned string
|
||||
* @return a random string of the requested length made of only characters from the provided
|
||||
* alphabet
|
||||
*/
|
||||
@Nonnull
|
||||
public static String randomStringFromAlphabet(final String alphabet, final int length) {
|
||||
final StringBuilder stringBuilder = new StringBuilder();
|
||||
for (int i = 0; i < length; ++i) {
|
||||
stringBuilder.append(alphabet.charAt(random.nextInt(alphabet.length())));
|
||||
}
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Seed the secure random device used for {@link #randomStringFromAlphabet(String, int)}.
|
||||
*
|
||||
* <p>
|
||||
* Use this in tests so that they can be mocked as the same random numbers are always
|
||||
* generated.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* This is not intended to be used outside of tests.
|
||||
* </p>
|
||||
*
|
||||
* @param seed the seed to pass to {@link SecureRandom#setSeed(long)}
|
||||
*/
|
||||
public static void setSecureRandomSeed(final long seed) {
|
||||
random.setSeed(seed);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue