Address the last requested changes + update YoutubeCommentsExtractor mocks

This commit is contained in:
TiA4f8R 2021-07-09 18:23:46 +02:00
parent 2320aecb7c
commit 7753556e66
No known key found for this signature in database
GPG Key ID: E6D3E7F5949450DD
22 changed files with 157 additions and 144 deletions

View File

@ -78,7 +78,8 @@ public class YoutubeParsingHelper {
private static String[] youtubeMusicKey;
private static boolean keyAndVersionExtracted = false;
private static Boolean areHardcodedClientVersionAndKeyValidValue = null;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private static Optional<Boolean> hardcodedClientVersionAndKeyValid = Optional.empty();
private static Random numberGenerator = new Random();
@ -308,10 +309,10 @@ public class YoutubeParsingHelper {
}
}
public static boolean areHardcodedClientVersionAndKeyValid()
public static Optional<Boolean> areHardcodedClientVersionAndKeyValid()
throws IOException, ExtractionException {
if (areHardcodedClientVersionAndKeyValidValue != null) {
return areHardcodedClientVersionAndKeyValidValue;
if (hardcodedClientVersionAndKeyValid.isPresent()) {
return hardcodedClientVersionAndKeyValid;
}
// @formatter:off
final byte[] body = JsonWriter.string()
@ -343,12 +344,12 @@ public class YoutubeParsingHelper {
final String responseBody = response.responseBody();
final int responseCode = response.responseCode();
return areHardcodedClientVersionAndKeyValidValue = responseBody.length() > 5000
&& responseCode == 200; // Ensure to have a valid response
return hardcodedClientVersionAndKeyValid = Optional.of(responseBody.length() > 5000
&& responseCode == 200); // Ensure to have a valid response
}
private static void extractClientVersionAndKey() throws IOException, ExtractionException {
// Don't extract the client version and the innertube key if it has been already extracted
// Don't extract the client version and the InnerTube key if it has been already extracted
if (keyAndVersionExtracted) return;
// Don't provide a search term in order to have a smaller response
final String url = "https://www.youtube.com/results?search_query=&ucbcb=1";
@ -424,7 +425,7 @@ public class YoutubeParsingHelper {
*/
public static String getClientVersion() throws IOException, ExtractionException {
if (!isNullOrEmpty(clientVersion)) return clientVersion;
if (areHardcodedClientVersionAndKeyValid()) {
if (areHardcodedClientVersionAndKeyValid().orElse(false)) {
return clientVersion = HARDCODED_CLIENT_VERSION;
}
@ -437,7 +438,7 @@ public class YoutubeParsingHelper {
*/
public static String getKey() throws IOException, ExtractionException {
if (!isNullOrEmpty(key)) return key;
if (areHardcodedClientVersionAndKeyValid()) {
if (areHardcodedClientVersionAndKeyValid().orElse(false)) {
return key = HARDCODED_KEY;
}

View File

@ -81,30 +81,31 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
@Override
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException,
ExtractionException {
final String channel_path = super.getId();
final String[] channelInfo = channel_path.split("/");
final String channelPath = super.getId();
final String[] channelId = channelPath.split("/");
String id = "";
// If the url is an URL which is not a /channel URL, we need to use the
// navigation/resolve_url endpoint of the youtubei API to get the channel id. Otherwise, we
// couldn't get information about the channel associated with this URL, if there is one.
if (!channelInfo[0].equals("channel")) {
// navigation/resolve_url endpoint of the InnerTube API to get the channel id. Otherwise,
// we couldn't get information about the channel associated with this URL, if there is one.
if (!channelId[0].equals("channel")) {
final byte[] body = JsonWriter.string(prepareJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry())
.value("url", "https://www.youtube.com/" + channel_path)
.value("url", "https://www.youtube.com/" + channelPath)
.done())
.getBytes(UTF_8);
final JsonObject jsonResponse = getJsonPostResponse("navigation/resolve_url",
body, getExtractorLocalization());
if (jsonResponse.has("error")) {
if (jsonResponse.getInt("code") == 404) {
throw new ContentNotAvailableException(
"No channel associated with this user exists");
if (!isNullOrEmpty(jsonResponse.getObject("error"))) {
final JsonObject errorJsonObject = jsonResponse.getObject("error");
final int errorCode = errorJsonObject.getInt("code");
if (errorCode == 404) {
throw new ContentNotAvailableException("This channel doesn't exist.");
} else {
throw new ContentNotAvailableException("Got error:\""
+ jsonResponse.getString("status") + "\": "
+ jsonResponse.getString("message"));
+ errorJsonObject.getString("status") + "\": "
+ errorJsonObject.getString("message"));
}
}
@ -128,7 +129,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
redirectedChannelId = browseId;
}
} else {
id = channelInfo[1];
id = channelId[1];
}
JsonObject ajaxJson = null;
@ -145,13 +146,14 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
getExtractorLocalization());
if (!isNullOrEmpty(jsonResponse.getObject("error"))) {
final int errorCode = jsonResponse.getObject("error").getInt("code");
if (errorCode == 400) {
final JsonObject errorJsonObject = jsonResponse.getObject("error");
final int errorCode = errorJsonObject.getInt("code");
if (errorCode == 404) {
throw new ContentNotAvailableException("This channel doesn't exist.");
} else {
throw new ContentNotAvailableException("Got error:\""
+ jsonResponse.getString("status") + "\": "
+ jsonResponse.getString("message"));
+ errorJsonObject.getString("status") + "\": "
+ errorJsonObject.getString("message"));
}
}
@ -330,13 +332,13 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
.getArray("contents").getObject(0).getObject("itemSectionRenderer")
.getArray("contents").getObject(0).getObject("gridRenderer");
final List<String> channelInfo = new ArrayList<>();
channelInfo.add(getName());
channelInfo.add(getUrl());
final List<String> channelIds = new ArrayList<>();
channelIds.add(getName());
channelIds.add(getUrl());
final JsonObject continuation = collectStreamsFrom(collector, gridRenderer
.getArray("items"), channelInfo);
.getArray("items"), channelIds);
nextPage = getNextPageFrom(continuation, channelInfo);
nextPage = getNextPageFrom(continuation, channelIds);
}
return new InfoItemsPage<>(collector, nextPage);
@ -349,7 +351,7 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
throw new IllegalArgumentException("Page doesn't contain an URL");
}
final List<String> channelInfos = page.getIds();
final List<String> channelIds = page.getIds();
final StreamInfoItemsCollector collector = new StreamInfoItemsCollector(getServiceId());
final Map<String, List<String>> headers = new HashMap<>();
@ -365,14 +367,14 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
.getObject("appendContinuationItemsAction");
final JsonObject continuation = collectStreamsFrom(collector, sectionListContinuation
.getArray("continuationItems"), channelInfos);
.getArray("continuationItems"), channelIds);
return new InfoItemsPage<>(collector, getNextPageFrom(continuation, channelInfos));
return new InfoItemsPage<>(collector, getNextPageFrom(continuation, channelIds));
}
@Nullable
private Page getNextPageFrom(final JsonObject continuations,
final List<String> channelInfo) throws IOException,
final List<String> channelIds) throws IOException,
ExtractionException {
if (isNullOrEmpty(continuations)) {
return null;
@ -388,23 +390,24 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
.done())
.getBytes(UTF_8);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey(), null, channelInfo, null, body);
return new Page(YOUTUBEI_V1_URL + "browse?key=" + getKey(), null, channelIds, null, body);
}
/**
* Collect streams from an array of items
*
* @param collector the collector where videos will be commited
* @param videos the array to get videos from
* @param collector the collector where videos will be committed
* @param videos the array to get videos from
* @param channelIds the ids of the channel, which are its name and its URL
* @return the continuation object
*/
private JsonObject collectStreamsFrom(@Nonnull final StreamInfoItemsCollector collector,
@Nonnull final JsonArray videos,
@Nonnull final List<String> channelInfo) {
@Nonnull final List<String> channelIds) {
collector.reset();
final String uploaderName = channelInfo.get(0);
final String uploaderUrl = channelInfo.get(1);
final String uploaderName = channelIds.get(0);
final String uploaderUrl = channelIds.get(1);
final TimeAgoParser timeAgoParser = getTimeAgoParser();
JsonObject continuation = null;

View File

@ -189,7 +189,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
public void onFetchPage(@Nonnull final Downloader downloader) throws IOException, ExtractionException {
final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
final Response response = downloader.get(getUrl() + "&ucbcb=1", requestHeaders, getExtractorLocalization());
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
responseBody = YoutubeParsingHelper.unescapeDocument(response.responseBody());
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);

View File

@ -85,7 +85,6 @@ public class YoutubeMixPlaylistExtractor extends PlaylistExtractor {
if (isNullOrEmpty(playlistData)) throw new ExtractionException(
"Could not get playlistData");
cookieValue = extractCookieValue(COOKIE_NAME, response);
}
@Nonnull

View File

@ -962,7 +962,8 @@ public class YoutubeStreamExtractor extends StreamExtractor {
}
}
}
} else if (streamingData.has("formats")) {
}
if (streamingData.has("formats")) {
final JsonArray formats = streamingData.getArray("formats");
if (!isNullOrEmpty(formats)) {
for (final Object format : formats) {

View File

@ -3,6 +3,7 @@ package org.schabi.newpipe.extractor.services.youtube.linkHandler;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandlerFactory;
import javax.annotation.Nonnull;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.List;
@ -26,24 +27,31 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
private static final String SEARCH_URL = "https://www.youtube.com/results?search_query=";
private static final String MUSIC_SEARCH_URL = "https://music.youtube.com/search?q=";
@Nonnull
public static YoutubeSearchQueryHandlerFactory getInstance() {
return new YoutubeSearchQueryHandlerFactory();
}
@Override
public String getUrl(String searchString, List<String> contentFilters, String sortFilter) throws ParsingException {
public String getUrl(final String searchString,
@Nonnull final List<String> contentFilters,
final String sortFilter) throws ParsingException {
try {
if (!contentFilters.isEmpty()) {
switch (contentFilters.get(0)) {
final String contentFilter = contentFilters.get(0);
switch (contentFilter) {
case ALL:
default:
break;
case VIDEOS:
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8) + "&sp=EgIQAQ%253D%253D";
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8)
+ "&sp=EgIQAQ%253D%253D";
case CHANNELS:
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8) + "&sp=EgIQAg%253D%253D";
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8)
+ "&sp=EgIQAg%253D%253D";
case PLAYLISTS:
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8) + "&sp=EgIQAw%253D%253D";
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8)
+ "&sp=EgIQAw%253D%253D";
case MUSIC_SONGS:
case MUSIC_VIDEOS:
case MUSIC_ALBUMS:
@ -54,7 +62,7 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
}
return SEARCH_URL + URLEncoder.encode(searchString, UTF_8);
} catch (UnsupportedEncodingException e) {
} catch (final UnsupportedEncodingException e) {
throw new ParsingException("Could not encode query", e);
}
}
@ -70,10 +78,11 @@ public class YoutubeSearchQueryHandlerFactory extends SearchQueryHandlerFactory
MUSIC_VIDEOS,
MUSIC_ALBUMS,
MUSIC_PLAYLISTS
// MUSIC_ARTISTS
// MUSIC_ARTISTS
};
}
@Nonnull
public static String getSearchParameter(final String contentFilter) {
if (isNullOrEmpty(contentFilter)) return "";
switch (contentFilter) {

View File

@ -27,7 +27,7 @@ public class YoutubeParsingHelperTest {
@Test
public void testAreHardcodedClientVersionAndKeyValid() throws IOException, ExtractionException {
assertTrue("Hardcoded client version and key are not valid anymore",
YoutubeParsingHelper.areHardcodedClientVersionAndKeyValid());
YoutubeParsingHelper.areHardcodedClientVersionAndKeyValid().orElse(false));
}
@Test
@ -44,7 +44,7 @@ public class YoutubeParsingHelperTest {
}
@Test
public void testConvertFromGoogleCacheUrl() throws ParsingException {
public void testConvertFromGoogleCacheUrl() {
assertEquals("https://mohfw.gov.in/",
YoutubeParsingHelper.extractCachedUrlIfNeeded("https://webcache.googleusercontent.com/search?q=cache:https://mohfw.gov.in/"));
assertEquals("https://www.infektionsschutz.de/coronavirus-sars-cov-2.html",