2020-04-10 08:51:05 +00:00
|
|
|
package org.schabi.newpipe.extractor.services.youtube;
|
2017-03-01 17:47:52 +00:00
|
|
|
|
|
|
|
|
2020-02-24 18:03:54 +00:00
|
|
|
import com.grack.nanojson.JsonArray;
|
2020-02-22 22:51:02 +00:00
|
|
|
import com.grack.nanojson.JsonObject;
|
|
|
|
import com.grack.nanojson.JsonParser;
|
|
|
|
import com.grack.nanojson.JsonParserException;
|
2020-03-20 10:05:19 +00:00
|
|
|
import com.grack.nanojson.JsonWriter;
|
|
|
|
|
2019-10-29 05:00:29 +00:00
|
|
|
import org.jsoup.Jsoup;
|
|
|
|
import org.jsoup.nodes.Document;
|
2019-04-28 20:03:16 +00:00
|
|
|
import org.schabi.newpipe.extractor.downloader.Response;
|
2020-03-01 00:50:31 +00:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ContentNotAvailableException;
|
2020-02-29 15:42:04 +00:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
|
2017-03-01 17:47:52 +00:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ParsingException;
|
2019-10-29 05:00:29 +00:00
|
|
|
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
|
2020-02-29 15:55:07 +00:00
|
|
|
import org.schabi.newpipe.extractor.localization.Localization;
|
2020-02-22 22:51:02 +00:00
|
|
|
import org.schabi.newpipe.extractor.utils.Parser;
|
2020-02-28 08:36:33 +00:00
|
|
|
import org.schabi.newpipe.extractor.utils.Utils;
|
2017-03-01 17:47:52 +00:00
|
|
|
|
2020-02-29 15:42:04 +00:00
|
|
|
import java.io.IOException;
|
2020-02-27 16:39:23 +00:00
|
|
|
import java.io.UnsupportedEncodingException;
|
2020-04-01 14:01:21 +00:00
|
|
|
import java.net.MalformedURLException;
|
2019-01-13 11:52:07 +00:00
|
|
|
import java.net.URL;
|
2020-02-27 16:39:23 +00:00
|
|
|
import java.net.URLDecoder;
|
2019-04-28 20:03:16 +00:00
|
|
|
import java.text.ParseException;
|
|
|
|
import java.text.SimpleDateFormat;
|
2020-02-29 21:42:43 +00:00
|
|
|
import java.util.*;
|
2020-02-26 14:22:59 +00:00
|
|
|
|
|
|
|
import static org.schabi.newpipe.extractor.NewPipe.getDownloader;
|
2020-05-03 08:28:45 +00:00
|
|
|
import static org.schabi.newpipe.extractor.utils.JsonUtils.EMPTY_STRING;
|
2020-04-15 16:49:58 +00:00
|
|
|
import static org.schabi.newpipe.extractor.utils.Utils.*;
|
2019-01-13 11:52:07 +00:00
|
|
|
|
2017-06-29 18:12:55 +00:00
|
|
|
/*
|
2017-03-01 17:47:52 +00:00
|
|
|
* Created by Christian Schabesberger on 02.03.16.
|
|
|
|
*
|
|
|
|
* Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
|
|
|
|
* YoutubeParsingHelper.java is part of NewPipe.
|
|
|
|
*
|
|
|
|
* NewPipe is free software: you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* NewPipe is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
public class YoutubeParsingHelper {
|
|
|
|
|
|
|
|
/**
 * Private constructor: every member of this class visible here is static,
 * so no instance is ever needed.
 */
private YoutubeParsingHelper() {
    // intentionally empty
}
|
|
|
|
|
2020-02-15 08:30:05 +00:00
|
|
|
/**
|
|
|
|
* The official youtube app supports intents in this format, where after the ':' is the videoId.
|
|
|
|
* Accordingly there are other apps sharing streams in this format.
|
|
|
|
*/
|
|
|
|
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
|
|
|
|
|
2020-02-26 14:22:59 +00:00
|
|
|
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
|
|
|
|
private static String clientVersion;
|
|
|
|
|
2020-03-20 10:05:19 +00:00
|
|
|
private static final String[] HARDCODED_YOUTUBE_MUSIC_KEYS = {"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "0.1"};
|
2020-03-17 10:33:39 +00:00
|
|
|
private static String[] youtubeMusicKeys;
|
|
|
|
|
2019-12-16 07:35:44 +00:00
|
|
|
private static final String FEED_BASE_CHANNEL_ID = "https://www.youtube.com/feeds/videos.xml?channel_id=";
|
|
|
|
private static final String FEED_BASE_USER = "https://www.youtube.com/feeds/videos.xml?user=";
|
|
|
|
|
2019-10-29 05:00:29 +00:00
|
|
|
private static final String[] RECAPTCHA_DETECTION_SELECTORS = {
|
|
|
|
"form[action*=\"/das_captcha\"]",
|
|
|
|
"input[name*=\"action_recaptcha_verify\"]"
|
|
|
|
};
|
|
|
|
|
2019-04-28 20:03:16 +00:00
|
|
|
public static Document parseAndCheckPage(final String url, final Response response) throws ReCaptchaException {
|
|
|
|
final Document document = Jsoup.parse(response.responseBody(), url);
|
2019-10-29 05:00:29 +00:00
|
|
|
|
|
|
|
for (String detectionSelector : RECAPTCHA_DETECTION_SELECTORS) {
|
|
|
|
if (!document.select(detectionSelector).isEmpty()) {
|
|
|
|
throw new ReCaptchaException("reCAPTCHA challenge requested (detected with selector: \"" + detectionSelector + "\")", url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return document;
|
|
|
|
}
|
|
|
|
|
2019-01-13 11:52:07 +00:00
|
|
|
public static boolean isYoutubeURL(URL url) {
|
|
|
|
String host = url.getHost();
|
|
|
|
return host.equalsIgnoreCase("youtube.com") || host.equalsIgnoreCase("www.youtube.com")
|
2019-09-12 02:43:49 +00:00
|
|
|
|| host.equalsIgnoreCase("m.youtube.com") || host.equalsIgnoreCase("music.youtube.com");
|
2019-01-13 11:52:07 +00:00
|
|
|
}
|
|
|
|
|
2019-01-27 00:28:51 +00:00
|
|
|
public static boolean isYoutubeServiceURL(URL url) {
|
|
|
|
String host = url.getHost();
|
|
|
|
return host.equalsIgnoreCase("www.youtube-nocookie.com") || host.equalsIgnoreCase("youtu.be");
|
|
|
|
}
|
2019-01-13 11:52:07 +00:00
|
|
|
|
2019-01-27 00:28:51 +00:00
|
|
|
public static boolean isHooktubeURL(URL url) {
|
2019-01-13 11:52:07 +00:00
|
|
|
String host = url.getHost();
|
2019-01-27 00:28:51 +00:00
|
|
|
return host.equalsIgnoreCase("hooktube.com");
|
|
|
|
}
|
|
|
|
|
|
|
|
public static boolean isInvidioURL(URL url) {
|
|
|
|
String host = url.getHost();
|
2019-09-10 16:54:32 +00:00
|
|
|
return host.equalsIgnoreCase("invidio.us") || host.equalsIgnoreCase("dev.invidio.us") || host.equalsIgnoreCase("www.invidio.us") || host.equalsIgnoreCase("invidious.snopyta.org") || host.equalsIgnoreCase("de.invidious.snopyta.org") || host.equalsIgnoreCase("fi.invidious.snopyta.org") || host.equalsIgnoreCase("vid.wxzm.sx") || host.equalsIgnoreCase("invidious.kabi.tk") || host.equalsIgnoreCase("invidiou.sh") || host.equalsIgnoreCase("www.invidiou.sh") || host.equalsIgnoreCase("no.invidiou.sh") || host.equalsIgnoreCase("invidious.enkirton.net") || host.equalsIgnoreCase("tube.poal.co") || host.equalsIgnoreCase("invidious.13ad.de") || host.equalsIgnoreCase("yt.elukerio.org");
|
2019-01-13 11:52:07 +00:00
|
|
|
}
|
|
|
|
|
2017-08-11 18:21:49 +00:00
|
|
|
public static long parseDurationString(String input)
|
2017-03-01 17:47:52 +00:00
|
|
|
throws ParsingException, NumberFormatException {
|
2018-09-09 09:53:10 +00:00
|
|
|
|
|
|
|
// If time separator : is not detected, try . instead
|
2018-09-09 12:01:39 +00:00
|
|
|
|
|
|
|
final String[] splitInput = input.contains(":")
|
|
|
|
? input.split(":")
|
|
|
|
: input.split("\\.");
|
|
|
|
|
2017-03-01 17:47:52 +00:00
|
|
|
String days = "0";
|
|
|
|
String hours = "0";
|
|
|
|
String minutes = "0";
|
2018-09-09 12:01:39 +00:00
|
|
|
final String seconds;
|
2017-03-01 17:47:52 +00:00
|
|
|
|
2017-06-29 18:12:55 +00:00
|
|
|
switch (splitInput.length) {
|
2017-03-01 17:47:52 +00:00
|
|
|
case 4:
|
|
|
|
days = splitInput[0];
|
|
|
|
hours = splitInput[1];
|
|
|
|
minutes = splitInput[2];
|
|
|
|
seconds = splitInput[3];
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
hours = splitInput[0];
|
|
|
|
minutes = splitInput[1];
|
|
|
|
seconds = splitInput[2];
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
minutes = splitInput[0];
|
|
|
|
seconds = splitInput[1];
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
seconds = splitInput[0];
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw new ParsingException("Error duration string with unknown format: " + input);
|
|
|
|
}
|
2017-08-11 18:21:49 +00:00
|
|
|
return ((((Long.parseLong(days) * 24)
|
|
|
|
+ Long.parseLong(hours) * 60)
|
|
|
|
+ Long.parseLong(minutes)) * 60)
|
|
|
|
+ Long.parseLong(seconds);
|
2017-03-01 17:47:52 +00:00
|
|
|
}
|
2019-04-28 20:03:16 +00:00
|
|
|
|
2019-12-16 07:35:44 +00:00
|
|
|
public static String getFeedUrlFrom(final String channelIdOrUser) {
|
|
|
|
if (channelIdOrUser.startsWith("user/")) {
|
|
|
|
return FEED_BASE_USER + channelIdOrUser.replace("user/", "");
|
|
|
|
} else if (channelIdOrUser.startsWith("channel/")) {
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser.replace("channel/", "");
|
|
|
|
} else {
|
|
|
|
return FEED_BASE_CHANNEL_ID + channelIdOrUser;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-28 20:03:16 +00:00
|
|
|
public static Calendar parseDateFrom(String textualUploadDate) throws ParsingException {
|
|
|
|
Date date;
|
|
|
|
try {
|
|
|
|
date = new SimpleDateFormat("yyyy-MM-dd").parse(textualUploadDate);
|
|
|
|
} catch (ParseException e) {
|
|
|
|
throw new ParsingException("Could not parse date: \"" + textualUploadDate + "\"", e);
|
|
|
|
}
|
|
|
|
|
|
|
|
final Calendar uploadDate = Calendar.getInstance();
|
|
|
|
uploadDate.setTime(date);
|
|
|
|
return uploadDate;
|
|
|
|
}
|
2020-02-22 22:51:02 +00:00
|
|
|
|
|
|
|
public static JsonObject getInitialData(String html) throws ParsingException {
|
|
|
|
try {
|
|
|
|
String initialData = Parser.matchGroup1("window\\[\"ytInitialData\"\\]\\s*=\\s*(\\{.*?\\});", html);
|
|
|
|
return JsonParser.object().from(initialData);
|
|
|
|
} catch (JsonParserException | Parser.RegexException e) {
|
|
|
|
throw new ParsingException("Could not get ytInitialData", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-29 21:42:43 +00:00
|
|
|
public static boolean isHardcodedClientVersionValid() throws IOException, ExtractionException {
|
|
|
|
final String url = "https://www.youtube.com/results?search_query=test&pbj=1";
|
|
|
|
|
|
|
|
Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
|
|
|
headers.put("X-YouTube-Client-Version",
|
|
|
|
Collections.singletonList(HARDCODED_CLIENT_VERSION));
|
|
|
|
final String response = getDownloader().get(url, headers).responseBody();
|
2020-02-26 14:22:59 +00:00
|
|
|
|
2020-02-29 21:42:43 +00:00
|
|
|
return response.length() > 50; // ensure to have a valid response
|
2020-02-28 15:35:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the client version from a page
|
|
|
|
* @return
|
|
|
|
* @throws ParsingException
|
|
|
|
*/
|
2020-02-29 21:42:43 +00:00
|
|
|
public static String getClientVersion() throws IOException, ExtractionException {
|
2020-04-15 16:49:58 +00:00
|
|
|
if (!isNullOrEmpty(clientVersion)) return clientVersion;
|
2020-03-20 10:05:19 +00:00
|
|
|
if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION;
|
2020-02-28 15:35:24 +00:00
|
|
|
|
2020-02-29 21:42:43 +00:00
|
|
|
final String url = "https://www.youtube.com/results?search_query=test";
|
|
|
|
final String html = getDownloader().get(url).responseBody();
|
|
|
|
JsonObject initialData = getInitialData(html);
|
|
|
|
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
|
|
|
|
String shortClientVersion = null;
|
|
|
|
|
|
|
|
// try to get version from initial data first
|
|
|
|
for (Object service : serviceTrackingParams) {
|
|
|
|
JsonObject s = (JsonObject) service;
|
|
|
|
if (s.getString("service").equals("CSI")) {
|
|
|
|
JsonArray params = s.getArray("params");
|
|
|
|
for (Object param : params) {
|
|
|
|
JsonObject p = (JsonObject) param;
|
|
|
|
String key = p.getString("key");
|
|
|
|
if (key != null && key.equals("cver")) {
|
2020-03-20 10:05:19 +00:00
|
|
|
return clientVersion = p.getString("value");
|
2020-02-24 18:03:54 +00:00
|
|
|
}
|
2020-02-29 21:42:43 +00:00
|
|
|
}
|
|
|
|
} else if (s.getString("service").equals("ECATCHER")) {
|
|
|
|
// fallback to get a shortened client version which does not contain the last two digits
|
|
|
|
JsonArray params = s.getArray("params");
|
|
|
|
for (Object param : params) {
|
|
|
|
JsonObject p = (JsonObject) param;
|
|
|
|
String key = p.getString("key");
|
|
|
|
if (key != null && key.equals("client.version")) {
|
|
|
|
shortClientVersion = p.getString("value");
|
2020-02-24 18:03:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-02-29 21:42:43 +00:00
|
|
|
}
|
2020-02-24 18:03:54 +00:00
|
|
|
|
2020-02-29 21:42:43 +00:00
|
|
|
String contextClientVersion;
|
|
|
|
String[] patterns = {
|
|
|
|
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
|
|
|
|
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
|
|
|
|
"client.version=([0-9\\.]+)"
|
|
|
|
};
|
|
|
|
for (String pattern : patterns) {
|
|
|
|
try {
|
|
|
|
contextClientVersion = Parser.matchGroup1(pattern, html);
|
2020-04-15 16:49:58 +00:00
|
|
|
if (!isNullOrEmpty(contextClientVersion)) {
|
2020-03-20 10:05:19 +00:00
|
|
|
return clientVersion = contextClientVersion;
|
2020-02-26 14:22:59 +00:00
|
|
|
}
|
2020-02-29 21:42:43 +00:00
|
|
|
} catch (Exception ignored) {
|
2020-02-26 14:22:59 +00:00
|
|
|
}
|
2020-02-29 21:42:43 +00:00
|
|
|
}
|
2020-02-24 18:03:54 +00:00
|
|
|
|
2020-02-29 21:42:43 +00:00
|
|
|
if (shortClientVersion != null) {
|
2020-03-20 10:05:19 +00:00
|
|
|
return clientVersion = shortClientVersion;
|
2020-02-29 21:42:43 +00:00
|
|
|
}
|
2020-02-24 18:03:54 +00:00
|
|
|
|
|
|
|
throw new ParsingException("Could not get client version");
|
|
|
|
}
|
2020-02-27 16:39:23 +00:00
|
|
|
|
2020-03-20 10:05:19 +00:00
|
|
|
public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, ReCaptchaException {
|
|
|
|
final String url = "https://music.youtube.com/youtubei/v1/search?alt=json&key=" + HARDCODED_YOUTUBE_MUSIC_KEYS[0];
|
|
|
|
|
|
|
|
// @formatter:off
|
|
|
|
byte[] json = JsonWriter.string()
|
|
|
|
.object()
|
|
|
|
.object("context")
|
|
|
|
.object("client")
|
|
|
|
.value("clientName", "WEB_REMIX")
|
|
|
|
.value("clientVersion", HARDCODED_YOUTUBE_MUSIC_KEYS[2])
|
|
|
|
.value("hl", "en")
|
|
|
|
.value("gl", "GB")
|
|
|
|
.array("experimentIds").end()
|
|
|
|
.value("experimentsToken", "")
|
|
|
|
.value("utcOffsetMinutes", 0)
|
|
|
|
.object("locationInfo").end()
|
|
|
|
.object("musicAppInfo").end()
|
|
|
|
.end()
|
|
|
|
.object("capabilities").end()
|
|
|
|
.object("request")
|
|
|
|
.array("internalExperimentFlags").end()
|
|
|
|
.object("sessionIndex").end()
|
|
|
|
.end()
|
|
|
|
.object("activePlayers").end()
|
|
|
|
.object("user")
|
|
|
|
.value("enableSafetyMode", false)
|
|
|
|
.end()
|
|
|
|
.end()
|
|
|
|
.value("query", "test")
|
|
|
|
.value("params", "Eg-KAQwIARAAGAAgACgAMABqChAEEAUQAxAKEAk%3D")
|
|
|
|
.end().done().getBytes("UTF-8");
|
|
|
|
// @formatter:on
|
|
|
|
|
|
|
|
Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList(HARDCODED_YOUTUBE_MUSIC_KEYS[1]));
|
|
|
|
headers.put("X-YouTube-Client-Version", Collections.singletonList(HARDCODED_YOUTUBE_MUSIC_KEYS[2]));
|
|
|
|
headers.put("Origin", Collections.singletonList("https://music.youtube.com"));
|
2020-03-20 13:14:02 +00:00
|
|
|
headers.put("Referer", Collections.singletonList("music.youtube.com"));
|
2020-03-20 10:05:19 +00:00
|
|
|
headers.put("Content-Type", Collections.singletonList("application/json"));
|
|
|
|
|
|
|
|
String response = getDownloader().post(url, headers, json).responseBody();
|
|
|
|
|
|
|
|
return response.length() > 50; // ensure to have a valid response
|
|
|
|
}
|
|
|
|
|
2020-03-17 10:33:39 +00:00
|
|
|
public static String[] getYoutubeMusicKeys() throws IOException, ReCaptchaException, Parser.RegexException {
|
|
|
|
if (youtubeMusicKeys != null && youtubeMusicKeys.length == 3) return youtubeMusicKeys;
|
2020-03-20 10:05:19 +00:00
|
|
|
if (areHardcodedYoutubeMusicKeysValid()) return youtubeMusicKeys = HARDCODED_YOUTUBE_MUSIC_KEYS;
|
2020-03-17 10:33:39 +00:00
|
|
|
|
|
|
|
final String url = "https://music.youtube.com/";
|
|
|
|
final String html = getDownloader().get(url).responseBody();
|
|
|
|
|
2020-03-20 10:05:19 +00:00
|
|
|
String key;
|
|
|
|
try {
|
|
|
|
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
|
|
|
|
} catch (Parser.RegexException e) {
|
|
|
|
key = Parser.matchGroup1("innertube_api_key\":\"([0-9a-zA-Z_-]+?)\"", html);
|
|
|
|
}
|
|
|
|
|
2020-03-17 10:33:39 +00:00
|
|
|
final String clientName = Parser.matchGroup1("INNERTUBE_CONTEXT_CLIENT_NAME\":([0-9]+?),", html);
|
2020-03-20 10:05:19 +00:00
|
|
|
|
|
|
|
String clientVersion;
|
|
|
|
try {
|
|
|
|
clientVersion = Parser.matchGroup1("INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
|
|
|
|
} catch (Parser.RegexException e) {
|
|
|
|
try {
|
|
|
|
clientVersion = Parser.matchGroup1("INNERTUBE_CLIENT_VERSION\":\"([0-9\\.]+?)\"", html);
|
|
|
|
} catch (Parser.RegexException ee) {
|
|
|
|
clientVersion = Parser.matchGroup1("innertube_context_client_version\":\"([0-9\\.]+?)\"", html);
|
|
|
|
}
|
|
|
|
}
|
2020-03-17 10:33:39 +00:00
|
|
|
|
|
|
|
return youtubeMusicKeys = new String[]{key, clientName, clientVersion};
|
|
|
|
}
|
|
|
|
|
2020-02-29 21:57:25 +00:00
|
|
|
public static String getUrlFromNavigationEndpoint(JsonObject navigationEndpoint) throws ParsingException {
|
2020-04-16 14:08:14 +00:00
|
|
|
if (navigationEndpoint.has("urlEndpoint")) {
|
2020-02-27 16:39:23 +00:00
|
|
|
String internUrl = navigationEndpoint.getObject("urlEndpoint").getString("url");
|
|
|
|
if (internUrl.startsWith("/redirect?")) {
|
|
|
|
// q parameter can be the first parameter
|
|
|
|
internUrl = internUrl.substring(10);
|
|
|
|
String[] params = internUrl.split("&");
|
|
|
|
for (String param : params) {
|
|
|
|
if (param.split("=")[0].equals("q")) {
|
|
|
|
String url;
|
|
|
|
try {
|
2020-03-01 08:41:06 +00:00
|
|
|
url = URLDecoder.decode(param.split("=")[1], "UTF-8");
|
2020-02-27 16:39:23 +00:00
|
|
|
} catch (UnsupportedEncodingException e) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return url;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (internUrl.startsWith("http")) {
|
|
|
|
return internUrl;
|
|
|
|
}
|
2020-04-16 14:08:14 +00:00
|
|
|
} else if (navigationEndpoint.has("browseEndpoint")) {
|
2020-02-29 21:57:25 +00:00
|
|
|
final JsonObject browseEndpoint = navigationEndpoint.getObject("browseEndpoint");
|
|
|
|
final String canonicalBaseUrl = browseEndpoint.getString("canonicalBaseUrl");
|
|
|
|
final String browseId = browseEndpoint.getString("browseId");
|
|
|
|
|
|
|
|
// All channel ids are prefixed with UC
|
|
|
|
if (browseId != null && browseId.startsWith("UC")) {
|
|
|
|
return "https://www.youtube.com/channel/" + browseId;
|
|
|
|
}
|
|
|
|
|
2020-04-15 16:49:58 +00:00
|
|
|
if (!isNullOrEmpty(canonicalBaseUrl)) {
|
2020-02-29 21:57:25 +00:00
|
|
|
return "https://www.youtube.com" + canonicalBaseUrl;
|
|
|
|
}
|
|
|
|
|
|
|
|
throw new ParsingException("canonicalBaseUrl is null and browseId is not a channel (\"" + browseEndpoint + "\")");
|
2020-04-16 14:08:14 +00:00
|
|
|
} else if (navigationEndpoint.has("watchEndpoint")) {
|
2020-02-27 16:39:23 +00:00
|
|
|
StringBuilder url = new StringBuilder();
|
|
|
|
url.append("https://www.youtube.com/watch?v=").append(navigationEndpoint.getObject("watchEndpoint").getString("videoId"));
|
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("playlistId"))
|
|
|
|
url.append("&list=").append(navigationEndpoint.getObject("watchEndpoint").getString("playlistId"));
|
|
|
|
if (navigationEndpoint.getObject("watchEndpoint").has("startTimeSeconds"))
|
|
|
|
url.append("&t=").append(navigationEndpoint.getObject("watchEndpoint").getInt("startTimeSeconds"));
|
|
|
|
return url.toString();
|
2020-04-16 14:08:14 +00:00
|
|
|
} else if (navigationEndpoint.has("watchPlaylistEndpoint")) {
|
2020-03-17 10:33:39 +00:00
|
|
|
return "https://www.youtube.com/playlist?list=" +
|
|
|
|
navigationEndpoint.getObject("watchPlaylistEndpoint").getString("playlistId");
|
2020-02-27 16:39:23 +00:00
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2020-04-20 12:27:33 +00:00
|
|
|
/**
|
|
|
|
* Get the text from a JSON object that has either a simpleText or a runs array.
|
|
|
|
* @param textObject JSON object to get the text from
|
|
|
|
* @param html whether to return HTML, by parsing the navigationEndpoint
|
2020-05-01 11:55:15 +00:00
|
|
|
* @return text in the JSON object or {@code null}
|
2020-04-20 12:27:33 +00:00
|
|
|
*/
|
2020-02-29 21:57:25 +00:00
|
|
|
public static String getTextFromObject(JsonObject textObject, boolean html) throws ParsingException {
|
2020-05-11 09:40:24 +00:00
|
|
|
if (isNullOrEmpty(textObject)) return null;
|
2020-05-01 11:55:15 +00:00
|
|
|
|
2020-02-27 16:39:23 +00:00
|
|
|
if (textObject.has("simpleText")) return textObject.getString("simpleText");
|
|
|
|
|
2020-05-01 11:55:15 +00:00
|
|
|
if (textObject.getArray("runs").isEmpty()) return null;
|
|
|
|
|
2020-02-27 16:39:23 +00:00
|
|
|
StringBuilder textBuilder = new StringBuilder();
|
|
|
|
for (Object textPart : textObject.getArray("runs")) {
|
|
|
|
String text = ((JsonObject) textPart).getString("text");
|
2020-04-16 14:08:14 +00:00
|
|
|
if (html && ((JsonObject) textPart).has("navigationEndpoint")) {
|
2020-02-27 16:39:23 +00:00
|
|
|
String url = getUrlFromNavigationEndpoint(((JsonObject) textPart).getObject("navigationEndpoint"));
|
2020-04-15 16:49:58 +00:00
|
|
|
if (!isNullOrEmpty(url)) {
|
2020-02-27 16:39:23 +00:00
|
|
|
textBuilder.append("<a href=\"").append(url).append("\">").append(text).append("</a>");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
textBuilder.append(text);
|
|
|
|
}
|
|
|
|
|
|
|
|
String text = textBuilder.toString();
|
|
|
|
|
|
|
|
if (html) {
|
|
|
|
text = text.replaceAll("\\n", "<br>");
|
|
|
|
text = text.replaceAll(" ", " ");
|
|
|
|
}
|
|
|
|
|
|
|
|
return text;
|
|
|
|
}
|
|
|
|
|
2020-02-29 21:57:25 +00:00
|
|
|
public static String getTextFromObject(JsonObject textObject) throws ParsingException {
|
2020-02-27 16:39:23 +00:00
|
|
|
return getTextFromObject(textObject, false);
|
|
|
|
}
|
2020-02-28 08:36:33 +00:00
|
|
|
|
|
|
|
public static String fixThumbnailUrl(String thumbnailUrl) {
|
|
|
|
if (thumbnailUrl.startsWith("//")) {
|
|
|
|
thumbnailUrl = thumbnailUrl.substring(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (thumbnailUrl.startsWith(HTTP)) {
|
|
|
|
thumbnailUrl = Utils.replaceHttpWithHttps(thumbnailUrl);
|
|
|
|
} else if (!thumbnailUrl.startsWith(HTTPS)) {
|
|
|
|
thumbnailUrl = "https://" + thumbnailUrl;
|
|
|
|
}
|
|
|
|
|
|
|
|
return thumbnailUrl;
|
|
|
|
}
|
2020-02-29 15:42:04 +00:00
|
|
|
|
2020-04-03 15:23:18 +00:00
|
|
|
public static String getValidJsonResponseBody(final Response response)
|
2020-04-01 14:01:21 +00:00
|
|
|
throws ParsingException, MalformedURLException {
|
2020-03-01 00:50:31 +00:00
|
|
|
if (response.responseCode() == 404) {
|
|
|
|
throw new ContentNotAvailableException("Not found" +
|
|
|
|
" (\"" + response.responseCode() + " " + response.responseMessage() + "\")");
|
|
|
|
}
|
|
|
|
|
|
|
|
final String responseBody = response.responseBody();
|
|
|
|
if (responseBody.length() < 50) { // ensure to have a valid response
|
2020-02-29 15:42:04 +00:00
|
|
|
throw new ParsingException("JSON response is too short");
|
|
|
|
}
|
|
|
|
|
2020-03-01 00:52:25 +00:00
|
|
|
// Check if the request was redirected to the error page.
|
|
|
|
final URL latestUrl = new URL(response.latestUrl());
|
|
|
|
if (latestUrl.getHost().equalsIgnoreCase("www.youtube.com")) {
|
|
|
|
final String path = latestUrl.getPath();
|
|
|
|
if (path.equalsIgnoreCase("/oops") || path.equalsIgnoreCase("/error")) {
|
|
|
|
throw new ContentNotAvailableException("Content unavailable");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
final String responseContentType = response.getHeader("Content-Type");
|
2020-04-01 14:01:21 +00:00
|
|
|
if (responseContentType != null
|
|
|
|
&& responseContentType.toLowerCase().contains("text/html")) {
|
2020-03-01 00:52:25 +00:00
|
|
|
throw new ParsingException("Got HTML document, expected JSON response" +
|
|
|
|
" (latest url was: \"" + response.latestUrl() + "\")");
|
|
|
|
}
|
|
|
|
|
2020-04-01 14:01:21 +00:00
|
|
|
return responseBody;
|
|
|
|
}
|
|
|
|
|
|
|
|
public static JsonArray getJsonResponse(final String url, final Localization localization)
|
|
|
|
throws IOException, ExtractionException {
|
|
|
|
Map<String, List<String>> headers = new HashMap<>();
|
|
|
|
headers.put("X-YouTube-Client-Name", Collections.singletonList("1"));
|
|
|
|
headers.put("X-YouTube-Client-Version", Collections.singletonList(getClientVersion()));
|
|
|
|
final Response response = getDownloader().get(url, headers, localization);
|
|
|
|
|
2020-04-03 15:23:18 +00:00
|
|
|
final String responseBody = getValidJsonResponseBody(response);
|
2020-04-01 14:01:21 +00:00
|
|
|
|
2020-02-29 15:42:04 +00:00
|
|
|
try {
|
2020-03-01 00:50:31 +00:00
|
|
|
return JsonParser.array().from(responseBody);
|
2020-02-29 15:42:04 +00:00
|
|
|
} catch (JsonParserException e) {
|
|
|
|
throw new ParsingException("Could not parse JSON", e);
|
|
|
|
}
|
|
|
|
}
|
2020-03-01 00:52:25 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Shared alert detection function, multiple endpoints return the error similarly structured.
|
|
|
|
* <p>
|
|
|
|
* Will check if the object has an alert of the type "ERROR".
|
2020-04-01 14:01:21 +00:00
|
|
|
* </p>
|
2020-03-01 00:52:25 +00:00
|
|
|
*
|
|
|
|
* @param initialData the object which will be checked if an alert is present
|
|
|
|
* @throws ContentNotAvailableException if an alert is detected
|
|
|
|
*/
|
2020-05-03 08:28:45 +00:00
|
|
|
public static void defaultAlertsCheck(final JsonObject initialData) throws ParsingException {
|
2020-03-01 00:52:25 +00:00
|
|
|
final JsonArray alerts = initialData.getArray("alerts");
|
2020-04-15 16:49:58 +00:00
|
|
|
if (!isNullOrEmpty(alerts)) {
|
2020-03-01 00:52:25 +00:00
|
|
|
final JsonObject alertRenderer = alerts.getObject(0).getObject("alertRenderer");
|
2020-05-03 08:28:45 +00:00
|
|
|
final String alertText = getTextFromObject(alertRenderer.getObject("text"));
|
|
|
|
final String alertType = alertRenderer.getString("type", EMPTY_STRING);
|
2020-03-01 00:52:25 +00:00
|
|
|
if (alertType.equalsIgnoreCase("ERROR")) {
|
|
|
|
throw new ContentNotAvailableException("Got error: \"" + alertText + "\"");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-03-01 17:47:52 +00:00
|
|
|
}
|