2020-04-10 08:51:05 +00:00
package org.schabi.newpipe.extractor.services.youtube ;
2017-03-01 17:47:52 +00:00
2021-03-03 18:49:26 +00:00
import com.grack.nanojson.JsonArray ;
2021-04-02 19:34:47 +00:00
import com.grack.nanojson.JsonBuilder ;
2021-03-03 18:49:26 +00:00
import com.grack.nanojson.JsonObject ;
import com.grack.nanojson.JsonParser ;
import com.grack.nanojson.JsonParserException ;
import com.grack.nanojson.JsonWriter ;
2020-12-15 16:21:21 +00:00
import org.schabi.newpipe.extractor.MetaInfo ;
2020-04-16 17:28:27 +00:00
import org.schabi.newpipe.extractor.Page ;
2019-04-28 20:03:16 +00:00
import org.schabi.newpipe.extractor.downloader.Response ;
2021-03-22 09:35:05 +00:00
import org.schabi.newpipe.extractor.exceptions.* ;
2021-04-30 17:06:56 +00:00
import org.schabi.newpipe.extractor.localization.ContentCountry ;
2020-02-29 15:55:07 +00:00
import org.schabi.newpipe.extractor.localization.Localization ;
2020-12-15 16:21:21 +00:00
import org.schabi.newpipe.extractor.stream.Description ;
2021-03-04 17:58:51 +00:00
import org.schabi.newpipe.extractor.utils.JsonUtils ;
2020-02-22 22:51:02 +00:00
import org.schabi.newpipe.extractor.utils.Parser ;
2020-02-28 08:36:33 +00:00
import org.schabi.newpipe.extractor.utils.Utils ;
2017-03-01 17:47:52 +00:00
2020-02-29 15:42:04 +00:00
import java.io.IOException ;
2020-02-27 16:39:23 +00:00
import java.io.UnsupportedEncodingException ;
2020-04-01 14:01:21 +00:00
import java.net.MalformedURLException ;
2019-01-13 11:52:07 +00:00
import java.net.URL ;
2020-02-27 16:39:23 +00:00
import java.net.URLDecoder ;
2020-11-03 10:54:46 +00:00
import java.time.LocalDate ;
2020-10-18 03:48:14 +00:00
import java.time.OffsetDateTime ;
2020-11-03 10:54:46 +00:00
import java.time.ZoneOffset ;
2020-10-18 03:48:14 +00:00
import java.time.format.DateTimeParseException ;
2021-04-07 10:25:59 +00:00
import java.util.* ;
2021-01-17 17:48:16 +00:00
import javax.annotation.Nonnull ;
import javax.annotation.Nullable ;
2020-02-26 14:22:59 +00:00
import static org.schabi.newpipe.extractor.NewPipe.getDownloader ;
2021-03-03 18:49:26 +00:00
import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING ;
import static org.schabi.newpipe.extractor.utils.Utils.HTTP ;
import static org.schabi.newpipe.extractor.utils.Utils.HTTPS ;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8 ;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty ;
2019-01-13 11:52:07 +00:00
2017-06-29 18:12:55 +00:00
/*
 * Created by Christian Schabesberger on 02.03.16.
 *
 * Copyright (C) Christian Schabesberger 2016 <chris.schabesberger@mailbox.org>
 * YoutubeParsingHelper.java is part of NewPipe.
 *
 * NewPipe is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * NewPipe is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with NewPipe. If not, see <http://www.gnu.org/licenses/>.
 */
public class YoutubeParsingHelper {
/**
 * Hidden constructor: this class only offers static members and is never instantiated.
 */
private YoutubeParsingHelper() {
    // nothing to initialise
}
2021-05-30 15:23:51 +00:00
public static final String YOUTUBEI_V1_URL = " https://www.youtube.com/youtubei/v1/ " ;
2021-06-26 18:04:55 +00:00
private static final String HARDCODED_CLIENT_VERSION = " 2.20210623.00.00 " ;
2021-04-12 16:24:32 +00:00
private static final String HARDCODED_KEY = " AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8 " ;
2021-06-05 11:51:56 +00:00
private static final String MOBILE_YOUTUBE_KEY = " AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w " ;
2021-06-24 16:39:16 +00:00
private static final String MOBILE_YOUTUBE_CLIENT_VERSION = " 16.23.36 " ;
2020-02-26 14:22:59 +00:00
private static String clientVersion ;
2020-07-26 10:00:56 +00:00
private static String key ;
2021-06-05 11:51:56 +00:00
private static final String [ ] HARDCODED_YOUTUBE_MUSIC_KEY =
2021-06-24 16:39:16 +00:00
{ " AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30 " , " 67 " , " 1.20210621.00.00 " } ;
2021-06-06 13:39:45 +00:00
private static String [ ] youtubeMusicKey ;
2020-03-17 10:33:39 +00:00
2021-04-12 16:24:32 +00:00
private static boolean keyAndVersionExtracted = false ;
2021-04-25 16:54:26 +00:00
private static Boolean areHardcodedClientVersionAndKeyValidValue = null ;
2021-04-12 16:24:32 +00:00
2021-04-08 14:36:55 +00:00
private static Random numberGenerator = new Random ( ) ;
2021-04-07 10:25:59 +00:00
/ * *
* < code > PENDING + < / code > means that the user did not yet submit their choices .
* Therefore , YouTube & Google should not track the user , because they did not give consent .
* The three digits at the end can be random , but are required .
* /
2021-04-09 09:51:54 +00:00
private static final String CONSENT_COOKIE_VALUE = " PENDING+ " ;
2021-04-07 10:25:59 +00:00
/ * *
* Youtube < code > CONSENT < / code > cookie . Should prevent redirect to consent . youtube . com
* /
2021-04-09 09:51:54 +00:00
private static final String CONSENT_COOKIE = " CONSENT= " + CONSENT_COOKIE_VALUE ;
2021-04-07 10:25:59 +00:00
2021-04-25 16:54:26 +00:00
private static final String FEED_BASE_CHANNEL_ID =
" https://www.youtube.com/feeds/videos.xml?channel_id= " ;
2019-12-16 07:35:44 +00:00
private static final String FEED_BASE_USER = " https://www.youtube.com/feeds/videos.xml?user= " ;
2020-12-20 18:54:12 +00:00
private static boolean isGoogleURL ( String url ) {
url = extractCachedUrlIfNeeded ( url ) ;
try {
final URL u = new URL ( url ) ;
final String host = u . getHost ( ) ;
2021-06-24 16:39:16 +00:00
return host . startsWith ( " google. " )
| | host . startsWith ( " m.google. " )
2021-04-26 09:49:47 +00:00
| | host . startsWith ( " www.google. " ) ;
2021-04-08 14:17:59 +00:00
} catch ( final MalformedURLException e ) {
2020-12-20 18:54:12 +00:00
return false ;
2019-10-29 05:00:29 +00:00
}
2020-12-20 18:54:12 +00:00
}
2019-10-29 05:00:29 +00:00
2021-06-11 11:34:23 +00:00
public static boolean isYoutubeURL ( @Nonnull final URL url ) {
2020-12-15 16:21:21 +00:00
final String host = url . getHost ( ) ;
2021-06-24 16:39:16 +00:00
return host . equalsIgnoreCase ( " youtube.com " )
| | host . equalsIgnoreCase ( " www.youtube.com " )
2021-04-25 16:54:26 +00:00
| | host . equalsIgnoreCase ( " m.youtube.com " )
| | host . equalsIgnoreCase ( " music.youtube.com " ) ;
2019-01-13 11:52:07 +00:00
}
2021-06-11 11:34:23 +00:00
public static boolean isYoutubeServiceURL ( @Nonnull final URL url ) {
2020-12-15 16:21:21 +00:00
final String host = url . getHost ( ) ;
2021-04-25 16:54:26 +00:00
return host . equalsIgnoreCase ( " www.youtube-nocookie.com " )
| | host . equalsIgnoreCase ( " youtu.be " ) ;
2019-01-27 00:28:51 +00:00
}
2019-01-13 11:52:07 +00:00
2021-06-11 11:34:23 +00:00
public static boolean isHooktubeURL ( @Nonnull final URL url ) {
2020-12-15 16:21:21 +00:00
final String host = url . getHost ( ) ;
2019-01-27 00:28:51 +00:00
return host . equalsIgnoreCase ( " hooktube.com " ) ;
}
2021-06-11 11:34:23 +00:00
public static boolean isInvidioURL ( @Nonnull final URL url ) {
2020-12-15 16:21:21 +00:00
final String host = url . getHost ( ) ;
2020-07-02 19:31:05 +00:00
return host . equalsIgnoreCase ( " invidio.us " )
| | host . equalsIgnoreCase ( " dev.invidio.us " )
| | host . equalsIgnoreCase ( " www.invidio.us " )
2021-01-22 18:20:22 +00:00
| | host . equalsIgnoreCase ( " redirect.invidious.io " )
2020-07-02 19:31:05 +00:00
| | host . equalsIgnoreCase ( " invidious.snopyta.org " )
| | host . equalsIgnoreCase ( " yewtu.be " )
2020-11-11 15:12:31 +00:00
| | host . equalsIgnoreCase ( " tube.connect.cafe " )
| | host . equalsIgnoreCase ( " invidious.kavin.rocks " )
2021-06-23 12:12:03 +00:00
| | host . equalsIgnoreCase ( " invidious-us.kavin.rocks " )
| | host . equalsIgnoreCase ( " piped.kavin.rocks " )
2020-11-11 15:12:31 +00:00
| | host . equalsIgnoreCase ( " invidious.site " )
| | host . equalsIgnoreCase ( " vid.mint.lgbt " )
| | host . equalsIgnoreCase ( " invidiou.site " )
2021-01-22 18:20:22 +00:00
| | host . equalsIgnoreCase ( " invidious.fdn.fr " )
| | host . equalsIgnoreCase ( " invidious.048596.xyz " )
| | host . equalsIgnoreCase ( " invidious.zee.li " )
| | host . equalsIgnoreCase ( " vid.puffyan.us " )
2021-06-23 12:12:03 +00:00
| | host . equalsIgnoreCase ( " ytprivate.com " )
| | host . equalsIgnoreCase ( " invidious.namazso.eu " )
| | host . equalsIgnoreCase ( " invidious.silkky.cloud " )
| | host . equalsIgnoreCase ( " invidious.exonip.de " )
| | host . equalsIgnoreCase ( " inv.riverside.rocks " )
| | host . equalsIgnoreCase ( " invidious.blamefran.net " )
| | host . equalsIgnoreCase ( " invidious.moomoo.me " )
| | host . equalsIgnoreCase ( " ytb.trom.tf " )
| | host . equalsIgnoreCase ( " yt.cyberhost.uk " )
| | host . equalsIgnoreCase ( " y.com.cm " ) ;
2019-01-13 11:52:07 +00:00
}
2020-06-15 09:27:44 +00:00
/ * *
2020-07-02 19:31:05 +00:00
* Parses the duration string of the video expecting " : " or " . " as separators
2021-02-07 21:12:22 +00:00
*
2020-06-15 09:27:44 +00:00
* @return the duration in seconds
2020-07-02 19:31:05 +00:00
* @throws ParsingException when more than 3 separators are found
2020-06-15 09:27:44 +00:00
* /
2021-06-11 11:34:23 +00:00
public static int parseDurationString ( @Nonnull final String input )
2017-03-01 17:47:52 +00:00
throws ParsingException , NumberFormatException {
2018-09-09 09:53:10 +00:00
// If time separator : is not detected, try . instead
2018-09-09 12:01:39 +00:00
final String [ ] splitInput = input . contains ( " : " )
? input . split ( " : " )
: input . split ( " \\ . " ) ;
2017-03-01 17:47:52 +00:00
String days = " 0 " ;
String hours = " 0 " ;
String minutes = " 0 " ;
2018-09-09 12:01:39 +00:00
final String seconds ;
2017-03-01 17:47:52 +00:00
2017-06-29 18:12:55 +00:00
switch ( splitInput . length ) {
2017-03-01 17:47:52 +00:00
case 4 :
days = splitInput [ 0 ] ;
hours = splitInput [ 1 ] ;
minutes = splitInput [ 2 ] ;
seconds = splitInput [ 3 ] ;
break ;
case 3 :
hours = splitInput [ 0 ] ;
minutes = splitInput [ 1 ] ;
seconds = splitInput [ 2 ] ;
break ;
case 2 :
minutes = splitInput [ 0 ] ;
seconds = splitInput [ 1 ] ;
break ;
case 1 :
seconds = splitInput [ 0 ] ;
break ;
default :
throw new ParsingException ( " Error duration string with unknown format: " + input ) ;
}
2020-05-30 15:20:54 +00:00
2020-06-15 09:27:44 +00:00
return ( ( Integer . parseInt ( Utils . removeNonDigitCharacters ( days ) ) * 24
+ Integer . parseInt ( Utils . removeNonDigitCharacters ( hours ) ) ) * 60
+ Integer . parseInt ( Utils . removeNonDigitCharacters ( minutes ) ) ) * 60
+ Integer . parseInt ( Utils . removeNonDigitCharacters ( seconds ) ) ;
2017-03-01 17:47:52 +00:00
}
2019-04-28 20:03:16 +00:00
2021-06-11 11:34:23 +00:00
@Nonnull
public static String getFeedUrlFrom ( @Nonnull final String channelIdOrUser ) {
2019-12-16 07:35:44 +00:00
if ( channelIdOrUser . startsWith ( " user/ " ) ) {
return FEED_BASE_USER + channelIdOrUser . replace ( " user/ " , " " ) ;
} else if ( channelIdOrUser . startsWith ( " channel/ " ) ) {
return FEED_BASE_CHANNEL_ID + channelIdOrUser . replace ( " channel/ " , " " ) ;
} else {
return FEED_BASE_CHANNEL_ID + channelIdOrUser ;
}
}
2021-04-25 16:54:26 +00:00
public static OffsetDateTime parseDateFrom ( final String textualUploadDate )
throws ParsingException {
2019-04-28 20:03:16 +00:00
try {
2020-10-18 03:48:14 +00:00
return OffsetDateTime . parse ( textualUploadDate ) ;
2021-04-08 14:17:59 +00:00
} catch ( final DateTimeParseException e ) {
2020-11-03 10:54:46 +00:00
try {
return LocalDate . parse ( textualUploadDate ) . atStartOfDay ( ) . atOffset ( ZoneOffset . UTC ) ;
2021-04-08 14:17:59 +00:00
} catch ( final DateTimeParseException e1 ) {
2021-04-25 16:54:26 +00:00
throw new ParsingException ( " Could not parse date: \" " + textualUploadDate + " \" " ,
e1 ) ;
2020-11-03 10:54:46 +00:00
}
2019-04-28 20:03:16 +00:00
}
}
2020-02-22 22:51:02 +00:00
2020-02-02 17:15:47 +00:00
/ * *
2020-04-16 17:28:27 +00:00
* Checks if the given playlist id is a YouTube Mix ( auto - generated playlist )
* Ids from a YouTube Mix start with " RD "
2021-02-07 21:12:22 +00:00
*
2021-06-24 16:39:16 +00:00
* @param playlistId the playlist id
2020-04-16 17:28:27 +00:00
* @return Whether given id belongs to a YouTube Mix
2020-02-02 17:15:47 +00:00
* /
2021-06-11 11:34:23 +00:00
public static boolean isYoutubeMixId ( @Nonnull final String playlistId ) {
2020-03-21 17:48:12 +00:00
return playlistId . startsWith ( " RD " ) & & ! isYoutubeMusicMixId ( playlistId ) ;
}
/ * *
2020-04-16 17:28:27 +00:00
* Checks if the given playlist id is a YouTube Music Mix ( auto - generated playlist )
2020-12-23 20:07:30 +00:00
* Ids from a YouTube Music Mix start with " RDAMVM " or " RDCLAK "
2021-02-07 21:12:22 +00:00
*
2021-04-12 16:24:32 +00:00
* @param playlistId the playlist id
2020-04-16 17:28:27 +00:00
* @return Whether given id belongs to a YouTube Music Mix
2020-03-21 17:48:12 +00:00
* /
2021-06-11 11:34:23 +00:00
public static boolean isYoutubeMusicMixId ( @Nonnull final String playlistId ) {
2020-12-23 20:07:30 +00:00
return playlistId . startsWith ( " RDAMVM " ) | | playlistId . startsWith ( " RDCLAK " ) ;
2020-02-02 13:19:48 +00:00
}
2021-02-07 21:12:22 +00:00
2020-09-26 09:22:24 +00:00
/ * *
* Checks if the given playlist id is a YouTube Channel Mix ( auto - generated playlist )
* Ids from a YouTube channel Mix start with " RDCM "
2021-02-07 21:12:22 +00:00
*
2020-09-26 09:22:24 +00:00
* @return Whether given id belongs to a YouTube Channel Mix
* /
2021-06-11 11:34:23 +00:00
public static boolean isYoutubeChannelMixId ( @Nonnull final String playlistId ) {
2020-09-26 09:22:24 +00:00
return playlistId . startsWith ( " RDCM " ) ;
}
/ * *
* Extracts the video id from the playlist id for Mixes .
2021-02-07 21:12:22 +00:00
*
2020-09-26 09:22:24 +00:00
* @throws ParsingException If the playlistId is a Channel Mix or not a mix .
* /
2021-06-11 11:34:23 +00:00
@Nonnull
public static String extractVideoIdFromMixId ( @Nonnull final String playlistId )
throws ParsingException {
2020-12-25 14:00:31 +00:00
if ( playlistId . startsWith ( " RDMM " ) ) { // My Mix
2020-09-26 09:22:24 +00:00
return playlistId . substring ( 4 ) ;
2020-12-25 14:00:31 +00:00
} else if ( isYoutubeMusicMixId ( playlistId ) ) { // starts with "RDAMVM" or "RDCLAK"
2020-09-26 09:22:24 +00:00
return playlistId . substring ( 6 ) ;
2020-12-25 14:00:31 +00:00
} else if ( isYoutubeChannelMixId ( playlistId ) ) { // starts with "RMCM"
// Channel mix are build with RMCM{channelId}, so videoId can't be determined
2021-04-25 16:54:26 +00:00
throw new ParsingException ( " Video id could not be determined from mix id: "
+ playlistId ) ;
2020-09-26 09:22:24 +00:00
2020-12-25 14:00:31 +00:00
} else if ( isYoutubeMixId ( playlistId ) ) { // normal mix, starts with "RD"
2020-09-26 09:22:24 +00:00
return playlistId . substring ( 2 ) ;
2020-12-25 14:00:31 +00:00
} else { // not a mix
2021-04-25 16:54:26 +00:00
throw new ParsingException ( " Video id could not be determined from mix id: "
+ playlistId ) ;
2020-09-26 09:22:24 +00:00
}
}
2020-02-02 13:19:48 +00:00
2020-12-15 16:21:21 +00:00
public static JsonObject getInitialData ( final String html ) throws ParsingException {
2020-02-22 22:51:02 +00:00
try {
2020-10-16 18:27:40 +00:00
try {
2021-04-25 16:54:26 +00:00
final String initialData = Parser . matchGroup1 (
" window \\ [ \" ytInitialData \" \\ ] \\ s*= \\ s*( \\ {.*? \\ }); " , html ) ;
2020-10-16 18:27:40 +00:00
return JsonParser . object ( ) . from ( initialData ) ;
2021-04-08 14:17:59 +00:00
} catch ( final Parser . RegexException e ) {
2021-04-25 16:54:26 +00:00
final String initialData = Parser . matchGroup1 (
" var \\ s*ytInitialData \\ s*= \\ s*( \\ {.*? \\ }); " , html ) ;
2020-10-16 18:27:40 +00:00
return JsonParser . object ( ) . from ( initialData ) ;
}
2021-04-08 14:17:59 +00:00
} catch ( final JsonParserException | Parser . RegexException e ) {
2020-02-22 22:51:02 +00:00
throw new ParsingException ( " Could not get ytInitialData " , e ) ;
}
}
2021-06-24 16:39:16 +00:00
public static boolean areHardcodedClientVersionAndKeyValid ( )
throws IOException , ExtractionException {
2021-04-25 16:54:26 +00:00
if ( areHardcodedClientVersionAndKeyValidValue ! = null ) {
return areHardcodedClientVersionAndKeyValidValue ;
}
2021-04-12 16:24:32 +00:00
// @formatter:off
final byte [ ] body = JsonWriter . string ( )
. object ( )
. object ( " context " )
. object ( " client " )
2021-06-24 16:39:16 +00:00
. value ( " hl " , " en-GB " )
2021-04-12 16:24:32 +00:00
. value ( " gl " , " GB " )
2021-06-24 16:39:16 +00:00
. value ( " clientName " , " WEB " )
2021-04-12 16:24:32 +00:00
. value ( " clientVersion " , HARDCODED_CLIENT_VERSION )
. end ( )
2021-06-24 16:39:16 +00:00
. object ( " user " )
. value ( " lockedSafetyMode " , false )
. end ( )
. value ( " fetchLiveState " , true )
2021-04-12 16:24:32 +00:00
. end ( )
. end ( ) . done ( ) . getBytes ( UTF_8 ) ;
// @formatter:on
2020-02-29 21:42:43 +00:00
2020-12-15 16:21:21 +00:00
final Map < String , List < String > > headers = new HashMap < > ( ) ;
2020-02-29 21:42:43 +00:00
headers . put ( " X-YouTube-Client-Name " , Collections . singletonList ( " 1 " ) ) ;
2021-04-25 16:54:26 +00:00
headers . put ( " X-YouTube-Client-Version " ,
Collections . singletonList ( HARDCODED_CLIENT_VERSION ) ) ;
2020-02-26 14:22:59 +00:00
2021-04-12 16:24:32 +00:00
// This endpoint is fetched by the YouTube website to get the items of its main menu and is
// pretty lightweight (around 30kB)
2021-04-25 16:54:26 +00:00
final Response response = getDownloader ( ) . post ( YOUTUBEI_V1_URL + " guide?key= "
2021-04-12 16:24:32 +00:00
+ HARDCODED_KEY , headers , body ) ;
final String responseBody = response . responseBody ( ) ;
final int responseCode = response . responseCode ( ) ;
2021-06-24 16:39:16 +00:00
return areHardcodedClientVersionAndKeyValidValue = responseBody . length ( ) > 5000
2021-04-12 16:24:32 +00:00
& & responseCode = = 200 ; // Ensure to have a valid response
2020-02-28 15:35:24 +00:00
}
2020-07-26 11:14:25 +00:00
private static void extractClientVersionAndKey ( ) throws IOException , ExtractionException {
2021-04-25 16:54:26 +00:00
// Don't extract the client version and the innertube key if it has been already extracted
2021-06-06 13:39:45 +00:00
if ( keyAndVersionExtracted ) return ;
2021-04-12 16:24:32 +00:00
// Don't provide a search term in order to have a smaller response
2021-06-26 18:04:55 +00:00
final String url = " https://www.youtube.com/results?search_query=&ucbcb=1 " ;
2021-04-19 17:07:04 +00:00
final Map < String , List < String > > headers = new HashMap < > ( ) ;
addCookieHeader ( headers ) ;
final String html = getDownloader ( ) . get ( url , headers ) . responseBody ( ) ;
2020-07-26 10:00:56 +00:00
final JsonObject initialData = getInitialData ( html ) ;
2021-04-12 16:24:32 +00:00
final JsonArray serviceTrackingParams = initialData . getObject ( " responseContext " )
. getArray ( " serviceTrackingParams " ) ;
2020-02-29 21:42:43 +00:00
String shortClientVersion = null ;
2021-04-12 16:24:32 +00:00
// Try to get version from initial data first
2020-07-26 10:00:56 +00:00
for ( final Object service : serviceTrackingParams ) {
final JsonObject s = ( JsonObject ) service ;
2020-02-29 21:42:43 +00:00
if ( s . getString ( " service " ) . equals ( " CSI " ) ) {
2020-07-26 10:00:56 +00:00
final JsonArray params = s . getArray ( " params " ) ;
for ( final Object param : params ) {
final JsonObject p = ( JsonObject ) param ;
final String key = p . getString ( " key " ) ;
2020-02-29 21:42:43 +00:00
if ( key ! = null & & key . equals ( " cver " ) ) {
2020-07-26 10:00:56 +00:00
clientVersion = p . getString ( " value " ) ;
2020-02-24 18:03:54 +00:00
}
2020-02-29 21:42:43 +00:00
}
} else if ( s . getString ( " service " ) . equals ( " ECATCHER " ) ) {
2021-04-25 16:54:26 +00:00
// Fallback to get a shortened client version which does not contain the last two
// digits
2020-07-26 10:00:56 +00:00
final JsonArray params = s . getArray ( " params " ) ;
for ( final Object param : params ) {
final JsonObject p = ( JsonObject ) param ;
final String key = p . getString ( " key " ) ;
2020-02-29 21:42:43 +00:00
if ( key ! = null & & key . equals ( " client.version " ) ) {
shortClientVersion = p . getString ( " value " ) ;
2020-02-24 18:03:54 +00:00
}
}
}
2020-02-29 21:42:43 +00:00
}
2020-02-24 18:03:54 +00:00
2020-02-29 21:42:43 +00:00
String contextClientVersion ;
2020-07-26 10:00:56 +00:00
final String [ ] patterns = {
2020-02-29 21:42:43 +00:00
" INNERTUBE_CONTEXT_CLIENT_VERSION \" : \" ([0-9 \\ .]+?) \" " ,
" innertube_context_client_version \" : \" ([0-9 \\ .]+?) \" " ,
" client.version=([0-9 \\ .]+) "
} ;
2020-07-26 10:00:56 +00:00
for ( final String pattern : patterns ) {
2020-02-29 21:42:43 +00:00
try {
contextClientVersion = Parser . matchGroup1 ( pattern , html ) ;
2020-04-15 16:49:58 +00:00
if ( ! isNullOrEmpty ( contextClientVersion ) ) {
2020-07-26 10:00:56 +00:00
clientVersion = contextClientVersion ;
2020-07-26 11:14:25 +00:00
break ;
2020-02-26 14:22:59 +00:00
}
2021-04-08 14:17:59 +00:00
} catch ( final Parser . RegexException ignored ) {
2021-02-07 21:12:22 +00:00
}
2020-02-29 21:42:43 +00:00
}
2020-02-24 18:03:54 +00:00
2020-07-26 11:14:25 +00:00
if ( ! isNullOrEmpty ( clientVersion ) & & ! isNullOrEmpty ( shortClientVersion ) ) {
2020-07-26 10:00:56 +00:00
clientVersion = shortClientVersion ;
}
try {
key = Parser . matchGroup1 ( " INNERTUBE_API_KEY \" : \" ([0-9a-zA-Z_-]+?) \" " , html ) ;
2021-06-05 16:17:26 +00:00
} catch ( final Parser . RegexException e1 ) {
2020-07-26 10:00:56 +00:00
try {
key = Parser . matchGroup1 ( " innertubeApiKey \" : \" ([0-9a-zA-Z_-]+?) \" " , html ) ;
2021-06-05 16:17:26 +00:00
} catch ( final Parser . RegexException e2 ) {
throw new ParsingException ( " Could not extract client version and key " ) ;
2021-02-07 21:12:22 +00:00
}
2020-02-29 21:42:43 +00:00
}
2021-06-05 16:17:26 +00:00
keyAndVersionExtracted = true ;
2020-07-26 10:00:56 +00:00
}
/ * *
* Get the client version
* /
public static String getClientVersion ( ) throws IOException , ExtractionException {
if ( ! isNullOrEmpty ( clientVersion ) ) return clientVersion ;
2021-04-25 16:54:26 +00:00
if ( areHardcodedClientVersionAndKeyValid ( ) ) {
return clientVersion = HARDCODED_CLIENT_VERSION ;
}
2020-07-26 10:00:56 +00:00
2021-06-06 13:39:45 +00:00
extractClientVersionAndKey ( ) ;
2020-07-26 10:00:56 +00:00
return clientVersion ;
}
/ * *
* Get the key
* /
public static String getKey ( ) throws IOException , ExtractionException {
if ( ! isNullOrEmpty ( key ) ) return key ;
2021-06-06 13:39:45 +00:00
if ( areHardcodedClientVersionAndKeyValid ( ) ) {
return key = HARDCODED_KEY ;
}
2020-02-24 18:03:54 +00:00
2021-06-06 13:39:45 +00:00
extractClientVersionAndKey ( ) ;
2020-07-26 10:00:56 +00:00
return key ;
2020-02-24 18:03:54 +00:00
}
2020-02-27 16:39:23 +00:00
2021-01-14 19:01:52 +00:00
/ * *
2021-02-17 18:21:39 +00:00
* < p >
* < b > Only use in tests . < / b >
* < / p >
2021-01-14 19:01:52 +00:00
*
2021-02-17 18:21:39 +00:00
* < p >
2021-01-14 19:01:52 +00:00
* Quick - and - dirty solution to reset global state in between test classes .
2021-02-17 18:21:39 +00:00
* < / p >
* < p >
* This is needed for the mocks because in order to reach that state a network request has to
* be made . If the global state is not reset and the RecordingDownloader is used ,
* then only the first test class has that request recorded . Meaning running the other
* tests with mocks will fail , because the mock is missing .
* < / p >
2021-01-14 19:01:52 +00:00
* /
2021-01-17 17:48:16 +00:00
public static void resetClientVersionAndKey ( ) {
2021-01-10 19:24:50 +00:00
clientVersion = null ;
key = null ;
}
2021-04-08 14:36:55 +00:00
/ * *
* < p >
* < b > Only use in tests . < / b >
* < / p >
* /
2021-06-26 18:04:55 +00:00
public static void setNumberGenerator ( final Random random ) {
2021-04-08 14:36:55 +00:00
numberGenerator = random ;
}
2021-06-06 13:39:45 +00:00
public static boolean isHardcodedYoutubeMusicKeyValid ( ) throws IOException ,
2021-04-25 16:54:26 +00:00
ReCaptchaException {
2021-05-09 14:14:37 +00:00
final String url =
" https://music.youtube.com/youtubei/v1/music/get_search_suggestions?alt=json&key= "
2021-06-05 11:51:56 +00:00
+ HARDCODED_YOUTUBE_MUSIC_KEY [ 0 ] ;
2020-03-20 10:05:19 +00:00
// @formatter:off
byte [ ] json = JsonWriter . string ( )
. object ( )
. object ( " context " )
. object ( " client " )
. value ( " clientName " , " WEB_REMIX " )
2021-06-05 11:51:56 +00:00
. value ( " clientVersion " , HARDCODED_YOUTUBE_MUSIC_KEY [ 2 ] )
2021-05-09 14:14:37 +00:00
. value ( " hl " , " en-GB " )
2020-03-20 10:05:19 +00:00
. value ( " gl " , " GB " )
. array ( " experimentIds " ) . end ( )
2021-06-24 16:39:16 +00:00
. value ( " experimentsToken " , EMPTY_STRING )
2020-03-20 10:05:19 +00:00
. object ( " locationInfo " ) . end ( )
. object ( " musicAppInfo " ) . end ( )
. end ( )
. object ( " capabilities " ) . end ( )
. object ( " request " )
. array ( " internalExperimentFlags " ) . end ( )
. object ( " sessionIndex " ) . end ( )
. end ( )
. object ( " activePlayers " ) . end ( )
. object ( " user " )
. value ( " enableSafetyMode " , false )
. end ( )
. end ( )
2021-04-11 15:41:40 +00:00
. value ( " input " , " " )
2021-02-07 21:12:22 +00:00
. end ( ) . done ( ) . getBytes ( UTF_8 ) ;
2020-03-20 10:05:19 +00:00
// @formatter:on
2020-12-15 16:21:21 +00:00
final Map < String , List < String > > headers = new HashMap < > ( ) ;
2021-04-25 16:54:26 +00:00
headers . put ( " X-YouTube-Client-Name " , Collections . singletonList (
2021-06-05 11:51:56 +00:00
HARDCODED_YOUTUBE_MUSIC_KEY [ 1 ] ) ) ;
2021-04-25 16:54:26 +00:00
headers . put ( " X-YouTube-Client-Version " , Collections . singletonList (
2021-06-05 11:51:56 +00:00
HARDCODED_YOUTUBE_MUSIC_KEY [ 2 ] ) ) ;
2020-03-20 10:05:19 +00:00
headers . put ( " Origin " , Collections . singletonList ( " https://music.youtube.com " ) ) ;
2020-03-20 13:14:02 +00:00
headers . put ( " Referer " , Collections . singletonList ( " music.youtube.com " ) ) ;
2020-03-20 10:05:19 +00:00
headers . put ( " Content-Type " , Collections . singletonList ( " application/json " ) ) ;
2021-04-12 16:24:32 +00:00
final Response response = getDownloader ( ) . post ( url , headers , json ) ;
2021-04-25 16:54:26 +00:00
// Ensure to have a valid response
2021-06-06 13:39:45 +00:00
return response . responseBody ( ) . length ( ) > 500 & & response . responseCode ( ) = = 200 ;
2020-03-20 10:05:19 +00:00
}
2021-06-06 13:39:45 +00:00
public static String [ ] getYoutubeMusicKey ( ) throws IOException , ReCaptchaException ,
2021-04-25 16:54:26 +00:00
Parser . RegexException {
2021-06-06 13:39:45 +00:00
if ( youtubeMusicKey ! = null & & youtubeMusicKey . length = = 3 ) return youtubeMusicKey ;
if ( isHardcodedYoutubeMusicKeyValid ( ) ) {
return youtubeMusicKey = HARDCODED_YOUTUBE_MUSIC_KEY ;
2021-04-25 16:54:26 +00:00
}
2020-03-17 10:33:39 +00:00
final String url = " https://music.youtube.com/ " ;
2021-04-19 17:07:04 +00:00
final Map < String , List < String > > headers = new HashMap < > ( ) ;
addCookieHeader ( headers ) ;
final String html = getDownloader ( ) . get ( url , headers ) . responseBody ( ) ;
2020-03-17 10:33:39 +00:00
2020-03-20 10:05:19 +00:00
String key ;
try {
key = Parser . matchGroup1 ( " INNERTUBE_API_KEY \" : \" ([0-9a-zA-Z_-]+?) \" " , html ) ;
2021-04-08 14:17:59 +00:00
} catch ( final Parser . RegexException e ) {
2020-03-20 10:05:19 +00:00
key = Parser . matchGroup1 ( " innertube_api_key \" : \" ([0-9a-zA-Z_-]+?) \" " , html ) ;
}
2021-04-25 16:54:26 +00:00
final String clientName = Parser . matchGroup1 ( " INNERTUBE_CONTEXT_CLIENT_NAME \" :([0-9]+?), " ,
html ) ;
2020-03-20 10:05:19 +00:00
String clientVersion ;
try {
2021-04-25 16:54:26 +00:00
clientVersion = Parser . matchGroup1 (
" INNERTUBE_CONTEXT_CLIENT_VERSION \" : \" ([0-9 \\ .]+?) \" " , html ) ;
2021-04-08 14:17:59 +00:00
} catch ( final Parser . RegexException e ) {
2020-03-20 10:05:19 +00:00
try {
2021-04-25 16:54:26 +00:00
clientVersion = Parser . matchGroup1 (
" INNERTUBE_CLIENT_VERSION \" : \" ([0-9 \\ .]+?) \" " , html ) ;
2021-04-08 14:17:59 +00:00
} catch ( final Parser . RegexException ee ) {
2021-04-25 16:54:26 +00:00
clientVersion = Parser . matchGroup1 (
" innertube_context_client_version \" : \" ([0-9 \\ .]+?) \" " , html ) ;
2020-03-20 10:05:19 +00:00
}
}
2020-03-17 10:33:39 +00:00
2021-06-06 13:39:45 +00:00
return youtubeMusicKey = new String [ ] { key , clientName , clientVersion } ;
2020-03-17 10:33:39 +00:00
}
2020-12-15 16:21:21 +00:00
@Nullable
2021-06-11 11:34:23 +00:00
public static String getUrlFromNavigationEndpoint ( @Nonnull final JsonObject navigationEndpoint )
2021-04-25 16:54:26 +00:00
throws ParsingException {
2020-04-16 14:08:14 +00:00
if ( navigationEndpoint . has ( " urlEndpoint " ) ) {
2020-02-27 16:39:23 +00:00
String internUrl = navigationEndpoint . getObject ( " urlEndpoint " ) . getString ( " url " ) ;
2021-02-12 21:22:11 +00:00
if ( internUrl . startsWith ( " https://www.youtube.com/redirect? " ) ) {
2021-02-13 11:10:41 +00:00
// remove https://www.youtube.com part to fall in the next if block
2021-02-12 21:22:11 +00:00
internUrl = internUrl . substring ( 23 ) ;
}
2020-02-27 16:39:23 +00:00
if ( internUrl . startsWith ( " /redirect? " ) ) {
// q parameter can be the first parameter
internUrl = internUrl . substring ( 10 ) ;
String [ ] params = internUrl . split ( " & " ) ;
for ( String param : params ) {
if ( param . split ( " = " ) [ 0 ] . equals ( " q " ) ) {
String url ;
try {
2021-02-07 21:12:22 +00:00
url = URLDecoder . decode ( param . split ( " = " ) [ 1 ] , UTF_8 ) ;
2021-04-08 14:17:59 +00:00
} catch ( final UnsupportedEncodingException e ) {
2020-02-27 16:39:23 +00:00
return null ;
}
return url ;
}
}
} else if ( internUrl . startsWith ( " http " ) ) {
return internUrl ;
2021-04-25 16:54:26 +00:00
} else if ( internUrl . startsWith ( " /channel " ) | | internUrl . startsWith ( " /user " )
| | internUrl . startsWith ( " /watch " ) ) {
2021-02-12 21:22:11 +00:00
return " https://www.youtube.com " + internUrl ;
2020-02-27 16:39:23 +00:00
}
2020-04-16 14:08:14 +00:00
} else if ( navigationEndpoint . has ( " browseEndpoint " ) ) {
2020-02-29 21:57:25 +00:00
final JsonObject browseEndpoint = navigationEndpoint . getObject ( " browseEndpoint " ) ;
final String canonicalBaseUrl = browseEndpoint . getString ( " canonicalBaseUrl " ) ;
final String browseId = browseEndpoint . getString ( " browseId " ) ;
// All channel ids are prefixed with UC
if ( browseId ! = null & & browseId . startsWith ( " UC " ) ) {
return " https://www.youtube.com/channel/ " + browseId ;
}
2020-04-15 16:49:58 +00:00
if ( ! isNullOrEmpty ( canonicalBaseUrl ) ) {
2020-02-29 21:57:25 +00:00
return " https://www.youtube.com " + canonicalBaseUrl ;
}
2021-04-25 16:54:26 +00:00
throw new ParsingException ( " canonicalBaseUrl is null and browseId is not a channel ( \" "
+ browseEndpoint + " \" ) " ) ;
2020-04-16 14:08:14 +00:00
} else if ( navigationEndpoint . has ( " watchEndpoint " ) ) {
2020-02-27 16:39:23 +00:00
StringBuilder url = new StringBuilder ( ) ;
2021-04-25 16:54:26 +00:00
url . append ( " https://www.youtube.com/watch?v= " ) . append ( navigationEndpoint
. getObject ( " watchEndpoint " ) . getString ( " videoId " ) ) ;
2020-04-16 17:28:27 +00:00
if ( navigationEndpoint . getObject ( " watchEndpoint " ) . has ( " playlistId " ) ) {
2020-12-12 19:40:13 +00:00
url . append ( " &list= " ) . append ( navigationEndpoint . getObject ( " watchEndpoint " )
2020-04-16 17:28:27 +00:00
. getString ( " playlistId " ) ) ;
}
if ( navigationEndpoint . getObject ( " watchEndpoint " ) . has ( " startTimeSeconds " ) ) {
url . append ( " &t= " ) . append ( navigationEndpoint . getObject ( " watchEndpoint " )
. getInt ( " startTimeSeconds " ) ) ;
}
2020-02-27 16:39:23 +00:00
return url . toString ( ) ;
2020-04-16 14:08:14 +00:00
} else if ( navigationEndpoint . has ( " watchPlaylistEndpoint " ) ) {
2020-03-17 10:33:39 +00:00
return " https://www.youtube.com/playlist?list= " +
navigationEndpoint . getObject ( " watchPlaylistEndpoint " ) . getString ( " playlistId " ) ;
2020-02-27 16:39:23 +00:00
}
return null ;
}
2020-04-20 12:27:33 +00:00
/ * *
* Get the text from a JSON object that has either a simpleText or a runs array .
2021-02-07 21:12:22 +00:00
*
2020-04-20 12:27:33 +00:00
* @param textObject JSON object to get the text from
* @param html whether to return HTML , by parsing the navigationEndpoint
2020-05-01 11:55:15 +00:00
* @return text in the JSON object or { @code null }
2020-04-20 12:27:33 +00:00
* /
2020-12-15 16:21:21 +00:00
@Nullable
2021-04-25 16:54:26 +00:00
public static String getTextFromObject ( final JsonObject textObject , final boolean html )
throws ParsingException {
2020-05-11 09:40:24 +00:00
// Nothing to read (presumably a null or empty object, going by the helper's name)
if ( isNullOrEmpty ( textObject ) ) return null ;
2020-05-01 11:55:15 +00:00
2020-02-27 16:39:23 +00:00
// A simpleText value is returned as is and takes precedence over the runs array
if ( textObject . has ( " simpleText " ) ) return textObject . getString ( " simpleText " ) ;
2020-05-01 11:55:15 +00:00
// No simpleText and no runs: there is no text
if ( textObject . getArray ( " runs " ) . isEmpty ( ) ) return null ;
2020-12-15 16:21:21 +00:00
// Otherwise concatenate the text of every run, in order
final StringBuilder textBuilder = new StringBuilder ( ) ;
for ( final Object textPart : textObject . getArray ( " runs " ) ) {
2020-02-27 16:39:23 +00:00
String text = ( ( JsonObject ) textPart ) . getString ( " text " ) ;
2020-04-16 14:08:14 +00:00
// In HTML mode, a run with a navigationEndpoint that resolves to a non-empty URL is
// emitted as an anchor element; otherwise the plain text is appended below
if ( html & & ( ( JsonObject ) textPart ) . has ( " navigationEndpoint " ) ) {
2021-04-25 16:54:26 +00:00
String url = getUrlFromNavigationEndpoint ( ( ( JsonObject ) textPart )
. getObject ( " navigationEndpoint " ) ) ;
2020-04-15 16:49:58 +00:00
if ( ! isNullOrEmpty ( url ) ) {
2021-04-25 16:54:26 +00:00
// NOTE(review): url and text are inserted without HTML escaping - confirm callers
// expect that
textBuilder . append ( " <a href= \" " ) . append ( url ) . append ( " \" > " ) . append ( text )
. append ( " </a> " ) ;
2020-02-27 16:39:23 +00:00
continue ;
}
}
textBuilder . append ( text ) ;
}
String text = textBuilder . toString ( ) ;
if ( html ) {
// Line breaks become <br> elements in HTML mode
text = text . replaceAll ( " \\ n " , " <br> " ) ;
// NOTE(review): the two whitespace-only literals below look damaged by the same
// extraction that spaced out every token in this listing; the intended pattern and
// replacement cannot be told from here - verify against the canonical file
text = text . replaceAll ( " " , " " ) ;
}
return text ;
}
2020-12-15 16:21:21 +00:00
@Nullable
2021-05-29 12:43:26 +00:00
public static String getTextFromObject ( final JsonObject textObject ) throws ParsingException {
2020-02-27 16:39:23 +00:00
return getTextFromObject ( textObject , false ) ;
}
2020-02-28 08:36:33 +00:00
2021-03-24 08:04:43 +00:00
@Nullable
2021-06-11 11:34:23 +00:00
public static String getTextAtKey ( @Nonnull final JsonObject jsonObject , final String key )
2021-03-24 08:04:43 +00:00
throws ParsingException {
if ( jsonObject . isString ( key ) ) {
return jsonObject . getString ( key ) ;
} else {
return getTextFromObject ( jsonObject . getObject ( key ) ) ;
}
}
2021-06-11 11:34:23 +00:00
public static String fixThumbnailUrl ( @Nonnull String thumbnailUrl ) {
2020-02-28 08:36:33 +00:00
if ( thumbnailUrl . startsWith ( " // " ) ) {
thumbnailUrl = thumbnailUrl . substring ( 2 ) ;
}
if ( thumbnailUrl . startsWith ( HTTP ) ) {
thumbnailUrl = Utils . replaceHttpWithHttps ( thumbnailUrl ) ;
} else if ( ! thumbnailUrl . startsWith ( HTTPS ) ) {
thumbnailUrl = " https:// " + thumbnailUrl ;
}
return thumbnailUrl ;
}
2020-02-29 15:42:04 +00:00
2021-06-11 11:34:23 +00:00
@Nonnull
public static String getValidJsonResponseBody ( @Nonnull final Response response )
2020-04-01 14:01:21 +00:00
throws ParsingException , MalformedURLException {
2020-03-01 00:50:31 +00:00
if ( response . responseCode ( ) = = 404 ) {
2020-04-16 17:28:27 +00:00
throw new ContentNotAvailableException ( " Not found "
+ " ( \" " + response . responseCode ( ) + " " + response . responseMessage ( ) + " \" ) " ) ;
2020-03-01 00:50:31 +00:00
}
final String responseBody = response . responseBody ( ) ;
2021-04-12 16:24:32 +00:00
if ( responseBody . length ( ) < 50 ) { // Ensure to have a valid response
2020-02-29 15:42:04 +00:00
throw new ParsingException ( " JSON response is too short " ) ;
}
2020-03-01 00:52:25 +00:00
// Check if the request was redirected to the error page.
final URL latestUrl = new URL ( response . latestUrl ( ) ) ;
if ( latestUrl . getHost ( ) . equalsIgnoreCase ( " www.youtube.com " ) ) {
final String path = latestUrl . getPath ( ) ;
if ( path . equalsIgnoreCase ( " /oops " ) | | path . equalsIgnoreCase ( " /error " ) ) {
throw new ContentNotAvailableException ( " Content unavailable " ) ;
}
}
final String responseContentType = response . getHeader ( " Content-Type " ) ;
2020-04-01 14:01:21 +00:00
if ( responseContentType ! = null
& & responseContentType . toLowerCase ( ) . contains ( " text/html " ) ) {
2020-04-16 17:28:27 +00:00
throw new ParsingException ( " Got HTML document, expected JSON response "
+ " (latest url was: \" " + response . latestUrl ( ) + " \" ) " ) ;
2020-03-01 00:52:25 +00:00
}
2020-04-01 14:01:21 +00:00
return responseBody ;
}
2020-04-16 17:28:27 +00:00
public static Response getResponse ( final String url , final Localization localization )
throws IOException , ExtractionException {
final Map < String , List < String > > headers = new HashMap < > ( ) ;
2021-04-07 10:25:59 +00:00
addYouTubeHeaders ( headers ) ;
2020-04-16 17:28:27 +00:00
final Response response = getDownloader ( ) . get ( url , headers , localization ) ;
getValidJsonResponseBody ( response ) ;
return response ;
}
2021-04-08 14:17:59 +00:00
public static JsonObject getJsonPostResponse ( final String endpoint ,
2021-04-11 15:01:43 +00:00
final byte [ ] body ,
final Localization localization )
2021-04-08 14:17:59 +00:00
throws IOException , ExtractionException {
2021-04-12 16:24:32 +00:00
final Map < String , List < String > > headers = new HashMap < > ( ) ;
2021-04-19 17:07:04 +00:00
addClientInfoHeaders ( headers ) ;
2021-06-26 18:04:55 +00:00
headers . put ( " Content-Type " , Collections . singletonList ( " application/json " ) ) ;
2021-04-08 14:17:59 +00:00
2021-04-25 16:54:26 +00:00
final Response response = getDownloader ( ) . post ( YOUTUBEI_V1_URL + endpoint + " ?key= "
+ getKey ( ) , headers , body , localization ) ;
2021-04-08 14:17:59 +00:00
return JsonUtils . toJsonObject ( getValidJsonResponseBody ( response ) ) ;
}
2021-05-29 12:43:26 +00:00
public static JsonObject getJsonMobilePostResponse ( final String endpoint ,
final byte [ ] body ,
2021-06-11 11:34:23 +00:00
@Nonnull final ContentCountry
contentCountry ,
2021-05-29 12:43:26 +00:00
final Localization localization )
throws IOException , ExtractionException {
final Map < String , List < String > > headers = new HashMap < > ( ) ;
headers . put ( " Content-Type " , Collections . singletonList ( " application/json " ) ) ;
// Spoofing an Android 11 device with the hardcoded version of the Android app
headers . put ( " User-Agent " , Collections . singletonList ( " com.google.android.youtube/ "
2021-06-05 11:51:56 +00:00
+ MOBILE_YOUTUBE_CLIENT_VERSION + " Linux; U; Android 11; "
2021-05-29 12:43:26 +00:00
+ contentCountry . getCountryCode ( ) + " ) gzip " ) ) ;
headers . put ( " x-goog-api-format-version " , Collections . singletonList ( " 2 " ) ) ;
final Response response = getDownloader ( ) . post (
" https://youtubei.googleapis.com/youtubei/v1/ " + endpoint + " ?key= "
2021-06-05 11:51:56 +00:00
+ MOBILE_YOUTUBE_KEY , headers , body , localization ) ;
2021-05-29 12:43:26 +00:00
return JsonUtils . toJsonObject ( getValidJsonResponseBody ( response ) ) ;
}
2020-04-01 14:01:21 +00:00
public static JsonArray getJsonResponse ( final String url , final Localization localization )
throws IOException , ExtractionException {
Map < String , List < String > > headers = new HashMap < > ( ) ;
2021-04-07 10:25:59 +00:00
addYouTubeHeaders ( headers ) ;
2020-04-01 14:01:21 +00:00
final Response response = getDownloader ( ) . get ( url , headers , localization ) ;
2021-03-04 17:58:51 +00:00
return JsonUtils . toJsonArray ( getValidJsonResponseBody ( response ) ) ;
2020-04-16 17:28:27 +00:00
}
2021-06-11 11:34:23 +00:00
public static JsonArray getJsonResponse ( @Nonnull final Page page ,
final Localization localization )
2020-04-16 17:28:27 +00:00
throws IOException , ExtractionException {
final Map < String , List < String > > headers = new HashMap < > ( ) ;
2021-04-07 10:25:59 +00:00
addYouTubeHeaders ( headers ) ;
2020-04-16 17:28:27 +00:00
final Response response = getDownloader ( ) . get ( page . getUrl ( ) , headers , localization ) ;
2021-03-04 17:58:51 +00:00
return JsonUtils . toJsonArray ( getValidJsonResponseBody ( response ) ) ;
2021-03-03 18:49:26 +00:00
}
2021-06-11 11:34:23 +00:00
@Nonnull
public static JsonBuilder < JsonObject > prepareJsonBuilder ( @Nonnull final Localization
localization ,
@Nonnull final ContentCountry
contentCountry )
2021-04-02 19:34:47 +00:00
throws IOException , ExtractionException {
// @formatter:off
return JsonObject . builder ( )
. object ( " context " )
. object ( " client " )
2021-04-30 17:06:56 +00:00
. value ( " hl " , localization . getLocalizationCode ( ) )
. value ( " gl " , contentCountry . getCountryCode ( ) )
2021-06-24 16:39:16 +00:00
. value ( " clientName " , " WEB " )
. value ( " clientVersion " , getClientVersion ( ) )
. end ( )
. object ( " user " )
2021-06-26 18:04:55 +00:00
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
2021-06-24 16:39:16 +00:00
. value ( " lockedSafetyMode " , false )
2021-04-02 19:34:47 +00:00
. end ( )
. end ( ) ;
2021-05-29 12:43:26 +00:00
// @formatter:on
}
2021-06-11 11:34:23 +00:00
@Nonnull
public static JsonBuilder < JsonObject > prepareMobileJsonBuilder ( @Nonnull final Localization
localization ,
@Nonnull final ContentCountry
2021-05-29 12:43:26 +00:00
contentCountry )
throws IOException , ExtractionException {
// @formatter:off
return JsonObject . builder ( )
. object ( " context " )
. object ( " client " )
. value ( " clientName " , " ANDROID " )
2021-06-05 11:51:56 +00:00
. value ( " clientVersion " , MOBILE_YOUTUBE_CLIENT_VERSION )
2021-05-29 12:43:26 +00:00
. value ( " hl " , localization . getLocalizationCode ( ) )
. value ( " gl " , contentCountry . getCountryCode ( ) )
. end ( )
2021-06-24 16:39:16 +00:00
. object ( " user " )
// TO DO: provide a way to enable restricted mode with:
// .value("enableSafetyMode", boolean)
. value ( " lockedSafetyMode " , false )
. end ( )
2021-05-29 12:43:26 +00:00
. end ( ) ;
2021-04-02 19:34:47 +00:00
// @formatter:on
}
2021-04-07 10:25:59 +00:00
/ * *
* Add required headers and cookies to an existing headers Map .
* @see # addClientInfoHeaders ( Map )
* @see # addCookieHeader ( Map )
* /
public static void addYouTubeHeaders ( final Map < String , List < String > > headers )
throws IOException , ExtractionException {
addClientInfoHeaders ( headers ) ;
addCookieHeader ( headers ) ;
}
/ * *
2021-04-19 17:07:04 +00:00
* Add the < code > X - YouTube - Client - Name < / code > , < code > X - YouTube - Client - Version < / code > ,
* < code > Origin < / code > , and < code > Referer < / code > headers .
2021-04-07 10:25:59 +00:00
* @param headers The headers which should be completed
* /
2021-06-11 11:34:23 +00:00
public static void addClientInfoHeaders ( @Nonnull final Map < String , List < String > > headers )
2021-04-07 10:25:59 +00:00
throws IOException , ExtractionException {
2021-06-24 16:39:16 +00:00
headers . computeIfAbsent ( " Origin " , k - > Collections . singletonList (
" https://www.youtube.com " ) ) ;
headers . computeIfAbsent ( " Referer " , k - > Collections . singletonList (
" https://www.youtube.com " ) ) ;
headers . computeIfAbsent ( " X-YouTube-Client-Name " , k - > Collections . singletonList ( " 1 " ) ) ;
2021-04-07 10:25:59 +00:00
if ( headers . get ( " X-YouTube-Client-Version " ) = = null ) {
headers . put ( " X-YouTube-Client-Version " , Collections . singletonList ( getClientVersion ( ) ) ) ;
}
}
/ * *
* Add the < code > CONSENT < / code > cookie to prevent redirect to < code > consent . youtube . com < / code >
* @see # CONSENT_COOKIE
* @param headers the headers which should be completed
* /
2021-06-26 18:04:55 +00:00
public static void addCookieHeader ( @Nonnull final Map < String , List < String > > headers ) {
2021-04-07 10:25:59 +00:00
if ( headers . get ( " Cookie " ) = = null ) {
2021-06-11 11:01:21 +00:00
headers . put ( " Cookie " , Arrays . asList ( generateConsentCookie ( ) ) ) ;
2021-04-07 10:25:59 +00:00
} else {
2021-04-08 14:36:55 +00:00
headers . get ( " Cookie " ) . add ( generateConsentCookie ( ) ) ;
2021-04-07 10:25:59 +00:00
}
}
2021-06-11 11:34:23 +00:00
@Nonnull
2021-04-08 14:36:55 +00:00
public static String generateConsentCookie ( ) {
2021-07-03 11:14:22 +00:00
final int statusCode = 100 + numberGenerator . nextInt ( 900 ) ;
return CONSENT_COOKIE + statusCode ;
2021-04-08 14:36:55 +00:00
}
2021-06-11 11:34:23 +00:00
public static String extractCookieValue ( final String cookieName ,
@Nonnull final Response response ) {
2021-04-07 10:25:59 +00:00
final List < String > cookies = response . responseHeaders ( ) . get ( " set-cookie " ) ;
int startIndex ;
String result = " " ;
for ( final String cookie : cookies ) {
startIndex = cookie . indexOf ( cookieName ) ;
if ( startIndex ! = - 1 ) {
result = cookie . substring ( startIndex + cookieName . length ( ) + " = " . length ( ) ,
cookie . indexOf ( " ; " , startIndex ) ) ;
}
}
return result ;
}
2020-03-01 00:52:25 +00:00
/ * *
* Shared alert detection function , multiple endpoints return the error similarly structured .
* < p >
* Will check if the object has an alert of the type " ERROR " .
2020-04-01 14:01:21 +00:00
* < / p >
2020-03-01 00:52:25 +00:00
*
* @param initialData the object which will be checked if an alert is present
* @throws ContentNotAvailableException if an alert is detected
* /
2021-06-11 11:34:23 +00:00
public static void defaultAlertsCheck ( @Nonnull final JsonObject initialData )
throws ParsingException {
2020-03-01 00:52:25 +00:00
final JsonArray alerts = initialData . getArray ( " alerts " ) ;
2020-04-15 16:49:58 +00:00
if ( ! isNullOrEmpty ( alerts ) ) {
2020-03-01 00:52:25 +00:00
final JsonObject alertRenderer = alerts . getObject ( 0 ) . getObject ( " alertRenderer " ) ;
2020-05-03 08:28:45 +00:00
final String alertText = getTextFromObject ( alertRenderer . getObject ( " text " ) ) ;
final String alertType = alertRenderer . getString ( " type " , EMPTY_STRING ) ;
2020-03-01 00:52:25 +00:00
if ( alertType . equalsIgnoreCase ( " ERROR " ) ) {
2021-03-22 09:35:05 +00:00
if ( alertText ! = null & & alertText . contains ( " This account has been terminated " ) ) {
2021-03-22 23:15:21 +00:00
if ( alertText . contains ( " violation " ) | | alertText . contains ( " violating " )
| | alertText . contains ( " infringement " ) ) {
2021-06-11 11:34:23 +00:00
// Possible error messages:
2021-03-22 09:35:05 +00:00
// "This account has been terminated for a violation of YouTube's Terms of Service."
2021-03-30 08:14:33 +00:00
// "This account has been terminated due to multiple or severe violations of YouTube's policy prohibiting hate speech."
// "This account has been terminated due to multiple or severe violations of YouTube's policy prohibiting content designed to harass, bully or threaten."
// "This account has been terminated due to multiple or severe violations of YouTube's policy against spam, deceptive practices and misleading content or other Terms of Service violations."
// "This account has been terminated due to multiple or severe violations of YouTube's policy on nudity or sexual content."
2021-03-22 23:15:21 +00:00
// "This account has been terminated for violating YouTube's Community Guidelines."
// "This account has been terminated because we received multiple third-party claims of copyright infringement regarding material that the user posted."
2021-03-30 08:14:33 +00:00
// "This account has been terminated because it is linked to an account that received multiple third-party claims of copyright infringement."
2021-03-22 09:35:05 +00:00
throw new AccountTerminatedException ( alertText , AccountTerminatedException . Reason . VIOLATION ) ;
} else {
throw new AccountTerminatedException ( alertText ) ;
}
}
2020-03-01 00:52:25 +00:00
throw new ContentNotAvailableException ( " Got error: \" " + alertText + " \" " ) ;
}
}
}
2020-12-20 18:54:12 +00:00
@Nonnull
2021-06-11 11:34:23 +00:00
public static List < MetaInfo > getMetaInfo ( @Nonnull final JsonArray contents )
throws ParsingException {
2020-12-20 18:54:12 +00:00
final List < MetaInfo > metaInfo = new ArrayList < > ( ) ;
for ( final Object content : contents ) {
final JsonObject resultObject = ( JsonObject ) content ;
if ( resultObject . has ( " itemSectionRenderer " ) ) {
for ( final Object sectionContentObject :
resultObject . getObject ( " itemSectionRenderer " ) . getArray ( " contents " ) ) {
final JsonObject sectionContent = ( JsonObject ) sectionContentObject ;
if ( sectionContent . has ( " infoPanelContentRenderer " ) ) {
2021-04-25 16:54:26 +00:00
metaInfo . add ( getInfoPanelContent ( sectionContent
. getObject ( " infoPanelContentRenderer " ) ) ) ;
2020-12-20 18:54:12 +00:00
}
if ( sectionContent . has ( " clarificationRenderer " ) ) {
2021-04-25 16:54:26 +00:00
metaInfo . add ( getClarificationRendererContent ( sectionContent
. getObject ( " clarificationRenderer " )
2020-12-20 18:54:12 +00:00
) ) ;
}
}
}
}
return metaInfo ;
}
@Nonnull
2021-06-11 11:34:23 +00:00
private static MetaInfo getInfoPanelContent ( @Nonnull final JsonObject infoPanelContentRenderer )
2020-12-20 18:54:12 +00:00
throws ParsingException {
final MetaInfo metaInfo = new MetaInfo ( ) ;
final StringBuilder sb = new StringBuilder ( ) ;
for ( final Object paragraph : infoPanelContentRenderer . getArray ( " paragraphs " ) ) {
if ( sb . length ( ) ! = 0 ) {
sb . append ( " <br> " ) ;
}
sb . append ( YoutubeParsingHelper . getTextFromObject ( ( JsonObject ) paragraph ) ) ;
}
metaInfo . setContent ( new Description ( sb . toString ( ) , Description . HTML ) ) ;
if ( infoPanelContentRenderer . has ( " sourceEndpoint " ) ) {
final String metaInfoLinkUrl = YoutubeParsingHelper . getUrlFromNavigationEndpoint (
infoPanelContentRenderer . getObject ( " sourceEndpoint " ) ) ;
try {
2021-04-25 16:54:26 +00:00
metaInfo . addUrl ( new URL ( Objects . requireNonNull ( extractCachedUrlIfNeeded (
metaInfoLinkUrl ) ) ) ) ;
2020-12-20 18:54:12 +00:00
} catch ( final NullPointerException | MalformedURLException e ) {
throw new ParsingException ( " Could not get metadata info URL " , e ) ;
}
final String metaInfoLinkText = YoutubeParsingHelper . getTextFromObject (
infoPanelContentRenderer . getObject ( " inlineSource " ) ) ;
if ( isNullOrEmpty ( metaInfoLinkText ) ) {
throw new ParsingException ( " Could not get metadata info link text. " ) ;
}
metaInfo . addUrlText ( metaInfoLinkText ) ;
}
return metaInfo ;
}
@Nonnull
2021-06-11 11:34:23 +00:00
private static MetaInfo getClarificationRendererContent ( @Nonnull final JsonObject clarificationRenderer )
2020-12-20 18:54:12 +00:00
throws ParsingException {
final MetaInfo metaInfo = new MetaInfo ( ) ;
2021-04-25 16:54:26 +00:00
final String title = YoutubeParsingHelper . getTextFromObject ( clarificationRenderer
. getObject ( " contentTitle " ) ) ;
final String text = YoutubeParsingHelper . getTextFromObject ( clarificationRenderer
. getObject ( " text " ) ) ;
2021-02-07 21:12:22 +00:00
if ( title = = null | | text = = null ) {
2020-12-20 18:54:12 +00:00
throw new ParsingException ( " Could not extract clarification renderer content " ) ;
}
metaInfo . setTitle ( title ) ;
metaInfo . setContent ( new Description ( text , Description . PLAIN_TEXT ) ) ;
if ( clarificationRenderer . has ( " actionButton " ) ) {
final JsonObject actionButton = clarificationRenderer . getObject ( " actionButton " )
. getObject ( " buttonRenderer " ) ;
try {
2021-04-25 16:54:26 +00:00
final String url = YoutubeParsingHelper . getUrlFromNavigationEndpoint ( actionButton
. getObject ( " command " ) ) ;
2020-12-20 18:54:12 +00:00
metaInfo . addUrl ( new URL ( Objects . requireNonNull ( extractCachedUrlIfNeeded ( url ) ) ) ) ;
} catch ( final NullPointerException | MalformedURLException e ) {
throw new ParsingException ( " Could not get metadata info URL " , e ) ;
}
final String metaInfoLinkText = YoutubeParsingHelper . getTextFromObject (
actionButton . getObject ( " text " ) ) ;
if ( isNullOrEmpty ( metaInfoLinkText ) ) {
throw new ParsingException ( " Could not get metadata info link text. " ) ;
}
metaInfo . addUrlText ( metaInfoLinkText ) ;
}
2021-04-25 16:54:26 +00:00
if ( clarificationRenderer . has ( " secondaryEndpoint " ) & & clarificationRenderer
. has ( " secondarySource " ) ) {
final String url = getUrlFromNavigationEndpoint ( clarificationRenderer
. getObject ( " secondaryEndpoint " ) ) ;
2021-06-11 11:34:23 +00:00
// Ignore Google URLs, because those point to a Google search about "Covid-19"
2020-12-20 18:54:12 +00:00
if ( url ! = null & & ! isGoogleURL ( url ) ) {
try {
metaInfo . addUrl ( new URL ( url ) ) ;
2021-04-25 16:54:26 +00:00
final String description = getTextFromObject ( clarificationRenderer
. getObject ( " secondarySource " ) ) ;
2020-12-20 18:54:12 +00:00
metaInfo . addUrlText ( description = = null ? url : description ) ;
2021-04-08 14:17:59 +00:00
} catch ( final MalformedURLException e ) {
2020-12-20 18:54:12 +00:00
throw new ParsingException ( " Could not get metadata info secondary URL " , e ) ;
}
}
}
return metaInfo ;
}
/ * *
* Sometimes , YouTube provides URLs which use Google ' s cache . They look like
* { @code https : //webcache.googleusercontent.com/search?q=cache:CACHED_URL}
2021-02-07 21:12:22 +00:00
*
2020-12-20 18:54:12 +00:00
* @param url the URL which might refer to the Google ' s webcache
* @return the URL which is referring to the original site
* /
public static String extractCachedUrlIfNeeded ( final String url ) {
if ( url = = null ) {
return null ;
}
if ( url . contains ( " webcache.googleusercontent.com " ) ) {
return url . split ( " cache: " ) [ 1 ] ;
}
return url ;
}
2021-01-22 00:44:58 +00:00
public static boolean isVerified ( final JsonArray badges ) {
if ( Utils . isNullOrEmpty ( badges ) ) {
return false ;
}
for ( Object badge : badges ) {
final String style = ( ( JsonObject ) badge ) . getObject ( " metadataBadgeRenderer " )
. getString ( " style " ) ;
if ( style ! = null & & ( style . equals ( " BADGE_STYLE_TYPE_VERIFIED " )
| | style . equals ( " BADGE_STYLE_TYPE_VERIFIED_ARTIST " ) ) ) {
return true ;
}
}
return false ;
}
2021-03-05 12:33:25 +00:00
2021-06-11 11:34:23 +00:00
@Nonnull
public static String unescapeDocument ( @Nonnull final String doc ) {
2021-03-05 12:33:25 +00:00
return doc
. replaceAll ( " \\ \\ x22 " , " \" " )
. replaceAll ( " \\ \\ x7b " , " { " )
. replaceAll ( " \\ \\ x7d " , " } " )
. replaceAll ( " \\ \\ x5b " , " [ " )
. replaceAll ( " \\ \\ x5d " , " ] " ) ;
}
2017-03-01 17:47:52 +00:00
}