[YouTube] Improve WEB client version and API key HTML extraction
Common code in the WEB client version HTML extraction has been deduplicated, the Java 8 Stream API is now used, and the initial data fallback is now used only as a last resort. This means that the client version is extracted using regexes before resorting to this fallback, as the fallback doesn't contain the full client version. The shortened version could be used as a way to fingerprint the extractor, even if this does not seem to be the case.
This commit is contained in:
		
							parent
							
								
									6a885ef5ab
								
							
						
					
					
						commit
						d7e678aca2
					
				
					 1 changed files with 61 additions and 39 deletions
				
			
		|  | @ -73,6 +73,7 @@ import java.util.Objects; | ||||||
| import java.util.Optional; | import java.util.Optional; | ||||||
| import java.util.Random; | import java.util.Random; | ||||||
| import java.util.regex.Pattern; | import java.util.regex.Pattern; | ||||||
|  | import java.util.stream.Stream; | ||||||
| 
 | 
 | ||||||
| import javax.annotation.Nonnull; | import javax.annotation.Nonnull; | ||||||
| import javax.annotation.Nullable; | import javax.annotation.Nullable; | ||||||
|  | @ -640,59 +641,79 @@ public final class YoutubeParsingHelper { | ||||||
|         if (keyAndVersionExtracted) { |         if (keyAndVersionExtracted) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|         // Don't provide a search term in order to have a smaller response |         // Don't provide a search term in order to have a smaller response | ||||||
|         final String url = "https://www.youtube.com/results?search_query=&ucbcb=1"; |         final String url = "https://www.youtube.com/results?search_query=&ucbcb=1"; | ||||||
|         final String html = getDownloader().get(url, getCookieHeader()).responseBody(); |         final String html = getDownloader().get(url, getCookieHeader()).responseBody(); | ||||||
|         final JsonObject initialData = getInitialData(html); |         final JsonObject initialData = getInitialData(html); | ||||||
|         final JsonArray serviceTrackingParams = initialData.getObject("responseContext") |         final JsonArray serviceTrackingParams = initialData.getObject("responseContext") | ||||||
|                 .getArray("serviceTrackingParams"); |                 .getArray("serviceTrackingParams"); | ||||||
|         String shortClientVersion = null; |  | ||||||
| 
 | 
 | ||||||
|         // Try to get version from initial data first |         // Try to get version from initial data first | ||||||
|         for (final Object service : serviceTrackingParams) { |         final Stream<JsonObject> serviceTrackingParamsStream = serviceTrackingParams.stream() | ||||||
|             final JsonObject s = (JsonObject) service; |                 .filter(JsonObject.class::isInstance) | ||||||
|             if (s.getString("service").equals("CSI")) { |                 .map(JsonObject.class::cast); | ||||||
|                 final JsonArray params = s.getArray("params"); |  | ||||||
|                 for (final Object param : params) { |  | ||||||
|                     final JsonObject p = (JsonObject) param; |  | ||||||
|                     final String paramKey = p.getString("key"); |  | ||||||
|                     if (paramKey != null && paramKey.equals("cver")) { |  | ||||||
|                         clientVersion = p.getString("value"); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } else if (s.getString("service").equals("ECATCHER")) { |  | ||||||
|                 // Fallback to get a shortened client version which does not contain the last two |  | ||||||
|                 // digits |  | ||||||
|                 final JsonArray params = s.getArray("params"); |  | ||||||
|                 for (final Object param : params) { |  | ||||||
|                     final JsonObject p = (JsonObject) param; |  | ||||||
|                     final String paramKey = p.getString("key"); |  | ||||||
|                     if (paramKey != null && paramKey.equals("client.version")) { |  | ||||||
|                         shortClientVersion = p.getString("value"); |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
|  |         clientVersion = getClientVersionFromServiceTrackingParam( | ||||||
|  |                 serviceTrackingParamsStream, "CSI", "cver"); | ||||||
|  | 
 | ||||||
|  |         if (clientVersion == null) { | ||||||
|             try { |             try { | ||||||
|                 clientVersion = getStringResultFromRegexArray(html, |                 clientVersion = getStringResultFromRegexArray(html, | ||||||
|                         INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1); |                         INNERTUBE_CONTEXT_CLIENT_VERSION_REGEXES, 1); | ||||||
|             } catch (final Parser.RegexException ignored) { |             } catch (final Parser.RegexException ignored) { | ||||||
|             } |             } | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         if (!isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) { |         // Fallback to get a shortened client version which does not contain the last two | ||||||
|             clientVersion = shortClientVersion; |         // digits | ||||||
|  |         if (isNullOrEmpty(clientVersion)) { | ||||||
|  |             clientVersion = getClientVersionFromServiceTrackingParam( | ||||||
|  |                     serviceTrackingParamsStream, "ECATCHER", "client.version"); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         try { |         try { | ||||||
|             key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1); |             key = getStringResultFromRegexArray(html, INNERTUBE_API_KEY_REGEXES, 1); | ||||||
|         } catch (final Parser.RegexException e) { |         } catch (final Parser.RegexException ignored) { | ||||||
|             throw new ParsingException("Could not extract YouTube WEB InnerTube client version " |  | ||||||
|                     + "and API key from HTML search results page", e); |  | ||||||
|         } |         } | ||||||
|  | 
 | ||||||
|  |         if (isNullOrEmpty(key)) { | ||||||
|  |             throw new ParsingException( | ||||||
|  |                     // CHECKSTYLE:OFF | ||||||
|  |                     "Could not extract YouTube WEB InnerTube API key from HTML search results page"); | ||||||
|  |                     // CHECKSTYLE:ON | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         if (clientVersion == null) { | ||||||
|  |             throw new ParsingException( | ||||||
|  |                     // CHECKSTYLE:OFF | ||||||
|  |                     "Could not extract YouTube WEB InnerTube client version from HTML search results page"); | ||||||
|  |                     // CHECKSTYLE:ON | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         keyAndVersionExtracted = true; |         keyAndVersionExtracted = true; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     @Nullable | ||||||
|  |     private static String getClientVersionFromServiceTrackingParam( | ||||||
|  |             @Nonnull final Stream<JsonObject> serviceTrackingParamsStream, | ||||||
|  |             @Nonnull final String serviceName, | ||||||
|  |             @Nonnull final String clientVersionKey) { | ||||||
|  |         return serviceTrackingParamsStream.filter(serviceTrackingParam -> | ||||||
|  |                         serviceTrackingParam.getString("service", "") | ||||||
|  |                                 .equals(serviceName)) | ||||||
|  |                 .flatMap(serviceTrackingParam -> serviceTrackingParam.getArray("params") | ||||||
|  |                         .stream()) | ||||||
|  |                 .filter(JsonObject.class::isInstance) | ||||||
|  |                 .map(JsonObject.class::cast) | ||||||
|  |                 .filter(param -> param.getString("key", "") | ||||||
|  |                         .equals(clientVersionKey)) | ||||||
|  |                 .map(param -> param.getString("value")) | ||||||
|  |                 .filter(paramValue -> !isNullOrEmpty(paramValue)) | ||||||
|  |                 .findFirst() | ||||||
|  |                 .orElse(null); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /** |     /** | ||||||
|      * Get the client version used by YouTube website on InnerTube requests. |      * Get the client version used by YouTube website on InnerTube requests. | ||||||
|      */ |      */ | ||||||
|  | @ -701,8 +722,8 @@ public final class YoutubeParsingHelper { | ||||||
|             return clientVersion; |             return clientVersion; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Always extract latest client version, by trying first to extract it from the JavaScript |         // Always extract the latest client version, by trying first to extract it from the | ||||||
|         // service worker, then from HTML search results page as a fallback, to prevent |         // JavaScript service worker, then from HTML search results page as a fallback, to prevent | ||||||
|         // fingerprinting based on the client version used |         // fingerprinting based on the client version used | ||||||
|         try { |         try { | ||||||
|             extractClientVersionAndKeyFromSwJs(); |             extractClientVersionAndKeyFromSwJs(); | ||||||
|  | @ -714,7 +735,7 @@ public final class YoutubeParsingHelper { | ||||||
|             return clientVersion; |             return clientVersion; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Fallback to the hardcoded one if it's valid |         // Fallback to the hardcoded one if it is valid | ||||||
|         if (areHardcodedClientVersionAndKeyValid()) { |         if (areHardcodedClientVersionAndKeyValid()) { | ||||||
|             clientVersion = HARDCODED_CLIENT_VERSION; |             clientVersion = HARDCODED_CLIENT_VERSION; | ||||||
|             return clientVersion; |             return clientVersion; | ||||||
|  | @ -731,7 +752,7 @@ public final class YoutubeParsingHelper { | ||||||
|             return key; |             return key; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // Always extract the key used by the webiste, by trying first to extract it from the |         // Always extract the key used by the website, by trying first to extract it from the | ||||||
|         // JavaScript service worker, then from HTML search results page as a fallback, to prevent |         // JavaScript service worker, then from HTML search results page as a fallback, to prevent | ||||||
|         // fingerprinting based on the key and/or invalid key issues |         // fingerprinting based on the key and/or invalid key issues | ||||||
|         try { |         try { | ||||||
|  | @ -751,7 +772,8 @@ public final class YoutubeParsingHelper { | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         // The ANDROID API key is also valid with the WEB client so return it if we couldn't |         // The ANDROID API key is also valid with the WEB client so return it if we couldn't | ||||||
|         // extract the WEB API key. |         // extract the WEB API key. This can be used as a way to fingerprint the extractor in this | ||||||
|  |         // case | ||||||
|         return ANDROID_YOUTUBE_KEY; |         return ANDROID_YOUTUBE_KEY; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue