Extract separate YoutubeThrottlingDecoder
This commit is contained in:
		
							parent
							
								
									a86a30103f
								
							
						
					
					
						commit
						80cf8b3acd
					
				
					 3 changed files with 151 additions and 51 deletions
				
			
		|  | @ -0,0 +1,101 @@ | ||||||
|  | package org.schabi.newpipe.extractor.services.youtube; | ||||||
|  | 
 | ||||||
|  | import org.jsoup.Jsoup; | ||||||
|  | import org.jsoup.nodes.Document; | ||||||
|  | import org.jsoup.nodes.Element; | ||||||
|  | import org.jsoup.select.Elements; | ||||||
|  | import org.schabi.newpipe.extractor.NewPipe; | ||||||
|  | import org.schabi.newpipe.extractor.exceptions.ParsingException; | ||||||
|  | import org.schabi.newpipe.extractor.localization.Localization; | ||||||
|  | import org.schabi.newpipe.extractor.utils.Javascript; | ||||||
|  | import org.schabi.newpipe.extractor.utils.Parser; | ||||||
|  | 
 | ||||||
|  | import java.util.regex.Pattern; | ||||||
|  | 
 | ||||||
|  | public class YoutubeThrottlingDecoder { | ||||||
|  | 
 | ||||||
|  |     private static final String HTTPS = "https:"; | ||||||
|  | 
 | ||||||
|  |     private final String functionName; | ||||||
|  |     private final String function; | ||||||
|  | 
 | ||||||
|  |     public YoutubeThrottlingDecoder(String videoId, Localization localization) throws ParsingException { | ||||||
|  |         String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl(videoId, localization)); | ||||||
|  |         String playerJsCode = downloadPlayerJsCode(localization, playerJsUrl); | ||||||
|  | 
 | ||||||
|  |         functionName = parseDecodeFunctionName(playerJsCode); | ||||||
|  |         function = parseDecodeFunction(playerJsCode, functionName); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String extractPlayerJsUrl(String videoId, Localization localization) throws ParsingException { | ||||||
|  |         try { | ||||||
|  |             final String embedUrl = "https://www.youtube.com/embed/" + videoId; | ||||||
|  |             final String embedPageContent = NewPipe.getDownloader() | ||||||
|  |                     .get(embedUrl, localization).responseBody(); | ||||||
|  | 
 | ||||||
|  |             try { | ||||||
|  |                 final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; | ||||||
|  |                 return Parser.matchGroup1(assetsPattern, embedPageContent) | ||||||
|  |                         .replace("\\", "").replace("\"", ""); | ||||||
|  |             } catch (final Parser.RegexException ex) { | ||||||
|  |                 // playerJsUrl is still available in the file, just somewhere else TODO | ||||||
|  |                 // it is ok not to find it, see how that's handled in getDeobfuscationCode() | ||||||
|  |                 final Document doc = Jsoup.parse(embedPageContent); | ||||||
|  |                 final Elements elems = doc.select("script").attr("name", "player_ias/base"); | ||||||
|  |                 for (final Element elem : elems) { | ||||||
|  |                     if (elem.attr("src").contains("base.js")) { | ||||||
|  |                         return elem.attr("src"); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |         } catch (final Exception i) { | ||||||
|  |             throw new ParsingException("Embedded info did not provide YouTube player js url"); | ||||||
|  |         } | ||||||
|  |         throw new ParsingException("Embedded info did not provide YouTube player js url"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String cleanPlayerJsUrl(String playerJsUrl) { | ||||||
|  |         if (playerJsUrl.startsWith("//")) { | ||||||
|  |             return HTTPS + playerJsUrl; | ||||||
|  |         } else if (playerJsUrl.startsWith("/")) { | ||||||
|  |             // sometimes https://www.youtube.com part has to be added manually | ||||||
|  |             return HTTPS + "//www.youtube.com" + playerJsUrl; | ||||||
|  |         } else { | ||||||
|  |             return playerJsUrl; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String downloadPlayerJsCode(Localization localization, String playerJsUrl) throws ParsingException { | ||||||
|  |         try { | ||||||
|  |             return NewPipe.getDownloader().get(playerJsUrl, localization).responseBody(); | ||||||
|  |         } catch (Exception e) { | ||||||
|  |             throw new ParsingException("Could not get player js code from url: " + playerJsUrl); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String parseDecodeFunctionName(String playerJsCode) throws Parser.RegexException { | ||||||
|  |         Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)"); | ||||||
|  |         return Parser.matchGroup1(pattern, playerJsCode); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String parseDecodeFunction(String playerJsCode, String functionName) throws Parser.RegexException { | ||||||
|  |         Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL); | ||||||
|  |         return  "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     public String parseNParam(String url) throws Parser.RegexException { | ||||||
|  |         Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)"); | ||||||
|  |         return Parser.matchGroup1(nValuePattern, url); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     public String decodeNParam(String nParam) { | ||||||
|  |         Javascript javascript = new Javascript(); | ||||||
|  |         return javascript.run(function, functionName, nParam); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     public String replaceNParam(String url, String newValue) { | ||||||
|  |         Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)"); | ||||||
|  |         return nValuePattern.matcher(url).replaceFirst(newValue); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | @ -25,6 +25,7 @@ import org.schabi.newpipe.extractor.localization.TimeAgoParser; | ||||||
| import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; | import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager; | ||||||
| import org.schabi.newpipe.extractor.services.youtube.ItagItem; | import org.schabi.newpipe.extractor.services.youtube.ItagItem; | ||||||
| import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; | import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper; | ||||||
|  | import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecoder; | ||||||
| import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; | import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory; | ||||||
| import org.schabi.newpipe.extractor.stream.*; | import org.schabi.newpipe.extractor.stream.*; | ||||||
| import org.schabi.newpipe.extractor.utils.JsonUtils; | import org.schabi.newpipe.extractor.utils.JsonUtils; | ||||||
|  | @ -39,7 +40,6 @@ import java.time.LocalDate; | ||||||
| import java.time.OffsetDateTime; | import java.time.OffsetDateTime; | ||||||
| import java.time.format.DateTimeFormatter; | import java.time.format.DateTimeFormatter; | ||||||
| import java.util.*; | import java.util.*; | ||||||
| import java.util.regex.Pattern; |  | ||||||
| 
 | 
 | ||||||
| import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*; | import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.*; | ||||||
| import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING; | import static org.schabi.newpipe.extractor.utils.Utils.EMPTY_STRING; | ||||||
|  | @ -80,13 +80,10 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
| 
 | 
 | ||||||
|     @Nullable |     @Nullable | ||||||
|     private static String cachedDeobfuscationCode = null; |     private static String cachedDeobfuscationCode = null; | ||||||
|     @Nullable |  | ||||||
|     private String playerJsUrl = null; |  | ||||||
| 
 |  | ||||||
|     private JsonArray initialAjaxJson; |  | ||||||
|     private JsonObject initialData; |  | ||||||
|     @Nonnull |     @Nonnull | ||||||
|     private final Map<String, String> videoInfoPage = new HashMap<>(); |     private final Map<String, String> videoInfoPage = new HashMap<>(); | ||||||
|  |     private JsonArray initialAjaxJson; | ||||||
|  |     private JsonObject initialData; | ||||||
|     private JsonObject playerResponse; |     private JsonObject playerResponse; | ||||||
|     private JsonObject videoPrimaryInfoRenderer; |     private JsonObject videoPrimaryInfoRenderer; | ||||||
|     private JsonObject videoSecondaryInfoRenderer; |     private JsonObject videoSecondaryInfoRenderer; | ||||||
|  | @ -526,32 +523,18 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|     public List<VideoStream> getVideoStreams() throws ExtractionException { |     public List<VideoStream> getVideoStreams() throws ExtractionException { | ||||||
|         assertPageFetched(); |         assertPageFetched(); | ||||||
|         final List<VideoStream> videoStreams = new ArrayList<>(); |         final List<VideoStream> videoStreams = new ArrayList<>(); | ||||||
|  |         YoutubeThrottlingDecoder throttlingDecoder = new YoutubeThrottlingDecoder(getId(), getExtractorLocalization()); | ||||||
| 
 | 
 | ||||||
|         try { |         try { | ||||||
|             getDeobfuscationCode(); |  | ||||||
|             final String playerCode = NewPipe.getDownloader() |  | ||||||
|                     .get(playerJsUrl, getExtractorLocalization()).responseBody(); |  | ||||||
|             Pattern pattern = Pattern.compile("b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)"); |  | ||||||
|             String functionName = Parser.matchGroup1(pattern, playerCode); |  | ||||||
|             Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n", Pattern.DOTALL); |  | ||||||
|             String function = "function " + functionName + Parser.matchGroup1(functionPattern, playerCode); |  | ||||||
| 
 |  | ||||||
|             Context context = Context.enter(); |  | ||||||
|             context.setOptimizationLevel(-1); |  | ||||||
|             ScriptableObject scope = context.initSafeStandardObjects(); |  | ||||||
| 
 |  | ||||||
|             for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) { |             for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) { | ||||||
|                 final ItagItem itag = entry.getValue(); |                 final ItagItem itag = entry.getValue(); | ||||||
|                 final String url = entry.getKey(); |                 final String url = entry.getKey(); | ||||||
|                 Pattern nValuePattern = Pattern.compile("[&?]n=([^&]+)"); |  | ||||||
|                 String nValue = Parser.matchGroup1(nValuePattern, url); |  | ||||||
| 
 | 
 | ||||||
|                 context.evaluateString(scope, function, functionName, 1, null); |                 String oldNParam = throttlingDecoder.parseNParam(url); | ||||||
|                 final Function jsFunction = (Function) scope.get(functionName, scope); |                 String newNParam = throttlingDecoder.decodeNParam(oldNParam); | ||||||
|                 Object result = jsFunction.call(context, scope, scope, new Object[]{nValue}); |                 String newUrl = throttlingDecoder.replaceNParam(url, newNParam); | ||||||
|                 String newNValue = Objects.toString(result, nValue); | 
 | ||||||
|                 String newUrl = nValuePattern.matcher(url).replaceFirst(newNValue); |                 System.out.println("aaaaaa  " + oldNParam + " - " + newNParam); | ||||||
|                 System.out.println("aaaaaa  " + nValue + " - " + newNValue); |  | ||||||
|                 final VideoStream videoStream = new VideoStream(newUrl, false, itag); |                 final VideoStream videoStream = new VideoStream(newUrl, false, itag); | ||||||
|                 if (!Stream.containSimilarStream(videoStream, videoStreams)) { |                 if (!Stream.containSimilarStream(videoStream, videoStreams)) { | ||||||
|                     videoStreams.add(videoStream); |                     videoStreams.add(videoStream); | ||||||
|  | @ -820,8 +803,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     @Nonnull |     private String extractPlayerJsUrl() throws ParsingException { | ||||||
|     private String getEmbeddedInfoStsAndStorePlayerJsUrl() { |  | ||||||
|         try { |         try { | ||||||
|             final String embedUrl = "https://www.youtube.com/embed/" + getId(); |             final String embedUrl = "https://www.youtube.com/embed/" + getId(); | ||||||
|             final String embedPageContent = NewPipe.getDownloader() |             final String embedPageContent = NewPipe.getDownloader() | ||||||
|  | @ -829,7 +811,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
| 
 | 
 | ||||||
|             try { |             try { | ||||||
|                 final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; |                 final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")"; | ||||||
|                 playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent) |                 return Parser.matchGroup1(assetsPattern, embedPageContent) | ||||||
|                         .replace("\\", "").replace("\"", ""); |                         .replace("\\", "").replace("\"", ""); | ||||||
|             } catch (final Parser.RegexException ex) { |             } catch (final Parser.RegexException ex) { | ||||||
|                 // playerJsUrl is still available in the file, just somewhere else TODO |                 // playerJsUrl is still available in the file, just somewhere else TODO | ||||||
|  | @ -838,17 +820,25 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|                 final Elements elems = doc.select("script").attr("name", "player_ias/base"); |                 final Elements elems = doc.select("script").attr("name", "player_ias/base"); | ||||||
|                 for (final Element elem : elems) { |                 for (final Element elem : elems) { | ||||||
|                     if (elem.attr("src").contains("base.js")) { |                     if (elem.attr("src").contains("base.js")) { | ||||||
|                         playerJsUrl = elem.attr("src"); |                         return elem.attr("src"); | ||||||
|                         break; |  | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             // Get embed sts |  | ||||||
|             return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent); |  | ||||||
|         } catch (final Exception i) { |         } catch (final Exception i) { | ||||||
|             // if it fails we simply reply with no sts as then it does not seem to be necessary |             throw new ParsingException("Embedded info did not provide YouTube player js url"); | ||||||
|             return ""; |         } | ||||||
|  |         throw new ParsingException("Embedded info did not provide YouTube player js url"); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     private String cleanPlayerJsUrl(String playerJsUrl) { | ||||||
|  |         if (playerJsUrl.startsWith("//")) { | ||||||
|  |             return HTTPS + playerJsUrl; | ||||||
|  |         } else if (playerJsUrl.startsWith("/")) { | ||||||
|  |             // sometimes https://www.youtube.com part has to be added manually | ||||||
|  |             return HTTPS + "//www.youtube.com" + playerJsUrl; | ||||||
|  |         } else { | ||||||
|  |             return playerJsUrl; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -899,22 +889,7 @@ public class YoutubeStreamExtractor extends StreamExtractor { | ||||||
|     @Nonnull |     @Nonnull | ||||||
|     private String getDeobfuscationCode() throws ParsingException { |     private String getDeobfuscationCode() throws ParsingException { | ||||||
|         if (cachedDeobfuscationCode == null) { |         if (cachedDeobfuscationCode == null) { | ||||||
|             if (playerJsUrl == null) { |             String playerJsUrl = cleanPlayerJsUrl(extractPlayerJsUrl()); | ||||||
|                 // the currentPlayerJsUrl was not found in any page fetched so far and there is |  | ||||||
|                 // nothing cached, so try fetching embedded info |  | ||||||
|                 getEmbeddedInfoStsAndStorePlayerJsUrl(); |  | ||||||
|                 if (playerJsUrl == null) { |  | ||||||
|                     throw new ParsingException( |  | ||||||
|                             "Embedded info did not provide YouTube player js url"); |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
| 
 |  | ||||||
|             if (playerJsUrl.startsWith("//")) { |  | ||||||
|                 playerJsUrl = HTTPS + playerJsUrl; |  | ||||||
|             } else if (playerJsUrl.startsWith("/")) { |  | ||||||
|                 // sometimes https://www.youtube.com part has to be added manually |  | ||||||
|                 playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl; |  | ||||||
|             } |  | ||||||
| 
 | 
 | ||||||
|             cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl); |             cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  | @ -0,0 +1,24 @@ | ||||||
|  | package org.schabi.newpipe.extractor.utils; | ||||||
|  | 
 | ||||||
|  | import org.mozilla.javascript.Context; | ||||||
|  | import org.mozilla.javascript.Function; | ||||||
|  | import org.mozilla.javascript.ScriptableObject; | ||||||
|  | 
 | ||||||
|  | public class Javascript { | ||||||
|  | 
 | ||||||
|  |     public String run(String function, String functionName, String... parameters) { | ||||||
|  |         try { | ||||||
|  |             Context context = Context.enter(); | ||||||
|  |             context.setOptimizationLevel(-1); | ||||||
|  |             ScriptableObject scope = context.initSafeStandardObjects(); | ||||||
|  | 
 | ||||||
|  |             context.evaluateString(scope, function, functionName, 1, null); | ||||||
|  |             Function jsFunction = (Function) scope.get(functionName, scope); | ||||||
|  |             Object result = jsFunction.call(context, scope, scope, parameters); | ||||||
|  |             return result.toString(); | ||||||
|  |         } finally { | ||||||
|  |             Context.exit(); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  | } | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue