From c06a20e08575e721de003186c4ecbc2c073df1d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89milien=20Devos?= Date: Fri, 13 Aug 2021 22:29:43 +0200 Subject: [PATCH] Switch to innertube API for videos (#2220) Commit history: - Switch to innertube API for videos - fix hardcoded strings + fix bypass geo-restriction --- src/invidious/videos.cr | 100 ++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 60 deletions(-) diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 79897985..6a53b8ca 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -525,10 +525,6 @@ struct Video info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d")) end - def cookie - info["cookie"]?.try &.as_h.map { |k, v| "#{k}=#{v}" }.join("; ") || "" - end - def allow_ratings r = info["videoDetails"]["allowRatings"]?.try &.as_bool r.nil? ? false : r @@ -773,10 +769,6 @@ struct Video def reason : String? info["reason"]?.try &.as_s end - - def session_token : String? - info["sessionToken"]?.try &.as_s? - end end struct Caption @@ -820,44 +812,53 @@ def parse_related(r : JSON::Any) : JSON::Any? JSON::Any.new(rv) end -def extract_polymer_config(body) +def extract_video_info(video_id : String, proxy_region : String? = nil) params = {} of String => JSON::Any - player_response = body.match(/(window\["ytInitialPlayerResponse"\]|var\sytInitialPlayerResponse)\s*=\s*(?<info>{.*?});\s*var\s*meta/m) - .try { |r| JSON.parse(r["info"]).as_h } - if body.includes?("To continue with your YouTube experience, please fill out the form below.") || - body.includes?("https://www.google.com/sorry/index") - params["reason"] = JSON::Any.new("Could not extract video info. 
Instance is likely blocked.") - elsif !player_response - params["reason"] = JSON::Any.new("Video unavailable.") - elsif player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK" - reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s| s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("") } || - player_response["playabilityStatus"]["reason"].as_s + client_config = YoutubeAPI::ClientConfig.new(proxy_region: proxy_region) + player_response = YoutubeAPI.player(video_id: video_id, params: "", client_config: client_config) + + if player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK" + reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s| + s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("") + } || player_response["playabilityStatus"]["reason"].as_s params["reason"] = JSON::Any.new(reason) end - session_token_json_encoded = body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || "" - params["sessionToken"] = JSON.parse(%({"key": "#{session_token_json_encoded}"}))["key"] - params["shortDescription"] = JSON::Any.new(body.match(/"og:description" content="(?<description>[^"]+)"/).try &.["description"]?) + params["shortDescription"] = player_response.dig?("videoDetails", "shortDescription") || JSON::Any.new(nil) - return params if !player_response + # Don't fetch the next endpoint if the video is unavailable. + if !params["reason"]? + next_response = YoutubeAPI.next({"videoId": video_id, "params": ""}) + player_response = player_response.merge(next_response) + end + + # Fetch the video streams using an Android client in order to get the decrypted URLs and + # maybe fix throttling issues (#2194). See the link below for the explanation about the decrypted URLs: + # https://github.com/TeamNewPipe/NewPipeExtractor/issues/562 + if !params["reason"]? 
+ client_config.client_type = YoutubeAPI::ClientType::Android + stream_data = YoutubeAPI.player(video_id: video_id, params: "", client_config: client_config) + params["streamingData"] = stream_data["streamingData"]? || JSON::Any.new("") + end {"captions", "microformat", "playabilityStatus", "storyboards", "videoDetails"}.each do |f| params[f] = player_response[f] if player_response[f]? end - yt_initial_data = extract_initial_data(body) + params["relatedVideos"] = ( + player_response + .dig?("playerOverlays", "playerOverlayRenderer", "endScreen", "watchNextEndScreenRenderer", "results") + .try &.as_a.compact_map { |r| parse_related r } || \ + player_response + .dig?("webWatchNextResponseExtensionData", "relatedVideoArgs") + .try &.as_s.split(",").map { |r| + r = HTTP::Params.parse(r).to_h + JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) })) + } + ).try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any) - params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]? - .try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r| - parse_related r - }.try { |a| JSON::Any.new(a) } || yt_initial_data.try &.["webWatchNextResponseExtensionData"]?.try &.["relatedVideoArgs"]? - .try &.as_s.split(",").map { |r| - r = HTTP::Params.parse(r).to_h - JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) })) - }.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any) - - primary_results = yt_initial_data.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]? + primary_results = player_response.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]? .try &.["results"]?.try &.["contents"]? sentiment_bar = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]? .try &.["videoPrimaryInfoRenderer"]? 
@@ -917,20 +918,6 @@ def extract_polymer_config(body) params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]? .try { |t| t["simpleText"]? || t["runs"]?.try &.[0]?.try &.["text"]? }.try &.as_s.split(" ", 2)[0] || "-") - initial_data = body.match(/ytplayer\.config\s*=\s*(?<info>.*?);ytplayer\.web_player_context_config/) - .try { |r| JSON.parse(r["info"]) }.try &.["args"]["player_response"]? - .try &.as_s?.try &.try { |r| JSON.parse(r).as_h } - - if initial_data - {"playabilityStatus", "streamingData"}.each do |f| - params[f] = initial_data[f] if initial_data[f]? - end - else - {"playabilityStatus", "streamingData"}.each do |f| - params[f] = player_response[f] if player_response[f]? - end - end - params end @@ -961,26 +948,19 @@ def get_video(id, db, refresh = true, region = nil, force_refresh = false) end def fetch_video(id, region) - response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999")) + info = extract_video_info(video_id: id) - if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/) - raise VideoRedirect.new(video_id: md["id"]) - end - - info = extract_polymer_config(response.body) - info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) }) - allowed_regions = info["microformat"]?.try &.["playerMicroformatRenderer"]["availableCountries"]?.try &.as_a.map &.as_s || [] of String + allowed_regions = info + .dig?("microformat", "playerMicroformatRenderer", "availableCountries") + .try &.as_a.map &.as_s || [] of String # Check for region-blocks if info["reason"]?.try &.as_s.includes?("your country") bypass_regions = PROXY_LIST.keys & allowed_regions if !bypass_regions.empty? 
region = bypass_regions[rand(bypass_regions.size)] - response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999")) - - region_info = extract_polymer_config(response.body) + region_info = extract_video_info(video_id: id, proxy_region: region) region_info["region"] = JSON::Any.new(region) if region - region_info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) }) info = region_info if !region_info["reason"]? end end