Switch to innertube API for videos (#2220)

Commit history:
- Switch to innertube API for videos
- Fix hardcoded strings and fix the geo-restriction bypass
This commit is contained in:
Émilien Devos 2021-08-13 22:29:43 +02:00 committed by GitHub
parent c60fccd7f6
commit c06a20e085
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -525,10 +525,6 @@ struct Video
info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d")) info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d"))
end end
def cookie
info["cookie"]?.try &.as_h.map { |k, v| "#{k}=#{v}" }.join("; ") || ""
end
def allow_ratings def allow_ratings
r = info["videoDetails"]["allowRatings"]?.try &.as_bool r = info["videoDetails"]["allowRatings"]?.try &.as_bool
r.nil? ? false : r r.nil? ? false : r
@ -773,10 +769,6 @@ struct Video
def reason : String? def reason : String?
info["reason"]?.try &.as_s info["reason"]?.try &.as_s
end end
def session_token : String?
info["sessionToken"]?.try &.as_s?
end
end end
struct Caption struct Caption
@ -820,44 +812,53 @@ def parse_related(r : JSON::Any) : JSON::Any?
JSON::Any.new(rv) JSON::Any.new(rv)
end end
def extract_polymer_config(body) def extract_video_info(video_id : String, proxy_region : String? = nil)
params = {} of String => JSON::Any params = {} of String => JSON::Any
player_response = body.match(/(window\["ytInitialPlayerResponse"\]|var\sytInitialPlayerResponse)\s*=\s*(?<info>{.*?});\s*var\s*meta/m)
.try { |r| JSON.parse(r["info"]).as_h }
if body.includes?("To continue with your YouTube experience, please fill out the form below.") || client_config = YoutubeAPI::ClientConfig.new(proxy_region: proxy_region)
body.includes?("https://www.google.com/sorry/index") player_response = YoutubeAPI.player(video_id: video_id, params: "", client_config: client_config)
params["reason"] = JSON::Any.new("Could not extract video info. Instance is likely blocked.")
elsif !player_response if player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK"
params["reason"] = JSON::Any.new("Video unavailable.") reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s|
elsif player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK" s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("")
reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s| s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("") } || } || player_response["playabilityStatus"]["reason"].as_s
player_response["playabilityStatus"]["reason"].as_s
params["reason"] = JSON::Any.new(reason) params["reason"] = JSON::Any.new(reason)
end end
session_token_json_encoded = body.match(/"XSRF_TOKEN":"(?<session_token>[^"]+)"/).try &.["session_token"]? || "" params["shortDescription"] = player_response.dig?("videoDetails", "shortDescription") || JSON::Any.new(nil)
params["sessionToken"] = JSON.parse(%({"key": "#{session_token_json_encoded}"}))["key"]
params["shortDescription"] = JSON::Any.new(body.match(/"og:description" content="(?<description>[^"]+)"/).try &.["description"]?)
return params if !player_response # Don't fetch the next endpoint if the video is unavailable.
if !params["reason"]?
next_response = YoutubeAPI.next({"videoId": video_id, "params": ""})
player_response = player_response.merge(next_response)
end
# Fetch the video streams using an Android client in order to get the decrypted URLs and
# maybe fix throttling issues (#2194). See the following issue for an explanation of the decrypted URLs:
# https://github.com/TeamNewPipe/NewPipeExtractor/issues/562
if !params["reason"]?
client_config.client_type = YoutubeAPI::ClientType::Android
stream_data = YoutubeAPI.player(video_id: video_id, params: "", client_config: client_config)
params["streamingData"] = stream_data["streamingData"]? || JSON::Any.new("")
end
{"captions", "microformat", "playabilityStatus", "storyboards", "videoDetails"}.each do |f| {"captions", "microformat", "playabilityStatus", "storyboards", "videoDetails"}.each do |f|
params[f] = player_response[f] if player_response[f]? params[f] = player_response[f] if player_response[f]?
end end
yt_initial_data = extract_initial_data(body) params["relatedVideos"] = (
player_response
.dig?("playerOverlays", "playerOverlayRenderer", "endScreen", "watchNextEndScreenRenderer", "results")
.try &.as_a.compact_map { |r| parse_related r } || \
player_response
.dig?("webWatchNextResponseExtensionData", "relatedVideoArgs")
.try &.as_s.split(",").map { |r|
r = HTTP::Params.parse(r).to_h
JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
}
).try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]? primary_results = player_response.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]?
.try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
parse_related r
}.try { |a| JSON::Any.new(a) } || yt_initial_data.try &.["webWatchNextResponseExtensionData"]?.try &.["relatedVideoArgs"]?
.try &.as_s.split(",").map { |r|
r = HTTP::Params.parse(r).to_h
JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
}.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
primary_results = yt_initial_data.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]?
.try &.["results"]?.try &.["contents"]? .try &.["results"]?.try &.["contents"]?
sentiment_bar = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]? sentiment_bar = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]?
.try &.["videoPrimaryInfoRenderer"]? .try &.["videoPrimaryInfoRenderer"]?
@ -917,20 +918,6 @@ def extract_polymer_config(body)
params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]? params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]?
.try { |t| t["simpleText"]? || t["runs"]?.try &.[0]?.try &.["text"]? }.try &.as_s.split(" ", 2)[0] || "-") .try { |t| t["simpleText"]? || t["runs"]?.try &.[0]?.try &.["text"]? }.try &.as_s.split(" ", 2)[0] || "-")
initial_data = body.match(/ytplayer\.config\s*=\s*(?<info>.*?);ytplayer\.web_player_context_config/)
.try { |r| JSON.parse(r["info"]) }.try &.["args"]["player_response"]?
.try &.as_s?.try &.try { |r| JSON.parse(r).as_h }
if initial_data
{"playabilityStatus", "streamingData"}.each do |f|
params[f] = initial_data[f] if initial_data[f]?
end
else
{"playabilityStatus", "streamingData"}.each do |f|
params[f] = player_response[f] if player_response[f]?
end
end
params params
end end
@ -961,26 +948,19 @@ def get_video(id, db, refresh = true, region = nil, force_refresh = false)
end end
def fetch_video(id, region) def fetch_video(id, region)
response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999")) info = extract_video_info(video_id: id)
if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/) allowed_regions = info
raise VideoRedirect.new(video_id: md["id"]) .dig?("microformat", "playerMicroformatRenderer", "availableCountries")
end .try &.as_a.map &.as_s || [] of String
info = extract_polymer_config(response.body)
info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
allowed_regions = info["microformat"]?.try &.["playerMicroformatRenderer"]["availableCountries"]?.try &.as_a.map &.as_s || [] of String
# Check for region-blocks # Check for region-blocks
if info["reason"]?.try &.as_s.includes?("your country") if info["reason"]?.try &.as_s.includes?("your country")
bypass_regions = PROXY_LIST.keys & allowed_regions bypass_regions = PROXY_LIST.keys & allowed_regions
if !bypass_regions.empty? if !bypass_regions.empty?
region = bypass_regions[rand(bypass_regions.size)] region = bypass_regions[rand(bypass_regions.size)]
response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999")) region_info = extract_video_info(video_id: id, proxy_region: region)
region_info = extract_polymer_config(response.body)
region_info["region"] = JSON::Any.new(region) if region region_info["region"] = JSON::Any.new(region) if region
region_info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
info = region_info if !region_info["reason"]? info = region_info if !region_info["reason"]?
end end
end end