Use new youtube API to fetch channel videos (#1355)

* Use new API to fetch videos from channels

This mirrors the process used by subscriptions.gir.st. The old API is
tried first, and if it fails then the new one is used.

* Use the new API whenever getting videos from a channel

I created the get_channel_videos_response function because now instead
of just getting a single url, there are extra steps involved in getting
the API response for channel videos, and these steps don't need to be
repeated throughout the code.

The only remaining exception is the bypass_captcha function, which still
only makes a request to the old API. I don't know whether this code
needs to be updated to use the new API for captcha bypassing to work
correctly.

* Correctly determine video length with new api

* Remove unnecessary line
This commit is contained in:
Ben Heller 2020-09-02 13:28:57 -07:00 committed by GitHub
parent 13f58d602f
commit 4a6e920d0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 204 additions and 156 deletions

View File

@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1 page = 1
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
videos = [] of SearchVideo videos = [] of SearchVideo
begin begin
@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
ids = [] of String ids = [] of String
loop do loop do
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]? initial_data = JSON.parse(response.body).as_a.find &.["response"]?
raise "Could not extract JSON" if !initial_data raise "Could not extract JSON" if !initial_data
videos = extract_videos(initial_data.as_h, author, ucid) videos = extract_videos(initial_data.as_h, author, ucid)
@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
return items, continuation return items, continuation
end end
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest") def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = { object = {
"80226972:embedded" => { "80226972:embedded" => {
"2:string" => ucid, "2:string" => ucid,
@ -411,6 +409,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
}, },
} }
if !v2
if auto_generated if auto_generated
seed = Time.unix(1525757349) seed = Time.unix(1525757349)
until seed >= Time.utc until seed >= Time.utc
@ -424,6 +423,20 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}" object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:embedded" => {
"1:varint" => 6307666885028338688_i64,
"2:embedded" => {
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
},
},
})))
end
case sort_by case sort_by
when "newest" when "newest"
@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
}) })
end end
# Fetches the raw HTTP response for a channel's videos.
#
# Mirrors the process used by subscriptions.gir.st: the old (v1) browse
# endpoint is tried first, and only if it reports an error alert is the
# new (v2) endpoint used instead.
#
# ucid           - channel ID
# page           - 1-based page number
# auto_generated - forwarded to produce_channel_videos_url
# sort_by        - forwarded to produce_channel_videos_url
#
# Returns the HTTP response object; callers parse response.body as JSON.
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
# First attempt: old API (v2: false).
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
# Body not in the expected shape: return the response as-is and let the
# caller handle it (it performs the same nil check on initial_data).
return response if !initial_data
# The old API signals failure through an alertRenderer of type ERROR
# inside response.alerts; needs_v2 is truthy only when such an alert
# exists. NOTE(review): `.try` on the already-nil-checked initial_data
# is redundant but harmless — kept byte-identical.
needs_v2 = initial_data
.try &.["response"]?.try &.["alerts"]?
.try &.as_a.any? { |alert|
alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
}
# Fall back to the new (v2) API; otherwise the v1 response stands.
if needs_v2
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
response = YT_POOL.client &.get(url)
end
response
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo videos = [] of SearchVideo
2.times do |i| 2.times do |i|
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]? initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data break if !initial_data
videos.concat extract_videos(initial_data.as_h, author, ucid) videos.concat extract_videos(initial_data.as_h, author, ucid)
@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end end
def get_latest_videos(ucid) def get_latest_videos(ucid)
url = produce_channel_videos_url(ucid, 0) response = get_channel_videos_response(ucid, 1)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]? initial_data = JSON.parse(response.body).as_a.find &.["response"]?
return [] of SearchVideo if !initial_data return [] of SearchVideo if !initial_data
author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s

View File

@ -164,20 +164,8 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
initial_data.try { |t| t["contents"]? || t["response"]? }
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
t["continuationContents"]? }
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
.try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
if i = item["videoRenderer"]?
video_id = i["videoId"].as_s video_id = i["videoId"].as_s
title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
@ -188,7 +176,9 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
.try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
live_now = false live_now = false
paid = false paid = false
@ -212,7 +202,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
end end
end end
items << SearchVideo.new({ SearchVideo.new({
title: title, title: title,
id: video_id, id: video_id,
author: author, author: author,
@ -238,7 +228,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
items << SearchChannel.new({ SearchChannel.new({
author: author, author: author,
ucid: author_id, ucid: author_id,
author_thumbnail: author_thumbnail, author_thumbnail: author_thumbnail,
@ -254,7 +244,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
items << SearchPlaylist.new({ SearchPlaylist.new({
title: title, title: title,
id: plid, id: plid,
author: author_fallback || "", author: author_fallback || "",
@ -288,7 +278,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
# TODO: i["publishedTimeText"]? # TODO: i["publishedTimeText"]?
items << SearchPlaylist.new({ SearchPlaylist.new({
title: title, title: title,
id: plid, id: plid,
author: author, author: author,
@ -305,7 +295,37 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
elsif i = item["horizontalCardListRenderer"]? elsif i = item["horizontalCardListRenderer"]?
elsif i = item["searchPyvRenderer"]? # Ad elsif i = item["searchPyvRenderer"]? # Ad
end end
end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
channel_v2_response = initial_data
.try &.["response"]?
.try &.["continuationContents"]?
.try &.["gridContinuation"]?
.try &.["items"]?
if channel_v2_response
channel_v2_response.try &.as_a.each { |item|
extract_item(item, author_fallback, author_id_fallback)
.try { |t| items << t }
}
else
initial_data.try { |t| t["contents"]? || t["response"]? }
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
t["continuationContents"]? }
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
.try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
extract_item(item, author_fallback, author_id_fallback)
.try { |t| items << t }
} } } }
end
items items
end end