Use new youtube API to fetch channel videos (#1355)

* Use new API to fetch videos from channels

This mirrors the process used by subscriptions.gir.st. The old API is
tried first, and if it fails then the new one is used.

* Use the new API whenever getting videos from a channel

I created the get_channel_videos_response function because now instead
of just getting a single url, there are extra steps involved in getting
the API response for channel videos, and these steps don't need to be
repeated throughout the code.

The only remaining exception is the bypass_captcha function, which still
only makes a request to the old API. I don't know whether this code
needs to be updated to use the new API for captcha bypassing to work
correctly.

* Correctly determine video length with new api

* Remove unnecessary line
This commit is contained in:
Ben Heller 2020-09-02 13:28:57 -07:00 committed by GitHub
parent 13f58d602f
commit 4a6e920d0e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 204 additions and 156 deletions

View File

@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = [] of SearchVideo
begin
@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
ids = [] of String
loop do
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
raise "Could not extract JSON" if !initial_data
videos = extract_videos(initial_data.as_h, author, ucid)
@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
return items, continuation
end
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
@ -411,6 +409,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
},
}
if !v2
if auto_generated
seed = Time.unix(1525757349)
until seed >= Time.utc
@ -424,6 +423,20 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:embedded" => {
"1:varint" => 6307666885028338688_i64,
"2:embedded" => {
"1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
"1:varint" => 30_i64 * (page - 1),
}))),
},
},
})))
end
case sort_by
when "newest"
@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
})
end
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
return response if !initial_data
needs_v2 = initial_data
.try &.["response"]?.try &.["alerts"]?
.try &.as_a.any? { |alert|
alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
}
if needs_v2
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
response = YT_POOL.client &.get(url)
end
response
end
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
2.times do |i|
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
response = YT_POOL.client &.get(url)
response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data
videos.concat extract_videos(initial_data.as_h, author, ucid)
@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end
def get_latest_videos(ucid)
url = produce_channel_videos_url(ucid, 0)
response = YT_POOL.client &.get(url)
response = get_channel_videos_response(ucid, 1)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
return [] of SearchVideo if !initial_data
author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s

View File

@ -164,20 +164,8 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
initial_data.try { |t| t["contents"]? || t["response"]? }
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
t["continuationContents"]? }
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
.try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
if i = item["videoRenderer"]?
def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
video_id = i["videoId"].as_s
title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
@ -188,7 +176,9 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
.try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
live_now = false
paid = false
@ -212,7 +202,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
end
end
items << SearchVideo.new({
SearchVideo.new({
title: title,
id: video_id,
author: author,
@ -238,7 +228,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
items << SearchChannel.new({
SearchChannel.new({
author: author,
ucid: author_id,
author_thumbnail: author_thumbnail,
@ -254,7 +244,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
items << SearchPlaylist.new({
SearchPlaylist.new({
title: title,
id: plid,
author: author_fallback || "",
@ -288,7 +278,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
# TODO: i["publishedTimeText"]?
items << SearchPlaylist.new({
SearchPlaylist.new({
title: title,
id: plid,
author: author,
@ -305,7 +295,37 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
elsif i = item["horizontalCardListRenderer"]?
elsif i = item["searchPyvRenderer"]? # Ad
end
end
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
channel_v2_response = initial_data
.try &.["response"]?
.try &.["continuationContents"]?
.try &.["gridContinuation"]?
.try &.["items"]?
if channel_v2_response
channel_v2_response.try &.as_a.each { |item|
extract_item(item, author_fallback, author_id_fallback)
.try { |t| items << t }
}
else
initial_data.try { |t| t["contents"]? || t["response"]? }
.try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
t["continuationContents"]? }
.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
.try &.["contents"].as_a
.each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
.try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
extract_item(item, author_fallback, author_id_fallback)
.try { |t| items << t }
} }
end
items
end