Use new youtube API to fetch channel videos (#1355)

* Use new API to fetch videos from channels

  This mirrors the process used by subscriptions.gir.st. The old API is tried first, and if it fails, the new one is used.

* Use the new API whenever getting videos from a channel

  I created the get_channel_videos_response function because, instead of just fetching a single URL, there are now extra steps involved in getting the API response for channel videos, and these steps don't need to be repeated throughout the code.

  The only remaining exception is the bypass_captcha function, which still only makes a request to the old API. I don't know whether this code needs to be updated to use the new API for captcha bypassing to work correctly.

* Correctly determine video length with new API

* Remove unnecessary line
2024-08-15 00:53:18 +00:00 · 2020-09-02 13:28:57 -07:00 · 2020-09-02 13:28:57 -07:00 · 4a6e920d0e
commit 4a6e920d0e
parent 13f58d602f
2 changed files with 204 additions and 156 deletions
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)

  page = 1

-  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
-  response = YT_POOL.client &.get(url)
+  response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)

  videos = [] of SearchVideo
  begin
@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
    ids = [] of String

    loop do
-      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
-      response = YT_POOL.client &.get(url)
+      response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
      initial_data = JSON.parse(response.body).as_a.find &.["response"]?
      raise "Could not extract JSON" if !initial_data
      videos = extract_videos(initial_data.as_h, author, ucid)
@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
  return items, continuation
 end

-def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
  object = {
    "80226972:embedded" => {
      "2:string" => ucid,
@ -411,6 +409,7 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
    },
  }

+  if !v2
    if auto_generated
      seed = Time.unix(1525757349)
      until seed >= Time.utc
@ -424,6 +423,20 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
    end
+  else
+    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
+
+    object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+      "1:embedded" => {
+        "1:varint" => 6307666885028338688_i64,
+        "2:embedded" => {
+          "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+            "1:varint" => 30_i64 * (page - 1),
+          }))),
+        },
+      },
+    })))
+  end

  case sort_by
  when "newest"
@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
  })
 end

+def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") # Returns the channel-videos reply, retrying with the v2 URL when the first reply carries an ERROR alert.
+  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false) # first attempt: URL built with v2: false
+  response = YT_POOL.client &.get(url)
+  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+  return response if !initial_data # no array element with a "response" key: hand the reply back unchanged
+  needs_v2 = initial_data
+    .try &.["response"]?.try &.["alerts"]?
+    .try &.as_a.any? { |alert|
+      alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
+    }
+  if needs_v2 # at least one alert has alertRenderer type "ERROR"
+    url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) # second attempt: same arguments, v2: true
+    response = YT_POOL.client &.get(url)
+  end
+  response # the retry's reply when it ran, otherwise the first reply
+end
+
 def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
  videos = [] of SearchVideo

  2.times do |i|
-    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
-    response = YT_POOL.client &.get(url)
+    response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
    break if !initial_data
    videos.concat extract_videos(initial_data.as_h, author, ucid)
@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
 end

 def get_latest_videos(ucid)
-  url = produce_channel_videos_url(ucid, 0)
-  response = YT_POOL.client &.get(url)
+  response = get_channel_videos_response(ucid, 1)
  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
  return [] of SearchVideo if !initial_data
  author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@ -164,20 +164,8 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
  extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end

-def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
-  items = [] of SearchItem
-
-  initial_data.try { |t| t["contents"]? || t["response"]? }
-    .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
-      t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
-      t["continuationContents"]? }
-    .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
-    .try &.["contents"].as_a
-      .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
-        .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
-          t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
-        .each { |item|
-          if i = item["videoRenderer"]?
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+  if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
    video_id = i["videoId"].as_s
    title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""

@ -188,7 +176,9 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
    view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-            length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+      i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+        .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0

    live_now = false
    paid = false
@ -212,7 +202,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
      end
    end

-            items << SearchVideo.new({
+    SearchVideo.new({
      title:              title,
      id:                 video_id,
      author:             author,
@ -238,7 +228,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""

-            items << SearchChannel.new({
+    SearchChannel.new({
      author:           author,
      ucid:             author_id,
      author_thumbnail: author_thumbnail,
@ -254,7 +244,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
    video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
    playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""

-            items << SearchPlaylist.new({
+    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author_fallback || "",
@ -288,7 +278,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri

    # TODO: i["publishedTimeText"]?

-            items << SearchPlaylist.new({
+    SearchPlaylist.new({
      title:       title,
      id:          plid,
      author:      author,
@ -305,7 +295,37 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
  elsif i = item["horizontalCardListRenderer"]?
  elsif i = item["searchPyvRenderer"]? # Ad
  end
+end
+
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) # Builds an array of SearchItem by passing each raw item to extract_item; nil results are not appended.
+  items = [] of SearchItem
+
+  channel_v2_response = initial_data
+    .try &.["response"]?
+    .try &.["continuationContents"]?
+    .try &.["gridContinuation"]?
+    .try &.["items"]?
+
+  if channel_v2_response # response.continuationContents.gridContinuation.items exists: iterate it directly
+    channel_v2_response.try &.as_a.each { |item|
+        extract_item(item, author_fallback, author_id_fallback)
+          .try { |t| items << t }
+    }
+  else # otherwise walk the browse-tab / search-results / continuation section lists
+    initial_data.try { |t| t["contents"]? || t["response"]? }
+      .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
+        t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+        t["continuationContents"]? }
+      .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+      .try &.["contents"].as_a
+        .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
+          .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+            t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
+          .each { |item|
+            extract_item(item, author_fallback, author_id_fallback)
+              .try { |t| items << t }
          } }
+    end

  items
 end