Use new YouTube API to fetch channel videos (#1355)

* Use new API to fetch videos from channels

This mirrors the process used by subscriptions.gir.st. The old API is
tried first, and if it fails then the new one is used.
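
Sketched out, the fallback looks roughly like this (a minimal sketch with hypothetical helper names; the actual logic lives in get_channel_videos_response in the diff below):

    # Minimal sketch of the fallback strategy. request_old_api and
    # has_error_alert? are hypothetical stand-ins for illustration only.
    response = request_old_api(ucid, page)   # try the old endpoint first
    if has_error_alert?(response)            # old API signals failure via an ERROR alert
      response = request_new_api(ucid, page) # fall back to the new endpoint
    end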

* Use the new API whenever getting videos from a channel

I created the get_channel_videos_response function because getting the
API response for channel videos now takes more than fetching a single
URL, and those extra steps shouldn't be repeated throughout the code.
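
For example, a typical call site goes from two steps to one (both taken from the diff below):

    # Before: each call site built the URL and issued the request itself.
    url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
    response = YT_POOL.client &.get(url)

    # After: the helper also decides between the old and new API.
    response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)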

The only remaining exception is the bypass_captcha function, which still
makes requests only to the old API. I don't know whether that code needs
to be updated to the new API for captcha bypassing to keep working
correctly.

* Correctly determine video length with the new API
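
The new API's grid items apparently do not always carry lengthText, so the length is also read from the thumbnail overlay. The relevant extraction, as it appears in extract_item in the diff below:

    # Falls back to thumbnailOverlayTimeStatusRenderer when lengthText is absent.
    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
                     i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
                       .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0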

* Remove unnecessary line
Ben Heller, 2020-09-02 13:28:57 -07:00 (committed via GitHub)
parent 13f58d602f
commit 4a6e920d0e
2 changed files with 204 additions and 156 deletions


@@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
   page = 1

-  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
-  response = YT_POOL.client &.get(url)
+  response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)

   videos = [] of SearchVideo

   begin
@@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
     ids = [] of String

     loop do
-      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
-      response = YT_POOL.client &.get(url)
+      response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
       initial_data = JSON.parse(response.body).as_a.find &.["response"]?
       raise "Could not extract JSON" if !initial_data
       videos = extract_videos(initial_data.as_h, author, ucid)
@@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
   return items, continuation
 end

-def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
   object = {
     "80226972:embedded" => {
       "2:string" => ucid,
@@ -411,18 +409,33 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
     },
   }

-  if auto_generated
-    seed = Time.unix(1525757349)
-    until seed >= Time.utc
-      seed += 1.month
-    end
-    timestamp = seed - (page - 1).months
+  if !v2
+    if auto_generated
+      seed = Time.unix(1525757349)
+      until seed >= Time.utc
+        seed += 1.month
+      end
+      timestamp = seed - (page - 1).months

-    object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
-    object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
+      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
+      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
+    else
+      object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
+      object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
+    end
   else
     object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
-    object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
+    object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+      "1:embedded" => {
+        "1:varint" => 6307666885028338688_i64,
+        "2:embedded" => {
+          "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+            "1:varint" => 30_i64 * (page - 1),
+          }))),
+        },
+      },
+    })))
   end

   case sort_by
@@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
   })
 end

+def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+  url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
+  response = YT_POOL.client &.get(url)
+  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+  return response if !initial_data
+
+  needs_v2 = initial_data
+    .try &.["response"]?.try &.["alerts"]?
+    .try &.as_a.any? { |alert|
+      alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
+    }
+  if needs_v2
+    url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
+    response = YT_POOL.client &.get(url)
+  end
+  response
+end
+
 def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
   videos = [] of SearchVideo

   2.times do |i|
-    url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
-    response = YT_POOL.client &.get(url)
+    response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
     initial_data = JSON.parse(response.body).as_a.find &.["response"]?
     break if !initial_data
     videos.concat extract_videos(initial_data.as_h, author, ucid)
@@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
 end

 def get_latest_videos(ucid)
-  url = produce_channel_videos_url(ucid, 0)
-  response = YT_POOL.client &.get(url)
+  response = get_channel_videos_response(ucid, 1)
   initial_data = JSON.parse(response.body).as_a.find &.["response"]?
   return [] of SearchVideo if !initial_data

   author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s


@@ -164,148 +164,168 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
   extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end
+
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+  if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
+    video_id = i["videoId"].as_s
+    title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+
+    author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
+    author = author_info.try &.["text"].as_s || author_fallback || ""
+    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+    published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+    view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+                     i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+                       .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+    live_now = false
+    paid = false
+    premium = false
+
+    premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+
+    i["badges"]?.try &.as_a.each do |badge|
+      b = badge["metadataBadgeRenderer"]
+      case b["label"].as_s
+      when "LIVE NOW"
+        live_now = true
+      when "New", "4K", "CC"
+        # TODO
+      when "Premium"
+        paid = true
+
+        # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
+        premium = true
+      else nil # Ignore
+      end
+    end
+
+    SearchVideo.new({
+      title: title,
+      id: video_id,
+      author: author,
+      ucid: author_id,
+      published: published,
+      views: view_count,
+      description_html: description_html,
+      length_seconds: length_seconds,
+      live_now: live_now,
+      paid: paid,
+      premium: premium,
+      premiere_timestamp: premiere_timestamp,
+    })
+  elsif i = item["channelRenderer"]?
+    author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
+    author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
+
+    author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
+    subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+    auto_generated = false
+    auto_generated = true if !i["videoCountText"]?
+    video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+    SearchChannel.new({
+      author: author,
+      ucid: author_id,
+      author_thumbnail: author_thumbnail,
+      subscriber_count: subscriber_count,
+      video_count: video_count,
+      description_html: description_html,
+      auto_generated: auto_generated,
+    })
+  elsif i = item["gridPlaylistRenderer"]?
+    title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+    plid = i["playlistId"]?.try &.as_s || ""
+
+    video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+    playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+    SearchPlaylist.new({
+      title: title,
+      id: plid,
+      author: author_fallback || "",
+      ucid: author_id_fallback || "",
+      video_count: video_count,
+      videos: [] of SearchPlaylistVideo,
+      thumbnail: playlist_thumbnail,
+    })
+  elsif i = item["playlistRenderer"]?
+    title = i["title"]["simpleText"]?.try &.as_s || ""
+    plid = i["playlistId"]?.try &.as_s || ""
+
+    video_count = i["videoCount"]?.try &.as_s.to_i || 0
+    playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+    author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
+    author = author_info.try &.["text"].as_s || author_fallback || ""
+    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+    videos = i["videos"]?.try &.as_a.map do |v|
+      v = v["childVideoRenderer"]
+      v_title = v["title"]["simpleText"]?.try &.as_s || ""
+      v_id = v["videoId"]?.try &.as_s || ""
+      v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+      SearchPlaylistVideo.new({
+        title: v_title,
+        id: v_id,
+        length_seconds: v_length_seconds,
+      })
+    end || [] of SearchPlaylistVideo
+
+    # TODO: i["publishedTimeText"]?
+
+    SearchPlaylist.new({
+      title: title,
+      id: plid,
+      author: author,
+      ucid: author_id,
+      video_count: video_count,
+      videos: videos,
+      thumbnail: playlist_thumbnail,
+    })
+  elsif i = item["radioRenderer"]? # Mix
+    # TODO
+  elsif i = item["showRenderer"]? # Show
+    # TODO
+  elsif i = item["shelfRenderer"]?
+  elsif i = item["horizontalCardListRenderer"]?
+  elsif i = item["searchPyvRenderer"]? # Ad
+  end
+end

 def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
   items = [] of SearchItem

-  initial_data.try { |t| t["contents"]? || t["response"]? }
-    .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
-               t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
-               t["continuationContents"]? }
-    .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
-    .try &.["contents"].as_a
-    .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
-      .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
-                 t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
-      .each { |item|
-        if i = item["videoRenderer"]?
-          video_id = i["videoId"].as_s
-          title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
-
-          author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
-          author = author_info.try &.["text"].as_s || author_fallback || ""
-          author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
-          published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
-          view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
-          description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-          length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
-
-          live_now = false
-          paid = false
-          premium = false
-
-          premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
-
-          i["badges"]?.try &.as_a.each do |badge|
-            b = badge["metadataBadgeRenderer"]
-            case b["label"].as_s
-            when "LIVE NOW"
-              live_now = true
-            when "New", "4K", "CC"
-              # TODO
-            when "Premium"
-              paid = true
-
-              # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
-              premium = true
-            else nil # Ignore
-            end
-          end
-
-          items << SearchVideo.new({
-            title: title,
-            id: video_id,
-            author: author,
-            ucid: author_id,
-            published: published,
-            views: view_count,
-            description_html: description_html,
-            length_seconds: length_seconds,
-            live_now: live_now,
-            paid: paid,
-            premium: premium,
-            premiere_timestamp: premiere_timestamp,
-          })
-        elsif i = item["channelRenderer"]?
-          author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
-          author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
-
-          author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
-          subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
-
-          auto_generated = false
-          auto_generated = true if !i["videoCountText"]?
-          video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-          description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-
-          items << SearchChannel.new({
-            author: author,
-            ucid: author_id,
-            author_thumbnail: author_thumbnail,
-            subscriber_count: subscriber_count,
-            video_count: video_count,
-            description_html: description_html,
-            auto_generated: auto_generated,
-          })
-        elsif i = item["gridPlaylistRenderer"]?
-          title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
-          plid = i["playlistId"]?.try &.as_s || ""
-
-          video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-          playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
-
-          items << SearchPlaylist.new({
-            title: title,
-            id: plid,
-            author: author_fallback || "",
-            ucid: author_id_fallback || "",
-            video_count: video_count,
-            videos: [] of SearchPlaylistVideo,
-            thumbnail: playlist_thumbnail,
-          })
-        elsif i = item["playlistRenderer"]?
-          title = i["title"]["simpleText"]?.try &.as_s || ""
-          plid = i["playlistId"]?.try &.as_s || ""
-
-          video_count = i["videoCount"]?.try &.as_s.to_i || 0
-          playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
-
-          author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
-          author = author_info.try &.["text"].as_s || author_fallback || ""
-          author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
-          videos = i["videos"]?.try &.as_a.map do |v|
-            v = v["childVideoRenderer"]
-            v_title = v["title"]["simpleText"]?.try &.as_s || ""
-            v_id = v["videoId"]?.try &.as_s || ""
-            v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
-            SearchPlaylistVideo.new({
-              title: v_title,
-              id: v_id,
-              length_seconds: v_length_seconds,
-            })
-          end || [] of SearchPlaylistVideo
-
-          # TODO: i["publishedTimeText"]?
-
-          items << SearchPlaylist.new({
-            title: title,
-            id: plid,
-            author: author,
-            ucid: author_id,
-            video_count: video_count,
-            videos: videos,
-            thumbnail: playlist_thumbnail,
-          })
-        elsif i = item["radioRenderer"]? # Mix
-          # TODO
-        elsif i = item["showRenderer"]? # Show
-          # TODO
-        elsif i = item["shelfRenderer"]?
-        elsif i = item["horizontalCardListRenderer"]?
-        elsif i = item["searchPyvRenderer"]? # Ad
-        end
-      } }
+  channel_v2_response = initial_data
+    .try &.["response"]?
+    .try &.["continuationContents"]?
+    .try &.["gridContinuation"]?
+    .try &.["items"]?
+
+  if channel_v2_response
+    channel_v2_response.try &.as_a.each { |item|
+      extract_item(item, author_fallback, author_id_fallback)
+        .try { |t| items << t }
+    }
+  else
+    initial_data.try { |t| t["contents"]? || t["response"]? }
+      .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
+                 t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+                 t["continuationContents"]? }
+      .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+      .try &.["contents"].as_a
+      .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
+        .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+                   t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
+        .each { |item|
+          extract_item(item, author_fallback, author_id_fallback)
+            .try { |t| items << t }
+        } }
+  end

   items
 end