From 1323b94b7a3a90a27a4353edddb7b9c103044e02 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 4 May 2021 01:48:51 -0700
Subject: [PATCH 01/22] Rewrite extract_item and extract_items functions

This commit completely rewrites the extract_item and extract_items
functions. Before this commit, these two functions were an unreadable
mess. The extract_item function was a lengthy if-elsif chain,
while the extract_items function contained an incomprehensible
mess of .try, else and ||.

With this commit both of these functions have been pulled into a
separate file with the internal logic being moved to a few classes.

This significantly reduces the size of these two methods, enhances
readability and makes adding new extraction/parse rules much simpler.

See diff for details.

--
This cherry-picked commit also removes the code for parsing featured
channels present in the original commit.

(cherry picked from commit a027fbf7af1f96dc26fe5a610525ae52bcc40c28)
---
 src/invidious/helpers/extractors.cr | 317 ++++++++++++++++++++++++++++
 src/invidious/helpers/helpers.cr    | 162 +-------------
 2 files changed, 320 insertions(+), 159 deletions(-)
 create mode 100644 src/invidious/helpers/extractors.cr

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
new file mode 100644
index 00000000..e8daa913
--- /dev/null
+++ b/src/invidious/helpers/extractors.cr
@@ -0,0 +1,317 @@
+# This file contains helper methods to parse the Youtube API json data into
+# neat little packages we can use
+
+# Tuple of Parsers/Extractors so we can easily cycle through them.
+private ITEM_CONTAINER_EXTRACTOR = {
+  YoutubeTabsExtractor.new,
+  SearchResultsExtractor.new,
+  ContinuationExtractor.new,
+}
+
+private ITEM_PARSERS = {
+  VideoParser.new,
+  ChannelParser.new,
+  GridPlaylistParser.new,
+  PlaylistParser.new,
+}
+
+private struct AuthorFallback
+  property name, id
+
+  def initialize(@name : String? = nil, @id : String? = nil)
+  end
+end
+
+# The following are the parsers for parsing raw item data into neatly packaged structs.
+# They're accessed through the process() method which validates the given data as applicable
+# to their specific struct and then use the internal parse() method to assemble the struct
+# specific to their category.
+private class ItemParser
+  # Base type for all item parsers.
+  def process(item : JSON::Any, author_fallback : AuthorFallback)
+  end
+
+  private def parse(item_contents : JSON::Any, author_fallback : AuthorFallback)
+  end
+end
+
+private class VideoParser < ItemParser
+  def process(item, author_fallback)
+    if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  private def parse(item_contents, author_fallback)
+    video_id = item_contents["videoId"].as_s
+    title = item_contents["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+
+    author_info = item_contents["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
+    author = author_info.try &.["text"].as_s || author_fallback.name || ""
+    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
+
+    published = item_contents["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+    view_count = item_contents["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+    description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+    length_seconds = item_contents["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+                     item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+                       .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+    live_now = false
+    paid = false
+    premium = false
+
+    premiere_timestamp = item_contents["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+
+    item_contents["badges"]?.try &.as_a.each do |badge|
+      b = badge["metadataBadgeRenderer"]
+      case b["label"].as_s
+      when "LIVE NOW"
+        live_now = true
+      when "New", "4K", "CC"
+        # TODO
+      when "Premium"
+        # TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"]
+        premium = true
+      else nil # Ignore
+      end
+    end
+
+    SearchVideo.new({
+      title:              title,
+      id:                 video_id,
+      author:             author,
+      ucid:               author_id,
+      published:          published,
+      views:              view_count,
+      description_html:   description_html,
+      length_seconds:     length_seconds,
+      live_now:           live_now,
+      premium:            premium,
+      premiere_timestamp: premiere_timestamp,
+    })
+  end
+end
+
+private class ChannelParser < ItemParser
+  def process(item, author_fallback)
+    if item_contents = item["channelRenderer"]?
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  private def parse(item_contents, author_fallback)
+    author = item_contents["title"]["simpleText"]?.try &.as_s || author_fallback.name || ""
+    author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id || ""
+
+    author_thumbnail = item_contents["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || ""
+    subscriber_count = item_contents["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+    auto_generated = false
+    auto_generated = true if !item_contents["videoCountText"]?
+    video_count = item_contents["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+    description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+    SearchChannel.new({
+      author:           author,
+      ucid:             author_id,
+      author_thumbnail: author_thumbnail,
+      subscriber_count: subscriber_count,
+      video_count:      video_count,
+      description_html: description_html,
+      auto_generated:   auto_generated,
+    })
+  end
+end
+
+private class GridPlaylistParser < ItemParser
+  def process(item, author_fallback)
+    if item_contents = item["gridPlaylistRenderer"]?
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  private def parse(item_contents, author_fallback)
+    title = item_contents["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+    plid = item_contents["playlistId"]?.try &.as_s || ""
+
+    video_count = item_contents["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+    playlist_thumbnail = item_contents["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+    SearchPlaylist.new({
+      title:       title,
+      id:          plid,
+      author:      author_fallback.name || "",
+      ucid:        author_fallback.id || "",
+      video_count: video_count,
+      videos:      [] of SearchPlaylistVideo,
+      thumbnail:   playlist_thumbnail,
+    })
+  end
+end
+
+private class PlaylistParser < ItemParser
+  def process(item, author_fallback)
+    if item_contents = item["playlistRenderer"]?
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  def parse(item_contents, author_fallback)
+    title = item_contents["title"]["simpleText"]?.try &.as_s || ""
+    plid = item_contents["playlistId"]?.try &.as_s || ""
+
+    video_count = item_contents["videoCount"]?.try &.as_s.to_i || 0
+    playlist_thumbnail = item_contents["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+    author_info = item_contents["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
+    author = author_info.try &.["text"].as_s || author_fallback.name || ""
+    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
+
+    videos = item_contents["videos"]?.try &.as_a.map do |v|
+      v = v["childVideoRenderer"]
+      v_title = v["title"]["simpleText"]?.try &.as_s || ""
+      v_id = v["videoId"]?.try &.as_s || ""
+      v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+      SearchPlaylistVideo.new({
+        title:          v_title,
+        id:             v_id,
+        length_seconds: v_length_seconds,
+      })
+    end || [] of SearchPlaylistVideo
+
+    # TODO: item_contents["publishedTimeText"]?
+
+    SearchPlaylist.new({
+      title:       title,
+      id:          plid,
+      author:      author,
+      ucid:        author_id,
+      video_count: video_count,
+      videos:      videos,
+      thumbnail:   playlist_thumbnail,
+    })
+  end
+end
+
+# The following are the extractors for extracting an array of items from
+# the internal Youtube API's JSON response. The result is then packaged into
+# a structure we can more easily use via the parsers above. Their internals are
+# identical to the item parsers.
+
+private class ItemsContainerExtractor
+  def process(item : Hash(String, JSON::Any))
+  end
+
+  private def extract(target : JSON::Any)
+  end
+end
+
+private class YoutubeTabsExtractor < ItemsContainerExtractor
+  def process(initial_data)
+    if target = initial_data["twoColumnBrowseResultsRenderer"]?
+      self.extract(target)
+    end
+  end
+
+  private def extract(target)
+    raw_items = [] of JSON::Any
+    selected_tab = extract_selected_tab(target["tabs"])
+    content = selected_tab["tabRenderer"]["content"]
+
+    content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
+      renderer_container = renderer_container["itemSectionRenderer"]
+      renderer_container_contents = renderer_container["contents"].as_a[0]
+
+      # Shelf renderer usually refer to a category and would need special handling once
+      # An extractor for categories are added. But for now it is just used to
+      # extract items for the trending page
+      if items_container = renderer_container_contents["shelfRenderer"]?
+        if items_container["content"]["expandedShelfContentsRenderer"]?
+          items_container = items_container["content"]["expandedShelfContentsRenderer"]
+        end
+      elsif items_container = renderer_container_contents["gridRenderer"]?
+      else
+        items_container = renderer_container_contents
+      end
+
+      items_container["items"].as_a.each do |item|
+        raw_items << item
+      end
+    end
+
+    return raw_items
+  end
+end
+
+private class SearchResultsExtractor < ItemsContainerExtractor
+  def process(initial_data)
+    if target = initial_data["twoColumnSearchResultsRenderer"]?
+      self.extract(target)
+    end
+  end
+
+  private def extract(target)
+    raw_items = [] of JSON::Any
+    content = target["primaryContents"]
+    renderer = content["sectionListRenderer"]["contents"].as_a[0]["itemSectionRenderer"]
+    raw_items = renderer["contents"].as_a
+
+    return raw_items
+  end
+end
+
+private class ContinuationExtractor < ItemsContainerExtractor
+  def process(initial_data)
+    if target = initial_data["continuationContents"]?
+      self.extract(target)
+    end
+  end
+
+  private def extract(target)
+    raw_items = [] of JSON::Any
+    if content = target["gridContinuation"]?
+      raw_items = content["items"].as_a
+    end
+
+    return raw_items
+  end
+end
+
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+  # Parses an item from Youtube's JSON response into a more usable structure.
+  # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
+  author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
+
+  # Cycles through all of the item parsers and attempt to parse the raw YT JSON data.
+  # Each parser automatically validates the data given to see if the data is
+  # applicable to itself. If not nil is returned and the next parser is attemped.
+  ITEM_PARSERS.each do |parser|
+    result = parser.process(item, author_fallback)
+    if !result.nil?
+      return result
+    end
+  end
+  # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
+end
+
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+  items = [] of SearchItem
+  initial_data = initial_data["contents"]?.try &.as_h || initial_data["response"]?.try &.as_h || initial_data
+
+  # This is identicial to the parser cyling of extract_item().
+  ITEM_CONTAINER_EXTRACTOR.each do |extractor|
+    results = extractor.process(initial_data)
+    if !results.nil?
+      results.each do |item|
+        parsed_result = extract_item(item, author_fallback, author_id_fallback)
+
+        if !parsed_result.nil?
+          items << parsed_result
+        end
+      end
+    end
+  end
+
+  return items
+end
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index fb33df1c..1a058195 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -251,165 +251,9 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
   extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end
 
-def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
-  if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
-    video_id = i["videoId"].as_s
-    title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
-
-    author_info = i["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-    author = author_info.try &.["text"].as_s || author_fallback || ""
-    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
-    published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
-    view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
-    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-    length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
-                     i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
-                       .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
-
-    live_now = false
-    premium = false
-
-    premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
-
-    i["badges"]?.try &.as_a.each do |badge|
-      b = badge["metadataBadgeRenderer"]
-      case b["label"].as_s
-      when "LIVE NOW"
-        live_now = true
-      when "New", "4K", "CC"
-        # TODO
-      when "Premium"
-        # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
-        premium = true
-      else nil # Ignore
-      end
-    end
-
-    SearchVideo.new({
-      title:              title,
-      id:                 video_id,
-      author:             author,
-      ucid:               author_id,
-      published:          published,
-      views:              view_count,
-      description_html:   description_html,
-      length_seconds:     length_seconds,
-      live_now:           live_now,
-      premium:            premium,
-      premiere_timestamp: premiere_timestamp,
-    })
-  elsif i = item["channelRenderer"]?
-    author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
-    author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
-
-    author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || ""
-    subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
-
-    auto_generated = false
-    auto_generated = true if !i["videoCountText"]?
-    video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-    description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-
-    SearchChannel.new({
-      author:           author,
-      ucid:             author_id,
-      author_thumbnail: author_thumbnail,
-      subscriber_count: subscriber_count,
-      video_count:      video_count,
-      description_html: description_html,
-      auto_generated:   auto_generated,
-    })
-  elsif i = item["gridPlaylistRenderer"]?
-    title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
-    plid = i["playlistId"]?.try &.as_s || ""
-
-    video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-    playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
-
-    SearchPlaylist.new({
-      title:       title,
-      id:          plid,
-      author:      author_fallback || "",
-      ucid:        author_id_fallback || "",
-      video_count: video_count,
-      videos:      [] of SearchPlaylistVideo,
-      thumbnail:   playlist_thumbnail,
-    })
-  elsif i = item["playlistRenderer"]?
-    title = i["title"]["simpleText"]?.try &.as_s || ""
-    plid = i["playlistId"]?.try &.as_s || ""
-
-    video_count = i["videoCount"]?.try &.as_s.to_i || 0
-    playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
-
-    author_info = i["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-    author = author_info.try &.["text"].as_s || author_fallback || ""
-    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
-    videos = i["videos"]?.try &.as_a.map do |v|
-      v = v["childVideoRenderer"]
-      v_title = v["title"]["simpleText"]?.try &.as_s || ""
-      v_id = v["videoId"]?.try &.as_s || ""
-      v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
-      SearchPlaylistVideo.new({
-        title:          v_title,
-        id:             v_id,
-        length_seconds: v_length_seconds,
-      })
-    end || [] of SearchPlaylistVideo
-
-    # TODO: i["publishedTimeText"]?
-
-    SearchPlaylist.new({
-      title:       title,
-      id:          plid,
-      author:      author,
-      ucid:        author_id,
-      video_count: video_count,
-      videos:      videos,
-      thumbnail:   playlist_thumbnail,
-    })
-  elsif i = item["radioRenderer"]? # Mix
-    # TODO
-  elsif i = item["showRenderer"]? # Show
-    # TODO
-  elsif i = item["shelfRenderer"]?
-  elsif i = item["horizontalCardListRenderer"]?
-  elsif i = item["searchPyvRenderer"]? # Ad
-  end
-end
-
-def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
-  items = [] of SearchItem
-
-  channel_v2_response = initial_data
-    .try &.["continuationContents"]?
-      .try &.["gridContinuation"]?
-        .try &.["items"]?
-
-  if channel_v2_response
-    channel_v2_response.try &.as_a.each { |item|
-      extract_item(item, author_fallback, author_id_fallback)
-        .try { |t| items << t }
-    }
-  else
-    initial_data.try { |t| t["contents"]? || t["response"]? }
-      .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
-        t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
-        t["continuationContents"]? }
-      .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
-      .try &.["contents"].as_a
-        .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
-          .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
-            t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
-          .each { |item|
-            extract_item(item, author_fallback, author_id_fallback)
-              .try { |t| items << t }
-          } }
-  end
-
-  items
+def extract_selected_tab(tabs)
+  # Extract the selected tab from the array of tabs Youtube returns
+  return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]
 end
 
 def check_enum(db, enum_name, struct_type = nil)

From a50f64f6e9ab55efa9301915817b11f152625f22 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Fri, 7 May 2021 05:13:53 -0700
Subject: [PATCH 02/22] Add parser for categories (shelfRenderer)

This commit adds a new parser for YT's shelfRenderers, which are
typically used to denote different categories. The code for featured
channels parsing has also been moved to use the new parser, but some
additional refactoring is needed there.

The ContinuationExtractor has also been improved and is now capable of
extracting continuation data that is packaged under
"appendContinuationItemsAction".

In addition, this commit adds some useful helper functions to extract
the currently selected tab and the continuation token. This is mainly
to reduce code size and repetition.
--
This cherry-picked commit also removes the code for parsing featured
channels present in the original commit.

(cherry picked from commit 8000d538dbbf1eb9c78e000b1449926ba3b24da9)
---
 src/invidious/helpers/extractors.cr     | 117 +++++++++--
 src/invidious/helpers/helpers.cr        |  29 ++-
 src/invidious/helpers/invidiousitems.cr | 256 ++++++++++++++++++++++++
 src/invidious/search.cr                 | 230 ---------------------
 src/invidious/views/components/item.ecr |   1 +
 5 files changed, 389 insertions(+), 244 deletions(-)
 create mode 100644 src/invidious/helpers/invidiousitems.cr

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index e8daa913..1fa06c91 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -13,6 +13,7 @@ private ITEM_PARSERS = {
   ChannelParser.new,
   GridPlaylistParser.new,
   PlaylistParser.new,
+  CategoryParser.new,
 }
 
 private struct AuthorFallback
@@ -95,7 +96,7 @@ end
 
 private class ChannelParser < ItemParser
   def process(item, author_fallback)
-    if item_contents = item["channelRenderer"]?
+    if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
       return self.parse(item_contents, author_fallback)
     end
   end
@@ -194,6 +195,88 @@ private class PlaylistParser < ItemParser
   end
 end
 
+private class CategoryParser < ItemParser
+  def process(item, author_fallback)
+    if item_contents = item["shelfRenderer"]?
+      return self.parse(item_contents, author_fallback)
+    end
+  end
+
+  def parse(item_contents, author_fallback)
+    # Title extraction is a bit complicated. There are two possible routes for it
+    # as well as times when the title attribute just isn't sent by YT.
+
+    title_container = item_contents["title"]? || ""
+    if !title_container.is_a? String
+      if title = title_container["simpleText"]?
+        title = title.as_s
+      else
+        title = title_container["runs"][0]["text"].as_s
+      end
+    else
+      title = ""
+    end
+
+    browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"] || nil
+    browse_endpoint_data = ""
+    category_type = 0 # 0: Video, 1: Channels, 2: Playlist/feed, 3: trending
+
+    # There's no endpoint data for video and trending category
+    if !item_contents["endpoint"]?
+      if !item_contents["videoId"]?
+        category_type = 3
+      end
+    end
+
+    if !browse_endpoint.nil?
+      # Playlist/feed categories doesn't need the params value (nor is it even included in yt response)
+      # instead it uses the browseId parameter. So if there isn't a params value we can assume the
+      # category is a playlist/feed
+      if browse_endpoint["params"]?
+        browse_endpoint_data = browse_endpoint["params"].as_s
+        category_type = 1
+      else
+        browse_endpoint_data = browse_endpoint["browseId"].as_s
+        category_type = 2
+      end
+    end
+
+    # Sometimes a category can have badges.
+    badges = [] of Tuple(String, String) # (Badge style, label)
+    item_contents["badges"]?.try &.as_a.each do |badge|
+      badge = badge["metadataBadgeRenderer"]
+      badges << {badge["style"].as_s, badge["label"].as_s}
+    end
+
+    # Content parsing
+    contents = [] of SearchItem
+
+    # Content could be in three locations.
+    if content_container = item_contents["content"]["horizontalListRenderer"]?
+    elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]
+    elsif content_container = item_contents["content"]["verticalListRenderer"]
+    else
+      content_container = item_contents["contents"]
+    end
+
+    raw_contents = content_container["items"].as_a
+    raw_contents.each do |item|
+      result = extract_item(item)
+      if !result.nil?
+        contents << result
+      end
+    end
+
+    Category.new({
+      title:                title,
+      contents:             contents,
+      browse_endpoint_data: browse_endpoint_data,
+      continuation_token:   nil,
+      badges:               badges,
+    })
+  end
+end
+
 # The following are the extractors for extracting an array of items from
 # the internal Youtube API's JSON response. The result is then packaged into
 # a structure we can more easily use via the parsers above. Their internals are
@@ -217,19 +300,16 @@ private class YoutubeTabsExtractor < ItemsContainerExtractor
   private def extract(target)
     raw_items = [] of JSON::Any
     selected_tab = extract_selected_tab(target["tabs"])
-    content = selected_tab["tabRenderer"]["content"]
+    content = selected_tab["content"]
 
     content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
       renderer_container = renderer_container["itemSectionRenderer"]
       renderer_container_contents = renderer_container["contents"].as_a[0]
 
-      # Shelf renderer usually refer to a category and would need special handling once
-      # An extractor for categories are added. But for now it is just used to
-      # extract items for the trending page
+      # Category extraction
       if items_container = renderer_container_contents["shelfRenderer"]?
-        if items_container["content"]["expandedShelfContentsRenderer"]?
-          items_container = items_container["content"]["expandedShelfContentsRenderer"]
-        end
+        raw_items << renderer_container_contents
+        next
       elsif items_container = renderer_container_contents["gridRenderer"]?
       else
         items_container = renderer_container_contents
@@ -265,6 +345,8 @@ private class ContinuationExtractor < ItemsContainerExtractor
   def process(initial_data)
     if target = initial_data["continuationContents"]?
       self.extract(target)
+    elsif target = initial_data["appendContinuationItemsAction"]?
+      self.extract(target)
     end
   end
 
@@ -272,13 +354,16 @@ private class ContinuationExtractor < ItemsContainerExtractor
     raw_items = [] of JSON::Any
     if content = target["gridContinuation"]?
       raw_items = content["items"].as_a
+    elsif content = target["continuationItems"]?
+      raw_items = content.as_a
     end
 
     return raw_items
   end
 end
 
-def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+def extract_item(item : JSON::Any, author_fallback : String? = nil,
+                 author_id_fallback : String? = nil)
   # Parses an item from Youtube's JSON response into a more usable structure.
   # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
   author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
@@ -295,13 +380,20 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fa
   # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
 end
 
-def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
+                  author_id_fallback : String? = nil)
   items = [] of SearchItem
-  initial_data = initial_data["contents"]?.try &.as_h || initial_data["response"]?.try &.as_h || initial_data
+
+  if unpackaged_data = initial_data["contents"]?.try &.as_h
+  elsif unpackaged_data = initial_data["response"]?.try &.as_h
+  elsif unpackaged_data = initial_data["onResponseReceivedActions"]?.try &.as_a.[0].as_h
+  else
+    unpackaged_data = initial_data
+  end
 
   # This is identicial to the parser cyling of extract_item().
   ITEM_CONTAINER_EXTRACTOR.each do |extractor|
-    results = extractor.process(initial_data)
+    results = extractor.process(unpackaged_data)
     if !results.nil?
       results.each do |item|
         parsed_result = extract_item(item, author_fallback, author_id_fallback)
@@ -310,6 +402,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
           items << parsed_result
         end
       end
+      return items
     end
   end
 
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index 1a058195..a52c7bd4 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -248,12 +248,37 @@ def html_to_content(description_html : String)
 end
 
 def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
-  extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
+  extracted = extract_items(initial_data, author_fallback, author_id_fallback)
+
+  if extracted.is_a?(Category)
+    target = extracted.contents
+  else
+    target = extracted
+  end
+  return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end
 
 def extract_selected_tab(tabs)
   # Extract the selected tab from the array of tabs Youtube returns
-  return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]
+  return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"]
+end
+
+def fetch_continuation_token(items : Array(JSON::Any))
+  # Fetches the continuation token from an array of items
+  return items.last["continuationItemRenderer"]?
+    .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
+end
+
+def fetch_continuation_token(initial_data : Hash(String, JSON::Any))
+  # Fetches the continuation token from initial data
+  if initial_data["onResponseReceivedActions"]?
+    continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
+  else
+    tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
+    continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"]
+  end
+
+  return fetch_continuation_token(continuation_items.as_a)
 end
 
 def check_enum(db, enum_name, struct_type = nil)
diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/invidiousitems.cr
new file mode 100644
index 00000000..50a47726
--- /dev/null
+++ b/src/invidious/helpers/invidiousitems.cr
@@ -0,0 +1,256 @@
+struct SearchVideo
+  include DB::Serializable
+
+  property title : String
+  property id : String
+  property author : String
+  property ucid : String
+  property published : Time
+  property views : Int64
+  property description_html : String
+  property length_seconds : Int32
+  property live_now : Bool
+  property premium : Bool
+  property premiere_timestamp : Time?
+
+  def to_xml(auto_generated, query_params, xml : XML::Builder)
+    query_params["v"] = self.id
+
+    xml.element("entry") do
+      xml.element("id") { xml.text "yt:video:#{self.id}" }
+      xml.element("yt:videoId") { xml.text self.id }
+      xml.element("yt:channelId") { xml.text self.ucid }
+      xml.element("title") { xml.text self.title }
+      xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
+
+      xml.element("author") do
+        if auto_generated
+          xml.element("name") { xml.text self.author }
+          xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
+        else
+          xml.element("name") { xml.text author }
+          xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" }
+        end
+      end
+
+      xml.element("content", type: "xhtml") do
+        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
+          xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
+            xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
+          end
+
+          xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) }
+        end
+      end
+
+      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
+
+      xml.element("media:group") do
+        xml.element("media:title") { xml.text self.title }
+        xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
+          width: "320", height: "180")
+        xml.element("media:description") { xml.text html_to_content(self.description_html) }
+      end
+
+      xml.element("media:community") do
+        xml.element("media:statistics", views: self.views)
+      end
+    end
+  end
+
+  def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil)
+    if xml
+      to_xml(HOST_URL, auto_generated, query_params, xml)
+    else
+      XML.build do |json|
+        to_xml(HOST_URL, auto_generated, query_params, xml)
+      end
+    end
+  end
+
+  def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder)
+    json.object do
+      json.field "type", "video"
+      json.field "title", self.title
+      json.field "videoId", self.id
+
+      json.field "author", self.author
+      json.field "authorId", self.ucid
+      json.field "authorUrl", "/channel/#{self.ucid}"
+
+      json.field "videoThumbnails" do
+        generate_thumbnails(json, self.id)
+      end
+
+      json.field "description", html_to_content(self.description_html)
+      json.field "descriptionHtml", self.description_html
+
+      json.field "viewCount", self.views
+      json.field "published", self.published.to_unix
+      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
+      json.field "lengthSeconds", self.length_seconds
+      json.field "liveNow", self.live_now
+      json.field "premium", self.premium
+      json.field "isUpcoming", self.is_upcoming
+
+      if self.premiere_timestamp
+        json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix
+      end
+    end
+  end
+
+  def to_json(locale, json : JSON::Builder | Nil = nil)
+    if json
+      to_json(locale, json)
+    else
+      JSON.build do |json|
+        to_json(locale, json)
+      end
+    end
+  end
+
+  def is_upcoming
+    premiere_timestamp ? true : false
+  end
+end
+
+struct SearchPlaylistVideo
+  include DB::Serializable
+
+  property title : String
+  property id : String
+  property length_seconds : Int32
+end
+
+struct SearchPlaylist
+  include DB::Serializable
+
+  property title : String
+  property id : String
+  property author : String
+  property ucid : String
+  property video_count : Int32
+  property videos : Array(SearchPlaylistVideo)
+  property thumbnail : String?
+
+  def to_json(locale, json : JSON::Builder)
+    json.object do
+      json.field "type", "playlist"
+      json.field "title", self.title
+      json.field "playlistId", self.id
+      json.field "playlistThumbnail", self.thumbnail
+
+      json.field "author", self.author
+      json.field "authorId", self.ucid
+      json.field "authorUrl", "/channel/#{self.ucid}"
+
+      json.field "videoCount", self.video_count
+      json.field "videos" do
+        json.array do
+          self.videos.each do |video|
+            json.object do
+              json.field "title", video.title
+              json.field "videoId", video.id
+              json.field "lengthSeconds", video.length_seconds
+
+              json.field "videoThumbnails" do
+                generate_thumbnails(json, video.id)
+              end
+            end
+          end
+        end
+      end
+    end
+  end
+
+  def to_json(locale, json : JSON::Builder | Nil = nil)
+    if json
+      to_json(locale, json)
+    else
+      JSON.build do |json|
+        to_json(locale, json)
+      end
+    end
+  end
+end
+
+struct SearchChannel
+  include DB::Serializable
+
+  property author : String
+  property ucid : String
+  property author_thumbnail : String
+  property subscriber_count : Int32
+  property video_count : Int32
+  property description_html : String
+  property auto_generated : Bool
+
+  def to_json(locale, json : JSON::Builder)
+    json.object do
+      json.field "type", "channel"
+      json.field "author", self.author
+      json.field "authorId", self.ucid
+      json.field "authorUrl", "/channel/#{self.ucid}"
+
+      json.field "authorThumbnails" do
+        json.array do
+          qualities = {32, 48, 76, 100, 176, 512}
+
+          qualities.each do |quality|
+            json.object do
+              json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}")
+              json.field "width", quality
+              json.field "height", quality
+            end
+          end
+        end
+      end
+
+      json.field "autoGenerated", self.auto_generated
+      json.field "subCount", self.subscriber_count
+      json.field "videoCount", self.video_count
+
+      json.field "description", html_to_content(self.description_html)
+      json.field "descriptionHtml", self.description_html
+    end
+  end
+
+  def to_json(locale, json : JSON::Builder | Nil = nil)
+    if json
+      to_json(locale, json)
+    else
+      JSON.build do |json|
+        to_json(locale, json)
+      end
+    end
+  end
+end
+
+class Category
+  include DB::Serializable
+
+  property title : String
+  property contents : Array(SearchItem) | SearchItem
+  property browse_endpoint_data : String?
+  property continuation_token : String?
+  property badges : Array(Tuple(String, String))?
+
+  def to_json(locale, json : JSON::Builder)
+    json.object do
+      json.field "title", self.title
+      json.field "contents", self.contents
+    end
+  end
+
+  def to_json(locale, json : JSON::Builder | Nil = nil)
+    if json
+      to_json(locale, json)
+    else
+      JSON.build do |json|
+        to_json(locale, json)
+      end
+    end
+  end
+end
+
+alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index a3fcc7a3..eb9c37c5 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -1,233 +1,3 @@
-struct SearchVideo
-  include DB::Serializable
-
-  property title : String
-  property id : String
-  property author : String
-  property ucid : String
-  property published : Time
-  property views : Int64
-  property description_html : String
-  property length_seconds : Int32
-  property live_now : Bool
-  property premium : Bool
-  property premiere_timestamp : Time?
-
-  def to_xml(auto_generated, query_params, xml : XML::Builder)
-    query_params["v"] = self.id
-
-    xml.element("entry") do
-      xml.element("id") { xml.text "yt:video:#{self.id}" }
-      xml.element("yt:videoId") { xml.text self.id }
-      xml.element("yt:channelId") { xml.text self.ucid }
-      xml.element("title") { xml.text self.title }
-      xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}")
-
-      xml.element("author") do
-        if auto_generated
-          xml.element("name") { xml.text self.author }
-          xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" }
-        else
-          xml.element("name") { xml.text author }
-          xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" }
-        end
-      end
-
-      xml.element("content", type: "xhtml") do
-        xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do
-          xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do
-            xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg")
-          end
-
-          xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) }
-        end
-      end
-
-      xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") }
-
-      xml.element("media:group") do
-        xml.element("media:title") { xml.text self.title }
-        xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg",
-          width: "320", height: "180")
-        xml.element("media:description") { xml.text html_to_content(self.description_html) }
-      end
-
-      xml.element("media:community") do
-        xml.element("media:statistics", views: self.views)
-      end
-    end
-  end
-
-  def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil)
-    if xml
-      to_xml(HOST_URL, auto_generated, query_params, xml)
-    else
-      XML.build do |json|
-        to_xml(HOST_URL, auto_generated, query_params, xml)
-      end
-    end
-  end
-
-  def to_json(locale, json : JSON::Builder)
-    json.object do
-      json.field "type", "video"
-      json.field "title", self.title
-      json.field "videoId", self.id
-
-      json.field "author", self.author
-      json.field "authorId", self.ucid
-      json.field "authorUrl", "/channel/#{self.ucid}"
-
-      json.field "videoThumbnails" do
-        generate_thumbnails(json, self.id)
-      end
-
-      json.field "description", html_to_content(self.description_html)
-      json.field "descriptionHtml", self.description_html
-
-      json.field "viewCount", self.views
-      json.field "published", self.published.to_unix
-      json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale))
-      json.field "lengthSeconds", self.length_seconds
-      json.field "liveNow", self.live_now
-      json.field "premium", self.premium
-      json.field "isUpcoming", self.is_upcoming
-
-      if self.premiere_timestamp
-        json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix
-      end
-    end
-  end
-
-  def to_json(locale, json : JSON::Builder | Nil = nil)
-    if json
-      to_json(locale, json)
-    else
-      JSON.build do |json|
-        to_json(locale, json)
-      end
-    end
-  end
-
-  def is_upcoming
-    premiere_timestamp ? true : false
-  end
-end
-
-struct SearchPlaylistVideo
-  include DB::Serializable
-
-  property title : String
-  property id : String
-  property length_seconds : Int32
-end
-
-struct SearchPlaylist
-  include DB::Serializable
-
-  property title : String
-  property id : String
-  property author : String
-  property ucid : String
-  property video_count : Int32
-  property videos : Array(SearchPlaylistVideo)
-  property thumbnail : String?
-
-  def to_json(locale, json : JSON::Builder)
-    json.object do
-      json.field "type", "playlist"
-      json.field "title", self.title
-      json.field "playlistId", self.id
-      json.field "playlistThumbnail", self.thumbnail
-
-      json.field "author", self.author
-      json.field "authorId", self.ucid
-      json.field "authorUrl", "/channel/#{self.ucid}"
-
-      json.field "videoCount", self.video_count
-      json.field "videos" do
-        json.array do
-          self.videos.each do |video|
-            json.object do
-              json.field "title", video.title
-              json.field "videoId", video.id
-              json.field "lengthSeconds", video.length_seconds
-
-              json.field "videoThumbnails" do
-                generate_thumbnails(json, video.id)
-              end
-            end
-          end
-        end
-      end
-    end
-  end
-
-  def to_json(locale, json : JSON::Builder | Nil = nil)
-    if json
-      to_json(locale, json)
-    else
-      JSON.build do |json|
-        to_json(locale, json)
-      end
-    end
-  end
-end
-
-struct SearchChannel
-  include DB::Serializable
-
-  property author : String
-  property ucid : String
-  property author_thumbnail : String
-  property subscriber_count : Int32
-  property video_count : Int32
-  property description_html : String
-  property auto_generated : Bool
-
-  def to_json(locale, json : JSON::Builder)
-    json.object do
-      json.field "type", "channel"
-      json.field "author", self.author
-      json.field "authorId", self.ucid
-      json.field "authorUrl", "/channel/#{self.ucid}"
-
-      json.field "authorThumbnails" do
-        json.array do
-          qualities = {32, 48, 76, 100, 176, 512}
-
-          qualities.each do |quality|
-            json.object do
-              json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}")
-              json.field "width", quality
-              json.field "height", quality
-            end
-          end
-        end
-      end
-
-      json.field "autoGenerated", self.auto_generated
-      json.field "subCount", self.subscriber_count
-      json.field "videoCount", self.video_count
-
-      json.field "description", html_to_content(self.description_html)
-      json.field "descriptionHtml", self.description_html
-    end
-  end
-
-  def to_json(locale, json : JSON::Builder | Nil = nil)
-    if json
-      to_json(locale, json)
-    else
-      JSON.build do |json|
-        to_json(locale, json)
-      end
-    end
-  end
-end
-
-alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
-
 def channel_search(query, page, channel)
   response = YT_POOL.client &.get("/channel/#{channel}")
 
diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr
index 68aa1812..ec282216 100644
--- a/src/invidious/views/components/item.ecr
+++ b/src/invidious/views/components/item.ecr
@@ -96,6 +96,7 @@
                 </div>
                 <% end %>
             </div>
+        <% when Category %>
         <% else %>
             <a style="width:100%" href="/watch?v=<%= item.id %>">
                 <% if !env.get("preferences").as(Preferences).thin_mode %>

From ae30f32c36c738b85dc114a4bb4edaa95257a3c2 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Sat, 8 May 2021 03:43:26 -0700
Subject: [PATCH 03/22] Unpack search items that are embedded in categories

This is a squash of the following cherry-picked commits:

Fix category parse error on search

(cherry picked from commit cc02fed4e69f0eb5f19e017173632b3a3f20519f)

Fix category items not being extracted in search

(cherry picked from commit 2605b9c609ff217b5a6ae09d22450596dcad90fc)

Make search not include category items for now

(cherry picked from commit ca4afd59f46b595e3c339f31432cad98a5771ee1)

Change behavior of categories in search results

(cherry picked from commit cc1067561051b1c113b490e79c4a71cd346f7b3f)

Fix missing search results in extraction

(cherry picked from commit abda6840d5bfe58f845128bdd1a3f4916dd3bb84)

Fix miscount of search results

(cherry picked from commit 491e33450eb1300d0234bb33df0d0e78a027114f)
---
 src/invidious/helpers/extractors.cr | 15 ++++++++++-----
 src/invidious/search.cr             | 17 ++++++++++++++++-
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 1fa06c91..ea9411d7 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -253,8 +253,8 @@ private class CategoryParser < ItemParser
 
     # Content could be in three locations.
     if content_container = item_contents["content"]["horizontalListRenderer"]?
-    elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]
-    elsif content_container = item_contents["content"]["verticalListRenderer"]
+    elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]?
+    elsif content_container = item_contents["content"]["verticalListRenderer"]?
     else
       content_container = item_contents["contents"]
     end
@@ -332,10 +332,15 @@ private class SearchResultsExtractor < ItemsContainerExtractor
   end
 
   private def extract(target)
-    raw_items = [] of JSON::Any
+    raw_items = [] of Array(JSON::Any)
     content = target["primaryContents"]
-    renderer = content["sectionListRenderer"]["contents"].as_a[0]["itemSectionRenderer"]
-    raw_items = renderer["contents"].as_a
+    renderer = content["sectionListRenderer"]["contents"].as_a.each do |node|
+      if node = node["itemSectionRenderer"]?
+        raw_items << node["contents"].as_a
+      end
+    end
+
+    raw_items = raw_items.flatten
 
     return raw_items
   end
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index eb9c37c5..3873b2dd 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -232,5 +232,20 @@ def process_search_query(query, page, user, region)
     count, items = search(search_query, search_params, region).as(Tuple)
   end
 
-  {search_query, count, items, operators}
+  # Light processing to flatten search results out of Categories.
+  # They should ideally be supported in the future.
+  items_without_cate_items = [] of SearchItem | ChannelVideo
+  items.each do |i|
+    if i.is_a? Category
+      i.contents.each do |nest_i|
+        if !nest_i.is_a? Video
+          items_without_cate_items << nest_i
+        end
+      end
+    else
+      items_without_cate_items << i
+    end
+  end
+
+  {search_query, items_without_cate_items.size, items_without_cate_items, url_params}
 end

From 57c63f3598867ce406b807923ea81352f9b1b384 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Mon, 28 Jun 2021 22:51:28 -0700
Subject: [PATCH 04/22] Rename "items_without_cate_items" to reflect usage

---
 src/invidious/search.cr | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index 3873b2dd..adf079f3 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -234,18 +234,18 @@ def process_search_query(query, page, user, region)
 
   # Light processing to flatten search results out of Categories.
   # They should ideally be supported in the future.
-  items_without_cate_items = [] of SearchItem | ChannelVideo
+  items_without_category = [] of SearchItem | ChannelVideo
   items.each do |i|
     if i.is_a? Category
       i.contents.each do |nest_i|
         if !nest_i.is_a? Video
-          items_without_cate_items << nest_i
+          items_without_category << nest_i
         end
       end
     else
-      items_without_cate_items << i
+      items_without_category << i
     end
   end
 
-  {search_query, items_without_cate_items.size, items_without_cate_items, url_params}
+  {search_query, items_without_category.size, items_without_category, url_params}
 end

From 0b7a108a59b2f1def6aea5b611f68b29abf59064 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Sat, 8 May 2021 04:54:12 -0700
Subject: [PATCH 05/22] Move continuation_token out of Category struct

(cherry picked from commit 0e96eda28f25171a0344b972af1852a4d6fc3007)
---
 src/invidious/helpers/extractors.cr     | 11 +++++++++--
 src/invidious/helpers/invidiousitems.cr |  1 -
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index ea9411d7..cd3b1f93 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -217,6 +217,7 @@ private class CategoryParser < ItemParser
       title = ""
     end
 
+    auxiliary_data = {} of String => String
     browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"] || nil
     browse_endpoint_data = ""
     category_type = 0 # 0: Video, 1: Channels, 2: Playlist/feed, 3: trending
@@ -233,7 +234,14 @@ private class CategoryParser < ItemParser
       # instead it uses the browseId parameter. So if there isn't a params value we can assume the
       # category is a playlist/feed
       if browse_endpoint["params"]?
-        browse_endpoint_data = browse_endpoint["params"].as_s
+        # However, even though the channel category type returns the browse endpoint param
+        # we're not going to be using it in order to preserve compatablity with Youtube.
+        # and for an URL that looks cleaner
+        url = item_contents["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
+        url = URI.parse(url.as_s)
+        auxiliary_data["view"] = url.query_params["view"]
+        auxiliary_data["shelf_id"] = url.query_params["shelf_id"]
+
         category_type = 1
       else
         browse_endpoint_data = browse_endpoint["browseId"].as_s
@@ -271,7 +279,6 @@ private class CategoryParser < ItemParser
       title:                title,
       contents:             contents,
       browse_endpoint_data: browse_endpoint_data,
-      continuation_token:   nil,
       badges:               badges,
     })
   end
diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/invidiousitems.cr
index 50a47726..edcb2054 100644
--- a/src/invidious/helpers/invidiousitems.cr
+++ b/src/invidious/helpers/invidiousitems.cr
@@ -232,7 +232,6 @@ class Category
   property title : String
   property contents : Array(SearchItem) | SearchItem
   property browse_endpoint_data : String?
-  property continuation_token : String?
   property badges : Array(Tuple(String, String))?
 
   def to_json(locale, json : JSON::Builder)

From ea6434662daf97e8710fe4d2a4943112994ce760 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Sat, 8 May 2021 06:01:17 -0700
Subject: [PATCH 06/22] Change typing of Category contents to only Array

(cherry picked from commit d3384e17f10d0baca70db7993df14100485be9da)
---
 src/invidious/helpers/invidiousitems.cr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/invidiousitems.cr
index edcb2054..65f755e6 100644
--- a/src/invidious/helpers/invidiousitems.cr
+++ b/src/invidious/helpers/invidiousitems.cr
@@ -230,7 +230,7 @@ class Category
   include DB::Serializable
 
   property title : String
-  property contents : Array(SearchItem) | SearchItem
+  property contents : Array(SearchItem)
   property browse_endpoint_data : String?
   property badges : Array(Tuple(String, String))?
 

From 7b60dac526c5df118c39bf428c0778a7a7982c98 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Sat, 8 May 2021 20:07:07 -0700
Subject: [PATCH 07/22] Add description_html field to Category

(cherry picked from commit aa8f15f795787113e56473f8e8fd606749a14bdd)
---
 src/invidious/helpers/extractors.cr     | 4 ++++
 src/invidious/helpers/invidiousitems.cr | 1 +
 2 files changed, 5 insertions(+)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index cd3b1f93..48885d48 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -256,6 +256,9 @@ private class CategoryParser < ItemParser
       badges << {badge["style"].as_s, badge["label"].as_s}
     end
 
+    # Category description
+    description_html = item_contents["subtitle"]?.try { |desc| parse_content(desc) } || ""
+
     # Content parsing
     contents = [] of SearchItem
 
@@ -278,6 +281,7 @@ private class CategoryParser < ItemParser
     Category.new({
       title:                title,
       contents:             contents,
+      description_html:     description_html,
       browse_endpoint_data: browse_endpoint_data,
       badges:               badges,
     })
diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/invidiousitems.cr
index 65f755e6..2db838ea 100644
--- a/src/invidious/helpers/invidiousitems.cr
+++ b/src/invidious/helpers/invidiousitems.cr
@@ -232,6 +232,7 @@ class Category
   property title : String
   property contents : Array(SearchItem)
   property browse_endpoint_data : String?
+  property description_html : String
   property badges : Array(Tuple(String, String))?
 
   def to_json(locale, json : JSON::Builder)

From abca8f7a7ca043035459abce35d334013a71e957 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Mon, 24 May 2021 11:18:22 -0700
Subject: [PATCH 08/22] Rename invidiousitems.cr

---
 .../helpers/{invidiousitems.cr => serialized_yt_data.cr}          | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename src/invidious/helpers/{invidiousitems.cr => serialized_yt_data.cr} (100%)

diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/serialized_yt_data.cr
similarity index 100%
rename from src/invidious/helpers/invidiousitems.cr
rename to src/invidious/helpers/serialized_yt_data.cr

From be1a43a3377c543b84fd9bd534fd2033b7223e62 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Mon, 28 Jun 2021 23:11:04 -0700
Subject: [PATCH 09/22] Manually extract category refactor from
 1b569bbc99207cae7c20aa285f42477ae361dd30

Also fixes some errors caused by cherry-picking
---
 spec/helpers_spec.cr                        |  1 +
 src/invidious/helpers/extractors.cr         | 43 +++------------------
 src/invidious/helpers/serialized_yt_data.cr |  4 +-
 src/invidious/search.cr                     |  2 +-
 src/invidious/videos.cr                     |  2 +-
 5 files changed, 11 insertions(+), 41 deletions(-)

diff --git a/spec/helpers_spec.cr b/spec/helpers_spec.cr
index ada5b28f..b17c8d73 100644
--- a/spec/helpers_spec.cr
+++ b/spec/helpers_spec.cr
@@ -6,6 +6,7 @@ require "spec"
 require "yaml"
 require "../src/invidious/helpers/*"
 require "../src/invidious/channels/*"
+require "../src/invidious/videos"
 require "../src/invidious/comments"
 require "../src/invidious/playlists"
 require "../src/invidious/search"
diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 48885d48..c1f7205c 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -205,7 +205,6 @@ private class CategoryParser < ItemParser
   def parse(item_contents, author_fallback)
     # Title extraction is a bit complicated. There are two possible routes for it
     # as well as times when the title attribute just isn't sent by YT.
-
     title_container = item_contents["title"]? || ""
     if !title_container.is_a? String
       if title = title_container["simpleText"]?
@@ -217,37 +216,7 @@ private class CategoryParser < ItemParser
       title = ""
     end
 
-    auxiliary_data = {} of String => String
-    browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"] || nil
-    browse_endpoint_data = ""
-    category_type = 0 # 0: Video, 1: Channels, 2: Playlist/feed, 3: trending
-
-    # There's no endpoint data for video and trending category
-    if !item_contents["endpoint"]?
-      if !item_contents["videoId"]?
-        category_type = 3
-      end
-    end
-
-    if !browse_endpoint.nil?
-      # Playlist/feed categories doesn't need the params value (nor is it even included in yt response)
-      # instead it uses the browseId parameter. So if there isn't a params value we can assume the
-      # category is a playlist/feed
-      if browse_endpoint["params"]?
-        # However, even though the channel category type returns the browse endpoint param
-        # we're not going to be using it in order to preserve compatablity with Youtube.
-        # and for an URL that looks cleaner
-        url = item_contents["endpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
-        url = URI.parse(url.as_s)
-        auxiliary_data["view"] = url.query_params["view"]
-        auxiliary_data["shelf_id"] = url.query_params["shelf_id"]
-
-        category_type = 1
-      else
-        browse_endpoint_data = browse_endpoint["browseId"].as_s
-        category_type = 2
-      end
-    end
+    url = item_contents["endpoint"]?.try &.["commandMetadata"]["webCommandMetadata"]["url"].as_s
 
     # Sometimes a category can have badges.
     badges = [] of Tuple(String, String) # (Badge style, label)
@@ -279,11 +248,11 @@ private class CategoryParser < ItemParser
     end
 
     Category.new({
-      title:                title,
-      contents:             contents,
-      description_html:     description_html,
-      browse_endpoint_data: browse_endpoint_data,
-      badges:               badges,
+      title:            title,
+      contents:         contents,
+      description_html: description_html,
+      url:              url,
+      badges:           badges,
     })
   end
 end
diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr
index 2db838ea..61356555 100644
--- a/src/invidious/helpers/serialized_yt_data.cr
+++ b/src/invidious/helpers/serialized_yt_data.cr
@@ -230,8 +230,8 @@ class Category
   include DB::Serializable
 
   property title : String
-  property contents : Array(SearchItem)
-  property browse_endpoint_data : String?
+  property contents : Array(SearchItem) | Array(Video)
+  property url : String?
   property description_html : String
   property badges : Array(Tuple(String, String))?
 
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index adf079f3..d95d802e 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -247,5 +247,5 @@ def process_search_query(query, page, user, region)
     end
   end
 
-  {search_query, items_without_category.size, items_without_category, url_params}
+  {search_query, items_without_category.size, items_without_category, operators}
 end
diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr
index d9c07142..0e6bd77c 100644
--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@@ -275,7 +275,7 @@ struct Video
     end
   end
 
-  def to_json(locale, json : JSON::Builder)
+  def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder)
     json.object do
       json.field "type", "video"
 

From 30e85b40f9b817c8620ef9536ad2d327da9ba83b Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Mon, 28 Jun 2021 23:51:04 -0700
Subject: [PATCH 10/22] Fix extract_videos

---
 src/invidious/helpers/helpers.cr | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index a52c7bd4..99adcd30 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -250,10 +250,13 @@ end
 def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
   extracted = extract_items(initial_data, author_fallback, author_id_fallback)
 
-  if extracted.is_a?(Category)
-    target = extracted.contents
-  else
-    target = extracted
+  target = [] of SearchItem
+  extracted.each do |i|
+    if i.is_a?(Category)
+      i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video }
+    else
+      target << i
+    end
   end
   return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
 end

From 8435e7991337edcb007b82c148a372a0a678b5c1 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 29 Jun 2021 09:23:48 -0700
Subject: [PATCH 11/22] Improve documentation for extract_item(s) funcs

---
 src/invidious/helpers/extractors.cr | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index c1f7205c..e8226888 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -347,10 +347,10 @@ private class ContinuationExtractor < ItemsContainerExtractor
   end
 end
 
+# Parses an item from Youtube's JSON response into a more usable structure.
+# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
 def extract_item(item : JSON::Any, author_fallback : String? = nil,
                  author_id_fallback : String? = nil)
-  # Parses an item from Youtube's JSON response into a more usable structure.
-  # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
   author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
 
   # Cycles through all of the item parsers and attempt to parse the raw YT JSON data.
@@ -365,8 +365,10 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil,
   # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
 end
 
+# Parses multiple items from Youtube's initial JSON response into a more usable structure.
+# The end result is an array of SearchItem.
 def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
-                  author_id_fallback : String? = nil)
+                  author_id_fallback : String? = nil) : Array(SearchItem)
   items = [] of SearchItem
 
   if unpackaged_data = initial_data["contents"]?.try &.as_h

From 3dea670091b0fc4a20d623c928292f7bd94892d8 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Mon, 19 Jul 2021 21:30:41 -0700
Subject: [PATCH 12/22] Switch to structs in extractors.cr for performance

---
 src/invidious/helpers/extractors.cr | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index e8226888..68e84850 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -27,7 +27,7 @@ end
 # They're accessed through the process() method which validates the given data as applicable
 # to their specific struct and then use the internal parse() method to assemble the struct
 # specific to their category.
-private class ItemParser
+private abstract struct ItemParser
   # Base type for all item parsers.
   def process(item : JSON::Any, author_fallback : AuthorFallback)
   end
@@ -36,7 +36,7 @@ private class ItemParser
   end
 end
 
-private class VideoParser < ItemParser
+private struct VideoParser < ItemParser
   def process(item, author_fallback)
     if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
       return self.parse(item_contents, author_fallback)
@@ -94,7 +94,7 @@ private class VideoParser < ItemParser
   end
 end
 
-private class ChannelParser < ItemParser
+private struct ChannelParser < ItemParser
   def process(item, author_fallback)
     if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
       return self.parse(item_contents, author_fallback)
@@ -125,7 +125,7 @@ private class ChannelParser < ItemParser
   end
 end
 
-private class GridPlaylistParser < ItemParser
+private struct GridPlaylistParser < ItemParser
   def process(item, author_fallback)
     if item_contents = item["gridPlaylistRenderer"]?
       return self.parse(item_contents, author_fallback)
@@ -151,7 +151,7 @@ private class GridPlaylistParser < ItemParser
   end
 end
 
-private class PlaylistParser < ItemParser
+private struct PlaylistParser < ItemParser
   def process(item, author_fallback)
     if item_contents = item["playlistRenderer"]?
       return self.parse(item_contents, author_fallback)
@@ -195,7 +195,7 @@ private class PlaylistParser < ItemParser
   end
 end
 
-private class CategoryParser < ItemParser
+private struct CategoryParser < ItemParser
   def process(item, author_fallback)
     if item_contents = item["shelfRenderer"]?
       return self.parse(item_contents, author_fallback)
@@ -262,7 +262,7 @@ end
 # a structure we can more easily use via the parsers above. Their internals are
 # identical to the item parsers.
 
-private class ItemsContainerExtractor
+private abstract struct ItemsContainerExtractor
   def process(item : Hash(String, JSON::Any))
   end
 
@@ -270,7 +270,7 @@ private class ItemsContainerExtractor
   end
 end
 
-private class YoutubeTabsExtractor < ItemsContainerExtractor
+private struct YoutubeTabsExtractor < ItemsContainerExtractor
   def process(initial_data)
     if target = initial_data["twoColumnBrowseResultsRenderer"]?
       self.extract(target)
@@ -304,7 +304,7 @@ private class YoutubeTabsExtractor < ItemsContainerExtractor
   end
 end
 
-private class SearchResultsExtractor < ItemsContainerExtractor
+private struct SearchResultsExtractor < ItemsContainerExtractor
   def process(initial_data)
     if target = initial_data["twoColumnSearchResultsRenderer"]?
       self.extract(target)
@@ -326,7 +326,7 @@ private class SearchResultsExtractor < ItemsContainerExtractor
   end
 end
 
-private class ContinuationExtractor < ItemsContainerExtractor
+private struct ContinuationExtractor < ItemsContainerExtractor
   def process(initial_data)
     if target = initial_data["continuationContents"]?
       self.extract(target)

From 142317c2be064f8114c7d75f9ae336eb6a6e96a3 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 3 Aug 2021 00:22:31 -0700
Subject: [PATCH 13/22] Overhaul extractors.cr to use modules

---
 src/invidious/helpers/extractors.cr | 556 ++++++++++++++--------------
 1 file changed, 269 insertions(+), 287 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 68e84850..cec0e728 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -3,257 +3,245 @@
 
 # Tuple of Parsers/Extractors so we can easily cycle through them.
 private ITEM_CONTAINER_EXTRACTOR = {
-  YoutubeTabsExtractor.new,
-  SearchResultsExtractor.new,
-  ContinuationExtractor.new,
+  Extractors::YouTubeTabs,
+  Extractors::SearchResults,
+  Extractors::Continuation,
 }
 
 private ITEM_PARSERS = {
-  VideoParser.new,
-  ChannelParser.new,
-  GridPlaylistParser.new,
-  PlaylistParser.new,
-  CategoryParser.new,
+  Parsers::VideoRendererParser,
+  Parsers::ChannelRendererParser,
+  Parsers::GridPlaylistRendererParser,
+  Parsers::PlaylistRendererParser,
+  Parsers::CategoryRendererParser,
 }
 
-private struct AuthorFallback
-  property name, id
-
-  def initialize(@name : String? = nil, @id : String? = nil)
-  end
-end
+record AuthorFallback, name : String? = nil, id : String? = nil
 
 # The following are the parsers for parsing raw item data into neatly packaged structs.
 # They're accessed through the process() method which validates the given data as applicable
 # to their specific struct and then use the internal parse() method to assemble the struct
 # specific to their category.
-private abstract struct ItemParser
-  # Base type for all item parsers.
-  def process(item : JSON::Any, author_fallback : AuthorFallback)
-  end
-
-  private def parse(item_contents : JSON::Any, author_fallback : AuthorFallback)
-  end
-end
-
-private struct VideoParser < ItemParser
-  def process(item, author_fallback)
-    if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
-      return self.parse(item_contents, author_fallback)
-    end
-  end
-
-  private def parse(item_contents, author_fallback)
-    video_id = item_contents["videoId"].as_s
-    title = item_contents["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
-
-    author_info = item_contents["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-    author = author_info.try &.["text"].as_s || author_fallback.name || ""
-    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
-
-    published = item_contents["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
-    view_count = item_contents["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
-    description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-    length_seconds = item_contents["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
-                     item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
-                       .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
-
-    live_now = false
-    paid = false
-    premium = false
-
-    premiere_timestamp = item_contents["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
-
-    item_contents["badges"]?.try &.as_a.each do |badge|
-      b = badge["metadataBadgeRenderer"]
-      case b["label"].as_s
-      when "LIVE NOW"
-        live_now = true
-      when "New", "4K", "CC"
-        # TODO
-      when "Premium"
-        # TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"]
-        premium = true
-      else nil # Ignore
+private module Parsers
+  module VideoRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
+        return self.parse(item_contents, author_fallback)
       end
     end
 
-    SearchVideo.new({
-      title:              title,
-      id:                 video_id,
-      author:             author,
-      ucid:               author_id,
-      published:          published,
-      views:              view_count,
-      description_html:   description_html,
-      length_seconds:     length_seconds,
-      live_now:           live_now,
-      premium:            premium,
-      premiere_timestamp: premiere_timestamp,
-    })
-  end
-end
+    private def self.parse(item_contents, author_fallback)
+      video_id = item_contents["videoId"].as_s
+      title = item_contents["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
 
-private struct ChannelParser < ItemParser
-  def process(item, author_fallback)
-    if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
-      return self.parse(item_contents, author_fallback)
-    end
-  end
+      author_info = item_contents["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
+      author = author_info.try &.["text"].as_s || author_fallback.name || ""
+      author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
 
-  private def parse(item_contents, author_fallback)
-    author = item_contents["title"]["simpleText"]?.try &.as_s || author_fallback.name || ""
-    author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id || ""
+      published = item_contents["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+      view_count = item_contents["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+      description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+      length_seconds = item_contents["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+                       item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+                         .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
 
-    author_thumbnail = item_contents["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || ""
-    subscriber_count = item_contents["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+      live_now = false
+      paid = false
+      premium = false
 
-    auto_generated = false
-    auto_generated = true if !item_contents["videoCountText"]?
-    video_count = item_contents["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-    description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+      premiere_timestamp = item_contents["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
 
-    SearchChannel.new({
-      author:           author,
-      ucid:             author_id,
-      author_thumbnail: author_thumbnail,
-      subscriber_count: subscriber_count,
-      video_count:      video_count,
-      description_html: description_html,
-      auto_generated:   auto_generated,
-    })
-  end
-end
+      item_contents["badges"]?.try &.as_a.each do |badge|
+        b = badge["metadataBadgeRenderer"]
+        case b["label"].as_s
+        when "LIVE NOW"
+          live_now = true
+        when "New", "4K", "CC"
+          # TODO
+        when "Premium"
+          # TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"]
+          premium = true
+        else nil # Ignore
+        end
+      end
 
-private struct GridPlaylistParser < ItemParser
-  def process(item, author_fallback)
-    if item_contents = item["gridPlaylistRenderer"]?
-      return self.parse(item_contents, author_fallback)
-    end
-  end
-
-  private def parse(item_contents, author_fallback)
-    title = item_contents["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
-    plid = item_contents["playlistId"]?.try &.as_s || ""
-
-    video_count = item_contents["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-    playlist_thumbnail = item_contents["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
-
-    SearchPlaylist.new({
-      title:       title,
-      id:          plid,
-      author:      author_fallback.name || "",
-      ucid:        author_fallback.id || "",
-      video_count: video_count,
-      videos:      [] of SearchPlaylistVideo,
-      thumbnail:   playlist_thumbnail,
-    })
-  end
-end
-
-private struct PlaylistParser < ItemParser
-  def process(item, author_fallback)
-    if item_contents = item["playlistRenderer"]?
-      return self.parse(item_contents, author_fallback)
-    end
-  end
-
-  def parse(item_contents, author_fallback)
-    title = item_contents["title"]["simpleText"]?.try &.as_s || ""
-    plid = item_contents["playlistId"]?.try &.as_s || ""
-
-    video_count = item_contents["videoCount"]?.try &.as_s.to_i || 0
-    playlist_thumbnail = item_contents["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
-
-    author_info = item_contents["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-    author = author_info.try &.["text"].as_s || author_fallback.name || ""
-    author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
-
-    videos = item_contents["videos"]?.try &.as_a.map do |v|
-      v = v["childVideoRenderer"]
-      v_title = v["title"]["simpleText"]?.try &.as_s || ""
-      v_id = v["videoId"]?.try &.as_s || ""
-      v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
-      SearchPlaylistVideo.new({
-        title:          v_title,
-        id:             v_id,
-        length_seconds: v_length_seconds,
+      SearchVideo.new({
+        title:              title,
+        id:                 video_id,
+        author:             author,
+        ucid:               author_id,
+        published:          published,
+        views:              view_count,
+        description_html:   description_html,
+        length_seconds:     length_seconds,
+        live_now:           live_now,
+        premium:            premium,
+        premiere_timestamp: premiere_timestamp,
       })
-    end || [] of SearchPlaylistVideo
-
-    # TODO: item_contents["publishedTimeText"]?
-
-    SearchPlaylist.new({
-      title:       title,
-      id:          plid,
-      author:      author,
-      ucid:        author_id,
-      video_count: video_count,
-      videos:      videos,
-      thumbnail:   playlist_thumbnail,
-    })
-  end
-end
-
-private struct CategoryParser < ItemParser
-  def process(item, author_fallback)
-    if item_contents = item["shelfRenderer"]?
-      return self.parse(item_contents, author_fallback)
     end
   end
 
-  def parse(item_contents, author_fallback)
-    # Title extraction is a bit complicated. There are two possible routes for it
-    # as well as times when the title attribute just isn't sent by YT.
-    title_container = item_contents["title"]? || ""
-    if !title_container.is_a? String
-      if title = title_container["simpleText"]?
-        title = title.as_s
+  module ChannelRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      author = item_contents["title"]["simpleText"]?.try &.as_s || author_fallback.name || ""
+      author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id || ""
+
+      author_thumbnail = item_contents["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || ""
+      subscriber_count = item_contents["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+      auto_generated = false
+      auto_generated = true if !item_contents["videoCountText"]?
+      video_count = item_contents["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+      description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+      SearchChannel.new({
+        author:           author,
+        ucid:             author_id,
+        author_thumbnail: author_thumbnail,
+        subscriber_count: subscriber_count,
+        video_count:      video_count,
+        description_html: description_html,
+        auto_generated:   auto_generated,
+      })
+    end
+  end
+
+  module GridPlaylistRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["gridPlaylistRenderer"]?
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      title = item_contents["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+      plid = item_contents["playlistId"]?.try &.as_s || ""
+
+      video_count = item_contents["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+      playlist_thumbnail = item_contents["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+      SearchPlaylist.new({
+        title:       title,
+        id:          plid,
+        author:      author_fallback.name || "",
+        ucid:        author_fallback.id || "",
+        video_count: video_count,
+        videos:      [] of SearchPlaylistVideo,
+        thumbnail:   playlist_thumbnail,
+      })
+    end
+  end
+
+  module PlaylistRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["playlistRenderer"]?
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      title = item_contents["title"]["simpleText"]?.try &.as_s || ""
+      plid = item_contents["playlistId"]?.try &.as_s || ""
+
+      video_count = item_contents["videoCount"]?.try &.as_s.to_i || 0
+      playlist_thumbnail = item_contents["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+      author_info = item_contents["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
+      author = author_info.try &.["text"].as_s || author_fallback.name || ""
+      author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
+
+      videos = item_contents["videos"]?.try &.as_a.map do |v|
+        v = v["childVideoRenderer"]
+        v_title = v["title"]["simpleText"]?.try &.as_s || ""
+        v_id = v["videoId"]?.try &.as_s || ""
+        v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+        SearchPlaylistVideo.new({
+          title:          v_title,
+          id:             v_id,
+          length_seconds: v_length_seconds,
+        })
+      end || [] of SearchPlaylistVideo
+
+      # TODO: item_contents["publishedTimeText"]?
+
+      SearchPlaylist.new({
+        title:       title,
+        id:          plid,
+        author:      author,
+        ucid:        author_id,
+        video_count: video_count,
+        videos:      videos,
+        thumbnail:   playlist_thumbnail,
+      })
+    end
+  end
+
+  module CategoryRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["shelfRenderer"]?
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      # Title extraction is a bit complicated. There are two possible routes for it
+      # as well as times when the title attribute just isn't sent by YT.
+      title_container = item_contents["title"]? || ""
+      if !title_container.is_a? String
+        if title = title_container["simpleText"]?
+          title = title.as_s
+        else
+          title = title_container["runs"][0]["text"].as_s
+        end
       else
-        title = title_container["runs"][0]["text"].as_s
+        title = ""
       end
-    else
-      title = ""
-    end
 
-    url = item_contents["endpoint"]?.try &.["commandMetadata"]["webCommandMetadata"]["url"].as_s
+      url = item_contents["endpoint"]?.try &.["commandMetadata"]["webCommandMetadata"]["url"].as_s
 
-    # Sometimes a category can have badges.
-    badges = [] of Tuple(String, String) # (Badge style, label)
-    item_contents["badges"]?.try &.as_a.each do |badge|
-      badge = badge["metadataBadgeRenderer"]
-      badges << {badge["style"].as_s, badge["label"].as_s}
-    end
-
-    # Category description
-    description_html = item_contents["subtitle"]?.try { |desc| parse_content(desc) } || ""
-
-    # Content parsing
-    contents = [] of SearchItem
-
-    # Content could be in three locations.
-    if content_container = item_contents["content"]["horizontalListRenderer"]?
-    elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]?
-    elsif content_container = item_contents["content"]["verticalListRenderer"]?
-    else
-      content_container = item_contents["contents"]
-    end
-
-    raw_contents = content_container["items"].as_a
-    raw_contents.each do |item|
-      result = extract_item(item)
-      if !result.nil?
-        contents << result
+      # Sometimes a category can have badges.
+      badges = [] of Tuple(String, String) # (Badge style, label)
+      item_contents["badges"]?.try &.as_a.each do |badge|
+        badge = badge["metadataBadgeRenderer"]
+        badges << {badge["style"].as_s, badge["label"].as_s}
       end
-    end
 
-    Category.new({
-      title:            title,
-      contents:         contents,
-      description_html: description_html,
-      url:              url,
-      badges:           badges,
-    })
+      # Category description
+      description_html = item_contents["subtitle"]?.try { |desc| parse_content(desc) } || ""
+
+      # Content parsing
+      contents = [] of SearchItem
+
+      # Content could be in three locations.
+      if content_container = item_contents["content"]["horizontalListRenderer"]?
+      elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]?
+      elsif content_container = item_contents["content"]["verticalListRenderer"]?
+      else
+        content_container = item_contents["contents"]
+      end
+
+      raw_contents = content_container["items"].as_a
+      raw_contents.each do |item|
+        result = extract_item(item)
+        if !result.nil?
+          contents << result
+        end
+      end
+
+      Category.new({
+        title:            title,
+        contents:         contents,
+        description_html: description_html,
+        url:              url,
+        badges:           badges,
+      })
+    end
   end
 end
 
@@ -262,88 +250,82 @@ end
 # a structure we can more easily use via the parsers above. Their internals are
 # identical to the item parsers.
 
-private abstract struct ItemsContainerExtractor
-  def process(item : Hash(String, JSON::Any))
-  end
-
-  private def extract(target : JSON::Any)
-  end
-end
-
-private struct YoutubeTabsExtractor < ItemsContainerExtractor
-  def process(initial_data)
-    if target = initial_data["twoColumnBrowseResultsRenderer"]?
-      self.extract(target)
-    end
-  end
-
-  private def extract(target)
-    raw_items = [] of JSON::Any
-    selected_tab = extract_selected_tab(target["tabs"])
-    content = selected_tab["content"]
-
-    content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
-      renderer_container = renderer_container["itemSectionRenderer"]
-      renderer_container_contents = renderer_container["contents"].as_a[0]
-
-      # Category extraction
-      if items_container = renderer_container_contents["shelfRenderer"]?
-        raw_items << renderer_container_contents
-        next
-      elsif items_container = renderer_container_contents["gridRenderer"]?
-      else
-        items_container = renderer_container_contents
-      end
-
-      items_container["items"].as_a.each do |item|
-        raw_items << item
+private module Extractors
+  module YouTubeTabs
+    def self.process(initial_data : Hash(String, JSON::Any))
+      if target = initial_data["twoColumnBrowseResultsRenderer"]?
+        self.extract(target)
       end
     end
 
-    return raw_items
-  end
-end
+    private def self.extract(target)
+      raw_items = [] of JSON::Any
+      selected_tab = extract_selected_tab(target["tabs"])
+      content = selected_tab["content"]
 
-private struct SearchResultsExtractor < ItemsContainerExtractor
-  def process(initial_data)
-    if target = initial_data["twoColumnSearchResultsRenderer"]?
-      self.extract(target)
+      content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
+        renderer_container = renderer_container["itemSectionRenderer"]
+        renderer_container_contents = renderer_container["contents"].as_a[0]
+
+        # Category extraction
+        if items_container = renderer_container_contents["shelfRenderer"]?
+          raw_items << renderer_container_contents
+          next
+        elsif items_container = renderer_container_contents["gridRenderer"]?
+        else
+          items_container = renderer_container_contents
+        end
+
+        items_container["items"].as_a.each do |item|
+          raw_items << item
+        end
+      end
+
+      return raw_items
     end
   end
 
-  private def extract(target)
-    raw_items = [] of Array(JSON::Any)
-    content = target["primaryContents"]
-    renderer = content["sectionListRenderer"]["contents"].as_a.each do |node|
-      if node = node["itemSectionRenderer"]?
-        raw_items << node["contents"].as_a
+  module SearchResults
+    def self.process(initial_data : Hash(String, JSON::Any))
+      if target = initial_data["twoColumnSearchResultsRenderer"]?
+        self.extract(target)
       end
     end
 
-    raw_items = raw_items.flatten
+    private def self.extract(target)
+      raw_items = [] of Array(JSON::Any)
+      content = target["primaryContents"]
+      renderer = content["sectionListRenderer"]["contents"].as_a.each do |node|
+        if node = node["itemSectionRenderer"]?
+          raw_items << node["contents"].as_a
+        end
+      end
 
-    return raw_items
-  end
-end
+      raw_items = raw_items.flatten
 
-private struct ContinuationExtractor < ItemsContainerExtractor
-  def process(initial_data)
-    if target = initial_data["continuationContents"]?
-      self.extract(target)
-    elsif target = initial_data["appendContinuationItemsAction"]?
-      self.extract(target)
+      return raw_items
     end
   end
 
-  private def extract(target)
-    raw_items = [] of JSON::Any
-    if content = target["gridContinuation"]?
-      raw_items = content["items"].as_a
-    elsif content = target["continuationItems"]?
-      raw_items = content.as_a
+  module Continuation
+    def self.process(initial_data : Hash(String, JSON::Any))
+      if target = initial_data["continuationContents"]?
+        self.extract(target)
+      elsif target = initial_data["appendContinuationItemsAction"]?
+        self.extract(target)
+      end
     end
 
-    return raw_items
+    private def self.extract(target)
+      raw_items = [] of JSON::Any
+      if content = target["gridContinuation"]?
+        raw_items = content["items"].as_a
+      elsif content = target["continuationItems"]?
+        raw_items = content.as_a
+      end
+
+      return raw_items
+    end
   end
 end
 

From ca9eb0d5392743cd64c9e0c010ae9c507699bc7c Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 3 Aug 2021 21:22:34 -0700
Subject: [PATCH 14/22] Bountiful extractor changes

- Add extract_text to simplify extraction of InnerTube texts
- Add helper extractor methods to reduce repetition in parsing InnerTube
- Change chains of more than two [] lookups to use #dig or #dig?
- Remove useless ?.try blocks for items that always exist
- Add (some) documentation to VideoRendererParser
---
 src/invidious/helpers/extractors.cr | 178 ++++++++++++++++++++--------
 1 file changed, 127 insertions(+), 51 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index cec0e728..dc46d40a 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -32,24 +32,49 @@ private module Parsers
 
     private def self.parse(item_contents, author_fallback)
       video_id = item_contents["videoId"].as_s
-      title = item_contents["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+      title = extract_text(item_contents["title"]) || ""
 
+      # Extract author information
       author_info = item_contents["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-      author = author_info.try &.["text"].as_s || author_fallback.name || ""
-      author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
+      if author_info = item_contents.dig?("ownerText", "runs")
+        author_info = author_info[0]
+        author = author_info["text"].as_s
+        author_id = HelperExtractors.get_browse_endpoint(author_info)
+      else
+        author = author_fallback.name || ""
+        author_id = author_fallback.id || ""
+      end
 
-      published = item_contents["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
-      view_count = item_contents["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+      # For live videos (and possibly recently premiered videos) there is no published information.
+      # Instead, in its place is the amount of people currently watching. This behavior should be replicated
+      # on Invidious once all features of livestreams are supported. On an unrelated note, defaulting to the current
+      # time for publishing isn't a good idea.
+      published = item_contents["publishedTimeText"]?.try &.["simpleText"].try { |t| decode_date(t.as_s) } || Time.local
+
+      # Typically views are stored under a "simpleText" in the "viewCountText". However, for
+      # livestreams and premiered it is stored under a "runs" array: [{"text":123}, {"text": "watching"}]
+      # When view count is disabled the "viewCountText" is not present on InnerTube data.
+      # TODO change default value to nil and typical encoding type to tuple storing type (watchers, views, etc)
+      # and count
+      view_count = item_contents.dig?("viewCountText", "simpleText").try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
       description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-      length_seconds = item_contents["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
-                       item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
-                         .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+      # The length information *should* only always exist in "lengthText". However, the legacy Invidious code
+      # extracts from "thumbnailOverlays" when it doesn't. More testing is needed to see if this is
+      # actually needed
+      if length_container = item_contents["lengthText"]?
+        length_seconds = decode_length_seconds(length_container["simpleText"].as_s)
+      elsif length_container = item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?)
+        length_seconds = extract_text(length_container["thumbnailOverlayTimeStatusRenderer"]["text"]).try { |t| decode_length_seconds(t) } || 0
+      else
+        length_seconds = 0
+      end
 
       live_now = false
       paid = false
       premium = false
 
-      premiere_timestamp = item_contents["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+      premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) }
 
       item_contents["badges"]?.try &.as_a.each do |badge|
         b = badge["metadataBadgeRenderer"]
@@ -89,15 +114,17 @@ private module Parsers
     end
 
     private def self.parse(item_contents, author_fallback)
-      author = item_contents["title"]["simpleText"]?.try &.as_s || author_fallback.name || ""
+      author = extract_text(item_contents["title"]) || author_fallback.name || ""
       author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id || ""
 
-      author_thumbnail = item_contents["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || ""
-      subscriber_count = item_contents["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+      author_thumbnail = HelperExtractors.get_thumbnails(item_contents)
+      # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube.
+      # TODO change default value to nil
+      subscriber_count = item_contents.dig?("subscriberCountText").try &.["simpleText"].try { |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
 
-      auto_generated = false
-      auto_generated = true if !item_contents["videoCountText"]?
-      video_count = item_contents["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+      auto_generated = !item_contents["videoCountText"]? ? true : false
+
+      video_count = HelperExtractors.get_video_count(item_contents)
       description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
 
       SearchChannel.new({
@@ -120,11 +147,11 @@ private module Parsers
     end
 
     private def self.parse(item_contents, author_fallback)
-      title = item_contents["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+      title = extract_text(item_contents["title"]) || ""
       plid = item_contents["playlistId"]?.try &.as_s || ""
 
-      video_count = item_contents["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
-      playlist_thumbnail = item_contents["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+      video_count = HelperExtractors.get_video_count(item_contents)
+      playlist_thumbnail = HelperExtractors.get_thumbnails(item_contents)
 
       SearchPlaylist.new({
         title:       title,
@@ -141,26 +168,26 @@ private module Parsers
   module PlaylistRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = item["playlistRenderer"]?
-        return self.parse(item_contents, author_fallback)
+        return self.parse(item_contents)
       end
     end
 
-    private def self.parse(item_contents, author_fallback)
+    private def self.parse(item_contents)
       title = item_contents["title"]["simpleText"]?.try &.as_s || ""
       plid = item_contents["playlistId"]?.try &.as_s || ""
 
-      video_count = item_contents["videoCount"]?.try &.as_s.to_i || 0
-      playlist_thumbnail = item_contents["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+      video_count = HelperExtractors.get_video_count(item_contents)
+      playlist_thumbnail = HelperExtractors.get_thumbnails_plural(item_contents)
 
-      author_info = item_contents["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-      author = author_info.try &.["text"].as_s || author_fallback.name || ""
-      author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_fallback.id || ""
+      author_info = item_contents.dig("shortBylineText", "runs", 0)
+      author = author_info["text"].as_s
+      author_id = HelperExtractors.get_browse_endpoint(author_info)
 
       videos = item_contents["videos"]?.try &.as_a.map do |v|
         v = v["childVideoRenderer"]
-        v_title = v["title"]["simpleText"]?.try &.as_s || ""
+        v_title = v.dig?("title", "simpleText").try &.as_s || ""
         v_id = v["videoId"]?.try &.as_s || ""
-        v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+        v_length_seconds = v.dig?("lengthText", "simpleText").try { |t| decode_length_seconds(t.as_s) } || 0
         SearchPlaylistVideo.new({
           title:          v_title,
           id:             v_id,
@@ -190,20 +217,8 @@ private module Parsers
     end
 
     private def self.parse(item_contents, author_fallback)
-      # Title extraction is a bit complicated. There are two possible routes for it
-      # as well as times when the title attribute just isn't sent by YT.
-      title_container = item_contents["title"]? || ""
-      if !title_container.is_a? String
-        if title = title_container["simpleText"]?
-          title = title.as_s
-        else
-          title = title_container["runs"][0]["text"].as_s
-        end
-      else
-        title = ""
-      end
-
-      url = item_contents["endpoint"]?.try &.["commandMetadata"]["webCommandMetadata"]["url"].as_s
+      title = extract_text(item_contents["title"]?) || ""
+      url = item_contents["endpoint"]?.try &.dig("commandMetadata", "webCommandMetadata", "url").as_s
 
       # Sometimes a category can have badges.
       badges = [] of Tuple(String, String) # (Badge style, label)
@@ -249,7 +264,6 @@ end
 # the internal Youtube API's JSON response. The result is then packaged into
 # a structure we can more easily use via the parsers above. Their internals are
 # identical to the item parsers.
-
 private module Extractors
   module YouTubeTabs
     def self.process(initial_data : Hash(String, JSON::Any))
@@ -260,12 +274,10 @@ private module Extractors
 
     private def self.extract(target)
       raw_items = [] of JSON::Any
-      selected_tab = extract_selected_tab(target["tabs"])
-      content = selected_tab["content"]
+      content = extract_selected_tab(target["tabs"])["content"]
 
       content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
-        renderer_container = renderer_container["itemSectionRenderer"]
-        renderer_container_contents = renderer_container["contents"].as_a[0]
+        renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"].as_a[0]
 
         # Category extraction
         if items_container = renderer_container_contents["shelfRenderer"]?
@@ -294,16 +306,14 @@ private module Extractors
 
     private def self.extract(target)
       raw_items = [] of Array(JSON::Any)
-      content = target["primaryContents"]
-      renderer = content["sectionListRenderer"]["contents"].as_a.each do |node|
+
+      target.dig("primaryContents", "sectionListRenderer", "contents").as_a.each do |node|
         if node = node["itemSectionRenderer"]?
           raw_items << node["contents"].as_a
         end
       end
 
-      raw_items = raw_items.flatten
-
-      return raw_items
+      return raw_items.flatten
     end
   end
 
@@ -329,6 +339,72 @@ private module Extractors
   end
 end
 
+# Helper methods to extract out certain stuff from InnerTube
+private module HelperExtractors
+  # Retrieves the amount of videos present within the given InnerTube data.
+  #
+  # Returns a 0 when it's unable to do so
+  def self.get_video_count(container : JSON::Any) : Int32
+    if box = container["videoCountText"]?
+      return extract_text(container["videoCountText"]?).try &.gsub(/\D/, "").to_i || 0
+    elsif box = container["videoCount"]?
+      return box.as_s.to_i
+    else
+      return 0
+    end
+  end
+
+  # Retrieve lowest quality thumbnail from InnerTube data
+  #
+  # TODO allow configuration of image quality (-1 is highest)
+  #
+  # Raises when it's unable to parse from the given JSON data.
+  def self.get_thumbnails(container : JSON::Any) : String
+    return container.dig("thumbnail", "thumbnails", 0, "url").as_s
+  end
+
+  # ditto
+  # YouTube sometimes sends the thumbnail as:
+  # {"thumbnails": [{"thumbnails": [{"url": "example.com"}, ...]}]}
+  def self.get_thumbnails_plural(container : JSON::Any) : String
+    return container.dig("thumbnails", 0, "thumbnails", 0, "url").as_s
+  end
+
+  # Retrieves the ID required for querying the InnerTube browse endpoint
+  #
+  # Raises when it's unable to do so
+  def self.get_browse_endpoint(container)
+    return container.dig("navigationEndpoint", "browseEndpoint", "browseId").as_s
+  end
+end
+
+# Extracts text from InnerTube response
+#
+# InnerTube can package text in three different formats
+# "runs": [
+# {"text": "something"},
+# {"text": "cont"},
+# ...
+# ]
+#
+# "SimpleText": "something"
+#
+# Or sometimes just none at all as with the data returned from
+# category continuations.
+def extract_text(item : JSON::Any?) : String?
+  if item.nil?
+    return nil
+  end
+
+  if text_container = item["simpleText"]?
+    return text_container.as_s
+  elsif text_container = item["runs"]?
+    return text_container.as_a.map(&.["text"].as_s).join("")
+  else
+    nil
+  end
+end
+
 # Parses an item from Youtube's JSON response into a more usable structure.
 # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
 def extract_item(item : JSON::Any, author_fallback : String? = nil,

From e5f07dedbf92459a237165f359d7565e638d4ffa Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Wed, 4 Aug 2021 19:54:41 -0700
Subject: [PATCH 15/22] Typos and tiny styling changes

---
 src/invidious/helpers/extractors.cr | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index dc46d40a..3a90f017 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -35,9 +35,7 @@ private module Parsers
       title = extract_text(item_contents["title"]) || ""
 
       # Extract author information
-      author_info = item_contents["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]?
-      if author_info = item_contents.dig?("ownerText", "runs")
-        author_info = author_info[0]
+      if author_info = item_contents.dig?("ownerText", "runs", 0)
         author = author_info["text"].as_s
         author_id = HelperExtractors.get_browse_endpoint(author_info)
       else
@@ -49,7 +47,7 @@ private module Parsers
       # Instead, in its place is the amount of people currently watching. This behavior should be replicated
       # on Invidious once all features of livestreams are supported. On an unrelated note, defaulting to the current
       # time for publishing isn't a good idea.
-      published = item_contents["publishedTimeText"]?.try &.["simpleText"].try { |t| decode_date(t.as_s) } || Time.local
+      published = item_contents.dig?("publishedTimeText", "simpleText").try { |t| decode_date(t.as_s) } || Time.local
 
       # Typically views are stored under a "simpleText" in the "viewCountText". However, for
       # livestreams and premiered it is stored under a "runs" array: [{"text":123}, {"text": "watching"}]
@@ -119,8 +117,10 @@ private module Parsers
 
       author_thumbnail = HelperExtractors.get_thumbnails(item_contents)
       # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube.
+      # Always simpleText
       # TODO change default value to nil
-      subscriber_count = item_contents.dig?("subscriberCountText").try &.["simpleText"].try { |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
+      subscriber_count = item_contents.dig?("subscriberCountText").try &.["simpleText"].try { \
+         |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
 
       auto_generated = !item_contents["videoCountText"]? ? true : false
 
@@ -420,10 +420,9 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil,
       return result
     end
   end
-  # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer
 end
 
-# Parses multiple items from Youtube's initial JSON response into a more usable structure.
+# Parses multiple items from YouTube's initial JSON response into a more usable structure.
 # The end result is an array of SearchItem.
 def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil,
                   author_id_fallback : String? = nil) : Array(SearchItem)
@@ -436,7 +435,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
     unpackaged_data = initial_data
   end
 
-  # This is identicial to the parser cyling of extract_item().
+  # This is identical to the parser cyling of extract_item().
   ITEM_CONTAINER_EXTRACTOR.each do |extractor|
     results = extractor.process(unpackaged_data)
     if !results.nil?

From 092b8a4e5220cbe7e6eed45d1c331d5596dc68bc Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Thu, 5 Aug 2021 20:31:48 -0700
Subject: [PATCH 16/22] Add documentation to extractors.cr

---
 src/invidious/helpers/extractors.cr | 122 ++++++++++++++++++++++++++--
 1 file changed, 115 insertions(+), 7 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 3a90f017..32134cc9 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -18,11 +18,22 @@ private ITEM_PARSERS = {
 
 record AuthorFallback, name : String? = nil, id : String? = nil
 
-# The following are the parsers for parsing raw item data into neatly packaged structs.
-# They're accessed through the process() method which validates the given data as applicable
-# to their specific struct and then use the internal parse() method to assemble the struct
-# specific to their category.
+# Namespace for logic relating to parsing InnerTube data into various datastructs.
+#
+# Each of the parsers in this namespace are accessed through the #process() method
+# which validates the given data as applicable to itself. If it is applicable the given
+# data is passed to the private `#parse()` method which returns a datastruct of the given
+# type. Otherwise, nil is returned.
 private module Parsers
+  # Parses a InnerTube videoRenderer into a SearchVideo. Returns nil when the given object isn't a videoRenderer
+  #
+  # A videoRenderer renders a video to click on within the YouTube and Invidious UI. It is **not**
+  # the watchable video itself.
+  #
+  # See specs for example.
+  #
+  # `videoRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+  #
   module VideoRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
@@ -104,6 +115,15 @@ private module Parsers
     end
   end
 
+  # Parses a InnerTube channelRenderer into a SearchChannel. Returns nil when the given object isn't a channelRenderer
+  #
+  # A channelRenderer renders a channel to click on within the YouTube and Invidious UI. It is **not**
+  # the channel page itself.
+  #
+  # See specs for example.
+  #
+  # `channelRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+  #
   module ChannelRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
@@ -139,6 +159,15 @@ private module Parsers
     end
   end
 
+  # Parses a InnerTube gridPlaylistRenderer into a SearchPlaylist. Returns nil when the given object isn't a gridPlaylistRenderer
+  #
+  # A gridPlaylistRenderer renders a playlist, that is located in a grid, to click on within the YouTube and Invidious UI.
+  # It is **not** the playlist itself.
+  #
+  # See specs for example.
+  #
+  # `gridPlaylistRenderer`s can be found on the playlist-tabs of channels and expanded categories.
+  #
   module GridPlaylistRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = item["gridPlaylistRenderer"]?
@@ -165,6 +194,14 @@ private module Parsers
     end
   end
 
+  # Parses a InnerTube playlistRenderer into a SearchPlaylist. Returns nil when the given object isn't a playlistRenderer
+  #
+  # A playlistRenderer renders a playlist to click on within the YouTube and Invidious UI. It is **not** the playlist itself.
+  #
+  # See specs for example.
+  #
+  # `playlistRenderer`s can be found almost everywhere on YouTube. In categories, search results, recommended, etc.
+  #
   module PlaylistRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = item["playlistRenderer"]?
@@ -209,6 +246,16 @@ private module Parsers
     end
   end
 
+  # Parses a InnerTube shelfRenderer into a Category. Returns nil when the given object isn't a shelfRenderer
+  #
+  # A shelfRenderer renders divided sections on YouTube. IE "People also watched" in search results and
+  # the various organizational sections in the channel home page. A separate one (richShelfRenderer) is used
+  # for YouTube home. A shelfRenderer can also sometimes be expanded to show more content within it.
+  #
+  # See specs for example.
+  #
+  # `shelfRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+  #
   module CategoryRendererParser
     def self.process(item : JSON::Any, author_fallback : AuthorFallback)
       if item_contents = item["shelfRenderer"]?
@@ -264,7 +311,34 @@ end
 # the internal Youtube API's JSON response. The result is then packaged into
 # a structure we can more easily use via the parsers above. Their internals are
 # identical to the item parsers.
+
+# Namespace for logic relating to extracting InnerTube's initial response to items we can parse.
+#
+# Each of the extractors in this namespace are accessed through the #process() method
+# which validates the given data as applicable to itself. If it is applicable the given
+# data is passed to the private `#extract()` method which returns an array of
+# parsable items. Otherwise, nil is returned.
+#
+# NOTE perhaps the result from here should be abstracted into a struct in order to
+# get additional metadata regarding the container of the item(s).
 private module Extractors
+  # Extracts items from the selected YouTube tab.
+  #
+  # YouTube tabs are typically stored under "twoColumnBrowseResultsRenderer"
+  # and is structured like this:
+  #
+  # "twoColumnBrowseResultsRenderer": {
+  #   {"tabs": [
+  #     {"tabRenderer":  {
+  #       "endpoint": {...}
+  #       "title": "Playlists",
+  #       "selected": true,
+  #       "content": {...},
+  #       ...
+  #     }}
+  #   ]}
+  # }]
+  #
   module YouTubeTabs
     def self.process(initial_data : Hash(String, JSON::Any))
       if target = initial_data["twoColumnBrowseResultsRenderer"]?
@@ -297,6 +371,23 @@ private module Extractors
     end
   end
 
+  # Extracts items from the InnerTube response for search results
+  #
+  # Search results are typically stored under "twoColumnSearchResultsRenderer"
+  # and is structured like this:
+  #
+  # "twoColumnSearchResultsRenderer": {
+  #   {"primaryContents": {
+  #     {"sectionListRenderer": {
+  #       "contents": [...],
+  #       ...,
+  #       "subMenu": {...},
+  #       "hideBottomSeparator": true,
+  #       "targetId": "search-feed"
+  #     }}
+  #   }}
+  # }
+  #
   module SearchResults
     def self.process(initial_data : Hash(String, JSON::Any))
       if target = initial_data["twoColumnSearchResultsRenderer"]?
@@ -317,6 +408,16 @@ private module Extractors
     end
   end
 
+  # Extracts continuation items from a InnerTube response
+  #
+  # Continuation items (on YouTube) are items which are appended to the
+  # end of the page for continuous scrolling. As such, in many cases,
+  # the items are lacking information such as author or category title,
+  # since the original results has already rendered them on the top of the page.
+  #
+  # The way they are structured is too varied to be accurately written down here.
+  # However, they all eventually lead to an array of parsable items after traversing
+  # through the JSON structure.
   module Continuation
     def self.process(initial_data : Hash(String, JSON::Any))
       if target = initial_data["continuationContents"]?
@@ -339,7 +440,10 @@ private module Extractors
   end
 end
 
-# Helper methods to extract out certain stuff from InnerTube
+# Helper methods to aid in the parsing of InnerTube to data structs.
+#
+# Mostly used to extract out repeated structures to deal with code
+# repetition.
 private module HelperExtractors
   # Retrieves the amount of videos present within the given InnerTube data.
   #
@@ -364,14 +468,14 @@ private module HelperExtractors
   end
 
   # ditto
+  #
   # YouTube sometimes sends the thumbnail as:
   # {"thumbnails": [{"thumbnails": [{"url": "example.com"}, ...]}]}
   def self.get_thumbnails_plural(container : JSON::Any) : String
     return container.dig("thumbnails", 0, "thumbnails", 0, "url").as_s
   end
 
-  # Retrieves the ID required for querying the InnerTube browse endpoint
-  #
+  # Retrieves the ID required for querying the InnerTube browse endpoint.
   # Raises when it's unable to do so
   def self.get_browse_endpoint(container)
     return container.dig("navigationEndpoint", "browseEndpoint", "browseId").as_s
@@ -391,6 +495,10 @@ end
 #
 # Or sometimes just none at all as with the data returned from
 # category continuations.
+#
+# In order to facilitate calling this function with `#[]?`:
+# A nil will be accepted. Of course, since nil cannot be parsed,
+# another nil will be returned.
 def extract_text(item : JSON::Any?) : String?
   if item.nil?
     return nil

From 6df85718e6dac2faa9037fcf2283aa6b5ab819a3 Mon Sep 17 00:00:00 2001
From: syeopite <70992037+syeopite@users.noreply.github.com>
Date: Tue, 28 Sep 2021 15:23:36 +0000
Subject: [PATCH 17/22] Apply suggestions from code review

Co-authored-by: Samantaz Fox <coding@samantaz.fr>
---
 src/invidious/helpers/extractors.cr | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 32134cc9..0c645868 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -139,8 +139,8 @@ private module Parsers
       # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube.
       # Always simpleText
       # TODO change default value to nil
-      subscriber_count = item_contents.dig?("subscriberCountText").try &.["simpleText"].try { \
-         |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
+      subscriber_count = item_contents.dig?("subscriberCountText", "simpleText")
+        .try { |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
 
       auto_generated = !item_contents["videoCountText"]? ? true : false
 
@@ -265,7 +265,8 @@ private module Parsers
 
     private def self.parse(item_contents, author_fallback)
       title = extract_text(item_contents["title"]?) || ""
-      url = item_contents["endpoint"]?.try &.dig("commandMetadata", "webCommandMetadata", "url").as_s
+      url = item_contents.dig?("endpoint", "commandMetadata", "webCommandMetadata", "url")
+        .try &.as_s
 
       # Sometimes a category can have badges.
       badges = [] of Tuple(String, String) # (Badge style, label)
@@ -450,7 +451,7 @@ private module HelperExtractors
   # Returns a 0 when it's unable to do so
   def self.get_video_count(container : JSON::Any) : Int32
     if box = container["videoCountText"]?
-      return extract_text(container["videoCountText"]?).try &.gsub(/\D/, "").to_i || 0
+      return extract_text(box).try &.gsub(/\D/, "").to_i || 0
     elsif box = container["videoCount"]?
       return box.as_s.to_i
     else

From 43ea8fa70698ef94701fdf9da419300b9a6a0710 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 28 Sep 2021 08:19:55 -0700
Subject: [PATCH 18/22] Convert nil for AuthorFallback to empty strings

---
 src/invidious/helpers/extractors.cr | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 0c645868..88248e8d 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -16,7 +16,7 @@ private ITEM_PARSERS = {
   Parsers::CategoryRendererParser,
 }
 
-record AuthorFallback, name : String? = nil, id : String? = nil
+record AuthorFallback, name : String, id : String
 
 # Namespace for logic relating to parsing InnerTube data into various datastructs.
 #
@@ -50,8 +50,8 @@ private module Parsers
         author = author_info["text"].as_s
         author_id = HelperExtractors.get_browse_endpoint(author_info)
       else
-        author = author_fallback.name || ""
-        author_id = author_fallback.id || ""
+        author = author_fallback.name
+        author_id = author_fallback.id
       end
 
       # For live videos (and possibly recently premiered videos) there is no published information.
@@ -132,8 +132,8 @@ private module Parsers
     end
 
     private def self.parse(item_contents, author_fallback)
-      author = extract_text(item_contents["title"]) || author_fallback.name || ""
-      author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id || ""
+      author = extract_text(item_contents["title"]) || author_fallback.name
+      author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id
 
       author_thumbnail = HelperExtractors.get_thumbnails(item_contents)
       # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube.
@@ -185,8 +185,8 @@ private module Parsers
       SearchPlaylist.new({
         title:       title,
         id:          plid,
-        author:      author_fallback.name || "",
-        ucid:        author_fallback.id || "",
+        author:      author_fallback.name,
+        ucid:        author_fallback.id,
         video_count: video_count,
         videos:      [] of SearchPlaylistVideo,
         thumbnail:   playlist_thumbnail,
@@ -516,9 +516,12 @@ end
 
 # Parses an item from Youtube's JSON response into a more usable structure.
 # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel.
-def extract_item(item : JSON::Any, author_fallback : String? = nil,
-                 author_id_fallback : String? = nil)
-  author_fallback = AuthorFallback.new(author_fallback, author_id_fallback)
+def extract_item(item : JSON::Any, author_fallback : String? = "",
+                 author_id_fallback : String? = "")
+  # We "allow" nil values but secretly use empty strings instead. This is to save us the
+  # hassle of modifying every author_fallback and author_id_fallback arg usage
+  # which is more often than not nil.
+  author_fallback = AuthorFallback.new(author_fallback || "", author_id_fallback || "")
 
   # Cycles through all of the item parsers and attempt to parse the raw YT JSON data.
   # Each parser automatically validates the data given to see if the data is

From aa59925374849a4e2aee09de5e65ba027e16f3be Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 28 Sep 2021 08:39:00 -0700
Subject: [PATCH 19/22] Rename get_browse_endpoint to get_browse_id

---
 src/invidious/helpers/extractors.cr | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 88248e8d..13ffe1e4 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -48,7 +48,7 @@ private module Parsers
       # Extract author information
       if author_info = item_contents.dig?("ownerText", "runs", 0)
         author = author_info["text"].as_s
-        author_id = HelperExtractors.get_browse_endpoint(author_info)
+        author_id = HelperExtractors.get_browse_id(author_info)
       else
         author = author_fallback.name
         author_id = author_fallback.id
@@ -218,7 +218,7 @@ private module Parsers
 
       author_info = item_contents.dig("shortBylineText", "runs", 0)
       author = author_info["text"].as_s
-      author_id = HelperExtractors.get_browse_endpoint(author_info)
+      author_id = HelperExtractors.get_browse_id(author_info)
 
       videos = item_contents["videos"]?.try &.as_a.map do |v|
         v = v["childVideoRenderer"]
@@ -478,7 +478,7 @@ private module HelperExtractors
 
   # Retrieves the ID required for querying the InnerTube browse endpoint.
   # Raises when it's unable to do so
-  def self.get_browse_endpoint(container)
+  def self.get_browse_id(container)
     return container.dig("navigationEndpoint", "browseEndpoint", "browseId").as_s
   end
 end

From 9ab242ca2e79ecc8a196a019619fa3ddab31b28a Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 28 Sep 2021 08:50:23 -0700
Subject: [PATCH 20/22] Optimize routing logic of extract_item(s) funcs

---
 src/invidious/helpers/extractors.cr | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 13ffe1e4..c6929162 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -352,7 +352,7 @@ private module Extractors
       content = extract_selected_tab(target["tabs"])["content"]
 
       content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
-        renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"].as_a[0]
+        renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
 
         # Category extraction
         if items_container = renderer_container_contents["shelfRenderer"]?
@@ -527,8 +527,7 @@ def extract_item(item : JSON::Any, author_fallback : String? = "",
   # Each parser automatically validates the data given to see if the data is
   # applicable to itself. If not nil is returned and the next parser is attemped.
   ITEM_PARSERS.each do |parser|
-    result = parser.process(item, author_fallback)
-    if !result.nil?
+    if result = parser.process(item, author_fallback)
       return result
     end
   end
@@ -542,22 +541,21 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
 
   if unpackaged_data = initial_data["contents"]?.try &.as_h
   elsif unpackaged_data = initial_data["response"]?.try &.as_h
-  elsif unpackaged_data = initial_data["onResponseReceivedActions"]?.try &.as_a.[0].as_h
+  elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h
   else
     unpackaged_data = initial_data
   end
 
-  # This is identical to the parser cyling of extract_item().
+  # This is identical to the parser cycling of extract_item().
   ITEM_CONTAINER_EXTRACTOR.each do |extractor|
-    results = extractor.process(unpackaged_data)
-    if !results.nil?
-      results.each do |item|
-        parsed_result = extract_item(item, author_fallback, author_id_fallback)
-
-        if !parsed_result.nil?
+    if container = extractor.process(unpackaged_data)
+      # Extract items in container
+      container.each do |item|
+        if parsed_result = extract_item(item, author_fallback, author_id_fallback)
           items << parsed_result
         end
       end
+
       return items
     end
   end

From 23049e026f4c4f8fe02f8a911a717791345d44fa Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Tue, 28 Sep 2021 08:55:02 -0700
Subject: [PATCH 21/22] Improve readability of SearchChannel auto-gen detect

---
 src/invidious/helpers/extractors.cr | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index c6929162..83c751e0 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -142,7 +142,9 @@ private module Parsers
       subscriber_count = item_contents.dig?("subscriberCountText", "simpleText")
         .try { |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0
 
-      auto_generated = !item_contents["videoCountText"]? ? true : false
+      # Auto-generated channels don't have videoCountText
+      # Taken from: https://github.com/iv-org/invidious/pull/2228#discussion_r717620922
+      auto_generated = item_contents["videoCountText"]?.nil?
 
       video_count = HelperExtractors.get_video_count(item_contents)
       description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""

From 26b28cea498f3d7be10907165e1f9d8322843911 Mon Sep 17 00:00:00 2001
From: syeopite <syeopite@syeopite.dev>
Date: Fri, 1 Oct 2021 05:39:23 -0700
Subject: [PATCH 22/22] Use break instead of short-circuit return

---
 src/invidious/helpers/extractors.cr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 83c751e0..850c93ec 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -558,7 +558,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
         end
       end
 
-      return items
+      break
     end
   end