diff --git a/spec/helpers_spec.cr b/spec/helpers_spec.cr index ada5b28f..b17c8d73 100644 --- a/spec/helpers_spec.cr +++ b/spec/helpers_spec.cr @@ -6,6 +6,7 @@ require "spec" require "yaml" require "../src/invidious/helpers/*" require "../src/invidious/channels/*" +require "../src/invidious/videos" require "../src/invidious/comments" require "../src/invidious/playlists" require "../src/invidious/search" diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr new file mode 100644 index 00000000..850c93ec --- /dev/null +++ b/src/invidious/helpers/extractors.cr @@ -0,0 +1,566 @@ +# This file contains helper methods to parse the Youtube API json data into +# neat little packages we can use + +# Tuple of Parsers/Extractors so we can easily cycle through them. +private ITEM_CONTAINER_EXTRACTOR = { + Extractors::YouTubeTabs, + Extractors::SearchResults, + Extractors::Continuation, +} + +private ITEM_PARSERS = { + Parsers::VideoRendererParser, + Parsers::ChannelRendererParser, + Parsers::GridPlaylistRendererParser, + Parsers::PlaylistRendererParser, + Parsers::CategoryRendererParser, +} + +record AuthorFallback, name : String, id : String + +# Namespace for logic relating to parsing InnerTube data into various datastructs. +# +# Each of the parsers in this namespace are accessed through the #process() method +# which validates the given data as applicable to itself. If it is applicable the given +# data is passed to the private `#parse()` method which returns a datastruct of the given +# type. Otherwise, nil is returned. +private module Parsers + # Parses a InnerTube videoRenderer into a SearchVideo. Returns nil when the given object isn't a videoRenderer + # + # A videoRenderer renders a video to click on within the YouTube and Invidious UI. It is **not** + # the watchable video itself. + # + # See specs for example. + # + # `videoRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. + # + module VideoRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?) + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + video_id = item_contents["videoId"].as_s + title = extract_text(item_contents["title"]) || "" + + # Extract author information + if author_info = item_contents.dig?("ownerText", "runs", 0) + author = author_info["text"].as_s + author_id = HelperExtractors.get_browse_id(author_info) + else + author = author_fallback.name + author_id = author_fallback.id + end + + # For live videos (and possibly recently premiered videos) there is no published information. + # Instead, in its place is the amount of people currently watching. This behavior should be replicated + # on Invidious once all features of livestreams are supported. On an unrelated note, defaulting to the current + # time for publishing isn't a good idea. + published = item_contents.dig?("publishedTimeText", "simpleText").try { |t| decode_date(t.as_s) } || Time.local + + # Typically views are stored under a "simpleText" in the "viewCountText". However, for + # livestreams and premiered it is stored under a "runs" array: [{"text":123}, {"text": "watching"}] + # When view count is disabled the "viewCountText" is not present on InnerTube data. + # TODO change default value to nil and typical encoding type to tuple storing type (watchers, views, etc) + # and count + view_count = item_contents.dig?("viewCountText", "simpleText").try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 + description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + + # The length information *should* only always exist in "lengthText". However, the legacy Invidious code + # extracts from "thumbnailOverlays" when it doesn't. More testing is needed to see if this is + # actually needed + if length_container = item_contents["lengthText"]? + length_seconds = decode_length_seconds(length_container["simpleText"].as_s) + elsif length_container = item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?) + length_seconds = extract_text(length_container["thumbnailOverlayTimeStatusRenderer"]["text"]).try { |t| decode_length_seconds(t) } || 0 + else + length_seconds = 0 + end + + live_now = false + paid = false + premium = false + + premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) } + + item_contents["badges"]?.try &.as_a.each do |badge| + b = badge["metadataBadgeRenderer"] + case b["label"].as_s + when "LIVE NOW" + live_now = true + when "New", "4K", "CC" + # TODO + when "Premium" + # TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"] + premium = true + else nil # Ignore + end + end + + SearchVideo.new({ + title: title, + id: video_id, + author: author, + ucid: author_id, + published: published, + views: view_count, + description_html: description_html, + length_seconds: length_seconds, + live_now: live_now, + premium: premium, + premiere_timestamp: premiere_timestamp, + }) + end + end + + # Parses a InnerTube channelRenderer into a SearchChannel. Returns nil when the given object isn't a channelRenderer + # + # A channelRenderer renders a channel to click on within the YouTube and Invidious UI. It is **not** + # the channel page itself. + # + # See specs for example. + # + # `channelRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. + # + module ChannelRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?) + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + author = extract_text(item_contents["title"]) || author_fallback.name + author_id = item_contents["channelId"]?.try &.as_s || author_fallback.id + + author_thumbnail = HelperExtractors.get_thumbnails(item_contents) + # When public subscriber count is disabled, the subscriberCountText isn't sent by InnerTube. + # Always simpleText + # TODO change default value to nil + subscriber_count = item_contents.dig?("subscriberCountText", "simpleText") + .try { |s| short_text_to_number(s.as_s.split(" ")[0]) } || 0 + + # Auto-generated channels doesn't have videoCountText + # Taken from: https://github.com/iv-org/invidious/pull/2228#discussion_r717620922 + auto_generated = item_contents["videoCountText"]?.nil? + + video_count = HelperExtractors.get_video_count(item_contents) + description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + + SearchChannel.new({ + author: author, + ucid: author_id, + author_thumbnail: author_thumbnail, + subscriber_count: subscriber_count, + video_count: video_count, + description_html: description_html, + auto_generated: auto_generated, + }) + end + end + + # Parses a InnerTube gridPlaylistRenderer into a SearchPlaylist. Returns nil when the given object isn't a gridPlaylistRenderer + # + # A gridPlaylistRenderer renders a playlist, that is located in a grid, to click on within the YouTube and Invidious UI. + # It is **not** the playlist itself. + # + # See specs for example. + # + # `gridPlaylistRenderer`s can be found on the playlist-tabs of channels and expanded categories. + # + module GridPlaylistRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["gridPlaylistRenderer"]? + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + title = extract_text(item_contents["title"]) || "" + plid = item_contents["playlistId"]?.try &.as_s || "" + + video_count = HelperExtractors.get_video_count(item_contents) + playlist_thumbnail = HelperExtractors.get_thumbnails(item_contents) + + SearchPlaylist.new({ + title: title, + id: plid, + author: author_fallback.name, + ucid: author_fallback.id, + video_count: video_count, + videos: [] of SearchPlaylistVideo, + thumbnail: playlist_thumbnail, + }) + end + end + + # Parses a InnerTube playlistRenderer into a SearchPlaylist. Returns nil when the given object isn't a playlistRenderer + # + # A playlistRenderer renders a playlist to click on within the YouTube and Invidious UI. It is **not** the playlist itself. + # + # See specs for example. + # + # `playlistRenderer`s can be found almost everywhere on YouTube. In categories, search results, recommended, etc. + # + module PlaylistRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["playlistRenderer"]? + return self.parse(item_contents) + end + end + + private def self.parse(item_contents) + title = item_contents["title"]["simpleText"]?.try &.as_s || "" + plid = item_contents["playlistId"]?.try &.as_s || "" + + video_count = HelperExtractors.get_video_count(item_contents) + playlist_thumbnail = HelperExtractors.get_thumbnails_plural(item_contents) + + author_info = item_contents.dig("shortBylineText", "runs", 0) + author = author_info["text"].as_s + author_id = HelperExtractors.get_browse_id(author_info) + + videos = item_contents["videos"]?.try &.as_a.map do |v| + v = v["childVideoRenderer"] + v_title = v.dig?("title", "simpleText").try &.as_s || "" + v_id = v["videoId"]?.try &.as_s || "" + v_length_seconds = v.dig?("lengthText", "simpleText").try { |t| decode_length_seconds(t.as_s) } || 0 + SearchPlaylistVideo.new({ + title: v_title, + id: v_id, + length_seconds: v_length_seconds, + }) + end || [] of SearchPlaylistVideo + + # TODO: item_contents["publishedTimeText"]? + + SearchPlaylist.new({ + title: title, + id: plid, + author: author, + ucid: author_id, + video_count: video_count, + videos: videos, + thumbnail: playlist_thumbnail, + }) + end + end + + # Parses a InnerTube shelfRenderer into a Category. Returns nil when the given object isn't a shelfRenderer + # + # A shelfRenderer renders divided sections on YouTube. IE "People also watched" in search results and + # the various organizational sections in the channel home page. A separate one (richShelfRenderer) is used + # for YouTube home. A shelfRenderer can also sometimes be expanded to show more content within it. + # + # See specs for example. + # + # `shelfRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc. + # + module CategoryRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["shelfRenderer"]? + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + title = extract_text(item_contents["title"]?) || "" + url = item_contents.dig?("endpoint", "commandMetadata", "webCommandMetadata", "url") + .try &.as_s + + # Sometimes a category can have badges. + badges = [] of Tuple(String, String) # (Badge style, label) + item_contents["badges"]?.try &.as_a.each do |badge| + badge = badge["metadataBadgeRenderer"] + badges << {badge["style"].as_s, badge["label"].as_s} + end + + # Category description + description_html = item_contents["subtitle"]?.try { |desc| parse_content(desc) } || "" + + # Content parsing + contents = [] of SearchItem + + # Content could be in three locations. + if content_container = item_contents["content"]["horizontalListRenderer"]? + elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]? + elsif content_container = item_contents["content"]["verticalListRenderer"]? + else + content_container = item_contents["contents"] + end + + raw_contents = content_container["items"].as_a + raw_contents.each do |item| + result = extract_item(item) + if !result.nil? + contents << result + end + end + + Category.new({ + title: title, + contents: contents, + description_html: description_html, + url: url, + badges: badges, + }) + end + end +end + +# The following are the extractors for extracting an array of items from +# the internal Youtube API's JSON response. The result is then packaged into +# a structure we can more easily use via the parsers above. Their internals are +# identical to the item parsers. + +# Namespace for logic relating to extracting InnerTube's initial response to items we can parse. +# +# Each of the extractors in this namespace are accessed through the #process() method +# which validates the given data as applicable to itself. If it is applicable the given +# data is passed to the private `#extract()` method which returns an array of +# parsable items. Otherwise, nil is returned. +# +# NOTE perhaps the result from here should be abstracted into a struct in order to +# get additional metadata regarding the container of the item(s). +private module Extractors + # Extracts items from the selected YouTube tab. + # + # YouTube tabs are typically stored under "twoColumnBrowseResultsRenderer" + # and is structured like this: + # + # "twoColumnBrowseResultsRenderer": { + # {"tabs": [ + # {"tabRenderer": { + # "endpoint": {...} + # "title": "Playlists", + # "selected": true, + # "content": {...}, + # ... + # }} + # ]} + # }] + # + module YouTubeTabs + def self.process(initial_data : Hash(String, JSON::Any)) + if target = initial_data["twoColumnBrowseResultsRenderer"]? + self.extract(target) + end + end + + private def self.extract(target) + raw_items = [] of JSON::Any + content = extract_selected_tab(target["tabs"])["content"] + + content["sectionListRenderer"]["contents"].as_a.each do |renderer_container| + renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0] + + # Category extraction + if items_container = renderer_container_contents["shelfRenderer"]? + raw_items << renderer_container_contents + next + elsif items_container = renderer_container_contents["gridRenderer"]? + else + items_container = renderer_container_contents + end + + items_container["items"].as_a.each do |item| + raw_items << item + end + end + + return raw_items + end + end + + # Extracts items from the InnerTube response for search results + # + # Search results are typically stored under "twoColumnSearchResultsRenderer" + # and is structured like this: + # + # "twoColumnSearchResultsRenderer": { + # {"primaryContents": { + # {"sectionListRenderer": { + # "contents": [...], + # ..., + # "subMenu": {...}, + # "hideBottomSeparator": true, + # "targetId": "search-feed" + # }} + # }} + # } + # + module SearchResults + def self.process(initial_data : Hash(String, JSON::Any)) + if target = initial_data["twoColumnSearchResultsRenderer"]? + self.extract(target) + end + end + + private def self.extract(target) + raw_items = [] of Array(JSON::Any) + + target.dig("primaryContents", "sectionListRenderer", "contents").as_a.each do |node| + if node = node["itemSectionRenderer"]? + raw_items << node["contents"].as_a + end + end + + return raw_items.flatten + end + end + + # Extracts continuation items from a InnerTube response + # + # Continuation items (on YouTube) are items which are appended to the + # end of the page for continuous scrolling. As such, in many cases, + # the items are lacking information such as author or category title, + # since the original results has already rendered them on the top of the page. + # + # The way they are structured is too varied to be accurately written down here. + # However, they all eventually lead to an array of parsable items after traversing + # through the JSON structure. + module Continuation + def self.process(initial_data : Hash(String, JSON::Any)) + if target = initial_data["continuationContents"]? + self.extract(target) + elsif target = initial_data["appendContinuationItemsAction"]? + self.extract(target) + end + end + + private def self.extract(target) + raw_items = [] of JSON::Any + if content = target["gridContinuation"]? + raw_items = content["items"].as_a + elsif content = target["continuationItems"]? + raw_items = content.as_a + end + + return raw_items + end + end +end + +# Helper methods to aid in the parsing of InnerTube to data structs. +# +# Mostly used to extract out repeated structures to deal with code +# repetition. +private module HelperExtractors + # Retrieves the amount of videos present within the given InnerTube data. + # + # Returns a 0 when it's unable to do so + def self.get_video_count(container : JSON::Any) : Int32 + if box = container["videoCountText"]? + return extract_text(box).try &.gsub(/\D/, "").to_i || 0 + elsif box = container["videoCount"]? + return box.as_s.to_i + else + return 0 + end + end + + # Retrieve lowest quality thumbnail from InnerTube data + # + # TODO allow configuration of image quality (-1 is highest) + # + # Raises when it's unable to parse from the given JSON data. + def self.get_thumbnails(container : JSON::Any) : String + return container.dig("thumbnail", "thumbnails", 0, "url").as_s + end + + # ditto + # + # YouTube sometimes sends the thumbnail as: + # {"thumbnails": [{"thumbnails": [{"url": "example.com"}, ...]}]} + def self.get_thumbnails_plural(container : JSON::Any) : String + return container.dig("thumbnails", 0, "thumbnails", 0, "url").as_s + end + + # Retrieves the ID required for querying the InnerTube browse endpoint. + # Raises when it's unable to do so + def self.get_browse_id(container) + return container.dig("navigationEndpoint", "browseEndpoint", "browseId").as_s + end +end + +# Extracts text from InnerTube response +# +# InnerTube can package text in three different formats +# "runs": [ +# {"text": "something"}, +# {"text": "cont"}, +# ... +# ] +# +# "SimpleText": "something" +# +# Or sometimes just none at all as with the data returned from +# category continuations. +# +# In order to facilitate calling this function with `#[]?`: +# A nil will be accepted. Of course, since nil cannot be parsed, +# another nil will be returned. +def extract_text(item : JSON::Any?) : String? + if item.nil? + return nil + end + + if text_container = item["simpleText"]? + return text_container.as_s + elsif text_container = item["runs"]? + return text_container.as_a.map(&.["text"].as_s).join("") + else + nil + end +end + +# Parses an item from Youtube's JSON response into a more usable structure. +# The end result can either be a SearchVideo, SearchPlaylist or SearchChannel. +def extract_item(item : JSON::Any, author_fallback : String? = "", + author_id_fallback : String? = "") + # We "allow" nil values but secretly use empty strings instead. This is to save us the + # hassle of modifying every author_fallback and author_id_fallback arg usage + # which is more often than not nil. + author_fallback = AuthorFallback.new(author_fallback || "", author_id_fallback || "") + + # Cycles through all of the item parsers and attempt to parse the raw YT JSON data. + # Each parser automatically validates the data given to see if the data is + # applicable to itself. If not nil is returned and the next parser is attemped. + ITEM_PARSERS.each do |parser| + if result = parser.process(item, author_fallback) + return result + end + end +end + +# Parses multiple items from YouTube's initial JSON response into a more usable structure. +# The end result is an array of SearchItem. +def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, + author_id_fallback : String? = nil) : Array(SearchItem) + items = [] of SearchItem + + if unpackaged_data = initial_data["contents"]?.try &.as_h + elsif unpackaged_data = initial_data["response"]?.try &.as_h + elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h + else + unpackaged_data = initial_data + end + + # This is identical to the parser cycling of extract_item(). + ITEM_CONTAINER_EXTRACTOR.each do |extractor| + if container = extractor.process(unpackaged_data) + # Extract items in container + container.each do |item| + if parsed_result = extract_item(item, author_fallback, author_id_fallback) + items << parsed_result + end + end + + break + end + end + + return items +end diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index fb33df1c..99adcd30 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -248,168 +248,40 @@ def html_to_content(description_html : String) end def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) -end + extracted = extract_items(initial_data, author_fallback, author_id_fallback) -def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil) - if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?) - video_id = i["videoId"].as_s - title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" - - author_info = i["ownerText"]?.try &.["runs"]?.try &.as_a?.try &.[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local - view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || - i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]? - .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 - - live_now = false - premium = false - - premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } - - i["badges"]?.try &.as_a.each do |badge| - b = badge["metadataBadgeRenderer"] - case b["label"].as_s - when "LIVE NOW" - live_now = true - when "New", "4K", "CC" - # TODO - when "Premium" - # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] - premium = true - else nil # Ignore - end + target = [] of SearchItem + extracted.each do |i| + if i.is_a?(Category) + i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } + else + target << i end - - SearchVideo.new({ - title: title, - id: video_id, - author: author, - ucid: author_id, - published: published, - views: view_count, - description_html: description_html, - length_seconds: length_seconds, - live_now: live_now, - premium: premium, - premiere_timestamp: premiere_timestamp, - }) - elsif i = item["channelRenderer"]? - author = i["title"]["simpleText"]?.try &.as_s || author_fallback || "" - author_id = i["channelId"]?.try &.as_s || author_id_fallback || "" - - author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try &.["url"]?.try &.as_s || "" - subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 - - auto_generated = false - auto_generated = true if !i["videoCountText"]? - video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - - SearchChannel.new({ - author: author, - ucid: author_id, - author_thumbnail: author_thumbnail, - subscriber_count: subscriber_count, - video_count: video_count, - description_html: description_html, - auto_generated: auto_generated, - }) - elsif i = item["gridPlaylistRenderer"]? - title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" - - SearchPlaylist.new({ - title: title, - id: plid, - author: author_fallback || "", - ucid: author_id_fallback || "", - video_count: video_count, - videos: [] of SearchPlaylistVideo, - thumbnail: playlist_thumbnail, - }) - elsif i = item["playlistRenderer"]? - title = i["title"]["simpleText"]?.try &.as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCount"]?.try &.as_s.to_i || 0 - playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" - - author_info = i["shortBylineText"]?.try &.["runs"]?.try &.as_a?.try &.[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - videos = i["videos"]?.try &.as_a.map do |v| - v = v["childVideoRenderer"] - v_title = v["title"]["simpleText"]?.try &.as_s || "" - v_id = v["videoId"]?.try &.as_s || "" - v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 - SearchPlaylistVideo.new({ - title: v_title, - id: v_id, - length_seconds: v_length_seconds, - }) - end || [] of SearchPlaylistVideo - - # TODO: i["publishedTimeText"]? - - SearchPlaylist.new({ - title: title, - id: plid, - author: author, - ucid: author_id, - video_count: video_count, - videos: videos, - thumbnail: playlist_thumbnail, - }) - elsif i = item["radioRenderer"]? # Mix - # TODO - elsif i = item["showRenderer"]? # Show - # TODO - elsif i = item["shelfRenderer"]? - elsif i = item["horizontalCardListRenderer"]? - elsif i = item["searchPyvRenderer"]? # Ad end + return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) end -def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - items = [] of SearchItem +def extract_selected_tab(tabs) + # Extract the selected tab from the array of tabs Youtube returns + return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] +end - channel_v2_response = initial_data - .try &.["continuationContents"]? - .try &.["gridContinuation"]? - .try &.["items"]? +def fetch_continuation_token(items : Array(JSON::Any)) + # Fetches the continuation token from an array of items + return items.last["continuationItemRenderer"]? + .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s +end - if channel_v2_response - channel_v2_response.try &.as_a.each { |item| - extract_item(item, author_fallback, author_id_fallback) - .try { |t| items << t } - } +def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) + # Fetches the continuation token from initial data + if initial_data["onResponseReceivedActions"]? + continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] else - initial_data.try { |t| t["contents"]? || t["response"]? } - .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || - t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || - t["continuationContents"]? } - .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } - .try &.["contents"].as_a - .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a - .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || - t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t } - .each { |item| - extract_item(item, author_fallback, author_id_fallback) - .try { |t| items << t } - } } + tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) + continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] end - items + return fetch_continuation_token(continuation_items.as_a) end def check_enum(db, enum_name, struct_type = nil) diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr new file mode 100644 index 00000000..61356555 --- /dev/null +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -0,0 +1,256 @@ +struct SearchVideo + include DB::Serializable + + property title : String + property id : String + property author : String + property ucid : String + property published : Time + property views : Int64 + property description_html : String + property length_seconds : Int32 + property live_now : Bool + property premium : Bool + property premiere_timestamp : Time? + + def to_xml(auto_generated, query_params, xml : XML::Builder) + query_params["v"] = self.id + + xml.element("entry") do + xml.element("id") { xml.text "yt:video:#{self.id}" } + xml.element("yt:videoId") { xml.text self.id } + xml.element("yt:channelId") { xml.text self.ucid } + xml.element("title") { xml.text self.title } + xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}") + + xml.element("author") do + if auto_generated + xml.element("name") { xml.text self.author } + xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" } + else + xml.element("name") { xml.text author } + xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" } + end + end + + xml.element("content", type: "xhtml") do + xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do + xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do + xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg") + end + + xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) } + end + end + + xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } + + xml.element("media:group") do + xml.element("media:title") { xml.text self.title } + xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg", + width: "320", height: "180") + xml.element("media:description") { xml.text html_to_content(self.description_html) } + end + + xml.element("media:community") do + xml.element("media:statistics", views: self.views) + end + end + end + + def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil) + if xml + to_xml(HOST_URL, auto_generated, query_params, xml) + else + XML.build do |json| + to_xml(HOST_URL, auto_generated, query_params, xml) + end + end + end + + def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder) + json.object do + json.field "type", "video" + json.field "title", self.title + json.field "videoId", self.id + + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "videoThumbnails" do + generate_thumbnails(json, self.id) + end + + json.field "description", html_to_content(self.description_html) + json.field "descriptionHtml", self.description_html + + json.field "viewCount", self.views + json.field "published", self.published.to_unix + json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) + json.field "lengthSeconds", self.length_seconds + json.field "liveNow", self.live_now + json.field "premium", self.premium + json.field "isUpcoming", self.is_upcoming + + if self.premiere_timestamp + json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix + end + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end + + def is_upcoming + premiere_timestamp ? true : false + end +end + +struct SearchPlaylistVideo + include DB::Serializable + + property title : String + property id : String + property length_seconds : Int32 +end + +struct SearchPlaylist + include DB::Serializable + + property title : String + property id : String + property author : String + property ucid : String + property video_count : Int32 + property videos : Array(SearchPlaylistVideo) + property thumbnail : String? + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "type", "playlist" + json.field "title", self.title + json.field "playlistId", self.id + json.field "playlistThumbnail", self.thumbnail + + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "videoCount", self.video_count + json.field "videos" do + json.array do + self.videos.each do |video| + json.object do + json.field "title", video.title + json.field "videoId", video.id + json.field "lengthSeconds", video.length_seconds + + json.field "videoThumbnails" do + generate_thumbnails(json, video.id) + end + end + end + end + end + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +struct SearchChannel + include DB::Serializable + + property author : String + property ucid : String + property author_thumbnail : String + property subscriber_count : Int32 + property video_count : Int32 + property description_html : String + property auto_generated : Bool + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "type", "channel" + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "authorThumbnails" do + json.array do + qualities = {32, 48, 76, 100, 176, 512} + + qualities.each do |quality| + json.object do + json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}") + json.field "width", quality + json.field "height", quality + end + end + end + end + + json.field "autoGenerated", self.auto_generated + json.field "subCount", self.subscriber_count + json.field "videoCount", self.video_count + + json.field "description", html_to_content(self.description_html) + json.field "descriptionHtml", self.description_html + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +class Category + include DB::Serializable + + property title : String + property contents : Array(SearchItem) | Array(Video) + property url : String? + property description_html : String + property badges : Array(Tuple(String, String))? + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "title", self.title + json.field "contents", self.contents + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category diff --git a/src/invidious/search.cr b/src/invidious/search.cr index a3fcc7a3..d95d802e 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -1,233 +1,3 @@ -struct SearchVideo - include DB::Serializable - - property title : String - property id : String - property author : String - property ucid : String - property published : Time - property views : Int64 - property description_html : String - property length_seconds : Int32 - property live_now : Bool - property premium : Bool - property premiere_timestamp : Time? - - def to_xml(auto_generated, query_params, xml : XML::Builder) - query_params["v"] = self.id - - xml.element("entry") do - xml.element("id") { xml.text "yt:video:#{self.id}" } - xml.element("yt:videoId") { xml.text self.id } - xml.element("yt:channelId") { xml.text self.ucid } - xml.element("title") { xml.text self.title } - xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}") - - xml.element("author") do - if auto_generated - xml.element("name") { xml.text self.author } - xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" } - else - xml.element("name") { xml.text author } - xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" } - end - end - - xml.element("content", type: "xhtml") do - xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do - xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do - xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg") - end - - xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) } - end - end - - xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } - - xml.element("media:group") do - xml.element("media:title") { xml.text self.title } - xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg", - width: "320", height: "180") - xml.element("media:description") { xml.text html_to_content(self.description_html) } - end - - xml.element("media:community") do - xml.element("media:statistics", views: self.views) - end - end - end - - def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil) - if xml - to_xml(HOST_URL, auto_generated, query_params, xml) - else - XML.build do |json| - to_xml(HOST_URL, auto_generated, query_params, xml) - end - end - end - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "video" - json.field "title", self.title - json.field "videoId", self.id - - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "videoThumbnails" do - generate_thumbnails(json, self.id) - end - - json.field "description", html_to_content(self.description_html) - json.field "descriptionHtml", self.description_html - - json.field "viewCount", self.views - json.field "published", self.published.to_unix - json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) - json.field "lengthSeconds", self.length_seconds - json.field "liveNow", self.live_now - json.field "premium", self.premium - json.field "isUpcoming", self.is_upcoming - - if self.premiere_timestamp - json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix - end - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end - - def is_upcoming - premiere_timestamp ? true : false - end -end - -struct SearchPlaylistVideo - include DB::Serializable - - property title : String - property id : String - property length_seconds : Int32 -end - -struct SearchPlaylist - include DB::Serializable - - property title : String - property id : String - property author : String - property ucid : String - property video_count : Int32 - property videos : Array(SearchPlaylistVideo) - property thumbnail : String? - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "playlist" - json.field "title", self.title - json.field "playlistId", self.id - json.field "playlistThumbnail", self.thumbnail - - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "videoCount", self.video_count - json.field "videos" do - json.array do - self.videos.each do |video| - json.object do - json.field "title", video.title - json.field "videoId", video.id - json.field "lengthSeconds", video.length_seconds - - json.field "videoThumbnails" do - generate_thumbnails(json, video.id) - end - end - end - end - end - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end -end - -struct SearchChannel - include DB::Serializable - - property author : String - property ucid : String - property author_thumbnail : String - property subscriber_count : Int32 - property video_count : Int32 - property description_html : String - property auto_generated : Bool - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "channel" - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "authorThumbnails" do - json.array do - qualities = {32, 48, 76, 100, 176, 512} - - qualities.each do |quality| - json.object do - json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}") - json.field "width", quality - json.field "height", quality - end - end - end - end - - json.field "autoGenerated", self.auto_generated - json.field "subCount", self.subscriber_count - json.field "videoCount", self.video_count - - json.field "description", html_to_content(self.description_html) - json.field "descriptionHtml", self.description_html - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end -end - -alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist - def channel_search(query, page, channel) response = YT_POOL.client &.get("/channel/#{channel}") @@ -462,5 +232,20 @@ def process_search_query(query, page, user, region) count, items = search(search_query, search_params, region).as(Tuple) end - {search_query, count, items, operators} + # Light processing to flatten search results out of Categories. + # They should ideally be supported in the future. + items_without_category = [] of SearchItem | ChannelVideo + items.each do |i| + if i.is_a? Category + i.contents.each do |nest_i| + if !nest_i.is_a? Video + items_without_category << nest_i + end + end + else + items_without_category << i + end + end + + {search_query, items_without_category.size, items_without_category, operators} end diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index d9c07142..0e6bd77c 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -275,7 +275,7 @@ struct Video end end - def to_json(locale, json : JSON::Builder) + def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder) json.object do json.field "type", "video" diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr index 3391bd17..84da1091 100644 --- a/src/invidious/views/components/item.ecr +++ b/src/invidious/views/components/item.ecr @@ -109,6 +109,7 @@ <% end %> + <% when Category %> <% else %> <% if !env.get("preferences").as(Preferences).thin_mode %>