From a50f64f6e9ab55efa9301915817b11f152625f22 Mon Sep 17 00:00:00 2001 From: syeopite Date: Fri, 7 May 2021 05:13:53 -0700 Subject: [PATCH] Add parser for categories (shelfRenderer) This commit adds a new parser for YT's shelfRenderers which are typically used to denote different categories.The code for featured channels parsing has also been moved to use the new parser but some additional refactoring are needed there. The ContinuationExtractor has also been improved and is now capable of extraction continuation data that is packaged under "appendContinuationItemsAction" In additional this commit adds some useful helper functions to extract the current selected tab the continuation token. This is to mainly reduce code size and repetition. -- This cherry-picked commit also removes the code for parsing featured channels present on the original. (cherry picked from commit 8000d538dbbf1eb9c78e000b1449926ba3b24da9) --- src/invidious/helpers/extractors.cr | 117 +++++++++-- src/invidious/helpers/helpers.cr | 29 ++- src/invidious/helpers/invidiousitems.cr | 256 ++++++++++++++++++++++++ src/invidious/search.cr | 230 --------------------- src/invidious/views/components/item.ecr | 1 + 5 files changed, 389 insertions(+), 244 deletions(-) create mode 100644 src/invidious/helpers/invidiousitems.cr diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr index e8daa913..1fa06c91 100644 --- a/src/invidious/helpers/extractors.cr +++ b/src/invidious/helpers/extractors.cr @@ -13,6 +13,7 @@ private ITEM_PARSERS = { ChannelParser.new, GridPlaylistParser.new, PlaylistParser.new, + CategoryParser.new, } private struct AuthorFallback @@ -95,7 +96,7 @@ end private class ChannelParser < ItemParser def process(item, author_fallback) - if item_contents = item["channelRenderer"]? + if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?) return self.parse(item_contents, author_fallback) end end @@ -194,6 +195,88 @@ private class PlaylistParser < ItemParser end end +private class CategoryParser < ItemParser + def process(item, author_fallback) + if item_contents = item["shelfRenderer"]? + return self.parse(item_contents, author_fallback) + end + end + + def parse(item_contents, author_fallback) + # Title extraction is a bit complicated. There are two possible routes for it + # as well as times when the title attribute just isn't sent by YT. + + title_container = item_contents["title"]? || "" + if !title_container.is_a? String + if title = title_container["simpleText"]? + title = title.as_s + else + title = title_container["runs"][0]["text"].as_s + end + else + title = "" + end + + browse_endpoint = item_contents["endpoint"]?.try &.["browseEndpoint"] || nil + browse_endpoint_data = "" + category_type = 0 # 0: Video, 1: Channels, 2: Playlist/feed, 3: trending + + # There's no endpoint data for video and trending category + if !item_contents["endpoint"]? + if !item_contents["videoId"]? + category_type = 3 + end + end + + if !browse_endpoint.nil? + # Playlist/feed categories doesn't need the params value (nor is it even included in yt response) + # instead it uses the browseId parameter. So if there isn't a params value we can assume the + # category is a playlist/feed + if browse_endpoint["params"]? + browse_endpoint_data = browse_endpoint["params"].as_s + category_type = 1 + else + browse_endpoint_data = browse_endpoint["browseId"].as_s + category_type = 2 + end + end + + # Sometimes a category can have badges. + badges = [] of Tuple(String, String) # (Badge style, label) + item_contents["badges"]?.try &.as_a.each do |badge| + badge = badge["metadataBadgeRenderer"] + badges << {badge["style"].as_s, badge["label"].as_s} + end + + # Content parsing + contents = [] of SearchItem + + # Content could be in three locations. + if content_container = item_contents["content"]["horizontalListRenderer"]? + elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"] + elsif content_container = item_contents["content"]["verticalListRenderer"] + else + content_container = item_contents["contents"] + end + + raw_contents = content_container["items"].as_a + raw_contents.each do |item| + result = extract_item(item) + if !result.nil? + contents << result + end + end + + Category.new({ + title: title, + contents: contents, + browse_endpoint_data: browse_endpoint_data, + continuation_token: nil, + badges: badges, + }) + end +end + # The following are the extractors for extracting an array of items from # the internal Youtube API's JSON response. The result is then packaged into # a structure we can more easily use via the parsers above. Their internals are @@ -217,19 +300,16 @@ private class YoutubeTabsExtractor < ItemsContainerExtractor private def extract(target) raw_items = [] of JSON::Any selected_tab = extract_selected_tab(target["tabs"]) - content = selected_tab["tabRenderer"]["content"] + content = selected_tab["content"] content["sectionListRenderer"]["contents"].as_a.each do |renderer_container| renderer_container = renderer_container["itemSectionRenderer"] renderer_container_contents = renderer_container["contents"].as_a[0] - # Shelf renderer usually refer to a category and would need special handling once - # An extractor for categories are added. But for now it is just used to - # extract items for the trending page + # Category extraction if items_container = renderer_container_contents["shelfRenderer"]? - if items_container["content"]["expandedShelfContentsRenderer"]? - items_container = items_container["content"]["expandedShelfContentsRenderer"] - end + raw_items << renderer_container_contents + next elsif items_container = renderer_container_contents["gridRenderer"]? else items_container = renderer_container_contents @@ -265,6 +345,8 @@ private class ContinuationExtractor < ItemsContainerExtractor def process(initial_data) if target = initial_data["continuationContents"]? self.extract(target) + elsif target = initial_data["appendContinuationItemsAction"]? + self.extract(target) end end @@ -272,13 +354,16 @@ private class ContinuationExtractor < ItemsContainerExtractor raw_items = [] of JSON::Any if content = target["gridContinuation"]? raw_items = content["items"].as_a + elsif content = target["continuationItems"]? + raw_items = content.as_a end return raw_items end end -def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil) +def extract_item(item : JSON::Any, author_fallback : String? = nil, + author_id_fallback : String? = nil) # Parses an item from Youtube's JSON response into a more usable structure. # The end result can either be a SearchVideo, SearchPlaylist or SearchChannel. author_fallback = AuthorFallback.new(author_fallback, author_id_fallback) @@ -295,13 +380,20 @@ def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fa # TODO radioRenderer, showRenderer, shelfRenderer, horizontalCardListRenderer, searchPyvRenderer end -def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) +def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, + author_id_fallback : String? = nil) items = [] of SearchItem - initial_data = initial_data["contents"]?.try &.as_h || initial_data["response"]?.try &.as_h || initial_data + + if unpackaged_data = initial_data["contents"]?.try &.as_h + elsif unpackaged_data = initial_data["response"]?.try &.as_h + elsif unpackaged_data = initial_data["onResponseReceivedActions"]?.try &.as_a.[0].as_h + else + unpackaged_data = initial_data + end # This is identicial to the parser cyling of extract_item(). ITEM_CONTAINER_EXTRACTOR.each do |extractor| - results = extractor.process(initial_data) + results = extractor.process(unpackaged_data) if !results.nil? results.each do |item| parsed_result = extract_item(item, author_fallback, author_id_fallback) @@ -310,6 +402,7 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri items << parsed_result end end + return items end end diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 1a058195..a52c7bd4 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -248,12 +248,37 @@ def html_to_content(description_html : String) end def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) + extracted = extract_items(initial_data, author_fallback, author_id_fallback) + + if extracted.is_a?(Category) + target = extracted.contents + else + target = extracted + end + return target.select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) end def extract_selected_tab(tabs) # Extract the selected tab from the array of tabs Youtube returns - return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0] + return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]["tabRenderer"] +end + +def fetch_continuation_token(items : Array(JSON::Any)) + # Fetches the continuation token from an array of items + return items.last["continuationItemRenderer"]? + .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s +end + +def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) + # Fetches the continuation token from initial data + if initial_data["onResponseReceivedActions"]? + continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] + else + tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) + continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] + end + + return fetch_continuation_token(continuation_items.as_a) end def check_enum(db, enum_name, struct_type = nil) diff --git a/src/invidious/helpers/invidiousitems.cr b/src/invidious/helpers/invidiousitems.cr new file mode 100644 index 00000000..50a47726 --- /dev/null +++ b/src/invidious/helpers/invidiousitems.cr @@ -0,0 +1,256 @@ +struct SearchVideo + include DB::Serializable + + property title : String + property id : String + property author : String + property ucid : String + property published : Time + property views : Int64 + property description_html : String + property length_seconds : Int32 + property live_now : Bool + property premium : Bool + property premiere_timestamp : Time? + + def to_xml(auto_generated, query_params, xml : XML::Builder) + query_params["v"] = self.id + + xml.element("entry") do + xml.element("id") { xml.text "yt:video:#{self.id}" } + xml.element("yt:videoId") { xml.text self.id } + xml.element("yt:channelId") { xml.text self.ucid } + xml.element("title") { xml.text self.title } + xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}") + + xml.element("author") do + if auto_generated + xml.element("name") { xml.text self.author } + xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" } + else + xml.element("name") { xml.text author } + xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" } + end + end + + xml.element("content", type: "xhtml") do + xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do + xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do + xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg") + end + + xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) } + end + end + + xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } + + xml.element("media:group") do + xml.element("media:title") { xml.text self.title } + xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg", + width: "320", height: "180") + xml.element("media:description") { xml.text html_to_content(self.description_html) } + end + + xml.element("media:community") do + xml.element("media:statistics", views: self.views) + end + end + end + + def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil) + if xml + to_xml(HOST_URL, auto_generated, query_params, xml) + else + XML.build do |json| + to_xml(HOST_URL, auto_generated, query_params, xml) + end + end + end + + def to_json(locale : Hash(String, JSON::Any), json : JSON::Builder) + json.object do + json.field "type", "video" + json.field "title", self.title + json.field "videoId", self.id + + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "videoThumbnails" do + generate_thumbnails(json, self.id) + end + + json.field "description", html_to_content(self.description_html) + json.field "descriptionHtml", self.description_html + + json.field "viewCount", self.views + json.field "published", self.published.to_unix + json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) + json.field "lengthSeconds", self.length_seconds + json.field "liveNow", self.live_now + json.field "premium", self.premium + json.field "isUpcoming", self.is_upcoming + + if self.premiere_timestamp + json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix + end + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end + + def is_upcoming + premiere_timestamp ? true : false + end +end + +struct SearchPlaylistVideo + include DB::Serializable + + property title : String + property id : String + property length_seconds : Int32 +end + +struct SearchPlaylist + include DB::Serializable + + property title : String + property id : String + property author : String + property ucid : String + property video_count : Int32 + property videos : Array(SearchPlaylistVideo) + property thumbnail : String? + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "type", "playlist" + json.field "title", self.title + json.field "playlistId", self.id + json.field "playlistThumbnail", self.thumbnail + + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "videoCount", self.video_count + json.field "videos" do + json.array do + self.videos.each do |video| + json.object do + json.field "title", video.title + json.field "videoId", video.id + json.field "lengthSeconds", video.length_seconds + + json.field "videoThumbnails" do + generate_thumbnails(json, video.id) + end + end + end + end + end + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +struct SearchChannel + include DB::Serializable + + property author : String + property ucid : String + property author_thumbnail : String + property subscriber_count : Int32 + property video_count : Int32 + property description_html : String + property auto_generated : Bool + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "type", "channel" + json.field "author", self.author + json.field "authorId", self.ucid + json.field "authorUrl", "/channel/#{self.ucid}" + + json.field "authorThumbnails" do + json.array do + qualities = {32, 48, 76, 100, 176, 512} + + qualities.each do |quality| + json.object do + json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}") + json.field "width", quality + json.field "height", quality + end + end + end + end + + json.field "autoGenerated", self.auto_generated + json.field "subCount", self.subscriber_count + json.field "videoCount", self.video_count + + json.field "description", html_to_content(self.description_html) + json.field "descriptionHtml", self.description_html + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +class Category + include DB::Serializable + + property title : String + property contents : Array(SearchItem) | SearchItem + property browse_endpoint_data : String? + property continuation_token : String? + property badges : Array(Tuple(String, String))? + + def to_json(locale, json : JSON::Builder) + json.object do + json.field "title", self.title + json.field "contents", self.contents + end + end + + def to_json(locale, json : JSON::Builder | Nil = nil) + if json + to_json(locale, json) + else + JSON.build do |json| + to_json(locale, json) + end + end + end +end + +alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category diff --git a/src/invidious/search.cr b/src/invidious/search.cr index a3fcc7a3..eb9c37c5 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -1,233 +1,3 @@ -struct SearchVideo - include DB::Serializable - - property title : String - property id : String - property author : String - property ucid : String - property published : Time - property views : Int64 - property description_html : String - property length_seconds : Int32 - property live_now : Bool - property premium : Bool - property premiere_timestamp : Time? - - def to_xml(auto_generated, query_params, xml : XML::Builder) - query_params["v"] = self.id - - xml.element("entry") do - xml.element("id") { xml.text "yt:video:#{self.id}" } - xml.element("yt:videoId") { xml.text self.id } - xml.element("yt:channelId") { xml.text self.ucid } - xml.element("title") { xml.text self.title } - xml.element("link", rel: "alternate", href: "#{HOST_URL}/watch?#{query_params}") - - xml.element("author") do - if auto_generated - xml.element("name") { xml.text self.author } - xml.element("uri") { xml.text "#{HOST_URL}/channel/#{self.ucid}" } - else - xml.element("name") { xml.text author } - xml.element("uri") { xml.text "#{HOST_URL}/channel/#{ucid}" } - end - end - - xml.element("content", type: "xhtml") do - xml.element("div", xmlns: "http://www.w3.org/1999/xhtml") do - xml.element("a", href: "#{HOST_URL}/watch?#{query_params}") do - xml.element("img", src: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg") - end - - xml.element("p", style: "word-break:break-word;white-space:pre-wrap") { xml.text html_to_content(self.description_html) } - end - end - - xml.element("published") { xml.text self.published.to_s("%Y-%m-%dT%H:%M:%S%:z") } - - xml.element("media:group") do - xml.element("media:title") { xml.text self.title } - xml.element("media:thumbnail", url: "#{HOST_URL}/vi/#{self.id}/mqdefault.jpg", - width: "320", height: "180") - xml.element("media:description") { xml.text html_to_content(self.description_html) } - end - - xml.element("media:community") do - xml.element("media:statistics", views: self.views) - end - end - end - - def to_xml(auto_generated, query_params, xml : XML::Builder | Nil = nil) - if xml - to_xml(HOST_URL, auto_generated, query_params, xml) - else - XML.build do |json| - to_xml(HOST_URL, auto_generated, query_params, xml) - end - end - end - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "video" - json.field "title", self.title - json.field "videoId", self.id - - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "videoThumbnails" do - generate_thumbnails(json, self.id) - end - - json.field "description", html_to_content(self.description_html) - json.field "descriptionHtml", self.description_html - - json.field "viewCount", self.views - json.field "published", self.published.to_unix - json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) - json.field "lengthSeconds", self.length_seconds - json.field "liveNow", self.live_now - json.field "premium", self.premium - json.field "isUpcoming", self.is_upcoming - - if self.premiere_timestamp - json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix - end - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end - - def is_upcoming - premiere_timestamp ? true : false - end -end - -struct SearchPlaylistVideo - include DB::Serializable - - property title : String - property id : String - property length_seconds : Int32 -end - -struct SearchPlaylist - include DB::Serializable - - property title : String - property id : String - property author : String - property ucid : String - property video_count : Int32 - property videos : Array(SearchPlaylistVideo) - property thumbnail : String? - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "playlist" - json.field "title", self.title - json.field "playlistId", self.id - json.field "playlistThumbnail", self.thumbnail - - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "videoCount", self.video_count - json.field "videos" do - json.array do - self.videos.each do |video| - json.object do - json.field "title", video.title - json.field "videoId", video.id - json.field "lengthSeconds", video.length_seconds - - json.field "videoThumbnails" do - generate_thumbnails(json, video.id) - end - end - end - end - end - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end -end - -struct SearchChannel - include DB::Serializable - - property author : String - property ucid : String - property author_thumbnail : String - property subscriber_count : Int32 - property video_count : Int32 - property description_html : String - property auto_generated : Bool - - def to_json(locale, json : JSON::Builder) - json.object do - json.field "type", "channel" - json.field "author", self.author - json.field "authorId", self.ucid - json.field "authorUrl", "/channel/#{self.ucid}" - - json.field "authorThumbnails" do - json.array do - qualities = {32, 48, 76, 100, 176, 512} - - qualities.each do |quality| - json.object do - json.field "url", self.author_thumbnail.gsub(/=\d+/, "=s#{quality}") - json.field "width", quality - json.field "height", quality - end - end - end - end - - json.field "autoGenerated", self.auto_generated - json.field "subCount", self.subscriber_count - json.field "videoCount", self.video_count - - json.field "description", html_to_content(self.description_html) - json.field "descriptionHtml", self.description_html - end - end - - def to_json(locale, json : JSON::Builder | Nil = nil) - if json - to_json(locale, json) - else - JSON.build do |json| - to_json(locale, json) - end - end - end -end - -alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist - def channel_search(query, page, channel) response = YT_POOL.client &.get("/channel/#{channel}") diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr index 68aa1812..ec282216 100644 --- a/src/invidious/views/components/item.ecr +++ b/src/invidious/views/components/item.ecr @@ -96,6 +96,7 @@ <% end %> + <% when Category %> <% else %> <% if !env.get("preferences").as(Preferences).thin_mode %>