From ce7db8d2cb87111af15de2de9faf12aae38283bb Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Sat, 5 Nov 2022 18:56:35 +0100 Subject: [PATCH] extractors: Add continuation token parser --- spec/invidious/hashtag_spec.cr | 4 +- src/invidious/channels/playlists.cr | 16 +----- src/invidious/hashtag.cr | 3 +- src/invidious/helpers/serialized_yt_data.cr | 7 +++ src/invidious/search/processors.cr | 14 ++--- src/invidious/yt_backend/extractors.cr | 54 +++++++++++++++----- src/invidious/yt_backend/extractors_utils.cr | 27 ++-------- 7 files changed, 63 insertions(+), 62 deletions(-) diff --git a/spec/invidious/hashtag_spec.cr b/spec/invidious/hashtag_spec.cr index 77676878..266ec57b 100644 --- a/spec/invidious/hashtag_spec.cr +++ b/spec/invidious/hashtag_spec.cr @@ -4,7 +4,7 @@ Spectator.describe Invidious::Hashtag do it "parses richItemRenderer containers (test 1)" do # Enable mock test_content = load_mock("hashtag/martingarrix_page1") - videos = extract_items(test_content) + videos, _ = extract_items(test_content) expect(typeof(videos)).to eq(Array(SearchItem)) expect(videos.size).to eq(60) @@ -57,7 +57,7 @@ Spectator.describe Invidious::Hashtag do it "parses richItemRenderer containers (test 2)" do # Enable mock test_content = load_mock("hashtag/martingarrix_page2") - videos = extract_items(test_content) + videos, _ = extract_items(test_content) expect(typeof(videos)).to eq(Array(SearchItem)) expect(videos.size).to eq(60) diff --git a/src/invidious/channels/playlists.cr b/src/invidious/channels/playlists.cr index e6c0a1d5..0d46499a 100644 --- a/src/invidious/channels/playlists.cr +++ b/src/invidious/channels/playlists.cr @@ -1,18 +1,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by) if continuation response_json = YoutubeAPI.browse(continuation) - continuation_items = response_json["onResponseReceivedActions"]? - .try &.[0]["appendContinuationItemsAction"]["continuationItems"] - - return [] of SearchItem, nil if !continuation_items - - items = [] of SearchItem - continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item| - parse_item(item, author, ucid).try { |t| items << t } - } - - continuation = continuation_items.as_a.last["continuationItemRenderer"]? - .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s + items, continuation = extract_items(response_json, author, ucid) else url = "/channel/#{ucid}/playlists?flow=list&view=1" @@ -30,8 +19,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by) initial_data = extract_initial_data(response.body) return [] of SearchItem, nil if !initial_data - items = extract_items(initial_data, author, ucid) - continuation = response.body.match(/"token":"(?[^"]+)"/).try &.["continuation"]? + items, continuation = extract_items(initial_data, author, ucid) end return items, continuation diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr index afe31a36..bc329205 100644 --- a/src/invidious/hashtag.cr +++ b/src/invidious/hashtag.cr @@ -8,7 +8,8 @@ module Invidious::Hashtag client_config = YoutubeAPI::ClientConfig.new(region: region) response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config) - return extract_items(response) + items, _ = extract_items(response) + return items end def generate_continuation(hashtag : String, cursor : Int) diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr index c52e2a0d..635f0984 100644 --- a/src/invidious/helpers/serialized_yt_data.cr +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -265,4 +265,11 @@ class Category end end +struct Continuation + getter token + + def initialize(@token : String) + end +end + alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr index 683a4a7e..7e909590 100644 --- a/src/invidious/search/processors.cr +++ b/src/invidious/search/processors.cr @@ -9,7 +9,8 @@ module Invidious::Search client_config = YoutubeAPI::ClientConfig.new(region: query.region) initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config) - return extract_items(initial_data) + items, _ = extract_items(initial_data) + return items end # Search a youtube channel @@ -30,16 +31,7 @@ module Invidious::Search continuation = produce_channel_search_continuation(ucid, query.text, query.page) response_json = YoutubeAPI.browse(continuation) - continuation_items = response_json["onResponseReceivedActions"]? - .try &.[0]["appendContinuationItemsAction"]["continuationItems"] - - return [] of SearchItem if !continuation_items - - items = [] of SearchItem - continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item| - parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t } - end - + items, _ = extract_items(response_json, "", ucid) return items end diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index a4b20d04..baf52118 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -7,7 +7,7 @@ require "../helpers/serialized_yt_data" private ITEM_CONTAINER_EXTRACTOR = { Extractors::YouTubeTabs, Extractors::SearchResults, - Extractors::Continuation, + Extractors::ContinuationContent, } private ITEM_PARSERS = { @@ -18,6 +18,7 @@ private ITEM_PARSERS = { Parsers::CategoryRendererParser, Parsers::RichItemRendererParser, Parsers::ReelItemRendererParser, + Parsers::ContinuationItemRendererParser, } private alias InitialData = Hash(String, JSON::Any) @@ -347,14 +348,9 @@ private module Parsers content_container = item_contents["contents"] end - raw_contents = content_container["items"]?.try &.as_a - if !raw_contents.nil? - raw_contents.each do |item| - result = parse_item(item) - if !result.nil? - contents << result - end - end + content_container["items"]?.try &.as_a.each do |item| + result = parse_item(item, author_fallback.name, author_fallback.id) + contents << result if result.is_a?(SearchItem) end Category.new({ @@ -477,6 +473,35 @@ private module Parsers return {{@type.name}} end end + + # Parses an InnerTube continuationItemRenderer into a Continuation. + # Returns nil when the given object isn't a continuationItemRenderer. + # + # continuationItemRenderer contains various metadata ued to load more + # content (i.e when the user scrolls down). The interesting bit is the + # protobuf object known as the "continutation token". Previously, those + # were generated from sratch, but recent (as of 11/2022) Youtube changes + # are forcing us to extract them from replies. + # + module ContinuationItemRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["continuationItemRenderer"]? + return self.parse(item_contents) + end + end + + private def self.parse(item_contents) + token = item_contents + .dig?("continuationEndpoint", "continuationCommand", "token") + .try &.as_s + + return Continuation.new(token) if token + end + + def self.parser_name + return {{@type.name}} + end + end end # The following are the extractors for extracting an array of items from @@ -746,13 +771,18 @@ def extract_items( initial_data : InitialData, author_fallback : String? = nil, author_id_fallback : String? = nil -) : Array(SearchItem) +) : {Array(SearchItem), String?} items = [] of SearchItem + continuation = nil extract_items(initial_data) do |item| parsed = parse_item(item, author_fallback, author_id_fallback) - items << parsed if !parsed.nil? + + case parsed + when .is_a?(Continuation) then continuation = parsed.token + when .is_a?(SearchItem) then items << parsed + end end - return items + return items, continuation end diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr index f8245160..0cb3c079 100644 --- a/src/invidious/yt_backend/extractors_utils.cr +++ b/src/invidious/yt_backend/extractors_utils.cr @@ -68,10 +68,10 @@ rescue ex return false end -def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) - extracted = extract_items(initial_data, author_fallback, author_id_fallback) +def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) : Array(SearchVideo) + extracted, _ = extract_items(initial_data, author_fallback, author_id_fallback) - target = [] of SearchItem + target = [] of (SearchItem | Continuation) extracted.each do |i| if i.is_a?(Category) i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video } @@ -79,28 +79,11 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str target << i end end - return target.select(SearchVideo).map(&.as(SearchVideo)) + + return target.select(SearchVideo) end def extract_selected_tab(tabs) # Extract the selected tab from the array of tabs Youtube returns return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"] end - -def fetch_continuation_token(items : Array(JSON::Any)) - # Fetches the continuation token from an array of items - return items.last["continuationItemRenderer"]? - .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s -end - -def fetch_continuation_token(initial_data : Hash(String, JSON::Any)) - # Fetches the continuation token from initial data - if initial_data["onResponseReceivedActions"]? - continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"] - else - tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]) - continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"] - end - - return fetch_continuation_token(continuation_items.as_a) -end