From 43bd331e48ad1a19cd3c7a6d5beb72e3127c5edc Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Thu, 1 Apr 2021 02:36:43 +0000 Subject: [PATCH 1/5] Multiple youtube_api.cr helper fixes Add documentation Bump web client version string Add charset=UTF-8 to the 'content-type' header Parse JSON and return it as a Hash Handle API error messages --- src/invidious/channels.cr | 40 +++++++--------------------- src/invidious/helpers/youtube_api.cr | 29 +++++++++++++++++--- src/invidious/playlists.cr | 2 +- src/invidious/search.cr | 3 +-- 4 files changed, 36 insertions(+), 38 deletions(-) diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index 3109b508..bbef3d4f 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -229,22 +229,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) page = 1 LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page") - response_body = get_channel_videos_response(ucid, page, auto_generated: auto_generated) - - videos = [] of SearchVideo - begin - initial_data = JSON.parse(response_body) - raise InfoException.new("Could not extract channel JSON") if !initial_data - - LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel videos page initial_data") - videos = extract_videos(initial_data.as_h, author, ucid) - rescue ex - if response_body.includes?("To continue with your YouTube experience, please fill out the form below.") || - response_body.includes?("https://www.google.com/sorry/index") - raise InfoException.new("Could not extract channel info. 
Instance is likely blocked.") - end - raise ex - end + initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) + videos = extract_videos(initial_data, author, ucid) LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") rss.xpath_nodes("//feed/entry").each do |entry| @@ -304,10 +290,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) ids = [] of String loop do - response_body = get_channel_videos_response(ucid, page, auto_generated: auto_generated) - initial_data = JSON.parse(response_body) - raise InfoException.new("Could not extract channel JSON") if !initial_data - videos = extract_videos(initial_data.as_h, author, ucid) + initial_data = get_channel_videos_response(ucid, page, auto_generated: auto_generated) + videos = extract_videos(initial_data, author, ucid) count = videos.size videos = videos.map { |video| ChannelVideo.new({ @@ -358,8 +342,7 @@ end def fetch_channel_playlists(ucid, author, continuation, sort_by) if continuation response_json = request_youtube_api_browse(continuation) - result = JSON.parse(response_json) - continuationItems = result["onResponseReceivedActions"]? + continuationItems = response_json["onResponseReceivedActions"]? 
.try &.[0]["appendContinuationItemsAction"]["continuationItems"] return [] of SearchItem, nil if !continuationItems @@ -964,21 +947,16 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") videos = [] of SearchVideo 2.times do |i| - response_json = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) - initial_data = JSON.parse(response_json) - break if !initial_data - videos.concat extract_videos(initial_data.as_h, author, ucid) + initial_data = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) + videos.concat extract_videos(initial_data, author, ucid) end return videos.size, videos end def get_latest_videos(ucid) - response_json = get_channel_videos_response(ucid) - initial_data = JSON.parse(response_json) - return [] of SearchVideo if !initial_data + initial_data = get_channel_videos_response(ucid) author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s - items = extract_videos(initial_data.as_h, author, ucid) - return items + return extract_videos(initial_data, author, ucid) end diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index 30413532..84e0c38f 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -4,8 +4,18 @@ # Hard-coded constants required by the API HARDCODED_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" -HARDCODED_CLIENT_VERS = "2.20210318.08.00" +HARDCODED_CLIENT_VERS = "2.20210330.08.00" +#################################################################### +# request_youtube_api_browse(continuation) +# +# Requests the youtubei/vi/browse endpoint with the required headers +# to get JSON in en-US (english US). +# +# The requested data is a continuation token (ctoken). 
Depending on +# this token's contents, the returned data can be comments, playlist +# videos, search results, channel community tab, ... +# def request_youtube_api_browse(continuation) # JSON Request data, required by the API data = { @@ -20,12 +30,23 @@ def request_youtube_api_browse(continuation) "continuation": continuation, } - # Send the POST request and return result + # Send the POST request and parse result response = YT_POOL.client &.post( "/youtubei/v1/browse?key=#{HARDCODED_API_KEY}", - headers: HTTP::Headers{"content-type" => "application/json"}, + headers: HTTP::Headers{"content-type" => "application/json; charset=UTF-8"}, body: data.to_json ) - return response.body + initial_data = JSON.parse(response.body).as_h + + # Error handling + if initial_data.has_key?("error") + code = initial_data["error"]["code"] + message = initial_data["error"]["message"].to_s.sub(/(\\n)+\^$/, "") + + raise InfoException.new("Could not extract JSON. Youtube API returned \ + error #{code} with message:<br>\"#{message}\"") + end + + return initial_data end diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index 073a9986..150f1c15 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -451,7 +451,7 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil) offset = (offset / 100).to_i64 * 100_i64 ctoken = produce_playlist_continuation(playlist.id, offset) - initial_data = JSON.parse(request_youtube_api_browse(ctoken)).as_h + initial_data = request_youtube_api_browse(ctoken) else response = YT_POOL.client &.get("/playlist?list=#{playlist.id}&gl=US&hl=en") initial_data = extract_initial_data(response.body) diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 4b216613..7c9c389e 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -246,8 +246,7 @@ def channel_search(query, page, channel) continuation = produce_channel_search_continuation(ucid, query, page) response_json = request_youtube_api_browse(continuation) - result = JSON.parse(response_json) - continuationItems = result["onResponseReceivedActions"]? 
.try &.[0]["appendContinuationItemsAction"]["continuationItems"] return 0, [] of SearchItem if !continuationItems From 26a7e1b049bde355b5ac05d1923b92c6f4a20179 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Wed, 7 Apr 2021 03:15:02 +0200 Subject: [PATCH 2/5] Use '/youtubei/v1/search' endpoint for search queries --- src/invidious/helpers/youtube_api.cr | 42 +++++++++++++++++++++++++++- src/invidious/search.cr | 7 +---- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index 84e0c38f..dc3a7eb5 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -30,9 +30,49 @@ def request_youtube_api_browse(continuation) "continuation": continuation, } + return _youtube_api_post_json("/youtubei/v1/browse", data) +end + +#################################################################### +# request_youtube_api_search(search_query, params, region) +# +# Requests the youtubei/vi/search endpoint with the required headers +# to get JSON in en-US (english US). +# +# The requested data is a search string, with some additional +# paramters, formatted as a base64 string. +# +def request_youtube_api_search(search_query : String, params : String, region = nil) + # JSON Request data, required by the API + data = { + "query": URI.encode_www_form(search_query), + "context": { + "client": { + "hl": "en", + "gl": region || "US", # Can't be empty! + "clientName": "WEB", + "clientVersion": HARDCODED_CLIENT_VERS, + }, + }, + "params": params, + } + + return _youtube_api_post_json("/youtubei/v1/search", data) +end + +#################################################################### +# _youtube_api_post_json(endpoint, data) +# +# Internal function that does the actual request to youtube servers +# and handles errors. +# +# The requested data is an endpoint (URL without the domain part) +# and the data as a Hash object. 
+# +def _youtube_api_post_json(endpoint, data) # Send the POST request and parse result response = YT_POOL.client &.post( - "/youtubei/v1/browse?key=#{HARDCODED_API_KEY}", + "#{endpoint}?key=#{HARDCODED_API_KEY}", headers: HTTP::Headers{"content-type" => "application/json; charset=UTF-8"}, body: data.to_json ) diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 7c9c389e..662173a0 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -263,14 +263,9 @@ end def search(query, search_params = produce_search_params(content_type: "all"), region = nil) return 0, [] of SearchItem if query.empty? - body = YT_POOL.client(region, &.get("/results?search_query=#{URI.encode_www_form(query)}&sp=#{search_params}&hl=en").body) - return 0, [] of SearchItem if body.empty? - - initial_data = extract_initial_data(body) + initial_data = request_youtube_api_search(query, search_params, region) items = extract_items(initial_data) - # initial_data["estimatedResults"]?.try &.as_s.to_i64 - return items.size, items end From 344ccf3b03f640fa65504d45a1fa8df87e1c744c Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Tue, 13 Apr 2021 19:33:37 +0200 Subject: [PATCH 3/5] Use '/youtubei/v1/browse' endpoint for playlists --- src/invidious/helpers/youtube_api.cr | 36 ++++++++++++++++++++++++---- src/invidious/playlists.cr | 20 +++------------- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index dc3a7eb5..1b8f6dae 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -8,15 +8,20 @@ HARDCODED_CLIENT_VERS = "2.20210330.08.00" #################################################################### # request_youtube_api_browse(continuation) +# request_youtube_api_browse(browse_id, params) # # Requests the youtubei/vi/browse endpoint with the required headers # to get JSON in en-US (english US). 
# -# The requested data is a continuation token (ctoken). Depending on -# this token's contents, the returned data can be comments, playlist -# videos, search results, channel community tab, ... +# The requested data can either be: # -def request_youtube_api_browse(continuation) +# - A continuation token (ctoken). Depending on this token's +# contents, the returned data can be comments, playlist videos, +# search results, channel community tab, ... +# +# - A playlist ID (parameters MUST be an empty string) +# +def request_youtube_api_browse(continuation : String) # JSON Request data, required by the API data = { "context": { @@ -33,6 +38,29 @@ def request_youtube_api_browse(continuation) return _youtube_api_post_json("/youtubei/v1/browse", data) end +def request_youtube_api_browse(browse_id : String, params : String) + # JSON Request data, required by the API + data = { + "browseId" => browse_id, + "context" => { + "client" => { + "hl" => "en", + "gl" => "US", + "clientName" => "WEB", + "clientVersion" => HARDCODED_CLIENT_VERS, + }, + }, + } + + # Append the additionnal parameters if those were provided + # (this is required for channel info, playlist and community, e.g) + if params != "" + data["params"] = params + end + + return _youtube_api_post_json("/youtubei/v1/browse", data) +end + #################################################################### # request_youtube_api_search(search_query, params, region) # diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index 150f1c15..fe7f82f3 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -361,16 +361,7 @@ def fetch_playlist(plid, locale) plid = "UU#{plid.lchop("UC")}" end - response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en") - if response.status_code != 200 - if response.headers["location"]?.try &.includes? "/sorry/index" - raise InfoException.new("Could not extract playlist info. 
Instance is likely blocked.") - else - raise InfoException.new("Not a playlist.") - end - end - - initial_data = extract_initial_data(response.body) + initial_data = request_youtube_api_browse("VL" + plid, params: "") playlist_sidebar_renderer = initial_data["sidebar"]?.try &.["playlistSidebarRenderer"]?.try &.["items"]? raise InfoException.new("Could not extract playlistSidebarRenderer.") if !playlist_sidebar_renderer @@ -453,15 +444,10 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil) ctoken = produce_playlist_continuation(playlist.id, offset) initial_data = request_youtube_api_browse(ctoken) else - response = YT_POOL.client &.get("/playlist?list=#{playlist.id}&gl=US&hl=en") - initial_data = extract_initial_data(response.body) + initial_data = request_youtube_api_browse("VL" + playlist.id, params: "") end - if initial_data - return extract_playlist_videos(initial_data) - else - return [] of PlaylistVideo - end + return extract_playlist_videos(initial_data) end end From cbabf0ae7e5d3e3ebe73f46832bd751648263467 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 24 May 2021 13:33:46 +0200 Subject: [PATCH 4/5] Craft the "context" data in a dedicated function As the number of API endpoint functions grows, this will prevent ugly code copy/pasta --- src/invidious/helpers/youtube_api.cr | 50 +++++++++++++--------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index 1b8f6dae..bd120a4c 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -6,6 +6,23 @@ HARDCODED_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" HARDCODED_CLIENT_VERS = "2.20210330.08.00" +#################################################################### +# make_youtube_api_context(region) +# +# Return, as a Hash, the "context" data required to request the +# youtube API endpoints. 
+# +def make_youtube_api_context(region : String | Nil) : Hash + return { + "client" => { + "hl" => "en", + "gl" => region || "US", # Can't be empty! + "clientName" => "WEB", + "clientVersion" => HARDCODED_CLIENT_VERS, + } + } +end + #################################################################### # request_youtube_api_browse(continuation) # request_youtube_api_browse(browse_id, params) @@ -24,15 +41,8 @@ HARDCODED_CLIENT_VERS = "2.20210330.08.00" def request_youtube_api_browse(continuation : String) # JSON Request data, required by the API data = { - "context": { - "client": { - "hl": "en", - "gl": "US", - "clientName": "WEB", - "clientVersion": HARDCODED_CLIENT_VERS, - }, - }, - "continuation": continuation, + "context" => make_youtube_api_context("US"), + "continuation" => continuation, } return _youtube_api_post_json("/youtubei/v1/browse", data) @@ -42,14 +52,7 @@ def request_youtube_api_browse(browse_id : String, params : String) # JSON Request data, required by the API data = { "browseId" => browse_id, - "context" => { - "client" => { - "hl" => "en", - "gl" => "US", - "clientName" => "WEB", - "clientVersion" => HARDCODED_CLIENT_VERS, - }, - }, + "context" => make_youtube_api_context("US"), } # Append the additionnal parameters if those were provided @@ -73,16 +76,9 @@ end def request_youtube_api_search(search_query : String, params : String, region = nil) # JSON Request data, required by the API data = { - "query": URI.encode_www_form(search_query), - "context": { - "client": { - "hl": "en", - "gl": region || "US", # Can't be empty! 
- "clientName": "WEB", - "clientVersion": HARDCODED_CLIENT_VERS, - }, - }, - "params": params, + "query" => URI.encode_www_form(search_query), + "context" => make_youtube_api_context(region), + "params" => params, } return _youtube_api_post_json("/youtubei/v1/search", data) From b7fe212a184b5af1ccb9315e106c0ecb2f150590 Mon Sep 17 00:00:00 2001 From: Samantaz Fox Date: Mon, 24 May 2021 15:20:26 +0200 Subject: [PATCH 5/5] Fix youtube API function's documentation --- src/invidious/helpers/youtube_api.cr | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/invidious/helpers/youtube_api.cr b/src/invidious/helpers/youtube_api.cr index bd120a4c..544e635b 100644 --- a/src/invidious/helpers/youtube_api.cr +++ b/src/invidious/helpers/youtube_api.cr @@ -19,7 +19,7 @@ def make_youtube_api_context(region : String | Nil) : Hash "gl" => region || "US", # Can't be empty! "clientName" => "WEB", "clientVersion" => HARDCODED_CLIENT_VERS, - } + }, } end @@ -27,8 +27,9 @@ end # request_youtube_api_browse(continuation) # request_youtube_api_browse(browse_id, params) # -# Requests the youtubei/vi/browse endpoint with the required headers -# to get JSON in en-US (english US). +# Requests the youtubei/v1/browse endpoint with the required headers +# and POST data in order to get a JSON reply in english US that can +# be easily parsed. # # The requested data can either be: # @@ -67,8 +68,10 @@ end #################################################################### # request_youtube_api_search(search_query, params, region) # -# Requests the youtubei/vi/search endpoint with the required headers -# to get JSON in en-US (english US). +# Requests the youtubei/v1/search endpoint with the required headers +# and POST data in order to get a JSON reply. As the search results +# vary depending on the region, a region code can be specified in +# order to get non-US results. 
# # The requested data is a search string, with some additional # paramters, formatted as a base64 string.