From 62380933b24fe501c2468018867377f740fd3d06 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Thu, 20 Sep 2018 09:36:09 -0500 Subject: [PATCH] Add support for playlists and channels in search --- src/invidious.cr | 266 ++++++++++++++--------- src/invidious/helpers/helpers.cr | 193 +++++++++++----- src/invidious/jobs.cr | 8 +- src/invidious/search.cr | 56 ++++- src/invidious/views/channel.ecr | 4 +- src/invidious/views/components/item.ecr | 54 +++++ src/invidious/views/components/video.ecr | 23 -- src/invidious/views/index.ecr | 4 +- src/invidious/views/playlist.ecr | 4 +- src/invidious/views/search.ecr | 4 +- src/invidious/views/subscriptions.ecr | 8 +- 11 files changed, 414 insertions(+), 210 deletions(-) create mode 100644 src/invidious/views/components/item.ecr delete mode 100644 src/invidious/views/components/video.ecr diff --git a/src/invidious.cr b/src/invidious.cr index 163e1d24..efc72b1e 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -434,6 +434,7 @@ get "/search" do |env| ucids ||= [] of String channel = nil + content_type = "all" date = "" duration = "" features = [] of String @@ -447,6 +448,8 @@ get "/search" do |env| case key when "channel", "user" channel = value + when "content_type", "type" + content_type = value when "date" date = value when "duration" @@ -475,7 +478,7 @@ get "/search" do |env| count = videos.size else begin - search_params = produce_search_params(sort: sort, date: date, content_type: "video", + search_params = produce_search_params(sort: sort, date: date, content_type: content_type, duration: duration, features: features) rescue ex error_message = ex.message @@ -1333,12 +1336,12 @@ get "/feed/subscriptions" do |env| end videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", + ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", user.subscriptions + user.watched, as: ChannelVideo) else args = arg_array(user.subscriptions) videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ - ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) + ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo) end videos.sort_by! { |video| video.published }.reverse! @@ -2540,7 +2543,7 @@ get "/api/v1/channels/:ucid" do |env| json.field "authorThumbnails" do json.array do - qualities = [32, 48, 76, 100, 512] + qualities = [32, 48, 76, 100, 176, 512] qualities.each do |quality| json.object do @@ -2604,102 +2607,102 @@ end ["/api/v1/channels/:ucid/videos", "/api/v1/channels/videos/:ucid"].each do |route| get route do |env| - ucid = env.params.url["ucid"] - page = env.params.query["page"]?.try &.to_i? - page ||= 1 + ucid = env.params.url["ucid"] + page = env.params.query["page"]?.try &.to_i? 
+ page ||= 1 - client = make_client(YT_URL) + client = make_client(YT_URL) - if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) - rss = client.get("/feeds/videos.xml?user=#{ucid}") - rss = XML.parse_html(rss.body) + if !ucid.match(/UC[a-zA-Z0-9_-]{22}/) + rss = client.get("/feeds/videos.xml?user=#{ucid}") + rss = XML.parse_html(rss.body) - ucid = rss.xpath_node("//feed/channelid") - if !ucid - env.response.content_type = "application/json" - next {"error" => "User does not exist"}.to_json - end - - ucid = ucid.content - author = rss.xpath_node("//author/name").not_nil!.content - next env.redirect "/feed/channel/#{ucid}" - else - rss = client.get("/feeds/videos.xml?channel_id=#{ucid}") - rss = XML.parse_html(rss.body) - - ucid = rss.xpath_node("//feed/channelid") - if !ucid - error_message = "User does not exist." - next templated "error" - end - - ucid = ucid.content - author = rss.xpath_node("//author/name").not_nil!.content - end - - # Auto-generated channels - # https://support.google.com/youtube/answer/2579942 - if author.ends_with?(" - Topic") || - {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author - auto_generated = true - end - - videos = [] of SearchVideo - 2.times do |i| - url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated) - response = client.get(url) - json = JSON.parse(response.body) - - if json["content_html"]? && !json["content_html"].as_s.empty? - document = XML.parse_html(json["content_html"].as_s) - nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) - - if auto_generated - videos += extract_videos(nodeset) - else - videos += extract_videos(nodeset, ucid) + ucid = rss.xpath_node("//feed/channelid") + if !ucid + env.response.content_type = "application/json" + next {"error" => "User does not exist"}.to_json end + + ucid = ucid.content + author = rss.xpath_node("//author/name").not_nil!.content + next env.redirect "/feed/channel/#{ucid}" else - break + rss = client.get("/feeds/videos.xml?channel_id=#{ucid}") + rss = XML.parse_html(rss.body) + + ucid = rss.xpath_node("//feed/channelid") + if !ucid + error_message = "User does not exist." + next templated "error" + end + + ucid = ucid.content + author = rss.xpath_node("//author/name").not_nil!.content end - end - result = JSON.build do |json| - json.array do - videos.each do |video| - json.object do - json.field "title", video.title - json.field "videoId", video.id + # Auto-generated channels + # https://support.google.com/youtube/answer/2579942 + if author.ends_with?(" - Topic") || + {"Popular on YouTube", "Music", "Sports", "Gaming"}.includes? author + auto_generated = true + end - if auto_generated - json.field "author", video.author - json.field "authorId", video.ucid - json.field "authorUrl", "/channel/#{video.ucid}" - else - json.field "author", author - json.field "authorId", ucid - json.field "authorUrl", "/channel/#{ucid}" + videos = [] of SearchVideo + 2.times do |i| + url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated) + response = client.get(url) + json = JSON.parse(response.body) + + if json["content_html"]? && !json["content_html"].as_s.empty? 
+ document = XML.parse_html(json["content_html"].as_s) + nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) + + if auto_generated + videos += extract_videos(nodeset) + else + videos += extract_videos(nodeset, ucid) + end + else + break + end + end + + result = JSON.build do |json| + json.array do + videos.each do |video| + json.object do + json.field "title", video.title + json.field "videoId", video.id + + if auto_generated + json.field "author", video.author + json.field "authorId", video.ucid + json.field "authorUrl", "/channel/#{video.ucid}" + else + json.field "author", author + json.field "authorId", ucid + json.field "authorUrl", "/channel/#{ucid}" + end + + json.field "videoThumbnails" do + generate_thumbnails(json, video.id) + end + + json.field "description", video.description + json.field "descriptionHtml", video.description_html + + json.field "viewCount", video.views + json.field "published", video.published.epoch + json.field "publishedText", "#{recode_date(video.published)} ago" + json.field "lengthSeconds", video.length_seconds end - - json.field "videoThumbnails" do - generate_thumbnails(json, video.id) - end - - json.field "description", video.description - json.field "descriptionHtml", video.description_html - - json.field "viewCount", video.views - json.field "published", video.published.epoch - json.field "publishedText", "#{recode_date(video.published)} ago" - json.field "lengthSeconds", video.length_seconds end end end - end - env.response.content_type = "application/json" - result -end + env.response.content_type = "application/json" + result + end end get "/api/v1/search" do |env| @@ -2722,13 +2725,15 @@ get "/api/v1/search" do |env| features ||= [] of String # TODO: Support other content types - content_type = "video" + content_type = env.params.query["type"]?.try &.downcase + content_type ||= "video" env.response.content_type = "application/json" begin search_params = produce_search_params(sort_by, date, content_type, duration, features) rescue ex + env.response.status_code = 400 next JSON.build do |json| json.object do json.field "error", ex.message @@ -2739,26 +2744,79 @@ get "/api/v1/search" do |env| response = JSON.build do |json| json.array do count, search_results = search(query, page, search_params).as(Tuple) - search_results.each do |video| + search_results.each do |item| json.object do - json.field "title", video.title - json.field "videoId", video.id + case item + when SearchVideo + json.field "type", "video" + json.field "title", item.title + json.field "videoId", item.id - json.field "author", video.author - json.field "authorId", video.ucid - json.field "authorUrl", "/channel/#{video.ucid}" + json.field "author", item.author + json.field "authorId", item.ucid + json.field "authorUrl", "/channel/#{item.ucid}" - json.field "videoThumbnails" do - generate_thumbnails(json, video.id) + json.field "videoThumbnails" do + generate_thumbnails(json, item.id) + end + + json.field "description", item.description + json.field "descriptionHtml", item.description_html + + json.field "viewCount", item.views + json.field "published", item.published.epoch + json.field "publishedText", "#{recode_date(item.published)} ago" + json.field "lengthSeconds", item.length_seconds + json.field "liveNow", item.live_now + when SearchPlaylist + json.field "type", "playlist" + json.field "title", item.title + json.field "playlistId", item.id + + json.field "author", item.author + json.field "authorId", item.ucid + json.field "authorUrl", 
"/channel/#{item.ucid}" + + json.field "videos" do + json.array do + item.videos.each do |video| + json.object do + json.field "title", video.title + json.field "videoId", video.id + json.field "lengthSeconds", video.length_seconds + + json.field "videoThumbnails" do + generate_thumbnails(json, video.id) + end + end + end + end + end + when SearchChannel + json.field "type", "channel" + json.field "author", item.author + json.field "authorId", item.ucid + json.field "authorUrl", "/channel/#{item.ucid}" + + json.field "authorThumbnails" do + json.array do + qualities = [32, 48, 76, 100, 176, 512] + + qualities.each do |quality| + json.object do + json.field "url", item.author_thumbnail.gsub("=s176-", "=s#{quality}-") + json.field "width", quality + json.field "height", quality + end + end + end + end + + json.field "subCount", item.subscriber_count + json.field "videoCount", item.video_count + json.field "description", item.description + json.field "descriptionHtml", item.description_html end - - json.field "description", video.description - json.field "descriptionHtml", video.description_html - - json.field "viewCount", video.views - json.field "published", video.published.epoch - json.field "publishedText", "#{recode_date(video.published)} ago" - json.field "lengthSeconds", video.length_seconds end end end diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 321e1833..8a2e5980 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -196,8 +196,14 @@ def html_to_content(description_html) end def extract_videos(nodeset, ucid = nil) + videos = extract_items(nodeset, ucid) + videos.select! { |item| !item.is_a?(SearchChannel | SearchPlaylist) } + videos.map { |video| video.as(SearchVideo) } +end + +def extract_items(nodeset, ucid = nil) # TODO: Make this a 'common', so it makes more sense to be used here - videos = [] of SearchVideo + items = [] of SearchItem nodeset.each do |node| anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)) @@ -209,78 +215,147 @@ def extract_videos(nodeset, ucid = nil) next end - case node.xpath_node(%q(.//div)).not_nil!["class"] - when .includes? "yt-lockup-playlist" - next - when .includes? "yt-lockup-channel" - next - end - - title = anchor.content.strip - id = anchor["href"].lchop("/watch?v=") - - if ucid + anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)) + if !anchor author = "" author_id = "" else - anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)) - if !anchor - next - end - author = anchor.content author_id = anchor["href"].split("/")[-1] end - metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li)) - if metadata.empty? + anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a)) + if !anchor next end - - begin - published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts ")) - rescue ex - end - - begin - published ||= Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64) - rescue ex - end - published ||= Time.now - - begin - view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64? - rescue ex - end - - begin - view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64? 
- rescue ex - end - view_count ||= 0_i64 + title = anchor.content.strip + id = anchor["href"] description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])) description_html, description = html_to_content(description_html) - length_seconds = node.xpath_node(%q(.//span[@class="video-time"])) - if length_seconds - length_seconds = decode_length_seconds(length_seconds.content) - else - length_seconds = -1 - end + case node.xpath_node(%q(.//div)).not_nil!["class"] + when .includes? "yt-lockup-playlist" + plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"] - videos << SearchVideo.new( - title, - id, - author, - author_id, - published, - view_count, - description, - description_html, - length_seconds, - ) + anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a)) + if anchor + video_count = anchor.content.match(/View full playlist \((?\d+)/).try &.["count"].to_i? + end + video_count ||= 0 + + videos = [] of SearchPlaylistVideo + node.xpath_nodes(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video| + anchor = video.xpath_node(%q(.//a)) + if anchor + video_title = anchor.content + id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"] + end + video_title ||= "" + id ||= "" + + anchor = video.xpath_node(%q(.//span/span)) + if anchor + length_seconds = decode_length_seconds(anchor.content) + end + length_seconds ||= 0 + + videos << SearchPlaylistVideo.new( + video_title, + id, + length_seconds + ) + end + + items << SearchPlaylist.new( + title, + plid, + author, + author_id, + video_count, + videos + ) + when .includes? "yt-lockup-channel" + author = title + ucid = id.split("/")[-1] + + author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]? + author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"] + author_thumbnail ||= "" + + subscriber_count = node.xpath_node(%q(.//span[contains(@class, "yt-subscriber-count")])).try &.["title"].delete(",").to_i? + subscriber_count ||= 0 + + video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].delete(",").to_i? + video_count ||= 0 + + items << SearchChannel.new( + author, + ucid, + author_thumbnail, + subscriber_count, + video_count, + description, + description_html + ) + else + id = id.lchop("/watch?v=") + + metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li)) + if metadata.empty? + next + end + + begin + published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts ")) + rescue ex + end + + begin + published ||= Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64) + rescue ex + end + published ||= Time.now + + begin + view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64? + rescue ex + end + + begin + view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64? 
+ rescue ex + end + view_count ||= 0_i64 + + length_seconds = node.xpath_node(%q(.//span[@class="video-time"])) + if length_seconds + length_seconds = decode_length_seconds(length_seconds.content) + else + length_seconds = -1 + end + + live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) + if live_now + live_now = true + else + live_now = false + end + + items << SearchVideo.new( + title, + id, + author, + author_id, + published, + view_count, + description, + description_html, + length_seconds, + live_now + ) + end end - return videos + return items end diff --git a/src/invidious/jobs.cr b/src/invidious/jobs.cr index 1b0a1442..7424ef62 100644 --- a/src/invidious/jobs.cr +++ b/src/invidious/jobs.cr @@ -3,13 +3,17 @@ def crawl_videos(db) random = Random.new search(random.base64(3)).as(Tuple)[1].each do |video| - ids << video.id + if video.is_a?(SearchVideo) + ids << video.id + end end loop do if ids.empty? search(random.base64(3)).as(Tuple)[1].each do |video| - ids << video.id + if video.is_a?(SearchVideo) + ids << video.id + end end end diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 32282afc..ecb33f9d 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -9,9 +9,43 @@ class SearchVideo description: String, description_html: String, length_seconds: Int32, + live_now: Bool, }) end +class SearchPlaylistVideo + add_mapping({ + title: String, + id: String, + length_seconds: Int32, + }) +end + +class SearchPlaylist + add_mapping({ + title: String, + id: String, + author: String, + ucid: String, + video_count: Int32, + videos: Array(SearchPlaylistVideo), + }) +end + +class SearchChannel + add_mapping({ + author: String, + ucid: String, + author_thumbnail: String, + subscriber_count: Int32, + video_count: Int32, + description: String, + description_html: String, + }) +end + +alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist + def channel_search(query, page, channel) client = make_client(YT_URL) @@ -26,7 +60,7 @@ def channel_search(query, page, channel) end if !canonical - return 0, [] of SearchVideo + return 0, [] of SearchItem end ucid = canonical["href"].split("/")[-1] @@ -40,31 +74,31 @@ def channel_search(query, page, channel) nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) count = nodeset.size - videos = extract_videos(nodeset) + items = extract_items(nodeset) else count = 0 - videos = [] of SearchVideo + items = [] of SearchItem end - return count, videos + return count, items end -def search(query, page = 1, search_params = produce_search_params(content_type: "video")) +def search(query, page = 1, search_params = produce_search_params(content_type: "all")) client = make_client(YT_URL) if query.empty? - return {0, [] of SearchVideo} + return {0, [] of SearchItem} end html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=#{search_params}&disable_polymer=1").body if html.empty? 
- return {0, [] of SearchVideo} + return {0, [] of SearchItem} end html = XML.parse_html(html) nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li)) - videos = extract_videos(nodeset) + items = extract_items(nodeset) - return {nodeset.size, videos} + return {nodeset.size, items} end def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "", @@ -110,8 +144,10 @@ def produce_search_params(sort : String = "relevance", date : String = "", conte "\x10\x04" when "show" "\x10\x05" - else + when "all" "" + else + "\x10\x01" end body += case duration diff --git a/src/invidious/views/channel.ecr b/src/invidious/views/channel.ecr index 672fc09a..2dbe1ce2 100644 --- a/src/invidious/views/channel.ecr +++ b/src/invidious/views/channel.ecr @@ -37,8 +37,8 @@ <% videos.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
     <% end %>
diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr
new file mode 100644
index 00000000..47673c0a
--- /dev/null
+++ b/src/invidious/views/components/item.ecr
@@ -0,0 +1,54 @@
+    <% case item when %>
+    <% when SearchChannel %>
+        <% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
+        <% else %>
+        <% end %>
+        <%= item.author %>
+        <%= number_with_separator(item.subscriber_count) %> subscribers
+        <%= item.description_html %>
+    <% when SearchPlaylist %>
+        <% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
+        <% else %>
+        <% end %>
+        <%= item.title %>
+        <%= item.author %>
+        <%= number_with_separator(item.video_count) %> videos
+        PLAYLIST
+    <% else %>
+        <% if item.responds_to?(:playlists) && !item.playlists.empty? %>
+            <% params = "&list=#{item.playlists[0]}" %>
+        <% else %>
+            <% params = nil %>
+        <% end %>
+        <% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
+        <% else %>
+        <% end %>
+        <%= item.title %>
+        <% if item.responds_to?(:live_now) && item.live_now %>
+            LIVE
+        <% end %>
+        <%= item.author %>
+        <% if Time.now - item.published > 1.minute %>
+            Shared <%= recode_date(item.published) %> ago
+        <% end %>
+    <% end %>
diff --git a/src/invidious/views/components/video.ecr b/src/invidious/views/components/video.ecr
deleted file mode 100644
index 275f4335..00000000
--- a/src/invidious/views/components/video.ecr
+++ /dev/null
@@ -1,23 +0,0 @@
-    <% if video.responds_to?(:playlists) && !video.playlists.empty? %>
-        <% params = "&list=#{video.playlists[0]}" %>
-    <% else %>
-        <% params = nil %>
-    <% end %>
-    <% if env.get?("user") && env.get("user").as(User).preferences.thin_mode %>
-    <% else %>
-    <% end %>
-    <%= video.title %>
-    <%= video.author %>
-    <% if Time.now - video.published > 1.minute %>
-        Shared <%= recode_date(video.published) %> ago
-    <% end %>
diff --git a/src/invidious/views/index.ecr b/src/invidious/views/index.ecr
index f58a6d89..675e9ac2 100644
--- a/src/invidious/views/index.ecr
+++ b/src/invidious/views/index.ecr
@@ -4,8 +4,8 @@
     <% top_videos.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
diff --git a/src/invidious/views/playlist.ecr b/src/invidious/views/playlist.ecr
index 56ad79f3..a440f9b5 100644
--- a/src/invidious/views/playlist.ecr
+++ b/src/invidious/views/playlist.ecr
@@ -26,8 +26,8 @@
     <% videos.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
diff --git a/src/invidious/views/search.ecr b/src/invidious/views/search.ecr
index 5ea7345d..d55cba28 100644
--- a/src/invidious/views/search.ecr
+++ b/src/invidious/views/search.ecr
@@ -4,8 +4,8 @@
     <% videos.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
diff --git a/src/invidious/views/subscriptions.ecr b/src/invidious/views/subscriptions.ecr
index 0b8f4248..10102fcc 100644
--- a/src/invidious/views/subscriptions.ecr
+++ b/src/invidious/views/subscriptions.ecr
@@ -25,8 +25,8 @@
     <% notifications.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
@@ -37,8 +37,8 @@
     <% videos.each_slice(4) do |slice| %>
-        <% slice.each do |video| %>
-            <%= rendered "components/video" %>
+        <% slice.each do |item| %>
+            <%= rendered "components/item" %>
         <% end %>
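Note: /api/v1/search now accepts a "type" parameter ("video", "playlist", "channel", "all"; the default stays "video") and tags every result object with a "type" field, so clients should branch on it before reading type-specific keys. A minimal client-side sketch of the new response shape, not part of the patch; the host name and query string are placeholders:

    # Sketch of a client consuming the extended search API (not part of this
    # patch). Host and query are placeholders.
    require "http/client"
    require "json"

    response = HTTP::Client.get("https://invidio.us/api/v1/search?q=crystal+lang&type=all")
    results = JSON.parse(response.body)

    results.as_a.each do |item|
      case item["type"].as_s
      when "video"
        puts "video    #{item["videoId"]} #{item["title"]} (#{item["lengthSeconds"]}s)"
      when "playlist"
        puts "playlist #{item["playlistId"]} #{item["title"]} (#{item["videos"].as_a.size} videos listed)"
      when "channel"
        puts "channel  #{item["authorId"]} #{item["author"]} (#{item["subCount"]} subscribers)"
      end
    end

A request with an unsupported filter combination now returns HTTP 400 with an {"error": ...} body instead of a silent empty list.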
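Note: search() and channel_search() now return Array(SearchItem) rather than Array(SearchVideo), so every caller has to narrow the union the way jobs.cr and the templates above do. A minimal sketch of that pattern, assuming the types and helpers from this patch are loaded (the query string is arbitrary):

    # Sketch only: narrowing the SearchItem union returned by search().
    # Assumes SearchVideo/SearchPlaylist/SearchChannel and search() from this
    # patch are in scope; "crystal lang" is an arbitrary query.
    count, items = search("crystal lang")
    puts "#{count} results on page 1"

    items.each do |item|
      case item
      when SearchVideo
        puts "video:    #{item.title} (#{item.length_seconds}s, live: #{item.live_now})"
      when SearchPlaylist
        puts "playlist: #{item.title} (#{item.video_count} videos)"
      when SearchChannel
        puts "channel:  #{item.author} (#{item.subscriber_count} subscribers)"
      end
    end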
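Note: produce_search_params understands more content_type values than before ("playlist", "channel", "movie", "show", and "all", which omits the type filter byte entirely), and unknown values now fall back to the video filter ("\x10\x01") instead of no filter. A sketch of driving it directly, again assuming this patch's helpers are loaded and using an arbitrary query:

    # Sketch: building a playlist-only filter string and passing it through
    # search(). Assumes this patch's helpers are loaded.
    playlist_params = produce_search_params(content_type: "playlist")
    count, items = search("chiptune", 1, playlist_params)

    playlists = items.count { |item| item.is_a?(SearchPlaylist) }
    puts "#{count} results on page 1, #{playlists} parsed as playlists"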