From b43e9ed7e7b0aaa94d9f1b231ac6690e8ed67b98 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Sat, 8 Jun 2019 15:08:27 -0500 Subject: [PATCH] Refactor 'description_html' --- src/invidious.cr | 29 ++++---- src/invidious/comments.cr | 11 +--- src/invidious/helpers/helpers.cr | 27 +++----- src/invidious/playlists.cr | 7 +- src/invidious/search.cr | 8 +-- src/invidious/videos.cr | 80 ++++++++++------------- src/invidious/views/components/player.ecr | 2 +- src/invidious/views/watch.ecr | 8 +-- 8 files changed, 69 insertions(+), 103 deletions(-) diff --git a/src/invidious.cr b/src/invidious.cr index 1a02741b..33e56886 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -473,9 +473,8 @@ get "/watch" do |env| aspect_ratio = "16:9" - video.description = fill_links(video.description, "https", "www.youtube.com") - video.description = replace_links(video.description) - description = video.short_description + video.description_html = fill_links(video.description_html, "https", "www.youtube.com") + video.description_html = replace_links(video.description_html) host_url = make_host_url(config, Kemal.config) host_params = env.request.query_params @@ -648,9 +647,8 @@ get "/embed/:id" do |env| aspect_ratio = nil - video.description = fill_links(video.description, "https", "www.youtube.com") - video.description = replace_links(video.description) - description = video.short_description + video.description_html = fill_links(video.description_html, "https", "www.youtube.com") + video.description_html = replace_links(video.description_html) host_url = make_host_url(config, Kemal.config) host_params = env.request.query_params @@ -2466,7 +2464,7 @@ get "/feed/channel/:ucid" do |env| author = entry.xpath_node("author/name").not_nil!.content ucid = entry.xpath_node("channelid").not_nil!.content - description = entry.xpath_node("group/description").not_nil!.content + description_html = entry.xpath_node("group/description").not_nil!.to_s views = 
entry.xpath_node("group/community/statistics").not_nil!.["views"].to_i64 videos << SearchVideo.new( @@ -2476,8 +2474,7 @@ get "/feed/channel/:ucid" do |env| ucid: ucid, published: published, views: views, - description: description, - description_html: "", + description_html: description_html, length_seconds: 0, live_now: false, paid: false, @@ -3460,11 +3457,8 @@ get "/api/v1/top" do |env| json.field "published", video.published.to_unix json.field "publishedText", translate(locale, "`x` ago", recode_date(video.published, locale)) - description = video.description.gsub("
<br>", "\n") - description = description.gsub("<br/>
", "\n") - description = XML.parse_html(description) - json.field "description", description.content - json.field "descriptionHtml", video.description + json.field "description", html_to_content(video.description_html) + json.field "descriptionHtml", video.description_html end end end @@ -3511,8 +3505,7 @@ get "/api/v1/channels/:ucid" do |env| author = channel_html.xpath_node(%q(//a[contains(@class, "branded-page-header-title-link")])).not_nil!.content author_url = channel_html.xpath_node(%q(//a[@class="channel-header-profile-image-container spf-link"])).not_nil!["href"] author_thumbnail = channel_html.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"] - description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])) - description_html, description = html_to_content(description_html) + description_html = channel_html.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s || "" paid = channel_html.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True" is_family_friendly = channel_html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True" @@ -3607,7 +3600,7 @@ get "/api/v1/channels/:ucid" do |env| json.field "autoGenerated", auto_generated json.field "isFamilyFriendly", is_family_friendly - json.field "description", description + json.field "description", html_to_content(description_html) json.field "descriptionHtml", description_html json.field "allowedRegions", allowed_regions @@ -3884,7 +3877,7 @@ get "/api/v1/playlists/:plid" do |env| end end - json.field "description", playlist.description + json.field "description", html_to_content(playlist.description_html) json.field "descriptionHtml", playlist.description_html json.field "videoCount", playlist.video_count diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr index a652f84a..79b3afaa 100644 --- a/src/invidious/comments.cr +++ b/src/invidious/comments.cr @@ -138,13 +138,8 @@ def 
fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m node_comment = node["commentRenderer"] end - content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff') - if content_html - content_html = HTML.escape(content_html) - end - - content_html ||= content_to_comment_html(node_comment["contentText"]["runs"].as_a) - content_html, content = html_to_content(content_html) + content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |block| HTML.escape(block) }.to_s || + content_to_comment_html(node_comment["contentText"]["runs"].as_a).try &.to_s || "" author = node_comment["authorText"]?.try &.["simpleText"] author ||= "" @@ -179,7 +174,7 @@ def fetch_youtube_comments(id, db, continuation, proxies, format, locale, thin_m json.field "isEdited", false end - json.field "content", content + json.field "content", html_to_content(content_html) json.field "contentHtml", content_html json.field "published", published.to_unix json.field "publishedText", translate(locale, "`x` ago", recode_date(published, locale)) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index ef33b736..ae9562a0 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -177,23 +177,17 @@ def login_req(login_form, f_req) return HTTP::Params.encode(data) end -def html_to_content(description_html) - if !description_html - description = "" - description_html = "" - else - description_html = description_html.to_s - description = description_html.gsub("
<br>", "\n") - description = description.gsub("<br/>", "\n") +def html_to_content(description_html : String) + description = description_html.gsub(/(<br>)|(<br\/>)/, { + "<br>": "\n", + "<br/>
": "\n", + }) - if description.empty? - description = "" - else - description = XML.parse_html(description).content.strip("\n ") - end + if !description.empty? + description = XML.parse_html(description).content.strip("\n ") end - return description_html, description + return description end def extract_videos(nodeset, ucid = nil, author_name = nil) @@ -230,8 +224,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil) author ||= "" author_id ||= "" - description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])) - description_html, description = html_to_content(description_html) + description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || "" tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")])) if !tile @@ -330,7 +323,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil) author_thumbnail: author_thumbnail, subscriber_count: subscriber_count, video_count: video_count, - description: description, description_html: description_html ) else @@ -396,7 +388,6 @@ def extract_items(nodeset, ucid = nil, author_name = nil) ucid: author_id, published: published, views: view_count, - description: description, description_html: description_html, length_seconds: length_seconds, live_now: live_now, diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index 54b7af0a..373d1fba 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -47,7 +47,6 @@ struct Playlist author: String, author_thumbnail: String, ucid: String, - description: String, description_html: String, video_count: Int32, views: Int64, @@ -214,9 +213,8 @@ def fetch_playlist(plid, locale) end title = title.content.strip(" \n") - description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])) - description_html ||= document.xpath_node(%q(//span[@class="pl-header-description-text"])) - description_html, description = 
html_to_content(description_html) + description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s || + document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || "" # YouTube allows anonymous playlists, so most of this can be empty or optional anchor = document.xpath_node(%q(//ul[@class="pl-header-details"])) @@ -245,7 +243,6 @@ def fetch_playlist(plid, locale) author: author, author_thumbnail: author_thumbnail, ucid: ucid, - description: description, description_html: description_html, video_count: video_count, views: views, diff --git a/src/invidious/search.cr b/src/invidious/search.cr index 49aa422a..c69f96cf 100644 --- a/src/invidious/search.cr +++ b/src/invidious/search.cr @@ -31,7 +31,7 @@ struct SearchVideo xml.element("media:title") { xml.text self.title } xml.element("media:thumbnail", url: "#{host_url}/vi/#{self.id}/mqdefault.jpg", width: "320", height: "180") - xml.element("media:description") { xml.text self.description } + xml.element("media:description") { xml.text html_to_content(self.description_html) } end xml.element("media:community") do @@ -64,7 +64,7 @@ struct SearchVideo generate_thumbnails(json, self.id, config, kemal_config) end - json.field "description", self.description + json.field "description", html_to_content(self.description_html) json.field "descriptionHtml", self.description_html json.field "viewCount", self.views @@ -94,7 +94,6 @@ struct SearchVideo ucid: String, published: Time, views: Int64, - description: String, description_html: String, length_seconds: Int32, live_now: Bool, @@ -187,7 +186,7 @@ struct SearchChannel json.field "subCount", self.subscriber_count json.field "videoCount", self.video_count - json.field "description", self.description + json.field "description", html_to_content(self.description_html) json.field "descriptionHtml", self.description_html end end @@ -208,7 +207,6 @@ struct SearchChannel author_thumbnail: String, subscriber_count: 
Int32, video_count: Int32, - description: String, description_html: String, }) end diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 854c059c..7964aa40 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -286,10 +286,8 @@ struct Video generate_storyboards(json, self.id, self.storyboards, config, kemal_config) end - description_html, description = html_to_content(self.description) - - json.field "description", description - json.field "descriptionHtml", description_html + json.field "description", html_to_content(self.description_html) + json.field "descriptionHtml", self.description_html json.field "published", self.published.to_unix json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) json.field "keywords", self.keywords @@ -467,6 +465,17 @@ struct Video end end + # `description_html` is stored in DB as `description`, which can be + # quite confusing. Since it currently isn't very practical to rename + # it, we instead define a getter and setter here. + def description_html + self.description + end + + def description_html=(other : String) + self.description = other + end + def allow_ratings allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool @@ -796,14 +805,19 @@ struct Video end def short_description - description = self.description.gsub("
<br>", " ") - description = description.gsub("<br/>", " ") - description = XML.parse_html(description).content[0..200].gsub('"', "&quot;").gsub("\n", " ").strip(" ") - if description.empty? - description = " " + short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, { + "<br>" => " ", + "<br/>
" => " ", + "\"" => "&quot;", + "\n" => " ", + }) + short_description = XML.parse_html(short_description).content[0..200].strip(" ") + + if short_description.empty? + short_description = " " end - return description + return short_description end def length_seconds @@ -1151,28 +1165,23 @@ def fetch_video(id, proxies, region) end title = info["title"] - author = info["author"] - ucid = info["ucid"] + author = info["author"]? || "" + ucid = info["ucid"]? || "" views = html.xpath_node(%q(//meta[@itemprop="interactionCount"])) - views = views.try &.["content"].to_i64? - views ||= 0_i64 + .try &.["content"].to_i64? || 0_i64 likes = html.xpath_node(%q(//button[@title="I like this"]/span)) - likes = likes.try &.content.delete(",").try &.to_i? - likes ||= 0 + .try &.content.delete(",").try &.to_i? || 0 dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span)) - dislikes = dislikes.try &.content.delete(",").try &.to_i? - dislikes ||= 0 + .try &.content.delete(",").try &.to_i? || 0 avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1) avg_rating = avg_rating.nan? ? 0.0 : avg_rating info["avg_rating"] = "#{avg_rating}" - description = html.xpath_node(%q(//p[@id="eow-description"])) - description = description ? description.to_xml(options: XML::SaveOptions::NO_DECL) : %q(
<p id="eow-description"></p>
) - + description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || "" wilson_score = ci_lower_bound(likes, likes + dislikes) published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"] @@ -1188,7 +1197,8 @@ def fetch_video(id, proxies, region) genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"] genre ||= "" - genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"] + genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]? + genre_url ||= "" # YouTube provides invalid URLs for some genres, so we fix that here case genre @@ -1205,30 +1215,12 @@ def fetch_video(id, proxies, region) when "Trailers" genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g" end - genre_url ||= "" - license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)) - if license - license = license.content - else - license = "" - end + license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || "" + sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])).try &.["title"]? || "0" + author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]? 
|| "" - sub_count_text = html.xpath_node(%q(//span[contains(@class, "yt-subscriber-count")])) - if sub_count_text - sub_count_text = sub_count_text["title"] - else - sub_count_text = "0" - end - - author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)) - if author_thumbnail - author_thumbnail = author_thumbnail["data-thumb"] - else - author_thumbnail = "" - end - - video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description, + video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html, nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail) return video diff --git a/src/invidious/views/components/player.ecr b/src/invidious/views/components/player.ecr index a64ea39d..d128b0f6 100644 --- a/src/invidious/views/components/player.ecr +++ b/src/invidious/views/components/player.ecr @@ -43,7 +43,7 @@ var player_data = { aspect_ratio: '<%= aspect_ratio %>', title: "<%= video.title.dump_unquoted %>", - description: "<%= HTML.escape(description) %>", + description: "<%= HTML.escape(video.short_description) %>", thumbnail: "<%= thumbnail %>" } diff --git a/src/invidious/views/watch.ecr b/src/invidious/views/watch.ecr index 85ca8b8b..36fabcc3 100644 --- a/src/invidious/views/watch.ecr +++ b/src/invidious/views/watch.ecr @@ -1,12 +1,12 @@ <% content_for "header" do %> - + "> - + @@ -17,7 +17,7 @@ - + @@ -185,7 +185,7 @@ var video_data = {

- <%= video.description %> + <%= video.description_html %>