move url parsing to utils method

This commit is contained in:
chunky programmer 2023-04-24 17:40:58 -04:00
parent d420741cc1
commit 1b10446e5e
4 changed files with 62 additions and 103 deletions

View file

@ -635,55 +635,8 @@ def content_to_comment_html(content, video_id : String? = "")
text = HTML.escape(run["text"].as_s) text = HTML.escape(run["text"].as_s)
if run["navigationEndpoint"]? if navigationEndpoint = run.dig?("navigationEndpoint")
if url = run["navigationEndpoint"]["urlEndpoint"]?.try &.["url"].as_s text = parse_link_endpoint(navigationEndpoint, text, video_id)
url = URI.parse(url)
displayed_url = text
if url.host == "youtu.be"
url = "/watch?v=#{url.request_target.lstrip('/')}"
elsif url.host.nil? || url.host.not_nil!.ends_with?("youtube.com")
if url.path == "/redirect"
# Sometimes, links can be corrupted (why?) so make sure to fallback
# nicely. See https://github.com/iv-org/invidious/issues/2682
url = url.query_params["q"]? || ""
displayed_url = url
else
url = url.request_target
displayed_url = "youtube.com#{url}"
end
end
text = %(<a href="#{url}">#{reduce_uri(displayed_url)}</a>)
elsif watch_endpoint = run["navigationEndpoint"]["watchEndpoint"]?
start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i
link_video_id = watch_endpoint["videoId"].as_s
url = "/watch?v=#{link_video_id}"
url += "&t=#{start_time}" if !start_time.nil?
# If the current video ID (passed through from the caller function)
# is the same as the video ID in the link, add HTML attributes for
# the JS handler function that bypasses page reload.
#
# See: https://github.com/iv-org/invidious/issues/3063
if link_video_id == video_id
start_time ||= 0
text = %(<a href="#{url}" data-onclick="jump_to_time" data-jump-time="#{start_time}">#{reduce_uri(text)}</a>)
else
text = %(<a href="#{url}">#{text}</a>)
end
elsif url = run.dig?("navigationEndpoint", "commandMetadata", "webCommandMetadata", "url").try &.as_s
if text.starts_with?(/\s?[@#]/)
# Handle "pings" in comments and hasthags differently
# See:
# - https://github.com/iv-org/invidious/issues/3038
# - https://github.com/iv-org/invidious/issues/3062
text = %(<a href="#{url}">#{text}</a>)
else
text = %(<a href="#{url}">#{reduce_uri(url)}</a>)
end
end
end end
text = "<b>#{text}</b>" if run["bold"]? text = "<b>#{text}</b>" if run["bold"]?

View file

@ -389,3 +389,56 @@ def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "
end end
return str return str
end end
# Builds the HTML link for a `navigationEndpoint` or an `innertubeCommand`.
#
# *endpoint* is the endpoint JSON object, *text* is the text the link is
# attached to (it is interpolated into the returned HTML as-is) and
# *video_id* is the ID of the video currently being displayed.
#
# Three endpoint shapes are recognised, in this order: `urlEndpoint.url`,
# `watchEndpoint` and `commandMetadata.webCommandMetadata.url`. When none of
# them is present, *text* is returned unchanged.
#
# NOTE(review): URLs are interpolated into `href` without HTML escaping;
# confirm whether the upstream data can ever contain quote characters.
def parse_link_endpoint(endpoint : JSON::Any, text : String, video_id : String)
  if raw_url = endpoint.dig?("urlEndpoint", "url").try &.as_s
    uri = URI.parse(raw_url)
    host = uri.host

    # By default the parsed URL is linked as-is and the original text is shown.
    href = uri.to_s
    displayed_url = text

    if host == "youtu.be"
      # Short link: the video ID is the path. A query string (e.g. `t=42`)
      # has to be joined with `&` because the target already has a query;
      # using the raw request target would yield `/watch?v=ID?t=42`.
      href = "/watch?v=#{uri.path.lstrip('/')}"
      if (query = uri.query) && !query.empty?
        href = "#{href}&#{query}"
      end
    elsif host.nil? || host == "youtube.com" || host.ends_with?(".youtube.com")
      # Only the bare domain and its subdomains count as first party, so that
      # look-alike hosts such as "notyoutube.com" are not rewritten.
      if uri.path == "/redirect"
        # Sometimes, links can be corrupted (why?) so make sure to fallback
        # nicely. See https://github.com/iv-org/invidious/issues/2682
        href = uri.query_params["q"]? || ""
        displayed_url = href
      else
        href = uri.request_target
        displayed_url = "youtube.com#{href}"
      end
    end

    return %(<a href="#{href}">#{reduce_uri(displayed_url)}</a>)
  elsif watch_endpoint = endpoint.dig?("watchEndpoint")
    start_time = watch_endpoint["startTimeSeconds"]?.try &.as_i
    link_video_id = watch_endpoint["videoId"].as_s

    href = "/watch?v=#{link_video_id}"
    href += "&t=#{start_time}" if !start_time.nil?

    # If the current video ID (passed through from the caller function)
    # is the same as the video ID in the link, add HTML attributes for
    # the JS handler function that bypasses page reload.
    #
    # See: https://github.com/iv-org/invidious/issues/3063
    if link_video_id == video_id
      start_time ||= 0
      return %(<a href="#{href}" data-onclick="jump_to_time" data-jump-time="#{start_time}">#{reduce_uri(text)}</a>)
    end

    return %(<a href="#{href}">#{text}</a>)
  elsif web_url = endpoint.dig?("commandMetadata", "webCommandMetadata", "url").try &.as_s
    if text.starts_with?(/\s?[@#]/)
      # Handle "pings" in comments and hashtags differently: keep the
      # original text as the link label instead of the shortened URL.
      # See:
      # - https://github.com/iv-org/invidious/issues/3038
      # - https://github.com/iv-org/invidious/issues/3062
      return %(<a href="#{web_url}">#{text}</a>)
    end

    return %(<a href="#{web_url}">#{reduce_uri(web_url)}</a>)
  end

  return text
end

View file

@ -1,57 +1,6 @@
require "json" require "json"
require "uri" require "uri"
# Converts one command run of an attributed description into an HTML anchor.
#
# *command* is the command run as JSON; the link target is read from its
# `onTap.innertubeCommand` object. *string* is the slice of description text
# covered by the command and is used as the link label where appropriate.
#
# Returns the anchor HTML, an empty string for a redirect URL that has no `q`
# parameter, or the placeholder "(unknown YouTube desc command)" when
# *command* is nil or holds none of the recognised endpoints.
def parse_command(command : JSON::Any?, string : String) : String?
  on_tap = command.try &.dig?("onTap", "innertubeCommand")

  if url_endpoint = on_tap.try &.["urlEndpoint"]?
    raw_url = url_endpoint["url"].as_s

    # 3rd party URL, extract original URL from YouTube tracking URL
    if raw_url.includes? "youtube.com/redirect"
      original_url = URI.parse(raw_url).query_params["q"]?
      return "" if original_url.nil?
      return "<a href=\"#{original_url}\">#{original_url}</a>"
    end

    # Not a redirect URL, so some first party URL: link to the local path
    # but keep showing the full URL.
    # See https://github.com/iv-org/invidious/issues/3751
    local_url = raw_url.sub("https://www.youtube.com", "")
    return "<a href=\"#{local_url}\">#{raw_url}</a>"
  elsif watch_endpoint = on_tap.try &.["watchEndpoint"]?
    # 1st party watch URL
    video_id = watch_endpoint["videoId"].as_s
    url = "/watch?v=#{video_id}"

    # The start time may be missing; only add `t=` when one is present
    # instead of raising on the absent key.
    if time = watch_endpoint["startTimeSeconds"]?.try &.as_i?
      url += "&t=#{time}s"
    end

    # If string is a timestamp, use the string instead.
    # This is a lazy regex for validating timestamps.
    if /(?:\d{1,2}:){1,2}\d{2}/ =~ string
      return "<a href=\"#{url}\">#{string}</a>"
    end

    return "<a href=\"#{url}\">#{url}</a>"
  elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata")
    # Hashtag/other browse URLs
    url = browse_endpoint["url"].as_s

    # Remove unnecessary characters in a channel name: keep only the
    # "@handle" part of the text when there is one.
    if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL"
      if handle = string.match(/@[\w\d.-]+/)
        return "<a href=\"#{url}\">#{handle[0]}</a>"
      end
    end

    return "<a href=\"#{url}\">#{string}</a>"
  end

  return "(unknown YouTube desc command)"
end
private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int
copied = 0 copied = 0
while copied < count while copied < count
@ -68,7 +17,7 @@ private def copy_string(str : String::Builder, iter : Iterator, count : Int) : I
return copied return copied
end end
def parse_description(desc : JSON::Any?) : String? def parse_description(desc, video_id : String) : String?
return "" if desc.nil? return "" if desc.nil?
content = desc["content"].as_s content = desc["content"].as_s
@ -100,7 +49,11 @@ def parse_description(desc : JSON::Any?) : String?
copy_string(str2, iter, cmd_length) copy_string(str2, iter, cmd_length)
end end
str << parse_command(command, cmd_content) link = cmd_content
if on_tap = command.dig?("onTap", "innertubeCommand")
link = parse_link_endpoint(on_tap, cmd_content, video_id)
end
str << link
index += cmd_length index += cmd_length
end end

View file

@ -287,7 +287,7 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# description_html = video_secondary_renderer.try &.dig?("description", "runs") # description_html = video_secondary_renderer.try &.dig?("description", "runs")
# .try &.as_a.try { |t| content_to_comment_html(t, video_id) } # .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription")) description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"), video_id)
# Video metadata # Video metadata