mirror of
https://gitea.invidious.io/iv-org/invidious.git
synced 2024-08-15 00:53:41 +00:00
Update get_about_info for polymer (iv-org/invidious#1423) (#1429)
Update get_about_info for polymer (iv-org/invidious#1423)
This commit is contained in:
parent
22d9d16a7a
commit
ec4a22687f
2 changed files with 85 additions and 51 deletions
|
@ -775,10 +775,12 @@ def extract_channel_community_cursor(continuation)
|
||||||
cursor
|
cursor
|
||||||
end
|
end
|
||||||
|
|
||||||
|
INITDATA_PREQUERY = "window[\"ytInitialData\"] = {"
|
||||||
|
|
||||||
def get_about_info(ucid, locale)
|
def get_about_info(ucid, locale)
|
||||||
about = YT_POOL.client &.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
|
about = YT_POOL.client &.get("/channel/#{ucid}/about?gl=US&hl=en")
|
||||||
if about.status_code != 200
|
if about.status_code != 200
|
||||||
about = YT_POOL.client &.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
|
about = YT_POOL.client &.get("/user/#{ucid}/about?gl=US&hl=en")
|
||||||
end
|
end
|
||||||
|
|
||||||
if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
|
if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
|
||||||
|
@ -790,6 +792,17 @@ def get_about_info(ucid, locale)
|
||||||
raise error_message
|
raise error_message
|
||||||
end
|
end
|
||||||
|
|
||||||
|
initdata_pre = about.body.index(INITDATA_PREQUERY)
|
||||||
|
initdata_post = initdata_pre.nil? ? nil : about.body.index("};", initdata_pre)
|
||||||
|
if initdata_post.nil?
|
||||||
|
about = XML.parse_html(about.body)
|
||||||
|
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
|
||||||
|
error_message ||= translate(locale, "Could not get channel info.")
|
||||||
|
raise error_message
|
||||||
|
end
|
||||||
|
initdata_pre = initdata_pre.not_nil! + INITDATA_PREQUERY.size - 1
|
||||||
|
|
||||||
|
initdata = JSON.parse(about.body[initdata_pre, initdata_post - initdata_pre + 1])
|
||||||
about = XML.parse_html(about.body)
|
about = XML.parse_html(about.body)
|
||||||
|
|
||||||
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
|
if about.xpath_node(%q(//div[contains(@class, "channel-empty-message")]))
|
||||||
|
@ -797,46 +810,49 @@ def get_about_info(ucid, locale)
|
||||||
raise error_message
|
raise error_message
|
||||||
end
|
end
|
||||||
|
|
||||||
if about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).try &.content.empty?
|
author = about.xpath_node(%q(//meta[@name="title"])).not_nil!["content"]
|
||||||
error_message = about.xpath_node(%q(//div[@class="yt-alert-content"])).try &.content.strip
|
author_url = about.xpath_node(%q(//link[@rel="canonical"])).not_nil!["href"]
|
||||||
error_message ||= translate(locale, "Could not get channel info.")
|
author_thumbnail = about.xpath_node(%q(//link[@rel="image_src"])).not_nil!["href"]
|
||||||
raise error_message
|
|
||||||
end
|
|
||||||
|
|
||||||
author = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!.content
|
|
||||||
author_url = about.xpath_node(%q(//span[contains(@class,"qualified-channel-title-text")]/a)).not_nil!["href"]
|
|
||||||
author_thumbnail = about.xpath_node(%q(//img[@class="channel-header-profile-image"])).not_nil!["src"]
|
|
||||||
|
|
||||||
ucid = about.xpath_node(%q(//meta[@itemprop="channelId"])).not_nil!["content"]
|
ucid = about.xpath_node(%q(//meta[@itemprop="channelId"])).not_nil!["content"]
|
||||||
|
|
||||||
banner = about.xpath_node(%q(//div[@id="gh-banner"]/style)).not_nil!.content
|
# Raises a KeyError on failure.
|
||||||
banner = "https:" + banner.match(/background-image: url\((?<url>[^)]+)\)/).not_nil!["url"]
|
banners = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["banner"]?.try &.["thumbnails"]?
|
||||||
|
banner = banners.try &.[-1]?.try &.["url"].as_s?
|
||||||
|
|
||||||
if banner.includes? "channels/c4/default_banner"
|
# if banner.includes? "channels/c4/default_banner"
|
||||||
banner = nil
|
# banner = nil
|
||||||
end
|
# end
|
||||||
|
|
||||||
description_html = about.xpath_node(%q(//div[contains(@class,"about-description")])).try &.to_s ||
|
description = initdata["metadata"]["channelMetadataRenderer"]?.try &.["description"]?.try &.as_s? || ""
|
||||||
%(<div class="about-description branded-page-box-padding"><pre></pre></div>)
|
description_html = HTML.escape(description).gsub("\n", "<br>")
|
||||||
|
|
||||||
paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
|
paid = about.xpath_node(%q(//meta[@itemprop="paid"])).not_nil!["content"] == "True"
|
||||||
is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
|
is_family_friendly = about.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).not_nil!["content"] == "True"
|
||||||
allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
|
allowed_regions = about.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).not_nil!["content"].split(",")
|
||||||
|
|
||||||
related_channels = about.xpath_nodes(%q(//div[contains(@class, "branded-page-related-channels")]/ul/li))
|
related_channels = initdata["contents"]["twoColumnBrowseResultsRenderer"]
|
||||||
related_channels = related_channels.map do |node|
|
.["secondaryContents"]?.try &.["browseSecondaryContentsRenderer"]["contents"][0]?
|
||||||
related_id = node["data-external-id"]?
|
.try &.["verticalChannelSectionRenderer"]?.try &.["items"]?.try &.as_a.map do |node|
|
||||||
|
renderer = node["miniChannelRenderer"]?
|
||||||
|
related_id = renderer.try &.["channelId"]?.try &.as_s?
|
||||||
related_id ||= ""
|
related_id ||= ""
|
||||||
|
|
||||||
anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
|
related_title = renderer.try &.["title"]?.try &.["simpleText"]?.try &.as_s?
|
||||||
related_title = anchor.try &.["title"]
|
|
||||||
related_title ||= ""
|
related_title ||= ""
|
||||||
|
|
||||||
related_author_url = anchor.try &.["href"]
|
related_author_url = renderer.try &.["navigationEndpoint"]?.try &.["commandMetadata"]?.try &.["webCommandMetadata"]?
|
||||||
|
.try &.["url"]?.try &.as_s?
|
||||||
related_author_url ||= ""
|
related_author_url ||= ""
|
||||||
|
|
||||||
related_author_thumbnail = node.xpath_node(%q(.//img)).try &.["data-thumb"]
|
related_author_thumbnails = renderer.try &.["thumbnail"]?.try &.["thumbnails"]?.try &.as_a?
|
||||||
|
related_author_thumbnails ||= [] of JSON::Any
|
||||||
|
|
||||||
|
related_author_thumbnail = ""
|
||||||
|
if related_author_thumbnails.size > 0
|
||||||
|
related_author_thumbnail = related_author_thumbnails[-1]["url"]?.try &.as_s?
|
||||||
related_author_thumbnail ||= ""
|
related_author_thumbnail ||= ""
|
||||||
|
end
|
||||||
|
|
||||||
AboutRelatedChannel.new({
|
AboutRelatedChannel.new({
|
||||||
ucid: related_id,
|
ucid: related_id,
|
||||||
|
@ -845,25 +861,43 @@ def get_about_info(ucid, locale)
|
||||||
author_thumbnail: related_author_thumbnail,
|
author_thumbnail: related_author_thumbnail,
|
||||||
})
|
})
|
||||||
end
|
end
|
||||||
|
related_channels ||= [] of AboutRelatedChannel
|
||||||
|
|
||||||
joined = about.xpath_node(%q(//span[contains(., "Joined")]))
|
total_views = 0_i64
|
||||||
.try &.content.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
|
joined = Time.unix(0)
|
||||||
|
tabs = [] of String
|
||||||
|
auto_generated = false
|
||||||
|
|
||||||
total_views = about.xpath_node(%q(//span[contains(., "views")]/b))
|
tabs_json = initdata["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]?.try &.as_a?
|
||||||
.try &.content.try &.gsub(/\D/, "").to_i64? || 0_i64
|
if !tabs_json.nil?
|
||||||
|
# Retrieve information from the tabs array. The index we are looking for varies between channels.
|
||||||
|
tabs_json.each do |node|
|
||||||
|
# Try to find the about section which is located in only one of the tabs.
|
||||||
|
channel_about_meta = node["tabRenderer"]?.try &.["content"]?.try &.["sectionListRenderer"]?
|
||||||
|
.try &.["contents"]?.try &.[0]?.try &.["itemSectionRenderer"]?.try &.["contents"]?
|
||||||
|
.try &.[0]?.try &.["channelAboutFullMetadataRenderer"]?
|
||||||
|
|
||||||
sub_count = about.xpath_node(%q(.//span[contains(@class, "subscriber-count")]))
|
if !channel_about_meta.nil?
|
||||||
.try &.["title"].try { |text| short_text_to_number(text) } || 0
|
total_views = channel_about_meta["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D/, "").to_i64? || 0_i64
|
||||||
|
|
||||||
|
# The joined text is split into several substrings. The reduce concatenates those strings before parsing the date.
|
||||||
|
joined = channel_about_meta["joinedDateText"]?.try &.["runs"]?.try &.as_a.reduce("") { |acc, node| acc + node["text"].as_s }
|
||||||
|
.try { |text| Time.parse(text, "Joined %b %-d, %Y", Time::Location.local) } || Time.unix(0)
|
||||||
|
|
||||||
# Auto-generated channels
|
# Auto-generated channels
|
||||||
# https://support.google.com/youtube/answer/2579942
|
# https://support.google.com/youtube/answer/2579942
|
||||||
auto_generated = false
|
# For auto-generated channels, channel_about_meta only has ["description"]["simpleText"] and ["primaryLinks"][0]["title"]["simpleText"]
|
||||||
if about.xpath_node(%q(//ul[@class="about-custom-links"]/li/a[@title="Auto-generated by YouTube"])) ||
|
if (channel_about_meta["primaryLinks"]?.try &.size || 0) == 1 && (channel_about_meta["primaryLinks"][0]?)
|
||||||
about.xpath_node(%q(//span[@class="qualified-channel-title-badge"]/span[@title="Auto-generated by YouTube"]))
|
(channel_about_meta["primaryLinks"][0]["title"]?.try &.["simpleText"]?.try &.as_s? || "") == "Auto-generated by YouTube"
|
||||||
auto_generated = true
|
auto_generated = true
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
tabs = tabs_json.reject { |node| node["tabRenderer"]?.nil? }.map { |node| node["tabRenderer"]["title"].as_s.downcase }
|
||||||
|
end
|
||||||
|
|
||||||
tabs = about.xpath_nodes(%q(//ul[@id="channel-navigation-menu"]/li/a/span)).map { |node| node.content.downcase }
|
sub_count = initdata["header"]["c4TabbedHeaderRenderer"]?.try &.["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s?
|
||||||
|
.try { |text| short_text_to_number(text.split(" ")[0]) } || 0
|
||||||
|
|
||||||
AboutChannel.new({
|
AboutChannel.new({
|
||||||
ucid: ucid,
|
ucid: ucid,
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="h-box">
|
<div class="h-box">
|
||||||
<p><span style="white-space:pre-wrap"><%= XML.parse_html(channel.description_html).xpath_node(%q(.//pre)).try &.content %></span></p>
|
<p><span style="white-space:pre-wrap"><%= channel.description_html %></span></p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="h-box">
|
<div class="h-box">
|
||||||
|
|
Loading…
Reference in a new issue