Add extractor for fetching community posts

This commit is contained in:
syeopite 2021-07-25 03:18:44 -07:00
parent d156bdd314
commit 5987295275
No known key found for this signature in database
GPG key ID: 6FA616E5A5294A82
4 changed files with 145 additions and 3 deletions

View file

@ -1,3 +1,3 @@
# Namespace holding the struct types used to represent parsed YouTube items.
module YouTubeStructs
# `Renderer` is the union of the item struct types listed on the right-hand side.
# NOTE(review): the two alias lines below look like the before/after sides of a
# diff hunk; presumably only the variant that includes `CommunityPost` is meant
# to exist in the real file — confirm against the repository.
alias Renderer = Category | VideoRenderer | PlaylistRenderer | ChannelRenderer
alias Renderer = Category | VideoRenderer | PlaylistRenderer | ChannelRenderer | CommunityPost
end

View file

@ -0,0 +1,68 @@
module YouTubeStructs
  # A poll attached to a community post.
  struct CommunityPoll
    include DB::Serializable

    property choices : Array(String) # Text of each poll choice
    property total_votes : Int32

    # Writes this poll into *json* as an object with the fields
    # `type`, `choices` and `total_votes`.
    #
    # *locale* is accepted so the signature matches the other `to_json`
    # methods in this file; it is not read here.
    def to_json(locale, json : JSON::Builder)
      json.object do
        json.field "type", "community_poll"
        # Pass the array itself to the builder. Calling `.to_json` first would
        # produce a String, which the builder would then emit as an escaped
        # JSON string instead of a JSON array.
        json.field "choices", self.choices
        json.field "total_votes", self.total_votes
      end
    end

    # Convenience overload: writes into *json* when a builder is given,
    # otherwise builds and returns the JSON document as a String.
    def to_json(locale, json : JSON::Builder | Nil = nil)
      if json
        to_json(locale, json)
      else
        JSON.build do |builder|
          to_json(locale, builder)
        end
      end
    end
  end

  # A single post from a channel's community tab.
  struct CommunityPost
    include DB::Serializable

    # Author information
    property author : String
    property author_thumbnail : String
    property author_id : String

    # Community post data
    property post_id : String
    property contents : String
    property attachment : (VideoRenderer | PlaylistRenderer | CommunityPoll | String)? # string is image/gif
    property likes : Int32
    property published : Time

    # Writes this post into *json* as an object. `published` is emitted as a
    # Unix timestamp and `authorUrl` is derived from `author_id`.
    #
    # *locale* is forwarded to the attachment's own `to_json` when the
    # attachment is a struct.
    def to_json(locale, json : JSON::Builder)
      json.object do
        json.field "type", "community_post"
        json.field "author", self.author
        json.field "authorId", self.author_id
        json.field "author_thumbnail", self.author_thumbnail
        json.field "authorUrl", "/channel/#{self.author_id}"
        json.field "contents", self.contents

        # Serialize the attachment through the builder so it is nested as real
        # JSON (object, string or null) rather than as a pre-rendered,
        # escaped String.
        json.field "attachment" do
          attached = self.attachment
          case attached
          when String, Nil
            attached.to_json(json)
          else
            # NOTE(review): assumes VideoRenderer and PlaylistRenderer expose the
            # same `to_json(locale, json)` overload as CommunityPoll — confirm.
            attached.to_json(locale, json)
          end
        end

        json.field "likes", self.likes
        json.field "published", self.published.to_unix
      end
    end

    # Convenience overload: writes into *json* when a builder is given,
    # otherwise builds and returns the JSON document as a String.
    def to_json(locale, json : JSON::Builder | Nil = nil)
      if json
        to_json(locale, json)
      else
        JSON.build do |builder|
          to_json(locale, builder)
        end
      end
    end
  end
end

View file

@ -14,6 +14,7 @@ private ITEM_PARSERS = {
Parsers::GridPlaylistRendererParser,
Parsers::PlaylistRendererParser,
Parsers::CategoryRendererParser,
Parsers::BackstagePostThreadRendererParser,
}
record AuthorFallback, name : String, id : String
@ -311,6 +312,68 @@ private module Parsers
})
end
end
# Parses an InnerTube backstagePostThreadRenderer into a CommunityPost.
# Returns nil when the given object isn't a backstagePostThreadRenderer
#
# A backstagePostThreadRenderer represents a community post, including all of its attachments, metadata, contents,
# etc.
#
# See spec for example
#
# `backstagePostThreadRenderer` can only be found in a channel's community or discussion tab.
module BackstagePostThreadRendererParser
  # Returns a `YouTubeStructs::CommunityPost` when *item* contains a
  # `backstagePostThreadRenderer` wrapping a `backstagePostRenderer`,
  # and nil otherwise.
  #
  # *author_fallback* is accepted but not used by this parser.
  def self.process(item, author_fallback)
    if item_contents = item["backstagePostThreadRenderer"]?
      # NOTE(review): presumably a thread can wrap something other than a
      # backstagePostRenderer (e.g. a shared post). Such threads are skipped
      # by returning nil instead of raising a KeyError — confirm.
      if post = item_contents.dig?("post", "backstagePostRenderer")
        return self.parse(post)
      end
    end
  end

  # Builds a `YouTubeStructs::CommunityPost` from the JSON of a
  # `backstagePostRenderer`.
  #
  # Raises when a required key (post id, author data) is missing, or when
  # an attachment is neither an image, a poll, a video nor a playlist.
  def self.parse(item_contents)
    post_id = item_contents["postId"].as_s

    author_name = item_contents.dig("authorText", "runs", 0, "text").as_s
    author_id = item_contents.dig("authorEndpoint", "browseEndpoint", "browseId").as_s
    author_thumbnail = item_contents.dig("authorThumbnail", "thumbnails", -1, "url").as_s # last item is highest quality

    # A post may have no text at all (e.g. attachment only); in that case the
    # text runs are absent and the contents are left empty instead of raising.
    contents = String.build do |content_text|
      if runs = item_contents.dig?("contentText", "runs")
        runs.as_a.each { |run| content_text << run["text"] }
      end
    end

    attachment_container = item_contents["backstageAttachment"]?

    if attachment_container.nil?
      attachment = nil
    elsif image = attachment_container["backstageImageRenderer"]?
      # Image/gif attachments are stored as the URL of the last thumbnail.
      attachment = image.dig("image", "thumbnails", -1, "url").as_s
    elsif poll = attachment_container["pollRenderer"]?
      choices = poll["choices"].as_a.map { |choice| choice["text"]["runs"][0]["text"].as_s }
      # Only the first space-separated token of the vote text is converted.
      votes = short_text_to_number(poll["totalVotes"]["simpleText"].as_s.split(" ")[0])
      attachment = YouTubeStructs::CommunityPoll.new({choices: choices, total_votes: votes})
    else
      # Anything else is handed to the generic item extractor, which must
      # yield a video or a playlist for the post to be representable.
      attachment = extract_item(attachment_container)
      raise "Unreachable" if !attachment.is_a?(YouTubeStructs::VideoRenderer | YouTubeStructs::PlaylistRenderer)
    end

    # Youtube doesn't provide dislikes...
    # NOTE(review): falls back to 0 when the vote count is absent, mirroring
    # the fallback used for the publish date below — confirm this is desired.
    likes = item_contents.dig?("voteCount", "simpleText").try { |text| short_text_to_number(text.as_s.split(" ")[0]) } || 0

    # Falls back to the current time when no publish text is present.
    published = item_contents["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local

    YouTubeStructs::CommunityPost.new({
      author:           author_name,
      author_id:        author_id,
      author_thumbnail: author_thumbnail,
      post_id:          post_id,
      contents:         contents,
      attachment:       attachment,
      likes:            likes,
      published:        published,
    })
  end
end
end
# The following are the extractors for extracting an array of items from
@ -354,11 +417,21 @@ private module Extractors
private def self.extract(target)
raw_items = [] of JSON::Any
content = extract_selected_tab(target["tabs"])["content"]
selected_tab = extract_selected_tab(target["tabs"])
content = selected_tab["content"]
content["sectionListRenderer"]["contents"].as_a.each do |renderer_container|
renderer_container_contents = renderer_container["itemSectionRenderer"]["contents"][0]
renderer_container = renderer_container["itemSectionRenderer"]
# For some odd reason every YT tab request *except* community tabs
# has only one item (the renderer contents array) in the contents array of
# `renderer_container`. For community tabs, this `renderer_container` is
# just the array of contents. Strange.
if selected_tab["title"] == "Community"
return renderer_container["contents"].as_a
end
renderer_container_contents = renderer_container["contents"].as_a[0]
# Category extraction
if items_container = renderer_container_contents["shelfRenderer"]?
raw_items << renderer_container_contents

View file

@ -110,6 +110,7 @@
<% end %>
</div>
<% when YouTubeStructs::Category %>
<% when YouTubeStructs::CommunityPost %>
<% else %>
<a style="width:100%" href="/watch?v=<%= item.id %>">
<% if !env.get("preferences").as(Preferences).thin_mode %>