invidious-copy-2023-06-08/src/invidious/helpers/helpers.cr

662 lines
21 KiB
Crystal
Raw Normal View History

require "./macros"
2019-04-10 21:23:37 +00:00
struct Nonce
db_mapping({
nonce: String,
expire: Time,
})
end
struct SessionId
db_mapping({
id: String,
email: String,
issued: String,
})
end
struct Annotation
db_mapping({
id: String,
annotations: String,
})
end
struct ConfigPreferences
module StringToArray
def self.to_yaml(value : Array(String), yaml : YAML::Nodes::Builder)
yaml.sequence do
value.each do |element|
yaml.scalar element
end
end
end
def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : Array(String)
begin
unless node.is_a?(YAML::Nodes::Sequence)
node.raise "Expected sequence, not #{node.class}"
end
result = [] of String
2019-04-04 00:04:33 +00:00
node.nodes.each do |item|
unless item.is_a?(YAML::Nodes::Scalar)
node.raise "Expected scalar, not #{item.class}"
end
2019-04-04 00:04:33 +00:00
result << item.value
end
rescue ex
if node.is_a?(YAML::Nodes::Scalar)
result = [node.value, ""]
else
result = ["", ""]
end
end
result
end
end
yaml_mapping({
2019-05-01 04:39:04 +00:00
annotations: {type: Bool, default: false},
annotations_subscribed: {type: Bool, default: false},
autoplay: {type: Bool, default: false},
captions: {type: Array(String), default: ["", "", ""], converter: StringToArray},
comments: {type: Array(String), default: ["youtube", ""], converter: StringToArray},
continue: {type: Bool, default: false},
continue_autoplay: {type: Bool, default: true},
dark_mode: {type: Bool, default: false},
latest_only: {type: Bool, default: false},
listen: {type: Bool, default: false},
local: {type: Bool, default: false},
locale: {type: String, default: "en-US"},
max_results: {type: Int32, default: 40},
notifications_only: {type: Bool, default: false},
quality: {type: String, default: "hd720"},
redirect_feed: {type: Bool, default: false},
related_videos: {type: Bool, default: true},
sort: {type: String, default: "published"},
speed: {type: Float32, default: 1.0_f32},
thin_mode: {type: Bool, default: false},
unseen_only: {type: Bool, default: false},
video_loop: {type: Bool, default: false},
volume: {type: Int32, default: 100},
})
end
2019-03-29 21:30:02 +00:00
struct Config
module ConfigPreferencesConverter
def self.from_yaml(ctx : YAML::ParseContext, node : YAML::Nodes::Node) : Preferences
Preferences.new(*ConfigPreferences.new(ctx, node).to_tuple)
end
def self.to_yaml(value : Preferences, yaml : YAML::Nodes::Builder)
value.to_yaml(yaml)
end
end
2018-08-04 20:30:44 +00:00
YAML.mapping({
2019-01-23 20:28:31 +00:00
channel_threads: Int32, # Number of threads to use for crawling videos from channels (for updating subscriptions)
feed_threads: Int32, # Number of threads to use for updating feeds
db: NamedTuple( # Database configuration
2019-01-23 20:37:04 +00:00
user: String,
password: String,
host: String,
port: Int32,
dbname: String,
),
2019-04-04 12:49:53 +00:00
full_refresh: Bool, # Used for crawling channels: threads should check all videos uploaded by a channel
https_only: Bool?, # Used to tell Invidious it is behind a proxy, so links to resources should be https://
hmac_key: String?, # HMAC signing key for CSRF tokens and verifying pubsub subscriptions
domain: String?, # Domain to be used for links to resources on the site where an absolute URL is required
use_pubsub_feeds: {type: Bool | Int32, default: false}, # Subscribe to channels using PubSubHubbub (requires domain, hmac_key)
default_home: {type: String, default: "Top"},
feed_menu: {type: Array(String), default: ["Popular", "Top", "Trending", "Subscriptions"]},
top_enabled: {type: Bool, default: true},
captcha_enabled: {type: Bool, default: true},
login_enabled: {type: Bool, default: true},
registration_enabled: {type: Bool, default: true},
statistics_enabled: {type: Bool, default: false},
admins: {type: Array(String), default: [] of String},
external_port: {type: Int32?, default: nil},
default_user_preferences: {type: Preferences,
default: Preferences.new(*ConfigPreferences.from_yaml("").to_tuple),
converter: ConfigPreferencesConverter,
},
dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs
check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc.
cache_annotations: {type: Bool, default: false}, # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
banner: {type: String?, default: nil}, # Optional banner to be displayed along top of page for announcements, etc.
2019-05-14 13:21:01 +00:00
hsts: {type: Bool?, default: true}, # Enables 'Strict-Transport-Security'. Ensure that `domain` and all subdomains are served securely
2018-08-04 20:30:44 +00:00
})
end
def rank_videos(db, n)
2018-08-04 20:30:44 +00:00
top = [] of {Float64, String}
db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs|
rs.each do
id = rs.read(String)
wilson_score = rs.read(Float64)
published = rs.read(Time)
# Exponential decay, older videos tend to rank lower
temperature = wilson_score * Math.exp(-0.000005*((Time.now - published).total_minutes))
top << {temperature, id}
end
end
top.sort!
# Make hottest come first
top.reverse!
top = top.map { |a, b| b }
return top[0..n - 1]
2018-08-04 20:30:44 +00:00
end
def login_req(login_form, f_req)
data = {
"pstMsg" => "1",
"checkConnection" => "youtube",
"checkedDomains" => "youtube",
"hl" => "en",
"deviceinfo" => %q([null,null,null,[],null,"US",null,null,[],"GlifWebSignIn",null,[null,null,[]]]),
"f.req" => f_req,
"flowName" => "GlifWebSignIn",
"flowEntry" => "ServiceLogin",
}
data = login_form.merge(data)
return HTTP::Params.encode(data)
end
2018-09-04 13:52:30 +00:00
def html_to_content(description_html)
2018-08-10 13:38:31 +00:00
if !description_html
description = ""
description_html = ""
else
description_html = description_html.to_s
description = description_html.gsub("<br>", "\n")
description = description.gsub("<br/>", "\n")
2018-09-06 21:50:12 +00:00
if description.empty?
description = ""
else
description = XML.parse_html(description).content.strip("\n ")
end
2018-08-10 13:38:31 +00:00
end
2018-09-04 13:52:30 +00:00
return description_html, description
2018-08-10 13:38:31 +00:00
end
def extract_videos(nodeset, ucid = nil, author_name = nil)
videos = extract_items(nodeset, ucid, author_name)
videos.select! { |item| !item.is_a?(SearchChannel | SearchPlaylist) }
videos.map { |video| video.as(SearchVideo) }
end
2019-02-15 23:28:54 +00:00
def extract_items(nodeset, ucid = nil, author_name = nil)
# TODO: Make this a 'common', so it makes more sense to be used here
items = [] of SearchItem
nodeset.each do |node|
2019-02-15 23:28:54 +00:00
anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
if !anchor
next
end
2019-02-15 23:28:54 +00:00
title = anchor.content.strip
id = anchor["href"]
if anchor["href"].starts_with? "https://www.googleadservices.com"
next
end
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
2019-02-15 23:28:54 +00:00
if anchor
2018-09-22 15:49:42 +00:00
author = anchor.content.strip
author_id = anchor["href"].split("/")[-1]
end
2019-02-15 23:28:54 +00:00
author ||= author_name
author_id ||= ucid
author ||= ""
author_id ||= ""
description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
description_html, description = html_to_content(description_html)
2018-09-19 20:24:19 +00:00
tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
if !tile
next
end
case tile["class"]
when .includes? "yt-lockup-playlist"
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
2018-08-21 00:25:12 +00:00
anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-meta")]/a))
2018-09-29 04:12:35 +00:00
if !anchor
anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
end
2018-09-29 04:12:35 +00:00
video_count = node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b))
if video_count
video_count = video_count.content
if video_count == "50+"
author = "YouTube"
author_id = "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
video_count = video_count.rchop("+")
end
2019-04-12 21:37:35 +00:00
video_count = video_count.gsub(/\D/, "").to_i?
end
video_count ||= 0
videos = [] of SearchPlaylistVideo
2018-09-22 15:49:42 +00:00
node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
anchor = video.xpath_node(%q(.//a))
if anchor
2018-09-22 15:49:42 +00:00
video_title = anchor.content.strip
id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
end
video_title ||= ""
id ||= ""
2018-09-19 20:24:19 +00:00
anchor = video.xpath_node(%q(.//span/span))
if anchor
length_seconds = decode_length_seconds(anchor.content)
end
length_seconds ||= 0
videos << SearchPlaylistVideo.new(
video_title,
id,
length_seconds
)
end
2019-03-17 14:00:00 +00:00
playlist_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
playlist_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
if !playlist_thumbnail || playlist_thumbnail.empty?
thumbnail_id = videos[0]?.try &.id
else
thumbnail_id = playlist_thumbnail.match(/\/vi\/(?<video_id>[a-zA-Z0-9_-]{11})\/\w+\.jpg/).try &.["video_id"]
end
items << SearchPlaylist.new(
title,
plid,
author,
author_id,
video_count,
2019-03-17 14:00:00 +00:00
videos,
thumbnail_id
)
when .includes? "yt-lockup-channel"
2018-09-22 15:49:42 +00:00
author = title.strip
ucid = node.xpath_node(%q(.//button[contains(@class, "yt-uix-subscription-button")])).try &.["data-channel-external-id"]?
ucid ||= id.split("/")[-1]
author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
2019-04-02 13:51:28 +00:00
if author_thumbnail
author_thumbnail = URI.parse(author_thumbnail)
author_thumbnail.scheme = "https"
author_thumbnail = author_thumbnail.to_s
end
author_thumbnail ||= ""
2019-04-12 21:37:35 +00:00
subscriber_count = node.xpath_node(%q(.//span[contains(@class, "yt-subscriber-count")])).try &.["title"].gsub(/\D/, "").to_i?
subscriber_count ||= 0
2019-04-12 21:37:35 +00:00
video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].gsub(/\D/, "").to_i?
video_count ||= 0
items << SearchChannel.new(
2018-12-15 19:02:53 +00:00
author: author,
ucid: ucid,
author_thumbnail: author_thumbnail,
subscriber_count: subscriber_count,
video_count: video_count,
description: description,
description_html: description_html
)
else
id = id.lchop("/watch?v=")
metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
begin
published = decode_date(metadata[0].content.lchop("Streamed ").lchop("Starts "))
rescue ex
end
begin
2018-11-04 15:37:12 +00:00
published ||= Time.unix(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
rescue ex
end
published ||= Time.now
begin
view_count = metadata[0].content.rchop(" watching").delete(",").try &.to_i64?
rescue ex
end
begin
view_count ||= metadata.try &.[1].content.delete("No views,").try &.to_i64?
rescue ex
end
view_count ||= 0_i64
length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
if length_seconds
length_seconds = decode_length_seconds(length_seconds.content)
else
length_seconds = -1
end
live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")]))
if live_now
live_now = true
else
live_now = false
end
2018-10-16 16:15:14 +00:00
if node.xpath_node(%q(.//span[text()="Premium"]))
premium = true
else
premium = false
end
if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
2018-10-16 16:15:14 +00:00
paid = false
else
paid = true
2018-10-16 16:15:14 +00:00
end
premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64
if premiere_timestamp
premiere_timestamp = Time.unix(premiere_timestamp)
end
items << SearchVideo.new(
2018-12-15 19:02:53 +00:00
title: title,
id: id,
author: author,
ucid: author_id,
published: published,
views: view_count,
description: description,
description_html: description_html,
length_seconds: length_seconds,
live_now: live_now,
paid: paid,
premium: premium,
premiere_timestamp: premiere_timestamp
)
end
end
return items
end
2019-02-15 23:28:54 +00:00
def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
items = [] of SearchPlaylist
nodeset.each do |shelf|
shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
if !shelf_anchor
next
end
title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")]))
if title
title = title.content.strip
end
title ||= ""
id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
if !id
next
end
is_playlist = false
videos = [] of SearchPlaylistVideo
shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list")]/li)).each do |child_node|
type = child_node.xpath_node(%q(./div))
if !type
next
end
case type["class"]
when .includes? "yt-lockup-video"
is_playlist = true
anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
if anchor
video_title = anchor.content.strip
video_id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
end
video_title ||= ""
video_id ||= ""
anchor = child_node.xpath_node(%q(.//span[@class="video-time"]))
if anchor
length_seconds = decode_length_seconds(anchor.content)
end
length_seconds ||= 0
videos << SearchPlaylistVideo.new(
video_title,
video_id,
length_seconds
)
when .includes? "yt-lockup-playlist"
anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
if anchor
playlist_title = anchor.content.strip
params = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)
plid = params["list"]
end
playlist_title ||= ""
plid ||= ""
2019-03-17 14:00:00 +00:00
playlist_thumbnail = child_node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
playlist_thumbnail ||= child_node.xpath_node(%q(.//span/img)).try &.["src"]
if !playlist_thumbnail || playlist_thumbnail.empty?
thumbnail_id = videos[0]?.try &.id
else
thumbnail_id = playlist_thumbnail.match(/\/vi\/(?<video_id>[a-zA-Z0-9_-]{11})\/\w+\.jpg/).try &.["video_id"]
end
2019-03-17 23:31:11 +00:00
video_count_label = child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
if video_count_label
2019-04-12 21:37:35 +00:00
video_count = video_count_label.content.gsub(/\D/, "").to_i?
2019-03-17 23:31:11 +00:00
end
video_count ||= 50
2019-02-15 23:28:54 +00:00
items << SearchPlaylist.new(
playlist_title,
plid,
author_name,
ucid,
2019-03-17 23:31:11 +00:00
video_count,
2019-03-17 14:00:00 +00:00
Array(SearchPlaylistVideo).new,
thumbnail_id
2019-02-15 23:28:54 +00:00
)
end
end
if is_playlist
plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
items << SearchPlaylist.new(
title,
plid,
author_name,
ucid,
videos.size,
2019-03-17 14:00:00 +00:00
videos,
videos[0].try &.id
2019-02-15 23:28:54 +00:00
)
end
end
return items
end
2019-04-10 21:23:37 +00:00
def analyze_table(db, logger, table_name, struct_type = nil)
# Create table if it doesn't exist
2019-04-10 22:16:18 +00:00
begin
db.exec("SELECT * FROM #{table_name} LIMIT 0")
rescue ex
2019-04-10 22:09:36 +00:00
logger.write("CREATE TABLE #{table_name}\n")
2019-04-10 21:23:37 +00:00
db.using_connection do |conn|
conn.as(PG::Connection).exec_all(File.read("config/sql/#{table_name}.sql"))
end
end
if !struct_type
return
end
struct_array = struct_type.to_type_tuple
column_array = get_column_array(db, table_name)
column_types = File.read("config/sql/#{table_name}.sql").match(/CREATE TABLE public\.#{table_name}\n\((?<types>[\d\D]*?)\);/)
.try &.["types"].split(",").map { |line| line.strip }
if !column_types
return
end
struct_array.each_with_index do |name, i|
if name != column_array[i]?
if !column_array[i]?
new_column = column_types.select { |line| line.starts_with? name }[0]
logger.write("ALTER TABLE #{table_name} ADD COLUMN #{new_column}\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} ADD COLUMN #{new_column}")
2019-04-10 21:23:37 +00:00
next
end
# Column doesn't exist
if !column_array.includes? name
new_column = column_types.select { |line| line.starts_with? name }[0]
db.exec("ALTER TABLE #{table_name} ADD COLUMN #{new_column}")
end
# Column exists but in the wrong position, rotate
if struct_array.includes? column_array[i]
until name == column_array[i]
new_column = column_types.select { |line| line.starts_with? column_array[i] }[0]?.try &.gsub("#{column_array[i]}", "#{column_array[i]}_new")
# There's a column we didn't expect
if !new_column
logger.write("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]}\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]} CASCADE")
2019-04-10 21:23:37 +00:00
column_array = get_column_array(db, table_name)
next
end
logger.write("ALTER TABLE #{table_name} ADD COLUMN #{new_column}\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} ADD COLUMN #{new_column}")
2019-04-10 21:23:37 +00:00
logger.write("UPDATE #{table_name} SET #{column_array[i]}_new=#{column_array[i]}\n")
2019-04-10 22:09:36 +00:00
db.exec("UPDATE #{table_name} SET #{column_array[i]}_new=#{column_array[i]}")
2019-04-10 21:23:37 +00:00
logger.write("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]} CASCADE\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]} CASCADE")
2019-04-10 21:23:37 +00:00
logger.write("ALTER TABLE #{table_name} RENAME COLUMN #{column_array[i]}_new TO #{column_array[i]}\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} RENAME COLUMN #{column_array[i]}_new TO #{column_array[i]}")
2019-04-10 21:23:37 +00:00
column_array = get_column_array(db, table_name)
end
else
logger.write("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]} CASCADE\n")
2019-04-10 22:09:36 +00:00
db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column_array[i]} CASCADE")
2019-04-10 21:23:37 +00:00
end
end
end
end
class PG::ResultSet
def field(index = @column_index)
@fields.not_nil![index]
end
end
def get_column_array(db, table_name)
column_array = [] of String
db.query("SELECT * FROM #{table_name} LIMIT 0") do |rs|
rs.column_count.times do |i|
column = rs.as(PG::ResultSet).field(i)
column_array << column.name
end
end
return column_array
end
def cache_annotation(db, id, annotations)
if !CONFIG.cache_annotations
return
end
body = XML.parse(annotations)
nodeset = body.xpath_nodes(%q(/document/annotations/annotation))
if nodeset == 0
return
end
has_legacy_annotations = false
nodeset.each do |node|
if !{"branding", "card", "drawer"}.includes? node["type"]?
has_legacy_annotations = true
break
end
end
if has_legacy_annotations
# TODO: Update on conflict?
db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations)
end
end
2019-05-19 00:14:58 +00:00
def proxy_file(response, env)
2019-05-20 17:06:44 +00:00
if !response.body_io?
return
end
2019-05-20 17:06:44 +00:00
begin
if response.headers.includes_word?("Content-Encoding", "gzip")
Gzip::Writer.open(env.response) do |deflate|
copy_in_chunks(response.body_io, deflate)
end
elsif response.headers.includes_word?("Content-Encoding", "deflate")
Flate::Writer.open(env.response) do |deflate|
copy_in_chunks(response.body_io, deflate)
end
else
copy_in_chunks(response.body_io, env.response)
2019-05-19 00:14:58 +00:00
end
2019-05-20 17:06:44 +00:00
rescue ex
end
end
# https://stackoverflow.com/a/44802810 <3
def copy_in_chunks(input, output, chunk_size = 4096)
size = 1
while size > 0
size = IO.copy(input, output, chunk_size)
Fiber.yield
2019-05-19 00:14:58 +00:00
end
end