Add config option to cache annotations from IA

This commit is contained in:
Omar Roth 2019-04-15 11:13:09 -05:00
parent 2deb436ccd
commit 3bcb98e644
5 changed files with 101 additions and 44 deletions

View file

@ -103,12 +103,13 @@ $ sudo systemctl start postgresql
$ sudo -i -u postgres $ sudo -i -u postgres
$ psql -c "CREATE USER kemal WITH PASSWORD 'kemal';" # Change 'kemal' here to a stronger password, and update `password` in config/config.yml $ psql -c "CREATE USER kemal WITH PASSWORD 'kemal';" # Change 'kemal' here to a stronger password, and update `password` in config/config.yml
$ createdb -O kemal invidious $ createdb -O kemal invidious
$ psql invidious < /home/invidious/invidious/config/sql/channels.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/channels.sql
$ psql invidious < /home/invidious/invidious/config/sql/videos.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/videos.sql
$ psql invidious < /home/invidious/invidious/config/sql/channel_videos.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/channel_videos.sql
$ psql invidious < /home/invidious/invidious/config/sql/users.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/users.sql
$ psql invidious < /home/invidious/invidious/config/sql/session_ids.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/session_ids.sql
$ psql invidious < /home/invidious/invidious/config/sql/nonces.sql $ psql invidious kemal < /home/invidious/invidious/config/sql/nonces.sql
$ psql invidious kemal < /home/invidious/invidious/config/sql/annotations.sql
$ exit $ exit
``` ```
@ -145,12 +146,13 @@ $ cd invidious
$ brew services start postgresql $ brew services start postgresql
$ psql -c "CREATE ROLE kemal WITH PASSWORD 'kemal';" # Change 'kemal' here to a stronger password, and update `password` in config/config.yml $ psql -c "CREATE ROLE kemal WITH PASSWORD 'kemal';" # Change 'kemal' here to a stronger password, and update `password` in config/config.yml
$ createdb -O kemal invidious $ createdb -O kemal invidious
$ psql invidious < config/sql/channels.sql $ psql invidious kemal < config/sql/channels.sql
$ psql invidious < config/sql/videos.sql $ psql invidious kemal < config/sql/videos.sql
$ psql invidious < config/sql/channel_videos.sql $ psql invidious kemal < config/sql/channel_videos.sql
$ psql invidious < config/sql/users.sql $ psql invidious kemal < config/sql/users.sql
$ psql invidious < config/sql/session_ids.sql $ psql invidious kemal < config/sql/session_ids.sql
$ psql invidious < config/sql/nonces.sql $ psql invidious kemal < config/sql/nonces.sql
$ psql invidious kemal < config/sql/annotations.sql
# Setup Invidious # Setup Invidious
$ shards update && shards install $ shards update && shards install

View file

@ -0,0 +1,12 @@
-- Table: public.annotations
-- Stores one annotations XML document per id.
-- DROP TABLE public.annotations;
CREATE TABLE public.annotations
(
-- Lookup key; required, and unique via the constraint below.
id text NOT NULL,
-- The annotations document, stored using PostgreSQL's xml type. Nullable.
annotations xml,
-- At most one row per id.
CONSTRAINT annotations_id_key UNIQUE (id)
);
-- Give the kemal role full access to this table.
GRANT ALL ON TABLE public.annotations TO kemal;

View file

@ -18,6 +18,7 @@ if [ ! -f /var/lib/postgresql/data/setupFinished ]; then
su postgres -c 'psql invidious kemal < config/sql/users.sql' su postgres -c 'psql invidious kemal < config/sql/users.sql'
su postgres -c 'psql invidious kemal < config/sql/session_ids.sql' su postgres -c 'psql invidious kemal < config/sql/session_ids.sql'
su postgres -c 'psql invidious kemal < config/sql/nonces.sql' su postgres -c 'psql invidious kemal < config/sql/nonces.sql'
su postgres -c 'psql invidious kemal < config/sql/annotations.sql'
touch /var/lib/postgresql/data/setupFinished touch /var/lib/postgresql/data/setupFinished
echo "### invidious database setup finished" echo "### invidious database setup finished"
exit exit

View file

@ -105,13 +105,17 @@ end
Kemal::CLI.new ARGV Kemal::CLI.new ARGV
# Check table integrity
if CONFIG.check_tables if CONFIG.check_tables
# Check table integrity
analyze_table(PG_DB, logger, "channel_videos", ChannelVideo) analyze_table(PG_DB, logger, "channel_videos", ChannelVideo)
analyze_table(PG_DB, logger, "nonces", Nonce) analyze_table(PG_DB, logger, "nonces", Nonce)
analyze_table(PG_DB, logger, "session_ids", SessionId) analyze_table(PG_DB, logger, "session_ids", SessionId)
analyze_table(PG_DB, logger, "users", User) analyze_table(PG_DB, logger, "users", User)
analyze_table(PG_DB, logger, "videos", Video) analyze_table(PG_DB, logger, "videos", Video)
if CONFIG.cache_annotations
analyze_table(PG_DB, logger, "annotations", Annotation)
end
end end
# Start jobs # Start jobs
@ -2938,37 +2942,43 @@ get "/api/v1/annotations/:id" do |env|
case source case source
when "archive" when "archive"
index = CHARS_SAFE.index(id[0]).not_nil!.to_s.rjust(2, '0') if CONFIG.cache_annotations && (cached_annotation = PG_DB.query_one?("SELECT * FROM annotations WHERE id = $1", id, as: Annotation))
annotations = cached_annotation.annotations
else
index = CHARS_SAFE.index(id[0]).not_nil!.to_s.rjust(2, '0')
# IA doesn't handle leading hyphens, # IA doesn't handle leading hyphens,
# so we use https://archive.org/details/youtubeannotations_64 # so we use https://archive.org/details/youtubeannotations_64
if index == "62" if index == "62"
index = "64" index = "64"
id = id.sub(/^-/, 'A') id = id.sub(/^-/, 'A')
end
file = URI.escape("#{id[0, 3]}/#{id}.xml")
client = make_client(ARCHIVE_URL)
location = client.get("/download/youtubeannotations_#{index}/#{id[0, 2]}.tar/#{file}")
if !location.headers["Location"]?
env.response.status_code = location.status_code
end
response = HTTP::Client.get(URI.parse(location.headers["Location"]))
if response.body.empty?
env.response.status_code = 404
next
end
if response.status_code != 200
env.response.status_code = response.status_code
next
end
annotations = response.body
cache_annotation(PG_DB, id, annotations)
end end
file = URI.escape("#{id[0, 3]}/#{id}.xml")
client = make_client(ARCHIVE_URL)
location = client.get("/download/youtubeannotations_#{index}/#{id[0, 2]}.tar/#{file}")
if !location.headers["Location"]?
env.response.status_code = location.status_code
end
response = HTTP::Client.get(URI.parse(location.headers["Location"]))
if response.body.empty?
env.response.status_code = 404
next
end
if response.status_code != 200
env.response.status_code = response.status_code
next
end
annotations = response.body
when "youtube" when "youtube"
client = make_client(YT_URL) client = make_client(YT_URL)

View file

@ -15,6 +15,13 @@ struct SessionId
}) })
end end
# Maps a database row with an `id` and an `annotations` column to a struct.
# Both columns are read as `String`.
# NOTE(review): presumably mirrors the `annotations` SQL table, where the
# `annotations` column is declared as xml and nullable; confirm that a NULL
# value can never be read through this mapping.
struct Annotation
db_mapping({
id: String,
annotations: String,
})
end
struct ConfigPreferences struct ConfigPreferences
module StringToArray module StringToArray
def self.to_yaml(value : Array(String), yaml : YAML::Nodes::Builder) def self.to_yaml(value : Array(String), yaml : YAML::Nodes::Builder)
@ -114,8 +121,9 @@ user: String,
default: Preferences.new(*ConfigPreferences.from_yaml("").to_tuple), default: Preferences.new(*ConfigPreferences.from_yaml("").to_tuple),
converter: ConfigPreferencesConverter, converter: ConfigPreferencesConverter,
}, },
dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs dmca_content: {type: Array(String), default: [] of String}, # For compliance with DMCA, disables download widget using list of video IDs
check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc. check_tables: {type: Bool, default: false}, # Check table integrity, automatically try to add any missing columns, create tables, etc.
cache_annotations: {type: Bool, default: false}, # Cache annotations requested from IA, will not cache empty annotations or annotations that only contain cards
}) })
end end
@ -590,3 +598,27 @@ def get_column_array(db, table_name)
return column_array return column_array
end end
# Stores an annotations XML document in the `annotations` table so it can be
# served later without another upstream request.
#
# Nothing is written when any of the following holds:
# * `CONFIG.cache_annotations` is disabled
# * the document has no `/document/annotations/annotation` nodes
# * every annotation node has the type "branding", "card" or "drawer"
#
# Arguments:
# * `db` - database handle; only `exec` is called on it
# * `id` - key the document is stored under (presumably the video ID; confirm
#   against the caller)
# * `annotations` - the raw XML document as a string
#
# An existing row with the same `id` is left untouched (ON CONFLICT DO NOTHING).
def cache_annotation(db, id, annotations)
  if !CONFIG.cache_annotations
    return
  end

  body = XML.parse(annotations)
  nodeset = body.xpath_nodes(%q(/document/annotations/annotation))

  # Don't cache documents without any annotation nodes. The previous check
  # compared the node set itself to 0, which could never be true.
  if nodeset.empty?
    return
  end

  # Annotations of these types are not worth caching on their own; look for
  # at least one annotation of any other type.
  has_legacy_annotations = nodeset.any? { |node| !{"branding", "card", "drawer"}.includes?(node["type"]?) }

  # Previously the flag was computed but ignored, so every document was stored.
  if has_legacy_annotations
    # TODO: Update on conflict?
    db.exec("INSERT INTO annotations VALUES ($1, $2) ON CONFLICT DO NOTHING", id, annotations)
  end
end