Use materialized views for subscription feeds

This commit is contained in:
Omar Roth 2018-10-09 08:40:29 -05:00
parent 3fe4547f8e
commit 35e63fa3f5
5 changed files with 61 additions and 30 deletions

View file

@ -10,3 +10,4 @@ db:
full_refresh: false full_refresh: false
https_only: false https_only: false
geo_bypass: true geo_bypass: true
update_feeds: true

View file

@ -98,6 +98,12 @@ spawn do
end end
end end
# Start the background feed refresher only when the `update_feeds`
# config option is enabled.
spawn { update_feeds(PG_DB) } if CONFIG.update_feeds
decrypt_function = [] of {name: String, value: Int32} decrypt_function = [] of {name: String, value: Int32}
spawn do spawn do
update_decrypt_function do |function| update_decrypt_function do |function|
@ -475,9 +481,8 @@ get "/search" do |env|
user = env.get? "user" user = env.get? "user"
if user if user
user = user.as(User) user = user.as(User)
ucids = user.subscriptions view_name = "subscriptions_#{sha256(user.email)[0..7]}"
end end
ucids ||= [] of String
channel = nil channel = nil
content_type = "all" content_type = "all"
@ -514,14 +519,19 @@ get "/search" do |env|
if channel if channel
count, videos = channel_search(search_query, page, channel) count, videos = channel_search(search_query, page, channel)
elsif subscriptions elsif subscriptions
if view_name
videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM ( videos = PG_DB.query_all("SELECT id,title,published,updated,ucid,author FROM (
SELECT *, SELECT *,
to_tsvector(channel_videos.title) || to_tsvector(#{view_name}.title) ||
to_tsvector(channel_videos.author) to_tsvector(#{view_name}.author)
as document as document
FROM channel_videos WHERE ucid IN (#{arg_array(ucids, 3)}) FROM #{view_name}
) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", [search_query, (page - 1) * 20] + ucids, as: ChannelVideo) ) v_search WHERE v_search.document @@ plainto_tsquery($1) LIMIT 20 OFFSET $2;", search_query, (page - 1) * 20, as: ChannelVideo)
count = videos.size count = videos.size
else
videos = [] of ChannelVideo
count = 0
end
else else
begin begin
search_params = produce_search_params(sort: sort, date: date, content_type: content_type, search_params = produce_search_params(sort: sort, date: date, content_type: content_type,
@ -799,6 +809,12 @@ post "/login" do |env|
PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array) PG_DB.exec("INSERT INTO users VALUES (#{args})", user_array)
view_name = "subscriptions_#{sha256(user.email)[0..7]}"
PG_DB.exec("CREATE MATERIALIZED VIEW #{view_name} AS \
SELECT * FROM channel_videos WHERE \
ucid = ANY ((SELECT subscriptions FROM users WHERE email = '#{user.email}')::text[]) \
ORDER BY published DESC;")
if Kemal.config.ssl || CONFIG.https_only if Kemal.config.ssl || CONFIG.https_only
secure = true secure = true
else else
@ -1364,6 +1380,8 @@ get "/feed/subscriptions" do |env|
notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email, notifications = PG_DB.query_one("SELECT notifications FROM users WHERE email = $1", user.email,
as: Array(String)) as: Array(String))
view_name = "subscriptions_#{sha256(user.email)[0..7]}"
if preferences.notifications_only && !notifications.empty? if preferences.notifications_only && !notifications.empty?
args = arg_array(notifications) args = arg_array(notifications)
@ -1386,39 +1404,34 @@ get "/feed/subscriptions" do |env|
else else
if preferences.latest_only if preferences.latest_only
if preferences.unseen_only if preferences.unseen_only
ucids = arg_array(user.subscriptions)
if user.watched.empty? if user.watched.empty?
watched = "'{}'" watched = "'{}'"
else else
watched = arg_array(user.watched, user.subscriptions.size + 1) watched = arg_array(user.watched)
end end
videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \ videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} WHERE \
ucid IN (#{ucids}) AND id NOT IN (#{watched}) ORDER BY ucid, published DESC", id NOT IN (#{watched}) ORDER BY ucid, published DESC",
user.subscriptions + user.watched, as: ChannelVideo) user.watched, as: ChannelVideo)
else else
args = arg_array(user.subscriptions) videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name}", as: ChannelVideo)
videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \
ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo)
end end
videos.sort_by! { |video| video.published }.reverse! videos.sort_by! { |video| video.published }.reverse!
else else
if preferences.unseen_only if preferences.unseen_only
ucids = arg_array(user.subscriptions, 3)
if user.watched.empty? if user.watched.empty?
watched = "'{}'" watched = "'{}'"
else else
watched = arg_array(user.watched, user.subscriptions.size + 3) watched = arg_array(user.watched, 3)
end end
videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{ucids}) \ videos = PG_DB.query_all("SELECT * FROM #{view_name} WHERE \
AND id NOT IN (#{watched}) ORDER BY published DESC LIMIT $1 OFFSET $2", id NOT IN (#{watched}) LIMIT $1 OFFSET $2",
[limit, offset] + user.subscriptions + user.watched, as: ChannelVideo) [limit, offset] + user.watched, as: ChannelVideo)
else else
args = arg_array(user.subscriptions, 3) videos = PG_DB.query_all("SELECT * FROM #{view_name} \
videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo)
end end
end end
@ -1576,15 +1589,14 @@ get "/feed/private" do |env|
latest_only ||= 0 latest_only ||= 0
latest_only = latest_only == 1 latest_only = latest_only == 1
view_name = "subscriptions_#{sha256(user.email)[0..7]}"
if latest_only if latest_only
args = arg_array(user.subscriptions) videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM #{view_name} ORDER BY ucid, published DESC", as: ChannelVideo)
videos = PG_DB.query_all("SELECT DISTINCT ON (ucid) * FROM channel_videos WHERE \
ucid IN (#{args}) ORDER BY ucid, published DESC", user.subscriptions, as: ChannelVideo)
videos.sort_by! { |video| video.published }.reverse! videos.sort_by! { |video| video.published }.reverse!
else else
args = arg_array(user.subscriptions, 3) videos = PG_DB.query_all("SELECT * FROM #{view_name} \
videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ ORDER BY published DESC LIMIT $1 OFFSET $2", limit, offset, as: ChannelVideo)
ORDER BY published DESC LIMIT $1 OFFSET $2", [limit, offset] + user.subscriptions, as: ChannelVideo)
end end
sort = env.params.query["sort"]? sort = env.params.query["sort"]?

View file

@ -15,6 +15,7 @@ class Config
hmac_key: String?, hmac_key: String?,
full_refresh: Bool, full_refresh: Bool,
geo_bypass: Bool, geo_bypass: Bool,
update_feeds: Bool,
}) })
end end

View file

@ -238,3 +238,9 @@ def write_var_int(value : Int)
return bytes return bytes
end end
# Returns the SHA-256 digest of `text` as a hexadecimal string.
def sha256(text)
  OpenSSL::Digest.new("SHA256").tap { |hasher| hasher << text }.hexdigest
end

View file

@ -104,6 +104,17 @@ def refresh_videos(db)
end end
end end
# Continuously refreshes the per-user subscription feed views.
#
# Each pass reads every email from `users`, derives the view name
# `subscriptions_<first 8 hex chars of sha256(email)>` and issues
# `REFRESH MATERIALIZED VIEW` for it. The loop never returns, so this is
# meant to run in its own fiber.
#
# db - database handle; `query_all` and `exec` are called on it.
def update_feeds(db)
  loop do
    users = db.query_all("SELECT email FROM users", as: String)

    users.each do |email|
      view_name = "subscriptions_#{sha256(email)[0..7]}"

      begin
        db.exec("REFRESH MATERIALIZED VIEW #{view_name}")
      rescue ex
        # A failed refresh for one user (for example because that user's
        # view does not exist) must not escape the loop: that would stop
        # feed updates for every other user as well. Report it and move on.
        STDOUT << view_name << " : " << ex.message << "\n"
      end
    end
  end
end
def pull_top_videos(config, db) def pull_top_videos(config, db)
if config.dl_api_key if config.dl_api_key
DetectLanguage.configure do |dl_config| DetectLanguage.configure do |dl_config|