Add RefreshChannelsJob traces

Traces can be enabled with `-l trace`.

The problem with subscriptions is that sometimes requests to YouTube never
finish. As soon as that happens `channel-threads` times subscriptions stop
being refreshed. This is most likely a problem with the lsquick bindings.
This commit is contained in:
saltycrys 2020-12-27 05:20:33 +01:00
parent 420ceffbb0
commit c4ef055248
5 changed files with 53 additions and 29 deletions

View file

@ -144,7 +144,7 @@ class ChannelRedirect < Exception
end
end
def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, max_threads = 10)
def get_batch_channels(channels, db, logger, refresh = false, pull_all_videos = true, max_threads = 10)
finished_channel = Channel(String | Nil).new
spawn do
@ -160,7 +160,7 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma
active_threads += 1
spawn do
begin
get_channel(ucid, db, refresh, pull_all_videos)
get_channel(ucid, db, logger, refresh, pull_all_videos)
finished_channel.send(ucid)
rescue ex
finished_channel.send(nil)
@ -181,10 +181,10 @@ def get_batch_channels(channels, db, refresh = false, pull_all_videos = true, ma
return final
end
def get_channel(id, db, refresh = true, pull_all_videos = true)
def get_channel(id, db, logger, refresh = true, pull_all_videos = true)
if channel = db.query_one?("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel)
if refresh && Time.utc - channel.updated > 10.minutes
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel = fetch_channel(id, db, logger, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
@ -192,7 +192,7 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
ON CONFLICT (id) DO UPDATE SET author = $2, updated = $3", args: channel_array)
end
else
channel = fetch_channel(id, db, pull_all_videos: pull_all_videos)
channel = fetch_channel(id, db, logger, pull_all_videos: pull_all_videos)
channel_array = channel.to_a
args = arg_array(channel_array)
@ -202,8 +202,12 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
return channel
end
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
def fetch_channel(ucid, db, logger, pull_all_videos = true, locale = nil)
logger.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}, locale = #{locale}")
logger.trace("fetch_channel: #{ucid} : Downloading RSS feed")
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
logger.trace("fetch_channel: #{ucid} : Parsing RSS feed")
rss = XML.parse_html(rss)
author = rss.xpath_node(%q(//feed/title))
@ -219,14 +223,19 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
auto_generated = true
end
logger.trace("fetch_channel: #{ucid} : author = #{author}, auto_generated = #{auto_generated}")
page = 1
logger.trace("fetch_channel: #{ucid} : Downloading channel videos page")
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = [] of SearchVideo
begin
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
raise InfoException.new("Could not extract channel JSON") if !initial_data
logger.trace("fetch_channel: #{ucid} : Extracting videos from channel videos page initial_data")
videos = extract_videos(initial_data.as_h, author, ucid)
rescue ex
if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
@ -236,6 +245,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
raise ex
end
logger.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
rss.xpath_nodes("//feed/entry").each do |entry|
video_id = entry.xpath_node("videoid").not_nil!.content
title = entry.xpath_node("title").not_nil!.content
@ -269,6 +279,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
views: views,
})
logger.trace("fetch_channel: #{ucid} : video #{video_id} : Updating or inserting video")
# We don't include the 'premiere_timestamp' here because channel pages don't include them,
# meaning the above timestamp is always null
was_insert = db.query_one("INSERT INTO channel_videos VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) \
@ -276,8 +288,13 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
updated = $4, ucid = $5, author = $6, length_seconds = $7, \
live_now = $8, views = $10 returning (xmax=0) as was_insert", *video.to_tuple, as: Bool)
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid) if was_insert
if was_insert
logger.trace("fetch_channel: #{ucid} : video #{video_id} : Inserted, updating subscriptions")
db.exec("UPDATE users SET notifications = array_append(notifications, $1), \
feed_needs_update = true WHERE $2 = ANY(subscriptions)", video.id, video.ucid)
else
logger.trace("fetch_channel: #{ucid} : video #{video_id} : Updated")
end
end
if pull_all_videos

View file

@ -20,18 +20,23 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob
id = rs.read(String)
if active_fibers >= lim_fibers
logger.trace("RefreshChannelsJob: Fiber limit reached, waiting...")
if active_channel.receive
logger.trace("RefreshChannelsJob: Fiber limit ok, continuing")
active_fibers -= 1
end
end
logger.trace("RefreshChannelsJob: #{id} : Spawning fiber")
active_fibers += 1
spawn do
begin
logger.trace("RefreshChannelsJob: Fetching channel #{id}")
channel = fetch_channel(id, db, config.full_refresh)
logger.trace("RefreshChannelsJob: #{id} fiber : Fetching channel")
channel = fetch_channel(id, db, logger, config.full_refresh)
lim_fibers = max_fibers
logger.trace("RefreshChannelsJob: #{id} fiber : Updating DB")
db.exec("UPDATE channels SET updated = $1, author = $2, deleted = false WHERE id = $3", Time.utc, channel.author, id)
rescue ex
logger.error("RefreshChannelsJob: #{id} : #{ex.message}")
@ -39,7 +44,7 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob
db.exec("UPDATE channels SET updated = $1, deleted = true WHERE id = $2", Time.utc, id)
else
lim_fibers = 1
logger.error("RefreshChannelsJob: #{id} : backing off for #{backoff}s")
logger.error("RefreshChannelsJob: #{id} fiber : backing off for #{backoff}s")
sleep backoff
if backoff < 1.days
backoff += backoff
@ -47,13 +52,15 @@ class Invidious::Jobs::RefreshChannelsJob < Invidious::Jobs::BaseJob
backoff = 1.days
end
end
ensure
logger.trace("RefreshChannelsJob: #{id} fiber : Done")
active_channel.send(true)
end
active_channel.send(true)
end
end
end
logger.debug("RefreshChannelsJob: Done, sleeping for one minute")
sleep 1.minute
Fiber.yield
end

View file

@ -267,7 +267,7 @@ class Invidious::Routes::Login < Invidious::Routes::BaseRoute
raise "Couldn't get SID."
end
user, sid = get_user(sid, headers, PG_DB)
user, sid = get_user(sid, headers, PG_DB, logger)
# We are now logged in
traceback << "done.<br/>"

View file

@ -269,12 +269,12 @@ struct Preferences
end
end
def get_user(sid, headers, db, refresh = true)
def get_user(sid, headers, db, logger, refresh = true)
if email = db.query_one?("SELECT email FROM session_ids WHERE id = $1", sid, as: String)
user = db.query_one("SELECT * FROM users WHERE email = $1", email, as: User)
if refresh && Time.utc - user.updated > 1.minute
user, sid = fetch_user(sid, headers, db)
user, sid = fetch_user(sid, headers, db, logger)
user_array = user.to_a
user_array[4] = user_array[4].to_json # User preferences
args = arg_array(user_array)
@ -292,7 +292,7 @@ def get_user(sid, headers, db, refresh = true)
end
end
else
user, sid = fetch_user(sid, headers, db)
user, sid = fetch_user(sid, headers, db, logger)
user_array = user.to_a
user_array[4] = user_array[4].to_json # User preferences
args = arg_array(user.to_a)
@ -313,7 +313,7 @@ def get_user(sid, headers, db, refresh = true)
return user, sid
end
def fetch_user(sid, headers, db)
def fetch_user(sid, headers, db, logger)
feed = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
feed = XML.parse_html(feed.body)
@ -326,7 +326,7 @@ def fetch_user(sid, headers, db)
end
end
channels = get_batch_channels(channels, db, false, false)
channels = get_batch_channels(channels, db, logger, false, false)
email = feed.xpath_node(%q(//a[@class="yt-masthead-picker-header yt-masthead-picker-active-account"]))
if email