Update refresh_channels to properly utilize workers

This commit is contained in:
Omar Roth 2018-08-07 20:25:59 -05:00
parent f588132cad
commit f3c7409d72
2 changed files with 33 additions and 22 deletions

View file

@ -83,15 +83,7 @@ crawl_threads.times do
end end
end end
total_channels = PG_DB.query_one("SELECT count(*) FROM channels", as: Int64) refresh_channels(PG_DB, channel_threads)
channel_threads.times do |i|
limit = total_channels / channel_threads
offset = limit.not_nil! * i
spawn do
refresh_channels(PG_DB, limit, offset)
end
end
video_threads.times do |i| video_threads.times do |i|
spawn do spawn do

View file

@ -44,25 +44,44 @@ def crawl_videos(db)
end end
end end
def refresh_channels(db, limit = 0, offset = 0) def refresh_channels(db, max_threads = 1)
loop do max_channel = Channel(Int32).new
db.query("SELECT id FROM channels ORDER BY updated limit $1 offset $2", limit, offset) do |rs|
rs.each do
client = make_client(YT_URL)
begin spawn do
max_threads = max_channel.receive
active_threads = 0
active_channel = Channel(Bool).new
loop do
db.query("SELECT id FROM channels ORDER BY updated") do |rs|
rs.each do
id = rs.read(String) id = rs.read(String)
if active_threads >= max_threads
if active_channel.receive
active_threads -= 1
end
end
active_threads += 1
spawn do
begin
client = make_client(YT_URL)
channel = fetch_channel(id, client, db, false) channel = fetch_channel(id, client, db, false)
db.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id) db.exec("UPDATE channels SET updated = $1 WHERE id = $2", Time.now, id)
rescue ex rescue ex
STDOUT << id << " : " << ex.message << "\n" STDOUT << id << " : " << ex.message << "\n"
next end
active_channel.send(true)
end
end
end end
end end
end end
Fiber.yield max_channel.send(max_threads)
end
end end
def refresh_videos(db) def refresh_videos(db)