Remove client pool

This commit is contained in:
Omar Roth 2018-04-28 09:22:06 -05:00
parent 8477579e0b
commit 92f78ff541
3 changed files with 17 additions and 62 deletions

View file

@ -1,4 +1,3 @@
pool_size: 20
channel_threads: 5 channel_threads: 5
threads: 5 threads: 5
db: db:

View file

@ -15,7 +15,6 @@ end
class Config class Config
YAML.mapping({ YAML.mapping({
pool_size: Int32,
threads: Int32, threads: Int32,
channel_threads: Int32, channel_threads: Int32,
db: NamedTuple( db: NamedTuple(
@ -178,14 +177,6 @@ def elapsed_text(elapsed)
"#{(millis * 1000).round(2)}µs" "#{(millis * 1000).round(2)}µs"
end end
def get_client(pool)
while pool.empty?
sleep rand(0..10).milliseconds
end
return pool.shift
end
def fetch_video(id, client) def fetch_video(id, client)
info = client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en").body info = client.get("/get_video_info?video_id=#{id}&el=detailpage&ps=default&eurl=&gl=US&hl=en").body
html = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}").body html = client.get("/watch?v=#{id}&bpctr=#{Time.new.epoch + 2000}").body
@ -312,7 +303,7 @@ def decrypt_signature(a)
return a.join("") return a.join("")
end end
def rank_videos(db, n, pool, filter) def rank_videos(db, n, filter, url)
top = [] of {Float64, String} top = [] of {Float64, String}
db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs| db.query("SELECT id, wilson_score, published FROM videos WHERE views > 5000 ORDER BY published DESC LIMIT 1000") do |rs|
@ -339,15 +330,13 @@ def rank_videos(db, n, pool, filter)
if language_list.size == n if language_list.size == n
break break
else else
client = get_client(pool) client = make_client(url)
begin begin
video = get_video(id, client, db) video = get_video(id, client, db)
rescue ex rescue ex
next next
end end
pool << client
if video.language if video.language
language = video.language language = video.language
else else

View file

@ -25,20 +25,11 @@ require "./cookie_fix"
CONFIG = Config.from_yaml(File.read("config/config.yml")) CONFIG = Config.from_yaml(File.read("config/config.yml"))
pool_size = CONFIG.pool_size
threads = CONFIG.threads threads = CONFIG.threads
channel_threads = CONFIG.channel_threads channel_threads = CONFIG.channel_threads
Kemal.config.extra_options do |parser| Kemal.config.extra_options do |parser|
parser.banner = "Usage: invidious [arguments]" parser.banner = "Usage: invidious [arguments]"
parser.on("-z SIZE", "--youtube-pool=SIZE", "Number of clients in youtube pool (default: #{pool_size})") do |number|
begin
pool_size = number.to_i
rescue ex
puts "SIZE must be integer"
exit
end
end
parser.on("-t THREADS", "--youtube-threads=THREADS", "Number of threads for crawling (default: #{threads})") do |number| parser.on("-t THREADS", "--youtube-threads=THREADS", "Number of threads for crawling (default: #{threads})") do |number|
begin begin
threads = number.to_i threads = number.to_i
@ -73,25 +64,17 @@ YT_URL = URI.parse("https://www.youtube.com")
REDDIT_URL = URI.parse("https://api.reddit.com") REDDIT_URL = URI.parse("https://api.reddit.com")
LOGIN_URL = URI.parse("https://accounts.google.com") LOGIN_URL = URI.parse("https://accounts.google.com")
youtube_pool = Deque.new(pool_size) do
make_client(YT_URL)
end
# Refresh youtube_pool by crawling YT
threads.times do threads.times do
spawn do spawn do
ids = Deque(String).new ids = Deque(String).new
random = Random.new random = Random.new
client = get_client(youtube_pool) client = make_client(YT_URL)
search(random.base64(3), client) do |id| search(random.base64(3), client) do |id|
ids << id ids << id
end end
youtube_pool << client
loop do loop do
client = get_client(youtube_pool)
if ids.empty? if ids.empty?
search(random.base64(3), client) do |id| search(random.base64(3), client) do |id|
ids << id ids << id
@ -103,7 +86,7 @@ threads.times do
video = get_video(id, client, PG_DB) video = get_video(id, client, PG_DB)
rescue ex rescue ex
STDOUT << id << " : " << ex.message << "\n" STDOUT << id << " : " << ex.message << "\n"
youtube_pool << make_client(YT_URL) client = make_client(YT_URL)
next next
ensure ensure
ids.delete(id) ids.delete(id)
@ -125,8 +108,6 @@ threads.times do
end end
end end
end end
youtube_pool << client
end end
end end
end end
@ -139,16 +120,16 @@ channel_threads.times do |i|
OFFSET (SELECT count(*)*$1/$2 FROM channels)" OFFSET (SELECT count(*)*$1/$2 FROM channels)"
PG_DB.query(query, i, channel_threads) do |rs| PG_DB.query(query, i, channel_threads) do |rs|
rs.each do rs.each do
client = get_client(youtube_pool) client = make_client(YT_URL)
begin begin
id = rs.read(String) id = rs.read(String)
channel = get_channel(id, client, PG_DB) channel = get_channel(id, client, PG_DB)
rescue ex rescue ex
STDOUT << id << " : " << ex.message << "\n" STDOUT << id << " : " << ex.message << "\n"
youtube_pool << make_client(YT_URL) client = make_client(YT_URL)
next next
end end
youtube_pool << client
end end
end end
end end
@ -169,7 +150,7 @@ spawn do
loop do loop do
begin begin
top = rank_videos(PG_DB, 40, youtube_pool, filter) top = rank_videos(PG_DB, 40, filter, YT_URL)
rescue ex rescue ex
next next
end end
@ -183,13 +164,12 @@ spawn do
videos = [] of Video videos = [] of Video
top.each do |id| top.each do |id|
client = get_client(youtube_pool) client = make_client(YT_URL)
begin begin
videos << get_video(id, client, PG_DB) videos << get_video(id, client, PG_DB)
rescue ex rescue ex
next next
end end
youtube_pool << client
end end
top_videos = videos top_videos = videos
@ -237,8 +217,6 @@ get "/watch" do |env|
env.params.query.delete_all("listen") env.params.query.delete_all("listen")
end end
client = get_client(youtube_pool)
authorized = env.get? "authorized" authorized = env.get? "authorized"
if authorized if authorized
sid = env.get("sid").as(String) sid = env.get("sid").as(String)
@ -248,13 +226,12 @@ get "/watch" do |env|
subscriptions ||= [] of String subscriptions ||= [] of String
client = make_client(YT_URL)
begin begin
video = get_video(id, client, PG_DB) video = get_video(id, client, PG_DB)
rescue ex rescue ex
error_message = ex.message error_message = ex.message
next templated "error" next templated "error"
ensure
youtube_pool << client
end end
fmt_stream = [] of HTTP::Params fmt_stream = [] of HTTP::Params
@ -364,13 +341,11 @@ get "/search" do |env|
page = env.params.query["page"]?.try &.to_i page = env.params.query["page"]?.try &.to_i
page ||= 1 page ||= 1
client = get_client(youtube_pool) client = make_client(YT_URL)
html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=EgIQAVAU").body html = client.get("/results?q=#{URI.escape(query)}&page=#{page}&sp=EgIQAVAU").body
html = XML.parse_html(html) html = XML.parse_html(html)
youtube_pool << client
videos = Array(Hash(String, String)).new videos = Array(Hash(String, String)).new
html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item| html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |item|
@ -499,9 +474,8 @@ post "/login" do |env|
sid = login.cookies["SID"].value sid = login.cookies["SID"].value
client = get_client(youtube_pool) client = make_client(YT_URL)
user = get_user(sid, client, headers, PG_DB) user = get_user(sid, client, headers, PG_DB)
youtube_pool << client
# We are now logged in # We are now logged in
@ -552,13 +526,11 @@ get "/api/manifest/dash/id/:id" do |env|
local = env.params.query["local"]?.try &.== "true" local = env.params.query["local"]?.try &.== "true"
id = env.params.url["id"] id = env.params.url["id"]
yt_client = get_client(youtube_pool) client = make_client(YT_URL)
begin begin
video = get_video(id, yt_client, PG_DB) video = get_video(id, client, PG_DB)
rescue ex rescue ex
halt env, status_code: 403 halt env, status_code: 403
ensure
youtube_pool << yt_client
end end
adaptive_fmts = [] of HTTP::Params adaptive_fmts = [] of HTTP::Params
@ -670,9 +642,8 @@ get "/feed/subscriptions" do |env|
sid = env.get("sid").as(String) sid = env.get("sid").as(String)
client = get_client(youtube_pool) client = make_client(YT_URL)
user = get_user(sid, client, headers, PG_DB) user = get_user(sid, client, headers, PG_DB)
youtube_pool << client
args = arg_array(user.subscriptions, 3) args = arg_array(user.subscriptions, 3)
videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \ videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args}) \
@ -716,7 +687,7 @@ get "/modify_notifications" do |env|
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["Cookie"] = env.request.headers["Cookie"] headers["Cookie"] = env.request.headers["Cookie"]
client = get_client(youtube_pool) client = make_client(YT_URL)
subs = client.get("/subscription_manager?disable_polymer=1", headers) subs = client.get("/subscription_manager?disable_polymer=1", headers)
headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"]
match = subs.body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/) match = subs.body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
@ -737,8 +708,6 @@ get "/modify_notifications" do |env|
client.post("/subscription_ajax?action_update_subscription_preferences=1", headers, HTTP::Params.encode(channel_req)).body client.post("/subscription_ajax?action_update_subscription_preferences=1", headers, HTTP::Params.encode(channel_req)).body
end end
youtube_pool << client
end end
env.redirect referer env.redirect referer
@ -764,7 +733,7 @@ get "/subscription_ajax" do |env|
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["Cookie"] = env.request.headers["Cookie"] headers["Cookie"] = env.request.headers["Cookie"]
client = get_client(youtube_pool) client = make_client(YT_URL)
subs = client.get("/subscription_manager?disable_polymer=1", headers) subs = client.get("/subscription_manager?disable_polymer=1", headers)
headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"] headers["Cookie"] += "; " + subs.cookies.add_request_headers(headers)["Cookie"]
@ -795,8 +764,6 @@ get "/subscription_ajax" do |env|
PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid) PG_DB.exec("UPDATE users SET subscriptions = array_remove(subscriptions,$1) WHERE id = $2", channel_id, sid)
end end
end end
youtube_pool << client
end end
env.redirect referer env.redirect referer