Add HTTPClient pool

This commit is contained in:
Omar Roth 2019-10-25 12:58:16 -04:00
parent aba2c5b938
commit 6930570fa2
No known key found for this signature in database
GPG key ID: B8254FB7EC3D37F2
14 changed files with 115 additions and 111 deletions

View file

@ -18,6 +18,9 @@ dependencies:
kemal: kemal:
github: kemalcr/kemal github: kemalcr/kemal
version: ~> 0.26.0 version: ~> 0.26.0
pool:
github: ysbaddaden/pool
version: ~> 0.2.3
crystal: 0.31.1 crystal: 0.31.1

View file

@ -46,6 +46,7 @@ PUBSUB_URL = URI.parse("https://pubsubhubbub.appspot.com")
REDDIT_URL = URI.parse("https://www.reddit.com") REDDIT_URL = URI.parse("https://www.reddit.com")
TEXTCAPTCHA_URL = URI.parse("http://textcaptcha.com") TEXTCAPTCHA_URL = URI.parse("http://textcaptcha.com")
YT_URL = URI.parse("https://www.youtube.com") YT_URL = URI.parse("https://www.youtube.com")
YT_IMG_URL = URI.parse("https://i.ytimg.com")
CHARS_SAFE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" CHARS_SAFE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
TEST_IDS = {"AgbeGFYluEA", "BaW_jenozKc", "a9LDPn-MO4I", "ddFvjfvPnqk", "iqKdEhx-dD4"} TEST_IDS = {"AgbeGFYluEA", "BaW_jenozKc", "a9LDPn-MO4I", "ddFvjfvPnqk", "iqKdEhx-dD4"}
@ -91,6 +92,9 @@ LOCALES = {
"zh-TW" => load_locale("zh-TW"), "zh-TW" => load_locale("zh-TW"),
} }
YT_POOL = HTTPPool.new(YT_URL, capacity: CONFIG.pool_size, timeout: 0.05)
YT_IMG_POOL = HTTPPool.new(YT_IMG_URL, capacity: CONFIG.pool_size, timeout: 0.05)
config = CONFIG config = CONFIG
logger = Invidious::LogHandler.new logger = Invidious::LogHandler.new
@ -658,8 +662,7 @@ get "/embed/:id" do |env|
next env.redirect url next env.redirect url
when "live_stream" when "live_stream"
client = make_client(YT_URL) response = YT_POOL.client &.get("/embed/live_stream?channel=#{env.params.query["channel"]? || ""}")
response = client.get("/embed/live_stream?channel=#{env.params.query["channel"]? || ""}")
video_id = response.body.match(/"video_id":"(?<video_id>[a-zA-Z0-9_-]{11})"/).try &.["video_id"] video_id = response.body.match(/"video_id":"(?<video_id>[a-zA-Z0-9_-]{11})"/).try &.["video_id"]
env.params.query.delete_all("channel") env.params.query.delete_all("channel")
@ -2250,8 +2253,7 @@ get "/modify_notifications" do |env|
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["Cookie"] = env.request.headers["Cookie"] headers["Cookie"] = env.request.headers["Cookie"]
client = make_client(YT_URL) html = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
html = client.get("/subscription_manager?disable_polymer=1", headers)
cookies = HTTP::Cookies.from_headers(headers) cookies = HTTP::Cookies.from_headers(headers)
html.cookies.each do |cookie| html.cookies.each do |cookie|
@ -2280,7 +2282,7 @@ get "/modify_notifications" do |env|
channel_id = channel.content.lstrip("/channel/").not_nil! channel_id = channel.content.lstrip("/channel/").not_nil!
channel_req["channel_id"] = channel_id channel_req["channel_id"] = channel_id
client.post("/subscription_ajax?action_update_subscription_preferences=1", headers, form: channel_req) YT_POOL.client &.post("/subscription_ajax?action_update_subscription_preferences=1", headers, form: channel_req)
end end
end end
@ -2558,8 +2560,7 @@ post "/data_control" do |env|
if match = channel["url"].as_s.match(/\/channel\/(?<channel>UC[a-zA-Z0-9_-]{22})/) if match = channel["url"].as_s.match(/\/channel\/(?<channel>UC[a-zA-Z0-9_-]{22})/)
next match["channel"] next match["channel"]
elsif match = channel["url"].as_s.match(/\/user\/(?<user>.+)/) elsif match = channel["url"].as_s.match(/\/user\/(?<user>.+)/)
client = make_client(YT_URL) response = YT_POOL.client &.get("/user/#{match["user"]}?disable_polymer=1&hl=en&gl=US")
response = client.get("/user/#{match["user"]}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"])) canonical = document.xpath_node(%q(//link[@rel="canonical"]))
@ -3084,8 +3085,7 @@ get "/feed/channel/:ucid" do |env|
next error_message next error_message
end end
client = make_client(YT_URL) rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{channel.ucid}").body
rss = client.get("/feeds/videos.xml?channel_id=#{channel.ucid}").body
rss = XML.parse_html(rss) rss = XML.parse_html(rss)
videos = [] of SearchVideo videos = [] of SearchVideo
@ -3229,8 +3229,7 @@ get "/feed/playlist/:plid" do |env|
end end
end end
client = make_client(YT_URL) response = YT_POOL.client &.get("/feeds/videos.xml?playlist_id=#{plid}")
response = client.get("/feeds/videos.xml?playlist_id=#{plid}")
document = XML.parse(response.body) document = XML.parse(response.body)
document.xpath_nodes(%q(//*[@href]|//*[@url])).each do |node| document.xpath_nodes(%q(//*[@href]|//*[@url])).each do |node|
@ -3395,14 +3394,13 @@ end
{"/channel/:ucid/live", "/user/:user/live", "/c/:user/live"}.each do |route| {"/channel/:ucid/live", "/user/:user/live", "/c/:user/live"}.each do |route|
get route do |env| get route do |env|
locale = LOCALES[env.get("preferences").as(Preferences).locale]? locale = LOCALES[env.get("preferences").as(Preferences).locale]?
client = make_client(YT_URL)
# Appears to be a bug in routing, having several routes configured # Appears to be a bug in routing, having several routes configured
# as `/a/:a`, `/b/:a`, `/c/:a` results in 404 # as `/a/:a`, `/b/:a`, `/c/:a` results in 404
value = env.request.resource.split("/")[2] value = env.request.resource.split("/")[2]
body = "" body = ""
{"channel", "user", "c"}.each do |type| {"channel", "user", "c"}.each do |type|
response = client.get("/#{type}/#{value}/live?disable_polymer=1") response = YT_POOL.client &.get("/#{type}/#{value}/live?disable_polymer=1")
if response.status_code == 200 if response.status_code == 200
body = response.body body = response.body
end end
@ -3433,10 +3431,9 @@ end
get "/c/:user" do |env| get "/c/:user" do |env|
locale = LOCALES[env.get("preferences").as(Preferences).locale]? locale = LOCALES[env.get("preferences").as(Preferences).locale]?
client = make_client(YT_URL)
user = env.params.url["user"] user = env.params.url["user"]
response = client.get("/c/#{user}") response = YT_POOL.client &.get("/c/#{user}")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
anchor = document.xpath_node(%q(//a[contains(@class,"branded-page-header-title-link")])) anchor = document.xpath_node(%q(//a[contains(@class,"branded-page-header-title-link")]))
@ -3676,7 +3673,6 @@ get "/api/v1/storyboards/:id" do |env|
id = env.params.url["id"] id = env.params.url["id"]
region = env.params.query["region"]? region = env.params.query["region"]?
client = make_client(YT_URL)
begin begin
video = get_video(id, PG_DB, region: region) video = get_video(id, PG_DB, region: region)
rescue ex : VideoRedirect rescue ex : VideoRedirect
@ -3764,7 +3760,6 @@ get "/api/v1/captions/:id" do |env|
# In future this should be investigated as an alternative, since it does not require # In future this should be investigated as an alternative, since it does not require
# getting video info. # getting video info.
client = make_client(YT_URL)
begin begin
video = get_video(id, PG_DB, region: region) video = get_video(id, PG_DB, region: region)
rescue ex : VideoRedirect rescue ex : VideoRedirect
@ -3823,7 +3818,7 @@ get "/api/v1/captions/:id" do |env|
# Auto-generated captions often have cues that aren't aligned properly with the video, # Auto-generated captions often have cues that aren't aligned properly with the video,
# as well as some other markup that makes it cumbersome, so we try to fix that here # as well as some other markup that makes it cumbersome, so we try to fix that here
if caption.name.simpleText.includes? "auto-generated" if caption.name.simpleText.includes? "auto-generated"
caption_xml = client.get(url).body caption_xml = YT_POOL.client &.get(url).body
caption_xml = XML.parse(caption_xml) caption_xml = XML.parse(caption_xml)
webvtt = String.build do |str| webvtt = String.build do |str|
@ -3866,7 +3861,7 @@ get "/api/v1/captions/:id" do |env|
end end
end end
else else
webvtt = client.get("#{url}&format=vtt").body webvtt = YT_POOL.client &.get("#{url}&format=vtt").body
end end
if title = env.params.query["title"]? if title = env.params.query["title"]?
@ -4013,9 +4008,7 @@ get "/api/v1/annotations/:id" do |env|
cache_annotation(PG_DB, id, annotations) cache_annotation(PG_DB, id, annotations)
end end
when "youtube" when "youtube"
client = make_client(YT_URL) response = YT_POOL.client &.get("/annotations_invideo?video_id=#{id}")
response = client.get("/annotations_invideo?video_id=#{id}")
if response.status_code != 200 if response.status_code != 200
env.response.status_code = response.status_code env.response.status_code = response.status_code
@ -5115,7 +5108,6 @@ get "/api/manifest/dash/id/:id" do |env|
# we can opt to only add a source to a representation if it has a unique height within that representation # we can opt to only add a source to a representation if it has a unique height within that representation
unique_res = env.params.query["unique_res"]? && (env.params.query["unique_res"] == "true" || env.params.query["unique_res"] == "1") unique_res = env.params.query["unique_res"]? && (env.params.query["unique_res"] == "true" || env.params.query["unique_res"] == "1")
client = make_client(YT_URL)
begin begin
video = get_video(id, PG_DB, region: region) video = get_video(id, PG_DB, region: region)
rescue ex : VideoRedirect rescue ex : VideoRedirect
@ -5126,7 +5118,7 @@ get "/api/manifest/dash/id/:id" do |env|
end end
if dashmpd = video.player_response["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s if dashmpd = video.player_response["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s
manifest = client.get(dashmpd).body manifest = YT_POOL.client &.get(dashmpd).body
manifest = manifest.gsub(/<BaseURL>[^<]+<\/BaseURL>/) do |baseurl| manifest = manifest.gsub(/<BaseURL>[^<]+<\/BaseURL>/) do |baseurl|
url = baseurl.lchop("<BaseURL>") url = baseurl.lchop("<BaseURL>")
@ -5226,8 +5218,7 @@ get "/api/manifest/dash/id/:id" do |env|
end end
get "/api/manifest/hls_variant/*" do |env| get "/api/manifest/hls_variant/*" do |env|
client = make_client(YT_URL) manifest = YT_POOL.client &.get(env.request.path)
manifest = client.get(env.request.path)
if manifest.status_code != 200 if manifest.status_code != 200
env.response.status_code = manifest.status_code env.response.status_code = manifest.status_code
@ -5252,8 +5243,7 @@ get "/api/manifest/hls_variant/*" do |env|
end end
get "/api/manifest/hls_playlist/*" do |env| get "/api/manifest/hls_playlist/*" do |env|
client = make_client(YT_URL) manifest = YT_POOL.client &.get(env.request.path)
manifest = client.get(env.request.path)
if manifest.status_code != 200 if manifest.status_code != 200
env.response.status_code = manifest.status_code env.response.status_code = manifest.status_code
@ -5616,10 +5606,6 @@ get "/videoplayback" do |env|
end end
end end
# We need this so the below route works as expected
get "/ggpht*" do |env|
end
get "/ggpht/*" do |env| get "/ggpht/*" do |env|
host = "https://yt3.ggpht.com" host = "https://yt3.ggpht.com"
client = make_client(URI.parse(host)) client = make_client(URI.parse(host))
@ -5745,12 +5731,9 @@ get "/vi/:id/:name" do |env|
id = env.params.url["id"] id = env.params.url["id"]
name = env.params.url["name"] name = env.params.url["name"]
host = "https://i.ytimg.com"
client = make_client(URI.parse(host))
if name == "maxres.jpg" if name == "maxres.jpg"
build_thumbnails(id, config, Kemal.config).each do |thumb| build_thumbnails(id, config, Kemal.config).each do |thumb|
if client.head("/vi/#{id}/#{thumb[:url]}.jpg").status_code == 200 if YT_IMG_POOL.client &.head("/vi/#{id}/#{thumb[:url]}.jpg").status_code == 200
name = thumb[:url] + ".jpg" name = thumb[:url] + ".jpg"
break break
end end
@ -5766,7 +5749,7 @@ get "/vi/:id/:name" do |env|
end end
begin begin
client.get(url, headers) do |response| YT_IMG_POOL.client &.get(url, headers) do |response|
env.response.status_code = response.status_code env.response.status_code = response.status_code
response.headers.each do |key, value| response.headers.each do |key, value|
if !RESPONSE_HEADERS_BLACKLIST.includes? key if !RESPONSE_HEADERS_BLACKLIST.includes? key
@ -5789,9 +5772,7 @@ end
# Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos # Undocumented, creates anonymous playlist with specified 'video_ids', max 50 videos
get "/watch_videos" do |env| get "/watch_videos" do |env|
client = make_client(YT_URL) response = YT_POOL.client &.get(env.request.resource)
response = client.get("#{env.request.path}?#{env.request.query}")
if url = response.headers["Location"]? if url = response.headers["Location"]?
url = URI.parse(url).full_path url = URI.parse(url).full_path
next env.redirect url next env.redirect url
@ -5805,11 +5786,10 @@ error 404 do |env|
item = md["id"] item = md["id"]
# Check if item is branding URL e.g. https://youtube.com/gaming # Check if item is branding URL e.g. https://youtube.com/gaming
client = make_client(YT_URL) response = YT_POOL.client &.get("/#{item}")
response = client.get("/#{item}")
if response.status_code == 301 if response.status_code == 301
response = client.get(response.headers["Location"]) response = YT_POOL.client &.get(response.headers["Location"])
end end
if response.body.empty? if response.body.empty?
@ -5837,8 +5817,7 @@ error 404 do |env|
end end
# Check if item is video ID # Check if item is video ID
client = make_client(YT_URL) if item.match(/^[a-zA-Z0-9_-]{11}$/) && YT_POOL.client &.head("/watch?v=#{item}").status_code != 404
if item.match(/^[a-zA-Z0-9_-]{11}$/) && client.head("/watch?v=#{item}").status_code != 404
env.response.headers["Location"] = url env.response.headers["Location"] = url
halt env, status_code: 302 halt env, status_code: 302
end end

View file

@ -195,9 +195,7 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
end end
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
client = make_client(YT_URL) rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body
rss = XML.parse_html(rss) rss = XML.parse_html(rss)
author = rss.xpath_node(%q(//feed/title)) author = rss.xpath_node(%q(//feed/title))
@ -216,7 +214,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1 page = 1
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty? if json["content_html"]? && !json["content_html"].as_s.empty?
@ -296,7 +294,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
loop do loop do
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty? if json["content_html"]? && !json["content_html"].as_s.empty?
@ -375,12 +373,10 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
end end
def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by) def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
client = make_client(YT_URL)
if continuation if continuation
url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated) url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["load_more_widget_html"].as_s.empty? if json["load_more_widget_html"].as_s.empty?
@ -399,7 +395,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
elsif auto_generated elsif auto_generated
url = "/channel/#{ucid}" url = "/channel/#{ucid}"
response = client.get(url) response = YT_POOL.client &.get(url)
html = XML.parse_html(response.body) html = XML.parse_html(response.body)
nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")])) nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
@ -415,7 +411,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
url += "&sort=dd" url += "&sort=dd"
end end
response = client.get(url) response = YT_POOL.client &.get(url)
html = XML.parse_html(response.body) html = XML.parse_html(response.body)
continuation = html.xpath_node(%q(//button[@data-uix-load-more-href])) continuation = html.xpath_node(%q(//button[@data-uix-load-more-href]))
@ -625,13 +621,12 @@ end
# TODO: Add "sort_by" # TODO: Add "sort_by"
def fetch_channel_community(ucid, continuation, locale, config, kemal_config, format, thin_mode) def fetch_channel_community(ucid, continuation, locale, config, kemal_config, format, thin_mode)
client = make_client(YT_URL)
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36" headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
response = client.get("/channel/#{ucid}/community?gl=US&hl=en", headers) response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en", headers)
if response.status_code == 404 if response.status_code == 404
response = client.get("/user/#{ucid}/community?gl=US&hl=en", headers) response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en", headers)
end end
if response.status_code == 404 if response.status_code == 404
@ -668,7 +663,7 @@ def fetch_channel_community(ucid, continuation, locale, config, kemal_config, fo
session_token: session_token, session_token: session_token,
} }
response = client.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req) response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
body = JSON.parse(response.body) body = JSON.parse(response.body)
body = body["response"]["continuationContents"]["itemSectionContinuation"]? || body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
@ -929,11 +924,9 @@ def extract_channel_community_cursor(continuation)
end end
def get_about_info(ucid, locale) def get_about_info(ucid, locale)
client = make_client(YT_URL) about = YT_POOL.client &.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
about = client.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
if about.status_code == 404 if about.status_code == 404
about = client.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en") about = YT_POOL.client &.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
end end
if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/) if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
@ -1038,11 +1031,9 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
count = 0 count = 0
videos = [] of SearchVideo videos = [] of SearchVideo
client = make_client(YT_URL)
2.times do |i| 2.times do |i|
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty? if json["content_html"]? && !json["content_html"].as_s.empty?
@ -1067,11 +1058,10 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end end
def get_latest_videos(ucid) def get_latest_videos(ucid)
client = make_client(YT_URL)
videos = [] of SearchVideo videos = [] of SearchVideo
url = produce_channel_videos_url(ucid, 0) url = produce_channel_videos_url(ucid, 0)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty? if json["content_html"]? && !json["content_html"].as_s.empty?

View file

@ -85,7 +85,6 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
session_token: session_token, session_token: session_token,
} }
client = make_client(YT_URL, video.info["region"]?)
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["content-type"] = "application/x-www-form-urlencoded" headers["content-type"] = "application/x-www-form-urlencoded"
@ -98,7 +97,7 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
headers["x-youtube-client-name"] = "1" headers["x-youtube-client-name"] = "1"
headers["x-youtube-client-version"] = "2.20180719" headers["x-youtube-client-version"] = "2.20180719"
response = client.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req) response = YT_POOL.client(region, &.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req))
response = JSON.parse(response.body) response = JSON.parse(response.body)
if !response["response"]["continuationContents"]? if !response["response"]["continuationContents"]?

View file

@ -234,6 +234,7 @@ struct Config
force_resolve: {type: Socket::Family, default: Socket::Family::UNSPEC, converter: FamilyConverter}, # Connect to YouTube over 'ipv6', 'ipv4'. Will sometimes resolve fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729) force_resolve: {type: Socket::Family, default: Socket::Family::UNSPEC, converter: FamilyConverter}, # Connect to YouTube over 'ipv6', 'ipv4'. Will sometimes resolve fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729)
port: {type: Int32, default: 3000}, # Port to listen for connections (overrided by command line argument) port: {type: Int32, default: 3000}, # Port to listen for connections (overrided by command line argument)
host_binding: {type: String, default: "0.0.0.0"}, # Host to bind (overrided by command line argument) host_binding: {type: String, default: "0.0.0.0"}, # Host to bind (overrided by command line argument)
pool_size: {type: Int32, default: 100},
}) })
end end

View file

@ -77,6 +77,10 @@ class HTTPClient < HTTP::Client
end end
end end
# Forgets any socket currently held by this client by setting `@socket` to nil.
# The socket is not explicitly closed here; only the reference is dropped.
# NOTE(review): presumably the next request then opens a fresh, direct
# connection instead of reusing a proxied one — confirm against the
# reconnect behavior of the parent HTTP::Client.
def unset_proxy
@socket = nil
end
def proxy_connection_options def proxy_connection_options
opts = {} of Symbol => Float64 | Nil opts = {} of Symbol => Float64 | Nil

View file

@ -1,8 +1,7 @@
def fetch_decrypt_function(id = "CvFH_6DNRCY") def fetch_decrypt_function(id = "CvFH_6DNRCY")
client = make_client(YT_URL) document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
document = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-.{9}\/en_US\/base.js)"/).not_nil!["url"] url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-.{9}\/en_US\/base.js)"/).not_nil!["url"]
player = client.get(url).body player = YT_POOL.client &.get(url).body
function_name = player.match(/^(?<name>[^=]+)=function\(a\){a=a\.split\(""\)/m).not_nil!["name"] function_name = player.match(/^(?<name>[^=]+)=function\(a\){a=a\.split\(""\)/m).not_nil!["name"]
function_body = player.match(/^#{Regex.escape(function_name)}=function\(a\){(?<body>[^}]+)}/m).not_nil!["body"] function_body = player.match(/^#{Regex.escape(function_name)}=function\(a\){(?<body>[^}]+)}/m).not_nil!["body"]

View file

@ -1,3 +1,46 @@
require "pool/connection"
# A pool of `HTTPClient` connections that all target a single base URL.
#
# Callers borrow a client with `#client`, run one or more requests inside the
# block, and get the block's value back. Clients are created on demand by
# `build_pool`, up to `capacity`.
struct HTTPPool
  # Base URL every pooled client is created for.
  property! url : URI
  # Maximum number of clients, forwarded to `ConnectionPool`.
  property! capacity : Int32
  # Checkout timeout, forwarded to `ConnectionPool`.
  property! timeout : Float64
  # The underlying connection pool.
  property pool : ConnectionPool(HTTPClient)

  # Builds a pool for *url*. *capacity* and *timeout* are passed through to
  # the underlying `ConnectionPool` unchanged.
  def initialize(url : URI, @capacity = 5, @timeout = 5.0)
    @url = url
    @pool = build_pool
  end

  # Borrows a client from the pool, yields it, and returns the block's value.
  #
  # When *region* is given, up to 40 randomly sampled proxies from
  # `PROXY_LIST[region]` are tried in turn; the first one that can be set on
  # the client without raising is used. If none work (or the region is
  # unknown) the request proceeds without a proxy.
  #
  # The proxy state is always cleared before the client leaves this method,
  # including when the block raises, so that a proxied socket is never left
  # attached to a client that other callers will borrow later.
  def client(region = nil, &block)
    pool.connection do |conn|
      begin
        if region
          PROXY_LIST[region]?.try &.sample(40).each do |proxy|
            begin
              proxy = HTTPProxy.new(proxy_host: proxy[:ip], proxy_port: proxy[:port])
              conn.set_proxy(proxy)
              break
            rescue
              # Best effort: this proxy is unusable, try the next candidate.
            end
          end
        end

        yield conn
      ensure
        # Must run on the error path too; otherwise a failed request would
        # leave the region proxy configured on this client.
        conn.unset_proxy
      end
    end
  end

  # Creates the `ConnectionPool`; its factory block builds one `HTTPClient`
  # per pool slot with 5 second read/connect timeouts. The address family is
  # forced to `CONFIG.force_resolve` only for www.youtube.com.
  private def build_pool
    ConnectionPool(HTTPClient).new(capacity: capacity, timeout: timeout) do
      client = HTTPClient.new(url)
      client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC
      client.read_timeout = 5.seconds
      client.connect_timeout = 5.seconds
      client
    end
  end
end
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html # See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
def ci_lower_bound(pos, n) def ci_lower_bound(pos, n)
if n == 0 if n == 0
@ -21,8 +64,8 @@ end
def make_client(url : URI, region = nil) def make_client(url : URI, region = nil)
client = HTTPClient.new(url) client = HTTPClient.new(url)
client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC
client.read_timeout = 15.seconds client.read_timeout = 5.seconds
client.connect_timeout = 15.seconds client.connect_timeout = 5.seconds
if region if region
PROXY_LIST[region]?.try &.sample(40).each do |proxy| PROXY_LIST[region]?.try &.sample(40).each do |proxy|

View file

@ -19,14 +19,13 @@ struct Mix
end end
def fetch_mix(rdid, video_id, cookies = nil, locale = nil) def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
client = make_client(YT_URL)
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
if cookies if cookies
headers = cookies.add_request_headers(headers) headers = cookies.add_request_headers(headers)
end end
response = client.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en&has_verified=1&bpctr=9999999999", headers) response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en&has_verified=1&bpctr=9999999999", headers)
initial_data = extract_initial_data(response.body) initial_data = extract_initial_data(response.body)

View file

@ -384,13 +384,11 @@ def get_playlist(db, plid, locale, refresh = true, force_refresh = false)
end end
def fetch_playlist(plid, locale) def fetch_playlist(plid, locale)
client = make_client(YT_URL)
if plid.starts_with? "UC" if plid.starts_with? "UC"
plid = "UU#{plid.lchop("UC")}" plid = "UU#{plid.lchop("UC")}"
end end
response = client.get("/playlist?list=#{plid}&hl=en&disable_polymer=1") response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
if response.status_code != 200 if response.status_code != 200
raise translate(locale, "Not a playlist.") raise translate(locale, "Not a playlist.")
end end
@ -458,10 +456,8 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil)
end end
def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuation = nil) def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuation = nil)
client = make_client(YT_URL)
if continuation if continuation
html = client.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999") html = YT_POOL.client &.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
html = XML.parse_html(html.body) html = XML.parse_html(html.body)
index = html.xpath_node(%q(//span[@id="playlist-current-index"])).try &.content.to_i?.try &.- 1 index = html.xpath_node(%q(//span[@id="playlist-current-index"])).try &.content.to_i?.try &.- 1
@ -471,7 +467,7 @@ def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuat
if video_count > 100 if video_count > 100
url = produce_playlist_url(plid, offset) url = produce_playlist_url(plid, offset)
response = client.get(url) response = YT_POOL.client &.get(url)
response = JSON.parse(response.body) response = JSON.parse(response.body)
if !response["content_html"]? || response["content_html"].as_s.empty? if !response["content_html"]? || response["content_html"].as_s.empty?
raise translate(locale, "Empty playlist") raise translate(locale, "Empty playlist")
@ -483,7 +479,7 @@ def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuat
elsif offset > 100 elsif offset > 100
return [] of PlaylistVideo return [] of PlaylistVideo
else # Extract first page of videos else # Extract first page of videos
response = client.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1") response = YT_POOL.client &.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
nodeset = document.xpath_nodes(%q(.//tr[contains(@class, "pl-video")])) nodeset = document.xpath_nodes(%q(.//tr[contains(@class, "pl-video")]))

View file

@ -222,20 +222,18 @@ end
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
def channel_search(query, page, channel) def channel_search(query, page, channel)
client = make_client(YT_URL) response = YT_POOL.client &.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
response = client.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"])) canonical = document.xpath_node(%q(//link[@rel="canonical"]))
if !canonical if !canonical
response = client.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US") response = YT_POOL.client &.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"])) canonical = document.xpath_node(%q(//link[@rel="canonical"]))
end end
if !canonical if !canonical
response = client.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US") response = YT_POOL.client &.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body) document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"])) canonical = document.xpath_node(%q(//link[@rel="canonical"]))
end end
@ -247,7 +245,7 @@ def channel_search(query, page, channel)
ucid = canonical["href"].split("/")[-1] ucid = canonical["href"].split("/")[-1]
url = produce_channel_search_url(ucid, query, page) url = produce_channel_search_url(ucid, query, page)
response = client.get(url) response = YT_POOL.client &.get(url)
json = JSON.parse(response.body) json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty? if json["content_html"]? && !json["content_html"].as_s.empty?
@ -265,12 +263,11 @@ def channel_search(query, page, channel)
end end
def search(query, page = 1, search_params = produce_search_params(content_type: "all"), region = nil) def search(query, page = 1, search_params = produce_search_params(content_type: "all"), region = nil)
client = make_client(YT_URL, region)
if query.empty? if query.empty?
return {0, [] of SearchItem} return {0, [] of SearchItem}
end end
html = client.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body html = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body)
if html.empty? if html.empty?
return {0, [] of SearchItem} return {0, [] of SearchItem}
end end

View file

@ -1,5 +1,4 @@
def fetch_trending(trending_type, region, locale) def fetch_trending(trending_type, region, locale)
client = make_client(YT_URL)
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36" headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
@ -12,7 +11,7 @@ def fetch_trending(trending_type, region, locale)
if trending_type && trending_type != "Default" if trending_type && trending_type != "Default"
trending_type = trending_type.downcase.capitalize trending_type = trending_type.downcase.capitalize
response = client.get("/feed/trending?gl=#{region}&hl=en", headers).body response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en", headers).body
initial_data = extract_initial_data(response) initial_data = extract_initial_data(response)
@ -23,13 +22,13 @@ def fetch_trending(trending_type, region, locale)
url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"] url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
url = url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s url = url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
url += "&disable_polymer=1&gl=#{region}&hl=en" url += "&disable_polymer=1&gl=#{region}&hl=en"
trending = client.get(url).body trending = YT_POOL.client &.get(url).body
plid = extract_plid(url) plid = extract_plid(url)
else else
trending = client.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
end end
else else
trending = client.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
end end
trending = XML.parse_html(trending) trending = XML.parse_html(trending)

View file

@ -143,8 +143,7 @@ def get_user(sid, headers, db, refresh = true)
end end
def fetch_user(sid, headers, db) def fetch_user(sid, headers, db)
client = make_client(YT_URL) feed = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
feed = client.get("/subscription_manager?disable_polymer=1", headers)
feed = XML.parse_html(feed.body) feed = XML.parse_html(feed.body)
channels = [] of String channels = [] of String
@ -254,8 +253,7 @@ def subscribe_ajax(channel_id, action, env_headers)
headers = HTTP::Headers.new headers = HTTP::Headers.new
headers["Cookie"] = env_headers["Cookie"] headers["Cookie"] = env_headers["Cookie"]
client = make_client(YT_URL) html = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
html = client.get("/subscription_manager?disable_polymer=1", headers)
cookies = HTTP::Cookies.from_headers(headers) cookies = HTTP::Cookies.from_headers(headers)
html.cookies.each do |cookie| html.cookies.each do |cookie|
@ -279,7 +277,7 @@ def subscribe_ajax(channel_id, action, env_headers)
} }
post_url = "/subscription_ajax?#{action}=1&c=#{channel_id}" post_url = "/subscription_ajax?#{action}=1&c=#{channel_id}"
client.post(post_url, headers, form: post_req) YT_POOL.client &.post(post_url, headers, form: post_req)
end end
end end
@ -288,8 +286,7 @@ end
# headers = HTTP::Headers.new # headers = HTTP::Headers.new
# headers["Cookie"] = env_headers["Cookie"] # headers["Cookie"] = env_headers["Cookie"]
# #
# client = make_client(YT_URL) # html = YT_POOL.client &.get("/view_all_playlists?disable_polymer=1", headers)
# html = client.get("/view_all_playlists?disable_polymer=1", headers)
# #
# cookies = HTTP::Cookies.from_headers(headers) # cookies = HTTP::Cookies.from_headers(headers)
# html.cookies.each do |cookie| # html.cookies.each do |cookie|

View file

@ -1146,8 +1146,7 @@ def extract_player_config(body, html)
end end
def fetch_video(id, region) def fetch_video(id, region)
client = make_client(YT_URL, region) response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
response = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/) if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/)
raise VideoRedirect.new(video_id: md["id"]) raise VideoRedirect.new(video_id: md["id"])
@ -1167,8 +1166,7 @@ def fetch_video(id, region)
bypass_regions = PROXY_LIST.keys & allowed_regions bypass_regions = PROXY_LIST.keys & allowed_regions
if !bypass_regions.empty? if !bypass_regions.empty?
region = bypass_regions[rand(bypass_regions.size)] region = bypass_regions[rand(bypass_regions.size)]
client = make_client(YT_URL, region) response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
response = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
html = XML.parse_html(response.body) html = XML.parse_html(response.body)
info = extract_player_config(response.body, html) info = extract_player_config(response.body, html)
@ -1180,10 +1178,10 @@ def fetch_video(id, region)
# Try to pull streams from embed URL # Try to pull streams from embed URL
if info["reason"]? if info["reason"]?
embed_page = client.get("/embed/#{id}").body embed_page = YT_POOL.client &.get("/embed/#{id}").body
sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]? sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]?
sts ||= "" sts ||= ""
embed_info = HTTP::Params.parse(client.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body) embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body)
if !embed_info["reason"]? if !embed_info["reason"]?
embed_info.each do |key, value| embed_info.each do |key, value|