Add HTTPClient pool

This commit is contained in:
Omar Roth 2019-10-25 12:58:16 -04:00
parent aba2c5b938
commit 6930570fa2
No known key found for this signature in database
GPG key ID: B8254FB7EC3D37F2
14 changed files with 115 additions and 111 deletions

View file

@ -195,9 +195,7 @@ def get_channel(id, db, refresh = true, pull_all_videos = true)
end
def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
client = make_client(YT_URL)
rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
rss = XML.parse_html(rss)
author = rss.xpath_node(%q(//feed/title))
@ -216,7 +214,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty?
@ -296,7 +294,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
loop do
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty?
@ -375,12 +373,10 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
end
def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
client = make_client(YT_URL)
if continuation
url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["load_more_widget_html"].as_s.empty?
@ -399,7 +395,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
elsif auto_generated
url = "/channel/#{ucid}"
response = client.get(url)
response = YT_POOL.client &.get(url)
html = XML.parse_html(response.body)
nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
@ -415,7 +411,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
url += "&sort=dd"
end
response = client.get(url)
response = YT_POOL.client &.get(url)
html = XML.parse_html(response.body)
continuation = html.xpath_node(%q(//button[@data-uix-load-more-href]))
@ -625,13 +621,12 @@ end
# TODO: Add "sort_by"
def fetch_channel_community(ucid, continuation, locale, config, kemal_config, format, thin_mode)
client = make_client(YT_URL)
headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
response = client.get("/channel/#{ucid}/community?gl=US&hl=en", headers)
response = YT_POOL.client &.get("/channel/#{ucid}/community?gl=US&hl=en", headers)
if response.status_code == 404
response = client.get("/user/#{ucid}/community?gl=US&hl=en", headers)
response = YT_POOL.client &.get("/user/#{ucid}/community?gl=US&hl=en", headers)
end
if response.status_code == 404
@ -668,7 +663,7 @@ def fetch_channel_community(ucid, continuation, locale, config, kemal_config, fo
session_token: session_token,
}
response = client.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
response = YT_POOL.client &.post("/comment_service_ajax?action_get_comments=1&ctoken=#{continuation}&continuation=#{continuation}&hl=en&gl=US", headers, form: post_req)
body = JSON.parse(response.body)
body = body["response"]["continuationContents"]["itemSectionContinuation"]? ||
@ -929,11 +924,9 @@ def extract_channel_community_cursor(continuation)
end
def get_about_info(ucid, locale)
client = make_client(YT_URL)
about = client.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
about = YT_POOL.client &.get("/channel/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
if about.status_code == 404
about = client.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
about = YT_POOL.client &.get("/user/#{ucid}/about?disable_polymer=1&gl=US&hl=en")
end
if md = about.headers["location"]?.try &.match(/\/channel\/(?<ucid>UC[a-zA-Z0-9_-]{22})/)
@ -1038,11 +1031,9 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
count = 0
videos = [] of SearchVideo
client = make_client(YT_URL)
2.times do |i|
url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty?
@ -1067,11 +1058,10 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end
def get_latest_videos(ucid)
client = make_client(YT_URL)
videos = [] of SearchVideo
url = produce_channel_videos_url(ucid, 0)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty?

View file

@ -85,7 +85,6 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
session_token: session_token,
}
client = make_client(YT_URL, video.info["region"]?)
headers = HTTP::Headers.new
headers["content-type"] = "application/x-www-form-urlencoded"
@ -98,7 +97,7 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so
headers["x-youtube-client-name"] = "1"
headers["x-youtube-client-version"] = "2.20180719"
response = client.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req)
response = YT_POOL.client(region, &.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req))
response = JSON.parse(response.body)
if !response["response"]["continuationContents"]?

View file

@ -234,6 +234,7 @@ struct Config
force_resolve: {type: Socket::Family, default: Socket::Family::UNSPEC, converter: FamilyConverter}, # Connect to YouTube over 'ipv6' or 'ipv4'. Will sometimes fix issues with rate-limiting (see https://github.com/ytdl-org/youtube-dl/issues/21729)
port: {type: Int32, default: 3000}, # Port to listen for connections (overridden by command line argument)
host_binding: {type: String, default: "0.0.0.0"}, # Host to bind (overridden by command line argument)
pool_size: {type: Int32, default: 100},
})
end

View file

@ -77,6 +77,10 @@ class HTTPClient < HTTP::Client
end
end
# Clears this client's socket reference by setting `@socket` to nil.
#
# NOTE(review): the socket is only dereferenced here, not closed — confirm
# that whatever was stored in `@socket` gets cleaned up elsewhere.
def unset_proxy
@socket = nil
end
def proxy_connection_options
opts = {} of Symbol => Float64 | Nil

View file

@ -1,8 +1,7 @@
def fetch_decrypt_function(id = "CvFH_6DNRCY")
client = make_client(YT_URL)
document = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-.{9}\/en_US\/base.js)"/).not_nil!["url"]
player = client.get(url).body
player = YT_POOL.client &.get(url).body
function_name = player.match(/^(?<name>[^=]+)=function\(a\){a=a\.split\(""\)/m).not_nil!["name"]
function_body = player.match(/^#{Regex.escape(function_name)}=function\(a\){(?<body>[^}]+)}/m).not_nil!["body"]

View file

@ -1,3 +1,46 @@
require "pool/connection"
# A pool of reusable `HTTPClient` instances that all target a single `url`.
#
# Clients are created by `build_pool` and handed out one at a time through
# `#client`, which yields a checked-out client to the caller's block.
struct HTTPPool
  # Base URI every pooled client is created for.
  property! url : URI
  # Capacity passed to the underlying `ConnectionPool`.
  property! capacity : Int32
  # Timeout passed to the underlying `ConnectionPool`.
  property! timeout : Float64
  # The underlying pool of `HTTPClient` instances.
  property pool : ConnectionPool(HTTPClient)

  # Creates a pool for *url* with the given *capacity* and *timeout*.
  def initialize(url : URI, @capacity = 5, @timeout = 5.0)
    @url = url
    @pool = build_pool
  end

  # Checks a client out of the pool, yields it to the block and returns the
  # block's result.
  #
  # When *region* is given, up to 40 randomly sampled entries of
  # `PROXY_LIST[region]` are tried in turn; the first one for which
  # `set_proxy` does not raise is used. If the region has no entries, or every
  # attempt raises, the block still runs with the client as it is.
  #
  # `unset_proxy` is called in an `ensure` clause so that it also runs when
  # the block raises. Otherwise a failed request would return the client to
  # the pool with its region proxy still attached.
  #
  # NOTE(review): `unset_proxy` also runs when no proxy was requested, so the
  # client's socket reference is reset after every call — confirm that this
  # is intended for non-proxied requests.
  def client(region = nil, &block)
    pool.connection do |conn|
      begin
        if region
          PROXY_LIST[region]?.try &.sample(40).each do |candidate|
            begin
              conn.set_proxy(HTTPProxy.new(proxy_host: candidate[:ip], proxy_port: candidate[:port]))
              break
            rescue
              # Best effort: this proxy could not be used, try the next one.
            end
          end
        end

        # The value of the `begin` body (the block's result) is what gets
        # returned; the `ensure` clause does not affect it.
        yield conn
      ensure
        conn.unset_proxy
      end
    end
  end

  # Builds the `ConnectionPool`; the block is the factory for each client.
  private def build_pool
    ConnectionPool(HTTPClient).new(capacity: capacity, timeout: timeout) do
      client = HTTPClient.new(url)
      # Only connections to www.youtube.com use the configured address family.
      client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC
      client.read_timeout = 5.seconds
      client.connect_timeout = 5.seconds
      client
    end
  end
end
# See http://www.evanmiller.org/how-not-to-sort-by-average-rating.html
def ci_lower_bound(pos, n)
if n == 0
@ -21,8 +64,8 @@ end
def make_client(url : URI, region = nil)
client = HTTPClient.new(url)
client.family = (url.host == "www.youtube.com") ? CONFIG.force_resolve : Socket::Family::UNSPEC
client.read_timeout = 15.seconds
client.connect_timeout = 15.seconds
client.read_timeout = 5.seconds
client.connect_timeout = 5.seconds
if region
PROXY_LIST[region]?.try &.sample(40).each do |proxy|

View file

@ -19,14 +19,13 @@ struct Mix
end
def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
client = make_client(YT_URL)
headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
if cookies
headers = cookies.add_request_headers(headers)
end
response = client.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en&has_verified=1&bpctr=9999999999", headers)
response = YT_POOL.client &.get("/watch?v=#{video_id}&list=#{rdid}&gl=US&hl=en&has_verified=1&bpctr=9999999999", headers)
initial_data = extract_initial_data(response.body)

View file

@ -384,13 +384,11 @@ def get_playlist(db, plid, locale, refresh = true, force_refresh = false)
end
def fetch_playlist(plid, locale)
client = make_client(YT_URL)
if plid.starts_with? "UC"
plid = "UU#{plid.lchop("UC")}"
end
response = client.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
if response.status_code != 200
raise translate(locale, "Not a playlist.")
end
@ -458,10 +456,8 @@ def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil)
end
def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuation = nil)
client = make_client(YT_URL)
if continuation
html = client.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
html = YT_POOL.client &.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
html = XML.parse_html(html.body)
index = html.xpath_node(%q(//span[@id="playlist-current-index"])).try &.content.to_i?.try &.- 1
@ -471,7 +467,7 @@ def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuat
if video_count > 100
url = produce_playlist_url(plid, offset)
response = client.get(url)
response = YT_POOL.client &.get(url)
response = JSON.parse(response.body)
if !response["content_html"]? || response["content_html"].as_s.empty?
raise translate(locale, "Empty playlist")
@ -483,7 +479,7 @@ def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuat
elsif offset > 100
return [] of PlaylistVideo
else # Extract first page of videos
response = client.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1")
response = YT_POOL.client &.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1")
document = XML.parse_html(response.body)
nodeset = document.xpath_nodes(%q(.//tr[contains(@class, "pl-video")]))

View file

@ -222,20 +222,18 @@ end
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
def channel_search(query, page, channel)
client = make_client(YT_URL)
response = client.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
response = YT_POOL.client &.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"]))
if !canonical
response = client.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US")
response = YT_POOL.client &.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"]))
end
if !canonical
response = client.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US")
response = YT_POOL.client &.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US")
document = XML.parse_html(response.body)
canonical = document.xpath_node(%q(//link[@rel="canonical"]))
end
@ -247,7 +245,7 @@ def channel_search(query, page, channel)
ucid = canonical["href"].split("/")[-1]
url = produce_channel_search_url(ucid, query, page)
response = client.get(url)
response = YT_POOL.client &.get(url)
json = JSON.parse(response.body)
if json["content_html"]? && !json["content_html"].as_s.empty?
@ -265,12 +263,11 @@ def channel_search(query, page, channel)
end
def search(query, page = 1, search_params = produce_search_params(content_type: "all"), region = nil)
client = make_client(YT_URL, region)
if query.empty?
return {0, [] of SearchItem}
end
html = client.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body
html = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body)
if html.empty?
return {0, [] of SearchItem}
end

View file

@ -1,5 +1,4 @@
def fetch_trending(trending_type, region, locale)
client = make_client(YT_URL)
headers = HTTP::Headers.new
headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"
@ -12,7 +11,7 @@ def fetch_trending(trending_type, region, locale)
if trending_type && trending_type != "Default"
trending_type = trending_type.downcase.capitalize
response = client.get("/feed/trending?gl=#{region}&hl=en", headers).body
response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en", headers).body
initial_data = extract_initial_data(response)
@ -23,13 +22,13 @@ def fetch_trending(trending_type, region, locale)
url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
url = url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s
url += "&disable_polymer=1&gl=#{region}&hl=en"
trending = client.get(url).body
trending = YT_POOL.client &.get(url).body
plid = extract_plid(url)
else
trending = client.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
end
else
trending = client.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body
end
trending = XML.parse_html(trending)

View file

@ -143,8 +143,7 @@ def get_user(sid, headers, db, refresh = true)
end
def fetch_user(sid, headers, db)
client = make_client(YT_URL)
feed = client.get("/subscription_manager?disable_polymer=1", headers)
feed = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
feed = XML.parse_html(feed.body)
channels = [] of String
@ -254,8 +253,7 @@ def subscribe_ajax(channel_id, action, env_headers)
headers = HTTP::Headers.new
headers["Cookie"] = env_headers["Cookie"]
client = make_client(YT_URL)
html = client.get("/subscription_manager?disable_polymer=1", headers)
html = YT_POOL.client &.get("/subscription_manager?disable_polymer=1", headers)
cookies = HTTP::Cookies.from_headers(headers)
html.cookies.each do |cookie|
@ -279,7 +277,7 @@ def subscribe_ajax(channel_id, action, env_headers)
}
post_url = "/subscription_ajax?#{action}=1&c=#{channel_id}"
client.post(post_url, headers, form: post_req)
YT_POOL.client &.post(post_url, headers, form: post_req)
end
end
@ -288,8 +286,7 @@ end
# headers = HTTP::Headers.new
# headers["Cookie"] = env_headers["Cookie"]
#
# client = make_client(YT_URL)
# html = client.get("/view_all_playlists?disable_polymer=1", headers)
# html = YT_POOL.client &.get("/view_all_playlists?disable_polymer=1", headers)
#
# cookies = HTTP::Cookies.from_headers(headers)
# html.cookies.each do |cookie|

View file

@ -1146,8 +1146,7 @@ def extract_player_config(body, html)
end
def fetch_video(id, region)
client = make_client(YT_URL, region)
response = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/)
raise VideoRedirect.new(video_id: md["id"])
@ -1167,8 +1166,7 @@ def fetch_video(id, region)
bypass_regions = PROXY_LIST.keys & allowed_regions
if !bypass_regions.empty?
region = bypass_regions[rand(bypass_regions.size)]
client = make_client(YT_URL, region)
response = client.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
html = XML.parse_html(response.body)
info = extract_player_config(response.body, html)
@ -1180,10 +1178,10 @@ def fetch_video(id, region)
# Try to pull streams from embed URL
if info["reason"]?
embed_page = client.get("/embed/#{id}").body
embed_page = YT_POOL.client &.get("/embed/#{id}").body
sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]?
sts ||= ""
embed_info = HTTP::Params.parse(client.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body)
embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body)
if !embed_info["reason"]?
embed_info.each do |key, value|