From 5af594bf0f620580bcc69e43b056871b0d908c2d Mon Sep 17 00:00:00 2001 From: syeopite Date: Sun, 27 Jun 2021 15:01:22 -0700 Subject: [PATCH] Create and use random user agents on each request --- src/invidious.cr | 1 + src/invidious/helpers/random-user-agents.cr | 46 +++++++++++++++++++++ src/invidious/helpers/utils.cr | 2 +- 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 src/invidious/helpers/random-user-agents.cr diff --git a/src/invidious.cr b/src/invidious.cr index 89292f05..abe3cf15 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -66,6 +66,7 @@ SOFTWARE = { "branch" => "#{CURRENT_BRANCH}", } +UA_LIST = prepare_random_user_agents() YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size, timeout: 2.0, use_quic: CONFIG.use_quic) # CLI diff --git a/src/invidious/helpers/random-user-agents.cr b/src/invidious/helpers/random-user-agents.cr new file mode 100644 index 00000000..f16689fb --- /dev/null +++ b/src/invidious/helpers/random-user-agents.cr @@ -0,0 +1,46 @@ +# Using different browsers would allow us to disguise our traffic even more. +# However, due to the widely different version and operating system values as well as +# their frequency on different OSes we're going to need a separate dataset +# for everything. 
+
+AGENT_COMPONENTS = {
+  "safari" => {
+    "os" => ["Macintosh; Intel Mac OS X 10_15_7", "Macintosh; Intel Mac OS X 10_15_6"],
+    "versions" => ["14.1.1", "14.1", "14.0.3"],
+  },
+
+  "firefox" => {
+    "os" => ["Macintosh; Intel Mac OS X 10.15", "Macintosh; Intel Mac OS X 10.14",
+             "Windows NT 10.0; Win64; x64", "X11; Ubuntu; Linux x86_64",
+             "X11; Linux x86_64"],
+    "versions" => ["88.0", "89.0"],
+  },
+}
+
+# Builds one user agent string for a randomly sampled browser, OS and version.
+private def generate_user_agent
+  browser = AGENT_COMPONENTS.keys.sample
+  os = AGENT_COMPONENTS[browser]["os"].sample
+  version = AGENT_COMPONENTS[browser]["versions"].sample
+  base = "Mozilla/5.0 "
+
+  case browser
+  when "safari"
+    base += "(#{os}) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/#{version} Safari/605.1.15"
+  when "firefox"
+    # Firefox repeats its version in the rv: token and after "Firefox/".
+    base += "(#{os}; rv:#{version}) Gecko/20100101 Firefox/#{version}"
+  end
+
+  return base
+end
+
+# Prepare 10 user agents to randomly choose from in order to avoid detection.
+# Entries are generated independently, so the list may contain duplicates.
+# AGENT_COMPONENTS is deliberately not cleared afterwards: emptying it would
+# make any later call raise when sampling from the empty key list.
+def prepare_random_user_agents
+  ua_list = [] of String
+  10.times { ua_list << generate_user_agent() }
+  return ua_list
+end
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index 6ee07d7a..d26600c8 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -2,7 +2,7 @@ require "lsquic"
 require "db"
 
 def add_yt_headers(request)
-  request.headers["user-agent"] ||= "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36"
+  request.headers["user-agent"] ||= UA_LIST.sample(1)[0]
   request.headers["accept-charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7"
   request.headers["accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
   request.headers["accept-language"] ||= "en-us,en;q=0.5"