diff --git a/Dockerfile b/Dockerfile
index 2b3b239..0f59097 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,11 +6,13 @@ COPY ./src/ ./src/
 RUN crystal build ./src/instances.cr --release
 
 FROM alpine:latest
-RUN apk add --no-cache gc pcre libgcc
+# curl is needed at runtime: onion instance stats are fetched by running it.
+RUN apk add --no-cache gc pcre libgcc yaml curl
 WORKDIR /app
 RUN addgroup -g 1000 -S invidious && \
     adduser -u 1000 -S invidious -G invidious
 COPY ./assets/ ./assets/
+COPY ./config.yml ./config.yml
 COPY --from=builder /app/instances .
 
 EXPOSE 3000
diff --git a/config.yml b/config.yml
new file mode 100644
index 0000000..08f2a5e
--- /dev/null
+++ b/config.yml
@@ -0,0 +1,11 @@
+# Fetch stats from onion instances. Uses the system's curl binary to do so.
+fetch_onion_instance_stats: true
+
+# Tor's SOCKS proxy address that curl uses to connect to hidden services.
+# NOTE(review): 127.0.0.1:9050 does not match the tor-socks-proxy service in
+# docker-compose.yml (port 9150) - confirm which value the deployment needs.
+tor_sock_proxy_address: "127.0.0.1"
+tor_sock_proxy_port: 9050
+
+# Minutes before refreshing the instance stats
+minutes_between_refresh: 30
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 83e664a..57fc7a1 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,5 +1,13 @@
 version: '3'
 services:
+  tor-socks-proxy:
+    container_name: tor-socks-proxy
+    image: peterdavehello/tor-socks-proxy:latest
+    ports:
+      - "127.0.0.1:8853:53/udp"
+      - "127.0.0.1:9150:9150/tcp"
+    restart: unless-stopped
+
   instances:
     build: .
     restart: unless-stopped
diff --git a/src/fetch.cr b/src/fetch.cr
new file mode 100644
index 0000000..63fba64
--- /dev/null
+++ b/src/fetch.cr
@@ -0,0 +1,137 @@
+# Extracts the country details from a table-row match.
+#
+# Returns `{flag, region, country_name}`; each element is `nil` when the
+# corresponding capture group is absent from *md*.
+def fetch_country(md)
+  flag = md["flag"]?
+  # Shifting each codepoint down by 0x1f1a5 maps the regional-indicator symbols
+  # U+1F1E6..U+1F1FF that make up a flag emoji onto the letters "A".."Z".
+  region = flag.try { |emoji| emoji.codepoints.map { |codepoint| (codepoint - 0x1f1a5).chr }.join("") }
+  country_name = md["country_name"]?
+
+  return flag, region, country_name
+end
+
+# Returns the notes column of *md* with leading and trailing `|` characters
+# removed, or `nil` when nothing is left.
+def fetch_notes(md)
+  notes = md["notes"].strip("|")
+  notes.empty? ? nil : notes
+end
+
+# Builds the record for a clearnet instance from the table-row match *md*.
+#
+# Requests `/api/v1/stats` from the instance with 5 second connect and read
+# timeouts; `stats` is `nil` when the request or the JSON parsing fails.
+# `monitor` is the first entry of *monitors* whose "name" equals the host,
+# falling back to the monitor already stored for that host in *instances*.
+def prepare_http_instance(md, instances, monitors)
+  uri = URI.parse(md["uri"])
+  host = md["host"]
+
+  flag, region, country_name = fetch_country(md)
+  status_url = md["status_url"]?
+  privacy_policy = md["privacy_policy"]?
+
+  # "None" in the DDoS protection column is stored as `nil`.
+  ddos_protection = md["ddos_protection"].strip
+  ddos_protection = nil if ddos_protection == "None"
+
+  owner = {name: md["owner"].strip("@"), url: md["owner_url"]}
+  notes = fetch_notes(md)
+
+  client = nil
+  begin
+    # Built inside the `begin` so that a URI the client rejects only costs
+    # this instance its stats instead of raising out of the refresh.
+    client = HTTP::Client.new(uri)
+    client.connect_timeout = 5.seconds
+    client.read_timeout = 5.seconds
+    stats = JSON.parse(client.get("/api/v1/stats").body)
+  rescue
+    stats = nil
+  ensure
+    # Release the connection whether or not the request succeeded.
+    client.try &.close
+  end
+
+  monitor = monitors.try &.find { |candidate| candidate["name"].try &.as_s == host }
+  return {flag: flag, region: region, country_name: country_name, stats: stats, type: "https", uri: uri.to_s, status_url: status_url,
+    privacy_policy: privacy_policy, ddos_protection: ddos_protection,
+    owner: owner, notes: notes, monitor: monitor || instances[host]?.try &.[:monitor]?}
+end
+
+# Builds the record for an onion instance from the table-row match *md*.
+#
+# When `fetch_onion_instance_stats` is `true` in the config, `/api/v1/stats` is
+# fetched by running `curl` through the configured SOCKS proxy; `stats` is `nil`
+# when fetching is disabled or anything goes wrong. *instances* is not used.
+def prepare_onion_instance(md, instances)
+  uri = URI.parse(md["uri"])
+
+  clearnet_url = md["clearnet_url"]
+  flag, region, country_name = fetch_country(md)
+  privacy_policy = md["privacy_policy"]?
+  owner = {name: md["owner"].strip("@"), url: md["owner_url"]}
+  notes = fetch_notes(md)
+
+  stats = nil
+  # `CONFIG[...]?` returns a YAML::Any, which is truthy even when it wraps
+  # `false`, so compare the raw value instead of testing for presence.
+  if CONFIG["fetch_onion_instance_stats"]?.try(&.raw) == true
+    begin
+      proxy = "#{CONFIG["tor_sock_proxy_address"]}:#{CONFIG["tor_sock_proxy_port"]}"
+      # Arguments are passed as an array so that text scraped from the
+      # markdown can never be split into additional curl options.
+      args = ["--socks5-hostname", proxy, "http://#{uri.host}/api/v1/stats"]
+      Process.run("curl", args: args) do |process|
+        stats = JSON.parse(process.output.gets_to_end)
+      end
+    rescue
+      stats = nil
+    end
+  end
+
+  return {flag: flag, region: region, country_name: country_name, stats: stats, type: "onion", uri: uri.to_s, clearnet_url: clearnet_url,
+    privacy_policy: privacy_policy, owner: owner, notes: notes,
+    monitor: nil}
+end
+
+# Scans *body* for rows of the clearnet instances table and stores one record
+# per matching row in *instances*, keyed by host.
+def get_clearnet_instances(body, instances, monitors)
+  # Crystal currently lacks a markdown parser that supports tables. So...
+  clear_net_regexes = [
+    /\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)/,                   # Address column
+    /(?<flag>[\x{1f100}-\x{1f1ff}]{2}) (?<country_name>[^ ]+)/, # Country column
+    /((\[[^\]]+\]\(.*\){1}\])\((?<status_url>.*)\)|(None))/,    # Status column
+    /((\[[^ \]]+\]\((?<privacy_policy>[^\)]+)\))|(None))/,      # privacy policy column
+    /(?<ddos_protection>.*)/,                                   # DDOS protection column
+    /\[(?<owner>[^ \]]+)\]\((?<owner_url>[^\)]+)\)/,            # Owner column
+    /(?<notes>.*)/,                                             # Note column
+  ]
+
+  body.scan(/#{clear_net_regexes.join(/ +\| +/)}/mx).each do |md|
+    host = md["host"]
+    instances[host] = prepare_http_instance(md, instances, monitors)
+  end
+end
+
+# Scans *body* for rows of the onion instances table and stores one record per
+# matching row in *instances*, keyed by host.
+def get_onion_instances(body, instances)
+  # Crystal currently lacks a markdown parser that supports tables. So...
+  onion_regexes = [
+    /\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)/,                    # Address column
+    /(?<flag>[\x{1f100}-\x{1f1ff}]{2}) (?<country_name>[^ ]+)/,  # Country column
+    /\[(?<clearnet_host>[^ \]]+)\]\((?<clearnet_url>[^\)]+)\)/,  # Clearnet instance column
+    /((\[[^ \]]+\]\((?<privacy_policy>[^\)]+)\))|(None))/,       # privacy policy column
+    /\[(?<owner>[^ \]]+)\]\((?<owner_url>[^\)]+)\)/,             # Owner column
+    /(?<notes>.*)/,                                              # Notes column
+  ]
+
+  body.scan(/#{onion_regexes.join(/ +\| +/)}/mx).each do |md|
+    host = md["host"]
+    instances[host] = prepare_onion_instance(md, instances)
+  end
+end
diff --git a/src/helpers/helpers.cr b/src/helpers/helpers.cr
new file mode 100644
index 0000000..f599087
--- /dev/null
+++ b/src/helpers/helpers.cr
@@ -0,0 +1,7 @@
+require "yaml"
+
+# Parses `config.yml`, resolved relative to the current working directory,
+# and returns the resulting YAML document.
+def load_config
+  YAML.parse(File.read("config.yml"))
+end
diff --git a/src/instances.cr b/src/instances.cr
index 71c4551..e461072 100644
--- a/src/instances.cr
+++ b/src/instances.cr
@@ -18,15 +18,22 @@ require "http/client"
 require "kemal"
 require "uri"
 
+require "./fetch.cr"
+require "./helpers/helpers.cr"
+
+CONFIG = load_config()
+
 Kemal::CLI.new ARGV
 
 macro rendered(filename)
   render "src/instances/views/#{{{filename}}}.ecr"
 end
 
-alias Instance = NamedTuple(flag: String?, region: String?, stats: JSON::Any?, type: String, uri: String, monitor: JSON::Any?)
+alias Owner = NamedTuple(name: String, url: String)
+alias ClearNetInstance = NamedTuple(flag: String?, region: String?, country_name: String?, stats: JSON::Any?, type: String, uri: String, status_url: String?, privacy_policy: String?, ddos_protection: String?, owner: Owner, notes: String?, monitor: JSON::Any?)
+alias OnionInstance = NamedTuple(flag: String?, region: String?, country_name: String?, stats: JSON::Any?, type: String, uri: String, clearnet_url: String?, privacy_policy: String?, owner: Owner, notes: String?, monitor: JSON::Any?)
 
-INSTANCES = {} of String => Instance
+INSTANCES = {} of String => ClearNetInstance | OnionInstance
 
 spawn do
   loop do
@@ -51,44 +58,20 @@ spawn do
       end
     end
     begin
-      body = HTTP::Client.get(URI.parse("https://raw.githubusercontent.com/iv-org/documentation/master/Invidious-Instances.md")).body
+      # Needs to be replaced once merged!
+      body = HTTP::Client.get(URI.parse("https://raw.githubusercontent.com/TheFrenchGhosty/documentation/instances-list-rewrite/Public-Instances.md")).body
     rescue ex
       body = ""
     end
 
-    instances = {} of String => Instance
-
-    body = body.split("### Blocked:")[0]
-    body.scan(/\[(?<host>[^ \]]+)\]\((?<uri>[^\)]+)\)( .(?<region>[\x{1f100}-\x{1f1ff}]{2}))?/mx).each do |md|
-      region = md["region"]?.try { |region| region.codepoints.map { |codepoint| (codepoint - 0x1f1a5).chr }.join("") }
-      flag = md["region"]?
-
-      uri = URI.parse(md["uri"])
-      host = md["host"]
-
-      case type = host.split(".")[-1]
-      when "onion"
-      when "i2p"
-      else
-        type = uri.scheme.not_nil!
-        client = HTTP::Client.new(uri)
-        client.connect_timeout = 5.seconds
-        client.read_timeout = 5.seconds
-        begin
-          stats = JSON.parse(client.get("/api/v1/stats").body)
-        rescue ex
-          stats = nil
-        end
-      end
-
-      monitor = monitors.try &.select { |monitor| monitor["name"].try &.as_s == host }[0]?
-      instances[host] = {flag: flag, region: region, stats: stats, type: type, uri: uri.to_s, monitor: monitor || instances[host]?.try &.[:monitor]?}
-    end
+    instances = {} of String => ClearNetInstance | OnionInstance
+    get_clearnet_instances(body, instances, monitors)
+    get_onion_instances(body, instances)
 
     INSTANCES.clear
     INSTANCES.merge! instances
 
-    sleep 5.minutes
+    sleep CONFIG["minutes_between_refresh"].as_i.minutes
   end
 end
 
@@ -134,13 +117,13 @@ static_headers do |response, filepath, filestat|
 end
 
 SORT_PROCS = {
-  "health"   => ->(name : String, instance : Instance) { -(instance[:monitor]?.try &.["30dRatio"]["ratio"].as_s.to_f || 0.0) },
-  "location" => ->(name : String, instance : Instance) { instance[:region]? || "ZZ" },
-  "name"     => ->(name : String, instance : Instance) { name },
-  "signup"   => ->(name : String, instance : Instance) { instance[:stats]?.try &.["openRegistrations"]?.try { |bool| bool.as_bool ? 0 : 1 } || 2 },
-  "type"     => ->(name : String, instance : Instance) { instance[:type] },
-  "users"    => ->(name : String, instance : Instance) { -(instance[:stats]?.try &.["usage"]?.try &.["users"]["total"].as_i || 0) },
-  "version"  => ->(name : String, instance : Instance) { instance[:stats]?.try &.["software"]?.try &.["version"].as_s.try &.split("-", 2)[0].split(".").map { |a| -a.to_i } || [0, 0, 0] },
+  "health"   => ->(name : String, instance : ClearNetInstance | OnionInstance) { -(instance[:monitor]?.try &.["30dRatio"]["ratio"].as_s.to_f || 0.0) },
+  "location" => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:region]? || "ZZ" },
+  "name"     => ->(name : String, instance : ClearNetInstance | OnionInstance) { name },
+  "signup"   => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:stats]?.try &.["openRegistrations"]?.try { |bool| bool.as_bool ? 0 : 1 } || 2 },
+  "type"     => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:type] },
+  "users"    => ->(name : String, instance : ClearNetInstance | OnionInstance) { -(instance[:stats]?.try &.["usage"]?.try &.["users"]["total"].as_i || 0) },
+  "version"  => ->(name : String, instance : ClearNetInstance | OnionInstance) { instance[:stats]?.try &.["software"]?.try &.["version"].as_s.try &.split("-", 2)[0].split(".").map { |a| -a.to_i } || [0, 0, 0] },
 }
 
 def sort_instances(instances, sort_by)