summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorSamantaz Fox <coding@samantaz.fr>2024-10-08 17:31:20 +0200
committerSamantaz Fox <coding@samantaz.fr>2024-10-08 17:31:20 +0200
commit7c79ee7cc224014cad1c30290fdf2ab996632a9e (patch)
treebe2080062e518121dbef02ac44cc345b6a1920ce /src
parent53e8a5d62d4d7d66e8819f221a1c7b168102995c (diff)
parent9980c0e00f99963373beec50736c97f240f31dcb (diff)
downloadinvidious-7c79ee7cc224014cad1c30290fdf2ab996632a9e.tar.gz
invidious-7c79ee7cc224014cad1c30290fdf2ab996632a9e.tar.bz2
invidious-7c79ee7cc224014cad1c30290fdf2ab996632a9e.zip
Performance: Improve speed of automatic instance redirection (#4193)
The automatic instance redirection implemented in #1940 fetches a new list of instances each time someone queries the /redirect endpoint. This is extremely inefficient... This PR optimizes all that into a background job that only fetches a single list every 30 minutes. This should performance quite a bit. No related issue was opened.
Diffstat (limited to 'src')
-rw-r--r--src/invidious.cr2
-rw-r--r--src/invidious/helpers/utils.cr62
-rw-r--r--src/invidious/jobs/instance_refresh_job.cr97
-rw-r--r--src/invidious/routes/misc.cr11
4 files changed, 109 insertions, 63 deletions
diff --git a/src/invidious.cr b/src/invidious.cr
index 3804197e..63f2a9cc 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -189,6 +189,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL
Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new
+Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new
+
Invidious::Jobs.start_all
def popular_videos
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index 8e9e9a6a..4d9bb28d 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -323,68 +323,6 @@ def parse_range(range)
return 0_i64, nil
end
-def fetch_random_instance
- begin
- instance_api_client = make_client(URI.parse("https://api.invidious.io"))
-
- # Timeouts
- instance_api_client.connect_timeout = 10.seconds
- instance_api_client.dns_timeout = 10.seconds
-
- instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a
- instance_api_client.close
- rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException
- instance_list = [] of JSON::Any
- end
-
- filtered_instance_list = [] of String
-
- instance_list.each do |data|
- # TODO Check if current URL is onion instance and use .onion types if so.
- if data[1]["type"] == "https"
- # Instances can have statistics disabled, which is an requirement of version validation.
- # as_nil? doesn't exist. Thus we'll have to handle the error raised if as_nil fails.
- begin
- data[1]["stats"].as_nil
- next
- rescue TypeCastError
- end
-
- # stats endpoint could also lack the software dict.
- next if data[1]["stats"]["software"]?.nil?
-
- # Makes sure the instance isn't too outdated.
- if remote_version = data[1]["stats"]?.try &.["software"]?.try &.["version"]
- remote_commit_date = remote_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
- next if !remote_commit_date
-
- remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
- local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)
-
- next if (remote_commit_date - local_commit_date).abs.days > 30
-
- begin
- data[1]["monitor"].as_nil
- health = data[1]["monitor"].as_h["dailyRatios"][0].as_h["ratio"]
- filtered_instance_list << data[0].as_s if health.to_s.to_f > 90
- rescue TypeCastError
- # We can't check the health if the monitoring is broken. Thus we'll just add it to the list
- # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that
- # it's an error that often occurs with all the instances at the same time, we have to just skip the check.
- filtered_instance_list << data[0].as_s
- end
- end
- end
- end
-
- # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io
- if filtered_instance_list.size == 0
- return "redirect.invidious.io"
- end
-
- return filtered_instance_list.sample(1)[0]
-end
-
def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String
str = uri.to_s.sub(/^https?:\/\//, "")
if str.size > max_length
diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr
new file mode 100644
index 00000000..cb4280b9
--- /dev/null
+++ b/src/invidious/jobs/instance_refresh_job.cr
@@ -0,0 +1,97 @@
+class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob
+ # We update the internals of a constant as so it can be accessed from anywhere
+ # within the codebase
+ #
+ # "INSTANCES" => Array(Tuple(String, String)) # region, instance
+
+ INSTANCES = {"INSTANCES" => [] of Tuple(String, String)}
+
+ def initialize
+ end
+
+ def begin
+ loop do
+ refresh_instances
+ LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes")
+ sleep 30.minute
+ Fiber.yield
+ end
+ end
+
+ # Refreshes the list of instances used for redirects.
+ #
+ # Does the following three checks for each instance
+ # - Is it a clear-net instance?
+ # - Is it an instance with a good uptime?
+ # - Is it an updated instance?
+ private def refresh_instances
+ raw_instance_list = self.fetch_instances
+ filtered_instance_list = [] of Tuple(String, String)
+
+ raw_instance_list.each do |instance_data|
+ # TODO allow Tor hidden service instances when the current instance
+ # is also a hidden service. Same for i2p and any other non-clearnet instances.
+ begin
+ domain = instance_data[0]
+ info = instance_data[1]
+ stats = info["stats"]
+
+ next unless info["type"] == "https"
+ next if bad_uptime?(info["monitor"])
+ next if outdated?(stats["software"]["version"])
+
+ filtered_instance_list << {info["region"].as_s, domain.as_s}
+ rescue ex
+ if domain
+ LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
+ else
+ LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ")
+ end
+ end
+ end
+
+ if !filtered_instance_list.empty?
+ INSTANCES["INSTANCES"] = filtered_instance_list
+ end
+ end
+
+ # Fetches information regarding instances from api.invidious.io or an otherwise configured URL
+ private def fetch_instances : Array(JSON::Any)
+ begin
+ # We directly call the stdlib HTTP::Client here as it allows us to negate the effects
+ # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6
+ # and as such the following request raises if we were to use force_resolve with the ipv6 value.
+ instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io"))
+
+ # Timeouts
+ instance_api_client.connect_timeout = 10.seconds
+ instance_api_client.dns_timeout = 10.seconds
+
+ raw_instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a
+ instance_api_client.close
+ rescue ex : Socket::ConnectError | IO::TimeoutError | JSON::ParseException
+ raw_instance_list = [] of JSON::Any
+ end
+
+ return raw_instance_list
+ end
+
+ # Checks if the given target instance is outdated
+ private def outdated?(target_instance_version) : Bool
+ remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/)
+ return false if !remote_commit_date
+
+ remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC)
+ local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC)
+
+ return (remote_commit_date - local_commit_date).abs.days > 30
+ end
+
+ # Checks if the uptime of the target instance is greater than 90% over a 30 day period
+ private def bad_uptime?(target_instance_health_monitor) : Bool
+ return true if !target_instance_health_monitor["down"].as_bool == false
+ return true if target_instance_health_monitor["uptime"].as_f < 90
+
+ return false
+ end
+end
diff --git a/src/invidious/routes/misc.cr b/src/invidious/routes/misc.cr
index d6bd9571..8b620d63 100644
--- a/src/invidious/routes/misc.cr
+++ b/src/invidious/routes/misc.cr
@@ -40,7 +40,16 @@ module Invidious::Routes::Misc
def self.cross_instance_redirect(env)
referer = get_referer(env)
- instance_url = fetch_random_instance
+
+ instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"]
+ if instance_list.empty?
+ instance_url = "redirect.invidious.io"
+ else
+ # Sample returns an array
+ # Instances are packaged as {region, domain} in the instance list
+ instance_url = instance_list.sample(1)[0][1]
+ end
+
env.redirect "https://#{instance_url}#{referer}"
end
end