diff options
Diffstat (limited to 'src')
31 files changed, 696 insertions, 373 deletions
diff --git a/src/invidious.cr b/src/invidious.cr index 3804197e..b422dcbb 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -23,6 +23,7 @@ require "kilt" require "./ext/kemal_content_for.cr" require "./ext/kemal_static_file_handler.cr" +require "http_proxy" require "athena-negotiation" require "openssl/hmac" require "option_parser" @@ -92,6 +93,10 @@ SOFTWARE = { YT_POOL = YoutubeConnectionPool.new(YT_URL, capacity: CONFIG.pool_size) +# Image request pool + +GGPHT_POOL = YoutubeConnectionPool.new(URI.parse("https://yt3.ggpht.com"), capacity: CONFIG.pool_size) + # CLI Kemal.config.extra_options do |parser| parser.banner = "Usage: invidious [arguments]" @@ -117,6 +122,9 @@ Kemal.config.extra_options do |parser| parser.on("-l LEVEL", "--log-level=LEVEL", "Log level, one of #{LogLevel.values} (default: #{CONFIG.log_level})") do |log_level| CONFIG.log_level = LogLevel.parse(log_level) end + parser.on("-k", "--colorize", "Colorize logs") do + CONFIG.colorize_logs = true + end parser.on("-v", "--version", "Print version") do puts SOFTWARE.to_pretty_json exit @@ -133,7 +141,7 @@ if CONFIG.output.upcase != "STDOUT" FileUtils.mkdir_p(File.dirname(CONFIG.output)) end OUTPUT = CONFIG.output.upcase == "STDOUT" ? STDOUT : File.open(CONFIG.output, mode: "a") -LOGGER = Invidious::LogHandler.new(OUTPUT, CONFIG.log_level) +LOGGER = Invidious::LogHandler.new(OUTPUT, CONFIG.log_level, CONFIG.colorize_logs) # Check table integrity Invidious::Database.check_integrity(CONFIG) @@ -189,6 +197,8 @@ Invidious::Jobs.register Invidious::Jobs::NotificationJob.new(CONNECTION_CHANNEL Invidious::Jobs.register Invidious::Jobs::ClearExpiredItemsJob.new +Invidious::Jobs.register Invidious::Jobs::InstanceListRefreshJob.new + Invidious::Jobs.start_all def popular_videos diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 29546e38..1478c8fc 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -223,7 +223,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) length_seconds = channel_video.try &.length_seconds length_seconds ||= 0 - live_now = channel_video.try &.live_now + live_now = channel_video.try &.badges.live_now? live_now ||= false premiere_timestamp = channel_video.try &.premiere_timestamp @@ -275,7 +275,7 @@ def fetch_channel(ucid, pull_all_videos : Bool) ucid: video.ucid, author: video.author, length_seconds: video.length_seconds, - live_now: video.live_now, + live_now: video.badges.live_now?, premiere_timestamp: video.premiere_timestamp, views: video.views, }) diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index 6cc30142..96400f47 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -1,78 +1,3 @@ -def produce_channel_content_continuation(ucid, content_type, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) - object_inner_2 = { - "2:0:embedded" => { - "1:0:varint" => 0_i64, - }, - "5:varint" => 50_i64, - "6:varint" => 1_i64, - "7:varint" => (page * 30).to_i64, - "9:varint" => 1_i64, - "10:varint" => 0_i64, - } - - object_inner_2_encoded = object_inner_2 - .try { |i| Protodec::Any.cast_json(i) } - .try { |i| Protodec::Any.from_json(i) } - .try { |i| Base64.urlsafe_encode(i) } - .try { |i| URI.encode_www_form(i) } - - content_type_numerical = - case content_type - when "videos" then 15 - when "livestreams" then 14 - else 15 # Fallback to "videos" - end - - sort_by_numerical = - case sort_by - when "newest" then 1_i64 - when "popular" then 2_i64 - when "oldest" then 4_i64 - else 1_i64 # Fallback to "newest" - end - - object_inner_1 = { - "110:embedded" => { - "3:embedded" => { - "#{content_type_numerical}:embedded" => { - "1:embedded" => { - "1:string" => object_inner_2_encoded, - }, - "2:embedded" => { - "1:string" => "00000000-0000-0000-0000-000000000000", - }, - "3:varint" => sort_by_numerical, - }, - }, - }, - } - - object_inner_1_encoded = object_inner_1 - .try { |i| Protodec::Any.cast_json(i) } - .try { |i| Protodec::Any.from_json(i) } - .try { |i| Base64.urlsafe_encode(i) } - .try { |i| URI.encode_www_form(i) } - - object = { - "80226972:embedded" => { - "2:string" => ucid, - "3:string" => object_inner_1_encoded, - "35:string" => "browse-feed#{ucid}videos102", - }, - } - - continuation = object.try { |i| Protodec::Any.cast_json(i) } - .try { |i| Protodec::Any.from_json(i) } - .try { |i| Base64.urlsafe_encode(i) } - .try { |i| URI.encode_www_form(i) } - - return continuation -end - -def make_initial_content_ctoken(ucid, content_type, sort_by) : String - return produce_channel_content_continuation(ucid, content_type, sort_by: sort_by) -end - module Invidious::Channel::Tabs extend self @@ -101,7 +26,7 @@ module Invidious::Channel::Tabs end def get_videos(author : String, ucid : String, *, continuation : String? = nil, sort_by = "newest") - continuation ||= make_initial_content_ctoken(ucid, "videos", sort_by) + continuation ||= make_initial_videos_ctoken(ucid, sort_by) initial_data = YoutubeAPI.browse(continuation: continuation) return extract_items(initial_data, author, ucid) @@ -130,14 +55,10 @@ module Invidious::Channel::Tabs # Shorts # ------------------- - def get_shorts(channel : AboutChannel, continuation : String? = nil) - if continuation.nil? - # EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts" - # TODO: try to extract the continuation tokens that allows other sorting options - initial_data = YoutubeAPI.browse(channel.ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D") - else - initial_data = YoutubeAPI.browse(continuation: continuation) - end + def get_shorts(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest") + continuation ||= make_initial_shorts_ctoken(channel.ucid, sort_by) + initial_data = YoutubeAPI.browse(continuation: continuation) + return extract_items(initial_data, channel.author, channel.ucid) end @@ -145,9 +66,8 @@ module Invidious::Channel::Tabs # Livestreams # ------------------- - def get_livestreams(channel : AboutChannel, continuation : String? = nil, sort_by = "newest") - continuation ||= make_initial_content_ctoken(channel.ucid, "livestreams", sort_by) - + def get_livestreams(channel : AboutChannel, *, continuation : String? = nil, sort_by = "newest") + continuation ||= make_initial_livestreams_ctoken(channel.ucid, sort_by) initial_data = YoutubeAPI.browse(continuation: continuation) return extract_items(initial_data, channel.author, channel.ucid) @@ -171,4 +91,102 @@ module Invidious::Channel::Tabs return items, next_continuation end + + # ------------------- + # C-tokens + # ------------------- + + private def sort_options_videos_short(sort_by : String) + case sort_by + when "newest" then return 4_i64 + when "popular" then return 2_i64 + when "oldest" then return 5_i64 + else return 4_i64 # Fallback to "newest" + end + end + + # Generate the initial "continuation token" to get the first page of the + # "videos" tab. The following page requires the ctoken provided in that + # first page, and so on. + private def make_initial_videos_ctoken(ucid : String, sort_by = "newest") + object = { + "15:embedded" => { + "2:embedded" => { + "1:string" => "00000000-0000-0000-0000-000000000000", + }, + "4:varint" => sort_options_videos_short(sort_by), + }, + } + + return channel_ctoken_wrap(ucid, object) + end + + # Generate the initial "continuation token" to get the first page of the + # "shorts" tab. The following page requires the ctoken provided in that + # first page, and so on. + private def make_initial_shorts_ctoken(ucid : String, sort_by = "newest") + object = { + "10:embedded" => { + "2:embedded" => { + "1:string" => "00000000-0000-0000-0000-000000000000", + }, + "4:varint" => sort_options_videos_short(sort_by), + }, + } + + return channel_ctoken_wrap(ucid, object) + end + + # Generate the initial "continuation token" to get the first page of the + # "livestreams" tab. The following page requires the ctoken provided in that + # first page, and so on. + private def make_initial_livestreams_ctoken(ucid : String, sort_by = "newest") + sort_by_numerical = + case sort_by + when "newest" then 12_i64 + when "popular" then 14_i64 + when "oldest" then 13_i64 + else 12_i64 # Fallback to "newest" + end + + object = { + "14:embedded" => { + "2:embedded" => { + "1:string" => "00000000-0000-0000-0000-000000000000", + }, + "5:varint" => sort_by_numerical, + }, + } + + return channel_ctoken_wrap(ucid, object) + end + + # The protobuf structure common between videos/shorts/livestreams + private def channel_ctoken_wrap(ucid : String, object) + object_inner = { + "110:embedded" => { + "3:embedded" => object, + }, + } + + object_inner_encoded = object_inner + .try { |i| Protodec::Any.cast_json(i) } + .try { |i| Protodec::Any.from_json(i) } + .try { |i| Base64.urlsafe_encode(i) } + .try { |i| URI.encode_www_form(i) } + + object = { + "80226972:embedded" => { + "2:string" => ucid, + "3:string" => object_inner_encoded, + }, + } + + continuation = object.try { |i| Protodec::Any.cast_json(i) } + .try { |i| Protodec::Any.from_json(i) } + .try { |i| Base64.urlsafe_encode(i) } + .try { |i| URI.encode_www_form(i) } + + return continuation + end end diff --git a/src/invidious/config.cr b/src/invidious/config.cr index c4ddcdb3..c4ca622f 100644 --- a/src/invidious/config.cr +++ b/src/invidious/config.cr @@ -13,6 +13,7 @@ struct ConfigPreferences property annotations : Bool = false property annotations_subscribed : Bool = false + property preload : Bool = true property autoplay : Bool = false property captions : Array(String) = ["", "", ""] property comments : Array(String) = ["youtube", ""] @@ -54,6 +55,15 @@ struct ConfigPreferences end end +struct HTTPProxyConfig + include YAML::Serializable + + property user : String + property password : String + property host : String + property port : Int32 +end + class Config include YAML::Serializable @@ -68,6 +78,8 @@ class Config property output : String = "STDOUT" # Default log level, valid YAML values are ints and strings, see src/invidious/helpers/logger.cr property log_level : LogLevel = LogLevel::Info + # Enables colors in logs. Useful for debugging purposes + property colorize_logs : Bool = false # Database configuration with separate parameters (username, hostname, etc) property db : DBConfig? = nil @@ -128,6 +140,8 @@ class Config property host_binding : String = "0.0.0.0" # Pool size for HTTP requests to youtube.com and ytimg.com (each domain has a separate pool of `pool_size`) property pool_size : Int32 = 100 + # HTTP Proxy configuration + property http_proxy : HTTPProxyConfig? = nil # Use Innertube's transcripts API instead of timedtext for closed captions property use_innertube_for_captions : Bool = false diff --git a/src/invidious/helpers/crystal_class_overrides.cr b/src/invidious/helpers/crystal_class_overrides.cr index fec3f62c..3040d7a0 100644 --- a/src/invidious/helpers/crystal_class_overrides.cr +++ b/src/invidious/helpers/crystal_class_overrides.cr @@ -18,6 +18,40 @@ end class HTTP::Client property family : Socket::Family = Socket::Family::UNSPEC + # Override stdlib to automatically initialize proxy if configured + # + # Accurate as of crystal 1.12.1 + + def initialize(@host : String, port = nil, tls : TLSContext = nil) + check_host_only(@host) + + {% if flag?(:without_openssl) %} + if tls + raise "HTTP::Client TLS is disabled because `-D without_openssl` was passed at compile time" + end + @tls = nil + {% else %} + @tls = case tls + when true + OpenSSL::SSL::Context::Client.new + when OpenSSL::SSL::Context::Client + tls + when false, nil + nil + end + {% end %} + + @port = (port || (@tls ? 443 : 80)).to_i + + self.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy + end + + def initialize(@io : IO, @host = "", @port = 80) + @reconnect = false + + self.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy + end + private def io io = @io return io if io diff --git a/src/invidious/helpers/errors.cr b/src/invidious/helpers/errors.cr index b2df682d..b7643194 100644 --- a/src/invidious/helpers/errors.cr +++ b/src/invidious/helpers/errors.cr @@ -43,6 +43,8 @@ def error_template_helper(env : HTTP::Server::Context, status_code : Int32, exce # URLs for the error message below url_faq = "https://github.com/iv-org/documentation/blob/master/docs/faq.md" url_search_issues = "https://github.com/iv-org/invidious/issues" + url_search_issues += "?q=is:issue+is:open+" + url_search_issues += URI.encode_www_form("[Bug] #{issue_title}") url_switch = "https://redirect.invidious.io" + env.request.resource diff --git a/src/invidious/helpers/i18n.cr b/src/invidious/helpers/i18n.cr index 23a1aafc..1ba3ea61 100644 --- a/src/invidious/helpers/i18n.cr +++ b/src/invidious/helpers/i18n.cr @@ -1,8 +1,22 @@ +# Languages requiring a better level of translation (at least 20%) +# to be added to the list below: +# +# "af" => "", # Afrikaans +# "az" => "", # Azerbaijani +# "be" => "", # Belarusian +# "bn_BD" => "", # Bengali (Bangladesh) +# "ia" => "", # Interlingua +# "or" => "", # Odia +# "tk" => "", # Turkmen +# "tok => "", # Toki Pona +# LOCALES_LIST = { "ar" => "العربية", # Arabic + "bg" => "български", # Bulgarian "bn" => "বাংলা", # Bengali "ca" => "Català", # Catalan "cs" => "Čeština", # Czech + "cy" => "Cymraeg", # Welsh "da" => "Dansk", # Danish "de" => "Deutsch", # German "el" => "Ελληνικά", # Greek @@ -23,6 +37,7 @@ LOCALES_LIST = { "it" => "Italiano", # Italian "ja" => "日本語", # Japanese "ko" => "한국어", # Korean + "lmo" => "Lombard", # Lombard "lt" => "Lietuvių", # Lithuanian "nb-NO" => "Norsk bokmål", # Norwegian Bokmål "nl" => "Nederlands", # Dutch diff --git a/src/invidious/helpers/logger.cr b/src/invidious/helpers/logger.cr index b443073e..03349595 100644 --- a/src/invidious/helpers/logger.cr +++ b/src/invidious/helpers/logger.cr @@ -1,3 +1,5 @@ +require "colorize" + enum LogLevel All = 0 Trace = 1 @@ -10,7 +12,9 @@ enum LogLevel end class Invidious::LogHandler < Kemal::BaseLogHandler - def initialize(@io : IO = STDOUT, @level = LogLevel::Debug) + def initialize(@io : IO = STDOUT, @level = LogLevel::Debug, use_color : Bool = true) + Colorize.enabled = use_color + Colorize.on_tty_only! end def call(context : HTTP::Server::Context) @@ -39,10 +43,22 @@ class Invidious::LogHandler < Kemal::BaseLogHandler @io.flush end + def color(level) + case level + when LogLevel::Trace then :cyan + when LogLevel::Debug then :green + when LogLevel::Info then :white + when LogLevel::Warn then :yellow + when LogLevel::Error then :red + when LogLevel::Fatal then :magenta + else :default + end + end + {% for level in %w(trace debug info warn error fatal) %} def {{level.id}}(message : String) if LogLevel::{{level.id.capitalize}} >= @level - puts("#{Time.utc} [{{level.id}}] #{message}") + puts("#{Time.utc} [{{level.id}}] #{message}".colorize(color(LogLevel::{{level.id.capitalize}}))) end end {% end %} diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr index 463d5557..1fef5f93 100644 --- a/src/invidious/helpers/serialized_yt_data.cr +++ b/src/invidious/helpers/serialized_yt_data.cr @@ -1,3 +1,16 @@ +@[Flags] +enum VideoBadges + LiveNow + Premium + ThreeD + FourK + New + EightK + VR180 + VR360 + ClosedCaptions +end + struct SearchVideo include DB::Serializable @@ -9,10 +22,9 @@ struct SearchVideo property views : Int64 property description_html : String property length_seconds : Int32 - property live_now : Bool - property premium : Bool property premiere_timestamp : Time? property author_verified : Bool + property badges : VideoBadges def to_xml(auto_generated, query_params, xml : XML::Builder) query_params["v"] = self.id @@ -88,13 +100,20 @@ struct SearchVideo json.field "published", self.published.to_unix json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) json.field "lengthSeconds", self.length_seconds - json.field "liveNow", self.live_now - json.field "premium", self.premium + json.field "liveNow", self.badges.live_now? + json.field "premium", self.badges.premium? json.field "isUpcoming", self.upcoming? if self.premiere_timestamp json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix end + json.field "isNew", self.badges.new? + json.field "is4k", self.badges.four_k? + json.field "is8k", self.badges.eight_k? + json.field "isVr180", self.badges.vr180? + json.field "isVr360", self.badges.vr360? + json.field "is3d", self.badges.three_d? + json.field "hasCaptions", self.badges.closed_captions? end end diff --git a/src/invidious/helpers/sig_helper.cr b/src/invidious/helpers/sig_helper.cr index 9e72c1c7..6d198a42 100644 --- a/src/invidious/helpers/sig_helper.cr +++ b/src/invidious/helpers/sig_helper.cr @@ -175,8 +175,9 @@ module Invidious::SigHelper @queue = {} of TransactionID => Transaction @conn : Connection + @uri_or_path : String - def initialize(uri_or_path) + def initialize(@uri_or_path) @conn = Connection.new(uri_or_path) listen end @@ -186,10 +187,26 @@ module Invidious::SigHelper LOGGER.debug("SigHelper: Multiplexor listening") - # TODO: reopen socket if unexpectedly closed spawn do loop do - receive_data + begin + receive_data + rescue ex + LOGGER.info("SigHelper: Connection to helper died with '#{ex.message}' trying to reconnect...") + # We close the socket because for some reason is not closed. + @conn.close + loop do + begin + @conn = Connection.new(@uri_or_path) + LOGGER.info("SigHelper: Reconnected to SigHelper!") + rescue ex + LOGGER.debug("SigHelper: Reconnection to helper unsuccessful with error '#{ex.message}'. Retrying") + sleep 500.milliseconds + next + end + break if !@conn.closed? + end + end Fiber.yield end end diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr index 8e9e9a6a..4d9bb28d 100644 --- a/src/invidious/helpers/utils.cr +++ b/src/invidious/helpers/utils.cr @@ -323,68 +323,6 @@ def parse_range(range) return 0_i64, nil end -def fetch_random_instance - begin - instance_api_client = make_client(URI.parse("https://api.invidious.io")) - - # Timeouts - instance_api_client.connect_timeout = 10.seconds - instance_api_client.dns_timeout = 10.seconds - - instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a - instance_api_client.close - rescue Socket::ConnectError | IO::TimeoutError | JSON::ParseException - instance_list = [] of JSON::Any - end - - filtered_instance_list = [] of String - - instance_list.each do |data| - # TODO Check if current URL is onion instance and use .onion types if so. - if data[1]["type"] == "https" - # Instances can have statistics disabled, which is an requirement of version validation. - # as_nil? doesn't exist. Thus we'll have to handle the error raised if as_nil fails. - begin - data[1]["stats"].as_nil - next - rescue TypeCastError - end - - # stats endpoint could also lack the software dict. - next if data[1]["stats"]["software"]?.nil? - - # Makes sure the instance isn't too outdated. - if remote_version = data[1]["stats"]?.try &.["software"]?.try &.["version"] - remote_commit_date = remote_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/) - next if !remote_commit_date - - remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC) - local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC) - - next if (remote_commit_date - local_commit_date).abs.days > 30 - - begin - data[1]["monitor"].as_nil - health = data[1]["monitor"].as_h["dailyRatios"][0].as_h["ratio"] - filtered_instance_list << data[0].as_s if health.to_s.to_f > 90 - rescue TypeCastError - # We can't check the health if the monitoring is broken. Thus we'll just add it to the list - # and move on. Ideally we'll ignore any instance that has broken health monitoring but due to the fact that - # it's an error that often occurs with all the instances at the same time, we have to just skip the check. - filtered_instance_list << data[0].as_s - end - end - end - end - - # If for some reason no instances managed to get fetched successfully then we'll just redirect to redirect.invidious.io - if filtered_instance_list.size == 0 - return "redirect.invidious.io" - end - - return filtered_instance_list.sample(1)[0] -end - def reduce_uri(uri : URI | String, max_length : Int32 = 50, suffix : String = "…") : String str = uri.to_s.sub(/^https?:\/\//, "") if str.size > max_length diff --git a/src/invidious/jobs/instance_refresh_job.cr b/src/invidious/jobs/instance_refresh_job.cr new file mode 100644 index 00000000..cb4280b9 --- /dev/null +++ b/src/invidious/jobs/instance_refresh_job.cr @@ -0,0 +1,97 @@ +class Invidious::Jobs::InstanceListRefreshJob < Invidious::Jobs::BaseJob + # We update the internals of a constant as so it can be accessed from anywhere + # within the codebase + # + # "INSTANCES" => Array(Tuple(String, String)) # region, instance + + INSTANCES = {"INSTANCES" => [] of Tuple(String, String)} + + def initialize + end + + def begin + loop do + refresh_instances + LOGGER.info("InstanceListRefreshJob: Done, sleeping for 30 minutes") + sleep 30.minute + Fiber.yield + end + end + + # Refreshes the list of instances used for redirects. + # + # Does the following three checks for each instance + # - Is it a clear-net instance? + # - Is it an instance with a good uptime? + # - Is it an updated instance? + private def refresh_instances + raw_instance_list = self.fetch_instances + filtered_instance_list = [] of Tuple(String, String) + + raw_instance_list.each do |instance_data| + # TODO allow Tor hidden service instances when the current instance + # is also a hidden service. Same for i2p and any other non-clearnet instances. + begin + domain = instance_data[0] + info = instance_data[1] + stats = info["stats"] + + next unless info["type"] == "https" + next if bad_uptime?(info["monitor"]) + next if outdated?(stats["software"]["version"]) + + filtered_instance_list << {info["region"].as_s, domain.as_s} + rescue ex + if domain + LOGGER.info("InstanceListRefreshJob: failed to parse information from '#{domain}' because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ") + else + LOGGER.info("InstanceListRefreshJob: failed to parse information from an instance because \"#{ex}\"\n\"#{ex.backtrace.join('\n')}\" ") + end + end + end + + if !filtered_instance_list.empty? + INSTANCES["INSTANCES"] = filtered_instance_list + end + end + + # Fetches information regarding instances from api.invidious.io or an otherwise configured URL + private def fetch_instances : Array(JSON::Any) + begin + # We directly call the stdlib HTTP::Client here as it allows us to negate the effects + # of the force_resolve config option. This is needed as api.invidious.io does not support ipv6 + # and as such the following request raises if we were to use force_resolve with the ipv6 value. + instance_api_client = HTTP::Client.new(URI.parse("https://api.invidious.io")) + + # Timeouts + instance_api_client.connect_timeout = 10.seconds + instance_api_client.dns_timeout = 10.seconds + + raw_instance_list = JSON.parse(instance_api_client.get("/instances.json").body).as_a + instance_api_client.close + rescue ex : Socket::ConnectError | IO::TimeoutError | JSON::ParseException + raw_instance_list = [] of JSON::Any + end + + return raw_instance_list + end + + # Checks if the given target instance is outdated + private def outdated?(target_instance_version) : Bool + remote_commit_date = target_instance_version.as_s.match(/\d{4}\.\d{2}\.\d{2}/) + return false if !remote_commit_date + + remote_commit_date = Time.parse(remote_commit_date[0], "%Y.%m.%d", Time::Location::UTC) + local_commit_date = Time.parse(CURRENT_VERSION, "%Y.%m.%d", Time::Location::UTC) + + return (remote_commit_date - local_commit_date).abs.days > 30 + end + + # Checks if the uptime of the target instance is greater than 90% over a 30 day period + private def bad_uptime?(target_instance_health_monitor) : Bool + return true if !target_instance_health_monitor["down"].as_bool == false + return true if target_instance_health_monitor["uptime"].as_f < 90 + + return false + end +end diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr index 3e6eef95..a51e88b4 100644 --- a/src/invidious/playlists.cr +++ b/src/invidious/playlists.cr @@ -270,7 +270,7 @@ end def subscribe_playlist(user, playlist) playlist = InvidiousPlaylist.new({ - title: playlist.title.byte_slice(0, 150), + title: playlist.title[..150], id: playlist.id, author: user.email, description: "", # Max 5000 characters diff --git a/src/invidious/routes/api/v1/search.cr b/src/invidious/routes/api/v1/search.cr index 2922b060..59a30745 100644 --- a/src/invidious/routes/api/v1/search.cr +++ b/src/invidious/routes/api/v1/search.cr @@ -31,9 +31,7 @@ module Invidious::Routes::API::V1::Search query = env.params.query["q"]? || "" begin - client = HTTP::Client.new("suggestqueries-clients6.youtube.com") - client.before_request { |r| add_yt_headers(r) } - + client = make_client(URI.parse("https://suggestqueries-clients6.youtube.com"), force_youtube_headers: true) url = "/complete/search?client=youtube&hl=en&gl=#{region}&q=#{URI.encode_www_form(query)}&gs_ri=youtube&ds=yt" response = client.get(url).body diff --git a/src/invidious/routes/channels.cr b/src/invidious/routes/channels.cr index 952098e0..7d634cbb 100644 --- a/src/invidious/routes/channels.cr +++ b/src/invidious/routes/channels.cr @@ -20,10 +20,11 @@ module Invidious::Routes::Channels sort_by = env.params.query["sort_by"]?.try &.downcase if channel.auto_generated + sort_by ||= "last" sort_options = {"last", "oldest", "newest"} items, next_continuation = fetch_channel_playlists( - channel.ucid, channel.author, continuation, (sort_by || "last") + channel.ucid, channel.author, continuation, sort_by ) items.uniq! do |item| @@ -49,9 +50,11 @@ module Invidious::Routes::Channels end next_continuation = nil else + sort_by ||= "newest" sort_options = {"newest", "oldest", "popular"} - items, next_continuation = Channel::Tabs.get_videos( - channel, continuation: continuation, sort_by: (sort_by || "newest") + + items, next_continuation = Channel::Tabs.get_60_videos( + channel, continuation: continuation, sort_by: sort_by ) end end @@ -82,13 +85,12 @@ module Invidious::Routes::Channels end next_continuation = nil else - # TODO: support sort option for shorts - sort_by = "" - sort_options = [] of String + sort_by = env.params.query["sort_by"]?.try &.downcase || "newest" + sort_options = {"newest", "oldest", "popular"} # Fetch items and continuation token items, next_continuation = Channel::Tabs.get_shorts( - channel, continuation: continuation + channel, continuation: continuation, sort_by: sort_by ) end diff --git a/src/invidious/routes/feeds.cr b/src/invidious/routes/feeds.cr index e20a7139..ea7fb396 100644 --- a/src/invidious/routes/feeds.cr +++ b/src/invidious/routes/feeds.cr @@ -192,11 +192,9 @@ module Invidious::Routes::Feeds views: views, description_html: description_html, length_seconds: 0, - live_now: false, - paid: false, - premium: false, premiere_timestamp: nil, author_verified: false, + badges: VideoBadges::None, }) end diff --git a/src/invidious/routes/images.cr b/src/invidious/routes/images.cr index b6a2e110..639697db 100644 --- a/src/invidious/routes/images.cr +++ b/src/invidious/routes/images.cr @@ -11,29 +11,9 @@ module Invidious::Routes::Images end end - # We're encapsulating this into a proc in order to easily reuse this - # portion of the code for each request block below. - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 - env.response.headers.delete("Transfer-Encoding") - return - end - - proxy_file(response, env) - } - begin - HTTP::Client.get("https://yt3.ggpht.com#{url}") do |resp| - return request_proc.call(resp) + GGPHT_POOL.client &.get(url, headers) do |resp| + return self.proxy_image(env, resp) end rescue ex end @@ -61,27 +41,10 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Connection"] = "close" - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 - return env.response.headers.delete("Transfer-Encoding") - end - - proxy_file(response, env) - } - begin - HTTP::Client.get("https://#{authority}.ytimg.com#{url}") do |resp| - return request_proc.call(resp) + get_ytimg_pool(authority).client &.get(url, headers) do |resp| + env.response.headers["Connection"] = "close" + return self.proxy_image(env, resp) end rescue ex end @@ -101,26 +64,9 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end - end - - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 && response.status_code != 404 - return env.response.headers.delete("Transfer-Encoding") - end - - proxy_file(response, env) - } - begin - HTTP::Client.get("https://i9.ytimg.com#{url}") do |resp| - return request_proc.call(resp) + get_ytimg_pool("i9").client &.get(url, headers) do |resp| + return self.proxy_image(env, resp) end rescue ex end @@ -165,8 +111,7 @@ module Invidious::Routes::Images if name == "maxres.jpg" build_thumbnails(id).each do |thumb| thumbnail_resource_path = "/vi/#{id}/#{thumb[:url]}.jpg" - # This can likely be optimized into a (small) pool sometime in the future. - if HTTP::Client.head("https://i.ytimg.com#{thumbnail_resource_path}").status_code == 200 + if get_ytimg_pool("i9").client &.head(thumbnail_resource_path, headers).status_code == 200 name = thumb[:url] + ".jpg" break end @@ -181,29 +126,28 @@ module Invidious::Routes::Images end end - request_proc = ->(response : HTTP::Client::Response) { - env.response.status_code = response.status_code - response.headers.each do |key, value| - if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) - env.response.headers[key] = value - end + begin + get_ytimg_pool("i").client &.get(url, headers) do |resp| + return self.proxy_image(env, resp) end + rescue ex + end + end - env.response.headers["Access-Control-Allow-Origin"] = "*" - - if response.status_code >= 300 && response.status_code != 404 - return env.response.headers.delete("Transfer-Encoding") + private def self.proxy_image(env, response) + env.response.status_code = response.status_code + response.headers.each do |key, value| + if !RESPONSE_HEADERS_BLACKLIST.includes?(key.downcase) + env.response.headers[key] = value end + end - proxy_file(response, env) - } + env.response.headers["Access-Control-Allow-Origin"] = "*" - begin - # This can likely be optimized into a (small) pool sometime in the future. - HTTP::Client.get("https://i.ytimg.com#{url}") do |resp| - return request_proc.call(resp) - end - rescue ex + if response.status_code >= 300 + return env.response.headers.delete("Transfer-Encoding") end + + return proxy_file(response, env) end end diff --git a/src/invidious/routes/misc.cr b/src/invidious/routes/misc.cr index d6bd9571..8b620d63 100644 --- a/src/invidious/routes/misc.cr +++ b/src/invidious/routes/misc.cr @@ -40,7 +40,16 @@ module Invidious::Routes::Misc def self.cross_instance_redirect(env) referer = get_referer(env) - instance_url = fetch_random_instance + + instance_list = Invidious::Jobs::InstanceListRefreshJob::INSTANCES["INSTANCES"] + if instance_list.empty? + instance_url = "redirect.invidious.io" + else + # Sample returns an array + # Instances are packaged as {region, domain} in the instance list + instance_url = instance_list.sample(1)[0][1] + end + env.redirect "https://#{instance_url}#{referer}" end end diff --git a/src/invidious/routes/preferences.cr b/src/invidious/routes/preferences.cr index 05bc2714..39ca77c0 100644 --- a/src/invidious/routes/preferences.cr +++ b/src/invidious/routes/preferences.cr @@ -27,6 +27,10 @@ module Invidious::Routes::PreferencesRoute annotations_subscribed ||= "off" annotations_subscribed = annotations_subscribed == "on" + preload = env.params.body["preload"]?.try &.as(String) + preload ||= "off" + preload = preload == "on" + autoplay = env.params.body["autoplay"]?.try &.as(String) autoplay ||= "off" autoplay = autoplay == "on" @@ -144,6 +148,7 @@ module Invidious::Routes::PreferencesRoute preferences = Preferences.from_json({ annotations: annotations, annotations_subscribed: annotations_subscribed, + preload: preload, autoplay: autoplay, captions: captions, comments: comments, diff --git a/src/invidious/routes/video_playback.cr b/src/invidious/routes/video_playback.cr index 24693662..26852d06 100644 --- a/src/invidious/routes/video_playback.cr +++ b/src/invidious/routes/video_playback.cr @@ -42,7 +42,7 @@ module Invidious::Routes::VideoPlayback headers["Range"] = "bytes=#{range_for_head}" end - client = make_client(URI.parse(host), region, force_resolve = true) + client = make_client(URI.parse(host), region, force_resolve: true) response = HTTP::Client::Response.new(500) error = "" 5.times do @@ -57,7 +57,7 @@ module Invidious::Routes::VideoPlayback if new_host != host host = new_host client.close - client = make_client(URI.parse(new_host), region, force_resolve = true) + client = make_client(URI.parse(new_host), region, force_resolve: true) end url = "#{location.request_target}&host=#{location.host}#{region ? "®ion=#{region}" : ""}" @@ -71,7 +71,7 @@ module Invidious::Routes::VideoPlayback fvip = "3" host = "https://r#{fvip}---#{mn}.googlevideo.com" - client = make_client(URI.parse(host), region, force_resolve = true) + client = make_client(URI.parse(host), region, force_resolve: true) rescue ex error = ex.message end @@ -196,7 +196,7 @@ module Invidious::Routes::VideoPlayback break else client.close - client = make_client(URI.parse(host), region, force_resolve = true) + client = make_client(URI.parse(host), region, force_resolve: true) end end diff --git a/src/invidious/user/preferences.cr b/src/invidious/user/preferences.cr index b3059403..0a8525f3 100644 --- a/src/invidious/user/preferences.cr +++ b/src/invidious/user/preferences.cr @@ -4,6 +4,7 @@ struct Preferences property annotations : Bool = CONFIG.default_user_preferences.annotations property annotations_subscribed : Bool = CONFIG.default_user_preferences.annotations_subscribed + property preload : Bool = CONFIG.default_user_preferences.preload property autoplay : Bool = CONFIG.default_user_preferences.autoplay property automatic_instance_redirect : Bool = CONFIG.default_user_preferences.automatic_instance_redirect diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index 921132f0..ae09e736 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -27,12 +27,6 @@ struct Video @captions = [] of Invidious::Videos::Captions::Metadata @[DB::Field(ignore: true)] - property adaptive_fmts : Array(Hash(String, JSON::Any))? - - @[DB::Field(ignore: true)] - property fmt_stream : Array(Hash(String, JSON::Any))? - - @[DB::Field(ignore: true)] property description : String? module JSONConverter @@ -98,72 +92,24 @@ struct Video # Methods for parsing streaming data - def convert_url(fmt) - if cfr = fmt["signatureCipher"]?.try { |json| HTTP::Params.parse(json.as_s) } - sp = cfr["sp"] - url = URI.parse(cfr["url"]) - params = url.query_params - - LOGGER.debug("Videos: Decoding '#{cfr}'") - - unsig = DECRYPT_FUNCTION.try &.decrypt_signature(cfr["s"]) - params[sp] = unsig if unsig + def fmt_stream : Array(Hash(String, JSON::Any)) + if formats = info.dig?("streamingData", "formats") + return formats + .as_a.map(&.as_h) + .sort_by! { |f| f["width"]?.try &.as_i || 0 } else - url = URI.parse(fmt["url"].as_s) - params = url.query_params - end - - n = DECRYPT_FUNCTION.try &.decrypt_nsig(params["n"]) - params["n"] = n if n - - if token = CONFIG.po_token - params["pot"] = token - end - - params["host"] = url.host.not_nil! - if region = self.info["region"]?.try &.as_s - params["region"] = region - end - - url.query_params = params - LOGGER.trace("Videos: new url is '#{url}'") - - return url.to_s - rescue ex - LOGGER.debug("Videos: Error when parsing video URL") - LOGGER.trace(ex.inspect_with_backtrace) - return "" - end - - def fmt_stream - return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream - - fmt_stream = info.dig?("streamingData", "formats") - .try &.as_a.map &.as_h || [] of Hash(String, JSON::Any) - - fmt_stream.each do |fmt| - fmt["url"] = JSON::Any.new(self.convert_url(fmt)) + return [] of Hash(String, JSON::Any) end - - fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 } - @fmt_stream = fmt_stream - return @fmt_stream.as(Array(Hash(String, JSON::Any))) end - def adaptive_fmts - return @adaptive_fmts.as(Array(Hash(String, JSON::Any))) if @adaptive_fmts - - fmt_stream = info.dig("streamingData", "adaptiveFormats") - .try &.as_a.map &.as_h || [] of Hash(String, JSON::Any) - - fmt_stream.each do |fmt| - fmt["url"] = JSON::Any.new(self.convert_url(fmt)) + def adaptive_fmts : Array(Hash(String, JSON::Any)) + if formats = info.dig?("streamingData", "adaptiveFormats") + return formats + .as_a.map(&.as_h) + .sort_by! { |f| f["width"]?.try &.as_i || 0 } + else + return [] of Hash(String, JSON::Any) end - - fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 } - @adaptive_fmts = fmt_stream - - return @adaptive_fmts.as(Array(Hash(String, JSON::Any))) end def video_streams diff --git a/src/invidious/videos/caption.cr b/src/invidious/videos/caption.cr index 484e61d2..c811cfe1 100644 --- a/src/invidious/videos/caption.cr +++ b/src/invidious/videos/caption.cr @@ -123,6 +123,7 @@ module Invidious::Videos "Esperanto", "Estonian", "Filipino", + "Filipino (auto-generated)", "Finnish", "French", "French (auto-generated)", diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr index c28ba634..70075da9 100644 --- a/src/invidious/videos/parser.cr +++ b/src/invidious/videos/parser.cr @@ -142,10 +142,21 @@ def extract_video_info(video_id : String) params.delete("reason") end - {"captions", "playabilityStatus", "playerConfig", "storyboards", "streamingData"}.each do |f| + {"captions", "playabilityStatus", "playerConfig", "storyboards"}.each do |f| params[f] = player_response[f] if player_response[f]? end + # Convert URLs, if those are present + if streaming_data = player_response["streamingData"]? + %w[formats adaptiveFormats].each do |key| + streaming_data.as_h[key]?.try &.as_a.each do |format| + format.as_h["url"] = JSON::Any.new(convert_url(format)) + end + end + + params["streamingData"] = streaming_data + end + # Data structure version, for cache control params["version"] = JSON::Any.new(Video::SCHEMA_VERSION.to_i64) @@ -463,3 +474,35 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any return params end + +private def convert_url(fmt) + if cfr = fmt["signatureCipher"]?.try { |json| HTTP::Params.parse(json.as_s) } + sp = cfr["sp"] + url = URI.parse(cfr["url"]) + params = url.query_params + + LOGGER.debug("convert_url: Decoding '#{cfr}'") + + unsig = DECRYPT_FUNCTION.try &.decrypt_signature(cfr["s"]) + params[sp] = unsig if unsig + else + url = URI.parse(fmt["url"].as_s) + params = url.query_params + end + + n = DECRYPT_FUNCTION.try &.decrypt_nsig(params["n"]) + params["n"] = n if n + + if token = CONFIG.po_token + params["pot"] = token + end + + url.query_params = params + LOGGER.trace("convert_url: new url is '#{url}'") + + return url.to_s +rescue ex + LOGGER.debug("convert_url: Error when parsing video URL") + LOGGER.trace(ex.inspect_with_backtrace) + return "" +end diff --git a/src/invidious/videos/video_preferences.cr b/src/invidious/videos/video_preferences.cr index 34cf7ff0..48177bd8 100644 --- a/src/invidious/videos/video_preferences.cr +++ b/src/invidious/videos/video_preferences.cr @@ -2,6 +2,7 @@ struct VideoPreferences include JSON::Serializable property annotations : Bool + property preload : Bool property autoplay : Bool property comments : Array(String) property continue : Bool @@ -28,6 +29,7 @@ end def process_video_params(query, preferences) annotations = query["iv_load_policy"]?.try &.to_i? + preload = query["preload"]?.try { |q| (q == "true" || q == "1").to_unsafe } autoplay = query["autoplay"]?.try { |q| (q == "true" || q == "1").to_unsafe } comments = query["comments"]?.try &.split(",").map(&.downcase) continue = query["continue"]?.try { |q| (q == "true" || q == "1").to_unsafe } @@ -50,6 +52,7 @@ def process_video_params(query, preferences) if preferences # region ||= preferences.region annotations ||= preferences.annotations.to_unsafe + preload ||= preferences.preload.to_unsafe autoplay ||= preferences.autoplay.to_unsafe comments ||= preferences.comments continue ||= preferences.continue.to_unsafe @@ -70,6 +73,7 @@ def process_video_params(query, preferences) end annotations ||= CONFIG.default_user_preferences.annotations.to_unsafe + preload ||= CONFIG.default_user_preferences.preload.to_unsafe autoplay ||= CONFIG.default_user_preferences.autoplay.to_unsafe comments ||= CONFIG.default_user_preferences.comments continue ||= CONFIG.default_user_preferences.continue.to_unsafe @@ -89,6 +93,7 @@ def process_video_params(query, preferences) save_player_pos ||= CONFIG.default_user_preferences.save_player_pos.to_unsafe annotations = annotations == 1 + preload = preload == 1 autoplay = autoplay == 1 continue = continue == 1 continue_autoplay = continue_autoplay == 1 @@ -128,6 +133,7 @@ def process_video_params(query, preferences) params = VideoPreferences.new({ annotations: annotations, + preload: preload, autoplay: autoplay, comments: comments, continue: continue, diff --git a/src/invidious/views/components/player.ecr b/src/invidious/views/components/player.ecr index c3c02df0..5c28358b 100644 --- a/src/invidious/views/components/player.ecr +++ b/src/invidious/views/components/player.ecr @@ -1,5 +1,6 @@ <video style="outline:none;width:100%;background-color:#000" playsinline poster="<%= thumbnail %>" id="player" class="on-video_player video-js player-style-<%= params.player_style %>" + preload="<% if params.preload %>auto<% else %>none<% end %>" <% if params.autoplay %>autoplay<% end %> <% if params.video_loop %>loop<% end %> <% if params.controls %>controls<% end %>> diff --git a/src/invidious/views/user/preferences.ecr b/src/invidious/views/user/preferences.ecr index b89c73ca..cf8b5593 100644 --- a/src/invidious/views/user/preferences.ecr +++ b/src/invidious/views/user/preferences.ecr @@ -13,6 +13,11 @@ </div> <div class="pure-control-group"> + <label for="preload"><%= translate(locale, "preferences_preload_label") %></label> + <input name="preload" id="preload" type="checkbox" <% if preferences.preload %>checked<% end %>> + </div> + + <div class="pure-control-group"> <label for="autoplay"><%= translate(locale, "preferences_autoplay_label") %></label> <input name="autoplay" id="autoplay" type="checkbox" <% if preferences.autoplay %>checked<% end %>> </div> diff --git a/src/invidious/yt_backend/connection_pool.cr b/src/invidious/yt_backend/connection_pool.cr index ca612083..c4a73aa7 100644 --- a/src/invidious/yt_backend/connection_pool.cr +++ b/src/invidious/yt_backend/connection_pool.cr @@ -1,17 +1,6 @@ -def add_yt_headers(request) - request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal" - request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36" - - request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" - request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - request.headers["Accept-Language"] ||= "en-us,en;q=0.5" - - # Preserve original cookies and add new YT consent cookie for EU servers - request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}" - if !CONFIG.cookies.empty? - request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" - end -end +# Mapping of subdomain => YoutubeConnectionPool +# This is needed as we may need to access arbitrary subdomains of ytimg +private YTIMG_POOLS = {} of String => YoutubeConnectionPool struct YoutubeConnectionPool property! url : URI @@ -26,15 +15,15 @@ struct YoutubeConnectionPool def client(&) conn = pool.checkout + # Proxy needs to be reinstated every time we get a client from the pool + conn.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy + begin response = yield conn rescue ex conn.close - conn = HTTP::Client.new(url) + conn = make_client(url, force_resolve: true) - conn.family = CONFIG.force_resolve - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" response = yield conn ensure pool.release(conn) @@ -44,25 +33,45 @@ struct YoutubeConnectionPool end private def build_pool - DB::Pool(HTTP::Client).new(initial_pool_size: 0, max_pool_size: capacity, max_idle_pool_size: capacity, checkout_timeout: timeout) do - conn = HTTP::Client.new(url) - conn.family = CONFIG.force_resolve - conn.family = Socket::Family::INET if conn.family == Socket::Family::UNSPEC - conn.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" - conn + options = DB::Pool::Options.new( + initial_pool_size: 0, + max_pool_size: capacity, + max_idle_pool_size: capacity, + checkout_timeout: timeout + ) + + DB::Pool(HTTP::Client).new(options) do + next make_client(url, force_resolve: true) end end end -def make_client(url : URI, region = nil, force_resolve : Bool = false) +def add_yt_headers(request) + request.headers.delete("User-Agent") if request.headers["User-Agent"] == "Crystal" + request.headers["User-Agent"] ||= "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36" + + request.headers["Accept-Charset"] ||= "ISO-8859-1,utf-8;q=0.7,*;q=0.7" + request.headers["Accept"] ||= "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + request.headers["Accept-Language"] ||= "en-us,en;q=0.5" + + # Preserve original cookies and add new YT consent cookie for EU servers + request.headers["Cookie"] = "#{request.headers["cookie"]?}; CONSENT=PENDING+#{Random.rand(100..999)}" + if !CONFIG.cookies.empty? + request.headers["Cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}" + end +end + +def make_client(url : URI, region = nil, force_resolve : Bool = false, force_youtube_headers : Bool = false) client = HTTP::Client.new(url) + client.proxy = make_configured_http_proxy_client() if CONFIG.http_proxy # Force the usage of a specific configured IP Family if force_resolve client.family = CONFIG.force_resolve + client.family = Socket::Family::INET if client.family == Socket::Family::UNSPEC end - client.before_request { |r| add_yt_headers(r) } if url.host == "www.youtube.com" + client.before_request { |r| add_yt_headers(r) } if url.host.try &.ends_with?("youtube.com") || force_youtube_headers client.read_timeout = 10.seconds client.connect_timeout = 10.seconds @@ -70,10 +79,38 @@ def make_client(url : URI, region = nil, force_resolve : Bool = false) end def make_client(url : URI, region = nil, force_resolve : Bool = false, &) - client = make_client(url, region, force_resolve) + client = make_client(url, region, force_resolve: force_resolve) begin yield client ensure client.close end end + +def make_configured_http_proxy_client + # This method is only called when configuration for an HTTP proxy are set + config_proxy = CONFIG.http_proxy.not_nil! + + return HTTP::Proxy::Client.new( + config_proxy.host, + config_proxy.port, + + username: config_proxy.user, + password: config_proxy.password, + ) +end + +# Fetches a HTTP pool for the specified subdomain of ytimg.com +# +# Creates a new one when the specified pool for the subdomain does not exist +def get_ytimg_pool(subdomain) + if pool = YTIMG_POOLS[subdomain]? + return pool + else + LOGGER.info("ytimg_pool: Creating a new HTTP pool for \"https://#{subdomain}.ytimg.com\"") + pool = YoutubeConnectionPool.new(URI.parse("https://#{subdomain}.ytimg.com"), capacity: CONFIG.pool_size) + YTIMG_POOLS[subdomain] = pool + + return pool + end +end diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index 38dc2c04..2631b62a 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -21,6 +21,7 @@ private ITEM_PARSERS = { Parsers::ItemSectionRendererParser, Parsers::ContinuationItemRendererParser, Parsers::HashtagRendererParser, + Parsers::LockupViewModelParser, } private alias InitialData = Hash(String, JSON::Any) @@ -108,21 +109,30 @@ private module Parsers length_seconds = 0 end - live_now = false - premium = false - premiere_timestamp = item_contents.dig?("upcomingEventData", "startTime").try { |t| Time.unix(t.as_s.to_i64) } - + badges = VideoBadges::None item_contents["badges"]?.try &.as_a.each do |badge| b = badge["metadataBadgeRenderer"] case b["label"].as_s - when "LIVE NOW" - live_now = true - when "New", "4K", "CC" - # TODO + when "LIVE" + badges |= VideoBadges::LiveNow + when "New" + badges |= VideoBadges::New + when "4K" + badges |= VideoBadges::FourK + when "8K" + badges |= VideoBadges::EightK + when "VR180" + badges |= VideoBadges::VR180 + when "360°" + badges |= VideoBadges::VR360 + when "3D" + badges |= VideoBadges::ThreeD + when "CC" + badges |= VideoBadges::ClosedCaptions when "Premium" # TODO: Potentially available as item_contents["topStandaloneBadge"]["metadataBadgeRenderer"] - premium = true + badges |= VideoBadges::Premium else nil # Ignore end end @@ -136,10 +146,9 @@ private module Parsers views: view_count, description_html: description_html, length_seconds: length_seconds, - live_now: live_now, - premium: premium, premiere_timestamp: premiere_timestamp, author_verified: author_verified, + badges: badges, }) end @@ -459,9 +468,9 @@ private module Parsers # Parses an InnerTube richItemRenderer into a SearchVideo. # Returns nil when the given object isn't a RichItemRenderer # - # A richItemRenderer seems to be a simple wrapper for a videoRenderer, used - # by the result page for hashtags and for the podcast tab on channels. - # It is located inside a continuationItems container for hashtags. + # A richItemRenderer seems to be a simple wrapper for a various other types, + # used on the hashtags result page and the channel podcast tab. It is located + # itself inside a richGridRenderer container. # module RichItemRendererParser def self.process(item : JSON::Any, author_fallback : AuthorFallback) @@ -474,6 +483,8 @@ private module Parsers child = VideoRendererParser.process(item_contents, author_fallback) child ||= ReelItemRendererParser.process(item_contents, author_fallback) child ||= PlaylistRendererParser.process(item_contents, author_fallback) + child ||= LockupViewModelParser.process(item_contents, author_fallback) + child ||= ShortsLockupViewModelParser.process(item_contents, author_fallback) return child end @@ -488,6 +499,9 @@ private module Parsers # reelItemRenderer items are used in the new (2022) channel layout, # in the "shorts" tab. # + # NOTE: As of 10/2024, it might have been fully replaced by shortsLockupViewModel + # TODO: Confirm that hypothesis + # module ReelItemRendererParser def self.process(item : JSON::Any, author_fallback : AuthorFallback) if item_contents = item["reelItemRenderer"]? @@ -563,10 +577,138 @@ private module Parsers views: view_count, description_html: "", length_seconds: duration, - live_now: false, - premium: false, premiere_timestamp: Time.unix(0), author_verified: false, + badges: VideoBadges::None, + }) + end + + def self.parser_name + return {{@type.name}} + end + end + + # Parses an InnerTube lockupViewModel into a SearchPlaylist. + # Returns nil when the given object is not a lockupViewModel. + # + # This structure is present since November 2024 on the "podcasts" and + # "playlists" tabs of the channel page. It is usually encapsulated in either + # a richItemRenderer or a richGridRenderer. + # + module LockupViewModelParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["lockupViewModel"]? + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + playlist_id = item_contents["contentId"].as_s + + thumbnail_view_model = item_contents.dig( + "contentImage", "collectionThumbnailViewModel", + "primaryThumbnail", "thumbnailViewModel" + ) + + thumbnail = thumbnail_view_model.dig("image", "sources", 0, "url").as_s + + # This complicated sequences tries to extract the following data structure: + # "overlays": [{ + # "thumbnailOverlayBadgeViewModel": { + # "thumbnailBadges": [{ + # "thumbnailBadgeViewModel": { + # "text": "430 episodes", + # "badgeStyle": "THUMBNAIL_OVERLAY_BADGE_STYLE_DEFAULT" + # } + # }] + # } + # }] + # + # NOTE: this simplistic `.to_i` conversion might not work on larger + # playlists and hasn't been tested. + video_count = thumbnail_view_model.dig("overlays").as_a + .compact_map(&.dig?("thumbnailOverlayBadgeViewModel", "thumbnailBadges").try &.as_a) + .flatten + .find(nil, &.dig?("thumbnailBadgeViewModel", "text").try { |node| + {"episodes", "videos"}.any? { |str| node.as_s.ends_with?(str) } + }) + .try &.dig("thumbnailBadgeViewModel", "text").as_s.to_i(strict: false) + + metadata = item_contents.dig("metadata", "lockupMetadataViewModel") + title = metadata.dig("title", "content").as_s + + # TODO: Retrieve "updated" info from metadata parts + # rows = metadata.dig("metadata", "contentMetadataViewModel", "metadataRows").as_a + # parts_text = rows.map(&.dig?("metadataParts", "text", "content").try &.as_s) + # One of these parts should contain a string like: "Updated 2 days ago" + + # TODO: Maybe add a button to access the first video of the playlist? + # item_contents.dig("rendererContext", "commandContext", "onTap", "innertubeCommand", "watchEndpoint") + # Available fields: "videoId", "playlistId", "params" + + return SearchPlaylist.new({ + title: title, + id: playlist_id, + author: author_fallback.name, + ucid: author_fallback.id, + video_count: video_count || -1, + videos: [] of SearchPlaylistVideo, + thumbnail: thumbnail, + author_verified: false, + }) + end + + def self.parser_name + return {{@type.name}} + end + end + + # Parses an InnerTube shortsLockupViewModel into a SearchVideo. + # Returns nil when the given object is not a shortsLockupViewModel. + # + # This structure is present since around October 2024 on the "shorts" tab of + # the channel page and likely replaces the reelItemRenderer structure. It is + # usually (always?) encapsulated in a richItemRenderer. + # + module ShortsLockupViewModelParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item["shortsLockupViewModel"]? + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + # TODO: Maybe add support for "oardefault.jpg" thumbnails? + # thumbnail = item_contents.dig("thumbnail", "sources", 0, "url").as_s + # Gives: https://i.ytimg.com/vi/{video_id}/oardefault.jpg?... + + video_id = item_contents.dig( + "onTap", "innertubeCommand", "reelWatchEndpoint", "videoId" + ).as_s + + title = item_contents.dig("overlayMetadata", "primaryText", "content").as_s + + view_count = short_text_to_number( + item_contents.dig("overlayMetadata", "secondaryText", "content").as_s + ) + + # Approximate to one minute, as "shorts" generally don't exceed that. + # NOTE: The actual duration is not provided by Youtube anymore. + # TODO: Maybe use -1 as an error value and handle that on the frontend? + duration = 60_i32 + + SearchVideo.new({ + title: title, + id: video_id, + author: author_fallback.name, + ucid: author_fallback.id, + published: Time.unix(0), + views: view_count, + description_html: "", + length_seconds: duration, + premiere_timestamp: Time.unix(0), + author_verified: false, + badges: VideoBadges::None, }) end diff --git a/src/invidious/yt_backend/url_sanitizer.cr b/src/invidious/yt_backend/url_sanitizer.cr index 725382ee..d539dadb 100644 --- a/src/invidious/yt_backend/url_sanitizer.cr +++ b/src/invidious/yt_backend/url_sanitizer.cr @@ -111,7 +111,7 @@ module UrlSanitizer new_uri.path = "/watch" new_params = copy_params(unsafe_uri.query_params, :watch) - new_params["id"] = breadcrumbs[0] + new_params["v"] = breadcrumbs[0] new_uri.query_params = new_params end diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index baa3cd92..e0a3181f 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -638,6 +638,11 @@ module YoutubeAPI # Send the POST request body = YT_POOL.client() do |client| client.post(url, headers: headers, body: data.to_json) do |response| + if response.status_code != 200 + raise InfoException.new("Error: non 200 status code. Youtube API returned \ + status code #{response.status_code}. See <a href=\"https://docs.invidious.io/youtube-errors-explained/\"> \ + https://docs.invidious.io/youtube-errors-explained/</a> for troubleshooting.") + end self._decompress(response.body_io, response.headers["Content-Encoding"]?) end end |
