diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/invidious/routes/api/v1/videos.cr | 9 | ||||
| -rw-r--r-- | src/invidious/videos/transcript.cr | 111 | ||||
| -rw-r--r-- | src/invidious/views/channel.ecr | 4 | ||||
| -rw-r--r-- | src/invidious/views/watch.ecr | 2 | ||||
| -rw-r--r-- | src/invidious/yt_backend/youtube_api.cr | 23 |
5 files changed, 91 insertions, 58 deletions
diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr index 9281f4dd..faff2f59 100644 --- a/src/invidious/routes/api/v1/videos.cr +++ b/src/invidious/routes/api/v1/videos.cr @@ -89,9 +89,14 @@ module Invidious::Routes::API::V1::Videos if CONFIG.use_innertube_for_captions params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated) - initial_data = YoutubeAPI.get_transcript(params) - webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code) + transcript = Invidious::Videos::Transcript.from_raw( + YoutubeAPI.get_transcript(params), + caption.language_code, + caption.auto_generated + ) + + webvtt = transcript.to_vtt else # Timedtext API handling url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr index dac00eea..9cd064c5 100644 --- a/src/invidious/videos/transcript.cr +++ b/src/invidious/videos/transcript.cr @@ -1,8 +1,26 @@ module Invidious::Videos - # Namespace for methods primarily relating to Transcripts - module Transcript - record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String + # A `Transcripts` struct encapsulates a sequence of lines that together forms the whole transcript for a given YouTube video. + # These lines can be categorized into two types: section headings and regular lines representing content from the video. + struct Transcript + # Types + record HeadingLine, start_ms : Time::Span, end_ms : Time::Span, line : String + record RegularLine, start_ms : Time::Span, end_ms : Time::Span, line : String + alias TranscriptLine = HeadingLine | RegularLine + property lines : Array(TranscriptLine) + + property language_code : String + property auto_generated : Bool + + # User friendly label for the current transcript. + # Example: "English (auto-generated)" + property label : String + + # Initializes a new Transcript struct with the contents and associated metadata describing it + def initialize(@lines : Array(TranscriptLine), @language_code : String, @auto_generated : Bool, @label : String) + end + + # Generates a protobuf string to fetch the requested transcript from YouTube def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String kind = auto_generated ? "asr" : "" @@ -30,48 +48,79 @@ module Invidious::Videos return params end - def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String - # Convert into array of TranscriptLine - lines = self.parse(initial_data) + # Constructs a Transcripts struct from the initial YouTube response + def self.from_raw(initial_data : Hash(String, JSON::Any), language_code : String, auto_generated : Bool) + transcript_panel = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", + "content", "transcriptSearchPanelRenderer") - settings_field = { - "Kind" => "captions", - "Language" => target_language, - } + segment_list = transcript_panel.dig("body", "transcriptSegmentListRenderer") - # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt() - vtt = WebVTT.build(settings_field) do |vtt| - lines.each do |line| - vtt.cue(line.start_ms, line.end_ms, line.line) - end + if !segment_list["initialSegments"]? + raise NotFoundException.new("Requested transcript does not exist") end - return vtt - end + # Extract user-friendly label for the current transcript + + footer_language_menu = transcript_panel.dig?( + "footer", "transcriptFooterRenderer", "languageMenu", "sortFilterSubMenuRenderer", "subMenuItems" + ) - private def self.parse(initial_data : Hash(String, JSON::Any)) - body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer", - "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer", - "initialSegments").as_a + if footer_language_menu + label = footer_language_menu.as_a.select(&.["selected"].as_bool)[0]["title"].as_s + else + label = language_code + end + + # Extract transcript lines + + initial_segments = segment_list["initialSegments"].as_a lines = [] of TranscriptLine - body.each do |line| - # Transcript section headers. They are not apart of the captions and as such we can safely skip them. - if line.as_h.has_key?("transcriptSectionHeaderRenderer") - next + + initial_segments.each do |line| + if unpacked_line = line["transcriptSectionHeaderRenderer"]? + line_type = HeadingLine + else + unpacked_line = line["transcriptSegmentRenderer"] + line_type = RegularLine end - line = line["transcriptSegmentRenderer"] + start_ms = unpacked_line["startMs"].as_s.to_i.millisecond + end_ms = unpacked_line["endMs"].as_s.to_i.millisecond + text = extract_text(unpacked_line["snippet"]) || "" + + lines << line_type.new(start_ms, end_ms, text) + end + + return Transcript.new( + lines: lines, + language_code: language_code, + auto_generated: auto_generated, + label: label + ) + end - start_ms = line["startMs"].as_s.to_i.millisecond - end_ms = line["endMs"].as_s.to_i.millisecond + # Converts transcript lines to a WebVTT file + # + # This is used within Invidious to replace subtitles + # as to workaround YouTube's rate-limited timedtext endpoint. + def to_vtt + settings_field = { + "Kind" => "captions", + "Language" => @language_code, + } - text = extract_text(line["snippet"]) || "" + vtt = WebVTT.build(settings_field) do |vtt| + @lines.each do |line| + # Section headers are excluded from the VTT conversion as to + # match the regular captions returned from YouTube as much as possible + next if line.is_a? HeadingLine - lines << TranscriptLine.new(start_ms, end_ms, text) + vtt.cue(line.start_ms, line.end_ms, line.line) + end end - return lines + return vtt end end end diff --git a/src/invidious/views/channel.ecr b/src/invidious/views/channel.ecr index 09df106d..a84e44bc 100644 --- a/src/invidious/views/channel.ecr +++ b/src/invidious/views/channel.ecr @@ -30,13 +30,13 @@ <meta property="og:site_name" content="Invidious"> <meta property="og:url" content="<%= HOST_URL %>/channel/<%= ucid %>"> <meta property="og:title" content="<%= author %>"> -<meta property="og:image" content="/ggpht<%= channel_profile_pic %>"> +<meta property="og:image" content="<%= HOST_URL %>/ggpht<%= channel_profile_pic %>"> <meta property="og:description" content="<%= channel.description %>"> <meta name="twitter:card" content="summary"> <meta name="twitter:url" content="<%= HOST_URL %>/channel/<%= ucid %>"> <meta name="twitter:title" content="<%= author %>"> <meta name="twitter:description" content="<%= channel.description %>"> -<meta name="twitter:image" content="/ggpht<%= channel_profile_pic %>"> +<meta name="twitter:image" content="<%= HOST_URL %>/ggpht<%= channel_profile_pic %>"> <link rel="alternate" type="application/rss+xml" title="RSS" href="/feed/channel/<%= ucid %>" /> <%- end -%> diff --git a/src/invidious/views/watch.ecr b/src/invidious/views/watch.ecr index 7a1cf2c3..9e7467dd 100644 --- a/src/invidious/views/watch.ecr +++ b/src/invidious/views/watch.ecr @@ -10,7 +10,7 @@ <meta property="og:site_name" content="<%= author %> | Invidious"> <meta property="og:url" content="<%= HOST_URL %>/watch?v=<%= video.id %>"> <meta property="og:title" content="<%= title %>"> -<meta property="og:image" content="/vi/<%= video.id %>/maxres.jpg"> +<meta property="og:image" content="<%= HOST_URL %>/vi/<%= video.id %>/maxres.jpg"> <meta property="og:description" content="<%= HTML.escape(video.short_description) %>"> <meta property="og:type" content="video.other"> <meta property="og:video:url" content="<%= HOST_URL %>/embed/<%= video.id %>"> diff --git a/src/invidious/yt_backend/youtube_api.cr b/src/invidious/yt_backend/youtube_api.cr index 727ce9a3..c8b037c8 100644 --- a/src/invidious/yt_backend/youtube_api.cr +++ b/src/invidious/yt_backend/youtube_api.cr @@ -5,9 +5,6 @@ module YoutubeAPI extend self - private DEFAULT_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8" - private ANDROID_API_KEY = "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w" - # For Android versions, see https://en.wikipedia.org/wiki/Android_version_history private ANDROID_APP_VERSION = "19.14.42" private ANDROID_USER_AGENT = "com.google.android.youtube/19.14.42 (Linux; U; Android 12; US) gzip" @@ -52,7 +49,6 @@ module YoutubeAPI name: "WEB", name_proto: "1", version: "2.20240304.00.00", - api_key: DEFAULT_API_KEY, screen: "WATCH_FULL_SCREEN", os_name: "Windows", os_version: WINDOWS_VERSION, @@ -62,7 +58,6 @@ module YoutubeAPI name: "WEB_EMBEDDED_PLAYER", name_proto: "56", version: "1.20240303.00.00", - api_key: DEFAULT_API_KEY, screen: "EMBED", os_name: "Windows", os_version: WINDOWS_VERSION, @@ -72,7 +67,6 @@ module YoutubeAPI name: "MWEB", name_proto: "2", version: "2.20240304.08.00", - api_key: DEFAULT_API_KEY, os_name: "Android", os_version: ANDROID_VERSION, platform: "MOBILE", @@ -81,7 +75,6 @@ module YoutubeAPI name: "WEB", name_proto: "1", version: "2.20240304.00.00", - api_key: DEFAULT_API_KEY, screen: "EMBED", os_name: "Windows", os_version: WINDOWS_VERSION, @@ -94,7 +87,6 @@ module YoutubeAPI name: "ANDROID", name_proto: "3", version: ANDROID_APP_VERSION, - api_key: ANDROID_API_KEY, android_sdk_version: ANDROID_SDK_VERSION, user_agent: ANDROID_USER_AGENT, os_name: "Android", @@ -105,13 +97,11 @@ module YoutubeAPI name: "ANDROID_EMBEDDED_PLAYER", name_proto: "55", version: ANDROID_APP_VERSION, - api_key: "AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw", }, ClientType::AndroidScreenEmbed => { name: "ANDROID", name_proto: "3", version: ANDROID_APP_VERSION, - api_key: DEFAULT_API_KEY, screen: "EMBED", android_sdk_version: ANDROID_SDK_VERSION, user_agent: ANDROID_USER_AGENT, @@ -123,7 +113,6 @@ module YoutubeAPI name: "ANDROID_TESTSUITE", name_proto: "30", version: ANDROID_TS_APP_VERSION, - api_key: ANDROID_API_KEY, android_sdk_version: ANDROID_SDK_VERSION, user_agent: ANDROID_TS_USER_AGENT, os_name: "Android", @@ -137,7 +126,6 @@ module YoutubeAPI name: "IOS", name_proto: "5", version: IOS_APP_VERSION, - api_key: "AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc", user_agent: IOS_USER_AGENT, device_make: "Apple", device_model: "iPhone14,5", @@ -149,7 +137,6 @@ module YoutubeAPI name: "IOS_MESSAGES_EXTENSION", name_proto: "66", version: IOS_APP_VERSION, - api_key: DEFAULT_API_KEY, user_agent: IOS_USER_AGENT, device_make: "Apple", device_model: "iPhone14,5", @@ -161,7 +148,6 @@ module YoutubeAPI name: "IOS_MUSIC", name_proto: "26", version: "6.42", - api_key: "AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s", user_agent: "com.google.ios.youtubemusic/6.42 (iPhone14,5; U; CPU iOS 17_4 like Mac OS X;)", device_make: "Apple", device_model: "iPhone14,5", @@ -176,13 +162,11 @@ module YoutubeAPI name: "TVHTML5", name_proto: "7", version: "7.20240304.10.00", - api_key: DEFAULT_API_KEY, }, ClientType::TvHtml5ScreenEmbed => { name: "TVHTML5_SIMPLY_EMBEDDED_PLAYER", name_proto: "85", version: "2.0", - api_key: DEFAULT_API_KEY, screen: "EMBED", }, } @@ -238,11 +222,6 @@ module YoutubeAPI end # :ditto: - def api_key : String - HARDCODED_CLIENTS[@client_type][:api_key] - end - - # :ditto: def screen : String HARDCODED_CLIENTS[@client_type][:screen]? || "" end @@ -606,7 +585,7 @@ module YoutubeAPI client_config ||= DEFAULT_CLIENT_CONFIG # Query parameters - url = "#{endpoint}?key=#{client_config.api_key}&prettyPrint=false" + url = "#{endpoint}?prettyPrint=false" headers = HTTP::Headers{ "Content-Type" => "application/json; charset=UTF-8", |
