summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/invidious/routes/api/v1/videos.cr9
-rw-r--r--src/invidious/videos/transcript.cr86
2 files changed, 61 insertions, 34 deletions
diff --git a/src/invidious/routes/api/v1/videos.cr b/src/invidious/routes/api/v1/videos.cr
index 9281f4dd..faff2f59 100644
--- a/src/invidious/routes/api/v1/videos.cr
+++ b/src/invidious/routes/api/v1/videos.cr
@@ -89,9 +89,14 @@ module Invidious::Routes::API::V1::Videos
if CONFIG.use_innertube_for_captions
params = Invidious::Videos::Transcript.generate_param(id, caption.language_code, caption.auto_generated)
- initial_data = YoutubeAPI.get_transcript(params)
- webvtt = Invidious::Videos::Transcript.convert_transcripts_to_vtt(initial_data, caption.language_code)
+ transcript = Invidious::Videos::Transcript.from_raw(
+ YoutubeAPI.get_transcript(params),
+ caption.language_code,
+ caption.auto_generated
+ )
+
+ webvtt = transcript.to_vtt
else
# Timedtext API handling
url = URI.parse("#{caption.base_url}&tlang=#{tlang}").request_target
diff --git a/src/invidious/videos/transcript.cr b/src/invidious/videos/transcript.cr
index dac00eea..42f29f46 100644
--- a/src/invidious/videos/transcript.cr
+++ b/src/invidious/videos/transcript.cr
@@ -1,8 +1,21 @@
module Invidious::Videos
- # Namespace for methods primarily relating to Transcripts
- module Transcript
- record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String
+ # A `Transcripts` struct encapsulates a sequence of lines that together forms the whole transcript for a given YouTube video.
+ # These lines can be categorized into two types: section headings and regular lines representing content from the video.
+ struct Transcript
+ # Types
+ record HeadingLine, start_ms : Time::Span, end_ms : Time::Span, line : String
+ record RegularLine, start_ms : Time::Span, end_ms : Time::Span, line : String
+ alias TranscriptLine = HeadingLine | RegularLine
+
+ property lines : Array(TranscriptLine)
+ property language_code : String
+ property auto_generated : Bool
+
+ # Initializes a new Transcript struct with the contents and associated metadata describing it
+ def initialize(@lines : Array(TranscriptLine), @language_code : String, @auto_generated : Bool)
+ end
+ # Generates a protobuf string to fetch the requested transcript from YouTube
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
kind = auto_generated ? "asr" : ""
@@ -30,48 +43,57 @@ module Invidious::Videos
return params
end
- def self.convert_transcripts_to_vtt(initial_data : Hash(String, JSON::Any), target_language : String) : String
- # Convert into array of TranscriptLine
- lines = self.parse(initial_data)
-
- settings_field = {
- "Kind" => "captions",
- "Language" => target_language,
- }
-
- # Taken from Invidious::Videos::Captions::Metadata.timedtext_to_vtt()
- vtt = WebVTT.build(settings_field) do |vtt|
- lines.each do |line|
- vtt.cue(line.start_ms, line.end_ms, line.line)
- end
- end
-
- return vtt
- end
-
- private def self.parse(initial_data : Hash(String, JSON::Any))
+ # Constructs a Transcripts struct from the initial YouTube response
+ def self.from_raw(initial_data : Hash(String, JSON::Any), language_code : String, auto_generated : Bool)
body = initial_data.dig("actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
"content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer",
"initialSegments").as_a
lines = [] of TranscriptLine
+
body.each do |line|
- # Transcript section headers. They are not apart of the captions and as such we can safely skip them.
- if line.as_h.has_key?("transcriptSectionHeaderRenderer")
- next
+ if unpacked_line = line["transcriptSectionHeaderRenderer"]?
+ line_type = HeadingLine
+ else
+ unpacked_line = line["transcriptSegmentRenderer"]
+ line_type = RegularLine
end
- line = line["transcriptSegmentRenderer"]
+ start_ms = unpacked_line["startMs"].as_s.to_i.millisecond
+ end_ms = unpacked_line["endMs"].as_s.to_i.millisecond
+ text = extract_text(unpacked_line["snippet"]) || ""
- start_ms = line["startMs"].as_s.to_i.millisecond
- end_ms = line["endMs"].as_s.to_i.millisecond
+ lines << line_type.new(start_ms, end_ms, text)
+ end
- text = extract_text(line["snippet"]) || ""
+ return Transcript.new(
+ lines: lines,
+ language_code: language_code,
+ auto_generated: auto_generated,
+ )
+ end
- lines << TranscriptLine.new(start_ms, end_ms, text)
+ # Converts transcript lines to a WebVTT file
+ #
+ # This is used within Invidious to replace subtitles
+ # as to workaround YouTube's rate-limited timedtext endpoint.
+ def to_vtt
+ settings_field = {
+ "Kind" => "captions",
+ "Language" => @language_code,
+ }
+
+ vtt = WebVTT.build(settings_field) do |vtt|
+ @lines.each do |line|
+ # Section headers are excluded from the VTT conversion as to
+ # match the regular captions returned from YouTube as much as possible
+ next if line.is_a? HeadingLine
+
+ vtt.cue(line.start_ms, line.end_ms, line.line)
+ end
end
- return lines
+ return vtt
end
end
end