Diffstat (limited to 'src')
-rw-r--r--  src/invidious/channels/videos.cr         35
-rw-r--r--  src/invidious/frontend/watch_page.cr      2
-rw-r--r--  src/invidious/videos/description.cr     105
-rw-r--r--  src/invidious/videos/parser.cr           11
-rw-r--r--  src/invidious/yt_backend/extractors.cr   81
5 files changed, 174 insertions, 60 deletions
diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr
index befec03d..12ed4a7d 100644
--- a/src/invidious/channels/videos.cr
+++ b/src/invidious/channels/videos.cr
@@ -30,7 +30,9 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
"15:embedded" => {
"1:embedded" => {
"1:string" => object_inner_2_encoded,
- "2:string" => "00000000-0000-0000-0000-000000000000",
+ },
+ "2:embedded" => {
+ "1:string" => "00000000-0000-0000-0000-000000000000",
},
"3:varint" => sort_by_numerical,
},
@@ -127,38 +129,15 @@ module Invidious::Channel::Tabs
# Shorts
# -------------------
- private def fetch_shorts_data(ucid : String, continuation : String? = nil)
+ def get_shorts(channel : AboutChannel, continuation : String? = nil)
if continuation.nil?
# EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts"
# TODO: try to extract the continuation tokens that allow other sorting options
- return YoutubeAPI.browse(ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D")
+ initial_data = YoutubeAPI.browse(channel.ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D")
else
- return YoutubeAPI.browse(continuation: continuation)
- end
- end
-
- def get_shorts(channel : AboutChannel, continuation : String? = nil)
- initial_data = self.fetch_shorts_data(channel.ucid, continuation)
-
- begin
- # Try to parse the initial data fetched above
- return extract_items(initial_data, channel.author, channel.ucid)
- rescue ex : RetryOnceException
- # Sometimes, for a completely unknown reason, the "reelItemRenderer"
- # object is missing some critical information (it happens once in about
- # 20 subsequent requests). Refreshing the page is required to properly
- # show the "shorts" tab.
- #
- # In order to make the experience smoother for the user, we simulate
- # said page refresh by fetching again the JSON. If that still doesn't
- # work, we raise a BrokenTubeException, as something is really broken.
- begin
- initial_data = self.fetch_shorts_data(channel.ucid, continuation)
- return extract_items(initial_data, channel.author, channel.ucid)
- rescue ex : RetryOnceException
- raise BrokenTubeException.new "reelPlayerHeaderSupportedRenderers"
- end
+ initial_data = YoutubeAPI.browse(continuation: continuation)
end
+ return extract_items(initial_data, channel.author, channel.ucid)
end
# -------------------
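
Usage sketch (not part of the patch): the reworked get_shorts uses the hardcoded "shorts" params for the first page and a plain continuation token for later pages. The channel value and the token below are placeholders, and the module-level call style assumes Invidious::Channel::Tabs exposes get_shorts directly.

# `channel` is an AboutChannel fetched elsewhere; the token is a placeholder.
first_page = Invidious::Channel::Tabs.get_shorts(channel)
next_page  = Invidious::Channel::Tabs.get_shorts(channel, continuation: "TOKEN_FROM_PREVIOUS_PAGE")
# Both calls return whatever extract_items produced for that page.
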
diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr
index a9b00860..e3214469 100644
--- a/src/invidious/frontend/watch_page.cr
+++ b/src/invidious/frontend/watch_page.cr
@@ -20,7 +20,7 @@ module Invidious::Frontend::WatchPage
def download_widget(locale : String, video : Video, video_assets : VideoAssets) : String
if CONFIG.disabled?("downloads")
- return "<p id=\"download\">#{translate(locale, "Download is disabled.")}</p>"
+ return "<p id=\"download\">#{translate(locale, "Download is disabled")}</p>"
end
return String.build(4000) do |str|
diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr
new file mode 100644
index 00000000..2017955d
--- /dev/null
+++ b/src/invidious/videos/description.cr
@@ -0,0 +1,105 @@
+require "json"
+require "uri"
+
+def parse_command(command : JSON::Any?, string : String) : String?
+ on_tap = command.dig?("onTap", "innertubeCommand")
+
+ # 3rd party URL, extract original URL from YouTube tracking URL
+ if url_endpoint = on_tap.try &.["urlEndpoint"]?
+ youtube_url = URI.parse url_endpoint["url"].as_s
+
+ original_url = youtube_url.query_params["q"]?
+ if original_url.nil?
+ return ""
+ else
+ return "<a href=\"#{original_url}\">#{original_url}</a>"
+ end
+ # 1st party watch URL
+ elsif watch_endpoint = on_tap.try &.["watchEndpoint"]?
+ video_id = watch_endpoint["videoId"].as_s
+ time = watch_endpoint["startTimeSeconds"].as_i
+
+ url = "/watch?v=#{video_id}&t=#{time}s"
+
+ # If the string is a timestamp, use the string as the link text instead.
+ # This is a lazy regex for validating timestamps.
+ if /(?:\d{1,2}:){1,2}\d{2}/ =~ string
+ return "<a href=\"#{url}\">#{string}</a>"
+ else
+ return "<a href=\"#{url}\">#{url}</a>"
+ end
+ # hashtag/other browse URLs
+ elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata")
+ url = browse_endpoint["url"].try &.as_s
+
+ # Keep only the @handle part of the channel name (drop any extra characters)
+ if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL"
+ name = string.match(/@[\w\d.-]+/)
+ if name.try &.[0]?
+ return "<a href=\"#{url}\">#{name.try &.[0]}</a>"
+ end
+ end
+
+ return "<a href=\"#{url}\">#{string}</a>"
+ end
+
+ return "(unknown YouTube desc command)"
+end
+
+private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int
+ copied = 0
+ while copied < count
+ cp = iter.next
+ break if cp.is_a?(Iterator::Stop)
+
+ str << cp.chr
+
+ # A codepoint from the SMP counts twice
+ copied += 1 if cp > 0xFFFF
+ copied += 1
+ end
+
+ return copied
+end
+
+def parse_description(desc : JSON::Any?) : String?
+ return "" if desc.nil?
+
+ content = desc["content"].as_s
+ return "" if content.empty?
+
+ commands = desc["commandRuns"]?.try &.as_a
+ return content if commands.nil?
+
+ # Not everything is stored in UTF-8 on YouTube's side. The SMP codepoints
+ # (0x10000 and above) are encoded as UTF-16 surrogate pairs, which are
+ # automatically decoded by the JSON parser. This means copied characters
+ # must be counted in UTF-16 code units, preventing a regular string copy.
+ iter = content.each_codepoint
+
+ index = 0
+
+ return String.build do |str|
+ commands.each do |command|
+ cmd_start = command["startIndex"].as_i
+ cmd_length = command["length"].as_i
+
+ # Copy the text chunk between this command and the previous if needed.
+ length = cmd_start - index
+ index += copy_string(str, iter, length)
+
+ # We need to copy the command's text using the iterator
+ # and the special function defined above.
+ cmd_content = String.build(cmd_length) do |str2|
+ copy_string(str2, iter, cmd_length)
+ end
+
+ str << parse_command(command, cmd_content)
+ index += cmd_length
+ end
+
+ # Copy the end of the string (past the last command).
+ remaining_length = content.size - index
+ copy_string(str, iter, remaining_length) if remaining_length > 0
+ end
+end
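
To see why copy_string counts SMP codepoints twice: the startIndex and length values in commandRuns appear to be UTF-16 code-unit offsets, so any character above U+FFFF advances the index by two. A standalone sketch (not part of the patch, helper name made up) using only the standard library:

def utf16_units(text : String) : Int32
  units = 0
  text.each_codepoint do |cp|
    units += 1
    units += 1 if cp > 0xFFFF # stored as a surrogate pair in UTF-16
  end
  units
end

puts "abc".size           # => 3 codepoints
puts utf16_units("abc")   # => 3 UTF-16 units
puts "a😀b".size           # => 3 codepoints
puts utf16_units("a😀b")   # => 4 UTF-16 units; indices after the emoji shift by one
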
diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr
index 608ae99d..1c6d118d 100644
--- a/src/invidious/videos/parser.cr
+++ b/src/invidious/videos/parser.cr
@@ -284,8 +284,10 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
description = microformat.dig?("description", "simpleText").try &.as_s || ""
short_description = player_response.dig?("videoDetails", "shortDescription")
- description_html = video_secondary_renderer.try &.dig?("description", "runs")
- .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
+ # description_html = video_secondary_renderer.try &.dig?("description", "runs")
+ # .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
+
+ description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"))
# Video metadata
@@ -330,7 +332,10 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
# Used when the video has multiple songs
if song_title = music_desc.dig?("carouselLockupRenderer", "videoLockup", "compactVideoRenderer", "title")
# "simpleText" for plain text / "runs" when song has a link
- song = song_title["simpleText"]? || song_title.dig("runs", 0, "text")
+ song = song_title["simpleText"]? || song_title.dig?("runs", 0, "text")
+
+ # some videos can have empty tracks. See: https://www.youtube.com/watch?v=eBGIQ7ZuuiU
+ next if !song
end
music_desc.dig?("carouselLockupRenderer", "infoRows").try &.as_a.each do |desc|
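
A sketch (not part of the patch) of what the new description path consumes: a made-up attributedDescription-shaped payload fed to parse_description from src/invidious/videos/description.cr above. Only the field names used there are assumed.

require "json"

# "https://example.com" occupies UTF-16 indices 6..24 of the content string.
desc = JSON.parse(%({
  "content": "Watch https://example.com at 01:23",
  "commandRuns": [{
    "startIndex": 6,
    "length": 19,
    "onTap": {"innertubeCommand": {"urlEndpoint": {
      "url": "https://www.youtube.com/redirect?q=https%3A%2F%2Fexample.com"
    }}}
  }]
}))

puts parse_description(desc)
# => Watch <a href="https://example.com">https://example.com</a> at 01:23
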
diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index b14ad7b9..9c041361 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -18,6 +18,7 @@ private ITEM_PARSERS = {
Parsers::CategoryRendererParser,
Parsers::RichItemRendererParser,
Parsers::ReelItemRendererParser,
+ Parsers::ItemSectionRendererParser,
Parsers::ContinuationItemRendererParser,
}
@@ -377,6 +378,30 @@ private module Parsers
end
end
+ # Parses an InnerTube itemSectionRenderer into a SearchVideo.
+ # Returns nil when the given object isn't an itemSectionRenderer.
+ #
+ # An itemSectionRenderer seems to be a simple wrapper for a videoRenderer, used
+ # by the results page for channel searches. It is located inside a continuationItems
+ # container. It is very similar to RichItemRendererParser.
+ #
+ module ItemSectionRendererParser
+ def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+ if item_contents = item.dig?("itemSectionRenderer", "contents", 0)
+ return self.parse(item_contents, author_fallback)
+ end
+ end
+
+ private def self.parse(item_contents, author_fallback)
+ child = VideoRendererParser.process(item_contents, author_fallback)
+ return child
+ end
+
+ def self.parser_name
+ return {{@type.name}}
+ end
+ end
+
# Parses an InnerTube richItemRenderer into a SearchVideo.
# Returns nil when the given object isn't a RichItemRenderer
#
@@ -423,44 +448,43 @@ private module Parsers
"overlay", "reelPlayerOverlayRenderer"
)
- # Sometimes, the "reelPlayerOverlayRenderer" object is missing the
- # important part of the response. We use this exception to tell
- # the calling function to fetch the content again.
- if !reel_player_overlay.as_h.has_key?("reelPlayerHeaderSupportedRenderers")
- raise RetryOnceException.new
- end
-
- video_details_container = reel_player_overlay.dig(
- "reelPlayerHeaderSupportedRenderers",
- "reelPlayerHeaderRenderer"
- )
-
- # Author infos
+ if video_details_container = reel_player_overlay.dig?(
+ "reelPlayerHeaderSupportedRenderers",
+ "reelPlayerHeaderRenderer"
+ )
+ # Author infos
- author = video_details_container
- .dig?("channelTitleText", "runs", 0, "text")
- .try &.as_s || author_fallback.name
+ author = video_details_container
+ .dig?("channelTitleText", "runs", 0, "text")
+ .try &.as_s || author_fallback.name
- ucid = video_details_container
- .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
- .try &.as_s || author_fallback.id
+ ucid = video_details_container
+ .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
+ .try &.as_s || author_fallback.id
- # Title & publication date
+ # Title & publication date
- title = video_details_container.dig?("reelTitleText")
- .try { |t| extract_text(t) } || ""
+ title = video_details_container.dig?("reelTitleText")
+ .try { |t| extract_text(t) } || ""
- published = video_details_container
- .dig?("timestampText", "simpleText")
- .try { |t| decode_date(t.as_s) } || Time.utc
+ published = video_details_container
+ .dig?("timestampText", "simpleText")
+ .try { |t| decode_date(t.as_s) } || Time.utc
+ # View count
+ view_count_text = video_details_container.dig?("viewCountText", "simpleText")
+ else
+ author = author_fallback.name
+ ucid = author_fallback.id
+ published = Time.utc
+ title = item_contents.dig?("headline", "simpleText").try &.as_s || ""
+ end
# View count
# View count used to be in the reelWatchEndpoint, but that changed?
- view_count_text = item_contents.dig?("viewCountText", "simpleText")
- view_count_text ||= video_details_container.dig?("viewCountText", "simpleText")
+ view_count_text ||= item_contents.dig?("viewCountText", "simpleText")
- view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+ view_count = short_text_to_number(view_count_text.try &.as_s || "0")
# Duration
@@ -773,6 +797,7 @@ end
def extract_items(initial_data : InitialData, &block)
if unpackaged_data = initial_data["contents"]?.try &.as_h
elsif unpackaged_data = initial_data["response"]?.try &.as_h
+ elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 1).try &.as_h
elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h
else
unpackaged_data = initial_data
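
For reference, a sketch (not part of the patch) of the wrapper shape the new ItemSectionRendererParser handles; the videoRenderer body is trimmed down to a sample videoId.

require "json"

item = JSON.parse(%({
  "itemSectionRenderer": {
    "contents": [
      {"videoRenderer": {"videoId": "aaaaaaaaaaa"}}
    ]
  }
}))

# Same lookup as ItemSectionRendererParser.process: unwrap the container and
# hand the inner object to VideoRendererParser.
inner = item.dig?("itemSectionRenderer", "contents", 0)
puts inner # prints the inner videoRenderer object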