diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/invidious/channels/videos.cr | 35 | ||||
| -rw-r--r-- | src/invidious/frontend/watch_page.cr | 2 | ||||
| -rw-r--r-- | src/invidious/videos/description.cr | 105 | ||||
| -rw-r--r-- | src/invidious/videos/parser.cr | 11 | ||||
| -rw-r--r-- | src/invidious/yt_backend/extractors.cr | 81 |
5 files changed, 174 insertions, 60 deletions
diff --git a/src/invidious/channels/videos.cr b/src/invidious/channels/videos.cr index befec03d..12ed4a7d 100644 --- a/src/invidious/channels/videos.cr +++ b/src/invidious/channels/videos.cr @@ -30,7 +30,9 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so "15:embedded" => { "1:embedded" => { "1:string" => object_inner_2_encoded, - "2:string" => "00000000-0000-0000-0000-000000000000", + }, + "2:embedded" => { + "1:string" => "00000000-0000-0000-0000-000000000000", }, "3:varint" => sort_by_numerical, }, @@ -127,38 +129,15 @@ module Invidious::Channel::Tabs # Shorts # ------------------- - private def fetch_shorts_data(ucid : String, continuation : String? = nil) + def get_shorts(channel : AboutChannel, continuation : String? = nil) if continuation.nil? # EgZzaG9ydHPyBgUKA5oBAA%3D%3D is the protobuf object to load "shorts" # TODO: try to extract the continuation tokens that allows other sorting options - return YoutubeAPI.browse(ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D") + initial_data = YoutubeAPI.browse(channel.ucid, params: "EgZzaG9ydHPyBgUKA5oBAA%3D%3D") else - return YoutubeAPI.browse(continuation: continuation) - end - end - - def get_shorts(channel : AboutChannel, continuation : String? = nil) - initial_data = self.fetch_shorts_data(channel.ucid, continuation) - - begin - # Try to parse the initial data fetched above - return extract_items(initial_data, channel.author, channel.ucid) - rescue ex : RetryOnceException - # Sometimes, for a completely unknown reason, the "reelItemRenderer" - # object is missing some critical information (it happens once in about - # 20 subsequent requests). Refreshing the page is required to properly - # show the "shorts" tab. - # - # In order to make the experience smoother for the user, we simulate - # said page refresh by fetching again the JSON. If that still doesn't - # work, we raise a BrokenTubeException, as something is really broken. - begin - initial_data = self.fetch_shorts_data(channel.ucid, continuation) - return extract_items(initial_data, channel.author, channel.ucid) - rescue ex : RetryOnceException - raise BrokenTubeException.new "reelPlayerHeaderSupportedRenderers" - end + initial_data = YoutubeAPI.browse(continuation: continuation) end + return extract_items(initial_data, channel.author, channel.ucid) end # ------------------- diff --git a/src/invidious/frontend/watch_page.cr b/src/invidious/frontend/watch_page.cr index a9b00860..e3214469 100644 --- a/src/invidious/frontend/watch_page.cr +++ b/src/invidious/frontend/watch_page.cr @@ -20,7 +20,7 @@ module Invidious::Frontend::WatchPage def download_widget(locale : String, video : Video, video_assets : VideoAssets) : String if CONFIG.disabled?("downloads") - return "<p id=\"download\">#{translate(locale, "Download is disabled.")}</p>" + return "<p id=\"download\">#{translate(locale, "Download is disabled")}</p>" end return String.build(4000) do |str| diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr new file mode 100644 index 00000000..2017955d --- /dev/null +++ b/src/invidious/videos/description.cr @@ -0,0 +1,105 @@ +require "json" +require "uri" + +def parse_command(command : JSON::Any?, string : String) : String? + on_tap = command.dig?("onTap", "innertubeCommand") + + # 3rd party URL, extract original URL from YouTube tracking URL + if url_endpoint = on_tap.try &.["urlEndpoint"]? + youtube_url = URI.parse url_endpoint["url"].as_s + + original_url = youtube_url.query_params["q"]? + if original_url.nil? + return "" + else + return "<a href=\"#{original_url}\">#{original_url}</a>" + end + # 1st party watch URL + elsif watch_endpoint = on_tap.try &.["watchEndpoint"]? + video_id = watch_endpoint["videoId"].as_s + time = watch_endpoint["startTimeSeconds"].as_i + + url = "/watch?v=#{video_id}&t=#{time}s" + + # if string is a timestamp, use the string instead + # this is a lazy regex for validating timestamps + if /(?:\d{1,2}:){1,2}\d{2}/ =~ string + return "<a href=\"#{url}\">#{string}</a>" + else + return "<a href=\"#{url}\">#{url}</a>" + end + # hashtag/other browse URLs + elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata") + url = browse_endpoint["url"].try &.as_s + + # remove unnecessary character in a channel name + if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL" + name = string.match(/@[\w\d.-]+/) + if name.try &.[0]? + return "<a href=\"#{url}\">#{name.try &.[0]}</a>" + end + end + + return "<a href=\"#{url}\">#{string}</a>" + end + + return "(unknown YouTube desc command)" +end + +private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int + copied = 0 + while copied < count + cp = iter.next + break if cp.is_a?(Iterator::Stop) + + str << cp.chr + + # A codepoint from the SMP counts twice + copied += 1 if cp > 0xFFFF + copied += 1 + end + + return copied +end + +def parse_description(desc : JSON::Any?) : String? + return "" if desc.nil? + + content = desc["content"].as_s + return "" if content.empty? + + commands = desc["commandRuns"]?.try &.as_a + return content if commands.nil? + + # Not everything is stored in UTF-8 on youtube's side. The SMP codepoints + # (0x10000 and above) are encoded as UTF-16 surrogate pairs, which are + # automatically decoded by the JSON parser. It means that we need to count + # copied byte in a special manner, preventing the use of regular string copy. + iter = content.each_codepoint + + index = 0 + + return String.build do |str| + commands.each do |command| + cmd_start = command["startIndex"].as_i + cmd_length = command["length"].as_i + + # Copy the text chunk between this command and the previous if needed. + length = cmd_start - index + index += copy_string(str, iter, length) + + # We need to copy the command's text using the iterator + # and the special function defined above. + cmd_content = String.build(cmd_length) do |str2| + copy_string(str2, iter, cmd_length) + end + + str << parse_command(command, cmd_content) + index += cmd_length + end + + # Copy the end of the string (past the last command). + remaining_length = content.size - index + copy_string(str, iter, remaining_length) if remaining_length > 0 + end +end diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr index 608ae99d..1c6d118d 100644 --- a/src/invidious/videos/parser.cr +++ b/src/invidious/videos/parser.cr @@ -284,8 +284,10 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any description = microformat.dig?("description", "simpleText").try &.as_s || "" short_description = player_response.dig?("videoDetails", "shortDescription") - description_html = video_secondary_renderer.try &.dig?("description", "runs") - .try &.as_a.try { |t| content_to_comment_html(t, video_id) } + # description_html = video_secondary_renderer.try &.dig?("description", "runs") + # .try &.as_a.try { |t| content_to_comment_html(t, video_id) } + + description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription")) # Video metadata @@ -330,7 +332,10 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any # Used when the video has multiple songs if song_title = music_desc.dig?("carouselLockupRenderer", "videoLockup", "compactVideoRenderer", "title") # "simpleText" for plain text / "runs" when song has a link - song = song_title["simpleText"]? || song_title.dig("runs", 0, "text") + song = song_title["simpleText"]? || song_title.dig?("runs", 0, "text") + + # some videos can have empty tracks. See: https://www.youtube.com/watch?v=eBGIQ7ZuuiU + next if !song end music_desc.dig?("carouselLockupRenderer", "infoRows").try &.as_a.each do |desc| diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr index b14ad7b9..9c041361 100644 --- a/src/invidious/yt_backend/extractors.cr +++ b/src/invidious/yt_backend/extractors.cr @@ -18,6 +18,7 @@ private ITEM_PARSERS = { Parsers::CategoryRendererParser, Parsers::RichItemRendererParser, Parsers::ReelItemRendererParser, + Parsers::ItemSectionRendererParser, Parsers::ContinuationItemRendererParser, } @@ -377,6 +378,30 @@ private module Parsers end end + # Parses an InnerTube itemSectionRenderer into a SearchVideo. + # Returns nil when the given object isn't a ItemSectionRenderer + # + # A itemSectionRenderer seems to be a simple wrapper for a videoRenderer, used + # by the result page for channel searches. It is located inside a continuationItems + # container.It is very similar to RichItemRendererParser + # + module ItemSectionRendererParser + def self.process(item : JSON::Any, author_fallback : AuthorFallback) + if item_contents = item.dig?("itemSectionRenderer", "contents", 0) + return self.parse(item_contents, author_fallback) + end + end + + private def self.parse(item_contents, author_fallback) + child = VideoRendererParser.process(item_contents, author_fallback) + return child + end + + def self.parser_name + return {{@type.name}} + end + end + # Parses an InnerTube richItemRenderer into a SearchVideo. # Returns nil when the given object isn't a RichItemRenderer # @@ -423,44 +448,43 @@ private module Parsers "overlay", "reelPlayerOverlayRenderer" ) - # Sometimes, the "reelPlayerOverlayRenderer" object is missing the - # important part of the response. We use this exception to tell - # the calling function to fetch the content again. - if !reel_player_overlay.as_h.has_key?("reelPlayerHeaderSupportedRenderers") - raise RetryOnceException.new - end - - video_details_container = reel_player_overlay.dig( - "reelPlayerHeaderSupportedRenderers", - "reelPlayerHeaderRenderer" - ) - - # Author infos + if video_details_container = reel_player_overlay.dig?( + "reelPlayerHeaderSupportedRenderers", + "reelPlayerHeaderRenderer" + ) + # Author infos - author = video_details_container - .dig?("channelTitleText", "runs", 0, "text") - .try &.as_s || author_fallback.name + author = video_details_container + .dig?("channelTitleText", "runs", 0, "text") + .try &.as_s || author_fallback.name - ucid = video_details_container - .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId") - .try &.as_s || author_fallback.id + ucid = video_details_container + .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId") + .try &.as_s || author_fallback.id - # Title & publication date + # Title & publication date - title = video_details_container.dig?("reelTitleText") - .try { |t| extract_text(t) } || "" + title = video_details_container.dig?("reelTitleText") + .try { |t| extract_text(t) } || "" - published = video_details_container - .dig?("timestampText", "simpleText") - .try { |t| decode_date(t.as_s) } || Time.utc + published = video_details_container + .dig?("timestampText", "simpleText") + .try { |t| decode_date(t.as_s) } || Time.utc + # View count + view_count_text = video_details_container.dig?("viewCountText", "simpleText") + else + author = author_fallback.name + ucid = author_fallback.id + published = Time.utc + title = item_contents.dig?("headline", "simpleText").try &.as_s || "" + end # View count # View count used to be in the reelWatchEndpoint, but that changed? - view_count_text = item_contents.dig?("viewCountText", "simpleText") - view_count_text ||= video_details_container.dig?("viewCountText", "simpleText") + view_count_text ||= item_contents.dig?("viewCountText", "simpleText") - view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 + view_count = short_text_to_number(view_count_text.try &.as_s || "0") # Duration @@ -773,6 +797,7 @@ end def extract_items(initial_data : InitialData, &block) if unpackaged_data = initial_data["contents"]?.try &.as_h elsif unpackaged_data = initial_data["response"]?.try &.as_h + elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 1).try &.as_h elsif unpackaged_data = initial_data.dig?("onResponseReceivedActions", 0).try &.as_h else unpackaged_data = initial_data |
