extractors: Add support for reelItemRenderer

author: Samantaz Fox <coding@samantaz.fr> 2022-10-31 21:30:10 +0100
committer: Samantaz Fox <coding@samantaz.fr> 2022-11-01 17:48:43 +0100
commit: 46a63e6150f83bca90563068ebb12ecdf5e0d3c6 (patch)
tree: 8d0ff934a484cfb2dedd8a44ac942e548d7c0704 /src
parent: f267394bbe2bd972e0157913ae253bfaa79ead0f (diff)
download: invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.tar.gz
invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.tar.bz2
invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.zip
1 files changed, 86 insertions, 1 deletions
diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index 18b48152..8112930d 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -17,6 +17,7 @@ private ITEM_PARSERS = {
   Parsers::PlaylistRendererParser,
   Parsers::CategoryRendererParser,
   Parsers::RichItemRendererParser,
+  Parsers::ReelItemRendererParser,
 }
 
 record AuthorFallback, name : String, id : String
@@ -369,7 +370,7 @@ private module Parsers
   end
 
   # Parses an InnerTube richItemRenderer into a SearchVideo.
-  # Returns nil when the given object isn't a shelfRenderer
+  # Returns nil when the given object isn't a richItemRenderer
   #
   # A richItemRenderer seems to be a simple wrapper for a videoRenderer, used
   # by the result page for hashtags. It is located inside a continuationItems
@@ -390,6 +391,90 @@ private module Parsers
       return {{@type.name}}
     end
   end
+
+  # Parses an InnerTube reelItemRenderer into a SearchVideo.
+  # Returns nil when the given object isn't a reelItemRenderer.
+  #
+  # reelItemRenderer items are used in the new (2022) channel layout,
+  # in the "shorts" tab. An author name or ID missing from the item
+  # is taken from the given AuthorFallback.
+  module ReelItemRendererParser
+    def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+      if item_contents = item["reelItemRenderer"]?
+        return self.parse(item_contents, author_fallback)
+      end
+    end
+
+    private def self.parse(item_contents, author_fallback)
+      video_id = item_contents["videoId"].as_s
+
+      video_details_container = item_contents.dig(
+        "navigationEndpoint", "reelWatchEndpoint",
+        "overlay", "reelPlayerOverlayRenderer",
+        "reelPlayerHeaderSupportedRenderers",
+        "reelPlayerHeaderRenderer"
+      )
+
+      # Author infos (default: values from author_fallback)
+
+      author = video_details_container
+        .dig?("channelTitleText", "runs", 0, "text")
+        .try &.as_s || author_fallback.name
+
+      ucid = video_details_container
+        .dig?("channelNavigationEndpoint", "browseEndpoint", "browseId")
+        .try &.as_s || author_fallback.id
+
+      # Title & publication date (default: empty title, current UTC time)
+
+      title = video_details_container.dig?("reelTitleText")
+        .try { |t| extract_text(t) } || ""
+
+      published = video_details_container
+        .dig?("timestampText", "simpleText")
+        .try { |t| decode_date(t.as_s) } || Time.utc
+
+      # View count (simpleText first, then the a11y label; default: 0)
+
+      view_count_text = video_details_container.dig?("viewCountText", "simpleText")
+      view_count_text ||= video_details_container
+        .dig?("viewCountText", "accessibility", "accessibilityData", "label")
+
+      view_count = view_count_text.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+
+      # Duration (parsed from the item's accessibility label; default: 0)
+
+      a11y_data = item_contents
+        .dig?("accessibility", "accessibilityData", "label")
+        .try &.as_s || ""
+
+      regex_match = /- (?<min>\d+ minutes? )?(?<sec>\d+ seconds?)+ -/.match(a11y_data)
+
+      # "min" may be unmatched: MatchData#[] would raise KeyError, so use #[]?
+      minutes = regex_match.try &.["min"]?.try &.to_i(strict: false) || 0
+      seconds = regex_match.try &.["sec"]?.try &.to_i(strict: false) || 0
+      duration = (minutes*60 + seconds)
+
+      SearchVideo.new({
+        title:              title,
+        id:                 video_id,
+        author:             author,
+        ucid:               ucid,
+        published:          published,
+        views:              view_count,
+        description_html:   "",
+        length_seconds:     duration,
+        live_now:           false,
+        premium:            false,
+        premiere_timestamp: Time.unix(0),
+        author_verified:    false,
+      })
+    end
+
+    def self.parser_name
+      return {{@type.name}}
+    end
+  end
 end
 
 # The following are the extractors for extracting an array of items from
author	Samantaz Fox <coding@samantaz.fr>	2022-10-31 21:30:10 +0100
committer	Samantaz Fox <coding@samantaz.fr>	2022-11-01 17:48:43 +0100
commit	46a63e6150f83bca90563068ebb12ecdf5e0d3c6 (patch)
tree	8d0ff934a484cfb2dedd8a44ac942e548d7c0704 /src
parent	f267394bbe2bd972e0157913ae253bfaa79ead0f (diff)
download	invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.tar.gz invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.tar.bz2 invidious-46a63e6150f83bca90563068ebb12ecdf5e0d3c6.zip