Merge pull request #2856 from SamantazFox/fix-related-videos

Fix related videos
author: Samantaz Fox <coding@samantaz.fr> 2022-02-03 19:23:32 +0100
committer: GitHub <noreply@github.com> 2022-02-03 19:23:32 +0100
commit: bd221b7b2c861c50339db69d0654c9fc30080031 (patch)
tree: 4221b59e6126e4c30f24a8a6394e9461e17bf7de /src
parent: 0ca333715b06689f3120c136dfeab493f30bf867 (diff)
parent: ba37259258277aafb6fc700dabecead695bd624e (diff)
download: invidious-bd221b7b2c861c50339db69d0654c9fc30080031.tar.gz
invidious-bd221b7b2c861c50339db69d0654c9fc30080031.tar.bz2
invidious-bd221b7b2c861c50339db69d0654c9fc30080031.zip
4 files changed, 124 insertions, 44 deletions
diff --git a/src/invidious/exceptions.cr b/src/invidious/exceptions.cr
new file mode 100644
index 00000000..391a574d
--- /dev/null
+++ b/src/invidious/exceptions.cr
@@ -0,0 +1,8 @@
+# Exception used to hold the name of the missing item
+# Should be used in all parsing functions
+class BrokenTubeException < InfoException
+  getter element : String
+
+  def initialize(@element)
+  end
+end
diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr
index d77d56d2..446e8e03 100644
--- a/src/invidious/videos.cr
+++ b/src/invidious/videos.cr
@@ -446,7 +446,7 @@ struct Video
                 end
 
                 json.field "author", rv["author"]
-                json.field "authorUrl", rv["author_url"]?
+                json.field "authorUrl", "/channel/#{rv["ucid"]?}"
                 json.field "authorId", rv["ucid"]?
                 if rv["author_thumbnail"]?
                   json.field "authorThumbnails" do
@@ -455,7 +455,7 @@ struct Video
 
                       qualities.each do |quality|
                         json.object do
-                          json.field "url", rv["author_thumbnail"]?.try &.gsub(/s\d+-/, "s#{quality}-")
+                          json.field "url", rv["author_thumbnail"].gsub(/s\d+-/, "s#{quality}-")
                           json.field "width", quality
                           json.field "height", quality
                         end
@@ -465,7 +465,7 @@ struct Video
                 end
 
                 json.field "lengthSeconds", rv["length_seconds"]?.try &.to_i
-                json.field "viewCountText", rv["short_view_count_text"]?
+                json.field "viewCountText", rv["short_view_count"]?
                 json.field "viewCount", rv["view_count"]?.try &.empty? ? nil : rv["view_count"].to_i64
               end
             end
@@ -802,23 +802,50 @@ class VideoRedirect < Exception
   end
 end
 
-def parse_related(r : JSON::Any) : JSON::Any?
-  # TODO: r["endScreenPlaylistRenderer"], etc.
-  return if !r["endScreenVideoRenderer"]?
-  r = r["endScreenVideoRenderer"].as_h
-
-  return if !r["lengthInSeconds"]?
-
-  rv = {} of String => JSON::Any
-  rv["author"] = r["shortBylineText"]["runs"][0]?.try &.["text"] || JSON::Any.new("")
-  rv["ucid"] = r["shortBylineText"]["runs"][0]?.try &.["navigationEndpoint"]["browseEndpoint"]["browseId"] || JSON::Any.new("")
-  rv["author_url"] = JSON::Any.new("/channel/#{rv["ucid"]}")
-  rv["length_seconds"] = JSON::Any.new(r["lengthInSeconds"].as_i.to_s)
-  rv["title"] = r["title"]["simpleText"]
-  rv["short_view_count_text"] = JSON::Any.new(r["shortViewCountText"]?.try &.["simpleText"]?.try &.as_s || "")
-  rv["view_count"] = JSON::Any.new(r["title"]["accessibility"]?.try &.["accessibilityData"]["label"].as_s.match(/(?<views>[1-9](\d+,?)*) views/).try &.["views"].gsub(/\D/, "") || "")
-  rv["id"] = r["videoId"]
-  JSON::Any.new(rv)
+# Used to parse both "compactVideoRenderer" and "endScreenVideoRenderer".
+# The former is preferred as it has more videos in it. The latter has
+# the same first 11 entries as the compact renderer.
+#
+# TODO: "compactRadioRenderer" (Mix) and
+def parse_related_video(related : JSON::Any) : Hash(String, JSON::Any)?
+  return nil if !related["videoId"]?
+
+  # The compact renderer has video length in seconds, whereas the end
+  # screen renderer has a full text version ("42:40")
+  length = related["lengthInSeconds"]?.try &.as_i.to_s
+  length ||= related.dig?("lengthText", "simpleText").try do |box|
+    decode_length_seconds(box.as_s).to_s
+  end
+
+  # Both have "short", so the "long" option shouldn't be required
+  channel_info = (related["shortBylineText"]? || related["longBylineText"]?)
+    .try &.dig?("runs", 0)
+
+  author = channel_info.try &.dig?("text")
+  ucid = channel_info.try { |ci| HelperExtractors.get_browse_id(ci) }
+
+  # "4,088,033 views", only available on compact renderer
+  # and when video is not a livestream
+  view_count = related.dig?("viewCountText", "simpleText")
+    .try &.as_s.gsub(/\D/, "")
+
+  short_view_count = related.try do |r|
+    HelperExtractors.get_short_view_count(r).to_s
+  end
+
+  LOGGER.trace("parse_related_video: Found \"watchNextEndScreenRenderer\" container")
+
+  # TODO: when refactoring video types, make a struct for related videos
+  # or reuse an existing type, if that fits.
+  return {
+    "id"               => related["videoId"],
+    "title"            => related["title"]["simpleText"],
+    "author"           => author || JSON::Any.new(""),
+    "ucid"             => JSON::Any.new(ucid || ""),
+    "length_seconds"   => JSON::Any.new(length || "0"),
+    "view_count"       => JSON::Any.new(view_count || "0"),
+    "short_view_count" => JSON::Any.new(short_view_count || "0"),
+  }
 end
 
 def extract_video_info(video_id : String, proxy_region : String? = nil, context_screen : String? = nil)
@@ -871,30 +898,61 @@ def extract_video_info(video_id : String, proxy_region : String? = nil, context_
     params[f] = player_response[f] if player_response[f]?
   end
 
-  params["relatedVideos"] = (
-    player_response
-      .dig?("playerOverlays", "playerOverlayRenderer", "endScreen", "watchNextEndScreenRenderer", "results")
-      .try &.as_a.compact_map { |r| parse_related r } || \
-       player_response
-        .dig?("webWatchNextResponseExtensionData", "relatedVideoArgs")
-        .try &.as_s.split(",").map { |r|
-          r = HTTP::Params.parse(r).to_h
-          JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
-        }
-  ).try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
-
   # Top level elements
 
-  primary_results = player_response
-    .dig?("contents", "twoColumnWatchNextResults", "results", "results", "contents")
+  main_results = player_response.dig?("contents", "twoColumnWatchNextResults")
+
+  raise BrokenTubeException.new("twoColumnWatchNextResults") if !main_results
+
+  primary_results = main_results.dig?("results", "results", "contents")
+  secondary_results = main_results
+    .dig?("secondaryResults", "secondaryResults", "results")
+
+  raise BrokenTubeException.new("results") if !primary_results
+  raise BrokenTubeException.new("secondaryResults") if !secondary_results
 
   video_primary_renderer = primary_results
-    .try &.as_a.find(&.["videoPrimaryInfoRenderer"]?)
-      .try &.["videoPrimaryInfoRenderer"]
+    .as_a.find(&.["videoPrimaryInfoRenderer"]?)
+    .try &.["videoPrimaryInfoRenderer"]
 
   video_secondary_renderer = primary_results
-    .try &.as_a.find(&.["videoSecondaryInfoRenderer"]?)
-      .try &.["videoSecondaryInfoRenderer"]
+    .as_a.find(&.["videoSecondaryInfoRenderer"]?)
+    .try &.["videoSecondaryInfoRenderer"]
+
+  raise BrokenTubeException.new("videoPrimaryInfoRenderer") if !video_primary_renderer
+  raise BrokenTubeException.new("videoSecondaryInfoRenderer") if !video_secondary_renderer
+
+  # Related videos
+
+  LOGGER.debug("extract_video_info: parsing related videos...")
+
+  related = [] of JSON::Any
+
+  # Parse "compactVideoRenderer" items (under secondary results)
+  secondary_results.as_a.each do |element|
+    if item = element["compactVideoRenderer"]?
+      related_video = parse_related_video(item)
+      related << JSON::Any.new(related_video) if related_video
+    end
+  end
+
+  # If nothing was found previously, fall back to end screen renderer
+  if related.empty?
+    # Container for "endScreenVideoRenderer" items
+    player_overlays = player_response.dig?(
+      "playerOverlays", "playerOverlayRenderer",
+      "endScreen", "watchNextEndScreenRenderer", "results"
+    )
+
+    player_overlays.try &.as_a.each do |element|
+      if item = element["endScreenVideoRenderer"]?
+        related_video = parse_related_video(item)
+        related << JSON::Any.new(related_video) if related_video
+      end
+    end
+  end
+
+  params["relatedVideos"] = JSON::Any.new(related)
 
   # Likes/dislikes
 
diff --git a/src/invidious/views/watch.ecr b/src/invidious/views/watch.ecr
index 00f5f8b7..2e0aee99 100644
--- a/src/invidious/views/watch.ecr
+++ b/src/invidious/views/watch.ecr
@@ -321,11 +321,11 @@ we're going to need to do it here in order to allow for translations.
                                     </div>
 
                                     <div class="pure-u-10-24" style="text-align:right">
-                                        <% if views = rv["short_view_count_text"]?.try &.delete(", views watching") %>
-                                            <% if !views.empty? %>
-                                                <b class="width:100%"><%= translate_count(locale, "generic_views_count", views.to_i? || 0) %></b>
-                                            <% end %>
-                                        <% end %>
+                                        <b class="width:100%"><%=
+                                            views = rv["view_count"]?.try &.to_i?
+                                            views ||= rv["view_count_short"]?.try { |x| short_text_to_number(x) }
+                                            translate_count(locale, "generic_views_count", views || 0, NumberFormatting::Short)
+                                        %></b>
                                     </div>
                                 </h5>
                             </a>
diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index 66b3cdef..41d95962 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -505,7 +505,7 @@ end
 #
 # Mostly used to extract out repeated structures to deal with code
 # repetition.
-private module HelperExtractors
+module HelperExtractors
   # Retrieves the amount of videos present within the given InnerTube data.
   #
   # Returns a 0 when it's unable to do so
@@ -519,6 +519,20 @@ private module HelperExtractors
     end
   end
 
+  # Retrieves the amount of views/viewers a video has.
+  # Seems to be used on related videos only
+  #
+  # Returns "0" when unable to parse
+  def self.get_short_view_count(container : JSON::Any) : String
+    box = container["shortViewCountText"]?
+    return "0" if !box
+
+    # Simpletext: "4M views"
+    # runs: {"text": "1.1K"},{"text":" watching"}
+    return box["simpleText"]?.try &.as_s.sub(" views", "") ||
+      box.dig?("runs", 0, "text").try &.as_s || "0"
+  end
+
   # Retrieve lowest quality thumbnail from InnerTube data
   #
   # TODO allow configuration of image quality (-1 is highest)
author	Samantaz Fox <coding@samantaz.fr>	2022-02-03 19:23:32 +0100
committer	GitHub <noreply@github.com>	2022-02-03 19:23:32 +0100
commit	bd221b7b2c861c50339db69d0654c9fc30080031 (patch)
tree	4221b59e6126e4c30f24a8a6394e9461e17bf7de /src
parent	0ca333715b06689f3120c136dfeab493f30bf867 (diff)
parent	ba37259258277aafb6fc700dabecead695bd624e (diff)
download	invidious-bd221b7b2c861c50339db69d0654c9fc30080031.tar.gz invidious-bd221b7b2c861c50339db69d0654c9fc30080031.tar.bz2 invidious-bd221b7b2c861c50339db69d0654c9fc30080031.zip