author    Omar Roth <omarroth@protonmail.com>    2020-06-24 22:18:09 -0400
committer Omar Roth <omarroth@protonmail.com>    2020-06-24 22:19:36 -0400
commit    8c65b8c16fe06375b7b8c29514a065626a12fe04 (patch)
tree      94abb331305dcf06cc9fc1817fae8267dd240c91 /src
parent    1f435522b4c469e0a2c34dfc41da27db93db2803 (diff)
Update playlists to support polymer redesign
Diffstat (limited to 'src')
 -rw-r--r--  src/invidious/helpers/helpers.cr |  16
 -rw-r--r--  src/invidious/playlists.cr       | 190
 2 files changed, 108 insertions(+), 98 deletions(-)
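
This commit moves playlist extraction off the legacy disable_polymer=1 HTML pages and onto the JSON data embedded in the polymer redesign, replacing the XPath-based extract_playlist helper with renderer-object parsing. The diff relies on an extract_initial_data helper that is defined elsewhere in the codebase and is not part of this patch; the following is only an illustrative sketch of what such a helper could look like, assuming the page embeds its payload as var ytInitialData = {...}; or window["ytInitialData"] = {...}; inside a script tag.

require "json"

# Illustrative sketch only -- not part of this commit. Assumes the polymer page
# embeds its data as `var ytInitialData = {...};` or `window["ytInitialData"] = {...};`
# and returns an empty hash when no such blob is found.
def extract_initial_data(body : String) : Hash(String, JSON::Any)
  if match = body.match(/(?:window\["ytInitialData"\]|var ytInitialData)\s*=\s*(\{.*?\});/m)
    return JSON.parse(match[1]).as_h
  end
  {} of String => JSON::Any
end

The real helper may locate the blob differently (for example by scanning script tags), but the renderer traversal in the diff below only depends on it returning the parsed ytInitialData hash.
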
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index aaec19c5..ca7c9ebc 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -400,6 +400,22 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
description_html: description_html,
auto_generated: auto_generated,
)
+ elsif i = item["gridPlaylistRenderer"]?
+ title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+ items << SearchPlaylist.new(
+ title: title,
+ id: plid,
+ author: author_fallback || "",
+ ucid: author_id_fallback || "",
+ video_count: video_count,
+ videos: [] of SearchPlaylistVideo,
+ thumbnail: playlist_thumbnail
+ )
elsif i = item["playlistRenderer"]?
title = i["title"]["simpleText"]?.try &.as_s || ""
plid = i["playlistId"]?.try &.as_s || ""
diff --git a/src/invidious/playlists.cr b/src/invidious/playlists.cr
index fcf73dad..a1b7715d 100644
--- a/src/invidious/playlists.cr
+++ b/src/invidious/playlists.cr
@@ -118,7 +118,7 @@ struct Playlist
end
end
- json.field "description", html_to_content(self.description_html)
+ json.field "description", self.description
json.field "descriptionHtml", self.description_html
json.field "videoCount", self.video_count
@@ -153,7 +153,7 @@ struct Playlist
author: String,
author_thumbnail: String,
ucid: String,
- description_html: String,
+ description: String,
video_count: Int32,
views: Int64,
updated: Time,
@@ -163,6 +163,10 @@ struct Playlist
def privacy
PlaylistPrivacy::Public
end
+
+ def description_html
+ HTML.escape(self.description).gsub("\n", "<br>")
+ end
end
enum PlaylistPrivacy
@@ -298,52 +302,6 @@ def subscribe_playlist(db, user, playlist)
return playlist
end
-def extract_playlist(plid, nodeset, index)
- videos = [] of PlaylistVideo
-
- nodeset.each_with_index do |video, offset|
- anchor = video.xpath_node(%q(.//td[@class="pl-video-title"]))
- if !anchor
- next
- end
-
- title = anchor.xpath_node(%q(.//a)).not_nil!.content.strip(" \n")
- id = anchor.xpath_node(%q(.//a)).not_nil!["href"].lchop("/watch?v=")[0, 11]
-
- anchor = anchor.xpath_node(%q(.//div[@class="pl-video-owner"]/a))
- if anchor
- author = anchor.content
- ucid = anchor["href"].split("/")[2]
- else
- author = ""
- ucid = ""
- end
-
- anchor = video.xpath_node(%q(.//td[@class="pl-video-time"]/div/div[1]))
- if anchor && !anchor.content.empty?
- length_seconds = decode_length_seconds(anchor.content)
- live_now = false
- else
- length_seconds = 0
- live_now = true
- end
-
- videos << PlaylistVideo.new(
- title: title,
- id: id,
- author: author,
- ucid: ucid,
- length_seconds: length_seconds,
- published: Time.utc,
- plid: plid,
- index: (index + offset).to_i64,
- live_now: live_now
- )
- end
-
- return videos
-end
-
def produce_playlist_url(id, index)
if id.starts_with? "UC"
id = "UU" + id.lchop("UC")
@@ -389,58 +347,64 @@ def fetch_playlist(plid, locale)
plid = "UU#{plid.lchop("UC")}"
end
- response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en&disable_polymer=1")
+ response = YT_POOL.client &.get("/playlist?list=#{plid}&hl=en")
if response.status_code != 200
- raise translate(locale, "Not a playlist.")
+ if response.headers["location"]?.try &.includes? "/sorry/index"
+ raise "Could not extract playlist info. Instance is likely blocked."
+ else
+ raise translate(locale, "Not a playlist.")
+ end
end
- body = response.body.gsub(/<button[^>]+><span[^>]+>\s*less\s*<img[^>]+>\n<\/span><\/button>/, "")
- document = XML.parse_html(body)
+ initial_data = extract_initial_data(response.body)
+ playlist_info = initial_data["sidebar"]?.try &.["playlistSidebarRenderer"]?.try &.["items"]?.try &.[0]["playlistSidebarPrimaryInfoRenderer"]?
- title = document.xpath_node(%q(//h1[@class="pl-header-title"]))
- if !title
- raise translate(locale, "Playlist does not exist.")
- end
- title = title.content.strip(" \n")
+ raise "Could not extract playlist info" if !playlist_info
+ title = playlist_info["title"]?.try &.["runs"][0]?.try &.["text"]?.try &.as_s || ""
+
+ desc_item = playlist_info["description"]?
+ description = desc_item.try &.["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || desc_item.try &.["simpleText"]?.try &.as_s || ""
- description_html = document.xpath_node(%q(//span[@class="pl-header-description-text"]/div/div[1])).try &.to_s ||
- document.xpath_node(%q(//span[@class="pl-header-description-text"])).try &.to_s || ""
+ thumbnail = playlist_info["thumbnailRenderer"]?.try &.["playlistVideoThumbnailRenderer"]?
+ .try &.["thumbnail"]["thumbnails"][0]["url"]?.try &.as_s
- playlist_thumbnail = document.xpath_node(%q(//div[@class="pl-header-thumb"]/img)).try &.["data-thumb"]? ||
- document.xpath_node(%q(//div[@class="pl-header-thumb"]/img)).try &.["src"]
+ views = 0_i64
+ updated = Time.utc
+ video_count = 0
+ playlist_info["stats"]?.try &.as_a.each do |stat|
+ text = stat["runs"]?.try &.as_a.map(&.["text"].as_s).join("") || stat["simpleText"]?.try &.as_s
+ next if !text
- # YouTube allows anonymous playlists, so most of this can be empty or optional
- anchor = document.xpath_node(%q(//ul[@class="pl-header-details"]))
- author = anchor.try &.xpath_node(%q(.//li[1]/a)).try &.content
- author ||= ""
- author_thumbnail = document.xpath_node(%q(//img[@class="channel-header-profile-image"])).try &.["src"]
- author_thumbnail ||= ""
- ucid = anchor.try &.xpath_node(%q(.//li[1]/a)).try &.["href"].split("/")[-1]
- ucid ||= ""
+ if text.includes? "videos"
+ video_count = text.gsub(/\D/, "").to_i? || 0
+ elsif text.includes? "views"
+ views = text.gsub(/\D/, "").to_i64? || 0_i64
+ else
+ updated = decode_date(text.lchop("Last updated on ").lchop("Updated "))
+ end
+ end
- video_count = anchor.try &.xpath_node(%q(.//li[2])).try &.content.gsub(/\D/, "").to_i?
- video_count ||= 0
+ author_info = initial_data["sidebar"]?.try &.["playlistSidebarRenderer"]?.try &.["items"]?.try &.[1]["playlistSidebarSecondaryInfoRenderer"]?
+ .try &.["videoOwner"]["videoOwnerRenderer"]?
- views = anchor.try &.xpath_node(%q(.//li[3])).try &.content.gsub(/\D/, "").to_i64?
- views ||= 0_i64
+ raise "Could not extract author info" if !author_info
- updated = anchor.try &.xpath_node(%q(.//li[4])).try &.content.lchop("Last updated on ").lchop("Updated ").try { |date| decode_date(date) }
- updated ||= Time.utc
+ author_thumbnail = author_info["thumbnail"]["thumbnails"][0]["url"]?.try &.as_s || ""
+ author = author_info["title"]["runs"][0]["text"]?.try &.as_s || ""
+ ucid = author_info["title"]["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"]?.try &.as_s || ""
- playlist = Playlist.new(
+ return Playlist.new(
title: title,
id: plid,
author: author,
author_thumbnail: author_thumbnail,
ucid: ucid,
- description_html: description_html,
+ description: description,
video_count: video_count,
views: views,
updated: updated,
- thumbnail: playlist_thumbnail,
+ thumbnail: thumbnail
)
-
- return playlist
end
def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil)
@@ -458,35 +422,26 @@ end
def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuation = nil)
if continuation
- html = YT_POOL.client &.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999")
- html = XML.parse_html(html.body)
-
- index = html.xpath_node(%q(//span[@id="playlist-current-index"])).try &.content.to_i?.try &.- 1
- offset = index || offset
+ response = YT_POOL.client &.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en")
+ initial_data = extract_initial_data(response.body)
+ offset = initial_data["currentVideoEndpoint"]?.try &.["watchEndpoint"]?.try &.["index"]?.try &.as_i64 || offset
end
if video_count > 100
url = produce_playlist_url(plid, offset)
response = YT_POOL.client &.get(url)
- response = JSON.parse(response.body)
- if !response["content_html"]? || response["content_html"].as_s.empty?
- raise translate(locale, "Empty playlist")
- end
-
- document = XML.parse_html(response["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(.//tr[contains(@class, "pl-video")]))
- videos = extract_playlist(plid, nodeset, offset)
+ initial_data = JSON.parse(response.body).as_a.find(&.as_h.["response"]?).try &.as_h
elsif offset > 100
return [] of PlaylistVideo
else # Extract first page of videos
- response = YT_POOL.client &.get("/playlist?list=#{plid}&gl=US&hl=en&disable_polymer=1")
- document = XML.parse_html(response.body)
- nodeset = document.xpath_nodes(%q(.//tr[contains(@class, "pl-video")]))
-
- videos = extract_playlist(plid, nodeset, 0)
+ response = YT_POOL.client &.get("/playlist?list=#{plid}&gl=US&hl=en")
+ initial_data = extract_initial_data(response.body)
end
+ return [] of PlaylistVideo if !initial_data
+ videos = extract_playlist_videos(initial_data)
+
until videos.empty? || videos[0].index == offset
videos.shift
end
@@ -494,6 +449,45 @@ def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuat
return videos
end
+def extract_playlist_videos(initial_data : Hash(String, JSON::Any))
+ videos = [] of PlaylistVideo
+
+ (initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select(&.["tabRenderer"]["selected"]?.try &.as_bool)[0]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["playlistVideoListRenderer"]["contents"].as_a ||
+ initial_data["response"]?.try &.["continuationContents"]["playlistVideoListContinuation"]["contents"].as_a).try &.each do |item|
+ if i = item["playlistVideoRenderer"]?
+ video_id = i["navigationEndpoint"]["watchEndpoint"]["videoId"].as_s
+ plid = i["navigationEndpoint"]["watchEndpoint"]["playlistId"].as_s
+ index = i["navigationEndpoint"]["watchEndpoint"]["index"].as_i64
+
+ thumbnail = i["thumbnail"]["thumbnails"][0]["url"].as_s
+ title = i["title"].try { |t| t["simpleText"]? || t["runs"]?.try &.[0]["text"]? }.try &.as_s || ""
+ author = i["shortBylineText"]?.try &.["runs"][0]["text"].as_s || ""
+ ucid = i["shortBylineText"]?.try &.["runs"][0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s || ""
+ length_seconds = i["lengthSeconds"]?.try &.as_s.to_i
+ live = false
+
+ if !length_seconds
+ live = true
+ length_seconds = 0
+ end
+
+ videos << PlaylistVideo.new(
+ title: title,
+ id: video_id,
+ author: author,
+ ucid: ucid,
+ length_seconds: length_seconds,
+ published: Time.utc,
+ plid: plid,
+ live_now: live,
+ index: index - 1
+ )
+ end
+ end
+
+ return videos
+end
+
def template_playlist(playlist)
html = <<-END_HTML
<h3>