From 4a6e920d0e4e2dd4b67006e203e5776893c3639b Mon Sep 17 00:00:00 2001 From: Ben Heller Date: Wed, 2 Sep 2020 13:28:57 -0700 Subject: Use new YouTube API to fetch channel videos (#1355) * Use new API to fetch videos from channels This mirrors the process used by subscriptions.gir.st. The old API is tried first, and if it fails then the new one is used. * Use the new API whenever getting videos from a channel I created the get_channel_videos_response function because now instead of just getting a single URL, there are extra steps involved in getting the API response for channel videos, and these steps don't need to be repeated throughout the code. The only remaining exception is the bypass_captcha function, which still only makes a request to the old API. I don't know whether this code needs to be updated to use the new API for captcha bypassing to work correctly. * Correctly determine video length with new API * Remove unnecessary line --- src/invidious/channels.cr | 64 ++++++--- src/invidious/helpers/helpers.cr | 298 +++++++++++++++++++++------------------ 2 files changed, 205 insertions(+), 157 deletions(-) (limited to 'src') diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index da062755..007aa06c 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) page = 1 - url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page, auto_generated: auto_generated) videos = [] of SearchVideo begin @@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) ids = [] of String loop do - url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page, auto_generated: auto_generated) initial_data = 
JSON.parse(response.body).as_a.find &.["response"]? raise "Could not extract JSON" if !initial_data videos = extract_videos(initial_data.as_h, author, ucid) @@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by) return items, continuation end -def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest") +def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false) object = { "80226972:embedded" => { "2:string" => ucid, @@ -411,18 +409,33 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = " }, } - if auto_generated - seed = Time.unix(1525757349) - until seed >= Time.utc - seed += 1.month - end - timestamp = seed - (page - 1).months + if !v2 + if auto_generated + seed = Time.unix(1525757349) + until seed >= Time.utc + seed += 1.month + end + timestamp = seed - (page - 1).months - object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64 - object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}" + object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64 + object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}" + else + object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 + object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}" + end else object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64 - object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}" + + object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({ + "1:embedded" => { + "1:varint" => 6307666885028338688_i64, + "2:embedded" => { + "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({ + "1:varint" => 30_i64 * (page - 1), + }))), + }, + }, + }))) end case sort_by @@ -901,12 +914,28 @@ def 
get_about_info(ucid, locale) }) end +def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest") + url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false) + response = YT_POOL.client &.get(url) + initial_data = JSON.parse(response.body).as_a.find &.["response"]? + return response if !initial_data + needs_v2 = initial_data + .try &.["response"]?.try &.["alerts"]? + .try &.as_a.any? { |alert| + alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" } + } + if needs_v2 + url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true) + response = YT_POOL.client &.get(url) + end + response +end + def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") videos = [] of SearchVideo 2.times do |i| - url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) initial_data = JSON.parse(response.body).as_a.find &.["response"]? break if !initial_data videos.concat extract_videos(initial_data.as_h, author, ucid) @@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") end def get_latest_videos(ucid) - url = produce_channel_videos_url(ucid, 0) - response = YT_POOL.client &.get(url) + response = get_channel_videos_response(ucid, 1) initial_data = JSON.parse(response.body).as_a.find &.["response"]? 
return [] of SearchVideo if !initial_data author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index 56f856c0..6571f818 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -164,148 +164,168 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) end +def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil) + if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?) + video_id = i["videoId"].as_s + title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" + + author_info = i["ownerText"]?.try &.["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || author_fallback || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" + + published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local + view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || + i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]? 
+ .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 + + live_now = false + paid = false + premium = false + + premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } + + i["badges"]?.try &.as_a.each do |badge| + b = badge["metadataBadgeRenderer"] + case b["label"].as_s + when "LIVE NOW" + live_now = true + when "New", "4K", "CC" + # TODO + when "Premium" + paid = true + + # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] + premium = true + else nil # Ignore + end + end + + SearchVideo.new({ + title: title, + id: video_id, + author: author, + ucid: author_id, + published: published, + views: view_count, + description_html: description_html, + length_seconds: length_seconds, + live_now: live_now, + paid: paid, + premium: premium, + premiere_timestamp: premiere_timestamp, + }) + elsif i = item["channelRenderer"]? + author = i["title"]["simpleText"]?.try &.as_s || author_fallback || "" + author_id = i["channelId"]?.try &.as_s || author_id_fallback || "" + + author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || "" + subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 + + auto_generated = false + auto_generated = true if !i["videoCountText"]? + video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + + SearchChannel.new({ + author: author, + ucid: author_id, + author_thumbnail: author_thumbnail, + subscriber_count: subscriber_count, + video_count: video_count, + description_html: description_html, + auto_generated: auto_generated, + }) + elsif i = item["gridPlaylistRenderer"]? 
+ title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || "" + plid = i["playlistId"]?.try &.as_s || "" + + video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 + playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" + + SearchPlaylist.new({ + title: title, + id: plid, + author: author_fallback || "", + ucid: author_id_fallback || "", + video_count: video_count, + videos: [] of SearchPlaylistVideo, + thumbnail: playlist_thumbnail, + }) + elsif i = item["playlistRenderer"]? + title = i["title"]["simpleText"]?.try &.as_s || "" + plid = i["playlistId"]?.try &.as_s || "" + + video_count = i["videoCount"]?.try &.as_s.to_i || 0 + playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" + + author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || author_fallback || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" + + videos = i["videos"]?.try &.as_a.map do |v| + v = v["childVideoRenderer"] + v_title = v["title"]["simpleText"]?.try &.as_s || "" + v_id = v["videoId"]?.try &.as_s || "" + v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 + SearchPlaylistVideo.new({ + title: v_title, + id: v_id, + length_seconds: v_length_seconds, + }) + end || [] of SearchPlaylistVideo + + # TODO: i["publishedTimeText"]? + + SearchPlaylist.new({ + title: title, + id: plid, + author: author, + ucid: author_id, + video_count: video_count, + videos: videos, + thumbnail: playlist_thumbnail, + }) + elsif i = item["radioRenderer"]? # Mix + # TODO + elsif i = item["showRenderer"]? # Show + # TODO + elsif i = item["shelfRenderer"]? + elsif i = item["horizontalCardListRenderer"]? + elsif i = item["searchPyvRenderer"]? 
# Ad + end +end + def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) items = [] of SearchItem - initial_data.try { |t| t["contents"]? || t["response"]? } - .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || - t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || - t["continuationContents"]? } - .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } - .try &.["contents"].as_a - .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a - .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || - t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t } - .each { |item| - if i = item["videoRenderer"]? - video_id = i["videoId"].as_s - title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" - - author_info = i["ownerText"]?.try &.["runs"].as_a[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local - view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? 
|| 0_i64 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 - - live_now = false - paid = false - premium = false - - premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } - - i["badges"]?.try &.as_a.each do |badge| - b = badge["metadataBadgeRenderer"] - case b["label"].as_s - when "LIVE NOW" - live_now = true - when "New", "4K", "CC" - # TODO - when "Premium" - paid = true - - # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] - premium = true - else nil # Ignore - end - end - - items << SearchVideo.new({ - title: title, - id: video_id, - author: author, - ucid: author_id, - published: published, - views: view_count, - description_html: description_html, - length_seconds: length_seconds, - live_now: live_now, - paid: paid, - premium: premium, - premiere_timestamp: premiere_timestamp, - }) - elsif i = item["channelRenderer"]? - author = i["title"]["simpleText"]?.try &.as_s || author_fallback || "" - author_id = i["channelId"]?.try &.as_s || author_id_fallback || "" - - author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || "" - subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 - - auto_generated = false - auto_generated = true if !i["videoCountText"]? - video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" - - items << SearchChannel.new({ - author: author, - ucid: author_id, - author_thumbnail: author_thumbnail, - subscriber_count: subscriber_count, - video_count: video_count, - description_html: description_html, - auto_generated: auto_generated, - }) - elsif i = item["gridPlaylistRenderer"]? 
- title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 - playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || "" - - items << SearchPlaylist.new({ - title: title, - id: plid, - author: author_fallback || "", - ucid: author_id_fallback || "", - video_count: video_count, - videos: [] of SearchPlaylistVideo, - thumbnail: playlist_thumbnail, - }) - elsif i = item["playlistRenderer"]? - title = i["title"]["simpleText"]?.try &.as_s || "" - plid = i["playlistId"]?.try &.as_s || "" - - video_count = i["videoCount"]?.try &.as_s.to_i || 0 - playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" - - author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]? - author = author_info.try &.["text"].as_s || author_fallback || "" - author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || "" - - videos = i["videos"]?.try &.as_a.map do |v| - v = v["childVideoRenderer"] - v_title = v["title"]["simpleText"]?.try &.as_s || "" - v_id = v["videoId"]?.try &.as_s || "" - v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 - SearchPlaylistVideo.new({ - title: v_title, - id: v_id, - length_seconds: v_length_seconds, - }) - end || [] of SearchPlaylistVideo - - # TODO: i["publishedTimeText"]? - - items << SearchPlaylist.new({ - title: title, - id: plid, - author: author, - ucid: author_id, - video_count: video_count, - videos: videos, - thumbnail: playlist_thumbnail, - }) - elsif i = item["radioRenderer"]? # Mix - # TODO - elsif i = item["showRenderer"]? # Show - # TODO - elsif i = item["shelfRenderer"]? - elsif i = item["horizontalCardListRenderer"]? - elsif i = item["searchPyvRenderer"]? 
# Ad - end - } } + channel_v2_response = initial_data + .try &.["response"]? + .try &.["continuationContents"]? + .try &.["gridContinuation"]? + .try &.["items"]? + + if channel_v2_response + channel_v2_response.try &.as_a.each { |item| + extract_item(item, author_fallback, author_id_fallback) + .try { |t| items << t } + } + else + initial_data.try { |t| t["contents"]? || t["response"]? } + .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] || + t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || + t["continuationContents"]? } + .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } + .try &.["contents"].as_a + .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a + .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || + t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t } + .each { |item| + extract_item(item, author_fallback, author_id_fallback) + .try { |t| items << t } + } } + end items end -- cgit v1.2.3