author     Ben Heller <ben@bheller.me>       2020-09-02 13:28:57 -0700
committer  GitHub <noreply@github.com>       2020-09-02 20:28:57 +0000
commit     4a6e920d0e4e2dd4b67006e203e5776893c3639b (patch)
tree       01cec67bf862968d56f078d901608c153bea83b0 /src
parent     13f58d602f0884349bcef5d84f045c4c6140ced7 (diff)
Use new YouTube API to fetch channel videos (#1355)
* Use new API to fetch videos from channels

  This mirrors the process used by subscriptions.gir.st. The old API is
  tried first, and if it fails then the new one is used.

* Use the new API whenever getting videos from a channel

  I created the get_channel_videos_response function because now, instead
  of just getting a single URL, there are extra steps involved in getting
  the API response for channel videos, and these steps don't need to be
  repeated throughout the code.

  The only remaining exception is the bypass_captcha function, which still
  only makes a request to the old API. I don't know whether that code needs
  to be updated to use the new API for captcha bypassing to work correctly.

* Correctly determine video length with the new API

* Remove unnecessary line
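The fallback described in the first bullet is centralized in the new
get_channel_videos_response helper (the full version is in the channels.cr
hunk below). A condensed, commented sketch of the control flow follows; the
logic matches the diff, only reformatted and annotated:

    def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
      # 1. Try the old (v1) browse endpoint first.
      url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
      response = YT_POOL.client &.get(url)

      # 2. An alertRenderer of type ERROR in the payload means the old
      #    endpoint rejected the request for this channel.
      initial_data = JSON.parse(response.body).as_a.find &.["response"]?
      return response if !initial_data
      needs_v2 = initial_data["response"]?.try &.["alerts"]?
        .try &.as_a.any? { |alert| alert["alertRenderer"]?.try &.["type"]? == "ERROR" }

      # 3. Only then fall back to the new (v2) endpoint.
      if needs_v2
        url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
        response = YT_POOL.client &.get(url)
      end
      response
    end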
Diffstat (limited to 'src')
-rw-r--r--  src/invidious/channels.cr          64
-rw-r--r--  src/invidious/helpers/helpers.cr  298
2 files changed, 205 insertions, 157 deletions
diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr
index da062755..007aa06c 100644
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@@ -213,8 +213,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1
- url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
- response = YT_POOL.client &.get(url)
+ response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
videos = [] of SearchVideo
begin
@@ -291,8 +290,7 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
ids = [] of String
loop do
- url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
- response = YT_POOL.client &.get(url)
+ response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
raise "Could not extract JSON" if !initial_data
videos = extract_videos(initial_data.as_h, author, ucid)
@@ -396,7 +394,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
return items, continuation
end
-def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
object = {
"80226972:embedded" => {
"2:string" => ucid,
@@ -411,18 +409,33 @@ def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "
},
}
- if auto_generated
- seed = Time.unix(1525757349)
- until seed >= Time.utc
- seed += 1.month
- end
- timestamp = seed - (page - 1).months
+ if !v2
+ if auto_generated
+ seed = Time.unix(1525757349)
+ until seed >= Time.utc
+ seed += 1.month
+ end
+ timestamp = seed - (page - 1).months
- object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
- object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
+ object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0x36_i64
+ object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{timestamp.to_unix}"
+ else
+ object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
+ object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
+ end
else
object["80226972:embedded"]["3:base64"].as(Hash)["4:varint"] = 0_i64
- object["80226972:embedded"]["3:base64"].as(Hash)["15:string"] = "#{page}"
+
+ object["80226972:embedded"]["3:base64"].as(Hash)["61:string"] = Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+ "1:embedded" => {
+ "1:varint" => 6307666885028338688_i64,
+ "2:embedded" => {
+ "1:string" => Base64.urlsafe_encode(Protodec::Any.from_json(Protodec::Any.cast_json({
+ "1:varint" => 30_i64 * (page - 1),
+ }))),
+ },
+ },
+ })))
end
case sort_by
@@ -901,12 +914,28 @@ def get_about_info(ucid, locale)
})
end
+def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
+ url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: false)
+ response = YT_POOL.client &.get(url)
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ return response if !initial_data
+ needs_v2 = initial_data
+ .try &.["response"]?.try &.["alerts"]?
+ .try &.as_a.any? { |alert|
+ alert.try &.["alertRenderer"]?.try &.["type"]?.try { |t| t == "ERROR" }
+ }
+ if needs_v2
+ url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
+ response = YT_POOL.client &.get(url)
+ end
+ response
+end
+
def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo
2.times do |i|
- url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
- response = YT_POOL.client &.get(url)
+ response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data
videos.concat extract_videos(initial_data.as_h, author, ucid)
@@ -916,8 +945,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end
def get_latest_videos(ucid)
- url = produce_channel_videos_url(ucid, 0)
- response = YT_POOL.client &.get(url)
+ response = get_channel_videos_response(ucid, 1)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
return [] of SearchVideo if !initial_data
author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index 56f856c0..6571f818 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -164,148 +164,168 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
+def extract_item(item : JSON::Any, author_fallback : String? = nil, author_id_fallback : String? = nil)
+ if i = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
+ video_id = i["videoId"].as_s
+ title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
+
+ author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+ published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
+ view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+ length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } ||
+ i["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?).try &.["thumbnailOverlayTimeStatusRenderer"]?
+ .try &.["text"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
+
+ live_now = false
+ paid = false
+ premium = false
+
+ premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
+
+ i["badges"]?.try &.as_a.each do |badge|
+ b = badge["metadataBadgeRenderer"]
+ case b["label"].as_s
+ when "LIVE NOW"
+ live_now = true
+ when "New", "4K", "CC"
+ # TODO
+ when "Premium"
+ paid = true
+
+ # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
+ premium = true
+ else nil # Ignore
+ end
+ end
+
+ SearchVideo.new({
+ title: title,
+ id: video_id,
+ author: author,
+ ucid: author_id,
+ published: published,
+ views: view_count,
+ description_html: description_html,
+ length_seconds: length_seconds,
+ live_now: live_now,
+ paid: paid,
+ premium: premium,
+ premiere_timestamp: premiere_timestamp,
+ })
+ elsif i = item["channelRenderer"]?
+ author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
+ author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
+
+ author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
+ subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
+
+ auto_generated = false
+ auto_generated = true if !i["videoCountText"]?
+ video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
+
+ SearchChannel.new({
+ author: author,
+ ucid: author_id,
+ author_thumbnail: author_thumbnail,
+ subscriber_count: subscriber_count,
+ video_count: video_count,
+ description_html: description_html,
+ auto_generated: auto_generated,
+ })
+ elsif i = item["gridPlaylistRenderer"]?
+ title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
+ playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
+
+ SearchPlaylist.new({
+ title: title,
+ id: plid,
+ author: author_fallback || "",
+ ucid: author_id_fallback || "",
+ video_count: video_count,
+ videos: [] of SearchPlaylistVideo,
+ thumbnail: playlist_thumbnail,
+ })
+ elsif i = item["playlistRenderer"]?
+ title = i["title"]["simpleText"]?.try &.as_s || ""
+ plid = i["playlistId"]?.try &.as_s || ""
+
+ video_count = i["videoCount"]?.try &.as_s.to_i || 0
+ playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
+
+ author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
+
+ videos = i["videos"]?.try &.as_a.map do |v|
+ v = v["childVideoRenderer"]
+ v_title = v["title"]["simpleText"]?.try &.as_s || ""
+ v_id = v["videoId"]?.try &.as_s || ""
+ v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
+ SearchPlaylistVideo.new({
+ title: v_title,
+ id: v_id,
+ length_seconds: v_length_seconds,
+ })
+ end || [] of SearchPlaylistVideo
+
+ # TODO: i["publishedTimeText"]?
+
+ SearchPlaylist.new({
+ title: title,
+ id: plid,
+ author: author,
+ ucid: author_id,
+ video_count: video_count,
+ videos: videos,
+ thumbnail: playlist_thumbnail,
+ })
+ elsif i = item["radioRenderer"]? # Mix
+ # TODO
+ elsif i = item["showRenderer"]? # Show
+ # TODO
+ elsif i = item["shelfRenderer"]?
+ elsif i = item["horizontalCardListRenderer"]?
+ elsif i = item["searchPyvRenderer"]? # Ad
+ end
+end
+
def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
- initial_data.try { |t| t["contents"]? || t["response"]? }
- .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
- t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
- t["continuationContents"]? }
- .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
- .try &.["contents"].as_a
- .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
- .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
- t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
- .each { |item|
- if i = item["videoRenderer"]?
- video_id = i["videoId"].as_s
- title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
-
- author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
- author = author_info.try &.["text"].as_s || author_fallback || ""
- author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
- published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
- view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
- description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
- length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0
-
- live_now = false
- paid = false
- premium = false
-
- premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) }
-
- i["badges"]?.try &.as_a.each do |badge|
- b = badge["metadataBadgeRenderer"]
- case b["label"].as_s
- when "LIVE NOW"
- live_now = true
- when "New", "4K", "CC"
- # TODO
- when "Premium"
- paid = true
-
- # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"]
- premium = true
- else nil # Ignore
- end
- end
-
- items << SearchVideo.new({
- title: title,
- id: video_id,
- author: author,
- ucid: author_id,
- published: published,
- views: view_count,
- description_html: description_html,
- length_seconds: length_seconds,
- live_now: live_now,
- paid: paid,
- premium: premium,
- premiere_timestamp: premiere_timestamp,
- })
- elsif i = item["channelRenderer"]?
- author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
- author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
-
- author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
- subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
-
- auto_generated = false
- auto_generated = true if !i["videoCountText"]?
- video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
- description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
-
- items << SearchChannel.new({
- author: author,
- ucid: author_id,
- author_thumbnail: author_thumbnail,
- subscriber_count: subscriber_count,
- video_count: video_count,
- description_html: description_html,
- auto_generated: auto_generated,
- })
- elsif i = item["gridPlaylistRenderer"]?
- title = i["title"]["runs"].as_a[0]?.try &.["text"].as_s || ""
- plid = i["playlistId"]?.try &.as_s || ""
-
- video_count = i["videoCountText"]["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0
- playlist_thumbnail = i["thumbnail"]["thumbnails"][0]?.try &.["url"]?.try &.as_s || ""
-
- items << SearchPlaylist.new({
- title: title,
- id: plid,
- author: author_fallback || "",
- ucid: author_id_fallback || "",
- video_count: video_count,
- videos: [] of SearchPlaylistVideo,
- thumbnail: playlist_thumbnail,
- })
- elsif i = item["playlistRenderer"]?
- title = i["title"]["simpleText"]?.try &.as_s || ""
- plid = i["playlistId"]?.try &.as_s || ""
-
- video_count = i["videoCount"]?.try &.as_s.to_i || 0
- playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
-
- author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
- author = author_info.try &.["text"].as_s || author_fallback || ""
- author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
-
- videos = i["videos"]?.try &.as_a.map do |v|
- v = v["childVideoRenderer"]
- v_title = v["title"]["simpleText"]?.try &.as_s || ""
- v_id = v["videoId"]?.try &.as_s || ""
- v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0
- SearchPlaylistVideo.new({
- title: v_title,
- id: v_id,
- length_seconds: v_length_seconds,
- })
- end || [] of SearchPlaylistVideo
-
- # TODO: i["publishedTimeText"]?
-
- items << SearchPlaylist.new({
- title: title,
- id: plid,
- author: author,
- ucid: author_id,
- video_count: video_count,
- videos: videos,
- thumbnail: playlist_thumbnail,
- })
- elsif i = item["radioRenderer"]? # Mix
- # TODO
- elsif i = item["showRenderer"]? # Show
- # TODO
- elsif i = item["shelfRenderer"]?
- elsif i = item["horizontalCardListRenderer"]?
- elsif i = item["searchPyvRenderer"]? # Ad
- end
- } }
+ channel_v2_response = initial_data
+ .try &.["response"]?
+ .try &.["continuationContents"]?
+ .try &.["gridContinuation"]?
+ .try &.["items"]?
+
+ if channel_v2_response
+ channel_v2_response.try &.as_a.each { |item|
+ extract_item(item, author_fallback, author_id_fallback)
+ .try { |t| items << t }
+ }
+ else
+ initial_data.try { |t| t["contents"]? || t["response"]? }
+ .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
+ t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
+ t["continuationContents"]? }
+ .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+ .try &.["contents"].as_a
+ .each { |c| c.try &.["itemSectionRenderer"]?.try &.["contents"].as_a
+ .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+ t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
+ .each { |item|
+ extract_item(item, author_fallback, author_id_fallback)
+ .try { |t| items << t }
+ } }
+ end
items
end
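
Call sites then stay endpoint-agnostic, as in get_latest_videos above. A
minimal usage sketch (the ucid and author strings here are placeholders for
illustration, not values from this commit):

    # Fetch the first page of a channel's uploads; the v1 -> v2 fallback is
    # handled inside get_channel_videos_response.
    response = get_channel_videos_response("UCxxxxxxxxxxxxxxxxxxxxxx", 1)
    initial_data = JSON.parse(response.body).as_a.find &.["response"]?
    videos = initial_data ? extract_videos(initial_data.as_h, "author", "UCxxxxxxxxxxxxxxxxxxxxxx") : [] of SearchVideo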