Diffstat (limited to 'src')
-rw-r--r--  src/invidious/channels.cr        | 100
-rw-r--r--  src/invidious/helpers/helpers.cr | 326
-rw-r--r--  src/invidious/search.cr          |   3
-rw-r--r--  src/invidious/users.cr           |   2
4 files changed, 46 insertions(+), 385 deletions(-)
diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr
index cbfa521d..e7bcf00e 100644
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@@ -216,30 +216,18 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
+ videos = [] of SearchVideo
begin
- json = JSON.parse(response.body)
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ raise "Could not extract JSON" if !initial_data
+ videos = extract_videos(initial_data.as_h, author, ucid)
rescue ex
if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
response.body.includes?("https://www.google.com/sorry/index")
raise "Could not extract channel info. Instance is likely blocked."
end
-
- raise "Could not extract JSON"
- end
-
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
- if auto_generated
- videos = extract_videos_html(nodeset)
- else
- videos = extract_videos_html(nodeset, ucid, author)
- end
end
- videos ||= [] of ChannelVideo
-
rss.xpath_nodes("//feed/entry").each do |entry|
video_id = entry.xpath_node("videoid").not_nil!.content
title = entry.xpath_node("title").not_nil!.content
@@ -305,24 +293,11 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
loop do
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated)
response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
-
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
- else
- break
- end
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ raise "Could not extract JSON" if !initial_data
+ videos = extract_videos(initial_data.as_h, author, ucid)
- nodeset = nodeset.not_nil!
-
- if auto_generated
- videos = extract_videos_html(nodeset)
- else
- videos = extract_videos_html(nodeset, ucid, author)
- end
-
- count = nodeset.size
+ count = videos.size
videos = videos.map { |video| ChannelVideo.new(
id: video.id,
title: video.title,
@@ -387,23 +362,11 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
url = produce_channel_playlists_url(ucid, continuation, sort_by, auto_generated)
response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
-
- if json["load_more_widget_html"].as_s.empty?
- continuation = nil
- else
- continuation = XML.parse_html(json["load_more_widget_html"].as_s)
- continuation = continuation.xpath_node(%q(//button[@data-uix-load-more-href]))
-
- if continuation
- continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
- end
- end
- html = XML.parse_html(json["content_html"].as_s)
- nodeset = html.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
+ continuation = response.body.match(/"continuation":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+ initial_data = JSON.parse(response.body).as_a.find(&.["response"]?).try &.as_h
else
- url = "/channel/#{ucid}/playlists?disable_polymer=1&flow=list&view=1"
+ url = "/channel/#{ucid}/playlists?flow=list&view=1"
case sort_by
when "last", "last_added"
@@ -416,21 +379,13 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by)
end
response = YT_POOL.client &.get(url)
- html = XML.parse_html(response.body)
-
- continuation = html.xpath_node(%q(//button[@data-uix-load-more-href]))
- if continuation
- continuation = extract_channel_playlists_cursor(continuation["data-uix-load-more-href"], auto_generated)
- end
-
- nodeset = html.xpath_nodes(%q(//ul[@id="browse-items-primary"]/li[contains(@class, "feed-item-container")]))
+ continuation = response.body.match(/"continuation":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+ initial_data = extract_initial_data(response.body)
end
- if auto_generated
- items = extract_shelf_items(nodeset, ucid, author)
- else
- items = extract_items_html(nodeset, ucid, author)
- end
+ return [] of SearchItem, nil if !initial_data
+ items = extract_items(initial_data)
+ continuation = extract_channel_playlists_cursor(continuation, auto_generated) if continuation
return items, continuation
end
@@ -530,9 +485,8 @@ def produce_channel_playlists_url(ucid, cursor, sort = "newest", auto_generated
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
end
-def extract_channel_playlists_cursor(url, auto_generated)
- cursor = URI.parse(url).query_params
- .try { |i| URI.decode_www_form(i["continuation"]) }
+def extract_channel_playlists_cursor(cursor, auto_generated)
+ cursor = URI.decode_www_form(cursor)
.try { |i| Base64.decode(i) }
.try { |i| IO::Memory.new(i) }
.try { |i| Protodec::Any.parse(i) }
@@ -949,25 +903,19 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
break if !initial_data
- videos.concat extract_videos(initial_data.as_h)
+ videos.concat extract_videos(initial_data.as_h, author, ucid)
end
return videos.size, videos
end
def get_latest_videos(ucid)
- videos = [] of SearchVideo
-
url = produce_channel_videos_url(ucid, 0)
response = YT_POOL.client &.get(url)
- json = JSON.parse(response.body)
-
- if json["content_html"]? && !json["content_html"].as_s.empty?
- document = XML.parse_html(json["content_html"].as_s)
- nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
-
- videos = extract_videos_html(nodeset, ucid)
- end
+ initial_data = JSON.parse(response.body).as_a.find &.["response"]?
+ return [] of SearchVideo if !initial_data
+ author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
+ items = extract_videos(initial_data.as_h, author, ucid)
- return videos
+ return items
end
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index 7a251052..aaec19c5 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -313,32 +313,30 @@ def html_to_content(description_html : String)
return description
end
-def extract_videos(initial_data : Hash(String, JSON::Any))
- extract_items(initial_data).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
+def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
+ extract_items(initial_data, author_fallback, author_id_fallback).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
end
-def extract_items(initial_data : Hash(String, JSON::Any))
+def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
items = [] of SearchItem
- initial_data.try { |t|
- t["contents"]? || t["response"]?
- }.try { |t|
- t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a[0]?.try &.["tabRenderer"]["content"] ||
+ initial_data.try { |t| t["contents"]? || t["response"]? }
+ .try { |t| t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a.select(&.["tabRenderer"]?.try &.["selected"].as_bool)[0]?.try &.["tabRenderer"]["content"] ||
t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] ||
- t["continuationContents"]?
- }.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
- .try &.["contents"]
- .as_a.each { |c|
- c.try &.["itemSectionRenderer"]["contents"].as_a
- .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || t }
+ t["continuationContents"]? }
+ .try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? }
+ .try &.["contents"].as_a
+ .each { |c| c.try &.["itemSectionRenderer"]["contents"].as_a
+ .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a ||
+ t[0]?.try &.["gridRenderer"]?.try &.["items"].as_a || t }
.each { |item|
if i = item["videoRenderer"]?
video_id = i["videoId"].as_s
title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || ""
author_info = i["ownerText"]?.try &.["runs"].as_a[0]?
- author = author_info.try &.["text"].as_s || ""
- author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local
view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
@@ -382,8 +380,8 @@ def extract_items(initial_data : Hash(String, JSON::Any))
premiere_timestamp: premiere_timestamp
)
elsif i = item["channelRenderer"]?
- author = i["title"]["simpleText"]?.try &.as_s || ""
- author_id = i["channelId"]?.try &.as_s || ""
+ author = i["title"]["simpleText"]?.try &.as_s || author_fallback || ""
+ author_id = i["channelId"]?.try &.as_s || author_id_fallback || ""
author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || ""
subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0
@@ -409,9 +407,9 @@ def extract_items(initial_data : Hash(String, JSON::Any))
video_count = i["videoCount"]?.try &.as_s.to_i || 0
playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || ""
- author_info = i["shortBylineText"]["runs"].as_a[0]?
- author = author_info.try &.["text"].as_s || ""
- author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || ""
+ author_info = i["shortBylineText"]?.try &.["runs"].as_a[0]?
+ author = author_info.try &.["text"].as_s || author_fallback || ""
+ author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || author_id_fallback || ""
videos = i["videos"]?.try &.as_a.map do |v|
v = v["childVideoRenderer"]
@@ -444,297 +442,11 @@ def extract_items(initial_data : Hash(String, JSON::Any))
elsif i = item["horizontalCardListRenderer"]?
elsif i = item["searchPyvRenderer"]? # Ad
end
- }
- }
+ } }
items
end
-def extract_videos_html(nodeset, ucid = nil, author_name = nil)
- extract_items_html(nodeset, ucid, author_name).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo))
-end
-
-def extract_items_html(nodeset, ucid = nil, author_name = nil)
- # TODO: Make this a 'CommonItem', so it makes more sense to be used here
- items = [] of SearchItem
-
- nodeset.each do |node|
- anchor = node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if !anchor
- next
- end
- title = anchor.content.strip
- id = anchor["href"]
-
- if anchor["href"].starts_with? "https://www.googleadservices.com"
- next
- end
-
- author_id = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.["href"].split("/")[-1] || ucid || ""
- author = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).try &.content.strip || author_name || ""
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")])).try &.to_s || ""
-
- tile = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-tile")]))
- if !tile
- next
- end
-
- case tile["class"]
- when .includes? "yt-lockup-playlist"
- plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
- anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-meta")]/a))
-
- if !anchor
- anchor = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/a))
- end
-
- video_count = node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
- node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
- if video_count
- video_count = video_count.content
-
- if video_count == "50+"
- author = "YouTube"
- author_id = "UC-9-kyTW8ZkZNDHQJ6FgpwQ"
- end
-
- video_count = video_count.gsub(/\D/, "").to_i?
- end
- video_count ||= 0
-
- videos = [] of SearchPlaylistVideo
- node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
- anchor = video.xpath_node(%q(.//a))
- if anchor
- video_title = anchor.content.strip
- id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- id ||= ""
-
- anchor = video.xpath_node(%q(.//span/span))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- id,
- length_seconds
- )
- end
-
- playlist_thumbnail = node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
- playlist_thumbnail ||= node.xpath_node(%q(.//span/img)).try &.["src"]
-
- items << SearchPlaylist.new(
- title: title,
- id: plid,
- author: author,
- ucid: author_id,
- video_count: video_count,
- videos: videos,
- thumbnail: playlist_thumbnail
- )
- when .includes? "yt-lockup-channel"
- author = title.strip
-
- ucid = node.xpath_node(%q(.//button[contains(@class, "yt-uix-subscription-button")])).try &.["data-channel-external-id"]?
- ucid ||= id.split("/")[-1]
-
- author_thumbnail = node.xpath_node(%q(.//div/span/img)).try &.["data-thumb"]?
- author_thumbnail ||= node.xpath_node(%q(.//div/span/img)).try &.["src"]
- if author_thumbnail
- author_thumbnail = URI.parse(author_thumbnail)
- author_thumbnail.scheme = "https"
- author_thumbnail = author_thumbnail.to_s
- end
-
- author_thumbnail ||= ""
-
- subscriber_count = node.xpath_node(%q(.//span[contains(@class, "subscriber-count")]))
- .try &.["title"].try { |text| short_text_to_number(text) } || 0
-
- video_count = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li)).try &.content.split(" ")[0].gsub(/\D/, "").to_i?
-
- items << SearchChannel.new(
- author: author,
- ucid: ucid,
- author_thumbnail: author_thumbnail,
- subscriber_count: subscriber_count,
- video_count: video_count || 0,
- description_html: description_html,
- auto_generated: video_count ? false : true,
- )
- else
- id = id.lchop("/watch?v=")
-
- metadata = node.xpath_node(%q(.//div[contains(@class,"yt-lockup-meta")]/ul))
-
- published = metadata.try &.xpath_node(%q(.//li[contains(text(), " ago")])).try { |node| decode_date(node.content.sub(/^[a-zA-Z]+ /, "")) }
- published ||= metadata.try &.xpath_node(%q(.//span[@data-timestamp])).try { |node| Time.unix(node["data-timestamp"].to_i64) }
- published ||= Time.utc
-
- view_count = metadata.try &.xpath_node(%q(.//li[contains(text(), " views")])).try &.content.gsub(/\D/, "").to_i64?
- view_count ||= 0_i64
-
- length_seconds = node.xpath_node(%q(.//span[@class="video-time"])).try { |node| decode_length_seconds(node.content) }
- length_seconds ||= -1
-
- live_now = node.xpath_node(%q(.//span[contains(@class, "yt-badge-live")])) ? true : false
- premium = node.xpath_node(%q(.//span[text()="Premium"])) ? true : false
-
- if !premium || node.xpath_node(%q(.//span[contains(text(), "Free episode")]))
- paid = false
- else
- paid = true
- end
-
- premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64?
- if premiere_timestamp
- premiere_timestamp = Time.unix(premiere_timestamp)
- end
-
- items << SearchVideo.new(
- title: title,
- id: id,
- author: author,
- ucid: author_id,
- published: published,
- views: view_count,
- description_html: description_html,
- length_seconds: length_seconds,
- live_now: live_now,
- paid: paid,
- premium: premium,
- premiere_timestamp: premiere_timestamp
- )
- end
- end
-
- return items
-end
-
-def extract_shelf_items(nodeset, ucid = nil, author_name = nil)
- items = [] of SearchPlaylist
-
- nodeset.each do |shelf|
- shelf_anchor = shelf.xpath_node(%q(.//h2[contains(@class, "branded-page-module-title")]))
- next if !shelf_anchor
-
- title = shelf_anchor.xpath_node(%q(.//span[contains(@class, "branded-page-module-title-text")])).try &.content.strip
- title ||= ""
-
- id = shelf_anchor.xpath_node(%q(.//a)).try &.["href"]
- next if !id
-
- shelf_is_playlist = false
- videos = [] of SearchPlaylistVideo
-
- shelf.xpath_nodes(%q(.//ul[contains(@class, "yt-uix-shelfslider-list") or contains(@class, "expanded-shelf-content-list")]/li)).each do |child_node|
- type = child_node.xpath_node(%q(./div))
- next if !type
-
- case type["class"]
- when .includes? "yt-lockup-video"
- shelf_is_playlist = true
-
- anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if anchor
- video_title = anchor.content.strip
- video_id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- video_id ||= ""
-
- anchor = child_node.xpath_node(%q(.//span[@class="video-time"]))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- video_id,
- length_seconds
- )
- when .includes? "yt-lockup-playlist"
- anchor = child_node.xpath_node(%q(.//h3[contains(@class, "yt-lockup-title")]/a))
- if anchor
- playlist_title = anchor.content.strip
- params = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)
- plid = params["list"]
- end
- playlist_title ||= ""
- plid ||= ""
-
- playlist_thumbnail = child_node.xpath_node(%q(.//span/img)).try &.["data-thumb"]?
- playlist_thumbnail ||= child_node.xpath_node(%q(.//span/img)).try &.["src"]
-
- video_count = child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]/b)) ||
- child_node.xpath_node(%q(.//span[@class="formatted-video-count-label"]))
- if video_count
- video_count = video_count.content.gsub(/\D/, "").to_i?
- end
- video_count ||= 50
-
- videos = [] of SearchPlaylistVideo
- child_node.xpath_nodes(%q(.//*[contains(@class, "yt-lockup-playlist-items")]/li)).each do |video|
- anchor = video.xpath_node(%q(.//a))
- if anchor
- video_title = anchor.content.strip
- id = HTTP::Params.parse(URI.parse(anchor["href"]).query.not_nil!)["v"]
- end
- video_title ||= ""
- id ||= ""
-
- anchor = video.xpath_node(%q(.//span/span))
- if anchor
- length_seconds = decode_length_seconds(anchor.content)
- end
- length_seconds ||= 0
-
- videos << SearchPlaylistVideo.new(
- video_title,
- id,
- length_seconds
- )
- end
-
- items << SearchPlaylist.new(
- title: playlist_title,
- id: plid,
- author: author_name,
- ucid: ucid,
- video_count: video_count,
- videos: videos,
- thumbnail: playlist_thumbnail
- )
- else
- next # Skip
- end
- end
-
- if shelf_is_playlist
- plid = HTTP::Params.parse(URI.parse(id).query.not_nil!)["list"]
-
- items << SearchPlaylist.new(
- title: title,
- id: plid,
- author: author_name,
- ucid: ucid,
- video_count: videos.size,
- videos: videos,
- thumbnail: "https://i.ytimg.com/vi/#{videos[0].id}/mqdefault.jpg"
- )
- end
- end
-
- return items
-end
-
def check_enum(db, logger, enum_name, struct_type = nil)
return # TODO
if !db.query_one?("SELECT true FROM pg_type WHERE typname = $1", enum_name, as: Bool)
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index b4bd6226..92baed0b 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -243,7 +243,8 @@ def channel_search(query, page, channel)
response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find &.["response"]?
return 0, [] of SearchItem if !initial_data
- items = extract_items(initial_data.as_h)
+ author = initial_data["response"]?.try &.["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
+ items = extract_items(initial_data.as_h, author, ucid)
return items.size, items
end
diff --git a/src/invidious/users.cr b/src/invidious/users.cr
index ba15692c..f3cfafa3 100644
--- a/src/invidious/users.cr
+++ b/src/invidious/users.cr
@@ -286,7 +286,7 @@ end
# headers = HTTP::Headers.new
# headers["Cookie"] = env_headers["Cookie"]
#
-# html = YT_POOL.client &.get("/view_all_playlists?disable_polymer=1", headers)
+# html = YT_POOL.client &.get("/view_all_playlists", headers)
#
# cookies = HTTP::Cookies.from_headers(headers)
# html.cookies.each do |cookie|