summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorSamantaz Fox <coding@samantaz.fr>2022-11-05 18:56:35 +0100
committerSamantaz Fox <coding@samantaz.fr>2022-12-22 16:13:26 +0100
commitce7db8d2cb87111af15de2de9faf12aae38283bb (patch)
tree324cd74d5c1e0142bcbfbcc80bfff9653307783a /src
parentbdc51cd20fd2df99c2fe5ddc281aada86000a783 (diff)
downloadinvidious-ce7db8d2cb87111af15de2de9faf12aae38283bb.tar.gz
invidious-ce7db8d2cb87111af15de2de9faf12aae38283bb.tar.bz2
invidious-ce7db8d2cb87111af15de2de9faf12aae38283bb.zip
extractors: Add continuation token parser
Diffstat (limited to 'src')
-rw-r--r--src/invidious/channels/playlists.cr16
-rw-r--r--src/invidious/hashtag.cr3
-rw-r--r--src/invidious/helpers/serialized_yt_data.cr7
-rw-r--r--src/invidious/search/processors.cr14
-rw-r--r--src/invidious/yt_backend/extractors.cr54
-rw-r--r--src/invidious/yt_backend/extractors_utils.cr27
6 files changed, 61 insertions, 60 deletions
diff --git a/src/invidious/channels/playlists.cr b/src/invidious/channels/playlists.cr
index e6c0a1d5..0d46499a 100644
--- a/src/invidious/channels/playlists.cr
+++ b/src/invidious/channels/playlists.cr
@@ -1,18 +1,7 @@
def fetch_channel_playlists(ucid, author, continuation, sort_by)
if continuation
response_json = YoutubeAPI.browse(continuation)
- continuation_items = response_json["onResponseReceivedActions"]?
- .try &.[0]["appendContinuationItemsAction"]["continuationItems"]
-
- return [] of SearchItem, nil if !continuation_items
-
- items = [] of SearchItem
- continuation_items.as_a.select(&.as_h.has_key?("gridPlaylistRenderer")).each { |item|
- parse_item(item, author, ucid).try { |t| items << t }
- }
-
- continuation = continuation_items.as_a.last["continuationItemRenderer"]?
- .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
+ items, continuation = extract_items(response_json, author, ucid)
else
url = "/channel/#{ucid}/playlists?flow=list&view=1"
@@ -30,8 +19,7 @@ def fetch_channel_playlists(ucid, author, continuation, sort_by)
initial_data = extract_initial_data(response.body)
return [] of SearchItem, nil if !initial_data
- items = extract_items(initial_data, author, ucid)
- continuation = response.body.match(/"token":"(?<continuation>[^"]+)"/).try &.["continuation"]?
+ items, continuation = extract_items(initial_data, author, ucid)
end
return items, continuation
diff --git a/src/invidious/hashtag.cr b/src/invidious/hashtag.cr
index afe31a36..bc329205 100644
--- a/src/invidious/hashtag.cr
+++ b/src/invidious/hashtag.cr
@@ -8,7 +8,8 @@ module Invidious::Hashtag
client_config = YoutubeAPI::ClientConfig.new(region: region)
response = YoutubeAPI.browse(continuation: ctoken, client_config: client_config)
- return extract_items(response)
+ items, _ = extract_items(response)
+ return items
end
def generate_continuation(hashtag : String, cursor : Int)
diff --git a/src/invidious/helpers/serialized_yt_data.cr b/src/invidious/helpers/serialized_yt_data.cr
index c52e2a0d..635f0984 100644
--- a/src/invidious/helpers/serialized_yt_data.cr
+++ b/src/invidious/helpers/serialized_yt_data.cr
@@ -265,4 +265,11 @@ class Category
end
end
+struct Continuation
+ getter token
+
+ def initialize(@token : String)
+ end
+end
+
alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist | Category
diff --git a/src/invidious/search/processors.cr b/src/invidious/search/processors.cr
index 683a4a7e..7e909590 100644
--- a/src/invidious/search/processors.cr
+++ b/src/invidious/search/processors.cr
@@ -9,7 +9,8 @@ module Invidious::Search
client_config = YoutubeAPI::ClientConfig.new(region: query.region)
initial_data = YoutubeAPI.search(query.text, search_params, client_config: client_config)
- return extract_items(initial_data)
+ items, _ = extract_items(initial_data)
+ return items
end
# Search a youtube channel
@@ -30,16 +31,7 @@ module Invidious::Search
continuation = produce_channel_search_continuation(ucid, query.text, query.page)
response_json = YoutubeAPI.browse(continuation)
- continuation_items = response_json["onResponseReceivedActions"]?
- .try &.[0]["appendContinuationItemsAction"]["continuationItems"]
-
- return [] of SearchItem if !continuation_items
-
- items = [] of SearchItem
- continuation_items.as_a.select(&.as_h.has_key?("itemSectionRenderer")).each do |item|
- parse_item(item["itemSectionRenderer"]["contents"].as_a[0]).try { |t| items << t }
- end
-
+ items, _ = extract_items(response_json, "", ucid)
return items
end
diff --git a/src/invidious/yt_backend/extractors.cr b/src/invidious/yt_backend/extractors.cr
index a4b20d04..baf52118 100644
--- a/src/invidious/yt_backend/extractors.cr
+++ b/src/invidious/yt_backend/extractors.cr
@@ -7,7 +7,7 @@ require "../helpers/serialized_yt_data"
private ITEM_CONTAINER_EXTRACTOR = {
Extractors::YouTubeTabs,
Extractors::SearchResults,
- Extractors::Continuation,
+ Extractors::ContinuationContent,
}
private ITEM_PARSERS = {
@@ -18,6 +18,7 @@ private ITEM_PARSERS = {
Parsers::CategoryRendererParser,
Parsers::RichItemRendererParser,
Parsers::ReelItemRendererParser,
+ Parsers::ContinuationItemRendererParser,
}
private alias InitialData = Hash(String, JSON::Any)
@@ -347,14 +348,9 @@ private module Parsers
content_container = item_contents["contents"]
end
- raw_contents = content_container["items"]?.try &.as_a
- if !raw_contents.nil?
- raw_contents.each do |item|
- result = parse_item(item)
- if !result.nil?
- contents << result
- end
- end
+ content_container["items"]?.try &.as_a.each do |item|
+ result = parse_item(item, author_fallback.name, author_fallback.id)
+ contents << result if result.is_a?(SearchItem)
end
Category.new({
@@ -477,6 +473,35 @@ private module Parsers
return {{@type.name}}
end
end
+
+ # Parses an InnerTube continuationItemRenderer into a Continuation.
+ # Returns nil when the given object isn't a continuationItemRenderer.
+ #
+ # continuationItemRenderer contains various metadata used to load more
+ # content (i.e. when the user scrolls down). The interesting bit is the
+ # protobuf object known as the "continuation token". Previously, those
+ # were generated from scratch, but recent (as of 11/2022) YouTube changes
+ # are forcing us to extract them from replies.
+ #
+ module ContinuationItemRendererParser
+ def self.process(item : JSON::Any, author_fallback : AuthorFallback)
+ if item_contents = item["continuationItemRenderer"]?
+ return self.parse(item_contents)
+ end
+ end
+
+ private def self.parse(item_contents)
+ token = item_contents
+ .dig?("continuationEndpoint", "continuationCommand", "token")
+ .try &.as_s
+
+ return Continuation.new(token) if token
+ end
+
+ def self.parser_name
+ return {{@type.name}}
+ end
+ end
end
# The following are the extractors for extracting an array of items from
@@ -746,13 +771,18 @@ def extract_items(
initial_data : InitialData,
author_fallback : String? = nil,
author_id_fallback : String? = nil
-) : Array(SearchItem)
+) : {Array(SearchItem), String?}
items = [] of SearchItem
+ continuation = nil
extract_items(initial_data) do |item|
parsed = parse_item(item, author_fallback, author_id_fallback)
- items << parsed if !parsed.nil?
+
+ case parsed
+ when .is_a?(Continuation) then continuation = parsed.token
+ when .is_a?(SearchItem) then items << parsed
+ end
end
- return items
+ return items, continuation
end
diff --git a/src/invidious/yt_backend/extractors_utils.cr b/src/invidious/yt_backend/extractors_utils.cr
index f8245160..0cb3c079 100644
--- a/src/invidious/yt_backend/extractors_utils.cr
+++ b/src/invidious/yt_backend/extractors_utils.cr
@@ -68,10 +68,10 @@ rescue ex
return false
end
-def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil)
- extracted = extract_items(initial_data, author_fallback, author_id_fallback)
+def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : String? = nil, author_id_fallback : String? = nil) : Array(SearchVideo)
+ extracted, _ = extract_items(initial_data, author_fallback, author_id_fallback)
- target = [] of SearchItem
+ target = [] of (SearchItem | Continuation)
extracted.each do |i|
if i.is_a?(Category)
i.contents.each { |cate_i| target << cate_i if !cate_i.is_a? Video }
@@ -79,28 +79,11 @@ def extract_videos(initial_data : Hash(String, JSON::Any), author_fallback : Str
target << i
end
end
- return target.select(SearchVideo).map(&.as(SearchVideo))
+
+ return target.select(SearchVideo)
end
def extract_selected_tab(tabs)
# Extract the selected tab from the array of tabs Youtube returns
return selected_target = tabs.as_a.select(&.["tabRenderer"]?.try &.["selected"]?.try &.as_bool)[0]["tabRenderer"]
end
-
-def fetch_continuation_token(items : Array(JSON::Any))
- # Fetches the continuation token from an array of items
- return items.last["continuationItemRenderer"]?
- .try &.["continuationEndpoint"]["continuationCommand"]["token"].as_s
-end
-
-def fetch_continuation_token(initial_data : Hash(String, JSON::Any))
- # Fetches the continuation token from initial data
- if initial_data["onResponseReceivedActions"]?
- continuation_items = initial_data["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
- else
- tab = extract_selected_tab(initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"])
- continuation_items = tab["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["gridRenderer"]["items"]
- end
-
- return fetch_continuation_token(continuation_items.as_a)
-end