summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/invidious/helpers/extractors.cr77
1 files changed, 67 insertions, 10 deletions
diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 850c93ec..c8a6cd4a 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -74,7 +74,15 @@ private module Parsers
if length_container = item_contents["lengthText"]?
length_seconds = decode_length_seconds(length_container["simpleText"].as_s)
elsif length_container = item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?)
- length_seconds = extract_text(length_container["thumbnailOverlayTimeStatusRenderer"]["text"]).try { |t| decode_length_seconds(t) } || 0
+ # This must only follow the `simpleText` path (if possible). If more situations come up that require
+ # a specific pathway, then we should add an argument to extract_text that makes this possible.
+ length_seconds = length_container.dig?("thumbnailOverlayTimeStatusRenderer", "text", "simpleText")
+
+ if length_seconds
+ length_seconds = decode_length_seconds(length_seconds.as_s)
+ else
+ length_seconds = 0
+ end
else
length_seconds = 0
end
@@ -113,6 +121,10 @@ private module Parsers
premiere_timestamp: premiere_timestamp,
})
end
+
+ def self.parser_name # Name of this parser type (from the `@type.name` macro); extract_item interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Parses an InnerTube channelRenderer into a SearchChannel. Returns nil when the given object isn't a channelRenderer
@@ -159,6 +171,10 @@ private module Parsers
auto_generated: auto_generated,
})
end
+
+ def self.parser_name # Name of this parser type (from the `@type.name` macro); extract_item interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Parses an InnerTube gridPlaylistRenderer into a SearchPlaylist. Returns nil when the given object isn't a gridPlaylistRenderer
@@ -194,6 +210,10 @@ private module Parsers
thumbnail: playlist_thumbnail,
})
end
+
+ def self.parser_name # Name of this parser type (from the `@type.name` macro); extract_item interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Parses an InnerTube playlistRenderer into a SearchPlaylist. Returns nil when the given object isn't a playlistRenderer
@@ -207,20 +227,20 @@ private module Parsers
module PlaylistRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["playlistRenderer"]?
- return self.parse(item_contents)
+ return self.parse(item_contents, author_fallback)
end
end
- private def self.parse(item_contents)
+ private def self.parse(item_contents, author_fallback)
title = item_contents["title"]["simpleText"]?.try &.as_s || ""
plid = item_contents["playlistId"]?.try &.as_s || ""
video_count = HelperExtractors.get_video_count(item_contents)
playlist_thumbnail = HelperExtractors.get_thumbnails_plural(item_contents)
- author_info = item_contents.dig("shortBylineText", "runs", 0)
- author = author_info["text"].as_s
- author_id = HelperExtractors.get_browse_id(author_info)
+ author_info = item_contents.dig?("shortBylineText", "runs", 0)
+ author = author_info.try &.["text"].as_s || author_fallback.name
+ author_id = author_info.try { |x| HelperExtractors.get_browse_id(x) } || author_fallback.id
videos = item_contents["videos"]?.try &.as_a.map do |v|
v = v["childVideoRenderer"]
@@ -246,6 +266,10 @@ private module Parsers
thumbnail: playlist_thumbnail,
})
end
+
+ def self.parser_name # Name of this parser type (from the `@type.name` macro); extract_item interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Parses an InnerTube shelfRenderer into a Category. Returns nil when the given object isn't a shelfRenderer
@@ -283,11 +307,17 @@ private module Parsers
# Content parsing
contents = [] of SearchItem
- # Content could be in three locations.
- if content_container = item_contents["content"]["horizontalListRenderer"]?
- elsif content_container = item_contents["content"]["expandedShelfContentsRenderer"]?
- elsif content_container = item_contents["content"]["verticalListRenderer"]?
+ # InnerTube recognizes some "special" categories, which are organized differently.
+ if special_category_container = item_contents["content"]?
+ if content_container = special_category_container["horizontalListRenderer"]?
+ elsif content_container = special_category_container["expandedShelfContentsRenderer"]?
+ elsif content_container = special_category_container["verticalListRenderer"]?
+ else
+ # Anything else, such as `horizontalMovieListRenderer` is currently unsupported.
+ return
+ end
else
+ # "Normal" category.
content_container = item_contents["contents"]
end
@@ -307,6 +337,10 @@ private module Parsers
badges: badges,
})
end
+
+ def self.parser_name # Name of this parser type (from the `@type.name` macro); extract_item interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
end
@@ -372,6 +406,10 @@ private module Extractors
return raw_items
end
+
+ def self.extractor_name # Name of this extractor type (from the `@type.name` macro); extract_items interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Extracts items from the InnerTube response for search results
@@ -409,6 +447,10 @@ private module Extractors
return raw_items.flatten
end
+
+ def self.extractor_name # Name of this extractor type (from the `@type.name` macro); extract_items interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
# Extracts continuation items from an InnerTube response
@@ -440,6 +482,10 @@ private module Extractors
return raw_items
end
+
+ def self.extractor_name # Name of this extractor type (from the `@type.name` macro); extract_items interpolates it into its log messages.
+ return {{@type.name}}
+ end
end
end
@@ -529,8 +575,14 @@ def extract_item(item : JSON::Any, author_fallback : String? = "",
# Each parser automatically validates the data given to see if the data is
# applicable to itself. If not, nil is returned and the next parser is attempted.
ITEM_PARSERS.each do |parser|
+ LOGGER.trace("extract_item: Attempting to parse item using \"#{parser.parser_name}\" (cycling...)")
+
if result = parser.process(item, author_fallback)
+ LOGGER.debug("extract_item: Successfully parsed via #{parser.parser_name}")
+
return result
+ else
+ LOGGER.trace("extract_item: Parser \"#{parser.parser_name}\" does not apply. Cycling to the next one...")
end
end
end
@@ -550,7 +602,10 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
# This is identical to the parser cycling of extract_item().
ITEM_CONTAINER_EXTRACTOR.each do |extractor|
+ LOGGER.trace("extract_items: Attempting to extract item container using \"#{extractor.extractor_name}\" (cycling...)")
+
if container = extractor.process(unpackaged_data)
+ LOGGER.debug("extract_items: Successfully unpacked container with \"#{extractor.extractor_name}\"")
# Extract items in container
container.each do |item|
if parsed_result = extract_item(item, author_fallback, author_id_fallback)
@@ -559,6 +614,8 @@ def extract_items(initial_data : Hash(String, JSON::Any), author_fallback : Stri
end
break
+ else
+ LOGGER.trace("extract_items: Extractor \"#{extractor.extractor_name}\" does not apply. Cycling to the next one...")
end
end