summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsyeopite <syeopite@syeopite.dev>2021-08-05 20:31:48 -0700
committersyeopite <syeopite@syeopite.dev>2021-09-24 21:07:07 -0700
commit092b8a4e5220cbe7e6eed45d1c331d5596dc68bc (patch)
tree96bf070c59cc7455fd5c2c801a8934d367e38d55
parente5f07dedbf92459a237165f359d7565e638d4ffa (diff)
downloadinvidious-092b8a4e5220cbe7e6eed45d1c331d5596dc68bc.tar.gz
invidious-092b8a4e5220cbe7e6eed45d1c331d5596dc68bc.tar.bz2
invidious-092b8a4e5220cbe7e6eed45d1c331d5596dc68bc.zip
Add documentation to extractors.cr
-rw-r--r--src/invidious/helpers/extractors.cr122
1 files changed, 115 insertions, 7 deletions
diff --git a/src/invidious/helpers/extractors.cr b/src/invidious/helpers/extractors.cr
index 3a90f017..32134cc9 100644
--- a/src/invidious/helpers/extractors.cr
+++ b/src/invidious/helpers/extractors.cr
@@ -18,11 +18,22 @@ private ITEM_PARSERS = {
record AuthorFallback, name : String? = nil, id : String? = nil
-# The following are the parsers for parsing raw item data into neatly packaged structs.
-# They're accessed through the process() method which validates the given data as applicable
-# to their specific struct and then use the internal parse() method to assemble the struct
-# specific to their category.
+# Namespace for logic relating to parsing InnerTube data into various datastructs.
+#
+# Each of the parsers in this namespace are accessed through the #process() method
+# which validates the given data as applicable to itself. If it is applicable the given
+# data is passed to the private `#parse()` method which returns a datastruct of the given
+# type. Otherwise, nil is returned.
private module Parsers
+ # Parses a InnerTube videoRenderer into a SearchVideo. Returns nil when the given object isn't a videoRenderer
+ #
+ # A videoRenderer renders a video to click on within the YouTube and Invidious UI. It is **not**
+ # the watchable video itself.
+ #
+ # See specs for example.
+ #
+ # `videoRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+ #
module VideoRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = (item["videoRenderer"]? || item["gridVideoRenderer"]?)
@@ -104,6 +115,15 @@ private module Parsers
end
end
+ # Parses a InnerTube channelRenderer into a SearchChannel. Returns nil when the given object isn't a channelRenderer
+ #
+ # A channelRenderer renders a channel to click on within the YouTube and Invidious UI. It is **not**
+ # the channel page itself.
+ #
+ # See specs for example.
+ #
+ # `channelRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+ #
module ChannelRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = (item["channelRenderer"]? || item["gridChannelRenderer"]?)
@@ -139,6 +159,15 @@ private module Parsers
end
end
+ # Parses a InnerTube gridPlaylistRenderer into a SearchPlaylist. Returns nil when the given object isn't a gridPlaylistRenderer
+ #
+ # A gridPlaylistRenderer renders a playlist, that is located in a grid, to click on within the YouTube and Invidious UI.
+ # It is **not** the playlist itself.
+ #
+ # See specs for example.
+ #
+ # `gridPlaylistRenderer`s can be found on the playlist-tabs of channels and expanded categories.
+ #
module GridPlaylistRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["gridPlaylistRenderer"]?
@@ -165,6 +194,14 @@ private module Parsers
end
end
+ # Parses a InnerTube playlistRenderer into a SearchPlaylist. Returns nil when the given object isn't a playlistRenderer
+ #
+ # A playlistRenderer renders a playlist to click on within the YouTube and Invidious UI. It is **not** the playlist itself.
+ #
+ # See specs for example.
+ #
+ # `playlistRenderer`s can be found almost everywhere on YouTube. In categories, search results, recommended, etc.
+ #
module PlaylistRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["playlistRenderer"]?
@@ -209,6 +246,16 @@ private module Parsers
end
end
+ # Parses a InnerTube shelfRenderer into a Category. Returns nil when the given object isn't a shelfRenderer
+ #
+ # A shelfRenderer renders divided sections on YouTube. IE "People also watched" in search results and
+ # the various organizational sections in the channel home page. A separate one (richShelfRenderer) is used
+ # for YouTube home. A shelfRenderer can also sometimes be expanded to show more content within it.
+ #
+ # See specs for example.
+ #
+ # `shelfRenderer`s can be found almost everywhere on YouTube. In categories, search results, channels, etc.
+ #
module CategoryRendererParser
def self.process(item : JSON::Any, author_fallback : AuthorFallback)
if item_contents = item["shelfRenderer"]?
@@ -264,7 +311,34 @@ end
# the internal Youtube API's JSON response. The result is then packaged into
# a structure we can more easily use via the parsers above. Their internals are
# identical to the item parsers.
+
+# Namespace for logic relating to extracting InnerTube's initial response to items we can parse.
+#
+# Each of the extractors in this namespace are accessed through the #process() method
+# which validates the given data as applicable to itself. If it is applicable the given
+# data is passed to the private `#extract()` method which returns an array of
+# parsable items. Otherwise, nil is returned.
+#
+# NOTE perhaps the result from here should be abstracted into a struct in order to
+# get additional metadata regarding the container of the item(s).
private module Extractors
+ # Extracts items from the selected YouTube tab.
+ #
+ # YouTube tabs are typically stored under "twoColumnBrowseResultsRenderer"
+ # and is structured like this:
+ #
+ # "twoColumnBrowseResultsRenderer": {
+ # {"tabs": [
+ # {"tabRenderer": {
+ # "endpoint": {...}
+ # "title": "Playlists",
+ # "selected": true,
+ # "content": {...},
+ # ...
+ # }}
+ # ]}
+ # }]
+ #
module YouTubeTabs
def self.process(initial_data : Hash(String, JSON::Any))
if target = initial_data["twoColumnBrowseResultsRenderer"]?
@@ -297,6 +371,23 @@ private module Extractors
end
end
+ # Extracts items from the InnerTube response for search results
+ #
+ # Search results are typically stored under "twoColumnSearchResultsRenderer"
+ # and is structured like this:
+ #
+ # "twoColumnSearchResultsRenderer": {
+ # {"primaryContents": {
+ # {"sectionListRenderer": {
+ # "contents": [...],
+ # ...,
+ # "subMenu": {...},
+ # "hideBottomSeparator": true,
+ # "targetId": "search-feed"
+ # }}
+ # }}
+ # }
+ #
module SearchResults
def self.process(initial_data : Hash(String, JSON::Any))
if target = initial_data["twoColumnSearchResultsRenderer"]?
@@ -317,6 +408,16 @@ private module Extractors
end
end
+ # Extracts continuation items from a InnerTube response
+ #
+ # Continuation items (on YouTube) are items which are appended to the
+ # end of the page for continuous scrolling. As such, in many cases,
+ # the items are lacking information such as author or category title,
+ # since the original results has already rendered them on the top of the page.
+ #
+ # The way they are structured is too varied to be accurately written down here.
+ # However, they all eventually lead to an array of parsable items after traversing
+ # through the JSON structure.
module Continuation
def self.process(initial_data : Hash(String, JSON::Any))
if target = initial_data["continuationContents"]?
@@ -339,7 +440,10 @@ private module Extractors
end
end
-# Helper methods to extract out certain stuff from InnerTube
+# Helper methods to aid in the parsing of InnerTube to data structs.
+#
+# Mostly used to extract out repeated structures to deal with code
+# repetition.
private module HelperExtractors
# Retrieves the amount of videos present within the given InnerTube data.
#
@@ -364,14 +468,14 @@ private module HelperExtractors
end
# ditto
+ #
# YouTube sometimes sends the thumbnail as:
# {"thumbnails": [{"thumbnails": [{"url": "example.com"}, ...]}]}
def self.get_thumbnails_plural(container : JSON::Any) : String
return container.dig("thumbnails", 0, "thumbnails", 0, "url").as_s
end
- # Retrieves the ID required for querying the InnerTube browse endpoint
- #
+ # Retrieves the ID required for querying the InnerTube browse endpoint.
# Raises when it's unable to do so
def self.get_browse_endpoint(container)
return container.dig("navigationEndpoint", "browseEndpoint", "browseId").as_s
@@ -391,6 +495,10 @@ end
#
# Or sometimes just none at all as with the data returned from
# category continuations.
+#
+# In order to facilitate calling this function with `#[]?`:
+# A nil will be accepted. Of course, since nil cannot be parsed,
+# another nil will be returned.
def extract_text(item : JSON::Any?) : String?
if item.nil?
return nil