diff options
| author | Samantaz Fox <coding@samantaz.fr> | 2023-05-08 00:53:08 +0200 |
|---|---|---|
| committer | Samantaz Fox <coding@samantaz.fr> | 2023-05-08 01:05:48 +0200 |
| commit | ce1fb8d08c86f747ee638289c8bcfeb208702445 (patch) | |
| tree | 40cb684011e415f0a978d405222fe7026e7bce7f /src | |
| parent | 1eb1bae3706204908fa57b6c91fe00041d4346c5 (diff) | |
| download | invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.tar.gz invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.tar.bz2 invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.zip | |
Use XML.parse instead of XML.parse_html
Due to recent changes to libxml2 (between 2.9.14 and 2.10.4,
See https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the
HTML parser doesn't take into account the namespaces (xmlns).
Because HTML shouldn't contain namespaces anyway, there is no
reason for use to keep using it. But switching to the XML
parser means that we have to pass the namespaces to every
single 'xpath_node(s)' method for it to be able to properly
navigate the XML structure.
Diffstat (limited to 'src')
| -rw-r--r-- | src/invidious/channels/channels.cr | 36 |
1 files changed, 25 insertions, 11 deletions
diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr index 63dd2194..b09d93b1 100644 --- a/src/invidious/channels/channels.cr +++ b/src/invidious/channels/channels.cr @@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool) LOGGER.debug("fetch_channel: #{ucid}") LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}") + namespaces = { + "yt" => "http://www.youtube.com/xml/schemas/2015", + "media" => "http://search.yahoo.com/mrss/", + "default" => "http://www.w3.org/2005/Atom", + } + LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed") rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed") - rss = XML.parse_html(rss) + rss = XML.parse(rss) - author = rss.xpath_node(%q(//feed/title)) + author = rss.xpath_node("//default:feed/default:title", namespaces) if !author raise InfoException.new("Deleted or invalid channel") end @@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool) videos, continuation = IV::Channel::Tabs.get_videos(channel) LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed") - rss.xpath_nodes("//feed/entry").each do |entry| - video_id = entry.xpath_node("videoid").not_nil!.content - title = entry.xpath_node("title").not_nil!.content - published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content) - updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content) - author = entry.xpath_node("author/name").not_nil!.content - ucid = entry.xpath_node("channelid").not_nil!.content - views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64? - views ||= 0_i64 + rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry| + video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content + title = entry.xpath_node("default:title", namespaces).not_nil!.content + + published = Time.parse_rfc3339( + entry.xpath_node("default:published", namespaces).not_nil!.content + ) + updated = Time.parse_rfc3339( + entry.xpath_node("default:updated", namespaces).not_nil!.content + ) + + author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content + ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content + + views = entry + .xpath_node("media:group/media:community/media:statistics", namespaces) + .try &.["views"]?.try &.to_i64? || 0_i64 channel_video = videos .select(SearchVideo) |
