summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Samantaz Fox <coding@samantaz.fr> 2023-05-08 00:53:08 +0200
committer Samantaz Fox <coding@samantaz.fr> 2023-05-08 01:05:48 +0200
commit ce1fb8d08c86f747ee638289c8bcfeb208702445 (patch)
tree 40cb684011e415f0a978d405222fe7026e7bce7f
parent 1eb1bae3706204908fa57b6c91fe00041d4346c5 (diff)
download invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.tar.gz
invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.tar.bz2
invidious-ce1fb8d08c86f747ee638289c8bcfeb208702445.zip
Use XML.parse instead of XML.parse_html
Due to recent changes to libxml2 (between 2.9.14 and 2.10.4, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/508), the HTML parser no longer takes namespaces (xmlns) into account. Because HTML shouldn't contain namespaces anyway, there is no reason for us to keep using it. But switching to the XML parser means that we have to pass the namespaces to every single 'xpath_node(s)' method for it to be able to properly navigate the XML structure.
-rw-r--r-- src/invidious/channels/channels.cr 36
1 file changed, 25 insertions, 11 deletions
diff --git a/src/invidious/channels/channels.cr b/src/invidious/channels/channels.cr
index 63dd2194..b09d93b1 100644
--- a/src/invidious/channels/channels.cr
+++ b/src/invidious/channels/channels.cr
@@ -159,12 +159,18 @@ def fetch_channel(ucid, pull_all_videos : Bool)
LOGGER.debug("fetch_channel: #{ucid}")
LOGGER.trace("fetch_channel: #{ucid} : pull_all_videos = #{pull_all_videos}")
+ namespaces = {
+ "yt" => "http://www.youtube.com/xml/schemas/2015",
+ "media" => "http://search.yahoo.com/mrss/",
+ "default" => "http://www.w3.org/2005/Atom",
+ }
+
LOGGER.trace("fetch_channel: #{ucid} : Downloading RSS feed")
rss = YT_POOL.client &.get("/feeds/videos.xml?channel_id=#{ucid}").body
LOGGER.trace("fetch_channel: #{ucid} : Parsing RSS feed")
- rss = XML.parse_html(rss)
+ rss = XML.parse(rss)
- author = rss.xpath_node(%q(//feed/title))
+ author = rss.xpath_node("//default:feed/default:title", namespaces)
if !author
raise InfoException.new("Deleted or invalid channel")
end
@@ -192,15 +198,23 @@ def fetch_channel(ucid, pull_all_videos : Bool)
videos, continuation = IV::Channel::Tabs.get_videos(channel)
LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel RSS feed")
- rss.xpath_nodes("//feed/entry").each do |entry|
- video_id = entry.xpath_node("videoid").not_nil!.content
- title = entry.xpath_node("title").not_nil!.content
- published = Time.parse_rfc3339(entry.xpath_node("published").not_nil!.content)
- updated = Time.parse_rfc3339(entry.xpath_node("updated").not_nil!.content)
- author = entry.xpath_node("author/name").not_nil!.content
- ucid = entry.xpath_node("channelid").not_nil!.content
- views = entry.xpath_node("group/community/statistics").try &.["views"]?.try &.to_i64?
- views ||= 0_i64
+ rss.xpath_nodes("//default:feed/default:entry", namespaces).each do |entry|
+ video_id = entry.xpath_node("yt:videoid", namespaces).not_nil!.content
+ title = entry.xpath_node("default:title", namespaces).not_nil!.content
+
+ published = Time.parse_rfc3339(
+ entry.xpath_node("default:published", namespaces).not_nil!.content
+ )
+ updated = Time.parse_rfc3339(
+ entry.xpath_node("default:updated", namespaces).not_nil!.content
+ )
+
+ author = entry.xpath_node("default:author/default:name", namespaces).not_nil!.content
+ ucid = entry.xpath_node("yt:channelid", namespaces).not_nil!.content
+
+ views = entry
+ .xpath_node("media:group/media:community/media:statistics", namespaces)
+ .try &.["views"]?.try &.to_i64? || 0_i64
channel_video = videos
.select(SearchVideo)