summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Samantaz Fox <coding@samantaz.fr> 2023-04-10 17:54:22 +0200
committer: Samantaz Fox <coding@samantaz.fr> 2023-04-10 17:54:22 +0200
commit: adc605024f0417d0e8b48ad151927bf012b16e71 (patch)
tree: 23ead9efcf2a06cf7bf90b862c1c8cee371ed6a4 /src
parent: 525e4bd67a4cabee646ad01bf69352023e12bdbc (diff)
parent: 9a765418d1410ceda3a27ebcd2febd9fe4319edc (diff)
download: invidious-adc605024f0417d0e8b48ad151927bf012b16e71.tar.gz
invidious-adc605024f0417d0e8b48ad151927bf012b16e71.tar.bz2
invidious-adc605024f0417d0e8b48ad151927bf012b16e71.zip
Videos: Add support for attributed descriptions (#3701)
Diffstat (limited to 'src')
-rw-r--r-- src/invidious/videos/description.cr 105
-rw-r--r-- src/invidious/videos/parser.cr 6
2 files changed, 109 insertions, 2 deletions
diff --git a/src/invidious/videos/description.cr b/src/invidious/videos/description.cr
new file mode 100644
index 00000000..2017955d
--- /dev/null
+++ b/src/invidious/videos/description.cr
@@ -0,0 +1,105 @@
+require "html"
+require "json"
+require "uri"
+
+# Converts a single entry of an attributed description's `commandRuns`
+# array into an HTML anchor.
+#
+# - `command`: JSON of the command run. Its `onTap.innertubeCommand` object
+#   selects which kind of link is generated.
+# - `string`: the slice of the description text covered by the command. It
+#   is inserted into the markup verbatim, so it must already be HTML-safe.
+#
+# Returns the HTML for the link, an empty string for a URL endpoint that has
+# no `q` parameter, the bare `string` for a browse command without a URL, or
+# a placeholder text when the command is not recognized.
+#
+# Raises if a URL endpoint has no `url` or a watch endpoint has no `videoId`.
+def parse_command(command : JSON::Any?, string : String) : String?
+  on_tap = command.try &.dig?("onTap", "innertubeCommand")
+
+  if url_endpoint = on_tap.try &.["urlEndpoint"]?
+    # 3rd party URL, extract original URL from YouTube tracking URL
+    youtube_url = URI.parse url_endpoint["url"].as_s
+
+    original_url = youtube_url.query_params["q"]?
+    return "" if original_url.nil?
+
+    # The target is taken from a query parameter and is arbitrary text once
+    # decoded: escape it so that it can't break out of the attribute or
+    # inject markup in the link text.
+    safe_url = HTML.escape(original_url)
+    return "<a href=\"#{safe_url}\">#{safe_url}</a>"
+  elsif watch_endpoint = on_tap.try &.["watchEndpoint"]?
+    # 1st party watch URL
+    video_id = watch_endpoint["videoId"].as_s
+    url = "/watch?v=#{video_id}"
+
+    # The start time is treated as optional: only append it when present
+    # instead of raising on a missing key.
+    if time = watch_endpoint["startTimeSeconds"]?.try &.as_i?
+      url += "&t=#{time}s"
+    end
+
+    # if string is a timestamp, use the string instead
+    # this is a lazy regex for validating timestamps
+    if /(?:\d{1,2}:){1,2}\d{2}/ =~ string
+      return "<a href=\"#{url}\">#{string}</a>"
+    else
+      return "<a href=\"#{url}\">#{url}</a>"
+    end
+  elsif browse_endpoint = on_tap.try &.dig?("commandMetadata", "webCommandMetadata")
+    # hashtag/other browse URLs
+    url = browse_endpoint["url"]?.try &.as_s?
+
+    # Without a target there is nothing to link to: keep the bare text.
+    return string if url.nil?
+
+    # remove unnecessary character in a channel name
+    if browse_endpoint["webPageType"]?.try &.as_s == "WEB_PAGE_TYPE_CHANNEL"
+      if name = string.match(/@[\w\d.-]+/).try &.[0]?
+        return "<a href=\"#{url}\">#{name}</a>"
+      end
+    end
+
+    return "<a href=\"#{url}\">#{string}</a>"
+  end
+
+  return "(unknown YouTube desc command)"
+end
+
+# Copies text from the codepoint iterator `iter` into `str`, HTML-escaping
+# it on the fly, until at least `count` UTF-16 code units have been consumed
+# or the iterator is exhausted.
+#
+# Lengths are measured in UTF-16 code units: a codepoint above 0xFFFF is
+# counted as two units. Because such a codepoint is never split, the result
+# can exceed `count` by one.
+#
+# Returns the number of UTF-16 code units consumed. The count refers to the
+# input text and is not affected by the escaping of the output.
+private def copy_string(str : String::Builder, iter : Iterator, count : Int) : Int
+  copied = 0
+  while copied < count
+    cp = iter.next
+    break if cp.is_a?(Iterator::Stop)
+
+    # The copied text ends up in an HTML document: neutralize the characters
+    # that have a special meaning in markup and attribute values.
+    case cp
+    when 0x26 then str << "&amp;"  # Ampersand (&)
+    when 0x27 then str << "&#39;"  # Single quote (')
+    when 0x22 then str << "&quot;" # Double quote (")
+    when 0x3C then str << "&lt;"   # Less-than (<)
+    when 0x3E then str << "&gt;"   # Greater-than (>)
+    else           str << cp.chr
+    end
+
+    # A codepoint from the SMP counts twice
+    copied += 1 if cp > 0xFFFF
+    copied += 1
+  end
+
+  return copied
+end
+
+# Renders an `attributedDescription` JSON object as an HTML string.
+#
+# `desc` provides the text under "content" and, optionally, a "commandRuns"
+# array. Each command run gives the "startIndex" and "length" of a part of
+# the text that is handed to `parse_command` to be turned into a link.
+#
+# Returns an empty string when `desc` is nil or has no text.
+def parse_description(desc : JSON::Any?) : String?
+  return "" if desc.nil?
+
+  content = desc["content"]?.try(&.as_s?) || ""
+  return "" if content.empty?
+
+  commands = desc["commandRuns"]?.try &.as_a
+
+  # The result is used as HTML, so text without any command still has to be
+  # escaped before being returned.
+  return HTML.escape(content) if commands.nil?
+
+  # Not everything is stored in UTF-8 on youtube's side. The SMP codepoints
+  # (0x10000 and above) are encoded as UTF-16 surrogate pairs, which are
+  # automatically decoded by the JSON parser. It means that positions have
+  # to be counted in UTF-16 code units, preventing the use of regular
+  # string copy.
+  iter = content.each_codepoint
+
+  # Position in the text, in UTF-16 code units.
+  index = 0
+
+  return String.build do |str|
+    commands.each do |command|
+      cmd_start = command["startIndex"].as_i
+      cmd_length = command["length"].as_i
+
+      # Copy the text chunk between this command and the previous if needed.
+      length = cmd_start - index
+      index += copy_string(str, iter, length)
+
+      # We need to copy the command's text using the iterator
+      # and the special function defined above.
+      cmd_copied = 0
+      cmd_content = String.build(cmd_length) do |str2|
+        cmd_copied = copy_string(str2, iter, cmd_length)
+      end
+
+      str << parse_command(command, cmd_content)
+
+      # Advance by what was really consumed, which may differ from the
+      # announced length (end of text reached, unsplittable codepoint).
+      index += cmd_copied
+    end
+
+    # Copy the end of the string (past the last command). `content.size` is
+    # a number of codepoints whereas `index` is in UTF-16 code units, so the
+    # two can't be subtracted: simply drain what is left in the iterator.
+    copy_string(str, iter, Int32::MAX)
+  end
+end
diff --git a/src/invidious/videos/parser.cr b/src/invidious/videos/parser.cr
index 13ee5f65..1c6d118d 100644
--- a/src/invidious/videos/parser.cr
+++ b/src/invidious/videos/parser.cr
@@ -284,8 +284,10 @@ def parse_video_info(video_id : String, player_response : Hash(String, JSON::Any
description = microformat.dig?("description", "simpleText").try &.as_s || ""
short_description = player_response.dig?("videoDetails", "shortDescription")
- description_html = video_secondary_renderer.try &.dig?("description", "runs")
- .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
+ # description_html = video_secondary_renderer.try &.dig?("description", "runs")
+ # .try &.as_a.try { |t| content_to_comment_html(t, video_id) }
+
+ description_html = parse_description(video_secondary_renderer.try &.dig?("attributedDescription"))
# Video metadata