summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Omar Roth <omarroth@hotmail.com> 2018-03-03 15:59:21 -0600
committer: Omar Roth <omarroth@hotmail.com> 2018-03-03 15:59:21 -0600
commit: d573461a671a0ccbf1b06c5e16cbc7c5cff52139 (patch)
tree: 6a03672df7e2d922ec37b40ae7e1be9b9b57ebc7 /src
parent: b8fe82a7f7d05f4a2af0d7445e2633dea5169395 (diff)
download: invidious-d573461a671a0ccbf1b06c5e16cbc7c5cff52139.tar.gz
invidious-d573461a671a0ccbf1b06c5e16cbc7c5cff52139.tar.bz2
invidious-d573461a671a0ccbf1b06c5e16cbc7c5cff52139.zip
Parse HTML properly instead of relying on regexes
Diffstat (limited to 'src')
-rw-r--r-- src/helpers.cr 29
1 files changed, 26 insertions, 3 deletions
diff --git a/src/helpers.cr b/src/helpers.cr
index 45204132..094d7dc1 100644
--- a/src/helpers.cr
+++ b/src/helpers.cr
@@ -132,8 +132,19 @@ def fetch_video(id, client)
dislikes = dislikes ? dislikes.content.delete(",").to_i : 0
description = html.xpath_node(%q(//p[@id="eow-description"]))
+ if description
+ description.xpath_nodes(%q(//a/@href)).each do |match|
+ uri = URI.parse(match.content)
+
+ if uri.host =~ /(www\.)?youtube.com/
+ uri = uri.full_path
+ puts uri
+ end
+
+ match.content = uri.to_s
+ end
+ end
description = description ? description.to_xml : ""
- description = description.gsub(/(https:\/\/)|(http:\/\/)?(www\.)?(youtube\.com)/, "")
wilson_score = ci_lower_bound(likes, likes + dislikes)
@@ -278,6 +289,20 @@ def template_comments(root)
author = child["data"]["author"]
score = child["data"]["score"]
body_html = HTML.unescape(child["data"]["body_html"].as_s)
+ body_html = XML.parse_html(body_html)
+
+ body_html.xpath_nodes(%q(//a/@href)).each do |match|
+ uri = URI.parse(match.content)
+
+ if uri.host =~ /(www\.)?youtube.com/
+ uri = uri.full_path
+ puts uri
+ end
+
+ match.content = uri.to_s
+ end
+
+ body_html = body_html.to_s
replies_html = ""
if child["data"]["replies"] != ""
@@ -317,8 +342,6 @@ def template_comments(root)
end
end
- html = html.gsub(/(https:\/\/)|(http:\/\/)?(www\.)?(youtube\.com)/, "")
-
return html
end