author     Omar Roth <omarroth@hotmail.com>    2018-08-10 09:44:19 -0500
committer  Omar Roth <omarroth@hotmail.com>    2018-08-10 09:44:19 -0500
commit     15c26d022bc03ec3ea49af0a2fe0b7edddffbc28 (patch)
tree       5922dfbd375e202ee247b16bb32eacbf97e3bdcd /src
parent     2f8716d97fa361e284d53b7fe725a5802ead497d (diff)
download   invidious-15c26d022bc03ec3ea49af0a2fe0b7edddffbc28.tar.gz
           invidious-15c26d022bc03ec3ea49af0a2fe0b7edddffbc28.tar.bz2
           invidious-15c26d022bc03ec3ea49af0a2fe0b7edddffbc28.zip
Pull 'extract_videos' out into separate function
Diffstat (limited to 'src')
-rw-r--r--  src/invidious.cr                   164
-rw-r--r--  src/invidious/channels.cr           66
-rw-r--r--  src/invidious/helpers/helpers.cr    88
-rw-r--r--  src/invidious/search.cr             88
4 files changed, 157 insertions(+), 249 deletions(-)
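
For context, this commit collapses four near-identical scraping loops (channel feed, trending, channel videos API, and search) into the single extract_videos helper added to src/invidious/helpers/helpers.cr below. A minimal sketch of the resulting call pattern, assuming content_html holds a fetched page fragment; the XPath and the helper signature come from this diff, while content_html and the output line are illustrative placeholders:

    document = XML.parse_html(content_html)
    nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))

    # Each entry is a SearchVideo. Passing a known ucid skips the per-video
    # byline lookup and leaves the author fields blank, as the
    # /api/v1/channels/:ucid/videos route does.
    extract_videos(nodeset, ucid).each do |video|
      puts "#{video.title} (#{video.id}): #{video.views} views"
    end
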
diff --git a/src/invidious.cr b/src/invidious.cr
index 1851ec2b..ec6c0553 100644
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -1283,23 +1283,31 @@ get "/feed/channel/:ucid" do |env|
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss.body)
+
ucid = rss.xpath_node("//feed/channelid")
if !ucid
error_message = "User does not exist."
halt env, status_code: 404, response: error_message
end
- next env.redirect "/channel/#{ucid}"
+ ucid = ucid.content
+ next env.redirect "/feed/channel/#{ucid}"
end
url = produce_videos_url(ucid)
response = client.get(url)
- response = JSON.parse(response.body)
- if !response["content_html"]?
- error_message = "This channel does not exist."
- halt env, status_code: 404, response: error_message
+ json = JSON.parse(response.body)
+
+ if json["content_html"].as_s.empty?
+ if response.status_code == 500
+ error_message = "This channel does not exist."
+ halt env, status_code: 404, response: error_message
+ else
+ next ""
+ end
end
- content_html = response["content_html"].as_s
+
+ content_html = json["content_html"].as_s
document = XML.parse_html(content_html)
channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
@@ -1321,7 +1329,8 @@ get "/feed/channel/:ucid" do |env|
xml.element("uri") { xml.text "#{host_url}/channel/#{ucid}" }
end
- extract_channel_videos(document, channel.author, ucid).each do |video|
+ nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
+ extract_videos(nodeset).each do |video|
xml.element("entry") do
xml.element("id") { xml.text "yt:video:#{video.id}" }
xml.element("yt:videoId") { xml.text video.id }
@@ -1480,12 +1489,14 @@ get "/channel/:ucid" do |env|
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
rss = client.get("/feeds/videos.xml?user=#{ucid}")
rss = XML.parse_html(rss.body)
+
ucid = rss.xpath_node("//feed/channelid")
if !ucid
error_message = "User does not exist."
next templated "error"
end
+ ucid = ucid.content
next env.redirect "/channel/#{ucid}"
end
@@ -1520,7 +1531,7 @@ get "/channel/:ucid" do |env|
id = HTTP::Params.parse(href.query.not_nil!)["v"]
title = node.content
- videos << ChannelVideo.new(id, title, Time.now, Time.now, ucid, author)
+ videos << ChannelVideo.new(id, title, Time.now, Time.now, "", "")
end
templated "channel"
@@ -2002,54 +2013,24 @@ get "/api/v1/trending" do |env|
trending = XML.parse_html(trending)
videos = JSON.build do |json|
json.array do
- trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"])).each do |node|
- anchor = node.xpath_node(%q(.//h3/a)).not_nil!
-
- title = anchor.content
- id = anchor["href"].lchop("/watch?v=")
-
- anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).not_nil!
- author = anchor.content
- author_url = anchor["href"]
-
- metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
- if metadata.size == 0
- next
- elsif metadata.size == 1
- view_count = metadata[0].content.rchop(" watching").delete(",").to_i64
- published = Time.now
- else
- published = decode_date(metadata[0].content)
-
- view_count = metadata[1].content.rchop(" views")
- if view_count == "No"
- view_count = 0_i64
- else
- view_count = view_count.delete(",").to_i64
- end
- end
-
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
- description, description_html = html_to_description(description_html)
-
- length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
-
+ nodeset = trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"]))
+ extract_videos(nodeset).each do |video|
json.object do
- json.field "title", title
- json.field "videoId", id
+ json.field "title", video.title
+ json.field "videoId", video.id
json.field "videoThumbnails" do
- generate_thumbnails(json, id)
+ generate_thumbnails(json, video.id)
end
- json.field "lengthSeconds", length_seconds
- json.field "viewCount", view_count
+ json.field "lengthSeconds", video.length_seconds
+ json.field "viewCount", video.views
- json.field "author", author
- json.field "authorUrl", author_url
+ json.field "author", video.author
+ json.field "authorUrl", "/channel/#{video.ucid}"
- json.field "published", published.epoch
- json.field "description", description
- json.field "descriptionHtml", description_html
+ json.field "published", video.published.epoch
+ json.field "description", video.description
+ json.field "descriptionHtml", video.description_html
end
end
end
@@ -2096,16 +2077,17 @@ get "/api/v1/channels/:ucid" do |env|
client = make_client(YT_URL)
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
- rss = client.get("/feeds/videos.xml?user=#{ucid}").body
- rss = XML.parse_html(rss)
+ rss = client.get("/feeds/videos.xml?user=#{ucid}")
+ rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid")
- if ucid
- ucid = ucid.content
- else
+ if !ucid
env.response.content_type = "application/json"
next {"error" => "User does not exist"}.to_json
end
+
+ ucid = ucid.content
+ next env.redirect "/api/v1/channels/#{ucid}"
end
channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
@@ -2212,25 +2194,36 @@ get "/api/v1/channels/:ucid/videos" do |env|
client = make_client(YT_URL)
if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
- rss = client.get("/feeds/videos.xml?user=#{ucid}").body
- rss = XML.parse_html(rss)
+ rss = client.get("/feeds/videos.xml?user=#{ucid}")
+ rss = XML.parse_html(rss.body)
ucid = rss.xpath_node("//feed/channelid")
- if ucid
- ucid = ucid.content
- else
+ if !ucid
env.response.content_type = "application/json"
next {"error" => "User does not exist"}.to_json
end
+
+ ucid = ucid.content
+ url = "/api/v1/channels/#{ucid}/videos"
+ if env.params.query
+ url += "?#{env.params.query}"
+ end
+ next env.redirect url
end
url = produce_videos_url(ucid, page)
response = client.get(url)
json = JSON.parse(response.body)
- if !json["content_html"]? || json["content_html"].as_s.empty?
+ if !json["content_html"]?
env.response.content_type = "application/json"
- next {"error" => "No videos or nonexistent channel"}.to_json
+
+ if response.status_code == 500
+ response = {"Error" => "Channel does not exist"}.to_json
+ halt env, status_code: 404, response: response
+ else
+ next Array(String).new.to_json
+ end
end
content_html = json["content_html"].as_s
@@ -2242,47 +2235,22 @@ get "/api/v1/channels/:ucid/videos" do |env|
videos = JSON.build do |json|
json.array do
- document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
- anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)).not_nil!
- title = anchor.content.strip
- video_id = anchor["href"].lchop("/watch?v=")
-
- metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
- if metadata.size == 0
- next
- elsif metadata.size == 1
- view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
- published = Time.now
- else
- published = decode_date(metadata[0].content)
-
- view_count = metadata[1].content.split(" ")[0]
- if view_count == "No"
- view_count = 0_i64
- else
- view_count = view_count.delete(",").to_i64
- end
- end
-
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
- description, description_html = html_to_description(description_html)
-
- length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
-
+ nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
+ extract_videos(nodeset, ucid).each do |video|
json.object do
- json.field "title", title
- json.field "videoId", video_id
+ json.field "title", video.title
+ json.field "videoId", video.id
json.field "videoThumbnails" do
- generate_thumbnails(json, video_id)
+ generate_thumbnails(json, video.id)
end
- json.field "description", description
- json.field "descriptionHtml", description_html
+ json.field "description", video.description
+ json.field "descriptionHtml", video.description_html
- json.field "viewCount", view_count
- json.field "published", published.epoch
- json.field "lengthSeconds", length_seconds
+ json.field "viewCount", video.views
+ json.field "published", video.published.epoch
+ json.field "lengthSeconds", video.length_seconds
end
end
end
@@ -2344,7 +2312,7 @@ get "/api/v1/search" do |env|
json.field "description", video.description
json.field "descriptionHtml", video.description_html
- json.field "viewCount", video.view_count
+ json.field "viewCount", video.views
json.field "published", video.published.epoch
json.field "lengthSeconds", video.length_seconds
end
diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr
index fc5f90f8..f6cdad76 100644
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@@ -130,69 +130,3 @@ def fetch_channel(ucid, client, db, pull_all_videos = true)
return channel
end
-
-def extract_channel_videos(document, author, ucid)
- channel_videos = [] of Video
- document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
- anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
- if !anchor
- next
- end
-
- if anchor["href"].starts_with? "https://www.googleadservices.com"
- next
- end
-
- title = anchor.content.strip
- id = anchor["href"].lchop("/watch?v=")
-
- metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
- if metadata.size == 0
- next
- elsif metadata.size == 1
- view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
- published = Time.now
- else
- published = decode_date(metadata[0].content)
-
- view_count = metadata[1].content.split(" ")[0]
- if view_count == "No"
- view_count = 0_i64
- else
- view_count = view_count.delete(",").to_i64
- end
- end
-
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
- description, description_html = html_to_description(description_html)
-
- length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
- if length_seconds
- length_seconds = decode_length_seconds(length_seconds.content)
- else
- length_seconds = -1
- end
-
- info = HTTP::Params.parse("length_seconds=#{length_seconds}")
- channel_videos << Video.new(
- id,
- info,
- Time.now,
- title,
- view_count,
- 0, # Like count
- 0, # Dislike count
- 0.0, # Wilson score
- published,
- description,
- "", # Language,
- author,
- ucid,
- [] of String, # Allowed regions
- true, # Is family friendly
- "" # Genre
- )
- end
-
- return channel_videos
-end
diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr
index f3c2c295..68b7faf2 100644
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@@ -286,3 +286,91 @@ def html_to_description(description_html)
return description, description_html
end
+
+def extract_videos(nodeset, ucid = nil)
+ # TODO: Make this a 'common', so it makes more sense to be used here
+ videos = [] of SearchVideo
+
+ nodeset.each do |node|
+ anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
+ if !anchor
+ next
+ end
+
+ if anchor["href"].starts_with? "https://www.googleadservices.com"
+ next
+ end
+
+ title = anchor.content.strip
+ id = anchor["href"].lchop("/watch?v=")
+
+ if ucid
+ author = ""
+ author_id = ""
+ else
+ anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
+ if !anchor
+ next
+ end
+
+ author = anchor.content
+ author_id = anchor["href"].split("/")[-1]
+ end
+
+ # Skip playlists
+ if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
+ next
+ end
+
+ # Skip movies
+ if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
+ next
+ end
+
+ metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
+ if metadata.size == 0
+ next
+ elsif metadata.size == 1
+ if metadata[0].content.starts_with? "Starts"
+ view_count = 0_i64
+ published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
+ else
+ view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
+ published = Time.now
+ end
+ else
+ published = decode_date(metadata[0].content)
+
+ view_count = metadata[1].content.split(" ")[0]
+ if view_count == "No"
+ view_count = 0_i64
+ else
+ view_count = view_count.delete(",").to_i64
+ end
+ end
+
+ description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
+ description, description_html = html_to_description(description_html)
+
+ length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
+ if length_seconds
+ length_seconds = decode_length_seconds(length_seconds.content)
+ else
+ length_seconds = -1
+ end
+
+ videos << SearchVideo.new(
+ title,
+ id,
+ author,
+ author_id,
+ published,
+ view_count,
+ description,
+ description_html,
+ length_seconds,
+ )
+ end
+
+ return videos
+end
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index 82b14dcc..8ed18437 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -5,7 +5,7 @@ class SearchVideo
author: String,
ucid: String,
published: Time,
- view_count: Int64,
+ views: Int64,
description: String,
description_html: String,
length_seconds: Int32,
@@ -20,90 +20,8 @@ def search(query, page = 1, search_params = build_search_params(content_type: "v
end
html = XML.parse_html(html)
- videos = [] of SearchVideo
-
- html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
- anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
- if !anchor
- next
- end
-
- if anchor["href"].starts_with? "https://www.googleadservices.com"
- next
- end
-
- title = anchor.content.strip
- video_id = anchor["href"].lchop("/watch?v=")
-
- anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
- if !anchor
- next
- end
- author = anchor.content
- author_url = anchor["href"]
- ucid = author_url.split("/")[-1]
-
- # Skip playlists
- if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))
- next
- end
-
- metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
- if metadata.size == 0
- next
- elsif metadata.size == 1
- # Skip movies
- if metadata[0].content.includes? "·"
- next
- end
-
- if metadata[0].content.starts_with? "Starts"
- view_count = 0_i64
- published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
- else
- view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
- published = Time.now
- end
- else
- # Skip movies
- if metadata[0].content.includes? "·"
- next
- end
-
- published = decode_date(metadata[0].content)
-
- view_count = metadata[1].content.split(" ")[0]
- if view_count == "No"
- view_count = 0_i64
- else
- view_count = view_count.delete(",").to_i64
- end
- end
-
- description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
- description, description_html = html_to_description(description_html)
-
- length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
- if length_seconds
- length_seconds = decode_length_seconds(length_seconds.content)
- else
- length_seconds = -1
- end
-
- video = SearchVideo.new(
- title,
- video_id,
- author,
- ucid,
- published,
- view_count,
- description,
- description_html,
- length_seconds,
- )
-
- videos << video
- end
+ nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
+ videos = extract_videos(nodeset)
return videos
end