Pull 'extract_videos' out into separate function

2024-08-15 00:53:18 +00:00 · 2018-08-10 09:44:19 -05:00 · 2018-08-10 09:44:19 -05:00 · 15c26d022b
commit 15c26d022b
parent 2f8716d97f
4 changed files with 157 additions and 249 deletions
--- a/src/invidious/channels.cr
+++ b/src/invidious/channels.cr
@ -130,69 +130,3 @@ def fetch_channel(ucid, client, db, pull_all_videos = true)

  return channel
 end
-
-def extract_channel_videos(document, author, ucid)
-  channel_videos = [] of Video
-  document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
-    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
-    if !anchor
-      next
-    end
-
-    if anchor["href"].starts_with? "https://www.googleadservices.com"
-      next
-    end
-
-    title = anchor.content.strip
-    id = anchor["href"].lchop("/watch?v=")
-
-    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-    if metadata.size == 0
-      next
-    elsif metadata.size == 1
-      view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
-      published = Time.now
-    else
-      published = decode_date(metadata[0].content)
-
-      view_count = metadata[1].content.split(" ")[0]
-      if view_count == "No"
-        view_count = 0_i64
-      else
-        view_count = view_count.delete(",").to_i64
-      end
-    end
-
-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-    description, description_html = html_to_description(description_html)
-
-    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
-    if length_seconds
-      length_seconds = decode_length_seconds(length_seconds.content)
-    else
-      length_seconds = -1
-    end
-
-    info = HTTP::Params.parse("length_seconds=#{length_seconds}")
-    channel_videos << Video.new(
-      id,
-      info,
-      Time.now,
-      title,
-      view_count,
-      0,   # Like count
-      0,   # Dislike count
-      0.0, # Wilson score
-      published,
-      description,
-      "", # Language,
-      author,
-      ucid,
-      [] of String, # Allowed regions
-      true,         # Is family friendly
-      ""            # Genre
-    )
-  end
-
-  return channel_videos
-end
--- a/src/invidious/helpers/helpers.cr
+++ b/src/invidious/helpers/helpers.cr
@ -286,3 +286,91 @@ def html_to_description(description_html)

  return description, description_html
 end
+
+def extract_videos(nodeset, ucid = nil)
+  # Collects a SearchVideo for every node in nodeset that is not skipped below. TODO: Make this a 'common', so it makes more sense to be used here
+  videos = [] of SearchVideo
+
+  nodeset.each do |node|
+    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
+    if !anchor # no title link in this node
+      next
+    end
+
+    if anchor["href"].starts_with? "https://www.googleadservices.com" # presumably an ad link rather than a video
+      next
+    end
+
+    title = anchor.content.strip
+    id = anchor["href"].lchop("/watch?v=")
+
+    if ucid # only the truthiness of ucid is used; author fields are left empty
+      author = ""
+      author_id = ""
+    else
+      anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
+      if !anchor
+        next
+      end
+
+      author = anchor.content
+      author_id = anchor["href"].split("/")[-1] # last path segment of the byline link
+    end
+
+    # Skip playlists (nodes containing a "yt-playlist-renderer" div)
+    if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
+      next
+    end
+
+    # Skip movies (nodes containing a "yt-lockup-movie-top-content" div)
+    if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
+      next
+    end
+
+    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
+    if metadata.size == 0
+      next
+    elsif metadata.size == 1
+      if metadata[0].content.starts_with? "Starts" # no view count in this case; date is read from data-timestamp
+        view_count = 0_i64
+        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
+      else
+        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64 # first word after an optional "Streamed " prefix
+        published = Time.now # the single entry holds the view count, so there is no date to decode
+      end
+    else
+      published = decode_date(metadata[0].content) # first entry is used as the date, second as the view count
+
+      view_count = metadata[1].content.split(" ")[0]
+      if view_count == "No" # presumably "No views"; stored as zero
+        view_count = 0_i64
+      else
+        view_count = view_count.delete(",").to_i64
+      end
+    end
+
+    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
+    description, description_html = html_to_description(description_html)
+
+    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
+    if length_seconds
+      length_seconds = decode_length_seconds(length_seconds.content)
+    else
+      length_seconds = -1 # no "video-time" span found in this node
+    end
+
+    videos << SearchVideo.new(
+      title,
+      id,
+      author,
+      author_id,
+      published,
+      view_count,
+      description,
+      description_html,
+      length_seconds,
+    )
+  end
+
+  return videos
+end
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@ -5,7 +5,7 @@ class SearchVideo
    author:           String,
    ucid:             String,
    published:        Time,
-    view_count:       Int64,
+    views:            Int64,
    description:      String,
    description_html: String,
    length_seconds:   Int32,
@ -20,90 +20,8 @@ def search(query, page = 1, search_params = build_search_params(content_type: "v
  end

  html = XML.parse_html(html)
-  videos = [] of SearchVideo
-
-  html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
-    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
-    if !anchor
-      next
-    end
-
-    if anchor["href"].starts_with? "https://www.googleadservices.com"
-      next
-    end
-
-    title = anchor.content.strip
-    video_id = anchor["href"].lchop("/watch?v=")
-
-    anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
-    if !anchor
-      next
-    end
-    author = anchor.content
-    author_url = anchor["href"]
-    ucid = author_url.split("/")[-1]
-
-    # Skip playlists
-    if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))
-      next
-    end
-
-    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-    if metadata.size == 0
-      next
-    elsif metadata.size == 1
-      # Skip movies
-      if metadata[0].content.includes? "·"
-        next
-      end
-
-      if metadata[0].content.starts_with? "Starts"
-        view_count = 0_i64
-        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
-      else
-        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
-        published = Time.now
-      end
-    else
-      # Skip movies
-      if metadata[0].content.includes? "·"
-        next
-      end
-
-      published = decode_date(metadata[0].content)
-
-      view_count = metadata[1].content.split(" ")[0]
-      if view_count == "No"
-        view_count = 0_i64
-      else
-        view_count = view_count.delete(",").to_i64
-      end
-    end
-
-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-    description, description_html = html_to_description(description_html)
-
-    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
-    if length_seconds
-      length_seconds = decode_length_seconds(length_seconds.content)
-    else
-      length_seconds = -1
-    end
-
-    video = SearchVideo.new(
-      title,
-      video_id,
-      author,
-      ucid,
-      published,
-      view_count,
-      description,
-      description_html,
-      length_seconds,
-    )
-
-    videos << video
-  end
+  nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
+  videos = extract_videos(nodeset)

  return videos
 end