Pull 'extract_videos' out into separate function

parent 2f8716d97f
commit 15c26d022b

4 changed files with 157 additions and 249 deletions

@@ -130,69 +130,3 @@ def fetch_channel(ucid, client, db, pull_all_videos = true)
  return channel
end

def extract_channel_videos(document, author, ucid)
  channel_videos = [] of Video
  document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    if !anchor
      next
    end

    if anchor["href"].starts_with? "https://www.googleadservices.com"
      next
    end

    title = anchor.content.strip
    id = anchor["href"].lchop("/watch?v=")

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    if metadata.size == 0
      next
    elsif metadata.size == 1
      view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
      published = Time.now
    else
      published = decode_date(metadata[0].content)

      view_count = metadata[1].content.split(" ")[0]
      if view_count == "No"
        view_count = 0_i64
      else
        view_count = view_count.delete(",").to_i64
      end
    end

    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    description, description_html = html_to_description(description_html)

    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
    if length_seconds
      length_seconds = decode_length_seconds(length_seconds.content)
    else
      length_seconds = -1
    end

    info = HTTP::Params.parse("length_seconds=#{length_seconds}")
    channel_videos << Video.new(
      id,
      info,
      Time.now,
      title,
      view_count,
      0,   # Like count
      0,   # Dislike count
      0.0, # Wilson score
      published,
      description,
      "",  # Language,
      author,
      ucid,
      [] of String, # Allowed regions
      true,         # Is family friendly
      ""            # Genre
    )
  end

  return channel_videos
end

@@ -286,3 +286,91 @@ def html_to_description(description_html)
  return description, description_html
end

def extract_videos(nodeset, ucid = nil)
  # TODO: Make this a 'common', so it makes more sense to be used here
  videos = [] of SearchVideo

  nodeset.each do |node|
    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    if !anchor
      next
    end

    if anchor["href"].starts_with? "https://www.googleadservices.com"
      next
    end

    title = anchor.content.strip
    id = anchor["href"].lchop("/watch?v=")

    if ucid
      author = ""
      author_id = ""
    else
      anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
      if !anchor
        next
      end

      author = anchor.content
      author_id = anchor["href"].split("/")[-1]
    end

    # Skip playlists
    if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
      next
    end

    # Skip movies
    if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
      next
    end

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    if metadata.size == 0
      next
    elsif metadata.size == 1
      if metadata[0].content.starts_with? "Starts"
        view_count = 0_i64
        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
      else
        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
        published = Time.now
      end
    else
      published = decode_date(metadata[0].content)

      view_count = metadata[1].content.split(" ")[0]
      if view_count == "No"
        view_count = 0_i64
      else
        view_count = view_count.delete(",").to_i64
      end
    end

    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    description, description_html = html_to_description(description_html)

    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
    if length_seconds
      length_seconds = decode_length_seconds(length_seconds.content)
    else
      length_seconds = -1
    end

    videos << SearchVideo.new(
      title,
      id,
      author,
      author_id,
      published,
      view_count,
      description,
      description_html,
      length_seconds,
    )
  end

  return videos
end
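
A note on the new helper: when a ucid is passed in (as a channel-page caller presumably would), extract_videos skips the yt-lockup-byline lookup and returns blank author/author_id fields for the caller to fill in; without a ucid it reads both from the byline anchor of each node. Below is a minimal sketch of the two call patterns. It assumes it runs inside the Invidious codebase (where extract_videos, SearchVideo, and the helpers above are defined); the channel_body/results_body variables and the ucid string are hypothetical placeholders, the channel-page XPath is the one from the removed extract_channel_videos, and the search XPath is the one from the search hunk further down.

require "xml"

# Hypothetical raw HTML, fetched elsewhere; placeholders, not part of this commit.
channel_body = "<html>...</html>"
results_body = "<html>...</html>"

# Channel path: pass the ucid, so the byline lookup is skipped and
# author/author_id come back blank for the caller to fill in.
channel_page = XML.parse_html(channel_body)
channel_nodeset = channel_page.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
channel_videos = extract_videos(channel_nodeset, "UC_placeholder_ucid")

# Search path: omit the ucid, so author and author_id are read from the
# yt-lockup-byline anchor inside each result node.
results_page = XML.parse_html(results_body)
results_nodeset = results_page.xpath_nodes(%q(//ol[@class="item-section"]/li))
results = extract_videos(results_nodeset)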

@@ -5,7 +5,7 @@ class SearchVideo
    author: String,
    ucid: String,
    published: Time,
    view_count: Int64,
    views: Int64,
    description: String,
    description_html: String,
    length_seconds: Int32,

@@ -20,90 +20,8 @@ def search(query, page = 1, search_params = build_search_params(content_type: "v
  end

  html = XML.parse_html(html)
  videos = [] of SearchVideo

  html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
    if !anchor
      next
    end

    if anchor["href"].starts_with? "https://www.googleadservices.com"
      next
    end

    title = anchor.content.strip
    video_id = anchor["href"].lchop("/watch?v=")

    anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
    if !anchor
      next
    end
    author = anchor.content
    author_url = anchor["href"]
    ucid = author_url.split("/")[-1]

    # Skip playlists
    if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))
      next
    end

    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
    if metadata.size == 0
      next
    elsif metadata.size == 1
      # Skip movies
      if metadata[0].content.includes? "·"
        next
      end

      if metadata[0].content.starts_with? "Starts"
        view_count = 0_i64
        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
      else
        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
        published = Time.now
      end
    else
      # Skip movies
      if metadata[0].content.includes? "·"
        next
      end

      published = decode_date(metadata[0].content)

      view_count = metadata[1].content.split(" ")[0]
      if view_count == "No"
        view_count = 0_i64
      else
        view_count = view_count.delete(",").to_i64
      end
    end

    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
    description, description_html = html_to_description(description_html)

    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
    if length_seconds
      length_seconds = decode_length_seconds(length_seconds.content)
    else
      length_seconds = -1
    end

    video = SearchVideo.new(
      title,
      video_id,
      author,
      ucid,
      published,
      view_count,
      description,
      description_html,
      length_seconds,
    )

    videos << video
  end
  nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
  videos = extract_videos(nodeset)

  return videos
end
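
For clarity, after this hunk the body of search reduces to the following. This is reconstructed from the context and replacement lines shown above; the request handling earlier in the function (not shown in the hunk) is unchanged.

  # inside def search(query, page = 1, search_params = build_search_params(...)),
  # once the raw results-page HTML has been fetched into `html`:
  html = XML.parse_html(html)

  nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
  videos = extract_videos(nodeset)

  return videos
end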