Pull 'extract_videos' out into separate function
This commit is contained in:
parent 2f8716d97f
commit 15c26d022b

4 changed files with 157 additions and 249 deletions
src/invidious.cr (164 lines changed)

@@ -1283,23 +1283,31 @@ get "/feed/channel/:ucid" do |env|
   if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
     rss = client.get("/feeds/videos.xml?user=#{ucid}")
     rss = XML.parse_html(rss.body)

     ucid = rss.xpath_node("//feed/channelid")
     if !ucid
       error_message = "User does not exist."
       halt env, status_code: 404, response: error_message
     end

-    next env.redirect "/channel/#{ucid}"
+    ucid = ucid.content
+    next env.redirect "/feed/channel/#{ucid}"
   end

   url = produce_videos_url(ucid)
   response = client.get(url)
-  response = JSON.parse(response.body)
-  if !response["content_html"]?
-    error_message = "This channel does not exist."
-    halt env, status_code: 404, response: error_message
+  json = JSON.parse(response.body)
+
+  if json["content_html"].as_s.empty?
+    if response.status_code == 500
+      error_message = "This channel does not exist."
+      halt env, status_code: 404, response: error_message
+    else
+      next ""
+    end
   end

-  content_html = response["content_html"].as_s
+  content_html = json["content_html"].as_s
   document = XML.parse_html(content_html)

   channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
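For readers new to this code path: the `!ucid.match(...)` branch handles legacy usernames, which get resolved to a channel ID through YouTube's RSS endpoint before redirecting. A standalone sketch of that resolution using Crystal's stdlib XML module (the feed body below is fabricated):

```crystal
require "xml"

# Fabricated stand-in for the body returned by
# client.get("/feeds/videos.xml?user=#{ucid}").
body = %(<feed><channelId>UCxxxxxxxxxxxxxxxxxxxxxx</channelId></feed>)

# The HTML parser lowercases tag names, which is why the handler's
# XPath queries "//feed/channelid" rather than "//feed/channelId".
rss = XML.parse_html(body)
ucid = rss.xpath_node("//feed/channelid")
puts ucid.content if ucid # => "UCxxxxxxxxxxxxxxxxxxxxxx"
```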
@@ -1321,7 +1329,8 @@ get "/feed/channel/:ucid" do |env|
           xml.element("uri") { xml.text "#{host_url}/channel/#{ucid}" }
         end

-        extract_channel_videos(document, channel.author, ucid).each do |video|
+        nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
+        extract_videos(nodeset).each do |video|
           xml.element("entry") do
             xml.element("id") { xml.text "yt:video:#{video.id}" }
             xml.element("yt:videoId") { xml.text video.id }
@@ -1480,12 +1489,14 @@ get "/channel/:ucid" do |env|
   if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
     rss = client.get("/feeds/videos.xml?user=#{ucid}")
     rss = XML.parse_html(rss.body)

     ucid = rss.xpath_node("//feed/channelid")
     if !ucid
       error_message = "User does not exist."
       next templated "error"
     end

+    ucid = ucid.content
     next env.redirect "/channel/#{ucid}"
   end

@@ -1520,7 +1531,7 @@ get "/channel/:ucid" do |env|
     id = HTTP::Params.parse(href.query.not_nil!)["v"]
     title = node.content

-    videos << ChannelVideo.new(id, title, Time.now, Time.now, ucid, author)
+    videos << ChannelVideo.new(id, title, Time.now, Time.now, "", "")
   end

   templated "channel"
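Context for the unchanged first line of this hunk: watch links carry the video ID in the `v` query parameter, which `HTTP::Params.parse` extracts. A runnable sketch with a fabricated query string:

```crystal
require "http/params"

# Fabricated watch-link query string; the handler gets it from href.query.
query = "v=jNQXAC9IVRw&list=PL0123456789"
id = HTTP::Params.parse(query)["v"]
puts id # => "jNQXAC9IVRw"
```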
@@ -2002,54 +2013,24 @@ get "/api/v1/trending" do |env|
   trending = XML.parse_html(trending)
   videos = JSON.build do |json|
     json.array do
-      trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"])).each do |node|
-        anchor = node.xpath_node(%q(.//h3/a)).not_nil!
-
-        title = anchor.content
-        id = anchor["href"].lchop("/watch?v=")
-
-        anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a)).not_nil!
-        author = anchor.content
-        author_url = anchor["href"]
-
-        metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-        if metadata.size == 0
-          next
-        elsif metadata.size == 1
-          view_count = metadata[0].content.rchop(" watching").delete(",").to_i64
-          published = Time.now
-        else
-          published = decode_date(metadata[0].content)
-
-          view_count = metadata[1].content.rchop(" views")
-          if view_count == "No"
-            view_count = 0_i64
-          else
-            view_count = view_count.delete(",").to_i64
-          end
-        end
-
-        description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-        description, description_html = html_to_description(description_html)
-
-        length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
-
+      nodeset = trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"]))
+      extract_videos(nodeset).each do |video|
         json.object do
-          json.field "title", title
-          json.field "videoId", id
+          json.field "title", video.title
+          json.field "videoId", video.id
           json.field "videoThumbnails" do
-            generate_thumbnails(json, id)
+            generate_thumbnails(json, video.id)
           end

-          json.field "lengthSeconds", length_seconds
-          json.field "viewCount", view_count
+          json.field "lengthSeconds", video.length_seconds
+          json.field "viewCount", video.views

-          json.field "author", author
-          json.field "authorUrl", author_url
+          json.field "author", video.author
+          json.field "authorUrl", "/channel/#{video.ucid}"

-          json.field "published", published.epoch
-          json.field "description", description
-          json.field "descriptionHtml", description_html
+          json.field "published", video.published.epoch
+          json.field "description", video.description
+          json.field "descriptionHtml", video.description_html
         end
       end
     end
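The handler keeps its `JSON.build` shape; only the per-video parsing moved into `extract_videos`. For reference, `JSON.build` from the Crystal stdlib streams JSON through a `JSON::Builder` rather than materializing intermediate hashes:

```crystal
require "json"

videos = JSON.build do |json|
  json.array do
    json.object do
      json.field "title", "Example video"  # scalar field
      json.field "viewCount", 1234_i64
      json.field "videoThumbnails" do      # nested value built in a block
        json.array { }
      end
    end
  end
end

puts videos # => [{"title":"Example video","viewCount":1234,"videoThumbnails":[]}]
```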
@@ -2096,16 +2077,17 @@ get "/api/v1/channels/:ucid" do |env|

   client = make_client(YT_URL)
   if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
-    rss = client.get("/feeds/videos.xml?user=#{ucid}").body
-    rss = XML.parse_html(rss)
+    rss = client.get("/feeds/videos.xml?user=#{ucid}")
+    rss = XML.parse_html(rss.body)

     ucid = rss.xpath_node("//feed/channelid")
-    if ucid
-      ucid = ucid.content
-    else
+    if !ucid
       env.response.content_type = "application/json"
       next {"error" => "User does not exist"}.to_json
     end

+    ucid = ucid.content
+    next env.redirect "/api/v1/channels/#{ucid}"
   end

   channel = get_channel(ucid, client, PG_DB, pull_all_videos: false)
@@ -2212,25 +2194,36 @@ get "/api/v1/channels/:ucid/videos" do |env|

   client = make_client(YT_URL)
   if !ucid.match(/UC[a-zA-Z0-9_-]{22}/)
-    rss = client.get("/feeds/videos.xml?user=#{ucid}").body
-    rss = XML.parse_html(rss)
+    rss = client.get("/feeds/videos.xml?user=#{ucid}")
+    rss = XML.parse_html(rss.body)

     ucid = rss.xpath_node("//feed/channelid")
-    if ucid
-      ucid = ucid.content
-    else
+    if !ucid
       env.response.content_type = "application/json"
       next {"error" => "User does not exist"}.to_json
     end

+    ucid = ucid.content
+    url = "/api/v1/channels/#{ucid}/videos"
+    if env.params.query
+      url += "?#{env.params.query}"
+    end
+    next env.redirect url
   end

   url = produce_videos_url(ucid, page)
   response = client.get(url)

   json = JSON.parse(response.body)
-  if !json["content_html"]? || json["content_html"].as_s.empty?
+  if !json["content_html"]?
     env.response.content_type = "application/json"
-    next {"error" => "No videos or nonexistent channel"}.to_json
+
+    if response.status_code == 500
+      response = {"Error" => "Channel does not exist"}.to_json
+      halt env, status_code: 404, response: response
+    else
+      next Array(String).new.to_json
+    end
   end

   content_html = json["content_html"].as_s
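The new branch redirects legacy usernames to the canonical UCID URL instead of serving them in place, and it keeps the query string so pagination and sorting survive the redirect. A runnable sketch of the URL rebuild (values fabricated; in the handler they come from Kemal's env):

```crystal
require "http/params"

ucid = "UCxxxxxxxxxxxxxxxxxxxxxx"
query = HTTP::Params.parse("page=2&sort_by=newest")

url = "/api/v1/channels/#{ucid}/videos"
url += "?#{query}" unless query.to_s.empty?
puts url # => "/api/v1/channels/UCxxxxxxxxxxxxxxxxxxxxxx/videos?page=2&sort_by=newest"
```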
@@ -2242,47 +2235,22 @@ get "/api/v1/channels/:ucid/videos" do |env|

   videos = JSON.build do |json|
     json.array do
-      document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
-        anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a)).not_nil!
-        title = anchor.content.strip
-        video_id = anchor["href"].lchop("/watch?v=")
-
-        metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-        if metadata.size == 0
-          next
-        elsif metadata.size == 1
-          view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
-          published = Time.now
-        else
-          published = decode_date(metadata[0].content)
-
-          view_count = metadata[1].content.split(" ")[0]
-          if view_count == "No"
-            view_count = 0_i64
-          else
-            view_count = view_count.delete(",").to_i64
-          end
-        end
-
-        description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-        description, description_html = html_to_description(description_html)
-
-        length_seconds = decode_length_seconds(node.xpath_node(%q(.//span[@class="video-time"])).not_nil!.content)
-
+      nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")]))
+      extract_videos(nodeset, ucid).each do |video|
         json.object do
-          json.field "title", title
-          json.field "videoId", video_id
+          json.field "title", video.title
+          json.field "videoId", video.id

           json.field "videoThumbnails" do
-            generate_thumbnails(json, video_id)
+            generate_thumbnails(json, video.id)
           end

-          json.field "description", description
-          json.field "descriptionHtml", description_html
+          json.field "description", video.description
+          json.field "descriptionHtml", video.description_html

-          json.field "viewCount", view_count
-          json.field "published", published.epoch
-          json.field "lengthSeconds", length_seconds
+          json.field "viewCount", video.views
+          json.field "published", video.published.epoch
+          json.field "lengthSeconds", video.length_seconds
         end
       end
     end
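`generate_thumbnails` is an existing helper, unchanged by this commit. For orientation, a minimal illustrative equivalent that emits YouTube's stock thumbnail variants (the real helper's quality list and field set may differ):

```crystal
require "json"

# Illustrative stand-in, not the project's implementation.
def generate_thumbnails(json : JSON::Builder, id : String)
  json.array do
    {"default", "mqdefault", "hqdefault"}.each do |name|
      json.object do
        json.field "quality", name
        json.field "url", "https://i.ytimg.com/vi/#{id}/#{name}.jpg"
      end
    end
  end
end

# Usage mirrors the call sites above:
puts JSON.build { |json| generate_thumbnails(json, "dQw4w9WgXcQ") }
```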
@@ -2344,7 +2312,7 @@ get "/api/v1/search" do |env|
       json.field "description", video.description
       json.field "descriptionHtml", video.description_html

-      json.field "viewCount", video.view_count
+      json.field "viewCount", video.views
       json.field "published", video.published.epoch
       json.field "lengthSeconds", video.length_seconds
     end

@@ -130,69 +130,3 @@ def fetch_channel(ucid, client, db, pull_all_videos = true)

   return channel
 end
-
-def extract_channel_videos(document, author, ucid)
-  channel_videos = [] of Video
-  document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])).each do |node|
-    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
-    if !anchor
-      next
-    end
-
-    if anchor["href"].starts_with? "https://www.googleadservices.com"
-      next
-    end
-
-    title = anchor.content.strip
-    id = anchor["href"].lchop("/watch?v=")
-
-    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-    if metadata.size == 0
-      next
-    elsif metadata.size == 1
-      view_count = metadata[0].content.split(" ")[0].delete(",").to_i64
-      published = Time.now
-    else
-      published = decode_date(metadata[0].content)
-
-      view_count = metadata[1].content.split(" ")[0]
-      if view_count == "No"
-        view_count = 0_i64
-      else
-        view_count = view_count.delete(",").to_i64
-      end
-    end
-
-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-    description, description_html = html_to_description(description_html)
-
-    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
-    if length_seconds
-      length_seconds = decode_length_seconds(length_seconds.content)
-    else
-      length_seconds = -1
-    end
-
-    info = HTTP::Params.parse("length_seconds=#{length_seconds}")
-    channel_videos << Video.new(
-      id,
-      info,
-      Time.now,
-      title,
-      view_count,
-      0,   # Like count
-      0,   # Dislike count
-      0.0, # Wilson score
-      published,
-      description,
-      "", # Language,
-      author,
-      ucid,
-      [] of String, # Allowed regions
-      true,         # Is family friendly
-      ""            # Genre
-    )
-  end
-
-  return channel_videos
-end
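The deleted `extract_channel_videos` was one of four near-identical scrapers, each repeating the same view-count string handling that `extract_videos` now owns. The pattern, as runnable string handling (inputs fabricated):

```crystal
# "1,234 views" => 1234; "No views" => 0 (the "No" special case below).
view_count = "1,234 views".split(" ")[0]
count = view_count == "No" ? 0_i64 : view_count.delete(",").to_i64
puts count # => 1234
```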
@@ -286,3 +286,91 @@ def html_to_description(description_html)

   return description, description_html
 end
+
+def extract_videos(nodeset, ucid = nil)
+  # TODO: Make this a 'common', so it makes more sense to be used here
+  videos = [] of SearchVideo
+
+  nodeset.each do |node|
+    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
+    if !anchor
+      next
+    end
+
+    if anchor["href"].starts_with? "https://www.googleadservices.com"
+      next
+    end
+
+    title = anchor.content.strip
+    id = anchor["href"].lchop("/watch?v=")
+
+    if ucid
+      author = ""
+      author_id = ""
+    else
+      anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
+      if !anchor
+        next
+      end
+
+      author = anchor.content
+      author_id = anchor["href"].split("/")[-1]
+    end
+
+    # Skip playlists
+    if node.xpath_node(%q(.//div[contains(@class, "yt-playlist-renderer")]))
+      next
+    end
+
+    # Skip movies
+    if node.xpath_node(%q(.//div[contains(@class, "yt-lockup-movie-top-content")]))
+      next
+    end
+
+    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
+    if metadata.size == 0
+      next
+    elsif metadata.size == 1
+      if metadata[0].content.starts_with? "Starts"
+        view_count = 0_i64
+        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
+      else
+        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
+        published = Time.now
+      end
+    else
+      published = decode_date(metadata[0].content)
+
+      view_count = metadata[1].content.split(" ")[0]
+      if view_count == "No"
+        view_count = 0_i64
+      else
+        view_count = view_count.delete(",").to_i64
+      end
+    end
+
+    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
+    description, description_html = html_to_description(description_html)
+
+    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
+    if length_seconds
+      length_seconds = decode_length_seconds(length_seconds.content)
+    else
+      length_seconds = -1
+    end
+
+    videos << SearchVideo.new(
+      title,
+      id,
+      author,
+      author_id,
+      published,
+      view_count,
+      description,
+      description_html,
+      length_seconds,
+    )
+  end
+
+  return videos
+end
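`extract_videos` leans on two pre-existing helpers, `decode_date` and `decode_length_seconds`. For orientation, a minimal illustrative equivalent of the latter (the project's real implementation may differ):

```crystal
# Turns "4:20" or "1:02:03" style timestamps into seconds.
def decode_length_seconds(string : String) : Int32
  string.split(":").map(&.to_i).reduce(0) { |acc, part| acc * 60 + part }
end

puts decode_length_seconds("4:20")    # => 260
puts decode_length_seconds("1:02:03") # => 3723
```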
@@ -5,7 +5,7 @@ class SearchVideo
     author: String,
     ucid: String,
     published: Time,
-    view_count: Int64,
+    views: Int64,
     description: String,
     description_html: String,
     length_seconds: Int32,
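With `view_count` renamed to `views`, every call site reads `video.views`. A minimal stand-in for the resulting shape, written as a plain `record` for illustration (the real class uses the project's mapping macro, and the leading fields are inferred from the `SearchVideo.new` call in `extract_videos` above):

```crystal
record SearchVideo,
  title : String,
  id : String,
  author : String,
  ucid : String,
  published : Time,
  views : Int64,
  description : String,
  description_html : String,
  length_seconds : Int32

# Fabricated values, positional order as in extract_videos:
video = SearchVideo.new("Example", "xxxxxxxxxxx", "Example Author",
  "UCxxxxxxxxxxxxxxxxxxxxxx", Time.now, 1234_i64, "", "", 260)
puts video.views # => 1234
```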
@@ -20,90 +20,8 @@ def search(query, page = 1, search_params = build_search_params(content_type: "v
   end

   html = XML.parse_html(html)
-  videos = [] of SearchVideo
-
-  html.xpath_nodes(%q(//ol[@class="item-section"]/li)).each do |node|
-    anchor = node.xpath_node(%q(.//h3[contains(@class,"yt-lockup-title")]/a))
-    if !anchor
-      next
-    end
-
-    if anchor["href"].starts_with? "https://www.googleadservices.com"
-      next
-    end
-
-    title = anchor.content.strip
-    video_id = anchor["href"].lchop("/watch?v=")
-
-    anchor = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-byline")]/a))
-    if !anchor
-      next
-    end
-    author = anchor.content
-    author_url = anchor["href"]
-    ucid = author_url.split("/")[-1]
-
-    # Skip playlists
-    if node.xpath_node(%q(.//ol[contains(@class, "yt-lockup-playlist-items")]))
-      next
-    end
-
-    metadata = node.xpath_nodes(%q(.//div[contains(@class,"yt-lockup-meta")]/ul/li))
-    if metadata.size == 0
-      next
-    elsif metadata.size == 1
-      # Skip movies
-      if metadata[0].content.includes? "·"
-        next
-      end
-
-      if metadata[0].content.starts_with? "Starts"
-        view_count = 0_i64
-        published = Time.epoch(metadata[0].xpath_node(%q(.//span)).not_nil!["data-timestamp"].to_i64)
-      else
-        view_count = metadata[0].content.lchop("Streamed ").split(" ")[0].delete(",").to_i64
-        published = Time.now
-      end
-    else
-      # Skip movies
-      if metadata[0].content.includes? "·"
-        next
-      end
-
-      published = decode_date(metadata[0].content)
-
-      view_count = metadata[1].content.split(" ")[0]
-      if view_count == "No"
-        view_count = 0_i64
-      else
-        view_count = view_count.delete(",").to_i64
-      end
-    end
-
-    description_html = node.xpath_node(%q(.//div[contains(@class, "yt-lockup-description")]))
-    description, description_html = html_to_description(description_html)
-
-    length_seconds = node.xpath_node(%q(.//span[@class="video-time"]))
-    if length_seconds
-      length_seconds = decode_length_seconds(length_seconds.content)
-    else
-      length_seconds = -1
-    end
-
-    video = SearchVideo.new(
-      title,
-      video_id,
-      author,
-      ucid,
-      published,
-      view_count,
-      description,
-      description_html,
-      length_seconds,
-    )
-
-    videos << video
-  end
+  nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li))
+  videos = extract_videos(nodeset)

   return videos
 end
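One subtlety the shared helper preserves from this deleted block: "Starts ..." entries (scheduled live streams) carry their start time as a Unix timestamp in a `data-timestamp` attribute, which `Time.epoch` converts:

```crystal
# Time.epoch turns a Unix timestamp into a Time (value fabricated).
puts Time.epoch(1_500_000_000) # => 2017-07-14 02:40:00 UTC
```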