mirror of
https://gitea.invidious.io/iv-org/invidious-copy-2023-06-08.git
synced 2024-08-15 00:53:38 +00:00
Parse HTML properly instead of relying on regexes
This commit is contained in:
parent
b8fe82a7f7
commit
d573461a67
1 changed file with 26 additions and 3 deletions
|
@ -132,8 +132,19 @@ def fetch_video(id, client)
|
||||||
dislikes = dislikes ? dislikes.content.delete(",").to_i : 0
|
dislikes = dislikes ? dislikes.content.delete(",").to_i : 0
|
||||||
|
|
||||||
description = html.xpath_node(%q(//p[@id="eow-description"]))
|
description = html.xpath_node(%q(//p[@id="eow-description"]))
|
||||||
|
if description
|
||||||
|
description.xpath_nodes(%q(//a/@href)).each do |match|
|
||||||
|
uri = URI.parse(match.content)
|
||||||
|
|
||||||
|
if uri.host =~ /(www\.)?youtube.com/
|
||||||
|
uri = uri.full_path
|
||||||
|
puts uri
|
||||||
|
end
|
||||||
|
|
||||||
|
match.content = uri.to_s
|
||||||
|
end
|
||||||
|
end
|
||||||
description = description ? description.to_xml : ""
|
description = description ? description.to_xml : ""
|
||||||
description = description.gsub(/(https:\/\/)|(http:\/\/)?(www\.)?(youtube\.com)/, "")
|
|
||||||
|
|
||||||
wilson_score = ci_lower_bound(likes, likes + dislikes)
|
wilson_score = ci_lower_bound(likes, likes + dislikes)
|
||||||
|
|
||||||
|
@ -278,6 +289,20 @@ def template_comments(root)
|
||||||
author = child["data"]["author"]
|
author = child["data"]["author"]
|
||||||
score = child["data"]["score"]
|
score = child["data"]["score"]
|
||||||
body_html = HTML.unescape(child["data"]["body_html"].as_s)
|
body_html = HTML.unescape(child["data"]["body_html"].as_s)
|
||||||
|
body_html = XML.parse_html(body_html)
|
||||||
|
|
||||||
|
body_html.xpath_nodes(%q(//a/@href)).each do |match|
|
||||||
|
uri = URI.parse(match.content)
|
||||||
|
|
||||||
|
if uri.host =~ /(www\.)?youtube.com/
|
||||||
|
uri = uri.full_path
|
||||||
|
puts uri
|
||||||
|
end
|
||||||
|
|
||||||
|
match.content = uri.to_s
|
||||||
|
end
|
||||||
|
|
||||||
|
body_html = body_html.to_s
|
||||||
|
|
||||||
replies_html = ""
|
replies_html = ""
|
||||||
if child["data"]["replies"] != ""
|
if child["data"]["replies"] != ""
|
||||||
|
@ -317,8 +342,6 @@ def template_comments(root)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
html = html.gsub(/(https:\/\/)|(http:\/\/)?(www\.)?(youtube\.com)/, "")
|
|
||||||
|
|
||||||
return html
|
return html
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue