mirror of
https://gitea.invidious.io/iv-org/invidious.git
synced 2024-08-15 00:53:41 +00:00
Add method to parse transcript JSON into structs
This commit is contained in:
parent
8e18d445a7
commit
4b3ac1a757
1 changed file with 37 additions and 0 deletions
|
@ -1,6 +1,8 @@
|
||||||
module Invidious::Videos
|
module Invidious::Videos
|
||||||
# Namespace for methods primarily relating to Transcripts
|
# Namespace for methods primarily relating to Transcripts
|
||||||
module Transcript
|
module Transcript
|
||||||
|
# A single line of a transcript: the time spans at which it starts and ends
# (both stored as `Time::Span`, despite the `_ms` suffix) and its text.
record TranscriptLine, start_ms : Time::Span, end_ms : Time::Span, line : String
|
||||||
|
|
||||||
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
|
def self.generate_param(video_id : String, language_code : String, auto_generated : Bool) : String
|
||||||
if !auto_generated
|
if !auto_generated
|
||||||
is_auto_generated = ""
|
is_auto_generated = ""
|
||||||
|
@ -30,5 +32,40 @@ module Invidious::Videos
|
||||||
|
|
||||||
return params
|
return params
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Builds the WebVTT text for a transcript.
#
# At the moment only the WebVTT header ("WEBVTT", "Kind: captions" and
# "Language: <target_language>") followed by blank lines is written.
#
# NOTE(review): *initial_data* is not read yet. The placeholder comment below
# suggests the cues are meant to be generated from it - confirm and implement.
#
# Returns the generated WebVTT document as a String.
def self.convert_transcripts_to_vtt(initial_data : JSON::Any, target_language : String) : String
  # Convert into TranscriptLine

  String.build do |vtt|
    # Write to the builder yielded by String.build and interpolate the
    # *target_language* argument. The previous code referenced the undefined
    # names `result` and `tlang`, which cannot compile.
    vtt << <<-END_VTT
    WEBVTT
    Kind: captions
    Language: #{target_language}


    END_VTT

    vtt << "\n\n"
  end
end
|
||||||
|
|
||||||
|
# Parses the transcript payload in *initial_data* into an array of
# `TranscriptLine`.
#
# The segments are read from the "initialSegments" array nested under the
# first entry of "actions". Each segment's "transcriptSegmentRenderer" supplies
# "startMs" and "endMs" (strings holding integer millisecond values) and a
# "snippet" that is passed to `extract_text`; when that yields nil the line
# text falls back to an empty string.
#
# Raises if any of the expected keys is missing or a timestamp is not an
# integer string.
def self.parse(initial_data : Hash(String, JSON::Any))
  segments = initial_data.dig(
    "actions", 0, "updateEngagementPanelAction", "content", "transcriptRenderer",
    "content", "transcriptSearchPanelRenderer", "body", "transcriptSegmentListRenderer",
    "initialSegments"
  ).as_a

  segments.map do |segment|
    renderer = segment["transcriptSegmentRenderer"]

    # Timestamps arrive as strings and are converted to Time::Span values.
    starts_at = renderer["startMs"].as_s.to_i.millisecond
    ends_at = renderer["endMs"].as_s.to_i.millisecond

    TranscriptLine.new(starts_at, ends_at, extract_text(renderer["snippet"]) || "")
  end
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue