WebVTT::Builder: Add logic to escape special chars (#4414)

Note: WebVTT does allow some tags in the cue payload under certain
circumstances, whereas this PR blindly escapes everything, so any such
tags are emitted as literal text rather than being interpreted:
https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_payload_text_tags
This commit is contained in:
Samantaz Fox 2024-02-19 00:03:21 +01:00
commit 962ce23cc2
No known key found for this signature in database
GPG key ID: F42821059186176E
2 changed files with 59 additions and 22 deletions

View file

@ -1,34 +1,27 @@
require "../../spec_helper.cr" require "../../spec_helper.cr"
MockLines = [ MockLines = ["Line 1", "Line 2"]
{ MockLinesWithEscapableCharacter = ["<Line 1>", "&Line 2>", '\u200E' + "Line\u200F 3", "\u00A0Line 4"]
"start_time": Time::Span.new(seconds: 1),
"end_time": Time::Span.new(seconds: 2),
"text": "Line 1",
},
{
"start_time": Time::Span.new(seconds: 2),
"end_time": Time::Span.new(seconds: 3),
"text": "Line 2",
},
]
Spectator.describe "WebVTT::Builder" do Spectator.describe "WebVTT::Builder" do
it "correctly builds a vtt file" do it "correctly builds a vtt file" do
result = WebVTT.build do |vtt| result = WebVTT.build do |vtt|
MockLines.each do |line| 2.times do |i|
vtt.cue(line["start_time"], line["end_time"], line["text"]) vtt.cue(
Time::Span.new(seconds: i),
Time::Span.new(seconds: i + 1),
MockLines[i]
)
end end
end end
expect(result).to eq([ expect(result).to eq([
"WEBVTT", "WEBVTT",
"", "",
"00:00:01.000 --> 00:00:02.000", "00:00:00.000 --> 00:00:01.000",
"Line 1", "Line 1",
"", "",
"00:00:02.000 --> 00:00:03.000", "00:00:01.000 --> 00:00:02.000",
"Line 2", "Line 2",
"", "",
"", "",
@ -42,8 +35,12 @@ Spectator.describe "WebVTT::Builder" do
} }
result = WebVTT.build(setting_fields) do |vtt| result = WebVTT.build(setting_fields) do |vtt|
MockLines.each do |line| 2.times do |i|
vtt.cue(line["start_time"], line["end_time"], line["text"]) vtt.cue(
Time::Span.new(seconds: i),
Time::Span.new(seconds: i + 1),
MockLines[i]
)
end end
end end
@ -52,13 +49,39 @@ Spectator.describe "WebVTT::Builder" do
"Kind: captions", "Kind: captions",
"Language: en", "Language: en",
"", "",
"00:00:01.000 --> 00:00:02.000", "00:00:00.000 --> 00:00:01.000",
"Line 1", "Line 1",
"", "",
"00:00:02.000 --> 00:00:03.000", "00:00:01.000 --> 00:00:02.000",
"Line 2", "Line 2",
"", "",
"", "",
].join('\n')) ].join('\n'))
end end
# Every character that has a WebVTT escape sequence must be written out as
# that sequence in the cue payload, never verbatim.
it "properly escapes characters" do
  result = WebVTT.build do |vtt|
    4.times do |index|
      vtt.cue(
        Time::Span.new(seconds: index),
        Time::Span.new(seconds: index + 1),
        MockLinesWithEscapableCharacter[index]
      )
    end
  end

  # One cue per mock line, each followed by a blank separator line.
  expected = [
    "WEBVTT",
    "",
    "00:00:00.000 --> 00:00:01.000",
    "&lt;Line 1&gt;",
    "",
    "00:00:01.000 --> 00:00:02.000",
    "&amp;Line 2&gt;",
    "",
    "00:00:02.000 --> 00:00:03.000",
    "&lrm;Line&rlm; 3",
    "",
    "00:00:03.000 --> 00:00:04.000",
    "&nbsp;Line 4",
    "",
    "",
  ].join('\n')

  expect(result).to eq(expected)
end
end end

View file

@ -4,13 +4,23 @@
module WebVTT module WebVTT
# A WebVTT builder generates WebVTT files # A WebVTT builder generates WebVTT files
private class Builder private class Builder
# Lookup table used by `escape`: each key is a single character that gets
# replaced in cue text, and each value is the character reference written
# in its place.
# See https://developer.mozilla.org/en-US/docs/Web/API/WebVTT_API#cue_payload
private ESCAPE_SUBSTITUTIONS = {
'&' => "&amp;",
'<' => "&lt;",
'>' => "&gt;",
'\u200E' => "&lrm;",
'\u200F' => "&rlm;",
'\u00A0' => "&nbsp;",
}
def initialize(@io : IO) def initialize(@io : IO)
end end
# Writes a vtt cue with the specified time stamp and contents # Writes a vtt cue with the specified time stamp and contents
def cue(start_time : Time::Span, end_time : Time::Span, text : String) def cue(start_time : Time::Span, end_time : Time::Span, text : String)
timestamp(start_time, end_time) timestamp(start_time, end_time)
@io << text @io << self.escape(text)
@io << "\n\n" @io << "\n\n"
end end
@ -29,6 +39,10 @@ module WebVTT
@io << '.' << timestamp.milliseconds.to_s.rjust(3, '0') @io << '.' << timestamp.milliseconds.to_s.rjust(3, '0')
end end
# Returns a copy of *text* in which every character listed as a key of
# ESCAPE_SUBSTITUTIONS is replaced by its mapped character reference.
# Characters without an entry are left untouched.
private def escape(text : String) : String
  text.gsub(ESCAPE_SUBSTITUTIONS)
end
def document(setting_fields : Hash(String, String)? = nil, &) def document(setting_fields : Hash(String, String)? = nil, &)
@io << "WEBVTT\n" @io << "WEBVTT\n"