Only modify cues for auto-generated captions

2024-08-15 00:43:26 +00:00 · 2019-05-18 20:27:19 -05:00 · 2019-05-18 20:27:19 -05:00 · 5730280325
commit 5730280325
parent ab4df7e078
1 changed file with 35 additions and 26 deletions
--- a/src/invidious.cr
+++ b/src/invidious.cr
@@ -3203,45 +3203,54 @@ get "/api/v1/captions/:id" do |env|
    caption = caption[0]
  end
-  caption_xml = client.get(caption.baseUrl + "&tlang=#{tlang}").body
+  url = caption.baseUrl + "&tlang=#{tlang}"
  caption_xml = XML.parse(caption_xml)
-  webvtt = <<-END_VTT
+  # Auto-generated captions often have cues that aren't aligned properly with the video,
-  WEBVTT
+  # as well as some other markup that makes it cumbersome, so we try to fix that here
-  Kind: captions
+  if caption.name.simpleText.includes? "auto-generated"
-  Language: #{tlang || caption.languageCode}
+    caption_xml = client.get(url).body
    caption_xml = XML.parse(caption_xml)
    webvtt = <<-END_VTT
    WEBVTT
    Kind: captions
    Language: #{tlang || caption.languageCode}
-  END_VTT
+    END_VTT
-  caption_nodes = caption_xml.xpath_nodes("//transcript/text")
+    caption_nodes = caption_xml.xpath_nodes("//transcript/text")
-  caption_nodes.each_with_index do |node, i|
+    caption_nodes.each_with_index do |node, i|
-    start_time = node["start"].to_f.seconds
+      start_time = node["start"].to_f.seconds
-    duration = node["dur"]?.try &.to_f.seconds
+      duration = node["dur"]?.try &.to_f.seconds
-    duration ||= start_time
+      duration ||= start_time
-    if caption_nodes.size > i + 1
+      if caption_nodes.size > i + 1
-      end_time = caption_nodes[i + 1]["start"].to_f.seconds
+        end_time = caption_nodes[i + 1]["start"].to_f.seconds
-    else
+      else
-      end_time = start_time + duration
+        end_time = start_time + duration
-    end
+      end
-    start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
+      start_time = "#{start_time.hours.to_s.rjust(2, '0')}:#{start_time.minutes.to_s.rjust(2, '0')}:#{start_time.seconds.to_s.rjust(2, '0')}.#{start_time.milliseconds.to_s.rjust(3, '0')}"
-    end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
+      end_time = "#{end_time.hours.to_s.rjust(2, '0')}:#{end_time.minutes.to_s.rjust(2, '0')}:#{end_time.seconds.to_s.rjust(2, '0')}.#{end_time.milliseconds.to_s.rjust(3, '0')}"
-    text = HTML.unescape(node.content)
+      text = HTML.unescape(node.content)
-    text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
+      text = text.gsub(/<font color="#[a-fA-F0-9]{6}">/, "")
-    text = text.gsub(/<\/font>/, "")
+      text = text.gsub(/<\/font>/, "")
-    if md = text.match(/(?<name>.*) : (?<text>.*)/)
+      if md = text.match(/(?<name>.*) : (?<text>.*)/)
-      text = "<v #{md["name"]}>#{md["text"]}</v>"
+        text = "<v #{md["name"]}>#{md["text"]}</v>"
-    end
+      end
-    webvtt += <<-END_CUE
+      webvtt += <<-END_CUE
    #{start_time} --> #{end_time}
    #{text}
    END_CUE
    end
  else
    url += "&format=vtt"
    webvtt = client.get(url).body
  end
  if title = env.params.query["title"]?