Extractors: Add support for shorts

Fixes #2708
This commit is contained in:
Samantaz Fox 2021-12-12 20:58:45 +01:00
parent 2ac19eb8fc
commit f54e247eb4
No known key found for this signature in database
GPG key ID: F42821059186176E
2 changed files with 26 additions and 9 deletions

View file

@ -21,10 +21,17 @@ def elapsed_text(elapsed)
end
# Converts a colon-separated duration string (up to hours:minutes:seconds) into a
# total number of seconds.
#
# NOTE(review): this listing looks like a rendered diff without +/- markers, so the
# removed and the added lines of the commit appear together in the body below.
# The "previous"/"updated" labels are inferred from the hunk layout — confirm
# against the actual commit.
def decode_length_seconds(string)
# Presumably the previous version: strip everything except digits and colons,
# split on ":" and convert every part with to_i.
length_seconds = string.gsub(/[^0-9:]/, "").split(":").map &.to_i
# Presumably the updated version: keep only digits and colons...
length_seconds = string.gsub(/[^0-9:]/, "")
# ...and return 0 right away when nothing usable is left.
return 0_i32 if length_seconds.empty?
# Split on ":"; a part for which to_i? yields nil falls back to 0.
length_seconds = length_seconds.split(":").map { |x| x.to_i? || 0 }
# Left-pad with zeros so that indices 0, 1 and 2 can be read as hours, minutes, seconds.
length_seconds = [0] * (3 - length_seconds.size) + length_seconds
# Presumably the previous version: build a Time::Span, then convert it to seconds
# in a separate step.
length_seconds = Time::Span.new hours: length_seconds[0], minutes: length_seconds[1], seconds: length_seconds[2]
length_seconds = length_seconds.total_seconds.to_i
# Presumably the updated version: same computation as one expression, converted with to_i32.
length_seconds = Time::Span.new(
hours: length_seconds[0],
minutes: length_seconds[1],
seconds: length_seconds[2]
).total_seconds.to_i32
return length_seconds
end

View file

@ -49,6 +49,9 @@ private module Parsers
if author_info = item_contents.dig?("ownerText", "runs", 0)
author = author_info["text"].as_s
author_id = HelperExtractors.get_browse_id(author_info)
elsif author_info = item_contents.dig?("shortBylineText", "runs", 0)
author = author_info["text"].as_s
author_id = HelperExtractors.get_browse_id(author_info)
else
author = author_fallback.name
author_id = author_fallback.id
@ -68,18 +71,25 @@ private module Parsers
view_count = item_contents.dig?("viewCountText", "simpleText").try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64
description_html = item_contents["descriptionSnippet"]?.try { |t| parse_content(t) } || ""
# The length information *should* only always exist in "lengthText". However, the legacy Invidious code
# extracts from "thumbnailOverlays" when it doesn't. More testing is needed to see if this is
# actually needed
# The length information generally exists in "lengthText". However, the info can sometimes
# be retrieved from "thumbnailOverlays" (e.g. when the video is a "shorts" one).
if length_container = item_contents["lengthText"]?
length_seconds = decode_length_seconds(length_container["simpleText"].as_s)
elsif length_container = item_contents["thumbnailOverlays"]?.try &.as_a.find(&.["thumbnailOverlayTimeStatusRenderer"]?)
# This needs to only go down the `simpleText` path (if possible). If more situations come up that require
# a specific pathway then we should add an argument to extract_text that'll make this possible
length_seconds = length_container.dig?("thumbnailOverlayTimeStatusRenderer", "text", "simpleText")
length_text = length_container.dig?("thumbnailOverlayTimeStatusRenderer", "text", "simpleText")
if length_seconds
length_seconds = decode_length_seconds(length_seconds.as_s)
if length_text
length_text = length_text.as_s
if length_text == "SHORTS"
# Approximate length to one minute, as "shorts" generally don't exceed that length.
# TODO: Add some sort of metadata for the type of video (normal, live, premiere, shorts)
length_seconds = 60_i32
else
length_seconds = decode_length_seconds(length_text)
end
else
length_seconds = 0
end