From 1eca969cf6b4096789014619285c98d1def40ee3 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Mon, 15 Jun 2020 17:33:23 -0500 Subject: [PATCH] Add support for polymer redesign --- config/sql/videos.sql | 17 - spec/helpers_spec.cr | 4 +- src/invidious.cr | 173 ++-- src/invidious/channels.cr | 32 +- src/invidious/comments.cr | 24 +- src/invidious/helpers/helpers.cr | 168 +++- src/invidious/helpers/jobs.cr | 2 +- src/invidious/helpers/signatures.cr | 4 +- src/invidious/helpers/utils.cr | 2 +- src/invidious/mixes.cr | 1 - src/invidious/search.cr | 69 +- src/invidious/trending.cr | 24 +- src/invidious/users.cr | 4 +- src/invidious/videos.cr | 926 ++++++++-------------- src/invidious/views/components/item.ecr | 4 +- src/invidious/views/components/player.ecr | 12 +- src/invidious/views/watch.ecr | 54 +- 17 files changed, 634 insertions(+), 886 deletions(-) diff --git a/config/sql/videos.sql b/config/sql/videos.sql index 6ded01de..8def2f83 100644 --- a/config/sql/videos.sql +++ b/config/sql/videos.sql @@ -7,23 +7,6 @@ CREATE TABLE public.videos id text NOT NULL, info text, updated timestamp with time zone, - title text, - views bigint, - likes integer, - dislikes integer, - wilson_score double precision, - published timestamp with time zone, - description text, - language text, - author text, - ucid text, - allowed_regions text[], - is_family_friendly boolean, - genre text, - genre_url text, - license text, - sub_count_text text, - author_thumbnail text, CONSTRAINT videos_pkey PRIMARY KEY (id) ); diff --git a/spec/helpers_spec.cr b/spec/helpers_spec.cr index 37e36c61..26922bb2 100644 --- a/spec/helpers_spec.cr +++ b/spec/helpers_spec.cr @@ -27,9 +27,9 @@ describe "Helper" do describe "#produce_channel_search_url" do it "correctly produces token for searching a specific channel" do - produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("/browse_ajax?continuation=4qmFsgI-EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWnpaV0Z5WTJnd0FqZ0JZQUZxQUxnQkFIb0RNVEF3WgA%3D&gl=US&hl=en") + produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "", 100).should eq("/browse_ajax?continuation=4qmFsgI2EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0RNVEF3dUFFQVoA&gl=US&hl=en") - produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("/browse_ajax?continuation=4qmFsgJ8EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaIEVnWnpaV0Z5WTJnd0FqZ0JZQUZxQUxnQkFIb0JNQT09Wj7Qn9C-INC-0LbQuOCktuClgeCkquCkpOCkv-CksOCkquCkv-WtkOiAjOaZguCuuOCvjeCuseCvgOCuqeCuvw%3D%3D&gl=US&hl=en") + produce_channel_search_url("UCXuqSBlHAE6Xw-yeJA0Tunw", "По ожиशुपतिरपि子而時ஸ்றீனி", 0).should eq("/browse_ajax?continuation=4qmFsgJ0EhhVQ1h1cVNCbEhBRTZYdy15ZUpBMFR1bncaGEVnWnpaV0Z5WTJnNEFYb0JNTGdCQUE9PVo-0J_QviDQvtC20LjgpLbgpYHgpKrgpKTgpL_gpLDgpKrgpL_lrZDogIzmmYLgrrjgr43grrHgr4Dgrqngrr8%3D&gl=US&hl=en") end end diff --git a/src/invidious.cr b/src/invidious.cr index 958f95f7..c95c6419 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -510,16 +510,16 @@ get "/watch" do |env| comment_html ||= "" end - fmt_stream = video.fmt_stream(decrypt_function) - adaptive_fmts = video.adaptive_fmts(decrypt_function) + fmt_stream = video.fmt_stream + adaptive_fmts = video.adaptive_fmts if params.local - fmt_stream.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path } - adaptive_fmts.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path } + fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) } + adaptive_fmts.each { |fmt| fmt["url"] = 
JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) } end - video_streams = video.video_streams(adaptive_fmts) - audio_streams = video.audio_streams(adaptive_fmts) + video_streams = video.video_streams + audio_streams = video.audio_streams # Older videos may not have audio sources available. # We redirect here so they're not unplayable @@ -549,33 +549,23 @@ get "/watch" do |env| aspect_ratio = "16:9" - video.description_html = fill_links(video.description_html, "https", "www.youtube.com") - video.description_html = replace_links(video.description_html) - - host_url = make_host_url(config, Kemal.config) - - if video.player_response["streamingData"]?.try &.["hlsManifestUrl"]? - hlsvp = video.player_response["streamingData"]["hlsManifestUrl"].as_s - hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", host_url) - end - thumbnail = "/vi/#{video.id}/maxres.jpg" if params.raw if params.listen - url = audio_streams[0]["url"] + url = audio_streams[0]["url"].as_s audio_streams.each do |fmt| - if fmt["bitrate"] == params.quality.rchop("k") - url = fmt["url"] + if fmt["bitrate"].as_i == params.quality.rchop("k").to_i + url = fmt["url"].as_s end end else - url = fmt_stream[0]["url"] + url = fmt_stream[0]["url"].as_s fmt_stream.each do |fmt| - if fmt["label"].split(" - ")[0] == params.quality - url = fmt["url"] + if fmt["quality"].as_s == params.quality + url = fmt["url"].as_s end end end @@ -583,24 +573,6 @@ get "/watch" do |env| next env.redirect url end - rvs = [] of Hash(String, String) - video.info["rvs"]?.try &.split(",").each do |rv| - rvs << HTTP::Params.parse(rv).to_h - end - - rating = video.info["avg_rating"].to_f64 - if video.views > 0 - engagement = ((video.dislikes.to_f + video.likes.to_f)/video.views * 100) - else - engagement = 0 - end - - playability_status = video.player_response["playabilityStatus"]? - if playability_status && playability_status["status"] == "LIVE_STREAM_OFFLINE" && !video.premiere_timestamp - reason = playability_status["reason"]?.try &.as_s - end - reason ||= "" - templated "watch" end @@ -752,16 +724,16 @@ get "/embed/:id" do |env| notifications.delete(id) end - fmt_stream = video.fmt_stream(decrypt_function) - adaptive_fmts = video.adaptive_fmts(decrypt_function) + fmt_stream = video.fmt_stream + adaptive_fmts = video.adaptive_fmts if params.local - fmt_stream.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path } - adaptive_fmts.each { |fmt| fmt["url"] = URI.parse(fmt["url"]).full_path } + fmt_stream.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) } + adaptive_fmts.each { |fmt| fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) } end - video_streams = video.video_streams(adaptive_fmts) - audio_streams = video.audio_streams(adaptive_fmts) + video_streams = video.video_streams + audio_streams = video.audio_streams if audio_streams.empty? && !video.live_now if params.quality == "dash" @@ -788,25 +760,13 @@ get "/embed/:id" do |env| aspect_ratio = nil - video.description_html = fill_links(video.description_html, "https", "www.youtube.com") - video.description_html = replace_links(video.description_html) - - host_url = make_host_url(config, Kemal.config) - - if video.player_response["streamingData"]?.try &.["hlsManifestUrl"]? 
- hlsvp = video.player_response["streamingData"]["hlsManifestUrl"].as_s - hlsvp = hlsvp.gsub("https://manifest.googlevideo.com", host_url) - end - thumbnail = "/vi/#{video.id}/maxres.jpg" if params.raw - url = fmt_stream[0]["url"] + url = fmt_stream[0]["url"].as_s fmt_stream.each do |fmt| - if fmt["label"].split(" - ")[0] == params.quality - url = fmt["url"] - end + url = fmt["url"].as_s if fmt["quality"].as_s == params.quality end next env.redirect url @@ -1469,7 +1429,6 @@ post "/login" do |env| traceback = IO::Memory.new # See https://github.com/ytdl-org/youtube-dl/blob/2019.04.07/youtube_dl/extractor/youtube.py#L82 - # TODO: Convert to QUIC begin client = QUIC::Client.new(LOGIN_URL) headers = HTTP::Headers.new @@ -2329,8 +2288,7 @@ get "/modify_notifications" do |env| end headers = cookies.add_request_headers(headers) - match = html.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) - if match + if match = html.body.match(/'XSRF_TOKEN': "(?[^"]+)"/) session_token = match["session_token"] else next env.redirect referer @@ -3575,14 +3533,14 @@ get "/channel/:ucid" do |env| item.author end end - items = items.select { |item| item.is_a?(SearchPlaylist) }.map { |item| item.as(SearchPlaylist) } + items = items.select(&.is_a?(SearchPlaylist)).map(&.as(SearchPlaylist)) items.each { |item| item.author = "" } else sort_options = {"newest", "oldest", "popular"} sort_by ||= "newest" - items, count = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) - items.select! { |item| !item.paid } + count, items = get_60_videos(channel.ucid, channel.author, page, channel.auto_generated, sort_by) + items.reject! &.paid env.set "search", "channel:#{channel.ucid} " end @@ -5125,7 +5083,7 @@ get "/api/manifest/dash/id/:id" do |env| next end - if dashmpd = video.player_response["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s + if dashmpd = video.dash_manifest_url manifest = YT_POOL.client &.get(URI.parse(dashmpd).full_path).body manifest = manifest.gsub(/[^<]+<\/BaseURL>/) do |baseurl| @@ -5142,16 +5100,16 @@ get "/api/manifest/dash/id/:id" do |env| next manifest end - adaptive_fmts = video.adaptive_fmts(decrypt_function) + adaptive_fmts = video.adaptive_fmts if local adaptive_fmts.each do |fmt| - fmt["url"] = URI.parse(fmt["url"]).full_path + fmt["url"] = JSON::Any.new(URI.parse(fmt["url"].as_s).full_path) end end - audio_streams = video.audio_streams(adaptive_fmts) - video_streams = video.video_streams(adaptive_fmts).sort_by { |stream| {stream["size"].split("x")[0].to_i, stream["fps"].to_i} }.reverse + audio_streams = video.audio_streams + video_streams = video.video_streams.sort_by { |stream| {stream["width"].as_i, stream["fps"].as_i} }.reverse XML.build(indent: " ", encoding: "UTF-8") do |xml| xml.element("MPD", "xmlns": "urn:mpeg:dash:schema:mpd:2011", @@ -5161,24 +5119,22 @@ get "/api/manifest/dash/id/:id" do |env| i = 0 {"audio/mp4", "audio/webm"}.each do |mime_type| - mime_streams = audio_streams.select { |stream| stream["type"].starts_with? mime_type } - if mime_streams.empty? - next - end + mime_streams = audio_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type } + next if mime_streams.empty? 
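+
+      # One AdaptationSet is emitted per mime type; every itag within it becomes
+      # a Representation whose SegmentBase/Initialization byte ranges are taken
+      # directly from the player response's indexRange/initRange fields.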
xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true) do mime_streams.each do |fmt| - codecs = fmt["type"].split("codecs=")[1].strip('"') - bandwidth = fmt["bitrate"].to_i * 1000 - itag = fmt["itag"] - url = fmt["url"] + codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"') + bandwidth = fmt["bitrate"].as_i + itag = fmt["itag"].as_i + url = fmt["url"].as_s xml.element("Representation", id: fmt["itag"], codecs: codecs, bandwidth: bandwidth) do xml.element("AudioChannelConfiguration", schemeIdUri: "urn:mpeg:dash:23003:3:audio_channel_configuration:2011", value: "2") xml.element("BaseURL") { xml.text url } - xml.element("SegmentBase", indexRange: fmt["index"]) do - xml.element("Initialization", range: fmt["init"]) + xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do + xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}") end end end @@ -5187,21 +5143,24 @@ get "/api/manifest/dash/id/:id" do |env| i += 1 end + potential_heights = {4320, 2160, 1440, 1080, 720, 480, 360, 240, 144} + {"video/mp4", "video/webm"}.each do |mime_type| - mime_streams = video_streams.select { |stream| stream["type"].starts_with? mime_type } + mime_streams = video_streams.select { |stream| stream["mimeType"].as_s.starts_with? mime_type } next if mime_streams.empty? heights = [] of Int32 xml.element("AdaptationSet", id: i, mimeType: mime_type, startWithSAP: 1, subsegmentAlignment: true, scanType: "progressive") do mime_streams.each do |fmt| - codecs = fmt["type"].split("codecs=")[1].strip('"') - bandwidth = fmt["bitrate"] - itag = fmt["itag"] - url = fmt["url"] - width, height = fmt["size"].split("x").map { |i| i.to_i } + codecs = fmt["mimeType"].as_s.split("codecs=")[1].strip('"') + bandwidth = fmt["bitrate"].as_i + itag = fmt["itag"].as_i + url = fmt["url"].as_s + width = fmt["width"].as_i + height = fmt["height"].as_i # Resolutions reported by YouTube player (may not accurately reflect source) - height = [4320, 2160, 1440, 1080, 720, 480, 360, 240, 144].sort_by { |i| (height - i).abs }[0] + height = potential_heights.min_by { |i| (height - i).abs } next if unique_res && heights.includes? 
height heights << height @@ -5209,8 +5168,8 @@ get "/api/manifest/dash/id/:id" do |env| startWithSAP: "1", maxPlayoutRate: "1", bandwidth: bandwidth, frameRate: fmt["fps"]) do xml.element("BaseURL") { xml.text url } - xml.element("SegmentBase", indexRange: fmt["index"]) do - xml.element("Initialization", range: fmt["init"]) + xml.element("SegmentBase", indexRange: "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}") do + xml.element("Initialization", range: "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}") end end end @@ -5224,10 +5183,10 @@ get "/api/manifest/dash/id/:id" do |env| end get "/api/manifest/hls_variant/*" do |env| - manifest = YT_POOL.client &.get(env.request.path) + response = YT_POOL.client &.get(env.request.path) - if manifest.status_code != 200 - env.response.status_code = manifest.status_code + if response.status_code != 200 + env.response.status_code = response.status_code next end @@ -5247,10 +5206,10 @@ get "/api/manifest/hls_variant/*" do |env| end get "/api/manifest/hls_playlist/*" do |env| - manifest = YT_POOL.client &.get(env.request.path) + response = YT_POOL.client &.get(env.request.path) - if manifest.status_code != 200 - env.response.status_code = manifest.status_code + if response.status_code != 200 + env.response.status_code = response.status_code next end @@ -5320,7 +5279,7 @@ get "/latest_version" do |env| end id ||= env.params.query["id"]? - itag ||= env.params.query["itag"]? + itag ||= env.params.query["itag"]?.try &.to_i region = env.params.query["region"]? @@ -5335,26 +5294,16 @@ get "/latest_version" do |env| video = get_video(id, PG_DB, region: region) - fmt_stream = video.fmt_stream(decrypt_function) - adaptive_fmts = video.adaptive_fmts(decrypt_function) + fmt = video.fmt_stream.find(nil) { |f| f["itag"].as_i == itag } || video.adaptive_fmts.find(nil) { |f| f["itag"].as_i == itag } + url = fmt.try &.["url"]?.try &.as_s - urls = (fmt_stream + adaptive_fmts).select { |fmt| fmt["itag"] == itag } - if urls.empty? + if !url env.response.status_code = 404 next - elsif urls.size > 1 - env.response.status_code = 409 - next end - url = urls[0]["url"] - if local - url = URI.parse(url).full_path.not_nil! - end - - if title - url += "&title=#{title}" - end + url = URI.parse(url).full_path.not_nil! if local + url = "#{url}&title=#{title}" if title env.redirect url end diff --git a/src/invidious/channels.cr b/src/invidious/channels.cr index f1a57eee..cbfa521d 100644 --- a/src/invidious/channels.cr +++ b/src/invidious/channels.cr @@ -232,9 +232,9 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) if auto_generated - videos = extract_videos(nodeset) + videos = extract_videos_html(nodeset) else - videos = extract_videos(nodeset, ucid, author) + videos = extract_videos_html(nodeset, ucid, author) end end @@ -317,9 +317,9 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil) nodeset = nodeset.not_nil! 
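+      # Items on an auto-generated channel belong to other uploaders, so each
+      # item keeps the byline from the feed markup; for a normal channel the
+      # ucid/author arguments below are used to fill in the omitted channel info.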
if auto_generated - videos = extract_videos(nodeset) + videos = extract_videos_html(nodeset) else - videos = extract_videos(nodeset, ucid, author) + videos = extract_videos_html(nodeset, ucid, author) end count = nodeset.size @@ -429,7 +429,7 @@ def fetch_channel_playlists(ucid, author, auto_generated, continuation, sort_by) if auto_generated items = extract_shelf_items(nodeset, ucid, author) else - items = extract_items(nodeset, ucid, author) + items = extract_items_html(nodeset, ucid, author) end return items, continuation @@ -584,16 +584,8 @@ def fetch_channel_community(ucid, continuation, locale, format, thin_mode) headers = HTTP::Headers.new headers["cookie"] = response.cookies.add_request_headers(headers)["cookie"] - headers["content-type"] = "application/x-www-form-urlencoded" - headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ==" - headers["x-spf-previous"] = "" - headers["x-spf-referer"] = "" - - headers["x-youtube-client-name"] = "1" - headers["x-youtube-client-version"] = "2.20180719" - - session_token = response.body.match(/"XSRF_TOKEN":"(?[A-Za-z0-9\_\-\=]+)"/).try &.["session_token"]? || "" + session_token = response.body.match(/"XSRF_TOKEN":"(?[^"]+)"/).try &.["session_token"]? || "" post_req = { session_token: session_token, } @@ -633,13 +625,7 @@ def fetch_channel_community(ucid, continuation, locale, format, thin_mode) next if !post - if !post["contentText"]? - content_html = "" - else - content_html = post["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s || - post["contentText"]["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || "" - end - + content_html = post["contentText"]?.try { |t| parse_content(t) } || "" author = post["authorText"]?.try &.["simpleText"]? || "" json.object do @@ -960,7 +946,7 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest") 2.times do |i| url = produce_channel_videos_url(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by) - response = YT_POOL.client &.get(url, headers) + response = YT_POOL.client &.get(url) initial_data = JSON.parse(response.body).as_a.find &.["response"]? break if !initial_data videos.concat extract_videos(initial_data.as_h) @@ -980,7 +966,7 @@ def get_latest_videos(ucid) document = XML.parse_html(json["content_html"].as_s) nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) - videos = extract_videos(nodeset, ucid) + videos = extract_videos_html(nodeset, ucid) end return videos diff --git a/src/invidious/comments.cr b/src/invidious/comments.cr index 24564bb9..5490d2ea 100644 --- a/src/invidious/comments.cr +++ b/src/invidious/comments.cr @@ -59,7 +59,7 @@ end def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, sort_by = "top") video = get_video(id, db, region: region) - session_token = video.info["session_token"]? 
+ session_token = video.session_token case cursor when nil, "" @@ -85,17 +85,9 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so session_token: session_token, } - headers = HTTP::Headers.new - - headers["content-type"] = "application/x-www-form-urlencoded" - headers["cookie"] = video.info["cookie"] - - headers["x-client-data"] = "CIi2yQEIpbbJAQipncoBCNedygEIqKPKAQ==" - headers["x-spf-previous"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999" - headers["x-spf-referer"] = "https://www.youtube.com/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999" - - headers["x-youtube-client-name"] = "1" - headers["x-youtube-client-version"] = "2.20180719" + headers = HTTP::Headers{ + "cookie" => video.cookie, + } response = YT_POOL.client(region, &.post("/comment_service_ajax?action_get_comments=1&hl=en&gl=US", headers, form: post_req)) response = JSON.parse(response.body) @@ -150,8 +142,7 @@ def fetch_youtube_comments(id, db, cursor, format, locale, thin_mode, region, so node_comment = node["commentRenderer"] end - content_html = node_comment["contentText"]["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s || - node_comment["contentText"]["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || "" + content_html = node_comment["contentText"]?.try { |t| parse_content(t) } || "" author = node_comment["authorText"]?.try &.["simpleText"]? || "" json.field "author", author @@ -523,6 +514,11 @@ def fill_links(html, scheme, host) return html.to_xml(options: XML::SaveOptions::NO_DECL) end +def parse_content(content : JSON::Any) : String + content["simpleText"]?.try &.as_s.rchop('\ufeff').try { |b| HTML.escape(b) }.to_s || + content["runs"]?.try &.as_a.try { |r| content_to_comment_html(r).try &.to_s } || "" +end + def content_to_comment_html(content) comment_html = content.map do |run| text = HTML.escape(run["text"].as_s) diff --git a/src/invidious/helpers/helpers.cr b/src/invidious/helpers/helpers.cr index b572ee1c..7a251052 100644 --- a/src/invidious/helpers/helpers.cr +++ b/src/invidious/helpers/helpers.cr @@ -313,13 +313,149 @@ def html_to_content(description_html : String) return description end -def extract_videos(nodeset, ucid = nil, author_name = nil) - videos = extract_items(nodeset, ucid, author_name) - videos.select { |item| item.is_a?(SearchVideo) }.map { |video| video.as(SearchVideo) } +def extract_videos(initial_data : Hash(String, JSON::Any)) + extract_items(initial_data).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) end -def extract_items(nodeset, ucid = nil, author_name = nil) - # TODO: Make this a 'common', so it makes more sense to be used here +def extract_items(initial_data : Hash(String, JSON::Any)) + items = [] of SearchItem + + initial_data.try { |t| + t["contents"]? || t["response"]? + }.try { |t| + t["twoColumnBrowseResultsRenderer"]?.try &.["tabs"].as_a[0]?.try &.["tabRenderer"]["content"] || + t["twoColumnSearchResultsRenderer"]?.try &.["primaryContents"] || + t["continuationContents"]? + }.try { |t| t["sectionListRenderer"]? || t["sectionListContinuation"]? } + .try &.["contents"] + .as_a.each { |c| + c.try &.["itemSectionRenderer"]["contents"].as_a + .try { |t| t[0]?.try &.["shelfRenderer"]?.try &.["content"]["expandedShelfContentsRenderer"]?.try &.["items"].as_a || t } + .each { |item| + if i = item["videoRenderer"]? 
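+          # A videoRenderer is roughly the polymer counterpart of the old
+          # "yt-lockup" HTML item: id, title, byline, view count, badges and
+          # premiere data all arrive in a single JSON object.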
+ video_id = i["videoId"].as_s + title = i["title"].try { |t| t["simpleText"]?.try &.as_s || t["runs"]?.try &.as_a.map(&.["text"].as_s).join("") } || "" + + author_info = i["ownerText"]?.try &.["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || "" + + published = i["publishedTimeText"]?.try &.["simpleText"]?.try { |t| decode_date(t.as_s) } || Time.local + view_count = i["viewCountText"]?.try &.["simpleText"]?.try &.as_s.gsub(/\D+/, "").to_i64? || 0_i64 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + length_seconds = i["lengthText"]?.try &.["simpleText"]?.try &.as_s.try { |t| decode_length_seconds(t) } || 0 + + live_now = false + paid = false + premium = false + + premiere_timestamp = i["upcomingEventData"]?.try &.["startTime"]?.try { |t| Time.unix(t.as_s.to_i64) } + + i["badges"]?.try &.as_a.each do |badge| + b = badge["metadataBadgeRenderer"] + case b["label"].as_s + when "LIVE NOW" + live_now = true + when "New", "4K", "CC" + # TODO + when "Premium" + paid = true + + # TODO: Potentially available as i["topStandaloneBadge"]["metadataBadgeRenderer"] + premium = true + else nil # Ignore + end + end + + items << SearchVideo.new( + title: title, + id: video_id, + author: author, + ucid: author_id, + published: published, + views: view_count, + description_html: description_html, + length_seconds: length_seconds, + live_now: live_now, + paid: paid, + premium: premium, + premiere_timestamp: premiere_timestamp + ) + elsif i = item["channelRenderer"]? + author = i["title"]["simpleText"]?.try &.as_s || "" + author_id = i["channelId"]?.try &.as_s || "" + + author_thumbnail = i["thumbnail"]["thumbnails"]?.try &.as_a[0]?.try { |u| "https:#{u["url"]}" } || "" + subscriber_count = i["subscriberCountText"]?.try &.["simpleText"]?.try &.as_s.try { |s| short_text_to_number(s.split(" ")[0]) } || 0 + + auto_generated = false + auto_generated = true if !i["videoCountText"]? + video_count = i["videoCountText"]?.try &.["runs"].as_a[0]?.try &.["text"].as_s.gsub(/\D/, "").to_i || 0 + description_html = i["descriptionSnippet"]?.try { |t| parse_content(t) } || "" + + items << SearchChannel.new( + author: author, + ucid: author_id, + author_thumbnail: author_thumbnail, + subscriber_count: subscriber_count, + video_count: video_count, + description_html: description_html, + auto_generated: auto_generated, + ) + elsif i = item["playlistRenderer"]? + title = i["title"]["simpleText"]?.try &.as_s || "" + plid = i["playlistId"]?.try &.as_s || "" + + video_count = i["videoCount"]?.try &.as_s.to_i || 0 + playlist_thumbnail = i["thumbnails"].as_a[0]?.try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"].as_s || "" + + author_info = i["shortBylineText"]["runs"].as_a[0]? + author = author_info.try &.["text"].as_s || "" + author_id = author_info.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]["browseId"].as_s || "" + + videos = i["videos"]?.try &.as_a.map do |v| + v = v["childVideoRenderer"] + v_title = v["title"]["simpleText"]?.try &.as_s || "" + v_id = v["videoId"]?.try &.as_s || "" + v_length_seconds = v["lengthText"]?.try &.["simpleText"]?.try { |t| decode_length_seconds(t.as_s) } || 0 + SearchPlaylistVideo.new( + title: v_title, + id: v_id, + length_seconds: v_length_seconds + ) + end || [] of SearchPlaylistVideo + + # TODO: i["publishedTimeText"]? 
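+          # (playlistRenderer exposes no reliable publish date; beyond videoCount,
+          # only the few childVideoRenderer previews collected above are given.)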
+ + items << SearchPlaylist.new( + title: title, + id: plid, + author: author, + ucid: author_id, + video_count: video_count, + videos: videos, + thumbnail: playlist_thumbnail + ) + elsif i = item["radioRenderer"]? # Mix + # TODO + elsif i = item["showRenderer"]? # Show + # TODO + elsif i = item["shelfRenderer"]? + elsif i = item["horizontalCardListRenderer"]? + elsif i = item["searchPyvRenderer"]? # Ad + end + } + } + + items +end + +def extract_videos_html(nodeset, ucid = nil, author_name = nil) + extract_items_html(nodeset, ucid, author_name).select(&.is_a?(SearchVideo)).map(&.as(SearchVideo)) +end + +def extract_items_html(nodeset, ucid = nil, author_name = nil) + # TODO: Make this a 'CommonItem', so it makes more sense to be used here items = [] of SearchItem nodeset.each do |node| @@ -456,7 +592,7 @@ def extract_items(nodeset, ucid = nil, author_name = nil) paid = true end - premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64 + premiere_timestamp = node.xpath_node(%q(.//ul[@class="yt-lockup-meta-info"]/li/span[@class="localized-date"])).try &.["data-timestamp"]?.try &.to_i64? if premiere_timestamp premiere_timestamp = Time.unix(premiere_timestamp) end @@ -683,12 +819,12 @@ def check_table(db, logger, table_name, struct_type = nil) return if column_array.size <= struct_array.size - # column_array.each do |column| - # if !struct_array.includes? column - # logger.puts("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE") - # db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE") - # end - # end + column_array.each do |column| + if !struct_array.includes? column + logger.puts("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE") + db.exec("ALTER TABLE #{table_name} DROP COLUMN #{column} CASCADE") + end + end end class PG::ResultSet @@ -864,12 +1000,12 @@ def create_notification_stream(env, topics, connection_channel) end end -def extract_initial_data(body) - initial_data = body.match(/window\["ytInitialData"\] = (?.*?);\n/).try &.["info"] || "{}" +def extract_initial_data(body) : Hash(String, JSON::Any) + initial_data = body.match(/window\["ytInitialData"\]\s*=\s*(?.*?);+\n/).try &.["info"] || "{}" if initial_data.starts_with?("JSON.parse(\"") - return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s) + return JSON.parse(JSON.parse(%({"initial_data":"#{initial_data[12..-3]}"}))["initial_data"].as_s).as_h else - return JSON.parse(initial_data) + return JSON.parse(initial_data).as_h end end diff --git a/src/invidious/helpers/jobs.cr b/src/invidious/helpers/jobs.cr index a9aee064..e3d7b520 100644 --- a/src/invidious/helpers/jobs.cr +++ b/src/invidious/helpers/jobs.cr @@ -201,7 +201,7 @@ end def bypass_captcha(captcha_key, logger) loop do begin - {"/watch?v=CvFH_6DNRCY&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")}.each do |path| + {"/watch?v=CvFH_6DNRCY&gl=US&hl=en&has_verified=1&bpctr=9999999999", produce_channel_videos_url(ucid: "UCXuqSBlHAE6Xw-yeJA0Tunw")}.each do |path| response = YT_POOL.client &.get(path) if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") html = XML.parse_html(response.body) diff --git a/src/invidious/helpers/signatures.cr b/src/invidious/helpers/signatures.cr index 0aaacd04..5eabb91b 100644 --- a/src/invidious/helpers/signatures.cr +++ b/src/invidious/helpers/signatures.cr @@ 
-1,8 +1,8 @@
 alias SigProc = Proc(Array(String), Int32, Array(String))
 
 def fetch_decrypt_function(id = "CvFH_6DNRCY")
-  document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1").body
-  url = document.match(/src="(?<url>.*player_ias[^\/]+\/en_US\/base.js)"/).not_nil!["url"]
+  document = YT_POOL.client &.get("/watch?v=#{id}&gl=US&hl=en").body
+  url = document.match(/src="(?<url>\/yts\/jsbin\/player_ias-[^\/]+\/en_US\/base.js)"/).not_nil!["url"]
   player = YT_POOL.client &.get(url).body
 
   function_name = player.match(/^(?<name>[^=]+)=function\(\w\){\w=\w\.split\(""\);[^\. ]+\.[^( ]+/m).not_nil!["name"]
diff --git a/src/invidious/helpers/utils.cr b/src/invidious/helpers/utils.cr
index a39a0b16..a51f15ce 100644
--- a/src/invidious/helpers/utils.cr
+++ b/src/invidious/helpers/utils.cr
@@ -8,7 +8,7 @@ def add_yt_headers(request)
   request.headers["accept-language"] ||= "en-us,en;q=0.5"
   return if request.resource.starts_with? "/sorry/index"
   request.headers["x-youtube-client-name"] ||= "1"
-  request.headers["x-youtube-client-version"] ||= "1.20180719"
+  request.headers["x-youtube-client-version"] ||= "2.20200609"
   if !CONFIG.cookies.empty?
     request.headers["cookie"] = "#{(CONFIG.cookies.map { |c| "#{c.name}=#{c.value}" }).join("; ")}; #{request.headers["cookie"]?}"
   end
diff --git a/src/invidious/mixes.cr b/src/invidious/mixes.cr
index 04a37b87..6c01d78b 100644
--- a/src/invidious/mixes.cr
+++ b/src/invidious/mixes.cr
@@ -20,7 +20,6 @@ end
 def fetch_mix(rdid, video_id, cookies = nil, locale = nil)
   headers = HTTP::Headers.new
-  headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
   if cookies
     headers = cookies.add_request_headers(headers)
diff --git a/src/invidious/search.cr b/src/invidious/search.cr
index 7a88f316..b4bd6226 100644
--- a/src/invidious/search.cr
+++ b/src/invidious/search.cr
@@ -96,6 +96,10 @@ struct SearchVideo
     end
   end
 
+  def is_upcoming
+    premiere_timestamp ? true : false
+  end
+
   db_mapping({
     title: String,
     id: String,
@@ -227,61 +231,35 @@ end
 alias SearchItem = SearchVideo | SearchChannel | SearchPlaylist
 
 def channel_search(query, page, channel)
-  response = YT_POOL.client &.get("/channel/#{channel}?disable_polymer=1&hl=en&gl=US")
-  document = XML.parse_html(response.body)
-  canonical = document.xpath_node(%q(//link[@rel="canonical"]))
+  response = YT_POOL.client &.get("/channel/#{channel}?hl=en&gl=US")
+  response = YT_POOL.client &.get("/user/#{channel}?hl=en&gl=US") if response.headers["location"]?
+  response = YT_POOL.client &.get("/c/#{channel}?hl=en&gl=US") if response.headers["location"]?
 
-  if !canonical
-    response = YT_POOL.client &.get("/c/#{channel}?disable_polymer=1&hl=en&gl=US")
-    document = XML.parse_html(response.body)
-    canonical = document.xpath_node(%q(//link[@rel="canonical"]))
-  end
+  ucid = response.body.match(/\\"channelId\\":\\"(?<ucid>[^\\]+)\\"/).try &.["ucid"]?
 
-  if !canonical
-    response = YT_POOL.client &.get("/user/#{channel}?disable_polymer=1&hl=en&gl=US")
-    document = XML.parse_html(response.body)
-    canonical = document.xpath_node(%q(//link[@rel="canonical"]))
-  end
-
-  if !canonical
-    return 0, [] of SearchItem
-  end
-
-  ucid = canonical["href"].split("/")[-1]
+  return 0, [] of SearchItem if !ucid
 
   url = produce_channel_search_url(ucid, query, page)
   response = YT_POOL.client &.get(url)
-  json = JSON.parse(response.body)
+  initial_data = JSON.parse(response.body).as_a.find &.["response"]?
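+  # browse_ajax responses arrive as an array of wrapper objects; the element
+  # carrying a "response" key (found above) holds the continuation contents.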
+ return 0, [] of SearchItem if !initial_data + items = extract_items(initial_data.as_h) - if json["content_html"]? && !json["content_html"].as_s.empty? - document = XML.parse_html(json["content_html"].as_s) - nodeset = document.xpath_nodes(%q(//li[contains(@class, "feed-item-container")])) - - count = nodeset.size - items = extract_items(nodeset) - else - count = 0 - items = [] of SearchItem - end - - return count, items + return items.size, items end def search(query, page = 1, search_params = produce_search_params(content_type: "all"), region = nil) - if query.empty? - return {0, [] of SearchItem} - end + return 0, [] of SearchItem if query.empty? - html = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en&disable_polymer=1").body) - if html.empty? - return {0, [] of SearchItem} - end + body = YT_POOL.client(region, &.get("/results?q=#{URI.encode_www_form(query)}&page=#{page}&sp=#{search_params}&hl=en").body) + return 0, [] of SearchItem if body.empty? - html = XML.parse_html(html) - nodeset = html.xpath_nodes(%q(//ol[@class="item-section"]/li)) - items = extract_items(nodeset) + initial_data = extract_initial_data(body) + items = extract_items(initial_data) - return {nodeset.size, items} + # initial_data["estimatedResults"]?.try &.as_s.to_i64 + + return items.size, items end def produce_search_params(sort : String = "relevance", date : String = "", content_type : String = "", @@ -387,12 +365,9 @@ def produce_channel_search_url(ucid, query, page) "2:string" => ucid, "3:base64" => { "2:string" => "search", - "6:varint" => 2_i64, "7:varint" => 1_i64, - "12:varint" => 1_i64, - "13:string" => "", - "23:varint" => 0_i64, "15:string" => "#{page}", + "23:varint" => 0_i64, }, "11:string" => query, }, diff --git a/src/invidious/trending.cr b/src/invidious/trending.cr index 017c42f5..8d078387 100644 --- a/src/invidious/trending.cr +++ b/src/invidious/trending.cr @@ -1,7 +1,4 @@ def fetch_trending(trending_type, region, locale) - headers = HTTP::Headers.new - headers["User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36" - region ||= "US" region = region.upcase @@ -11,7 +8,7 @@ def fetch_trending(trending_type, region, locale) if trending_type && trending_type != "Default" trending_type = trending_type.downcase.capitalize - response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en", headers).body + response = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body initial_data = extract_initial_data(response) @@ -21,31 +18,28 @@ def fetch_trending(trending_type, region, locale) if url url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"] url = url["channelListSubMenuAvatarRenderer"]["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"].as_s - url += "&disable_polymer=1&gl=#{region}&hl=en" + url = "#{url}&gl=#{region}&hl=en" trending = YT_POOL.client &.get(url).body plid = extract_plid(url) else - trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body + trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body end else - trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en&disable_polymer=1").body + trending = YT_POOL.client &.get("/feed/trending?gl=#{region}&hl=en").body end - trending = XML.parse_html(trending) - nodeset = trending.xpath_nodes(%q(//ul/li[@class="expanded-shelf-content-item-wrapper"])) - trending = 
extract_videos(nodeset) + initial_data = extract_initial_data(trending) + trending = extract_videos(initial_data) return {trending, plid} end def extract_plid(url) - plid = URI.parse(url) - .try { |i| HTTP::Params.parse(i.query.not_nil!)["bp"] } + return url.try { |i| URI.parse(i).query } + .try { |i| HTTP::Params.parse(i)["bp"] } .try { |i| URI.decode_www_form(i) } .try { |i| Base64.decode(i) } .try { |i| IO::Memory.new(i) } .try { |i| Protodec::Any.parse(i) } - .try { |i| i["44:0:embedded"]["2:1:string"].as_s } - - return plid + .try &.["44:0:embedded"]?.try &.["2:1:string"]?.try &.as_s end diff --git a/src/invidious/users.cr b/src/invidious/users.cr index 0aa94d82..ba15692c 100644 --- a/src/invidious/users.cr +++ b/src/invidious/users.cr @@ -267,7 +267,7 @@ def subscribe_ajax(channel_id, action, env_headers) end headers = cookies.add_request_headers(headers) - if match = html.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) + if match = html.body.match(/'XSRF_TOKEN': "(?[^"]+)"/) session_token = match["session_token"] headers["content-type"] = "application/x-www-form-urlencoded" @@ -300,7 +300,7 @@ end # end # headers = cookies.add_request_headers(headers) # -# if match = html.body.match(/'XSRF_TOKEN': "(?[A-Za-z0-9\_\-\=]+)"/) +# if match = html.body.match(/'XSRF_TOKEN': "(?[^"]+)"/) # session_token = match["session_token"] # # headers["content-type"] = "application/x-www-form-urlencoded" diff --git a/src/invidious/videos.cr b/src/invidious/videos.cr index ed5847e4..f2638f14 100644 --- a/src/invidious/videos.cr +++ b/src/invidious/videos.cr @@ -246,12 +246,9 @@ struct VideoPreferences end struct Video - property player_json : JSON::Any? - property recommended_json : JSON::Any? - - module HTTPParamConverter + module JSONConverter def self.from_rs(rs) - HTTP::Params.parse(rs.read(String)) + JSON.parse(rs.read(String)).as_h end end @@ -271,7 +268,7 @@ struct Video generate_storyboards(json, self.id, self.storyboards) end - json.field "description", html_to_content(self.description_html) + json.field "description", self.description json.field "descriptionHtml", self.description_html json.field "published", self.published.to_unix json.field "publishedText", translate(locale, "`x` ago", recode_date(self.published, locale)) @@ -310,13 +307,13 @@ struct Video json.field "lengthSeconds", self.length_seconds json.field "allowRatings", self.allow_ratings - json.field "rating", self.info["avg_rating"].to_f32 + json.field "rating", self.average_rating json.field "isListed", self.is_listed json.field "liveNow", self.live_now json.field "isUpcoming", self.is_upcoming if self.premiere_timestamp - json.field "premiereTimestamp", self.premiere_timestamp.not_nil!.to_unix + json.field "premiereTimestamp", self.premiere_timestamp.try &.to_unix end if hlsvp = self.hls_manifest_url @@ -328,21 +325,21 @@ struct Video json.field "adaptiveFormats" do json.array do - self.adaptive_fmts(decrypt_function).each do |fmt| + self.adaptive_fmts.each do |fmt| json.object do - json.field "index", fmt["index"] - json.field "bitrate", fmt["bitrate"] - json.field "init", fmt["init"] + json.field "index", "#{fmt["indexRange"]["start"]}-#{fmt["indexRange"]["end"]}" + json.field "bitrate", fmt["bitrate"].as_i.to_s + json.field "init", "#{fmt["initRange"]["start"]}-#{fmt["initRange"]["end"]}" json.field "url", fmt["url"] - json.field "itag", fmt["itag"] - json.field "type", fmt["type"] - json.field "clen", fmt["clen"] - json.field "lmt", fmt["lmt"] - json.field "projectionType", fmt["projection_type"] + json.field "itag", 
fmt["itag"].as_i.to_s + json.field "type", fmt["mimeType"] + json.field "clen", fmt["contentLength"] + json.field "lmt", fmt["lastModified"] + json.field "projectionType", fmt["projectionType"] fmt_info = itag_to_metadata?(fmt["itag"]) if fmt_info - fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.to_i || 30 + fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.as_i || 30 json.field "fps", fps json.field "container", fmt_info["ext"] json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"] @@ -368,16 +365,16 @@ struct Video json.field "formatStreams" do json.array do - self.fmt_stream(decrypt_function).each do |fmt| + self.fmt_stream.each do |fmt| json.object do json.field "url", fmt["url"] - json.field "itag", fmt["itag"] - json.field "type", fmt["type"] + json.field "itag", fmt["itag"].as_i.to_s + json.field "type", fmt["mimeType"] json.field "quality", fmt["quality"] fmt_info = itag_to_metadata?(fmt["itag"]) if fmt_info - fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.to_i || 30 + fps = fmt_info["fps"]?.try &.to_i || fmt["fps"]?.try &.as_i || 30 json.field "fps", fps json.field "container", fmt_info["ext"] json.field "encoding", fmt_info["vcodec"]? || fmt_info["acodec"] @@ -415,9 +412,7 @@ struct Video json.field "recommendedVideos" do json.array do - self.info["rvs"]?.try &.split(",").each do |rv| - rv = HTTP::Params.parse(rv) - + self.related_videos.each do |rv| if rv["id"]? json.object do json.field "videoId", rv["id"] @@ -436,7 +431,7 @@ struct Video qualities.each do |quality| json.object do - json.field "url", rv["author_thumbnail"].gsub(/s\d+-/, "s#{quality}-") + json.field "url", rv["author_thumbnail"]?.try &.gsub(/s\d+-/, "s#{quality}-") json.field "width", quality json.field "height", quality end @@ -445,9 +440,9 @@ struct Video end end - json.field "lengthSeconds", rv["length_seconds"].to_i - json.field "viewCountText", rv["short_view_count_text"] - json.field "viewCount", rv["view_count"]?.try &.to_i64 + json.field "lengthSeconds", rv["length_seconds"]?.try &.to_i + json.field "viewCountText", rv["short_view_count_text"]? + json.field "viewCount", rv["view_count"]?.try &.empty? ? nil : rv["view_count"].to_i64 end end end @@ -466,256 +461,150 @@ struct Video end end - # `description_html` is stored in DB as `description`, which can be - # quite confusing. Since it currently isn't very practical to rename - # it, we instead define a getter and setter here. - def description_html - self.description + def title + info["videoDetails"]["title"]?.try &.as_s || "" end - def description_html=(other : String) - self.description = other + def ucid + info["videoDetails"]["channelId"]?.try &.as_s || "" + end + + def author + info["videoDetails"]["author"]?.try &.as_s || "" + end + + def length_seconds : Int32 + info["microformat"]?.try &.["playerMicroformatRenderer"]?.try &.["lengthSeconds"]?.try &.as_s.to_i || + info["videoDetails"]["lengthSeconds"]?.try &.as_s.to_i || 0 + end + + def views : Int64 + info["videoDetails"]["viewCount"]?.try &.as_s.to_i64 || 0_i64 + end + + def likes : Int64 + info["likes"]?.try &.as_i64 || 0_i64 + end + + def dislikes : Int64 + info["dislikes"]?.try &.as_i64 || 0_i64 + end + + def average_rating : Float64 + # (likes / (likes + dislikes) * 4 + 1) + info["videoDetails"]["averageRating"]?.try { |t| t.as_f? 
|| t.as_i64?.try &.to_f64 }.try &.round(4) || 0.0 + end + + def published : Time + info["microformat"]?.try &.["playerMicroformatRenderer"]?.try &.["publishDate"]?.try { |t| Time.parse(t.as_s, "%Y-%m-%d", Time::Location.local) } || Time.local + end + + def published=(other : Time) + info["microformat"].as_h["playerMicroformatRenderer"].as_h["publishDate"] = JSON::Any.new(other.to_s("%Y-%m-%d")) + end + + def cookie + info["cookie"]?.try &.as_h.map { |k, v| "#{k}=#{v}" }.join("; ") || "" end def allow_ratings - allow_ratings = player_response["videoDetails"]?.try &.["allowRatings"]?.try &.as_bool - - if allow_ratings.nil? - return true - end - - return allow_ratings + r = info["videoDetails"]["allowRatings"]?.try &.as_bool + r.nil? ? false : r end def live_now - live_now = player_response["videoDetails"]?.try &.["isLive"]?.try &.as_bool - - if live_now.nil? - return false - end - - return live_now + info["videoDetails"]["isLiveContent"]?.try &.as_bool || false end def is_listed - is_listed = player_response["videoDetails"]?.try &.["isCrawlable"]?.try &.as_bool - - if is_listed.nil? - return true - end - - return is_listed + info["videoDetails"]["isCrawlable"]?.try &.as_bool || false end def is_upcoming - is_upcoming = player_response["videoDetails"]?.try &.["isUpcoming"]?.try &.as_bool - - if is_upcoming.nil? - return false - end - - return is_upcoming + info["videoDetails"]["isUpcoming"]?.try &.as_bool || false end - def premiere_timestamp - if self.is_upcoming - premiere_timestamp = player_response["playabilityStatus"]? - .try &.["liveStreamability"]? - .try &.["liveStreamabilityRenderer"]? - .try &.["offlineSlate"]? - .try &.["liveStreamOfflineSlateRenderer"]? - .try &.["scheduledStartTime"]?.try &.as_s.to_i64 - end - - if premiere_timestamp - premiere_timestamp = Time.unix(premiere_timestamp) - end - - return premiere_timestamp + def premiere_timestamp : Time? + info["microformat"]?.try &.["playerMicroformatRenderer"]? + .try &.["liveBroadcastDetails"]?.try &.["startTimestamp"]?.try { |t| Time.parse_rfc3339(t.as_s) } end def keywords - keywords = player_response["videoDetails"]?.try &.["keywords"]?.try &.as_a - keywords ||= [] of String - - return keywords + info["videoDetails"]["keywords"]?.try &.as_a.map &.as_s || [] of String end - def fmt_stream(decrypt_function) - streams = [] of HTTP::Params - - if fmt_streams = player_response["streamingData"]?.try &.["formats"]? - fmt_streams.as_a.each do |fmt_stream| - if !fmt_stream.as_h? - next - end - - fmt = {} of String => String - - fmt["lmt"] = fmt_stream["lastModified"]?.try &.as_s || "0" - fmt["projection_type"] = "1" - fmt["type"] = fmt_stream["mimeType"].as_s - fmt["clen"] = fmt_stream["contentLength"]?.try &.as_s || "0" - fmt["bitrate"] = fmt_stream["bitrate"]?.try &.as_i.to_s || "0" - fmt["itag"] = fmt_stream["itag"].as_i.to_s - if fmt_stream["url"]? - fmt["url"] = fmt_stream["url"].as_s - end - if cipher = fmt_stream["cipher"]? || fmt_stream["signatureCipher"]? - HTTP::Params.parse(cipher.as_s).each do |key, value| - fmt[key] = value - end - end - fmt["quality"] = fmt_stream["quality"].as_s - - if fmt_stream["width"]? - fmt["size"] = "#{fmt_stream["width"]}x#{fmt_stream["height"]}" - fmt["height"] = fmt_stream["height"].as_i.to_s - end - - if fmt_stream["fps"]? - fmt["fps"] = fmt_stream["fps"].as_i.to_s - end - - if fmt_stream["qualityLabel"]? 
-          fmt["quality_label"] = fmt_stream["qualityLabel"].as_s
-        end
-
-        params = HTTP::Params.new
-        fmt.each do |key, value|
-          params[key] = value
-        end
-
-        streams << params
-      end
-
-      streams.sort_by! { |stream| stream["height"].to_i }.reverse!
-    elsif fmt_stream = self.info["url_encoded_fmt_stream_map"]?
-      fmt_stream.split(",").each do |string|
-        if !string.empty?
-          streams << HTTP::Params.parse(string)
-        end
-      end
-    end
-
-    streams.each { |s| s.add("label", "#{s["quality"]} - #{s["type"].split(";")[0].split("/")[1]}") }
-    streams = streams.uniq { |s| s["label"] }
-
-    if self.info["region"]?
-      streams.each do |fmt|
-        fmt["url"] += "&region=" + self.info["region"]
-      end
-    end
-
-    streams.each do |fmt|
-      fmt["url"] += "&host=" + (URI.parse(fmt["url"]).host || "")
-      fmt["url"] += decrypt_signature(fmt, decrypt_function)
-    end
-
-    return streams
+  def related_videos
+    info["relatedVideos"]?.try &.as_a.map { |h| h.as_h.transform_values &.as_s } || [] of Hash(String, String)
   end
 
-  def adaptive_fmts(decrypt_function)
-    adaptive_fmts = [] of HTTP::Params
-
-    if fmts = player_response["streamingData"]?.try &.["adaptiveFormats"]?
-      fmts.as_a.each do |adaptive_fmt|
-        next if !adaptive_fmt.as_h?
-        fmt = {} of String => String
-
-        if init = adaptive_fmt["initRange"]?
-          fmt["init"] = "#{init["start"]}-#{init["end"]}"
-        end
-        fmt["init"] ||= "0-0"
-
-        fmt["lmt"] = adaptive_fmt["lastModified"]?.try &.as_s || "0"
-        fmt["projection_type"] = "1"
-        fmt["type"] = adaptive_fmt["mimeType"].as_s
-        fmt["clen"] = adaptive_fmt["contentLength"]?.try &.as_s || "0"
-        fmt["bitrate"] = adaptive_fmt["bitrate"]?.try &.as_i.to_s || "0"
-        fmt["itag"] = adaptive_fmt["itag"].as_i.to_s
-        if adaptive_fmt["url"]?
-          fmt["url"] = adaptive_fmt["url"].as_s
-        end
-        if cipher = adaptive_fmt["cipher"]? || adaptive_fmt["signatureCipher"]?
-          HTTP::Params.parse(cipher.as_s).each do |key, value|
-            fmt[key] = value
-          end
-        end
-        if index = adaptive_fmt["indexRange"]?
-          fmt["index"] = "#{index["start"]}-#{index["end"]}"
-        end
-        fmt["index"] ||= "0-0"
-
-        if adaptive_fmt["width"]?
-          fmt["size"] = "#{adaptive_fmt["width"]}x#{adaptive_fmt["height"]}"
-        end
-
-        if adaptive_fmt["fps"]?
-          fmt["fps"] = adaptive_fmt["fps"].as_i.to_s
-        end
-
-        if adaptive_fmt["qualityLabel"]?
-          fmt["quality_label"] = adaptive_fmt["qualityLabel"].as_s
-        end
-
-        params = HTTP::Params.new
-        fmt.each do |key, value|
-          params[key] = value
-        end
-
-        adaptive_fmts << params
-      end
-    elsif fmts = self.info["adaptive_fmts"]?
-      fmts.split(",") do |string|
-        adaptive_fmts << HTTP::Params.parse(string)
-      end
-    end
-
-    if self.info["region"]?
-      adaptive_fmts.each do |fmt|
-        fmt["url"] += "&region=" + self.info["region"]
-      end
-    end
-
-    adaptive_fmts.each do |fmt|
-      fmt["url"] += "&host=" + (URI.parse(fmt["url"]).host || "")
-      fmt["url"] += decrypt_signature(fmt, decrypt_function)
-    end
-
-    return adaptive_fmts
+  def allowed_regions
+    info["microformat"]?.try &.["playerMicroformatRenderer"]?
+      .try &.["availableCountries"]?.try &.as_a.map &.as_s || [] of String
   end
 
-  def video_streams(adaptive_fmts)
-    video_streams = adaptive_fmts.select { |s| s["type"].starts_with? "video" }
-
-    return video_streams
+  def author_thumbnail : String
+    info["authorThumbnail"]?.try &.as_s || ""
   end
 
-  def audio_streams(adaptive_fmts)
-    audio_streams = adaptive_fmts.select { |s| s["type"].starts_with? "audio" }
-    audio_streams.sort_by! { |s| s["bitrate"].to_i }.reverse!
-    audio_streams.each do |stream|
-      stream["bitrate"] = (stream["bitrate"].to_f64/1000).to_i.to_s
+  def sub_count_text : String
+    info["subCountText"]?.try &.as_s || "-"
+  end
+
+  def fmt_stream
+    return @fmt_stream.as(Array(Hash(String, JSON::Any))) if @fmt_stream
+    fmt_stream = info["streamingData"]?.try &.["formats"]?.try &.as_a.map &.as_h || [] of Hash(String, JSON::Any)
+    fmt_stream.each do |fmt|
+      if s = (fmt["cipher"]? || fmt["signatureCipher"]?).try { |h| HTTP::Params.parse(h.as_s) }
+        s.each do |k, v|
+          fmt[k] = JSON::Any.new(v)
+        end
+        fmt["url"] = JSON::Any.new("#{fmt["url"]}#{decrypt_signature(fmt)}")
+      end
+
+      fmt["url"] = JSON::Any.new("#{fmt["url"]}&host=#{URI.parse(fmt["url"].as_s).host}")
+      fmt["url"] = JSON::Any.new("#{fmt["url"]}&region=#{self.info["region"]}") if self.info["region"]?
     end
-
-    return audio_streams
+    fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 }
+    @fmt_stream = fmt_stream
+    return @fmt_stream.as(Array(Hash(String, JSON::Any)))
   end
 
-  def player_response
-    @player_json = JSON.parse(@info["player_response"]) if !@player_json
-    @player_json.not_nil!
+  def adaptive_fmts
+    return @adaptive_fmts.as(Array(Hash(String, JSON::Any))) if @adaptive_fmts
+    fmt_stream = info["streamingData"]?.try &.["adaptiveFormats"]?.try &.as_a.map &.as_h || [] of Hash(String, JSON::Any)
+    fmt_stream.each do |fmt|
+      if s = (fmt["cipher"]? || fmt["signatureCipher"]?).try { |h| HTTP::Params.parse(h.as_s) }
+        s.each do |k, v|
+          fmt[k] = JSON::Any.new(v)
+        end
+        fmt["url"] = JSON::Any.new("#{fmt["url"]}#{decrypt_signature(fmt)}")
+      end
+
+      fmt["url"] = JSON::Any.new("#{fmt["url"]}&host=#{URI.parse(fmt["url"].as_s).host}")
+      fmt["url"] = JSON::Any.new("#{fmt["url"]}&region=#{self.info["region"]}") if self.info["region"]?
+    end
+    fmt_stream.sort_by! { |f| f["width"]?.try &.as_i || 0 }
+    @adaptive_fmts = fmt_stream
+    return @adaptive_fmts.as(Array(Hash(String, JSON::Any)))
+  end
+
+  def video_streams
+    adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("video")
+  end
+
+  def audio_streams
+    adaptive_fmts.select &.["mimeType"]?.try &.as_s.starts_with?("audio")
   end
 
   def storyboards
-    storyboards = player_response["storyboards"]?
+    storyboards = info["storyboards"]?
       .try &.as_h
      .try &.["playerStoryboardSpecRenderer"]?
      .try &.["spec"]?
      .try &.as_s.split("|")
 
     if !storyboards
-      if storyboard = player_response["storyboards"]?
+      if storyboard = info["storyboards"]?
        .try &.as_h
        .try &.["playerLiveStoryboardSpecRenderer"]?
        .try &.["spec"]?
@@ -743,9 +632,7 @@ struct Video
     storyboard_height: Int32,
     storyboard_count: Int32)
 
-    if !storyboards
-      return items
-    end
+    return items if !storyboards
 
     url = URI.parse(storyboards.shift)
     params = HTTP::Params.parse(url.query || "")
@@ -779,82 +666,98 @@ struct Video
     end
 
   def paid
-    reason = player_response["playabilityStatus"]?.try &.["reason"]?
+    reason = info["playabilityStatus"]?.try &.["reason"]?
     paid = reason == "This video requires payment to watch." ? true : false
-
-    return paid
+    paid
   end
 
   def premium
-    if info["premium"]?
-      self.info["premium"] == "true"
-    else
-      false
-    end
+    keywords.includes? "YouTube Red"
   end
 
-  def captions
-    captions = [] of Caption
-    if player_response["captions"]?
- caption_list = player_response["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]?.try &.as_a - caption_list ||= [] of JSON::Any - - caption_list.each do |caption| - caption = Caption.from_json(caption.to_json) - caption.name.simpleText = caption.name.simpleText.split(" - ")[0] - captions << caption - end + def captions : Array(Caption) + return @captions.as(Array(Caption)) if @captions + captions = info["captions"]?.try &.["playerCaptionsTracklistRenderer"]?.try &.["captionTracks"]?.try &.as_a.map do |caption| + caption = Caption.from_json(caption.to_json) + caption.name.simpleText = caption.name.simpleText.split(" - ")[0] + caption end + captions ||= [] of Caption + @captions = captions + return @captions.as(Array(Caption)) + end - return captions + def description + description = info["microformat"]?.try &.["playerMicroformatRenderer"]? + .try &.["description"]?.try &.["simpleText"]?.try &.as_s || "" + end + + # TODO + def description=(value : String) + @description = value + end + + def description_html + info["descriptionHtml"]?.try &.as_s || "
<p></p>"
+  end
+
+  def description_html=(value : String)
+    info["descriptionHtml"] = JSON::Any.new(value)
+  end
 
   def short_description
-    short_description = self.description_html.gsub(/(<br>)|(<br\/>|"|\n)/, {
-      "<br>": " ",
-      "<br/>
": " ", - "\"": """, - "\n": " ", - }) - short_description = XML.parse_html(short_description).content[0..200].strip(" ") - - if short_description.empty? - short_description = " " - end - - return short_description + info["shortDescription"]?.try &.as_s || "" end - def length_seconds - player_response["videoDetails"]["lengthSeconds"].as_s.to_i + def hls_manifest_url : String? + info["streamingData"]?.try &.["hlsManifestUrl"]?.try &.as_s + end + + def dash_manifest_url + info["streamingData"]?.try &.["dashManifestUrl"]?.try &.as_s + end + + def genre : String + info["genre"]?.try &.as_s || "" + end + + def genre_url : String + info["genreUcid"]? ? "/channel/#{info["genreUcid"]}" : "" + end + + def license : String? + info["license"]?.try &.as_s + end + + def is_family_friendly : Bool + info["microformat"]?.try &.["playerMicroformatRenderer"]["isFamilySafe"]?.try &.as_bool || false + end + + def wilson_score : Float64 + ci_lower_bound(likes, likes + dislikes).round(4) + end + + def engagement : Float64 + ((likes + dislikes) / views).round(4) + end + + def reason : String? + info["reason"]?.try &.as_s + end + + def session_token : String? + info["sessionToken"]?.try &.as_s? end db_mapping({ - id: String, - info: { - type: HTTP::Params, - default: HTTP::Params.parse(""), - converter: Video::HTTPParamConverter, - }, - updated: Time, - title: String, - views: Int64, - likes: Int32, - dislikes: Int32, - wilson_score: Float64, - published: Time, - description: String, - language: String?, - author: String, - ucid: String, - allowed_regions: Array(String), - is_family_friendly: Bool, - genre: String, - genre_url: String, - license: String, - sub_count_text: String, - author_thumbnail: String, + id: String, + info: {type: Hash(String, JSON::Any), converter: Video::JSONConverter}, + updated: Time, }) + + @captions : Array(Caption)? + @adaptive_fmts : Array(Hash(String, JSON::Any))? + @fmt_stream : Array(Hash(String, JSON::Any))? end struct Caption @@ -878,121 +781,64 @@ class VideoRedirect < Exception end end -def get_video(id, db, refresh = true, region = nil, force_refresh = false) - if (video = db.query_one?("SELECT * FROM videos WHERE id = $1", id, as: Video)) && !region - # If record was last updated over 10 minutes ago, or video has since premiered, - # refresh (expire param in response lasts for 6 hours) - if (refresh && - (Time.utc - video.updated > 10.minutes) || - (video.premiere_timestamp && video.premiere_timestamp.as(Time) < Time.utc)) || - force_refresh - begin - video = fetch_video(id, region) - video_array = video.to_a +def parse_related(r : JSON::Any) : JSON::Any? + # TODO: r["endScreenPlaylistRenderer"], etc. + return if !r["endScreenVideoRenderer"]? + r = r["endScreenVideoRenderer"].as_h - args = arg_array(video_array[1..-1], 2) + return if !r["lengthInSeconds"]? 
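+
+  # Flatten the renderer into the string-keyed hash shape that related_videos
+  # and the watch templates already consume.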
-      db.exec("UPDATE videos SET (info,updated,title,views,likes,dislikes,wilson_score,\
-          published,description,language,author,ucid,allowed_regions,is_family_friendly,\
-          genre,genre_url,license,sub_count_text,author_thumbnail)\
-          = (#{args}) WHERE id = $1", args: video_array)
-      rescue ex
-        db.exec("DELETE FROM videos * WHERE id = $1", id)
-        raise ex
-      end
-    end
-  else
-    video = fetch_video(id, region)
-    video_array = video.to_a
-
-    args = arg_array(video_array)
-
-    if !region
-      db.exec("INSERT INTO videos VALUES (#{args}) ON CONFLICT (id) DO NOTHING", args: video_array)
-    end
-  end
-
-  return video
+  rv = {} of String => JSON::Any
+  rv["author"] = r["shortBylineText"]["runs"][0]?.try &.["text"] || JSON::Any.new("")
+  rv["ucid"] = r["shortBylineText"]["runs"][0]?.try &.["navigationEndpoint"]["browseEndpoint"]["browseId"] || JSON::Any.new("")
+  rv["author_url"] = JSON::Any.new("/channel/#{rv["ucid"]}")
+  rv["length_seconds"] = JSON::Any.new(r["lengthInSeconds"].as_i.to_s)
+  rv["title"] = r["title"]["simpleText"]
+  rv["short_view_count_text"] = JSON::Any.new(r["shortViewCountText"]?.try &.["simpleText"]?.try &.as_s || "")
+  rv["view_count"] = JSON::Any.new(r["title"]["accessibility"]?.try &.["accessibilityData"]["label"].as_s.match(/(?<views>[1-9](\d+,?)*) views/).try &.["views"].gsub(/\D/, "") || "")
+  rv["id"] = r["videoId"]
+  JSON::Any.new(rv)
 end
 
-def extract_recommended(recommended_videos)
-  rvs = [] of HTTP::Params
+def extract_polymer_config(body)
+  params = {} of String => JSON::Any
+  player_response = body.match(/window\["ytInitialPlayerResponse"\]\s*=\s*(?<info>.*?);\n/)
+    .try { |r| JSON.parse(r["info"]).as_h }
 
-  recommended_videos.try &.each do |compact_renderer|
-    if compact_renderer["compactRadioRenderer"]? || compact_renderer["compactPlaylistRenderer"]?
-      # TODO
-    elsif video_renderer = compact_renderer["compactVideoRenderer"]?
-      recommended_video = HTTP::Params.new
-      recommended_video["id"] = video_renderer["videoId"].as_s
-      recommended_video["title"] = video_renderer["title"]["simpleText"].as_s
-
-      next if !video_renderer["shortBylineText"]?
-
-      recommended_video["author"] = video_renderer["shortBylineText"]["runs"].as_a[0]["text"].as_s
-      recommended_video["ucid"] = video_renderer["shortBylineText"]["runs"].as_a[0]["navigationEndpoint"]["browseEndpoint"]["browseId"].as_s
-      recommended_video["author_thumbnail"] = video_renderer["channelThumbnail"]["thumbnails"][0]["url"].as_s
-
-      if view_count = video_renderer["viewCountText"]?.try { |field| field["simpleText"]?.try &.as_s || field["runs"][0]?.try &.["text"].as_s }.try &.delete(", views watching").to_i64?.try &.to_s
-        recommended_video["view_count"] = view_count
-        recommended_video["short_view_count_text"] = "#{number_to_short_text(view_count.to_i64)} views"
-      end
-      recommended_video["length_seconds"] = decode_length_seconds(video_renderer["lengthText"]?.try &.["simpleText"]?.try &.as_s || "0:00").to_s
-
-      rvs << recommended_video
-    end
+  if body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
+     body.includes?("https://www.google.com/sorry/index")
+    params["reason"] = JSON::Any.new("Could not extract video info. 
Instance is likely blocked.") + elsif !player_response + params["reason"] = JSON::Any.new("Video unavailable.") + elsif player_response["playabilityStatus"]?.try &.["status"]?.try &.as_s != "OK" + reason = player_response["playabilityStatus"]["errorScreen"]?.try &.["playerErrorMessageRenderer"]?.try &.["subreason"]?.try { |s| s["simpleText"]?.try &.as_s || s["runs"].as_a.map { |r| r["text"] }.join("") } || + player_response["playabilityStatus"]["reason"].as_s + params["reason"] = JSON::Any.new(reason) end - rvs -end + params["sessionToken"] = JSON::Any.new(body.match(/"XSRF_TOKEN":"(?[^"]+)"/).try &.["session_token"]?) + params["shortDescription"] = JSON::Any.new(body.match(/"og:description" content="(?[^"]+)"/).try &.["description"]?) -def extract_polymer_config(body, html) - params = HTTP::Params.new + return params if !player_response - params["session_token"] = body.match(/"XSRF_TOKEN":"(?[A-Za-z0-9\_\-\=]+)"/).try &.["session_token"] || "" - - html_info = JSON.parse(body.match(/ytplayer\.config = (?.*?);ytplayer\.load/).try &.["info"] || "{}").try &.["args"]?.try &.as_h - - if html_info - html_info.each do |key, value| - params[key] = value.to_s - end + {"captions", "microformat", "playabilityStatus", "storyboards", "videoDetails"}.each do |f| + params[f] = player_response[f] if player_response[f]? end - initial_data = extract_initial_data(body) + yt_initial_data = body.match(/window\["ytInitialData"\]\s*=\s*(?.*?);\n/) + .try { |r| JSON.parse(r["info"]).as_h } - primary_results = initial_data["contents"]? - .try &.["twoColumnWatchNextResults"]? - .try &.["results"]? - .try &.["results"]? - .try &.["contents"]? - - comment_continuation = primary_results.try &.as_a.select { |object| object["itemSectionRenderer"]? }[0]? - .try &.["itemSectionRenderer"]? - .try &.["continuations"]? - .try &.[0]? - .try &.["nextContinuationData"]? - - params["ctoken"] = comment_continuation.try &.["continuation"]?.try &.as_s || "" - params["itct"] = comment_continuation.try &.["clickTrackingParams"]?.try &.as_s || "" - - rvs = initial_data["contents"]? - .try &.["twoColumnWatchNextResults"]? - .try &.["secondaryResults"]? - .try &.["secondaryResults"]? - .try &.["results"]? - .try &.as_a - - params["rvs"] = extract_recommended(rvs).join(",") - - # TODO: Watching now - params["views"] = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]? - .try &.["videoPrimaryInfoRenderer"]? - .try &.["viewCount"]? - .try &.["videoViewCountRenderer"]? - .try &.["viewCount"]? - .try &.["simpleText"]? - .try &.as_s.gsub(/\D/, "").to_i64.to_s || "0" + params["relatedVideos"] = yt_initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]? + .try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r| + parse_related r + }.try { |a| JSON::Any.new(a) } || yt_initial_data.try &.["webWatchNextResponseExtensionData"]?.try &.["relatedVideoArgs"]? + .try &.as_s.split(",").map { |r| + r = HTTP::Params.parse(r).to_h + JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) })) + }.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any) + primary_results = yt_initial_data.try &.["contents"]?.try &.["twoColumnWatchNextResults"]?.try &.["results"]? + .try &.["results"]?.try &.["contents"]? sentiment_bar = primary_results.try &.as_a.select { |object| object["videoPrimaryInfoRenderer"]? }[0]? .try &.["videoPrimaryInfoRenderer"]? .try &.["sentimentBar"]? 
@@ -1000,34 +846,13 @@ def extract_polymer_config(body, html)
     .try &.["tooltip"]?
     .try &.as_s
 
-  likes, dislikes = sentiment_bar.try &.split(" / ").map { |a| a.delete(", ").to_i32 }[0, 2] || {0, 0}
+  likes, dislikes = sentiment_bar.try &.split(" / ", 2).map &.gsub(/\D/, "").to_i64 || {0_i64, 0_i64}
+  params["likes"] = JSON::Any.new(likes)
+  params["dislikes"] = JSON::Any.new(dislikes)
 
-  params["likes"] = "#{likes}"
-  params["dislikes"] = "#{dislikes}"
-
-  published = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
-    .try &.["videoSecondaryInfoRenderer"]?
-    .try &.["dateText"]?
-    .try &.["simpleText"]?
-    .try &.as_s.split(" ")[-3..-1].join(" ")
-
-  if published
-    params["published"] = Time.parse(published, "%b %-d, %Y", Time::Location.local).to_unix.to_s
-  else
-    params["published"] = Time.utc(1990, 1, 1).to_unix.to_s
-  end
-
-  params["description_html"] = "<p></p>"
-
-  description_html = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
-    .try &.["videoSecondaryInfoRenderer"]?
-    .try &.["description"]?
-    .try &.["runs"]?
-    .try &.as_a
-
-  if description_html
-    params["description_html"] = content_to_comment_html(description_html)
-  end
+  params["descriptionHtml"] = JSON::Any.new(primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
+    .try &.["videoSecondaryInfoRenderer"]?.try &.["description"]?.try &.["runs"]?
+    .try &.as_a.try { |t| content_to_comment_html(t).gsub("\n", "<br>") } || "<p></p>")
 
   metadata = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
     .try &.["videoSecondaryInfoRenderer"]?
@@ -1036,10 +861,6 @@ def extract_polymer_config(body, html)
     .try &.["rows"]?
     .try &.as_a
 
-  params["genre"] = ""
-  params["genre_ucid"] = ""
-  params["license"] = ""
-
   metadata.try &.each do |row|
     title = row["metadataRowRenderer"]?.try &.["title"]?.try &.["simpleText"]?.try &.as_s
     contents = row["metadataRowRenderer"]?
@@ -1050,220 +871,125 @@ def extract_polymer_config(body, html)
       contents = contents.try &.["runs"]?
        .try &.as_a[0]?
 
-      params["genre"] = contents.try &.["text"]?
-        .try &.as_s || ""
-      params["genre_ucid"] = contents.try &.["navigationEndpoint"]?
-        .try &.["browseEndpoint"]?
-        .try &.["browseId"]?.try &.as_s || ""
+      params["genre"] = JSON::Any.new(contents.try &.["text"]?.try &.as_s || "")
+      params["genreUcid"] = JSON::Any.new(contents.try &.["navigationEndpoint"]?.try &.["browseEndpoint"]?
+        .try &.["browseId"]?.try &.as_s || "")
     elsif title.try &.== "License"
       contents = contents.try &.["runs"]?
        .try &.as_a[0]?
 
-      params["license"] = contents.try &.["text"]?
-        .try &.as_s || ""
+      params["license"] = JSON::Any.new(contents.try &.["text"]?.try &.as_s || "")
     elsif title.try &.== "Licensed to YouTube by"
-      params["license"] = contents.try &.["simpleText"]?
-        .try &.as_s || ""
+      params["license"] = JSON::Any.new(contents.try &.["simpleText"]?.try &.as_s || "")
     end
   end
 
   author_info = primary_results.try &.as_a.select { |object| object["videoSecondaryInfoRenderer"]? }[0]?
-    .try &.["videoSecondaryInfoRenderer"]?
-    .try &.["owner"]?
-    .try &.["videoOwnerRenderer"]?
+    .try &.["videoSecondaryInfoRenderer"]?.try &.["owner"]?.try &.["videoOwnerRenderer"]?
 
-  params["author_thumbnail"] = author_info.try &.["thumbnail"]?
-    .try &.["thumbnails"]?
-    .try &.as_a[0]?
-    .try &.["url"]?
-    .try &.as_s || ""
+  params["authorThumbnail"] = JSON::Any.new(author_info.try &.["thumbnail"]?
+    .try &.["thumbnails"]?.try &.as_a[0]?.try &.["url"]?
+    .try &.as_s || "")
 
-  params["sub_count_text"] = author_info.try &.["subscriberCountText"]?
-    .try &.["simpleText"]?
-    .try &.as_s.gsub(/\D/, "") || "0"
+  params["subCountText"] = JSON::Any.new(author_info.try &.["subscriberCountText"]?
+    .try { |t| t["simpleText"]? || t["runs"]?.try &.[0]?.try &.["text"]? }.try &.as_s.split(" ", 2)[0] || "-")
 
-  return params
+  initial_data = body.match(/ytplayer\.config\s*=\s*(?<info>.*?);ytplayer\.web_player_context_config/)
+    .try { |r| JSON.parse(r["info"]) }.try &.["args"]["player_response"]?
+    .try &.as_s?.try &.try { |r| JSON.parse(r).as_h }
+
+  return params if !initial_data
+
+  {"playabilityStatus", "streamingData"}.each do |f|
+    params[f] = initial_data[f] if initial_data[f]?
+  end
+
+  params
 end
 
-def extract_player_config(body, html)
-  params = HTTP::Params.new
-
-  if md = body.match(/'XSRF_TOKEN': "(?<session_token>[A-Za-z0-9\_\-\=]+)"/)
-    params["session_token"] = md["session_token"]
-  end
-
-  if md = body.match(/'RELATED_PLAYER_ARGS': (?<json>.*?),\n/)
-    recommended_json = JSON.parse(md["json"])
-    rvs_params = recommended_json["rvs"].as_s.split(",").map { |params| HTTP::Params.parse(params) }
-
-    if watch_next_response = recommended_json["watch_next_response"]?
-      watch_next_json = JSON.parse(watch_next_response.as_s)
-      rvs = watch_next_json["contents"]?
-        .try &.["twoColumnWatchNextResults"]?
-        .try &.["secondaryResults"]?
-        .try &.["secondaryResults"]?
-        .try &.["results"]?
-        .try &.as_a
-
-      rvs = extract_recommended(rvs).compact_map do |rv|
-        if !rv["short_view_count_text"]?
-          rv_params = rvs_params.select { |rv_params| rv_params["id"]? == (rv["id"]? || "") }[0]?
-
-          if rv_params.try &.["short_view_count_text"]?
-            rv["short_view_count_text"] = rv_params.not_nil!["short_view_count_text"]
-            rv
-          else
-            nil
-          end
-        else
-          rv
-        end
+def get_video(id, db, refresh = true, region = nil, force_refresh = false)
+  if (video = db.query_one?("SELECT * FROM videos WHERE id = $1", id, as: Video)) && !region
+    # If record was last updated over 10 minutes ago, or video has since premiered,
+    # refresh (expire param in response lasts for 6 hours)
+    if (refresh &&
+       (Time.utc - video.updated > 10.minutes) ||
+       (video.premiere_timestamp.try &.< Time.utc)) ||
+       force_refresh
+      begin
+        video = fetch_video(id, region)
+        db.exec("UPDATE videos SET (id, info, updated) = ($1, $2, $3) WHERE id = $1", video.id, video.info.to_json, video.updated)
+      rescue ex
+        db.exec("DELETE FROM videos * WHERE id = $1", id)
+        raise ex
       end
-      params["rvs"] = (rvs.map &.to_s).join(",")
     end
-  end
-
-  html_info = body.match(/ytplayer\.config = (?<info>.*?);ytplayer\.load/).try &.["info"]
-
-  if html_info
-    JSON.parse(html_info)["args"].as_h.each do |key, value|
-      params[key] = value.to_s
-    end
   else
-    error_message = html.xpath_node(%q(//h1[@id="unavailable-message"]))
-    if error_message
-      params["reason"] = error_message.content.strip
-    elsif body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
-          body.includes?("https://www.google.com/sorry/index")
-      params["reason"] = "Could not extract video info. Instance is likely blocked."
-    else
-      params["reason"] = "Video unavailable."
+    video = fetch_video(id, region)
+    if !region
+      db.exec("INSERT INTO videos VALUES ($1, $2, $3) ON CONFLICT (id) DO NOTHING", video.id, video.info.to_json, video.updated)
     end
   end
 
-  return params
+  return video
 end
 
 def fetch_video(id, region)
-  response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
+  response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999"))
 
   if md = response.headers["location"]?.try &.match(/v=(?<id>[a-zA-Z0-9_-]{11})/)
     raise VideoRedirect.new(video_id: md["id"])
   end
 
-  html = XML.parse_html(response.body)
-  info = extract_player_config(response.body, html)
-  info["cookie"] = response.cookies.to_h.map { |name, cookie| "#{name}=#{cookie.value}" }.join("; ")
-
-  allowed_regions = html.xpath_node(%q(//meta[@itemprop="regionsAllowed"])).try &.["content"].split(",")
-  if !allowed_regions || allowed_regions == [""]
-    allowed_regions = [] of String
-  end
+  info = extract_polymer_config(response.body)
+  info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
+  allowed_regions = info["microformat"]?.try &.["playerMicroformatRenderer"]["availableCountries"]?.try &.as_a.map &.as_s || [] of String
 
   # Check for region-blocks
-  if info["reason"]? && info["reason"].includes?("your country")
+  if info["reason"]?.try &.as_s.includes?("your country")
     bypass_regions = PROXY_LIST.keys & allowed_regions
 
     if !bypass_regions.empty?
      region = bypass_regions[rand(bypass_regions.size)]
-      response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&disable_polymer=1&has_verified=1&bpctr=9999999999"))
+      response = YT_POOL.client(region, &.get("/watch?v=#{id}&gl=US&hl=en&has_verified=1&bpctr=9999999999"))
 
-      html = XML.parse_html(response.body)
-      info = extract_player_config(response.body, html)
-
-      info["region"] = region if region
-      info["cookie"] = response.cookies.to_h.map { |name, cookie| "#{name}=#{cookie.value}" }.join("; ")
+      region_info = extract_polymer_config(response.body)
+      region_info["region"] = JSON::Any.new(region) if region
+      region_info["cookie"] = JSON::Any.new(response.cookies.to_h.transform_values { |v| JSON::Any.new(v.value) })
+      info = region_info if !region_info["reason"]?
     end
   end
 
   # Try to pull streams from embed URL
   if info["reason"]?
     embed_page = YT_POOL.client &.get("/embed/#{id}").body
-    sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]?
-    sts ||= ""
-    embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&disable_polymer=1&sts=#{sts}").body)
+    sts = embed_page.match(/"sts"\s*:\s*(?<sts>\d+)/).try &.["sts"]? || ""
+    embed_info = HTTP::Params.parse(YT_POOL.client &.get("/get_video_info?html5=1&video_id=#{id}&eurl=https://youtube.googleapis.com/v/#{id}&gl=US&hl=en&sts=#{sts}").body)
 
-    if !embed_info["reason"]?
-      embed_info.each do |key, value|
-        info[key] = value.to_s
+    if embed_info["player_response"]?
+      player_response = JSON.parse(embed_info["player_response"])
+      {"captions", "microformat", "playabilityStatus", "streamingData", "videoDetails", "storyboards"}.each do |f|
+        info[f] = player_response[f] if player_response[f]?
       end
-    else
-      raise info["reason"]
     end
+
+    initial_data = JSON.parse(embed_info["watch_next_response"]) if embed_info["watch_next_response"]?
+
+    info["relatedVideos"] = initial_data.try &.["playerOverlays"]?.try &.["playerOverlayRenderer"]?
+      .try &.["endScreen"]?.try &.["watchNextEndScreenRenderer"]?.try &.["results"]?.try &.as_a.compact_map { |r|
+      parse_related r
+    }.try { |a| JSON::Any.new(a) } || embed_info["rvs"]?.try &.split(",").map { |r|
+      r = HTTP::Params.parse(r).to_h
+      JSON::Any.new(Hash.zip(r.keys, r.values.map { |v| JSON::Any.new(v) }))
+    }.try { |a| JSON::Any.new(a) } || JSON::Any.new([] of JSON::Any)
   end
 
-  if info["reason"]? && !info["player_response"]?
-    raise info["reason"]
-  end
-
-  player_json = JSON.parse(info["player_response"])
-  if reason = player_json["playabilityStatus"]?.try &.["reason"]?.try &.as_s
-    raise reason
-  end
-
-  title = player_json["videoDetails"]["title"].as_s
-  author = player_json["videoDetails"]["author"]?.try &.as_s || ""
-  ucid = player_json["videoDetails"]["channelId"]?.try &.as_s || ""
-
-  info["premium"] = html.xpath_node(%q(.//span[text()="Premium"])) ? "true" : "false"
-
-  views = html.xpath_node(%q(//meta[@itemprop="interactionCount"]))
-    .try &.["content"].to_i64? || 0_i64
-
-  likes = html.xpath_node(%q(//button[@title="I like this"]/span))
-    .try &.content.delete(",").try &.to_i? || 0
-
-  dislikes = html.xpath_node(%q(//button[@title="I dislike this"]/span))
-    .try &.content.delete(",").try &.to_i? || 0
-
-  avg_rating = (likes.to_f/(likes.to_f + dislikes.to_f) * 4 + 1)
-  avg_rating = avg_rating.nan? ? 0.0 : avg_rating
-  info["avg_rating"] = "#{avg_rating}"
-
-  description_html = html.xpath_node(%q(//p[@id="eow-description"])).try &.to_xml(options: XML::SaveOptions::NO_DECL) || "<p></p>"
-  wilson_score = ci_lower_bound(likes, likes + dislikes)
-
-  published = html.xpath_node(%q(//meta[@itemprop="datePublished"])).try &.["content"]
-  published ||= Time.utc.to_s("%Y-%m-%d")
-  published = Time.parse(published, "%Y-%m-%d", Time::Location.local)
-
-  is_family_friendly = html.xpath_node(%q(//meta[@itemprop="isFamilyFriendly"])).try &.["content"] == "True"
-  is_family_friendly ||= true
-
-  genre = html.xpath_node(%q(//meta[@itemprop="genre"])).try &.["content"]
-  genre ||= ""
-
-  genre_url = html.xpath_node(%(//ul[contains(@class, "watch-info-tag-list")]/li/a[text()="#{genre}"])).try &.["href"]?
-  genre_url ||= ""
-
-  # YouTube provides invalid URLs for some genres, so we fix that here
-  case genre
-  when "Comedy"
-    genre_url = "/channel/UCQZ43c4dAA9eXCQuXWu9aTw"
-  when "Education"
-    genre_url = "/channel/UCdxpofrI-dO6oYfsqHDHphw"
-  when "Gaming"
-    genre_url = "/channel/UCOpNcN46UbXVtpKMrmU4Abg"
-  when "Movies"
-    genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
-  when "Nonprofits & Activism"
-    genre_url = "/channel/UCfFyYRYslvuhwMDnx6KjUvw"
-  when "Trailers"
-    genre_url = "/channel/UClgRkhTL3_hImCAmdLfDE4g"
-  else nil # Ignore
-  end
-
-  license = html.xpath_node(%q(//h4[contains(text(),"License")]/parent::*/ul/li)).try &.content || ""
-  sub_count_text = html.xpath_node(%q(//span[contains(@class, "subscriber-count")])).try &.["title"]? || "0"
-  author_thumbnail = html.xpath_node(%(//span[@class="yt-thumb-clip"]/img)).try &.["data-thumb"]?.try &.gsub(/^\/\//, "https://") || ""
-
-  video = Video.new(id, info, Time.utc, title, views, likes, dislikes, wilson_score, published, description_html,
-    nil, author, ucid, allowed_regions, is_family_friendly, genre, genre_url, license, sub_count_text, author_thumbnail)
+  raise info["reason"]?.try &.as_s || "" if !info["videoDetails"]?
+  video = Video.new(id, info, Time.utc)
 
   return video
 end
 
-def itag_to_metadata?(itag : String)
-  return VIDEO_FORMATS[itag]?
+def itag_to_metadata?(itag : JSON::Any)
+  return VIDEO_FORMATS[itag.to_s]?
 end
 
 def process_continuation(db, query, plid, id)
diff --git a/src/invidious/views/components/item.ecr b/src/invidious/views/components/item.ecr
index e9baba2c..0c19fc1b 100644
--- a/src/invidious/views/components/item.ecr
+++ b/src/invidious/views/components/item.ecr
@@ -85,7 +85,7 @@

-        <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp && item.premiere_timestamp.not_nil! > Time.utc %>
+        <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp.try &.> Time.utc %>
<%= translate(locale, "Premieres in `x`", recode_date((item.premiere_timestamp.as(Time) - Time.utc).ago, locale)) %>
<% elsif Time.utc - item.published > 1.minute %>
<%= translate(locale, "Shared `x` ago", recode_date(item.published, locale)) %>
@@ -144,7 +144,7 @@

-        <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp && item.premiere_timestamp.not_nil! > Time.utc %>
+        <% if item.responds_to?(:premiere_timestamp) && item.premiere_timestamp.try &.> Time.utc %>
<%= translate(locale, "Premieres in `x`", recode_date((item.premiere_timestamp.as(Time) - Time.utc).ago, locale)) %>
<% elsif Time.utc - item.published > 1.minute %>
<%= translate(locale, "Shared `x` ago", recode_date(item.published, locale)) %>
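Both item.ecr hunks above replace the explicit nil check plus `not_nil!` cast with a single nil-safe comparison. A minimal standalone sketch of that idiom (the `Item` struct here is a hypothetical stand-in, not code from this patch):

  # Sketch only: `Item` stands in for the real feed/renderer structs.
  struct Item
    getter premiere_timestamp : Time?

    def initialize(@premiere_timestamp)
    end
  end

  upcoming = Item.new(Time.utc + 1.hour)
  ordinary = Item.new(nil)

  # `try` short-circuits on nil, so no guard or `not_nil!` is needed:
  puts upcoming.premiere_timestamp.try &.> Time.utc # prints "true"
  puts ordinary.premiere_timestamp.try &.> Time.utc # prints nothing: nil result

Because `try` returns nil when the receiver is nil, and nil is falsey in Crystal conditionals, the premiere branch is simply skipped for already-published items.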
diff --git a/src/invidious/views/components/player.ecr b/src/invidious/views/components/player.ecr
index 3c30f69e..6b01d25f 100644
--- a/src/invidious/views/components/player.ecr
+++ b/src/invidious/views/components/player.ecr
@@ -3,23 +3,23 @@
   <% if params.autoplay %>autoplay<% end %>
   <% if params.video_loop %>loop<% end %>
   <% if params.controls %>controls<% end %>>
-  <% if hlsvp && !CONFIG.disabled?("livestreams") %>
-
+  <% if (hlsvp = video.hls_manifest_url) && !CONFIG.disabled?("livestreams") %>
+
   <% else %>
     <% if params.listen %>
       <% audio_streams.each_with_index do |fmt, i| %>
-        <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["type"] %>' label="<%= fmt["bitrate"] %>k" selected="<%= i == 0 ? true : false %>">
+        <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["mimeType"] %>' label="<%= fmt["bitrate"] %>k" selected="<%= i == 0 ? true : false %>">
       <% end %>
     <% else %>
       <% if params.quality == "dash" %>
-
+
      <% end %>
 
      <% fmt_stream.each_with_index do |fmt, i| %>
        <% if params.quality %>
-          <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["type"] %>' label="<%= fmt["label"] %>" selected="<%= params.quality == fmt["label"].split(" - ")[0] %>">
+          <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["mimeType"] %>' label="<%= fmt["quality"] %>" selected="<%= params.quality == fmt["quality"] %>">
        <% else %>
-          <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["type"] %>' label="<%= fmt["label"] %>" selected="<%= i == 0 ? true : false %>">
+          <source src="<%= fmt["url"] %><% if params.local %>&local=true<% end %>" type='<%= fmt["mimeType"] %>' label="<%= fmt["quality"] %>" selected="<%= i == 0 ? true : false %>">
        <% end %>
      <% end %>
    <% end %>
diff --git a/src/invidious/views/watch.ecr b/src/invidious/views/watch.ecr
index ae6341e0..9a1e6c32 100644
--- a/src/invidious/views/watch.ecr
+++ b/src/invidious/views/watch.ecr
@@ -33,8 +33,8 @@
     "index" => continuation,
     "plid" => plid,
     "length_seconds" => video.length_seconds.to_f,
-    "play_next" => !rvs.empty? && !plid && params.continue,
-    "next_video" => rvs.select { |rv| rv["id"]? }[0]?.try &.["id"],
+    "play_next" => !video.related_videos.empty? && !plid && params.continue,
+    "next_video" => video.related_videos.select { |rv| rv["id"]? }[0]?.try &.["id"],
     "youtube_comments_text" => HTML.escape(translate(locale, "View YouTube comments")),
     "reddit_comments_text" => HTML.escape(translate(locale, "View Reddit comments")),
     "reddit_permalink_text" => HTML.escape(translate(locale, "View more comments on Reddit")),
@@ -72,13 +72,13 @@
     <% end %>
 
-    <% if !reason.empty? %>
+    <% if video.reason %>
       <h3>
-        <%= reason %>
+        <%= video.reason %>
       </h3>
-    <% elsif video.premiere_timestamp %>
+    <% elsif video.premiere_timestamp.try &.> Time.utc %>
       <h3>
-        <%= translate(locale, "Premieres in `x`", recode_date((video.premiere_timestamp.as(Time) - Time.utc).ago, locale)) %>
+        <%= video.premiere_timestamp.try { |t| translate(locale, "Premieres in `x`", recode_date((t - Time.utc).ago, locale)) } %>
       </h3>
     <% end %>
@@ -137,18 +137,18 @@
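With the watch.ecr change above, the template reads failure state straight off the model (`video.reason`) instead of a local computed in the route handler. That works because the polymer rewrite stores everything scraped from the page in a single `Hash(String, JSON::Any)` column, and each accessor digs into it defensively. A rough standalone sketch of that accessor pattern, using a bare hash in place of the real `Video` struct (the URL and key names below are illustrative only):

  require "json"

  # Hypothetical stand-in for Video#info after this patch: one JSON blob.
  info = JSON.parse(%({
    "streamingData": {"hlsManifestUrl": "https://manifest.example/video.m3u8"},
    "playabilityStatus": {"status": "OK"}
  })).as_h

  # Each getter digs with `[]?`/`try`, so absent keys fall through to nil
  # instead of raising, mirroring Video#hls_manifest_url and Video#reason.
  def hls_manifest_url(info : Hash(String, JSON::Any)) : String?
    info["streamingData"]?.try &.["hlsManifestUrl"]?.try &.as_s
  end

  def reason(info : Hash(String, JSON::Any)) : String?
    info["reason"]?.try &.as_s
  end

  puts hls_manifest_url(info) # prints the manifest URL
  puts reason(info)           # prints an empty line: key absent, nil returned

The same shape explains the template: `<% if video.reason %>` renders the error box only when extraction actually recorded a reason, with no empty-string sentinel needed.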