From 208f32661b8f698b06dac141f4bcf98e04daf0a0 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Wed, 6 Jun 2018 13:21:53 -0500 Subject: [PATCH] Fix notifications --- src/helpers.cr | 180 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 145 insertions(+), 35 deletions(-) diff --git a/src/helpers.cr b/src/helpers.cr index 75aa5c78..c195cc28 100644 --- a/src/helpers.cr +++ b/src/helpers.cr @@ -542,12 +542,12 @@ def login_req(login_form, f_req) return HTTP::Params.encode(data) end -def get_channel(id, client, db) +def get_channel(id, client, db, refresh = true, pull_videos = true) if db.query_one?("SELECT EXISTS (SELECT true FROM channels WHERE id = $1)", id, as: Bool) channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel) - if Time.now - channel.updated > 1.minute - channel = fetch_channel(id, client, db) + if refresh && Time.now - channel.updated > 10.minutes + channel = fetch_channel(id, client, db, pull_videos) channel_array = channel.to_a args = arg_array(channel_array) @@ -555,7 +555,7 @@ def get_channel(id, client, db) ON CONFLICT (id) DO UPDATE SET updated = $3", channel_array) end else - channel = fetch_channel(id, client, db) + channel = fetch_channel(id, client, db, pull_videos) args = arg_array(channel.to_a) db.exec("INSERT INTO channels VALUES (#{args})", channel.to_a) end @@ -563,35 +563,112 @@ def get_channel(id, client, db) return channel end -def fetch_channel(ucid, client, db) +def fetch_channel(ucid, client, db, pull_videos = true) rss = client.get("/feeds/videos.xml?channel_id=#{ucid}").body rss = XML.parse_html(rss) - db.exec("DELETE FROM channel_videos * WHERE ucid = $1", ucid) - - rss.xpath_nodes("//feed/entry").each do |entry| - video_id = entry.xpath_node("videoid").not_nil!.content - title = entry.xpath_node("title").not_nil!.content - published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z") - updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z") - author = entry.xpath_node("author/name").not_nil!.content - ucid = entry.xpath_node("channelid").not_nil!.content - - video = ChannelVideo.new(video_id, title, published, updated, ucid, author) - - video_array = video.to_a - args = arg_array(video_array) - - db.exec("UPDATE users SET notifications = notifications || $1 \ - WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video_id, published, ucid) - - # TODO: Update record on conflict - db.exec("INSERT INTO channel_videos VALUES (#{args})\ - ON CONFLICT (id) DO NOTHING", video_array) + author = rss.xpath_node(%q(//feed/title)) + if !author + raise "Deleted or invalid channel" end + author = author.content - author = rss.xpath_node("//feed/author/name").try &.content - author ||= "" + if !pull_videos + rss.xpath_nodes("//feed/entry").each do |entry| + video_id = entry.xpath_node("videoid").not_nil!.content + title = entry.xpath_node("title").not_nil!.content + published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z") + updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z") + author = entry.xpath_node("author/name").not_nil!.content + ucid = entry.xpath_node("channelid").not_nil!.content + + video = ChannelVideo.new(video_id, title, published, Time.now, ucid, author) + + db.exec("UPDATE users SET notifications = notifications || $1 \ + WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, Time.now, ucid) + + video_array = video.to_a + args = arg_array(video_array) + db.exec("INSERT INTO channel_videos VALUES (#{args})\ + ON CONFLICT (id) DO NOTHING", video_array) + end + else + videos = [] of ChannelVideo + page = 1 + + loop do + url = produce_videos_url(ucid, page) + response = client.get(url) + + json = JSON.parse(response.body) + content_html = json["content_html"].as_s + if content_html.empty? + # If we don't get anything, move on + break + end + document = XML.parse_html(content_html) + + document.xpath_nodes(%q(//li[contains(@class, "channels-content-item")])).each do |item| + root = item.xpath_node(%q(div/div/div[@class="yt-lockup-content"])) + if !root + raise "could not find root" + end + + anchor = root.xpath_node(%q(h3[contains(@class,"yt-lockup-title")]/a)) + if !anchor + raise "could not find anchor" + end + title = anchor.content.strip + video_id = anchor["href"].lchop("/watch?v=") + + published = root.xpath_node(%q(div[@class="yt-lockup-meta"]/ul/li[2])) + if !published + # This happens on Youtube red videos, here we just skip them + next + end + published = published.content.split(" ") + span = published[0].to_i + case published[1] + when .includes? "second" + span = span.seconds + when .includes? "minute" + span = span.minutes + when .includes? "hour" + span = span.hours + when .includes? "day" + span = span.days + when .includes? "week" + span = span.weeks + when .includes? "month" + span = span.months + when .includes? "year" + span = span.years + else + raise "Unrecognized time: #{published[1]}" + end + + published = Time.now - span + + videos << ChannelVideo.new(video_id, title, published, Time.now, ucid, author) + end + + if document.xpath_nodes(%q(//li[contains(@class, "channels-content-item")])).size < 30 + break + end + + page += 1 + end + + db.exec("DELETE FROM channel_videos * WHERE ucid = $1", ucid) + videos.each do |video| + db.exec("UPDATE users SET notifications = notifications || $1 \ + WHERE updated < $2 AND $3 = ANY(subscriptions) AND $1 <> ALL(notifications)", video.id, video.published, ucid) + + video_array = video.to_a + args = arg_array(video_array) + db.exec("INSERT INTO channel_videos VALUES (#{args})", video_array) + end + end channel = InvidiousChannel.new(ucid, author, Time.now) @@ -602,8 +679,8 @@ def get_user(sid, client, headers, db) if db.query_one?("SELECT EXISTS (SELECT true FROM users WHERE id = $1)", sid, as: Bool) user = db.query_one("SELECT * FROM users WHERE id = $1", sid, as: User) - if Time.now - user.updated > 1.minutes - user = fetch_user(sid, client, headers) + if Time.now - user.updated > 1.minute + user = fetch_user(sid, client, headers, db) user_array = user.to_a args = arg_array(user_array) @@ -611,7 +688,7 @@ def get_user(sid, client, headers, db) ON CONFLICT (email) DO UPDATE SET id = $1, updated = $2, subscriptions = $4", user_array) end else - user = fetch_user(sid, client, headers) + user = fetch_user(sid, client, headers, db) user_array = user.to_a args = arg_array(user.to_a) @@ -622,7 +699,7 @@ def get_user(sid, client, headers, db) return user end -def fetch_user(sid, client, headers) +def fetch_user(sid, client, headers, db) feed = client.get("/subscription_manager?disable_polymer=1", headers) feed = XML.parse_html(feed.body) @@ -630,9 +707,13 @@ def fetch_user(sid, client, headers) feed.xpath_nodes(%q(//ul[@id="guide-channels"]/li/a)).each do |channel| if !["Popular on YouTube", "Music", "Sports", "Gaming"].includes? channel["title"] channel_id = channel["href"].lstrip("/channel/") - get_channel(channel_id, client, PG_DB) - channels << channel_id + begin + channel = get_channel(channel_id, client, db, false, false) + channels << channel.id + rescue ex + next + end end end @@ -670,7 +751,7 @@ def decode_time(string) end def produce_playlist_url(ucid, index) - ucid = ucid.lstrip("UC") + ucid = ucid.lchop("UC") ucid = "VLUU" + ucid continuation = write_var_int(index) @@ -702,6 +783,35 @@ def produce_playlist_url(ucid, index) return url end +def produce_videos_url(ucid, page) + page = "#{page}" + + meta = "\x12\x06videos \x00\x30\x01\x38\x01\x60\x01\x6a\x00\x7a" + meta += page.size.to_u8.unsafe_chr + meta += page + meta += "\xb8\x01\x00" + + meta = Base64.urlsafe_encode(meta) + meta = URI.escape(meta) + + continuation = "\x12" + continuation += ucid.size.to_u8.unsafe_chr + continuation += ucid + continuation += "\x1a" + continuation += meta.size.to_u8.unsafe_chr + continuation += meta + + continuation = continuation.size.to_u8.unsafe_chr + continuation + continuation = "\xe2\xa9\x85\xb2\x02" + continuation + + continuation = Base64.urlsafe_encode(continuation) + continuation = URI.escape(continuation) + + url = "/browse_ajax?continuation=#{continuation}" + + return url +end + def read_var_int(bytes) numRead = 0 result = 0