From 239a6c892cf58a0d85184f5eda05437743b8c8b7 Mon Sep 17 00:00:00 2001 From: Omar Roth Date: Wed, 28 Mar 2018 22:29:54 -0500 Subject: [PATCH] Use seperate table for videos pulled from RSS --- config/sql/channel_videos.sql | 20 ++++++++++++ config/sql/channels.sql | 14 ++------ setup.sh | 1 + src/helpers.cr | 61 ++++++++++++++++++++++++++--------- src/invidious.cr | 29 +++++------------ src/views/subscriptions.ecr | 10 +++--- 6 files changed, 81 insertions(+), 54 deletions(-) create mode 100644 config/sql/channel_videos.sql diff --git a/config/sql/channel_videos.sql b/config/sql/channel_videos.sql new file mode 100644 index 00000000..e6cb3c8f --- /dev/null +++ b/config/sql/channel_videos.sql @@ -0,0 +1,20 @@ +-- Table: public.channel_videos + +-- DROP TABLE public.channel_videos; + +CREATE TABLE public.channel_videos +( + id text COLLATE pg_catalog."default" NOT NULL, + title text COLLATE pg_catalog."default", + published timestamp with time zone, + updated timestamp with time zone, + ucid text COLLATE pg_catalog."default", + author text COLLATE pg_catalog."default", + CONSTRAINT channel_videos_id_key UNIQUE (id) +) +WITH ( + OIDS = FALSE +) +TABLESPACE pg_default; + +GRANT ALL ON TABLE public.channel_videos TO kemal; diff --git a/config/sql/channels.sql b/config/sql/channels.sql index a908c785..d24329f3 100644 --- a/config/sql/channels.sql +++ b/config/sql/channels.sql @@ -5,9 +5,8 @@ CREATE TABLE public.channels ( id text COLLATE pg_catalog."default" NOT NULL, - rss text COLLATE pg_catalog."default", - updated timestamp with time zone, - author text COLLATE pg_catalog."default" + author text COLLATE pg_catalog."default", + updated timestamp with time zone ) WITH ( OIDS = FALSE @@ -15,12 +14,3 @@ WITH ( TABLESPACE pg_default; GRANT ALL ON TABLE public.channels TO kemal; - --- Index: channel_id_idx - --- DROP INDEX public.channel_id_idx; - -CREATE UNIQUE INDEX channel_id_idx - ON public.channels USING btree - (id COLLATE pg_catalog."default") - TABLESPACE pg_default; \ No newline at end of file diff --git a/setup.sh b/setup.sh index 333312d0..664073e4 100755 --- a/setup.sh +++ b/setup.sh @@ -4,3 +4,4 @@ createdb invidious createuser kemal psql invidious < config/sql/channels.sql psql invidious < config/sql/videos.sql +psql invidious < config/sql/channel_videos.sql diff --git a/src/helpers.cr b/src/helpers.cr index c44405b1..08473de9 100644 --- a/src/helpers.cr +++ b/src/helpers.cr @@ -62,15 +62,20 @@ class InvidiousChannel end add_mapping({ - id: String, - rss: { - type: XML::Node, - default: XML.parse_html(""), - converter: InvidiousChannel::XMLConverter, - - }, - updated: Time, + id: String, author: String, + updated: Time, + }) +end + +class ChannelVideo + add_mapping({ + id: String, + title: String, + published: Time, + updated: Time, + ucid: String, + author: String, }) end @@ -203,10 +208,16 @@ def get_video(id, client, db, refresh = true) # If record was last updated over an hour ago, refresh (expire param in response lasts for 6 hours) if refresh && Time.now - video.updated > 1.hours - db.exec("DELETE FROM videos * WHERE id = $1", id) + begin video = fetch_video(id, client) - args = arg_array(video.to_a) - db.exec("INSERT INTO videos VALUES (#{args})", video.to_a) + video_array = video.to_a[1..-1] + args = arg_array(video_array) + + db.exec("UPDATE videos SET (id,info,updated,title,views,likes,dislikes,wilson_score,published,description,language)\ + = (#{args}) WHERE id = '#{video.id}'", video_array) + rescue ex + db.exec("DELETE FROM videos * WHERE id = $1", id) + end end else video = fetch_video(id, client) @@ -490,14 +501,14 @@ def get_channel(id, client, db) channel = db.query_one("SELECT * FROM channels WHERE id = $1", id, as: InvidiousChannel) if Time.now - channel.updated > 1.minutes - channel = fetch_channel(id, client) + channel = fetch_channel(id, client, db) channel_array = channel.to_a[1..-1] args = arg_array(channel_array) - db.exec("UPDATE channels SET (rss,updated,author) = (#{args}) WHERE id = '#{channel.id}'", channel_array) + db.exec("UPDATE channels SET (author,updated) = (#{args}) WHERE id = '#{channel.id}'", channel_array) end else - channel = fetch_channel(id, client) + channel = fetch_channel(id, client, db) args = arg_array(channel.to_a) db.exec("INSERT INTO channels VALUES (#{args})", channel.to_a) end @@ -505,13 +516,31 @@ def get_channel(id, client, db) return channel end -def fetch_channel(id, client) +def fetch_channel(id, client, db) rss = client.get("/feeds/videos.xml?channel_id=#{id}").body rss = XML.parse_html(rss) + rss.xpath_nodes("//feed/entry").each do |entry| + video_id = entry.xpath_node("videoid").not_nil!.content + title = entry.xpath_node("title").not_nil!.content + published = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z") + updated = Time.parse(entry.xpath_node("updated").not_nil!.content, "%FT%X%z") + author = entry.xpath_node("author/name").not_nil!.content + ucid = entry.xpath_node("channelid").not_nil!.content + + video = ChannelVideo.new(video_id, title, published, updated, ucid, author) + + video_array = video.to_a[1..-1] + args = arg_array(video_array) + + # TODO: Update record on conflict + db.exec("INSERT INTO channel_videos VALUES (#{arg_array(video.to_a)})\ + ON CONFLICT (id) DO NOTHING", video.to_a) + end + author = rss.xpath_node("//feed/author/name").not_nil!.content - channel = InvidiousChannel.new(id, rss, Time.now, author) + channel = InvidiousChannel.new(id, author, Time.now) return channel end diff --git a/src/invidious.cr b/src/invidious.cr index 67bbc875..82a40ed1 100644 --- a/src/invidious.cr +++ b/src/invidious.cr @@ -574,35 +574,22 @@ get "/feed/subscriptions" do |env| feed = client.get("/subscription_manager?action_takeout=1", headers).body - videos = Array(Hash(String, String | Time)).new + channels = [] of String feed = XML.parse_html(feed) feed.xpath_nodes("//opml/outline/outline").each do |channel| id = channel["xmlurl"][-24..-1] - rss = get_channel(id, client, PG_DB).rss + get_channel(id, client, PG_DB) - rss.xpath_nodes("//feed/entry").each do |entry| - video = {} of String => String | Time - - video["id"] = entry.xpath_node("videoid").not_nil!.content - video["title"] = entry.xpath_node("title").not_nil!.content - video["published"] = Time.parse(entry.xpath_node("published").not_nil!.content, "%FT%X%z") - video["author"] = entry.xpath_node("author/name").not_nil!.content - video["ucid"] = entry.xpath_node("channelid").not_nil!.content - video["thumbnail"] = entry.xpath_node("group/thumbnail").not_nil!["url"].gsub(/hqdefault\.jpg$/, "mqdefault.jpg") - - videos << video - end + channels << id end - youtube_pool << client - videos.sort_by! { |video| video["published"].as(Time).epoch } - videos.reverse! - - start = (page - 1)*max_results - stop = start + max_results - 1 - videos = videos[start..stop] + time = Time.now + args = arg_array(channels) + offset = (page - 1) * max_results + videos = PG_DB.query_all("SELECT * FROM channel_videos WHERE ucid IN (#{args})\ + ORDER BY published DESC LIMIT #{max_results} OFFSET #{offset}", channels, as: ChannelVideo) templated "subscriptions" else diff --git a/src/views/subscriptions.ecr b/src/views/subscriptions.ecr index 26bd3fc4..0b94e6a3 100644 --- a/src/views/subscriptions.ecr +++ b/src/views/subscriptions.ecr @@ -7,15 +7,15 @@ <% slice.each do |video| %>
- "> - "/> -

<%= video["title"] %>

+
+ +

<%= video.title %>

- "><%= video["author"] %> + <%= video.author %>

-

Shared <%= video["published"].as(Time).to_s("%B %-d, %Y at %r") %>
+
Shared <%= video.published.to_s("%B %-d, %Y at %r") %>