Initial commit.

This commit is contained in:
Kavin 2022-10-22 13:28:40 +01:00
commit e1a1db2160
No known key found for this signature in database
GPG key ID: 49451E4482CC5BCD
19 changed files with 2571 additions and 0 deletions
.env.gitignoreCargo.lockCargo.tomlRocket.tomldiesel.tomldocker-compose.yml
migrations
.keep
00000000000000_diesel_initial_setup
2022-10-21-181413_create_sponsor_times
2022-10-21-184904_create_sponsor_times_indexes
src

1
.env Normal file
View file

@ -0,0 +1 @@
DATABASE_URL=postgres://sponsorblock:password123@localhost/sponsorblock

6
.gitignore vendored Normal file
View file

@ -0,0 +1,6 @@
/target
/export
/mirror
# Idea IDEs
.idea/

2090
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

21
Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
edition = "2021"
name = "sponsorblock-mirror"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
csv = "1.1.6"
diesel = {version = "2.0.2", features = ["postgres"]}
diesel_logger = "0.2.0"
diesel_migrations = "2.0.0"
lazy_static = "1.4.0"
num_cpus = "1.13.1"
regex = "1.6.0"
reqwest = {version = "0.11.12", features = ["json", "rustls-tls", "gzip", "brotli"]}
rocket = {git = "https://github.com/SergioBenitez/Rocket"}
rocket_sync_db_pools = {git = "https://github.com/SergioBenitez/Rocket", features = ["diesel_postgres_pool"]}
serde = {version = "1.0.145", features = ["derive"]}
serde_json = "1.0.86"
tokio = {version = "1", features = ["full"]}

8
Rocket.toml Normal file
View file

@ -0,0 +1,8 @@
[release]
address = "0.0.0.0"
[debug.databases]
sponsorblock = { url = "postgresql://sponsorblock:password123@localhost" }
[release.databases]
sponsorblock = { url = "postgresql://sponsorblock:password123@sponsorblock" }

8
diesel.toml Normal file
View file

@ -0,0 +1,8 @@
# For documentation on how to configure this file,
# see https://diesel.rs/guides/configuring-diesel-cli
[print_schema]
file = "src/schema.rs"
[migrations_directory]
dir = "migrations"

26
docker-compose.yml Normal file
View file

@ -0,0 +1,26 @@
services:
sb-mirror:
image: mchangrh/sb-mirror:latest
container_name: sb-mirror
volumes:
- ./mirror:/mirror
- ./export:/export
ports:
- 873:873
environment:
- MIRROR=TRUE
- MIRROR_URL=mirror.sb.mchang.xyz
postgres:
image: postgres:15-alpine
container_name: postgres-sb-mirror
ports:
- 5432:5432
volumes:
- postgres_data:/var/lib/postgresql/data
- ./mirror:/mirror
environment:
- POSTGRES_DB=sponsorblock
- POSTGRES_PASSWORD=password123
- POSTGRES_USER=sponsorblock
volumes:
postgres_data: null

0
migrations/.keep Normal file
View file

View file

@ -0,0 +1,6 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass);
DROP FUNCTION IF EXISTS diesel_set_updated_at();

View file

@ -0,0 +1,36 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
-- Sets up a trigger for the given table to automatically set a column called
-- `updated_at` whenever the row is modified (unless `updated_at` was included
-- in the modified columns)
--
-- # Example
--
-- ```sql
-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW());
--
-- SELECT diesel_manage_updated_at('users');
-- ```
CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$
BEGIN
EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s
FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl);
END;
$$ LANGUAGE plpgsql;
-- Trigger function installed by diesel_manage_updated_at().
-- When an UPDATE changes the row but leaves updated_at untouched, it stamps
-- updated_at with the current timestamp; an explicitly modified updated_at is
-- left as the caller wrote it.
CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$
BEGIN
IF (
NEW IS DISTINCT FROM OLD AND
NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at
) THEN
NEW.updated_at := current_timestamp;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

View file

@ -0,0 +1 @@
DROP TABLE IF EXISTS "sponsorTimes";

View file

@ -0,0 +1,23 @@
-- Local copy of the "sponsorTimes" table served by the mirror.
-- UNLOGGED: the table bypasses the write-ahead log, so its contents do not
-- survive a crash; src/main.rs reloads it from the CSV dump with
-- TRUNCATE + COPY.
-- NOTE(review): that COPY names no column list, so CSV fields are matched to
-- columns by position — the order below presumably has to follow the CSV
-- export. Confirm before reordering or adding columns.
CREATE UNLOGGED TABLE "sponsorTimes"
(
"videoID" TEXT NOT NULL,
"startTime" REAL NOT NULL,
"endTime" REAL NOT NULL,
"votes" INTEGER NOT NULL,
"locked" INTEGER NOT NULL default '0',
"incorrectVotes" INTEGER NOT NULL default '1',
-- PRIMARY KEY already implies NOT NULL and UNIQUE; the extra keywords are redundant.
"UUID" TEXT NOT NULL UNIQUE PRIMARY KEY,
"userID" TEXT NOT NULL,
"timeSubmitted" BIGINT NOT NULL,
"views" INTEGER NOT NULL,
"category" TEXT NOT NULL DEFAULT 'sponsor',
"actionType" TEXT NOT NULL DEFAULT 'skip',
"service" TEXT NOT NULL DEFAULT 'YouTube',
"videoDuration" REAL NOT NULL DEFAULT '0',
"hidden" INTEGER NOT NULL DEFAULT '0',
"reputation" REAL NOT NULL DEFAULT 0,
"shadowHidden" INTEGER NOT NULL,
-- Looked up by prefix (LIKE 'xxxx%') in src/routes.rs.
"hashedVideoID" TEXT NOT NULL default '',
"userAgent" TEXT NOT NULL default '',
"description" TEXT NOT NULL default ''
);

View file

@ -0,0 +1,5 @@
-- Reverts the index migration: drops the four "sponsorTimes" indexes and the
-- btree_gin extension. Every statement uses IF EXISTS, so the script can be
-- re-run safely.
DROP INDEX IF EXISTS "sponsor_category_idx";
DROP INDEX IF EXISTS "sponsor_hash_idx";
DROP EXTENSION IF EXISTS btree_gin;
DROP INDEX IF EXISTS "sponsor_hidden_idx";
DROP INDEX IF EXISTS "sponsor_votes_idx";

View file

@ -0,0 +1,5 @@
-- Indexes for the columns filtered by the skipSegments query in src/routes.rs
-- (category, hashedVideoID prefix, hidden/shadowHidden, votes).
CREATE INDEX sponsor_category_idx ON "sponsorTimes"(category);
-- NOTE(review): no index in this file has a USING clause, so all of them are
-- default btree indexes; btree_gin looks unused here — confirm before relying
-- on it. Without IF NOT EXISTS this statement fails if the extension is
-- already installed.
CREATE EXTENSION btree_gin;
-- NOTE(review): COLLATE "C" is presumably there so this btree index can serve
-- the LIKE 'prefix%' lookup on the hash — confirm with EXPLAIN.
CREATE INDEX sponsor_hash_idx ON "sponsorTimes"("hashedVideoID" COLLATE "C");
CREATE INDEX sponsor_hidden_idx ON "sponsorTimes"("hidden", "shadowHidden");
CREATE INDEX sponsor_votes_idx ON "sponsorTimes"("votes");

126
src/main.rs Normal file
View file

@ -0,0 +1,126 @@
#[macro_use]
extern crate rocket;
use std::path::Path;
use std::sync::{Arc, Mutex};
use std::thread::sleep;
use std::time::{Duration, Instant, SystemTime};
use diesel::connection::SimpleConnection;
use rocket::{Build, Rocket};
use rocket::fairing::AdHoc;
use rocket_sync_db_pools::database;
use tokio::time::interval;
use structs::{Segment, Sponsor};
use crate::routes::skip_segments;
mod models;
mod routes;
mod schema;
mod structs;
/// Pooled PostgreSQL connection handle.
///
/// The `"sponsorblock"` name selects the `databases.sponsorblock` entry in
/// `Rocket.toml`. The type is attached as a fairing in `rocket()` and taken as
/// a handler argument by `routes::skip_segments`.
#[database("sponsorblock")]
pub struct Db(diesel::PgConnection);
/// Ignite fairing callback: applies every pending Diesel migration embedded
/// from the `migrations/` directory, then hands the Rocket instance back.
///
/// # Panics
/// Panics if no database connection can be obtained or if a migration fails.
async fn run_migrations(rocket: Rocket<Build>) -> Rocket<Build> {
    use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness};

    // The migration files are compiled into the binary.
    const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations/");

    let connection = Db::get_one(&rocket)
        .await
        .expect("Failed to get a database connection");

    connection
        .run(|pg| {
            pg.run_pending_migrations(MIGRATIONS)
                .expect("Failed to run migrations");
        })
        .await;

    rocket
}
/// Builds and configures the Rocket application.
///
/// Attached, in order:
/// 1. the `Db` connection pool fairing,
/// 2. an ignite fairing that runs pending Diesel migrations,
/// 3. a liftoff fairing that spawns the background CSV importer,
/// 4. the `skip_segments` route, mounted at `/`.
///
/// The importer watches `mirror/sponsorTimes.csv` (relative to the working
/// directory of this process) and, whenever the file's modification time is
/// newer than that of the last imported dump, replaces the contents of the
/// `"sponsorTimes"` table in a single transaction. The `COPY` itself reads
/// `/mirror/sponsorTimes.csv` on the PostgreSQL server side.
///
/// # Panics
/// The liftoff fairing panics if no database connection can be obtained for
/// the importer.
#[launch]
fn rocket() -> Rocket<Build> {
    rocket::build()
        .attach(Db::fairing())
        .attach(AdHoc::on_ignite("Diesel Migrations", run_migrations))
        .attach(AdHoc::on_liftoff("background database", |rocket| {
            Box::pin(async move {
                let mut ticker = interval(Duration::from_secs(30));
                let path = Path::new("mirror/sponsorTimes.csv");

                // The importer keeps one pooled connection for its whole lifetime.
                let db = Db::get_one(rocket)
                    .await
                    .expect("Failed to get a database connection");

                tokio::spawn(async move {
                    // Modification time of the dump that was imported last. Only this
                    // task reads or writes it, so it lives here rather than in a
                    // `static mut`.
                    let mut last_update: Option<SystemTime> = None;

                    loop {
                        ticker.tick().await;

                        // Skip the file check while the last imported dump is at most
                        // 60 seconds old.
                        let due = last_update.map_or(true, |imported| {
                            imported.elapsed().unwrap_or_default().as_secs() > 60
                        });
                        if !due {
                            continue;
                        }

                        // The dump can be missing or mid-replacement; that must not
                        // kill the importer, so failures only skip this round.
                        let last_modified = match path.metadata().and_then(|meta| meta.modified()) {
                            Ok(modified) => modified,
                            Err(err) => {
                                if err.kind() != std::io::ErrorKind::NotFound {
                                    eprintln!(
                                        "Failed to read metadata of {}: {}",
                                        path.display(),
                                        err
                                    );
                                }
                                continue;
                            }
                        };

                        if last_update.map_or(true, |imported| last_modified > imported) {
                            let start = Instant::now();
                            println!("Importing database...");

                            let outcome = db
                                .run(|c| {
                                    let imported = c.batch_execute("BEGIN TRANSACTION; TRUNCATE \"sponsorTimes\"; COPY \"sponsorTimes\" FROM '/mirror/sponsorTimes.csv' DELIMITER ',' CSV HEADER; COMMIT;");
                                    if imported.is_err() {
                                        // A failed statement leaves the explicit transaction
                                        // aborted; close it so the next attempt on this
                                        // connection can BEGIN again.
                                        let _ = c.batch_execute("ROLLBACK;");
                                    }
                                    imported
                                })
                                .await;

                            match outcome {
                                Ok(()) => {
                                    println!(
                                        "Imported database in {}ms",
                                        start.elapsed().as_millis()
                                    );
                                    last_update = Some(last_modified);
                                }
                                // `last_update` stays unchanged, so the import is retried
                                // on a later round.
                                Err(err) => eprintln!("Failed to import database: {}", err),
                            }
                        }

                        // Async sleep: `std::thread::sleep` would block a runtime worker
                        // thread for the whole minute.
                        tokio::time::sleep(Duration::from_secs(60)).await;
                    }
                });
            })
        }))
        .mount("/", routes![skip_segments])
}

52
src/models.rs Normal file
View file

@ -0,0 +1,52 @@
use diesel::prelude::*;
use serde::Serialize;
use crate::schema::*;
/// One row of the `"sponsorTimes"` table.
///
/// The `serde` and `diesel` renames map the snake_case Rust fields to the
/// camelCase column names declared in `schema.rs`.
///
/// `Queryable` fills fields by position, so the field order here has to stay
/// in step with the column order of the `sponsorTimes` table definition.
#[derive(Debug, Serialize, Queryable, Insertable)]
#[diesel(table_name = sponsorTimes)]
pub struct SponsorTime {
#[serde(rename = "videoID")]
#[diesel(column_name = videoID)]
pub video_id: String,
#[serde(rename = "startTime")]
#[diesel(column_name = startTime)]
pub start_time: f32,
#[serde(rename = "endTime")]
#[diesel(column_name = endTime)]
pub end_time: f32,
/// Rows with a negative value are excluded by `routes::skip_segments`.
pub votes: i32,
pub locked: i32,
#[serde(rename = "incorrectVotes")]
#[diesel(column_name = incorrectVotes)]
pub incorrect_votes: i32,
/// Primary key of the table.
#[serde(rename = "UUID")]
#[diesel(column_name = UUID)]
pub uuid: String,
#[serde(rename = "userID")]
#[diesel(column_name = userID)]
pub user_id: String,
#[serde(rename = "timeSubmitted")]
#[diesel(column_name = timeSubmitted)]
pub time_submitted: i64,
pub views: i32,
pub category: String,
#[serde(rename = "actionType")]
#[diesel(column_name = actionType)]
pub action_type: String,
pub service: String,
#[serde(rename = "videoDuration")]
#[diesel(column_name = videoDuration)]
pub video_duration: f32,
/// Only rows where this is 0 are returned by `routes::skip_segments`.
pub hidden: i32,
pub reputation: f32,
/// Only rows where this is 0 are returned by `routes::skip_segments`.
#[serde(rename = "shadowHidden")]
#[diesel(column_name = shadowHidden)]
pub shadow_hidden: i32,
/// Matched by prefix in `routes::skip_segments`.
#[serde(rename = "hashedVideoID")]
#[diesel(column_name = hashedVideoID)]
pub hashed_video_id: String,
#[serde(rename = "userAgent")]
#[diesel(column_name = userAgent)]
pub user_agent: String,
pub description: String,
}

104
src/routes.rs Normal file
View file

@ -0,0 +1,104 @@
use std::collections::HashMap;
use diesel::prelude::*;
use lazy_static::lazy_static;
use rocket::response::content;
use crate::{Db, Segment, Sponsor};
use crate::models::SponsorTime;
use crate::schema::sponsorTimes::dsl::*;
// Compiled once on first use: accepts exactly four lowercase hexadecimal
// characters. `skip_segments` lowercases the hash prefix before matching.
lazy_static! {
static ref RE: regex::Regex = regex::Regex::new(r"^[0-9a-f]{4}$").unwrap();
}
/// `GET /api/skipSegments/<hash>?<categories>`
///
/// Returns the visible segments of every video whose hashed ID starts with
/// the four-hex-digit prefix `hash`, grouped per video and serialized as a
/// JSON array.
///
/// * `hash` — hash prefix; matched case-insensitively against `^[0-9a-f]{4}$`.
/// * `categories` — optional JSON array of category names. When absent, no
///   category filter is applied locally; an explicitly empty array yields `[]`.
///
/// Only rows with `shadowHidden = 0`, `hidden = 0` and `votes >= 0` are
/// considered. If the local table has no match (or the local query fails),
/// the request is forwarded to `https://sponsor.ajay.app` and the upstream
/// body is returned verbatim.
///
/// Failures never panic the handler:
/// * a malformed `hash` or `categories` value yields a plain-text message,
/// * a failed local query is logged and treated as "no local match",
/// * a failed upstream request is logged and answered with `[]`.
#[get("/api/skipSegments/<hash>?<categories>")]
pub async fn skip_segments(
    hash: String,
    categories: Option<&str>,
    db: Db,
) -> content::RawJson<String> {
    let hash = hash.to_lowercase();

    if !RE.is_match(&hash) {
        return content::RawJson("Hash prefix does not match format requirements.".to_string());
    }

    // `categories` is client input: reject anything that is not a JSON array
    // of strings instead of panicking on it.
    let cat: Vec<String> = match serde_json::from_str(categories.unwrap_or("[]")) {
        Ok(parsed) => parsed,
        Err(_) => {
            return content::RawJson(
                "Categories parameter does not match format requirements.".to_string(),
            );
        }
    };

    // An explicitly empty category list can never match anything.
    if cat.is_empty() && categories.is_some() {
        return content::RawJson("[]".to_string());
    }

    let pattern = format!("{}%", hash);
    let results: Vec<SponsorTime> = db
        .run(move |conn| {
            let base_filter = sponsorTimes
                .filter(shadowHidden.eq(0))
                .filter(hidden.eq(0))
                .filter(votes.ge(0))
                .filter(hashedVideoID.like(pattern));

            if cat.is_empty() {
                base_filter.get_results::<SponsorTime>(conn)
            } else {
                base_filter
                    .filter(category.eq_any(cat))
                    .get_results::<SponsorTime>(conn)
            }
        })
        .await
        .unwrap_or_else(|err| {
            // Fall through to the upstream lookup below rather than failing
            // the request.
            eprintln!("Failed to query sponsor times: {}", err);
            Vec::new()
        });

    // Group the rows per video, keyed by the full hashed video ID.
    let mut sponsors: HashMap<String, Sponsor> = HashMap::new();
    for result in results {
        let sponsor = sponsors
            .entry(result.hashed_video_id.clone())
            .or_insert(Sponsor {
                hash: result.hashed_video_id,
                video_id: result.video_id,
                segments: Vec::new(),
            });

        sponsor.segments.push(Segment {
            uuid: result.uuid,
            action_type: result.action_type,
            category: result.category,
            description: result.description,
            locked: result.locked,
            segment: vec![result.start_time, result.end_time],
            user_id: result.user_id,
            video_duration: result.video_duration,
            votes: result.votes,
        });
    }

    if !sponsors.is_empty() {
        let sponsors: Vec<&Sponsor> = sponsors.values().collect();
        return content::RawJson(
            serde_json::to_string(&sponsors)
                .expect("Sponsor holds only strings and numbers and always serializes"),
        );
    }

    // Nothing stored locally: ask the upstream server. The category list is
    // passed as an encoded query pair so client-supplied characters such as
    // `&` or `#` cannot alter the upstream request.
    let upstream = reqwest::Url::parse_with_params(
        &format!("https://sponsor.ajay.app/api/skipSegments/{}", hash),
        &[("categories", categories.unwrap_or("[]"))],
    )
    .expect("fixed upstream URL with a validated hex prefix always parses");

    let fetched = match reqwest::get(upstream).await {
        Ok(response) => response.text().await,
        Err(err) => Err(err),
    };

    match fetched {
        Ok(body) => content::RawJson(body),
        Err(err) => {
            eprintln!("Failed to fetch segments from upstream: {}", err);
            content::RawJson("[]".to_string())
        }
    }
}

26
src/schema.rs Normal file
View file

@ -0,0 +1,26 @@
// @generated automatically by Diesel CLI.
// Diesel schema for the "sponsorTimes" table, keyed by `UUID`.
// Columns and types follow migrations/2022-10-21-181413_create_sponsor_times.
diesel::table! {
sponsorTimes (UUID) {
videoID -> Text,
startTime -> Float4,
endTime -> Float4,
votes -> Int4,
locked -> Int4,
incorrectVotes -> Int4,
UUID -> Text,
userID -> Text,
timeSubmitted -> Int8,
views -> Int4,
category -> Text,
actionType -> Text,
service -> Text,
videoDuration -> Float4,
hidden -> Int4,
reputation -> Float4,
shadowHidden -> Int4,
hashedVideoID -> Text,
userAgent -> Text,
description -> Text,
}
}

27
src/structs.rs Normal file
View file

@ -0,0 +1,27 @@
use serde::{Deserialize, Serialize};
/// All segments stored locally for one video; one element of the JSON array
/// returned by `GET /api/skipSegments/<hash>`.
///
/// Field names are serialized in the camelCase form used by the SponsorBlock
/// API (`videoID`), because the same endpoint also returns upstream responses
/// verbatim and clients must be able to parse both with one schema. The former
/// snake_case names are still accepted when deserializing.
#[derive(Serialize, Deserialize)]
pub struct Sponsor {
    /// Full hashed video ID of the video.
    pub hash: String,
    #[serde(rename = "videoID", alias = "video_id")]
    pub video_id: String,
    pub segments: Vec<Segment>,
}

/// A single segment of a video.
///
/// Serialized with the SponsorBlock API key names (`UUID`, `actionType`,
/// `userID`, `videoDuration`); the former snake_case names are still accepted
/// when deserializing.
#[derive(Serialize, Deserialize)]
pub struct Segment {
    #[serde(rename = "UUID", alias = "uuid")]
    pub uuid: String,
    #[serde(rename = "actionType", alias = "action_type")]
    pub action_type: String,
    pub category: String,
    pub description: String,
    pub locked: i32,
    /// `[start_time, end_time]` of the segment.
    pub segment: Vec<f32>,
    #[serde(rename = "userID", alias = "user_id")]
    pub user_id: String,
    #[serde(rename = "videoDuration", alias = "video_duration")]
    pub video_duration: f32,
    pub votes: i32,
}