diff --git a/README.md b/README.md index d0ddd1a..2edf53b 100644 --- a/README.md +++ b/README.md @@ -13,3 +13,41 @@ It also uses [sb-mirror](https://github.com/mchangrh/sb-mirror) for mirroring th Feel free to add your instance to this list by making a pull request. You can also configure Piped-Backend to use your mirror by changing the `SPONSORBLOCK_SERVERS` configuration value. + +## Compatibility + +This implementation does not implement the full SponsorBlock server API. It supports hash-based queries to `/api/skipSegments/`, with optional `categories` parameter, and queries to `/api/skipSegments` with required `videoID` and optional `categories` parameters. + +The browser extension works with only the hash-based query endpoint, but other clients, such as the one in ReVanced, require the video ID endpoint, and additionally query `/api/userInfo` and `/api/isUserVIP`. Right now there are stub implementations for these. ReVanced has not yet been verified as compatible. + +## Using with Docker Compose + +To run the server under Docker Compose, run: + +``` +docker compose up +``` + +This starts the API server, a database, and a mirroring service to download the SponsorBlock data from the `sponsorblock.kavin.rocks` mirror and keep it up to date. + +The API will be available on `http://localhost:8000`. For example, you can try `http://localhost:8000/api/skipSegments/aabf` or `http://localhost:8000/api/skipSegments?videoID=eQ_8F4nzyiw`. **It will take a few minutes at least for the database to download and import,** so these will not return data on the first run. + +## Building + +To make a local release build, use `cargo build --release`. This will produce a binary in `target/release/sponsorblock-mirror`. + +To make a Docker container, you need to do a BuildKit Docker build, not a normal Docker build. Make sure you have `buildx` available in your Docker, and run: +```bash +docker buildx build --load -t 1337kavin/sponsorblock-mirror . 
+``` + +## Troubleshooting + +* If the linker complains about a missing `-lpq`, make sure you have the PostgreSQL development libraries, which may be in a `libpq-dev` package or your distribution's equivalent. + +* If Docker complains that `the --mount option requires BuildKit`, make sure you are building with `docker buildx build` and not `docker build`. + +* To access the PostgreSQL database directly, you can `docker exec -ti postgres-sb-mirror bash -c 'psql $POSTGRES_DB $POSTGRES_USER'`. + +* Requests for videos not in the database are forwarded to `https://sponsor.ajay.app/`, which may be down or malfunctioning. A response of the string `Internal Server Error` is likely to be from there, rather than from this application. + diff --git a/docker-compose.yml b/docker-compose.yml index 5eb0d76..d5d7f56 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,3 +1,5 @@ +version: "3" + services: sb-mirror: image: mchangrh/sb-mirror:latest diff --git a/migrations/2022-10-30-175428_create_video_id_index/down.sql b/migrations/2022-10-30-175428_create_video_id_index/down.sql new file mode 100644 index 0000000..62c20f9 --- /dev/null +++ b/migrations/2022-10-30-175428_create_video_id_index/down.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS "sponsor_video_id_idx"; diff --git a/migrations/2022-10-30-175428_create_video_id_index/up.sql b/migrations/2022-10-30-175428_create_video_id_index/up.sql new file mode 100644 index 0000000..1cc7870 --- /dev/null +++ b/migrations/2022-10-30-175428_create_video_id_index/up.sql @@ -0,0 +1 @@ +CREATE INDEX sponsor_video_id_idx ON "sponsorTimes"("videoID"); diff --git a/src/main.rs b/src/main.rs index 4ff7def..527783a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,7 +14,7 @@ use tokio::time::interval; use structs::{Segment, Sponsor}; -use crate::routes::skip_segments; +use crate::routes::{skip_segments, skip_segments_by_id, fake_is_user_vip, fake_user_info}; mod models; mod routes; @@ -125,5 +125,5 @@ fn rocket() -> Rocket { }) }) 
).attach(CORS) - .mount("/", routes![skip_segments]) + .mount("/", routes![skip_segments, skip_segments_by_id, fake_is_user_vip, fake_user_info]) } diff --git a/src/routes.rs b/src/routes.rs index 52442dc..f00abe5 100644 --- a/src/routes.rs +++ b/src/routes.rs @@ -6,34 +6,115 @@ use rocket::response::content; use crate::{Db, Segment, Sponsor}; use crate::models::SponsorTime; -use crate::schema::sponsorTimes::dsl::*; +// We *must* use "videoID" as an argument name to get Rocket to let us access +// the query parameter by that name, but if videoID is already used we +// can't do that. +use crate::schema::sponsorTimes::dsl::{ + sponsorTimes, + shadowHidden, + hidden, + votes, + category, + hashedVideoID, + videoID as column_videoID +}; -// init regex to match hash/hex +// init regexes to match hash/hex or video ID lazy_static! { - static ref RE: regex::Regex = regex::Regex::new(r"^[0-9a-f]{4}$").unwrap(); + static ref HASH_RE: regex::Regex = regex::Regex::new(r"^[0-9a-f]{4}$").unwrap(); + static ref ID_RE: regex::Regex = regex::Regex::new(r"^[a-zA-Z0-9_-]{6,11}$").unwrap(); } +// Segments can be fetched either by full video ID, or by prefix of hashed +// video ID. Different clients make different queries. This represents either +// kind of constraint. 
+enum VideoName { + ByHashPrefix(String), + ByID(String), +} + + #[get("/api/skipSegments/?")] pub async fn skip_segments( hash: String, categories: Option<&str>, db: Db, ) -> content::RawJson { + let hash = hash.to_lowercase(); // Check if hash matches hex regex - if !RE.is_match(&hash) { + if !HASH_RE.is_match(&hash) { return content::RawJson("Hash prefix does not match format requirements.".to_string()); } - let hc = hash.clone(); + let sponsors = find_skip_segments(VideoName::ByHashPrefix(hash.clone()), categories, db).await; + + if sponsors.is_empty() { + // Fall back to central Sponsorblock server + let resp = reqwest::get(format!( + "https://sponsor.ajay.app/api/skipSegments/{}?categories={}", + hash, + categories.unwrap_or("[\"sponsor\"]"), + )) + .await + .unwrap() + .text() + .await + .unwrap(); + + return content::RawJson(resp); + } + + return content::RawJson(serde_json::to_string(&sponsors).unwrap()); +} + +#[get("/api/skipSegments?&")] +pub async fn skip_segments_by_id( + #[allow(non_snake_case)] + videoID: String, + categories: Option<&str>, + db: Db, +) -> content::RawJson { + + // Check if ID matches ID regex + if !ID_RE.is_match(&videoID) { + return content::RawJson("videoID does not match format requirements".to_string()); + } + + let sponsors = find_skip_segments(VideoName::ByID(videoID.clone()), categories, db).await; + + if sponsors.is_empty() { + // Fall back to central Sponsorblock server + let resp = reqwest::get(format!( + "https://sponsor.ajay.app/api/skipSegments?videoID={}&categories={}", + videoID, + categories.unwrap_or("[\"sponsor\"]"), + )) + .await + .unwrap() + .text() + .await + .unwrap(); + + return content::RawJson(resp); + } + + // Doing a lookup by video ID should return only one Sponsor object with + // one list of segments. We need to return just the list of segments. 
+ return content::RawJson(serde_json::to_string(&sponsors[0].segments).unwrap()); +} + +async fn find_skip_segments( + name: VideoName, + categories: Option<&str>, + db: Db, +) -> Vec { let cat: Vec = serde_json::from_str(categories.unwrap_or("[\"sponsor\"]")).unwrap(); if cat.is_empty() { - return content::RawJson( - "[]".to_string(), - ); + return Vec::new(); } let results: Vec = db.run(move |conn| { @@ -41,16 +122,18 @@ pub async fn skip_segments( .filter(shadowHidden.eq(0)) .filter(hidden.eq(0)) .filter(votes.ge(0)) - .filter(hashedVideoID.like(format!("{}%", hc))); + .filter(category.eq_any(cat)); // We know cat isn't empty at this point - let queried = { - if cat.is_empty() { + let queried = match name { + VideoName::ByHashPrefix(hash_prefix) => { base_filter + .filter(hashedVideoID.like(format!("{}%", hash_prefix))) .get_results::(conn) .expect("Failed to query sponsor times") - } else { + } + VideoName::ByID(video_id) => { base_filter - .filter(category.eq_any(cat)) + .filter(column_videoID.eq(video_id)) .get_results::(conn) .expect("Failed to query sponsor times") } @@ -71,17 +154,7 @@ pub async fn skip_segments( }) }; - let segment = Segment { - uuid: result.uuid.clone(), - action_type: result.action_type.clone(), - category: result.category.clone(), - description: result.description.clone(), - locked: result.locked, - segment: vec![result.start_time, result.end_time], - user_id: result.user_id.clone(), - video_duration: result.video_duration, - votes: result.votes, - }; + let segment = build_segment(result); let hash = result.hashed_video_id.clone(); @@ -113,23 +186,7 @@ pub async fn skip_segments( sponsor.segments.sort_by(|a, b| a.partial_cmp(b).unwrap()); } - if !sponsors.is_empty() { - let sponsors: Vec<&Sponsor> = sponsors.values().collect(); - return content::RawJson(serde_json::to_string(&sponsors).unwrap()); - } - - let resp = reqwest::get(format!( - "https://sponsor.ajay.app/api/skipSegments/{}?categories={}", - hash, - 
categories.unwrap_or("[]"), - )) - .await - .unwrap() - .text() - .await - .unwrap(); - - return content::RawJson(resp); + return sponsors.into_values().collect(); } fn similar_segments(segment: &Segment, hash: &str, segments: &Vec) -> Vec { @@ -147,17 +204,7 @@ fn similar_segments(segment: &Segment, hash: &str, segments: &Vec) let is_similar = is_overlap(segment, &seg.category, &seg.action_type, seg.start_time, seg.end_time); if is_similar { - similar_segments.push(Segment { - uuid: seg.uuid.clone(), - action_type: seg.action_type.clone(), - category: seg.category.clone(), - description: seg.description.clone(), - locked: seg.locked, - segment: vec![seg.start_time, seg.end_time], - user_id: seg.user_id.clone(), - video_duration: seg.video_duration, - votes: seg.votes, - }); + similar_segments.push(build_segment(seg)); } } @@ -197,4 +244,34 @@ fn best_segment(segments: &Vec) -> Segment { } best_segment -} \ No newline at end of file +} + +fn build_segment (sponsor_time: &SponsorTime) -> Segment { + Segment { + uuid: sponsor_time.uuid.clone(), + action_type: sponsor_time.action_type.clone(), + category: sponsor_time.category.clone(), + description: sponsor_time.description.clone(), + locked: sponsor_time.locked, + segment: vec![sponsor_time.start_time, sponsor_time.end_time], + user_id: sponsor_time.user_id.clone(), + video_duration: sponsor_time.video_duration, + votes: sponsor_time.votes, + } +} + +// These additional routes are faked to protect ReVanced from seeing errors. We +// don't *need* to do this to support ReVanced, but it gets rid of the +// perpetual "Loading..." in the settings. 
+ +// This would take a userID +#[get("/api/isUserVIP")] +pub async fn fake_is_user_vip() -> content::RawJson { + content::RawJson("{\"hashedUserID\": \"\", \"vip\": false}".to_string()) +} + +// This would take a userID and an optional list of values +#[get("/api/userInfo")] +pub async fn fake_user_info() -> content::RawJson { + content::RawJson("{\"userID\": \"\", \"userName\": \"\", \"minutesSaved\": 0, \"segmentCount\": 0, \"viewCount\": 0}".to_string()) +}