base-data-manager/test/scrub.ts

135 lines
7.8 KiB
TypeScript

import { strict as assert } from "node:assert";
import path from "node:path";
import { test } from "node:test";
import { fileURLToPath } from "node:url";
import { $ } from "zx";
import { scrubPrimitive } from "../util/scrub_primitive.ts";
// Directory containing this test file. fileURLToPath() is used instead of reading
// URL#pathname directly: pathname stays percent-encoded (a space becomes "%20") and
// keeps a leading "/" before Windows drive letters, so it is not a usable fs path.
const scriptDir = path.dirname(fileURLToPath(import.meta.url));
// Sibling "util" directory; handed to jq through -L as the module search path.
const defsDir = path.join(scriptDir, "..", "util");
/**
 * Runs the jq implementation of `scrub_primitive` against a single value.
 *
 * The value is JSON-encoded and handed to jq as the `$input` variable through
 * `--argjson`, with `-n` so jq does not read stdin. This avoids piping the payload
 * through `echo`, whose treatment of leading dashes and backslashes depends on the
 * shell that ends up running the command.
 *
 * @param value - Value to scrub; it must have a JSON representation.
 * @returns The JSON value printed by jq, parsed back into a JS value.
 * @throws TypeError when `value` cannot be JSON-encoded (e.g. `undefined`).
 */
async function jqScrubPrimitive(value: unknown): Promise<unknown> {
  // JSON.stringify yields undefined (not a string) for undefined, functions and symbols.
  const input: string | undefined = JSON.stringify(value);
  if (input === undefined) {
    throw new TypeError("jqScrubPrimitive: value has no JSON representation");
  }
  // "$input" below is jq syntax inside a single-quoted shell word, not a JS interpolation.
  const result = await $`jq -n -L ${defsDir} --argjson input ${input} 'include "scrub"; $input | scrub_primitive'`;
  return JSON.parse(result.stdout.trim());
}
/** One row of the expectation table shared by the TypeScript and jq scrubbers. */
type TestCase = {
  /** Label interpolated into the generated test titles. */
  name: string;
  /** Value handed to the scrubber under test. */
  input: unknown;
  /** Value the scrubber's result is compared against. */
  expected: unknown;
};
/**
 * Expected scrubbed value for a unix-timestamp-like number: the input is reduced
 * modulo 31557600 (365.25 days in seconds), offset by 1704067200
 * (2024-01-01T00:00:00Z in epoch seconds) and truncated down to a multiple of 5000.
 *
 * Math.trunc is used instead of `| 0` so the truncation is not limited to the
 * 32-bit integer range.
 */
function expectedScrubbedTimestamp(timestamp: number): number {
  const shifted = (timestamp % 31557600) + 1704067200;
  return Math.trunc(shifted / 5000) * 5000;
}

/**
 * Expectation table: every row is run through both the TypeScript and the jq scrubber,
 * and both must produce `expected` for `input`.
 */
const cases: TestCase[] = [
  // === Strings ===
  { name: "IPv4 address", input: "192.168.1.1", expected: "1.1.1.1" },
  { name: "IPv4 address 2", input: "10.0.0.255", expected: "1.1.1.1" },
  { name: "IPv6 full", input: "2001:0db8:85a3:0000:0000:8a2e:0370:7334", expected: "2000:0000:0000:0000:0000:0000:0000:0000" },
  { name: "IPv6 shortened", input: "fe80::1", expected: "2000:0000:0000:0000:0000:0000:0000:0000" },
  { name: "IPv6 collapsed", input: "::1", expected: "2000:0000:0000:0000:0000:0000:0000:0000" },
  { name: "email simple", input: "user@example.com", expected: "not_a_real_email@example.com" },
  { name: "email complex", input: "john.doe+tag@sub.domain.org", expected: "not_a_real_email@example.com" },
  { name: "http URL", input: "https://www.example.com/path?q=1", expected: "url://somewhere" },
  { name: "ftp URL", input: "ftp://files.example.com/doc.pdf", expected: "url://somewhere" },
  { name: "custom scheme URL", input: "myapp://deep/link", expected: "url://somewhere" },
  { name: "unix path", input: "/home/user/documents", expected: "some/path" },
  { name: "relative path", input: "some/relative/path", expected: "some/path" },
  { name: "ISO datetime with tz no millis", input: "2023-11-15T14:30:00+05:00", expected: "2020-04-13T10:09:08+00:00" },
  { name: "ISO datetime with tz no tz colon no T", input: "2023-11-15 14:30:00+0500", expected: "2020-04-13 10:09:08+0000" },
  { name: "ISO datetime with negative tz no millis", input: "2023-11-15T14:30:00-08:00", expected: "2020-04-13T10:09:08+00:00" },
  { name: "ISO datetime with millis and tz", input: "2023-11-15T14:30:00.123+05:00", expected: "2020-04-13T10:09:08.000000+00:00" },
  { name: "ISO datetime with 6 digit millis and tz", input: "2023-11-15T14:30:00.123456+05:00", expected: "2020-04-13T10:09:08.000000+00:00" },
  { name: "ISO datetime with millis no tz", input: "2023-11-15T14:30:00.123", expected: "2020-04-13T10:09:08.000" },
  { name: "ISO datetime no millis no tz (falls through to millis branch)", input: "2023-11-15T14:30:00", expected: "2020-04-13T10:09:08.000" },
  { name: "ISO datetime with Z", input: "2023-11-15T14:30:00Z", expected: "2020-04-13T10:09:08Z" },
  { name: "ISO datetime with Z with millis", input: "2023-11-15T14:30:00.000Z", expected: "2020-04-13T10:09:08.000000Z" },
  { name: "ISO datetime with no seconds", input: "2023-11-15T14:30", expected: "2020-04-13T10:09" },
  { name: "ISO datetime range no T", input: "2023-11-15 14:30:08 - 2022-11-11 11:11:11", expected: "2020-04-13 10:09:08 - 2020-04-13 10:09:08" },
  { name: "date only", input: "2023-11-15", expected: "2020-04-13" },
  { name: "date only 2", input: "1999-01-01", expected: "2020-04-13" },
  { name: "datetime with UTC suffix", input: "2023-11-15 14:30:00 UTC", expected: "2020-04-13 10:09:08 UTC" },
  { name: "datetime without UTC suffix", input: "2023-11-15 14:30:00", expected: "2020-04-13 10:09:08" },
  { name: "datetime with /s and MM/DD/YY (whyyyy, fitbit, whyyy)", input: "11/15/25 14:30:00", expected: "04/13/20 10:09:08" },
  { name: "datetime with Mon Apr 13 10:09:08 UTC 2020 format", input: "Tue Apr 14 11:11:11 UTC 1999", expected: "Mon Apr 13 10:09:08 UTC 2020" },
  { name: "UTC offset only", input: "+04:00", expected: "+00:00" },
  { name: "UTC offset only negative", input: "-04:00", expected: "-00:00" },
  { name: "numeric string short", input: "42", expected: "11" },
  { name: "numeric string long", input: "1234567890", expected: "1111111111" },
  { name: "numeric string single digit", input: "0", expected: "1" },
  { name: "decimal string", input: "3.14", expected: "1.11" },
  { name: "negative decimal string", input: "-123.456", expected: "-111.111" },
  { name: "negative integer string", input: "-42", expected: "-11" },
  { name: "hex string short", input: "deadbeef", expected: "a1a1a1a1" },
  { name: "hex string odd length", input: "abc", expected: "a1a" },
  { name: "hex string uppercase", input: "DEADBEEF", expected: "a1a1a1a1" },
  { name: "hex string mixed case", input: "AbCd01", expected: "a1a1a1" },
  { name: "empty string", input: "", expected: "" },
  { name: "string 'true'", input: "true", expected: "false" },
  { name: "string 'false'", input: "false", expected: "false" },
  { name: "string 'null'", input: "null", expected: "null" },
  { name: "generic string short", input: "hello", expected: "xxxxx" },
  { name: "generic string with spaces", input: "hello world!", expected: "xxxxxxxxxxxx" },
  { name: "generic string single char", input: "z", expected: "x" },
  { name: "generic string special chars", input: "foo-bar_baz", expected: "xxxxxxxxxxx" },
  // === Strings: Media file extensions (passthrough) ===
  // TODO: fix — these rows expect media file names to come back unchanged; they are
  // disabled pending a fix.
  // { name: "jpg file path", input: "photo.jpg", expected: "photo.jpg" },
  // { name: "png file path", input: "image.png", expected: "image.png" },
  // { name: "mp4 file path", input: "video.mp4", expected: "video.mp4" },
  // { name: "mp3 file path", input: "song.mp3", expected: "song.mp3" },
  // { name: "svg file path", input: "icon.svg", expected: "icon.svg" },
  // { name: "webm file path", input: "clip.webm", expected: "clip.webm" },
  // { name: "flac file path", input: "track.flac", expected: "track.flac" },
  // { name: "case insensitive JPG", input: "photo.JPG", expected: "photo.JPG" },
  // === Numbers ===
  { name: "unix timestamp low boundary", input: 946702800, expected: expectedScrubbedTimestamp(946702800) },
  { name: "unix timestamp mid", input: 1700000000, expected: expectedScrubbedTimestamp(1700000000) },
  { name: "unix timestamp high boundary", input: 1893474000, expected: expectedScrubbedTimestamp(1893474000) },
  { name: "integer single digit", input: 5, expected: 1 },
  { name: "integer two digits", input: 42, expected: 11 },
  { name: "integer three digits", input: 999, expected: 111 },
  { name: "integer large", input: 123456, expected: 111111 },
  { name: "integer zero", input: 0, expected: 1 },
  // TODO: fix — negative numbers are deliberately left out for now.
  // { name: "negative integer", input: -42, expected: -11 },
  // { name: "negative integer large", input: -123456, expected: -111111 },
  { name: "decimal simple", input: 3.14, expected: 1.11 },
  { name: "decimal long fraction", input: 123.4567, expected: 111.1111 },
  { name: "decimal short fraction", input: 0.5, expected: 0.1 },
  // { name: "decimal negative", input: -3.14, expected: -1.11 },
  // { name: "decimal negative with zero int", input: -0.75, expected: -0.11 },
  // === Misc ===
  { name: "boolean true", input: true, expected: false },
  { name: "boolean false", input: false, expected: false },
  { name: "null", input: null, expected: null },
];
// Register two tests per table row: one against the TypeScript scrubber and one
// against the jq scrubber. Both are compared with the same expected value.
cases.forEach(({ name, input, expected }) => {
  const inputJson = JSON.stringify(input);
  const expectedJson = JSON.stringify(expected);

  test(`scrub() - ${name} TypeScript scrubPrimitive(${inputJson}) === ${expectedJson}`, () => {
    assert.deepEqual(scrubPrimitive(input), expected);
  });

  test(`scrub() - ${name} jq scrub_primitive(${inputJson}) === ${expectedJson}`, async () => {
    assert.deepEqual(await jqScrubPrimitive(input), expected);
  });
});