base-data-manager/test/task.ts

312 lines
11 KiB
TypeScript

import test from "node:test";
import nodePath from "node:path";
import { strict as assert } from "node:assert/strict";
import {
TaskTarget,
cd,
glob as taskGlob,
read,
cmd,
setId,
verify,
getTSVManifest,
TaskTargetPipelineHelper,
} from "../data-export/task.ts";
// Directory that contains this test file; fixture paths are resolved from it.
const THIS_FILE = import.meta.dirname;
// Root of the sample export used by the tests that touch the real filesystem.
const FIXTURE_DIR = nodePath.join(THIS_FILE, "fixtures", "facebook-json-2021-05-01");
// A single JSON file inside the fixture directory.
const FIXTURE_FILE = nodePath.join(FIXTURE_DIR, "friends", "friends.json");
// -- TaskTarget ---------------------------------------------------------------
// A new target keeps the given path and starts with empty pipeline/postFns lists.
test("TaskTarget: constructor initializes path, pipeline, postFns", () => {
  const { path, pipeline, postFns } = new TaskTarget("/foo/bar");
  assert.equal(path, "/foo/bar");
  assert.deepEqual(pipeline, []);
  assert.deepEqual(postFns, []);
});
// exists() is checked against a fixture file and a path that is not on disk.
test("TaskTarget: exists() returns true for a real file", () => {
  const present = new TaskTarget(FIXTURE_FILE);
  assert.equal(present.exists(), true);
});
test("TaskTarget: exists() returns false for a missing file", () => {
  const missing = new TaskTarget("/nonexistent-file-xyz");
  assert.equal(missing.exists(), false);
});
// basename / basenameN: expected values show "-" and "." mapped to "_" and
// trailing path segments joined with "___".
test("TaskTarget: basename safe-ifies the path basename", () => {
  const target = new TaskTarget("/foo/bar/some-file.txt");
  const safeName = target.basename;
  assert.equal(safeName, "some_file_txt");
});
test("TaskTarget: basenameN returns last n path segments joined with ___", () => {
  const nested = new TaskTarget("/a/b/c/d");
  const lastTwo = nested.basenameN(2);
  assert.equal(lastTwo, "c___d");
  const lastOne = nested.basenameN(1);
  assert.equal(lastOne, "d");
});
// id accessor: throws when unset, otherwise yields the safe-ified string or
// the result of the supplied function.
test("TaskTarget: id throws when no idValue is set", () => {
  const readMissingId = () => new TaskTarget("/foo").id;
  assert.throws(readMissingId, /must have an id/);
});
test("TaskTarget: id with a string value is safe-ified", () => {
  const named = new TaskTarget("/foo").setId("my-id");
  assert.equal(named.id, "my_id");
});
test("TaskTarget: id with a function value is resolved against the target", () => {
  const derived = new TaskTarget("/foo/bar").setId(self => self.basename);
  assert.equal(derived.id, "bar");
});
// cd(): an absolute argument replaces the path, a relative one is appended.
test("TaskTarget: cd with an absolute path replaces the path", () => {
  const target = new TaskTarget("/foo");
  target.cd("/bar/baz");
  const { path } = target;
  assert.equal(path, "/bar/baz");
});
test("TaskTarget: cd with a relative path joins with the current path", () => {
  const target = new TaskTarget("/foo");
  target.cd("bar");
  const { path } = target;
  assert.equal(path, "/foo/bar");
});
// Pipeline construction: read() and cmd() each append one op, and read() is
// rejected once another op is already in the pipeline.
test("TaskTarget: read adds a read op to the pipeline", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  const ops = target.pipeline;
  assert.equal(ops.length, 1);
  assert.equal(ops[0].type, "read");
});
test("TaskTarget: cmd adds a mid op to the pipeline", () => {
  const target = new TaskTarget("/foo");
  target.cmd("jq .");
  const ops = target.pipeline;
  assert.equal(ops.length, 1);
  assert.equal(ops[0].type, "mid");
});
test("TaskTarget: pushToPipeline throws if read is not the first op", () => {
  const target = new TaskTarget("/foo");
  target.cmd("jq .");
  const lateRead = () => target.read();
  assert.throws(lateRead, /first item/);
});
// clone(): the copy carries path, id and pipeline; reassigning the copy's path
// leaves the source target untouched.
test("TaskTarget: clone produces an independent copy", () => {
  const original = new TaskTarget("/foo").setId("orig");
  original.read();

  const copy = original.clone();
  assert.equal(copy.path, "/foo");
  assert.equal(copy.id, "orig");
  assert.equal(copy.pipeline.length, 1);

  copy.path = "/other";
  assert.equal(original.path, "/foo"); // source target keeps its path
});
// glob(): run against the on-disk fixture directory; every match must be a
// TaskTarget whose path ends in ".json".
test("TaskTarget: glob returns matching TaskTargets from disk", () => {
  const root = new TaskTarget(FIXTURE_DIR);
  const matches = root.glob("friends/*.json");
  assert.ok(matches.length > 0);
  assert.ok(matches.every(match => match instanceof TaskTarget));
  assert.ok(matches.every(match => match.path.endsWith(".json")));
});
// -- toShell / shEscape -------------------------------------------------------
// toShell(): a read op renders as `cat <path>`; later ops are joined with " | ".
test("toShell: a single read produces a cat command", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  const shell = target.toShell();
  assert.equal(shell, "cat /foo/bar.txt");
});
test("toShell: read piped into cmd", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  target.cmd("jq .");
  const shell = target.toShell();
  assert.equal(shell, "cat /foo/bar.txt | jq .");
});
// Paths containing these characters are expected to be wrapped in $'...'.
// One test is registered per character, in this order.
[" ", "$", "!", "&"].forEach((special) => {
  test(`toShell: quotes paths that contain ${JSON.stringify(special)}`, () => {
    const target = new TaskTarget(`/foo/bar${special}baz.txt`);
    target.read();
    const expected = `cat $'/foo/bar${special}baz.txt'`;
    assert.equal(target.toShell(), expected);
  });
});
// A single quote inside the path must also be backslash-escaped.
test("toShell: quotes and escapes paths that contain '", () => {
  const target = new TaskTarget("/foo/bar'baz.txt");
  target.read();
  const expected = "cat $'/foo/bar\\'baz.txt'";
  assert.equal(target.toShell(), expected);
});
// cmd() also accepts a token array or a function of the target.
test("toShell: cmd with array splits tokens", () => {
  const target = new TaskTarget("/foo");
  const tokens = ["jq", "."];
  target.cmd(tokens);
  assert.equal(target.toShell(), "jq .");
});
test("toShell: cmd with function resolves at shell-generation time", () => {
  const target = new TaskTarget("/foo/bar.json");
  target.cmd(self => `jq -r .name ${self.path}`);
  assert.equal(target.toShell(), "jq -r .name /foo/bar.json");
});
// -- module-level functions ---------------------------------------------------
// Module-level cd/read/cmd take a list of targets; each test also checks that
// the first input target is left unmodified.
test("cd: clones and changes directory of each target", () => {
  const first = new TaskTarget("/a");
  const second = new TaskTarget("/b");
  const moved = cd([first, second], "sub");
  assert.equal(moved[0].path, "/a/sub");
  assert.equal(moved[1].path, "/b/sub");
  assert.equal(first.path, "/a"); // input target keeps its path
});
test("read: clones and adds a read op to each target", () => {
  const first = new TaskTarget("/a.txt");
  const second = new TaskTarget("/b.txt");
  const withRead = read([first, second]);
  assert.equal(withRead[0].pipeline[0].type, "read");
  assert.equal(withRead[1].pipeline[0].type, "read");
  assert.equal(first.pipeline.length, 0); // input target gains no op
});
test("cmd: clones and appends a cmd op to each target", () => {
  const source = new TaskTarget("/a.txt");
  source.read();
  const withCmd = cmd([source], "jq .");
  assert.equal(withCmd[0].pipeline.length, 2);
  assert.equal(source.pipeline.length, 1); // input target keeps only its read op
});
// Module-level setId: every returned target reports the id, while the input
// targets remain without one.
test("setId: clones and sets id on each target", () => {
  const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
  const result = setId(targets, "myid");
  assert.equal(result[0].id, "myid");
  assert.equal(result[1].id, "myid");
  // Originals have no id. Match the specific "must have an id" error so that
  // an unrelated exception cannot satisfy the assertion, and check every
  // input target rather than only the first.
  for (const original of targets) {
    assert.throws(() => original.id, /must have an id/);
  }
});
// Module-level glob (imported as taskGlob), run against the fixture directory.
test("taskGlob: returns matching targets across all input targets", () => {
  const roots = [new TaskTarget(FIXTURE_DIR)];
  const matches = taskGlob(roots, "friends/*.json");
  assert.ok(matches.length > 0);
  assert.ok(matches.every(match => match.path.endsWith(".json")));
});
// -- verify -------------------------------------------------------------------
// verify() drops a target that has no pipeline ops, and one whose path is not
// on disk.
test("verify: removes targets with an empty pipeline", async () => {
  const noOps = new TaskTarget(FIXTURE_FILE);
  const kept = await verify([noOps]);
  assert.equal(kept.length, 0);
});
test("verify: removes targets whose file does not exist", async () => {
  const missing = new TaskTarget("/nonexistent-file-xyz");
  missing.read();
  const kept = await verify([missing]);
  assert.equal(kept.length, 0);
});
// verify() keeps a target that exists on disk and has at least one op.
test("verify: keeps targets that exist and have a pipeline", async () => {
  const valid = new TaskTarget(FIXTURE_FILE);
  valid.read();
  const kept = await verify([valid]);
  assert.equal(kept.length, 1);
  assert.equal(kept[0].path, FIXTURE_FILE);
});
test("verify: filters a mixed list to only valid targets", async () => {
  const good = new TaskTarget(FIXTURE_FILE);
  good.read();
  const noPipeline = new TaskTarget(FIXTURE_FILE);
  const noFile = new TaskTarget("/nonexistent-xyz");
  noFile.read();

  const kept = await verify([good, noPipeline, noFile]);
  assert.equal(kept.length, 1);
  assert.equal(kept[0], good); // the very same object survives
});
// -- getTSVManifest -----------------------------------------------------------
// getTSVManifest(): one "id<TAB>shell" line per target, newline-separated.
test("getTSVManifest: produces id<TAB>shell for a single target", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.setId("myid");
  target.read();
  const manifest = getTSVManifest([target]);
  assert.equal(manifest, "myid\tcat /foo/bar.txt");
});
test("getTSVManifest: joins multiple targets with newlines", () => {
  // Builds a target with an id and a single read op, in that order.
  const readTarget = (path: string, id: string) => {
    const target = new TaskTarget(path);
    target.setId(id);
    target.read();
    return target;
  };
  const first = readTarget("/a.txt", "a");
  const second = readTarget("/b.txt", "b");
  const manifest = getTSVManifest([first, second]);
  assert.equal(manifest, "a\tcat /a.txt\nb\tcat /b.txt");
});
// -- TaskTargetPipelineHelper -------------------------------------------------
// pipeline(): wraps a plain array in a helper and returns an existing helper
// unchanged.
test("TaskTargetPipelineHelper: pipeline() promotes a plain array", () => {
  const plain = [new TaskTarget("/a")];
  const helper = TaskTargetPipelineHelper.pipeline(plain);
  assert.ok(helper instanceof TaskTargetPipelineHelper);
});
test("TaskTargetPipelineHelper: pipeline() is idempotent", () => {
  const plain = [new TaskTarget("/a")];
  const once = TaskTargetPipelineHelper.pipeline(plain);
  const twice = TaskTargetPipelineHelper.pipeline(once);
  assert.equal(once, twice); // same object, not merely an equal one
});
// Helper methods cd/read/cmd: each call yields a helper whose targets reflect
// the applied operation.
test("TaskTargetPipelineHelper: cd returns a new helper with paths changed", () => {
  const start = TaskTargetPipelineHelper.pipeline([
    new TaskTarget("/a"),
    new TaskTarget("/b"),
  ]);
  const moved = start.cd("sub");
  assert.ok(moved instanceof TaskTargetPipelineHelper);
  assert.equal(moved[0].path, "/a/sub");
  assert.equal(moved[1].path, "/b/sub");
});
test("TaskTargetPipelineHelper: read returns a new helper with read ops added", () => {
  const start = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const withRead = start.read();
  assert.ok(withRead instanceof TaskTargetPipelineHelper);
  assert.equal(withRead[0].pipeline[0].type, "read");
});
test("TaskTargetPipelineHelper: cmd returns a new helper with cmd ops added", () => {
  const start = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const piped = start.read().cmd("jq .");
  assert.equal(piped[0].toShell(), "cat /a.txt | jq .");
});
// -- collect ------------------------------------------------------------------
// collect(): after further chained calls the set is expected to hold only the
// latest helper in the chain.
test("collect: the final end of a chain is added to the collection set", () => {
  const ends = new Set<TaskTargetPipelineHelper>();
  const head = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  head.collect(ends);
  const tail = head.cd("sub");
  assert.equal(ends.size, 1);
  assert.ok(ends.has(tail));
});
test("collect: moving the chain end removes the old element and adds the new one", () => {
  const ends = new Set<TaskTargetPipelineHelper>();
  const head = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  head.collect(ends);
  const middle = head.cd("sub");
  const tail = middle.read();
  assert.equal(ends.size, 1);
  assert.ok(ends.has(tail));
  assert.ok(!ends.has(middle)); // the superseded link is no longer tracked
});
// Two separate chains registered with the same set each contribute their own
// end; flattening the collected helpers yields one target per branch.
test("collect: gathers the ends of multiple independent pipeline branches", () => {
  const ends = new Set<TaskTargetPipelineHelper>();
  const branchA = TaskTargetPipelineHelper
    .pipeline([new TaskTarget("/a.txt")])
    .collect(ends)
    .read();
  const branchB = TaskTargetPipelineHelper
    .pipeline([new TaskTarget("/b.txt")])
    .collect(ends)
    .read();
  assert.equal(ends.size, 2);
  assert.ok(ends.has(branchA));
  assert.ok(ends.has(branchB));
  const flattened = Array.from(ends).flat();
  assert.equal(flattened.length, 2);
});