// base-data-manager/test/task.ts

import test from "node:test";
import nodePath from "node:path";
import { strict as assert } from "node:assert/strict";
import {
  TaskTarget,
  cd,
  glob as taskGlob,
  read,
  cmd,
  setId,
  verify,
  getTSVManifest,
  TaskTargetPipelineHelper,
} from "../data-export/task.ts";

// Despite the name, this holds the directory of the current module
// (`import.meta.dirname`), not the file path itself.
const THIS_FILE = import.meta.dirname;
// Fixture directory, resolved relative to this test module's directory.
const FIXTURE_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01');
// A file inside the fixture directory; the tests below expect it to exist on disk.
const FIXTURE_FILE = nodePath.join(FIXTURE_DIR, 'friends/friends.json');

// -- TaskTarget ---------------------------------------------------------------

test("TaskTarget: constructor initializes path, pipeline, postFns", () => {
  // A freshly built target keeps the given path and starts with two empty lists.
  const fresh = new TaskTarget("/foo/bar");
  assert.equal(fresh.path, "/foo/bar");
  for (const ops of [fresh.pipeline, fresh.postFns]) {
    assert.deepEqual(ops, []);
  }
});

test("TaskTarget: exists() returns true for a real file", () => {
  const onDisk = new TaskTarget(FIXTURE_FILE);
  assert.equal(onDisk.exists(), true);
});

test("TaskTarget: exists() returns false for a missing file", () => {
  const missing = new TaskTarget("/nonexistent-file-xyz");
  assert.equal(missing.exists(), false);
});

test("TaskTarget: basename safe-ifies the path basename", () => {
  // Expectation: '-' and '.' in the last path segment come back as '_'.
  const target = new TaskTarget("/foo/bar/some-file.txt");
  const actual = target.basename;
  assert.equal(actual, "some_file_txt");
});

test("TaskTarget: basenameN returns last n path segments joined with ___", () => {
  const target = new TaskTarget("/a/b/c/d");
  const cases: Array<[number, string]> = [
    [2, "c___d"],
    [1, "d"],
  ];
  for (const [count, expected] of cases) {
    assert.equal(target.basenameN(count), expected);
  }
});

test("TaskTarget: id throws when no idValue is set", () => {
  const readMissingId = () => new TaskTarget("/foo").id;
  assert.throws(readMissingId, /must have an id/);
});

test("TaskTarget: id with a string value is safe-ified", () => {
  const target = new TaskTarget("/foo").setId("my-id");
  const actual = target.id;
  assert.equal(actual, "my_id");
});

test("TaskTarget: id with a function value is resolved against the target", () => {
  // The callback receives the target itself, so the id is derived from its basename.
  const target = new TaskTarget("/foo/bar").setId(self => self.basename);
  const actual = target.id;
  assert.equal(actual, "bar");
});

test("TaskTarget: cd with an absolute path replaces the path", () => {
  const target = new TaskTarget("/foo");
  target.cd("/bar/baz");
  const actual = target.path;
  assert.equal(actual, "/bar/baz");
});

test("TaskTarget: cd with a relative path joins with the current path", () => {
  const target = new TaskTarget("/foo");
  target.cd("bar");
  const actual = target.path;
  assert.equal(actual, "/foo/bar");
});

test("TaskTarget: read adds a read op to the pipeline", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  const ops = target.pipeline;
  assert.equal(ops.length, 1);
  assert.equal(ops[0].type, "read");
});

test("TaskTarget: cmd adds a mid op to the pipeline", () => {
  const target = new TaskTarget("/foo");
  target.cmd("jq .");
  const ops = target.pipeline;
  assert.equal(ops.length, 1);
  assert.equal(ops[0].type, "mid");
});

test("TaskTarget: pushToPipeline throws if read is not the first op", () => {
  // Queue a command first so that the subsequent read() is no longer op #0.
  const target = new TaskTarget("/foo");
  target.cmd("jq .");
  const lateRead = () => target.read();
  assert.throws(lateRead, /first item/);
});

test("TaskTarget: clone produces an independent copy", () => {
  const t = new TaskTarget("/foo").setId("orig");
  t.read();
  const c = t.clone();

  // The copy is a distinct object that starts out with the same path, id and
  // pipeline contents as the source.
  assert.notEqual(c, t);
  assert.equal(c.path, "/foo");
  assert.equal(c.id, "orig");
  assert.equal(c.pipeline.length, 1);

  // Re-pointing the copy must not affect the original.
  c.path = "/other";
  assert.equal(t.path, "/foo");

  // Appending to the copy's pipeline must not leak into the original either;
  // the module-level read()/cmd() helpers depend on this when they clone.
  c.cmd("jq .");
  assert.equal(c.pipeline.length, 2);
  assert.equal(t.pipeline.length, 1);
});

test("TaskTarget: glob returns matching TaskTargets from disk", () => {
  // Glob relative to the fixture directory; at least one JSON file is expected.
  const matches = new TaskTarget(FIXTURE_DIR).glob("friends/*.json");
  assert.ok(matches.length > 0);

  const allAreTargets = !matches.some(m => !(m instanceof TaskTarget));
  assert.ok(allAreTargets);

  const allAreJson = !matches.some(m => !m.path.endsWith(".json"));
  assert.ok(allAreJson);
});

// -- toShell / shEscape -------------------------------------------------------

test("toShell: a single read produces a cat command", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  const shell = target.toShell();
  assert.equal(shell, "cat /foo/bar.txt");
});

test("toShell: read piped into cmd", () => {
  // Two ops are expected to be joined with " | " in pipeline order.
  const target = new TaskTarget("/foo/bar.txt");
  target.read();
  target.cmd("jq .");
  const shell = target.toShell();
  assert.equal(shell, "cat /foo/bar.txt | jq .");
});

// One test per shell-special character: the path is expected to be wrapped in $'...'.
for (const special of [" ", "$", "!", "&"]) {
  test(`toShell: quotes paths that contain ${JSON.stringify(special)}`, () => {
    const rawPath = `/foo/bar${special}baz.txt`;
    const target = new TaskTarget(rawPath);
    target.read();
    assert.equal(target.toShell(), `cat $'${rawPath}'`);
  });
}
test("toShell: quotes and escapes paths that contain '", () => {
  // A single quote inside the path is expected to come back backslash-escaped.
  const target = new TaskTarget("/foo/bar'baz.txt");
  target.read();
  const shell = target.toShell();
  assert.equal(shell, "cat $'/foo/bar\\'baz.txt'");
});

test("toShell: cmd with array splits tokens", () => {
  const target = new TaskTarget("/foo");
  const argv = ["jq", "."];
  target.cmd(argv);
  assert.equal(target.toShell(), "jq .");
});

test("toShell: cmd with function resolves at shell-generation time", () => {
  // The callback is handed the target, so it can splice the path into the command.
  const target = new TaskTarget("/foo/bar.json");
  target.cmd(self => "jq -r .name " + self.path);
  const shell = target.toShell();
  assert.equal(shell, "jq -r .name /foo/bar.json");
});

// -- module-level functions ---------------------------------------------------

test("cd: clones and changes directory of each target", () => {
  const originals = [new TaskTarget("/a"), new TaskTarget("/b")];
  const moved = cd(originals, "sub");
  assert.equal(moved[0].path, "/a/sub");
  assert.equal(moved[1].path, "/b/sub");
  // The input target must still point at its old location.
  assert.equal(originals[0].path, "/a");
});

test("read: clones and adds a read op to each target", () => {
  const originals = [new TaskTarget("/a.txt"), new TaskTarget("/b.txt")];
  const withRead = read(originals);
  assert.equal(withRead[0].pipeline[0].type, "read");
  assert.equal(withRead[1].pipeline[0].type, "read");
  // The input target's pipeline must still be empty.
  assert.equal(originals[0].pipeline.length, 0);
});

test("cmd: clones and appends a cmd op to each target", () => {
  const source = new TaskTarget("/a.txt");
  const originals = [source];
  originals[0].read();
  const withCmd = cmd(originals, "jq .");
  assert.equal(withCmd[0].pipeline.length, 2);
  // The input target keeps only the read op it was given above.
  assert.equal(originals[0].pipeline.length, 1);
});

test("setId: clones and sets id on each target", () => {
  const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
  const result = setId(targets, "myid");
  assert.equal(result[0].id, "myid");
  assert.equal(result[1].id, "myid");

  // Originals have no id. Match the specific "must have an id" error so an
  // unrelated failure (e.g. a TypeError) cannot make this check pass.
  assert.throws(() => targets[0].id, /must have an id/);
  assert.throws(() => targets[1].id, /must have an id/);
});

test("taskGlob: returns matching targets across all input targets", () => {
  const roots = [new TaskTarget(FIXTURE_DIR)];
  const matches = taskGlob(roots, "friends/*.json");
  assert.ok(matches.length > 0);

  const allAreJson = !matches.some(m => !m.path.endsWith(".json"));
  assert.ok(allAreJson);
});

// -- verify -------------------------------------------------------------------

test("verify: removes targets with an empty pipeline", async () => {
  // The file exists, but no op was ever queued on the target.
  const untouched = new TaskTarget(FIXTURE_FILE);
  const kept = await verify([untouched]);
  assert.equal(kept.length, 0);
});

test("verify: removes targets whose file does not exist", async () => {
  const missing = new TaskTarget("/nonexistent-file-xyz");
  missing.read();
  const kept = await verify([missing]);
  assert.equal(kept.length, 0);
});

test("verify: keeps targets that exist and have a pipeline", async () => {
  const valid = new TaskTarget(FIXTURE_FILE);
  valid.read();
  const kept = await verify([valid]);
  assert.equal(kept.length, 1);
  assert.equal(kept[0].path, FIXTURE_FILE);
});

test("verify: filters a mixed list to only valid targets", async () => {
  const good = new TaskTarget(FIXTURE_FILE);
  good.read();

  const noPipeline = new TaskTarget(FIXTURE_FILE);

  const noFile = new TaskTarget("/nonexistent-xyz");
  noFile.read();

  // Only the target that both exists and has an op should come back.
  const kept = await verify([good, noPipeline, noFile]);
  assert.equal(kept.length, 1);
  assert.equal(kept[0], good);
});

// -- getTSVManifest -----------------------------------------------------------

test("getTSVManifest: produces id<TAB>shell for a single target", () => {
  const target = new TaskTarget("/foo/bar.txt");
  target.setId("myid");
  target.read();
  const manifest = getTSVManifest([target]);
  assert.equal(manifest, "myid\tcat /foo/bar.txt");
});

test("getTSVManifest: joins multiple targets with newlines", () => {
  const first = new TaskTarget("/a.txt");
  first.setId("a");
  first.read();

  const second = new TaskTarget("/b.txt");
  second.setId("b");
  second.read();

  // One "id<TAB>shell" row per target, in input order.
  const manifest = getTSVManifest([first, second]);
  assert.equal(manifest, "a\tcat /a.txt\nb\tcat /b.txt");
});

// -- TaskTargetPipelineHelper -------------------------------------------------

test("TaskTargetPipelineHelper: pipeline() promotes a plain array", () => {
  const plain = [new TaskTarget("/a")];
  const helper = TaskTargetPipelineHelper.pipeline(plain);
  assert.ok(helper instanceof TaskTargetPipelineHelper);
});

test("TaskTargetPipelineHelper: pipeline() is idempotent", () => {
  // Passing an existing helper back in should hand back that same helper.
  const once = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a")]);
  const twice = TaskTargetPipelineHelper.pipeline(once);
  assert.equal(once, twice);
});

test("TaskTargetPipelineHelper: cd returns a new helper with paths changed", () => {
  const roots = [new TaskTarget("/a"), new TaskTarget("/b")];
  const helper = TaskTargetPipelineHelper.pipeline(roots);
  const moved = helper.cd("sub");
  assert.ok(moved instanceof TaskTargetPipelineHelper);
  assert.equal(moved[0].path, "/a/sub");
  assert.equal(moved[1].path, "/b/sub");
});

test("TaskTargetPipelineHelper: read returns a new helper with read ops added", () => {
  const helper = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const withRead = helper.read();
  assert.ok(withRead instanceof TaskTargetPipelineHelper);
  assert.equal(withRead[0].pipeline[0].type, "read");
});

test("TaskTargetPipelineHelper: cmd returns a new helper with cmd ops added", () => {
  const helper = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const withCmd = helper.read().cmd("jq .");
  const shell = withCmd[0].toShell();
  assert.equal(shell, "cat /a.txt | jq .");
});

// -- collect ------------------------------------------------------------------

test("collect: the final end of a chain is added to the collection set", () => {
  const ends = new Set<TaskTargetPipelineHelper>();
  const start = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  start.collect(ends);

  // After one chained step, the set is expected to hold that step's result.
  const tip = start.cd("sub");
  assert.equal(ends.size, 1);
  assert.ok(ends.has(tip));
});

test("collect: moving the chain end removes the old element and adds the new one", () => {
  const ends = new Set<TaskTargetPipelineHelper>();
  const start = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  start.collect(ends);

  const middle = start.cd("sub");
  const tip = middle.read();

  // Only the newest helper should remain; the intermediate one must be gone.
  assert.equal(ends.size, 1);
  assert.ok(ends.has(tip));
  assert.equal(ends.has(middle), false);
});

test("collect: gathers the ends of multiple independent pipeline branches", () => {
  const ends = new Set<TaskTargetPipelineHelper>();

  const startA = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const endA = startA.collect(ends).read();
  const startB = TaskTargetPipelineHelper.pipeline([new TaskTarget("/b.txt")]);
  const endB = startB.collect(ends).read();

  assert.equal(ends.size, 2);
  for (const branchEnd of [endA, endB]) {
    assert.ok(ends.has(branchEnd));
  }

  // Flattening the collected helpers should yield one target per branch.
  const flattened = Array.from(ends).flat();
  assert.equal(flattened.length, 2);
});