108 lines
3.6 KiB
JavaScript
Executable file
108 lines
3.6 KiB
JavaScript
Executable file
#!/usr/bin/env -S node
|
|
|
|
import { $, path, minimist } from "zx";
|
|
import { strict as assert } from "node:assert";
|
|
import fs from "node:fs/promises";
|
|
import { parse } from "csv-parse/sync";
|
|
import { stringify } from "csv-stringify/sync";
|
|
import { scrubPrimitive } from "./scrub_primitive.ts";
|
|
|
|
$.verbose = true;
|
|
|
|
/**
 * Awaits `p` and reports its outcome as a tuple instead of throwing.
 *
 * @param p Promise to settle.
 * @returns `[undefined, value]` when `p` fulfils, or `[error, undefined]` when
 *   it rejects. On rejection the first slot is always truthy: a falsy rejection
 *   reason (`undefined`, `null`, `0`, `""`, ...) is wrapped in an `Error`.
 */
async function ptry<TRet, TError = Error>(
  p: Promise<TRet>
): Promise<[TError, undefined] | [undefined, TRet]> {
  try {
    return [undefined, await p];
  } catch (err) {
    // Callers branch on the truthiness of the first slot, so a falsy rejection
    // reason would otherwise be indistinguishable from success.
    const reason = err ? err : new Error(`Promise rejected with a falsy reason: ${String(err)}`);
    return [reason as TError, undefined];
  }
}
|
|
|
|
/** Error signalling that no scrubbing method exists for a given file. */
class UnsupportedScrubError extends Error {
  constructor(message?: string) {
    super(message);
    // Without this, stack traces and String(err) report a generic "Error".
    this.name = "UnsupportedScrubError";
  }
}
|
|
|
|
/** Optional settings accepted by `scrubFile`. */
interface ScrubOptions {
  /** Intended to select the file format explicitly rather than by file extension. */
  overrideType?: "csv" | "json";
  /** When true, the first CSV row is treated as a header row. Defaults to false. */
  hasHeaders?: boolean;
}
|
|
|
|
/**
 * Scrubs a single CSV or JSON file and writes the scrubbed result to `outFile`.
 *
 * The format is `options.overrideType` when that is "csv" or "json"; otherwise
 * it is inferred from a `.csv` / `.json` suffix on `inFile`.
 *
 * - CSV: the file is parsed, at most `MAX_ROWS` data rows are kept, and every
 *   kept cell is passed through `scrubPrimitive`. With `options.hasHeaders` the
 *   first row is copied through unscrubbed and does not count toward the limit.
 * - JSON: the file is piped through `jq`, which is given `scriptDir` as its
 *   module search path (`-L`) and runs the `scrub` filter from the included
 *   "scrub" module.
 *
 * @param inFile Path of the file to read.
 * @param outFile Path the scrubbed output is written to.
 * @param options Optional header / format settings.
 * @returns A tuple whose first element is the error that occurred (an
 *   `UnsupportedScrubError` when no format applies), or `undefined` on success.
 *   The second element is always `undefined`.
 */
async function scrubFile(inFile: string, outFile: string, options?: ScrubOptions): Promise<[(Error | undefined), undefined]> {
  // Upper bound on the number of CSV data rows written to the output.
  const MAX_ROWS = 20;

  // The override comes from `options`, never from module scope, and it always
  // wins over the extension. Any value other than the two supported formats
  // (for example an empty string) counts as "no override".
  const override = options?.overrideType;
  let fileType: "csv" | "json" | undefined;
  if (override === "csv" || override === "json") {
    fileType = override;
  } else if (inFile.endsWith(".csv")) {
    fileType = "csv";
  } else if (inFile.endsWith(".json")) {
    fileType = "json";
  }

  if (fileType === "csv") {
    const [readErr, inCSV] = await ptry(fs.readFile(inFile, { encoding: "utf8" }));
    if (readErr) {
      return [readErr, undefined];
    }

    let outCSV: string;
    try {
      const rows: string[][] = parse(inCSV);
      const hasHeaders = options?.hasHeaders ?? false;
      // slice(0, 1) yields [] for an empty file, so no `undefined` header row
      // is ever handed to stringify.
      const headerRows = hasHeaders ? rows.slice(0, 1) : [];
      const dataRows = rows.slice(headerRows.length, headerRows.length + MAX_ROWS);
      const scrubbedRows = [
        ...headerRows,
        ...dataRows.map((row) => row.map((cell) => scrubPrimitive(cell))),
      ];
      outCSV = stringify(scrubbedRows);
    } catch (err) {
      // Report parse/scrub/stringify failures through the error tuple, like
      // the I/O failures, instead of rejecting.
      return [err instanceof Error ? err : new Error(String(err)), undefined];
    }

    const [writeErr] = await ptry(fs.writeFile(outFile, outCSV, { encoding: "utf8" }));
    return [writeErr, undefined];
  }

  if (fileType === "json") {
    // `scriptDir` is a module-level constant initialised before this function is first called.
    const [jqErr] = await ptry($`cat ${inFile} | jq -L ${scriptDir} 'include "scrub"; scrub' > ${outFile}`);
    return [jqErr, undefined];
  }

  return [new UnsupportedScrubError(`No method for scrubbing file '${inFile}'`), undefined];
}
|
|
|
|
// Directory containing this script; handed to jq as its module search path.
// import.meta.dirname is a decoded, platform-native filesystem path, whereas
// `new URL(import.meta.url).pathname` keeps percent-escapes (a space becomes
// "%20") and is not a valid path on Windows.
const scriptDir = import.meta.dirname;
|
|
|
|
// Parse the command line: one positional file/glob plus two optional flags.
const argv = minimist(process.argv.slice(2), {
  string: ["override-type"],
  boolean: ["has-headers"],
});
const [fileOrGlob] = argv._;
// Declared under `boolean` above, so no coercion with !! is needed.
const hasHeaders = argv["has-headers"];
const overrideType = argv["override-type"];

assert(fileOrGlob, "Usage: ./scrub.ts [--has-headers] [--override-type=csv] <file_or_glob>");
assert(
  [undefined, "", "csv", "json"].includes(overrideType),
  "Override type must be either 'json' or 'csv'"
);
|
|
|
|
// Treat every positional argument as a pattern. An unquoted glob is expanded
// by the shell into many arguments, and all of them must be scrubbed; using
// only the first would silently leave the rest untouched.
const patterns = argv._.map(String);
console.log(`Matching files against passed file_or_glob: '${patterns.join("', '")}'`);

// Resolve matches to absolute paths. The Set drops files matched by more than
// one pattern so that no file is processed twice.
const uniquePaths = new Set<string>();
for await (const file of fs.glob(patterns)) {
  uniquePaths.add(path.resolve(file));
}
const filePaths: string[] = [...uniquePaths];

console.log("filePaths", filePaths);
assert(filePaths.length > 0, `No files found matching: ${patterns.join(", ")}`);
|
|
|
|
// Scrub each matched file in place. The scrubbed output is first written to a
// `.tmp` sibling; the original is then renamed to `.DELETE-THIS-HAS-PII` and
// the scrubbed copy takes over the original path.
for (const file of filePaths) {
  console.log(`Processing: ${file}`);

  const scrubbedPath = `${file}.tmp`;
  const originalBackup = `${file}.DELETE-THIS-HAS-PII`;

  const scrubResult = await scrubFile(file, scrubbedPath, { hasHeaders, overrideType });
  const scrubError = scrubResult[0];
  if (scrubError instanceof UnsupportedScrubError) {
    // Unsupported files are reported and skipped rather than aborting the run.
    console.warn(scrubError.message);
    continue;
  }
  assert(!scrubError, `Error processing ${file}: ${scrubError}`);

  // Order matters: move the original aside before the scrubbed copy replaces it.
  const renames: [string, string][] = [
    [file, originalBackup],
    [scrubbedPath, file],
  ];
  for (const [from, to] of renames) {
    const [moveErr] = await ptry($`mv ${from} ${to}`);
    assert(!moveErr, `Error moving ${from} to ${to}: ${moveErr}`);
  }
}

console.log();
console.log("Done!");
|