#!/usr/bin/env -S node
import { $, path, minimist } from "zx";
import { strict as assert } from "node:assert";
import fs from "node:fs/promises";
import { fileURLToPath } from "node:url";
import { parse } from "csv-parse/sync";
import { stringify } from "csv-stringify/sync";
import { scrubPrimitive } from "./scrub_primitive.ts";

// Echo every shell command zx runs.
$.verbose = true;

/**
 * Directory containing this script; passed to `jq -L` as a module search path.
 * `fileURLToPath` is used instead of `URL#pathname` so that percent-encoded
 * characters (e.g. spaces) in the path are decoded correctly.
 */
const scriptDir = path.dirname(fileURLToPath(import.meta.url));

/** Maximum number of data rows (header excluded) kept in a scrubbed CSV. */
const MAX_ROWS = 20;

/** File formats `scrubFile` knows how to handle. */
type ScrubType = "csv" | "json";

/** Error-first result tuple returned by the scrub functions. */
type ScrubResult = [Error | undefined, undefined];

/**
 * Awaits `p` and returns an error-first tuple instead of throwing.
 *
 * @param p - The promise to await.
 * @returns `[undefined, value]` when `p` resolves, `[error, undefined]` when it rejects.
 *   The rejection reason is cast to `TError` without any runtime check.
 */
async function ptry<TRet, TError = Error>(
  p: Promise<TRet>
): Promise<[TError, undefined] | [undefined, TRet]> {
  try {
    const result = await p;
    return [undefined, result];
  } catch (err) {
    return [err as TError, undefined];
  }
}

/** Returned by `scrubFile` when no scrub method exists for the given file. */
class UnsupportedScrubError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    this.name = "UnsupportedScrubError";
  }
}

interface ScrubOptions {
  /** CSV only: when true the first row is copied through without scrubbing. Defaults to false. */
  hasHeaders?: boolean;
  /** Forces the scrub method regardless of the file extension. */
  overrideType?: ScrubType;
}

/** Normalizes an unknown thrown value into an `Error`. */
function toError(err: unknown): Error {
  return err instanceof Error ? err : new Error(String(err));
}

/** Picks the scrub method from the file extension, or `undefined` if none matches. */
function detectScrubType(inFile: string): ScrubType | undefined {
  if (inFile.endsWith(".csv")) {
    return "csv";
  }
  if (inFile.endsWith(".json")) {
    return "json";
  }
  return undefined;
}

/**
 * Reads `inFile` as CSV, keeps at most `MAX_ROWS` data rows, passes every data
 * cell through `scrubPrimitive`, and writes the result to `outFile`.
 */
async function scrubCsvFile(inFile: string, outFile: string, hasHeaders: boolean): Promise<ScrubResult> {
  const [readErr, inCSV] = await ptry(fs.readFile(inFile, { encoding: "utf8" }));
  if (readErr) {
    return [readErr, undefined];
  }

  let outCSV: string;
  try {
    // parse/stringify throw synchronously (e.g. on malformed CSV); convert
    // that into the error tuple so callers see a single failure channel.
    const rows: string[][] = parse(inCSV);
    // slice (rather than rows[0]) keeps an empty file from yielding [undefined].
    const headerRows = hasHeaders ? rows.slice(0, 1) : [];
    const dataRows = rows.slice(headerRows.length, headerRows.length + MAX_ROWS);
    const scrubbedRows: unknown[][] = [
      ...headerRows,
      ...dataRows.map((row) => row.map((cell) => scrubPrimitive(cell))),
    ];
    outCSV = stringify(scrubbedRows);
  } catch (err) {
    return [toError(err), undefined];
  }

  const [writeErr] = await ptry(fs.writeFile(outFile, outCSV, { encoding: "utf8" }));
  return [writeErr, undefined];
}

/**
 * Pipes `inFile` through jq, running the `scrub` filter from the `scrub`
 * module (looked up in `scriptDir` via `-L`), and redirects the output to `outFile`.
 */
async function scrubJsonFile(inFile: string, outFile: string): Promise<ScrubResult> {
  const [jqErr] = await ptry($`cat ${inFile} | jq -L ${scriptDir} 'include "scrub"; scrub' > ${outFile}`);
  return [jqErr, undefined];
}

/**
 * Scrubs a file, json or csv.
 *
 * The method is chosen from `options.overrideType` when given, otherwise from
 * the `.csv` / `.json` extension of `inFile`.
 *
 * @param inFile - Path of the file to scrub.
 * @param outFile - Path the scrubbed output is written to.
 * @param options - See {@link ScrubOptions}.
 * @returns An error-first tuple: `[undefined, undefined]` on success,
 *   `[error, undefined]` on failure. The error is an `UnsupportedScrubError`
 *   when neither the override nor the extension selects a scrub method.
 */
async function scrubFile(inFile: string, outFile: string, options?: ScrubOptions): Promise<[(Error | undefined), undefined]> {
  const scrubType = options?.overrideType ?? detectScrubType(inFile);
  if (scrubType === "csv") {
    return scrubCsvFile(inFile, outFile, options?.hasHeaders ?? false);
  }
  if (scrubType === "json") {
    return scrubJsonFile(inFile, outFile);
  }
  return [new UnsupportedScrubError(`No method for scrubbing file '${inFile}'`), undefined];
}

const argv = minimist(process.argv.slice(2), {
  boolean: ["has-headers"],
  string: ["override-type"],
});
const fileOrGlob = argv._[0];
const hasHeaders = argv["has-headers"]; // already a boolean, no need for !!
const rawOverrideType: unknown = argv["override-type"];

assert(fileOrGlob, "Usage: ./scrub.ts [--has-headers] [--override-type=csv] <file_or_glob>");
assert(
  rawOverrideType === undefined ||
    rawOverrideType === "" ||
    rawOverrideType === "csv" ||
    rawOverrideType === "json",
  "Override type must be either 'json' or 'csv'"
);
// An empty --override-type is accepted above and treated the same as "not given".
const overrideType: ScrubType | undefined =
  rawOverrideType === "csv" || rawOverrideType === "json" ? rawOverrideType : undefined;

console.log(`Matching files against passed file_or_glob: '${fileOrGlob}'`);
const filePaths: string[] = [];
for await (const file of fs.glob(fileOrGlob)) {
  const resolved = path.resolve(file);
  filePaths.push(resolved);
}
console.log("filePaths", filePaths);
assert(filePaths.length > 0, `No files found matching: ${fileOrGlob}`);

for (const file of filePaths) {
  console.log(`Processing: ${file}`);
  const tmpFile = `${file}.tmp`;
  const piiFile = `${file}.DELETE-THIS-HAS-PII`;

  const [scrubError] = await scrubFile(file, tmpFile, { hasHeaders, overrideType });
  if (scrubError instanceof UnsupportedScrubError) {
    // Unsupported files are skipped, not fatal.
    console.warn(scrubError.message);
    continue;
  }
  assert(!scrubError, `Error processing ${file}: ${scrubError}`);

  // Keep the original under a loudly-named path, then move the scrubbed
  // output into the original's place.
  const [mvErr] = await ptry($`mv ${file} ${piiFile}`);
  assert(!mvErr, `Error moving ${file} to ${piiFile}: ${mvErr}`);
  const [mv2Err] = await ptry($`mv ${tmpFile} ${file}`);
  assert(!mv2Err, `Error moving ${tmpFile} to ${file}: ${mv2Err}`);
}

console.log();
console.log("Done!");