90 lines
No EOL
3.6 KiB
TypeScript
90 lines
No EOL
3.6 KiB
TypeScript
import { type DatabaseSync } from "node:sqlite";
|
|
import { fileURLToPath } from "node:url";
|
|
import { google } from "./data-export/google.ts";
|
|
import { facebook, facebook_v2 } from "./data-export/facebook.ts";
|
|
import { type TaskTarget, execPaths } from "./data-export/task.ts";
|
|
import * as DataIO from "./data-export/io.ts";
|
|
|
|
// Filesystem path of this module, converted from its `file:` URL. ESM has no
// built-in `__filename`, so it is recreated here; the entry-point check at the
// bottom of the file compares it against `process.argv[1]`.
const __filename = fileURLToPath(import.meta.url);
|
|
|
|
/** Wall-clock timestamp (milliseconds since the epoch) taken when this module is evaluated. */
export const startTime = Date.now();

/**
 * Formats the time passed since {@link startTime} for log prefixes.
 *
 * @returns Seconds elapsed, fixed to two decimals and suffixed with "s" (e.g. "1.25s").
 */
export const elapsed = (): string => {
  const secondsSinceStart = (Date.now() - startTime) / 1000;
  return `${secondsSinceStart.toFixed(2)}s`;
};
|
|
|
|
/**
 * Runs the given targets through `DataIO.runPipeline`, loads the results into
 * the database returned by `DataIO.getDefaultDB`, and hands that database back.
 * Each step is logged to the console with an `elapsed()` prefix.
 *
 * @param targets Task targets to run through the pipeline.
 * @returns The database the pipeline output was loaded into.
 */
export async function loadTaskInNewDb(targets: TaskTarget[]): Promise<DatabaseSync> {
  console.log(`${elapsed()} - Run all targets`);
  const pipelineOutput = await DataIO.runPipeline(targets);
  console.log(`${elapsed()} - Final targets exported to CSV. Got ${pipelineOutput.length} targets`);

  // TODO: Add an option to output everything plainly as CSV in a single directory

  console.log(`${elapsed()} - Building combined database table in :memory:`);
  const database = DataIO.getDefaultDB();
  await DataIO.loadIntoDb(database, pipelineOutput);

  // An aggregate COUNT(*) without GROUP BY always yields exactly one row,
  // which is why the result of get() is asserted non-null.
  const countStatement = database.prepare(`SELECT COUNT(*) as count FROM base_data_manager_metadata`);
  const countRow = countStatement.get()!;
  const tableCount = countRow.count;
  console.log(`${elapsed()} - Single database built with ${tableCount} tables`);

  return database;
}
|
|
|
|
/**
 * Script entry point: builds targets from the hard-coded export path(s) below,
 * loads them into a new database via `loadTaskInNewDb`, and writes that
 * database to `outputPath` with `DataIO.dumpDBToDisk`.
 */
async function main() {
  // Configurable stuff
  const outputPath = 'your.db';

  console.log(`${elapsed()} - Building targets`);
  const targets = await execPaths([
    {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
    // {path: "/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01", op: facebook()}
    // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip", op: pipe(unzip(), facebook_v2())}
    // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001", op: facebook_v2()}
  ]);
  // Targets flagged `aggregate` are left out of the reported count.
  const nonAggregateTargets = targets.filter((target) => !target.aggregate);
  console.log(`${elapsed()} - Found ${nonAggregateTargets.length} possible targets`);

  // The full target list (aggregates included) is what gets loaded.
  const database = await loadTaskInNewDb(targets);

  console.log(`${elapsed()} - Writing database to disk at "${outputPath}"`);
  // Not awaited here, matching the existing call — presumably synchronous; confirm in io.ts.
  DataIO.dumpDBToDisk(database, outputPath);

  console.log(`${elapsed()} - Database written to disk`);
}
|
|
|
|
// Run the CLI only when this file is the script Node was started with
// (process.argv[1]), not when it is imported as a module.
if (process.argv[1] === __filename) {
  // main() returns a promise. Handle its rejection explicitly so a failed run
  // is reported and produces a non-zero exit code, instead of leaving a
  // floating promise whose outcome depends on Node's unhandled-rejection mode.
  main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
|
|
|
|
// TODO: Move this into here
|
|
// csvSink(
|
|
// summarization?: [string, string][]
|
|
// ) {
|
|
// // TODO:
|
|
// return this;
|
|
|
|
// // Ingest this csv into the database at the given id
|
|
// // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
|
|
// // Add a post processing function for these targets that prints out the summarization
|
|
// // stats
|
|
// // this.post(async (t: TaskTarget)=>{
|
|
// // // We only do the first one so far for the summarization
|
|
// // let queryLine: string;
|
|
// // let formatFn: (r: any)=>string;
|
|
// // const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
|
|
// // if (type === "numeric") {
|
|
// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
|
|
// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
|
|
// // }
|
|
// // else {
|
|
// // queryLine = `count(*) as n`;
|
|
// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
|
|
// // }
|
|
|
|
// // const cmd = "sqlite-utils";
|
|
// // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
|
|
// // const { stdout, stderr } = await execFile(cmd, args);
|
|
// // const results = JSON.parse(stdout);
|
|
// // const result = results[0]; // should only be one result in the array for this type of query
|
|
// // const logLine = formatFn(result);
|
|
// // (t as any).log = logLine;
|
|
// // });
|
|
|
|
// // return this;
|
|
// }
|