import { type DatabaseSync } from "node:sqlite";
import { fileURLToPath } from "node:url";
import { google } from "./data-export/google.ts";
import { facebook, facebook_v2 } from "./data-export/facebook.ts";
import { type TaskTarget, execPaths } from "./data-export/task.ts";
import * as DataIO from "./data-export/io.ts";

const __filename = fileURLToPath(import.meta.url);

/** Timestamp (ms since epoch) captured when this module is first evaluated. */
export const startTime = Date.now();

/**
 * Formats the time elapsed since {@link startTime}.
 *
 * @returns Seconds with two decimal places and an `s` suffix, e.g. `"1.23s"`.
 */
export const elapsed = () => `${((Date.now() - startTime) / 1000).toFixed(2)}s`;

/**
 * Runs the given targets through `DataIO.runPipeline` and loads the results
 * into the database returned by `DataIO.getDefaultDB()`.
 *
 * Progress is logged to stdout, each line prefixed with {@link elapsed}.
 *
 * @param targets - Targets to run through the pipeline.
 * @returns The database handle after `DataIO.loadIntoDb` has completed.
 */
export async function loadTaskInNewDb(targets: TaskTarget[]): Promise<DatabaseSync> {
  console.log(`${elapsed()} - Run all targets`);
  const out = await DataIO.runPipeline(targets);
  console.log(`${elapsed()} - Final targets exported to CSV. Got ${out.length} targets`);

  // TODO: Add an option to output everything plainly as CSV in a single directory

  // NOTE(review): the log message says ":memory:"; presumably getDefaultDB()
  // opens an in-memory database — confirm in ./data-export/io.ts.
  console.log(`${elapsed()} - Building combined database table in :memory:`);
  const db = DataIO.getDefaultDB();
  await DataIO.loadIntoDb(db, out);

  // The count is only used for the log line below, so read it through a guard
  // instead of a non-null assertion on the row.
  const countRow = db
    .prepare(`SELECT COUNT(*) as count FROM base_data_manager_metadata`)
    .get();
  const tableCount = countRow?.count ?? 0;
  console.log(`${elapsed()} - Single database built with ${tableCount} tables`);

  return db;
}

/**
 * Script entry point: builds targets from the hard-coded export path, loads
 * them into a database via {@link loadTaskInNewDb}, and dumps that database to
 * `sqlitePath` on disk.
 */
async function main(): Promise<void> {
  // Configurable stuff
  const sqlitePath = 'your.db';

  console.log(`${elapsed()} - Building targets`);
  const targets = await execPaths([
    {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
    // {path: "/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01", op: facebook()}
    // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip", op: pipe(unzip(), facebook_v2())}
    // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001", op: facebook_v2()}
  ]);
  console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`);

  const db = await loadTaskInNewDb(targets);

  console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`);
  DataIO.dumpDBToDisk(db, sqlitePath);

  console.log(`${elapsed()} - Database written to disk`);
}

// Only run the pipeline when this file is executed directly, not when it is
// imported for its exports.
if (process.argv[1] === __filename) {
  // Handle rejection explicitly rather than leaving a floating promise:
  // report the failure and signal it through the exit code.
  main().catch((err: unknown) => {
    console.error(err);
    process.exitCode = 1;
  });
}

// TODO: Move this into here
  // csvSink(
  //   summarization?: [string, string][]
  // ) {
  //   // TODO:
  //   return this;

  //   // Ingest this csv into the database at the given id
  //   // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
  //   // Add a post processing function for these targets that prints out the summarization
  //   // stats
  //   // this.post(async (t: TaskTarget)=>{
  //   //   // We only do the first one so far for the summarization
  //   //   let queryLine: string;
  //   //   let formatFn: (r: any)=>string;
  //   //   const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
  //   //   if (type === "numeric") {
  //   //     queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
  //   //     formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
  //   //   }
  //   //   else {
  //   //     queryLine = `count(*) as n`;
  //   //     formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
  //   //   }

  //   //   const cmd = "sqlite-utils";
  //   //   const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
  //   //   const { stdout, stderr } = await execFile(cmd, args);
  //   //   const results = JSON.parse(stdout);
  //   //   const result = results[0]; // should only be one result in the array for this type of query
  //   //   const logLine = formatFn(result);
  //   //   (t as any).log = logLine;
  //   // });

  //   // return this;
  // }