import { type SQLOutputValue, type DatabaseSync } from "node:sqlite"; import { createWriteStream } from 'node:fs'; import { fileURLToPath } from "node:url"; import "./data-export/facebook.ts"; import { facebook } from "./data-export/facebook.ts"; import { execPaths, COLUMN_TYPES } from "./data-export/task.ts"; import * as DataIO from "./data-export/io.ts"; import { startTime, elapsed, loadTaskInNewDb } from "./main.ts"; const __filename = fileURLToPath(import.meta.url); function dumpDBTableToCSV(db: DatabaseSync, tableName: string, outputFile: string) { const stream = createWriteStream(outputFile); const stmt = db.prepare(`SELECT * FROM ${tableName}`); let headerWritten = false; for (const row of stmt.iterate()) { if (!headerWritten) { stream.write(Object.keys(row).join(',') + '\n'); headerWritten = true; } stream.write(Object.values(row).map(v => `"${String(v ?? '').replace(/"/g, '""')}"`).join(',') + '\n'); } stream.end(); } function getColumnNames(db: DatabaseSync, tableName: string) { return db.prepare(`PRAGMA table_info(${tableName})`).all().map(c => c.name) as string[]; } function templateToSql(template: string, columns: string[]) { // Convert '{0}, {1}' to '%s, %s' const args: string[] = []; const sqlTemplate = template.replace(/\{(\d+)\}/g, (match, index) => { args.push(columns[parseInt(index)]); return '%s'; }); return `printf('${sqlTemplate}', ${args.join(', ')})`; } function sqlLiteral(str: string | undefined | null): string { if (str === null || str === undefined) { return 'NULL'; } // Escape single quotes by doubling them const escaped = str.replace(/'/g, "''"); // Wrap in single quotes return `'${escaped}'`; } async function main() { // Configure the tasks to run console.log(`${elapsed()} - Building targets`); const targets = await execPaths([ {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()} ]); console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`); const db = await loadTaskInNewDb(targets); // New output tables db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`); //(message, email, note, // social, location, media, event, document, // bookmark; defaults to note) type ColumnMetaType = (keyof typeof COLUMN_TYPES); interface MetadataRow { id: string, perRowDescription?: string, perRowTags?: string, columnMeta: ColumnMetaType[], columnNames: string[], metaId?: string } function verifyMetdataRow(input: Record): undefined | MetadataRow { const { id, perRowDescription, perRowTags, columnMeta: columnMetaCSV, metaId } = input; if (!id) { console.error("Row did not have id/tableName, skipping"); return undefined; } if (typeof id !== "string") { console.error(`Id must be string, got ${typeof id}, ${id}`); return undefined; } if (!columnMetaCSV) { console.warn(`${id} did not have columnMeta, nothing to do. Skipping`); return undefined; // No column information } if (typeof columnMetaCSV !== "string") { console.warn(`${id} did not have columnMeta of type string. Skipping`); return undefined; } const columnMeta = columnMetaCSV.split(",") as ColumnMetaType[]; // Get the column names from the table id const columnNames = getColumnNames(db, id); if (columnNames.length !== columnMeta.length) { console.error(`columnNames and columnMeta did not have same length. skipping`); return undefined; } if (typeof perRowDescription !== "string" && perRowDescription !== undefined && perRowDescription !== null) { console.warn(`Invalid typeof perRowDescription, was ${typeof perRowDescription}, value ${perRowDescription}`); return undefined; } if (typeof perRowTags !== "string" && perRowTags !== undefined && perRowTags !== null) { console.warn(`Invalid typeof perRowTags, was ${typeof perRowTags}, value ${perRowTags}`); return undefined; } if (typeof metaId !== "string" && metaId !== undefined && metaId !== null) { console.warn(`Invalid typeof metaId, was ${typeof metaId}, value ${metaId}`); return undefined; } return { id, perRowDescription: perRowDescription ?? undefined, perRowTags: perRowTags ?? undefined, columnMeta, columnNames, metaId: metaId ?? undefined }; } /**Maps columnMeta names to the column names*/ function metaToNames(meta: MetadataRow): Partial> { const out: Partial> = {}; for (const [idx, name] of meta.columnNames.entries()) { const metaName = meta.columnMeta[idx]; if (out[metaName]) { console.warn(`Duplicate column with metaName "${metaName}". The current one which will be used is "${out[metaName]}". Skipping the duplicate.`); continue; } out[metaName] = name; } return out; } function metaParts(metaNameToColumnName: Partial>): Record { const out: Record = {} as any; for (const type of Object.keys(COLUMN_TYPES) as ColumnMetaType[]) { if (!metaNameToColumnName[type]) { out[type] = "NULL"; continue; } // Wrap in brackets so column names like "from" don't cause any issues out[type] = `[${metaNameToColumnName[type]}]` } return out; } // Iterate over all the tables and their metadata const statement = db.prepare(`SELECT id, perRowDescription, perRowTags, columnMeta, metaId FROM base_data_manager_metadata`); for (const row of statement.iterate()) { const verified = verifyMetdataRow(row); if (!verified) { continue; } const { id, perRowDescription, perRowTags, columnMeta, columnNames, metaId } = verified; const metaNameToColumnName = metaToNames(verified); const part = metaParts(metaNameToColumnName); // Now find what to insert into each row of the combined // Per row tags is an string of csv'd items but needs to be made a literal // TODO: Make this either a template string or have jq do something // tagsPart = templateToSqlExpr(target.perRowTags, columnNames); const tagsPart = sqlLiteral(perRowTags); // Choose what to do with this table based on what meta is present if ( !!metaNameToColumnName.sender && !!metaNameToColumnName.isodatetime ) { if (!metaId) { console.warn(`Chat ${id} with .sender but no .metaId. Skipping`); continue; } // First pull the name of the conversation out of the metaId const receiverThreadTitle = db.prepare(`SELECT title FROM ${metaId} WHERE (id=${sqlLiteral(id)})`).get()?.title; if (!receiverThreadTitle || typeof receiverThreadTitle !== "string") { console.warn(`Chat ${id} with .metaId ${metaId} returned invalid receiverThreadTitle ${typeof receiverThreadTitle}. Skipping`); continue; } const receiverPart = sqlLiteral(receiverThreadTitle); // Put this table into the combined table db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${part.text}, 'message', ${part.sender}, ${receiverPart}, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`); } else if (!!metaNameToColumnName.isodatetime) { // Put this table into the combined table let descriptionPart = perRowDescription ? templateToSql(perRowDescription, columnNames) : `'An entry from the ${id} table'`; // Default is just kinda garbo... db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${descriptionPart}, 'node', NULL, NULL, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`); } else { console.warn(`Table with id ${id} had no isodatetime or anything else of value, skipping...`); } } const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()?.count; console.log(`${elapsed()} - Combined database built with ${count} rows`); // Dump it to the disk for debugging const sqlitePath = "debug_your.csv.db"; console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`); await DataIO.dumpDBToDisk(db, sqlitePath); console.log(`${elapsed()} - Database written to disk`); // Dump it all to the path specified dumpDBTableToCSV(db, "combined", "your.csv"); console.log(`${elapsed()} - Combined database written to disk as CSV`); db.close(); } if (process.argv[1] === __filename) { main(); }