import { type DatabaseSync, type SQLOutputValue } from "node:sqlite"; import { fileURLToPath } from "node:url"; import { stringify } from "csv-stringify/sync"; import { facebook } from "./data-export/facebook.ts"; import { execPaths, COLUMN_TYPES } from "./data-export/task.ts"; import { elapsed, loadTaskInNewDb } from "./main.ts"; const __filename = fileURLToPath(import.meta.url); type ColumnMetaType = keyof typeof COLUMN_TYPES; interface MetadataRow { id: string; perRowTags?: string; columnMeta: ColumnMetaType[]; columnNames: string[]; } // ── query helpers ───────────────────────────────────────────────────────────── function q(name: string) { return `"${name}"`; } /** Format a number compactly: integer if whole, otherwise 4 sig figs */ function fmt(v: number): string { if (!isFinite(v)) return String(v); if (Number.isInteger(v)) return String(v); return v.toPrecision(4).replace(/\.?0+$/, ''); } /** Non-null filter: treats SQL NULL, empty string, and the literal "null" as missing */ function notNull(col: string) { return `${q(col)} IS NOT NULL AND ${q(col)} != '' AND ${q(col)} != 'null'`; } function rowCount(db: DatabaseSync, table: string): number { return (db.prepare(`SELECT count(*) as n FROM ${q(table)}`).get() as { n: number }).n; } function datetimeRange(db: DatabaseSync, table: string, col: string): string { const r = db.prepare( `SELECT MIN(${q(col)}) as lo, MAX(${q(col)}) as hi FROM ${q(table)} WHERE ${notNull(col)}` ).get() as { lo: string | null; hi: string | null }; if (!r.lo) return '(no dates)'; // Trim to date portion if it looks like a full ISO datetime — keeps the line shorter const trim = (s: string) => s.length > 10 && s[10] === 'T' ? s.slice(0, 10) : s; return `${trim(r.lo)}..${trim(r.hi!)}`; } function numericRange(db: DatabaseSync, table: string, col: string): { lo: number; hi: number } | null { const r = db.prepare( `SELECT MIN(CAST(${q(col)} AS REAL)) as lo, MAX(CAST(${q(col)} AS REAL)) as hi FROM ${q(table)} WHERE ${notNull(col)}` ).get() as { lo: number | null; hi: number | null }; return r.lo !== null ? { lo: r.lo, hi: r.hi! } : null; } function topValues(db: DatabaseSync, table: string, col: string, n: number): { distinct: number; top: { v: string; c: number }[] } { const distinct = (db.prepare( `SELECT count(distinct ${q(col)}) as d FROM ${q(table)} WHERE ${notNull(col)}` ).get() as { d: number }).d; const top = db.prepare( `SELECT ${q(col)} as v, count(*) as c FROM ${q(table)} WHERE ${notNull(col)} GROUP BY ${q(col)} ORDER BY c DESC LIMIT ${n}` ).all() as { v: string; c: number }[]; return { distinct, top }; } // ── metadata parsing (mirrors timelinize.ts) ────────────────────────────────── function getColumnNames(db: DatabaseSync, tableName: string): string[] { return db.prepare(`PRAGMA table_info(${q(tableName)})`).all().map(c => (c as any).name) as string[]; } function parseMetadataRow(db: DatabaseSync, row: Record): MetadataRow | undefined { const { id, perRowTags, columnMeta: columnMetaCSV } = row; if (!id || typeof id !== 'string') return undefined; const columnNames = getColumnNames(db, id); // columnMeta may be absent for tables without type annotations — still useful to show let columnMeta: ColumnMetaType[] = []; if (columnMetaCSV && typeof columnMetaCSV === 'string') { const parsed = columnMetaCSV.split(',') as ColumnMetaType[]; if (parsed.length === columnNames.length) { columnMeta = parsed; } } return { id, perRowTags: typeof perRowTags === 'string' ? perRowTags : undefined, columnMeta, columnNames, }; } /** Maps semantic type names → the actual column name in this table (first match wins) */ function metaToNames(meta: MetadataRow): Partial> { const out: Partial> = {}; for (const [idx, colName] of meta.columnNames.entries()) { const type = meta.columnMeta[idx]; if (!type || out[type]) continue; // skip untyped or already-seen types out[type] = colName; } return out; } // ── table row builder ───────────────────────────────────────────────────────── const TOP_SENDERS = 3; const MAX_SENDER_LEN = 20; interface SummaryRow { type: string; id: string; n: string; dates: string; senders: string; geo: string; tags: string; } function buildSummaryRow(db: DatabaseSync, meta: MetadataRow): SummaryRow { const { id, perRowTags } = meta; const typeMap = metaToNames(meta); const n = rowCount(db, id); // ── shape label ───────────────────────────────────────────────────────────── let type: string; if (typeMap.sender && typeMap.isodatetime) type = 'chat'; else if (typeMap.isodatetime) type = 'time'; else type = 'static'; if (typeMap.lat && typeMap.lng) type += '+geo'; // ── datetime range ────────────────────────────────────────────────────────── const dates = typeMap.isodatetime ? datetimeRange(db, id, typeMap.isodatetime) : ''; // ── sender info ───────────────────────────────────────────────────────────── let senders = ''; if (typeMap.sender) { const { distinct, top } = topValues(db, id, typeMap.sender, TOP_SENDERS); const topStr = top .map(r => { const name = r.v.length > MAX_SENDER_LEN ? r.v.slice(0, MAX_SENDER_LEN - 1) + '…' : r.v; return `${name}×${r.c}`; }) .join(', '); senders = `${distinct} [${topStr}]`; } // ── geo ranges ────────────────────────────────────────────────────────────── let geo = ''; const latR = typeMap.lat ? numericRange(db, id, typeMap.lat) : null; const lngR = typeMap.lng ? numericRange(db, id, typeMap.lng) : null; if (latR && lngR) { geo = `[${fmt(latR.lo)}..${fmt(latR.hi)}] [${fmt(lngR.lo)}..${fmt(lngR.hi)}]`; } else if (latR) { geo = `lat=[${fmt(latR.lo)}..${fmt(latR.hi)}]`; } return { type, id, n: String(n), dates, senders, geo, tags: perRowTags ?? '' }; } // ── CSV output ──────────────────────────────────────────────────────────────── const COLUMNS: { key: keyof SummaryRow; header: string }[] = [ { key: 'type', header: 'type' }, { key: 'id', header: 'id' }, { key: 'n', header: 'n' }, { key: 'dates', header: 'dates' }, { key: 'senders', header: 'senders' }, { key: 'geo', header: 'lat / lng' }, { key: 'tags', header: 'tags' }, ]; function printCSV(summaryRows: SummaryRow[]) { const records = [ COLUMNS.map(c => c.header), ...summaryRows.map(row => COLUMNS.map(c => row[c.key])), ]; process.stdout.write(stringify(records)); } // ── main ───────────────────────────────────────────────────────────────────── async function main() { process.stderr.write(`${elapsed()} - Building targets\n`); const targets = await execPaths([ {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()} // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/fitbit/FullHumanName", op: fitbit()} ]); process.stderr.write(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets\n`); const db = await loadTaskInNewDb(targets); const rows = db.prepare( `SELECT id, perRowTags, columnMeta FROM base_data_manager_metadata ORDER BY id` ).all() as Record[]; const summaryRows: SummaryRow[] = []; for (const row of rows) { const meta = parseMetadataRow(db, row); if (!meta) continue; summaryRows.push(buildSummaryRow(db, meta)); } printCSV(summaryRows); db.close(); } if (process.argv[1] === __filename) { main(); }