216 lines
8.6 KiB
TypeScript
216 lines
8.6 KiB
TypeScript
import { type DatabaseSync, type SQLOutputValue } from "node:sqlite";
|
||
import { fileURLToPath } from "node:url";
|
||
import { stringify } from "csv-stringify/sync";
|
||
import { facebook } from "./data-export/facebook.ts";
|
||
import { execPaths, COLUMN_TYPES } from "./data-export/task.ts";
|
||
import { elapsed, loadTaskInNewDb } from "./main.ts";
|
||
|
||
const __filename = fileURLToPath(import.meta.url);
|
||
|
||
type ColumnMetaType = keyof typeof COLUMN_TYPES;
|
||
|
||
interface MetadataRow {
|
||
id: string;
|
||
perRowTags?: string;
|
||
columnMeta: ColumnMetaType[];
|
||
columnNames: string[];
|
||
}
|
||
|
||
// ── query helpers ─────────────────────────────────────────────────────────────
|
||
|
||
function q(name: string) {
|
||
return `"${name}"`;
|
||
}
|
||
|
||
/** Format a number compactly: integer if whole, otherwise 4 sig figs */
|
||
function fmt(v: number): string {
|
||
if (!isFinite(v)) return String(v);
|
||
if (Number.isInteger(v)) return String(v);
|
||
return v.toPrecision(4).replace(/\.?0+$/, '');
|
||
}
|
||
|
||
/** Non-null filter: treats SQL NULL, empty string, and the literal "null" as missing */
|
||
function notNull(col: string) {
|
||
return `${q(col)} IS NOT NULL AND ${q(col)} != '' AND ${q(col)} != 'null'`;
|
||
}
|
||
|
||
function rowCount(db: DatabaseSync, table: string): number {
|
||
return (db.prepare(`SELECT count(*) as n FROM ${q(table)}`).get() as { n: number }).n;
|
||
}
|
||
|
||
function datetimeRange(db: DatabaseSync, table: string, col: string): string {
|
||
const r = db.prepare(
|
||
`SELECT MIN(${q(col)}) as lo, MAX(${q(col)}) as hi FROM ${q(table)} WHERE ${notNull(col)}`
|
||
).get() as { lo: string | null; hi: string | null };
|
||
if (!r.lo) return '(no dates)';
|
||
// Trim to date portion if it looks like a full ISO datetime — keeps the line shorter
|
||
const trim = (s: string) => s.length > 10 && s[10] === 'T' ? s.slice(0, 10) : s;
|
||
return `${trim(r.lo)}..${trim(r.hi!)}`;
|
||
}
|
||
|
||
function numericRange(db: DatabaseSync, table: string, col: string): { lo: number; hi: number } | null {
|
||
const r = db.prepare(
|
||
`SELECT MIN(CAST(${q(col)} AS REAL)) as lo, MAX(CAST(${q(col)} AS REAL)) as hi
|
||
FROM ${q(table)} WHERE ${notNull(col)}`
|
||
).get() as { lo: number | null; hi: number | null };
|
||
return r.lo !== null ? { lo: r.lo, hi: r.hi! } : null;
|
||
}
|
||
|
||
function topValues(db: DatabaseSync, table: string, col: string, n: number): { distinct: number; top: { v: string; c: number }[] } {
|
||
const distinct = (db.prepare(
|
||
`SELECT count(distinct ${q(col)}) as d FROM ${q(table)} WHERE ${notNull(col)}`
|
||
).get() as { d: number }).d;
|
||
const top = db.prepare(
|
||
`SELECT ${q(col)} as v, count(*) as c FROM ${q(table)} WHERE ${notNull(col)}
|
||
GROUP BY ${q(col)} ORDER BY c DESC LIMIT ${n}`
|
||
).all() as { v: string; c: number }[];
|
||
return { distinct, top };
|
||
}
|
||
|
||
// ── metadata parsing (mirrors timelinize.ts) ──────────────────────────────────
|
||
|
||
function getColumnNames(db: DatabaseSync, tableName: string): string[] {
|
||
return db.prepare(`PRAGMA table_info(${q(tableName)})`).all().map(c => (c as any).name) as string[];
|
||
}
|
||
|
||
function parseMetadataRow(db: DatabaseSync, row: Record<string, SQLOutputValue>): MetadataRow | undefined {
|
||
const { id, perRowTags, columnMeta: columnMetaCSV } = row;
|
||
if (!id || typeof id !== 'string') return undefined;
|
||
|
||
const columnNames = getColumnNames(db, id);
|
||
|
||
// columnMeta may be absent for tables without type annotations — still useful to show
|
||
let columnMeta: ColumnMetaType[] = [];
|
||
if (columnMetaCSV && typeof columnMetaCSV === 'string') {
|
||
const parsed = columnMetaCSV.split(',') as ColumnMetaType[];
|
||
if (parsed.length === columnNames.length) {
|
||
columnMeta = parsed;
|
||
}
|
||
}
|
||
|
||
return {
|
||
id,
|
||
perRowTags: typeof perRowTags === 'string' ? perRowTags : undefined,
|
||
columnMeta,
|
||
columnNames,
|
||
};
|
||
}
|
||
|
||
/** Maps semantic type names → the actual column name in this table (first match wins) */
|
||
function metaToNames(meta: MetadataRow): Partial<Record<ColumnMetaType, string>> {
|
||
const out: Partial<Record<ColumnMetaType, string>> = {};
|
||
for (const [idx, colName] of meta.columnNames.entries()) {
|
||
const type = meta.columnMeta[idx];
|
||
if (!type || out[type]) continue; // skip untyped or already-seen types
|
||
out[type] = colName;
|
||
}
|
||
return out;
|
||
}
|
||
|
||
// ── table row builder ─────────────────────────────────────────────────────────
|
||
|
||
const TOP_SENDERS = 3;
|
||
const MAX_SENDER_LEN = 20;
|
||
|
||
interface SummaryRow {
|
||
type: string;
|
||
id: string;
|
||
n: string;
|
||
dates: string;
|
||
senders: string;
|
||
geo: string;
|
||
tags: string;
|
||
}
|
||
|
||
function buildSummaryRow(db: DatabaseSync, meta: MetadataRow): SummaryRow {
|
||
const { id, perRowTags } = meta;
|
||
const typeMap = metaToNames(meta);
|
||
const n = rowCount(db, id);
|
||
|
||
// ── shape label ─────────────────────────────────────────────────────────────
|
||
let type: string;
|
||
if (typeMap.sender && typeMap.isodatetime) type = 'chat';
|
||
else if (typeMap.isodatetime) type = 'time';
|
||
else type = 'static';
|
||
if (typeMap.lat && typeMap.lng) type += '+geo';
|
||
|
||
// ── datetime range ──────────────────────────────────────────────────────────
|
||
const dates = typeMap.isodatetime ? datetimeRange(db, id, typeMap.isodatetime) : '';
|
||
|
||
// ── sender info ─────────────────────────────────────────────────────────────
|
||
let senders = '';
|
||
if (typeMap.sender) {
|
||
const { distinct, top } = topValues(db, id, typeMap.sender, TOP_SENDERS);
|
||
const topStr = top
|
||
.map(r => {
|
||
const name = r.v.length > MAX_SENDER_LEN ? r.v.slice(0, MAX_SENDER_LEN - 1) + '…' : r.v;
|
||
return `${name}×${r.c}`;
|
||
})
|
||
.join(', ');
|
||
senders = `${distinct} [${topStr}]`;
|
||
}
|
||
|
||
// ── geo ranges ──────────────────────────────────────────────────────────────
|
||
let geo = '';
|
||
const latR = typeMap.lat ? numericRange(db, id, typeMap.lat) : null;
|
||
const lngR = typeMap.lng ? numericRange(db, id, typeMap.lng) : null;
|
||
if (latR && lngR) {
|
||
geo = `[${fmt(latR.lo)}..${fmt(latR.hi)}] [${fmt(lngR.lo)}..${fmt(lngR.hi)}]`;
|
||
} else if (latR) {
|
||
geo = `lat=[${fmt(latR.lo)}..${fmt(latR.hi)}]`;
|
||
}
|
||
|
||
return { type, id, n: String(n), dates, senders, geo, tags: perRowTags ?? '' };
|
||
}
|
||
|
||
// ── CSV output ────────────────────────────────────────────────────────────────
|
||
|
||
const COLUMNS: { key: keyof SummaryRow; header: string }[] = [
|
||
{ key: 'type', header: 'type' },
|
||
{ key: 'id', header: 'id' },
|
||
{ key: 'n', header: 'n' },
|
||
{ key: 'dates', header: 'dates' },
|
||
{ key: 'senders', header: 'senders' },
|
||
{ key: 'geo', header: 'lat / lng' },
|
||
{ key: 'tags', header: 'tags' },
|
||
];
|
||
|
||
function printCSV(summaryRows: SummaryRow[]) {
|
||
const records = [
|
||
COLUMNS.map(c => c.header),
|
||
...summaryRows.map(row => COLUMNS.map(c => row[c.key])),
|
||
];
|
||
process.stdout.write(stringify(records));
|
||
}
|
||
|
||
// ── main ─────────────────────────────────────────────────────────────────────
|
||
|
||
async function main() {
|
||
process.stderr.write(`${elapsed()} - Building targets\n`);
|
||
const targets = await execPaths([
|
||
{path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
|
||
// {path: "/home/cobertos/Seafile/archive/ExportedServiceData/fitbit/FullHumanName", op: fitbit()}
|
||
]);
|
||
process.stderr.write(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets\n`);
|
||
|
||
const db = await loadTaskInNewDb(targets);
|
||
|
||
const rows = db.prepare(
|
||
`SELECT id, perRowTags, columnMeta FROM base_data_manager_metadata ORDER BY id`
|
||
).all() as Record<string, SQLOutputValue>[];
|
||
|
||
const summaryRows: SummaryRow[] = [];
|
||
for (const row of rows) {
|
||
const meta = parseMetadataRow(db, row);
|
||
if (!meta) continue;
|
||
summaryRows.push(buildSummaryRow(db, meta));
|
||
}
|
||
|
||
printCSV(summaryRows);
|
||
|
||
db.close();
|
||
}
|
||
|
||
if (process.argv[1] === __filename) {
|
||
main();
|
||
}
|