225 lines
8.6 KiB
TypeScript
225 lines
8.6 KiB
TypeScript
import { type SQLOutputValue, type DatabaseSync } from "node:sqlite";
|
|
import { createWriteStream } from 'node:fs';
|
|
import { fileURLToPath } from "node:url";
|
|
import "./data-export/facebook.ts";
|
|
import { facebook } from "./data-export/facebook.ts";
|
|
import { execPaths, COLUMN_TYPES } from "./data-export/task.ts";
|
|
import * as DataIO from "./data-export/io.ts";
|
|
import {
|
|
startTime,
|
|
elapsed,
|
|
loadTaskInNewDb
|
|
} from "./main.ts";
|
|
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
|
|
function dumpDBTableToCSV(db: DatabaseSync, tableName: string, outputFile: string) {
|
|
const stream = createWriteStream(outputFile);
|
|
const stmt = db.prepare(`SELECT * FROM ${tableName}`);
|
|
|
|
let headerWritten = false;
|
|
for (const row of stmt.iterate()) {
|
|
if (!headerWritten) {
|
|
stream.write(Object.keys(row).join(',') + '\n');
|
|
headerWritten = true;
|
|
}
|
|
stream.write(Object.values(row).map(v => `"${String(v ?? '').replace(/"/g, '""')}"`).join(',') + '\n');
|
|
}
|
|
|
|
stream.end();
|
|
}
|
|
function getColumnNames(db: DatabaseSync, tableName: string) {
|
|
return db.prepare(`PRAGMA table_info(${tableName})`).all().map(c => c.name) as string[];
|
|
}
|
|
function templateToSql(template: string, columns: string[]) {
|
|
// Convert '{0}, {1}' to '%s, %s'
|
|
const args: string[] = [];
|
|
const sqlTemplate = template.replace(/\{(\d+)\}/g, (match, index) => {
|
|
args.push(columns[parseInt(index)]);
|
|
return '%s';
|
|
});
|
|
return `printf('${sqlTemplate}', ${args.join(', ')})`;
|
|
}
|
|
function sqlLiteral(str: string | undefined | null): string {
|
|
if (str === null || str === undefined) {
|
|
return 'NULL';
|
|
}
|
|
|
|
// Escape single quotes by doubling them
|
|
const escaped = str.replace(/'/g, "''");
|
|
|
|
// Wrap in single quotes
|
|
return `'${escaped}'`;
|
|
}
|
|
|
|
async function main() {
|
|
// Configure the tasks to run
|
|
console.log(`${elapsed()} - Building targets`);
|
|
const targets = await execPaths([
|
|
{path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
|
|
]);
|
|
console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`);
|
|
const db = await loadTaskInNewDb(targets);
|
|
|
|
// New output tables
|
|
db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`);
|
|
|
|
//(message, email, note,
|
|
// social, location, media, event, document,
|
|
// bookmark; defaults to note)
|
|
|
|
type ColumnMetaType = (keyof typeof COLUMN_TYPES);
|
|
interface MetadataRow {
|
|
id: string,
|
|
perRowDescription?: string,
|
|
perRowTags?: string,
|
|
columnMeta: ColumnMetaType[],
|
|
columnNames: string[],
|
|
metaId?: string
|
|
}
|
|
function verifyMetdataRow(input: Record<string, SQLOutputValue>): undefined | MetadataRow {
|
|
const { id, perRowDescription, perRowTags, columnMeta: columnMetaCSV, metaId } = input;
|
|
if (!id) {
|
|
console.error("Row did not have id/tableName, skipping");
|
|
return undefined;
|
|
}
|
|
if (typeof id !== "string") {
|
|
console.error(`Id must be string, got ${typeof id}, ${id}`);
|
|
return undefined;
|
|
}
|
|
if (!columnMetaCSV) {
|
|
console.warn(`${id} did not have columnMeta, nothing to do. Skipping`);
|
|
return undefined; // No column information
|
|
}
|
|
if (typeof columnMetaCSV !== "string") {
|
|
console.warn(`${id} did not have columnMeta of type string. Skipping`);
|
|
return undefined;
|
|
}
|
|
const columnMeta = columnMetaCSV.split(",") as ColumnMetaType[];
|
|
|
|
// Get the column names from the table id
|
|
const columnNames = getColumnNames(db, id);
|
|
if (columnNames.length !== columnMeta.length) {
|
|
console.error(`columnNames and columnMeta did not have same length. skipping`);
|
|
return undefined;
|
|
}
|
|
|
|
if (typeof perRowDescription !== "string" && perRowDescription !== undefined && perRowDescription !== null) {
|
|
console.warn(`Invalid typeof perRowDescription, was ${typeof perRowDescription}, value ${perRowDescription}`);
|
|
return undefined;
|
|
}
|
|
if (typeof perRowTags !== "string" && perRowTags !== undefined && perRowTags !== null) {
|
|
console.warn(`Invalid typeof perRowTags, was ${typeof perRowTags}, value ${perRowTags}`);
|
|
return undefined;
|
|
}
|
|
if (typeof metaId !== "string" && metaId !== undefined && metaId !== null) {
|
|
console.warn(`Invalid typeof metaId, was ${typeof metaId}, value ${metaId}`);
|
|
return undefined;
|
|
}
|
|
|
|
return {
|
|
id,
|
|
perRowDescription: perRowDescription ?? undefined,
|
|
perRowTags: perRowTags ?? undefined,
|
|
columnMeta,
|
|
columnNames,
|
|
metaId: metaId ?? undefined
|
|
};
|
|
}
|
|
|
|
/**Maps columnMeta names to the column names*/
|
|
function metaToNames(meta: MetadataRow): Partial<Record<ColumnMetaType, string>> {
|
|
const out: Partial<Record<ColumnMetaType, string>> = {};
|
|
for (const [idx, name] of meta.columnNames.entries()) {
|
|
const metaName = meta.columnMeta[idx];
|
|
if (out[metaName]) {
|
|
console.warn(`Duplicate column with metaName "${metaName}". The current one which will be used is "${out[metaName]}". Skipping the duplicate.`);
|
|
continue;
|
|
}
|
|
out[metaName] = name;
|
|
}
|
|
return out;
|
|
}
|
|
function metaParts(metaNameToColumnName: Partial<Record<ColumnMetaType, string>>): Record<ColumnMetaType, string> {
|
|
const out: Record<ColumnMetaType, string> = {} as any;
|
|
for (const type of Object.keys(COLUMN_TYPES) as ColumnMetaType[]) {
|
|
if (!metaNameToColumnName[type]) {
|
|
out[type] = "NULL";
|
|
continue;
|
|
}
|
|
// Wrap in brackets so column names like "from" don't cause any issues
|
|
out[type] = `[${metaNameToColumnName[type]}]`
|
|
}
|
|
return out;
|
|
}
|
|
|
|
// Iterate over all the tables and their metadata
|
|
const statement = db.prepare(`SELECT id, perRowDescription, perRowTags, columnMeta, metaId FROM base_data_manager_metadata`);
|
|
for (const row of statement.iterate()) {
|
|
const verified = verifyMetdataRow(row);
|
|
if (!verified) {
|
|
continue;
|
|
}
|
|
const { id, perRowDescription, perRowTags, columnMeta, columnNames, metaId } = verified;
|
|
const metaNameToColumnName = metaToNames(verified);
|
|
const part = metaParts(metaNameToColumnName);
|
|
|
|
// Now find what to insert into each row of the combined
|
|
// Per row tags is an string of csv'd items but needs to be made a literal
|
|
// TODO: Make this either a template string or have jq do something
|
|
// tagsPart = templateToSqlExpr(target.perRowTags, columnNames);
|
|
const tagsPart = sqlLiteral(perRowTags);
|
|
|
|
// Choose what to do with this table based on what meta is present
|
|
if (
|
|
!!metaNameToColumnName.sender
|
|
&& !!metaNameToColumnName.isodatetime
|
|
) {
|
|
if (!metaId) {
|
|
console.warn(`Chat ${id} with .sender but no .metaId. Skipping`);
|
|
continue;
|
|
}
|
|
|
|
// First pull the name of the conversation out of the metaId
|
|
const receiverThreadTitle = db.prepare(`SELECT title FROM ${metaId} WHERE (id=${sqlLiteral(id)})`).get()?.title;
|
|
if (!receiverThreadTitle || typeof receiverThreadTitle !== "string") {
|
|
console.warn(`Chat ${id} with .metaId ${metaId} returned invalid receiverThreadTitle ${typeof receiverThreadTitle}. Skipping`);
|
|
continue;
|
|
}
|
|
const receiverPart = sqlLiteral(receiverThreadTitle);
|
|
|
|
// Put this table into the combined table
|
|
db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${part.text}, 'message', ${part.sender}, ${receiverPart}, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`);
|
|
}
|
|
else if (!!metaNameToColumnName.isodatetime) {
|
|
// Put this table into the combined table
|
|
let descriptionPart = perRowDescription
|
|
? templateToSql(perRowDescription, columnNames)
|
|
: `'An entry from the ${id} table'`; // Default is just kinda garbo...
|
|
db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${descriptionPart}, 'node', NULL, NULL, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`);
|
|
}
|
|
else {
|
|
console.warn(`Table with id ${id} had no isodatetime or anything else of value, skipping...`);
|
|
}
|
|
}
|
|
|
|
const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()?.count;
|
|
console.log(`${elapsed()} - Combined database built with ${count} rows`);
|
|
|
|
// Dump it to the disk for debugging
|
|
const sqlitePath = "debug_your.csv.db";
|
|
console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`);
|
|
await DataIO.dumpDBToDisk(db, sqlitePath);
|
|
|
|
console.log(`${elapsed()} - Database written to disk`);
|
|
|
|
// Dump it all to the path specified
|
|
dumpDBTableToCSV(db, "combined", "your.csv");
|
|
console.log(`${elapsed()} - Combined database written to disk as CSV`);
|
|
db.close();
|
|
}
|
|
|
|
if (process.argv[1] === __filename) {
|
|
main();
|
|
}
|
|
|