Compare commits
4 commits
9c3bdaa100
...
c093fbfcee
| Author | SHA1 | Date | |
|---|---|---|---|
| c093fbfcee | |||
| 7d815833e6 | |||
| a4fbe1618d | |||
| f6d0427a45 |
60 changed files with 5399 additions and 1421 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -1,4 +1,5 @@
|
|||
node_modules/
|
||||
your.db
|
||||
*.db
|
||||
your.csv
|
||||
.gitSAFE
|
||||
*.DELETE-THIS-HAS-PII
|
||||
25
README.md
Normal file
25
README.md
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# base-data-manager
|
||||
|
||||
A TypeScript project for parsing many types of data exports into tabular formats
|
||||
|
||||
**This is heavily WIP, and mostly just a toy for myself**
|
||||
|
||||
### Installation
|
||||
|
||||
* Install `jq`
|
||||
* Install sqlite `csv.so` extension (Hardcoded to `/home/cobertos/sqlite-files/` currently)
|
||||
* Install `node` + `pnpm i`
|
||||
* See `main.ts` for current example usage
|
||||
|
||||
|
||||
### Proposed Architecture
|
||||
|
||||
The architecture runs in 2 steps.
|
||||
|
||||
The first step is unopinionated in its output format. It's meant to take the source data exactly as-is and output it as CSV. All source data should pass through, but will be normalized in CSV.
|
||||
|
||||
**TODO: It's not completely unopinionated, there is some normalization for names of columns I think we want to apply? Or maybe we apply that later...**
|
||||
|
||||
An optional second step combines everything into a single SQLite database. From here we normalize many different types of data across multiple exports into a single opinionated output. For example, message threads/channels should all have the same table format, or end up in the same table
|
||||
|
||||
**TODO: No idea if the second part should be a part of this project... but it currently is**
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -1,105 +1,115 @@
|
|||
import { TaskTargetPipelineHelper } from "./task.ts";
|
||||
import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
|
||||
import { htmlSelectorChunkedDuplex } from "./html.ts";
|
||||
|
||||
export function google(this: TaskTargetPipelineHelper){
|
||||
const p = this.assignMeta({ idValue: t=>`Google - ${t.basename}` }); // Generic ID for everything in here
|
||||
const col: Set<TaskTargetPipelineHelper> = new Set();
|
||||
|
||||
// TODO: There is a root takeout folder
|
||||
export function google(){
|
||||
return pipe(
|
||||
// Generic ID for everything in here
|
||||
assignMeta({ idValue: t=>`Google - ${t.basename}` }),
|
||||
branchGen(function*() {
|
||||
// TODO: There is a root takeout folder
|
||||
|
||||
|
||||
p.collect(col).cd('Access Log Activity/Activities - A list of Google services accessed by.csv').read()
|
||||
p.collect(col).cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv').read()
|
||||
yield pipe(cd('Access Log Activity/Activities - A list of Google services accessed by.csv'), read())
|
||||
yield pipe(cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv'), read())
|
||||
|
||||
// Assignments - data was empty
|
||||
// Business messages - GMB messages, there's some but so far outside of what I want
|
||||
// TODO: Calendar, exports an .ics
|
||||
// Assignments - data was empty
|
||||
// Business messages - GMB messages, there's some but so far outside of what I want
|
||||
// TODO: Calendar, exports an .ics
|
||||
|
||||
// a = t.fork().cd(`Chrome`)
|
||||
// TODO: Assersses and mode.json
|
||||
// TODO: Bookmarks.csv
|
||||
// TODO: Device Information.json
|
||||
// TODO: Dictionary.csv
|
||||
// TODO: ...
|
||||
p.collect(col).cd('Chrome/History.json')
|
||||
.read()
|
||||
// TODO: Typed Url", no data
|
||||
// TODO: "session", complex data
|
||||
// Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
|
||||
// TODO: time_usec IS WRONG!! Needs to be ms
|
||||
.cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
|
||||
(
|
||||
."Browser History"[]
|
||||
| [.favicon_url, .page_transition, .title, .url, (.time_usec | todateiso8601)]
|
||||
)
|
||||
| @csv`])
|
||||
// a = t.fork().cd(`Chrome`)
|
||||
// TODO: Assersses and mode.json
|
||||
// TODO: Bookmarks.csv
|
||||
// TODO: Device Information.json
|
||||
// TODO: Dictionary.csv
|
||||
// TODO: ...
|
||||
yield pipe(
|
||||
cd('Chrome/History.json'),
|
||||
read(),
|
||||
// TODO: Typed Url", no data
|
||||
// TODO: "session", complex data
|
||||
// Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
|
||||
// TODO: time_usec IS WRONG!! Needs to be ms
|
||||
cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
|
||||
(
|
||||
."Browser History"[]
|
||||
| [.favicon_url, .page_transition, .title, .url, (.time_usec | todateiso8601)]
|
||||
)
|
||||
| @csv
|
||||
`])
|
||||
);
|
||||
|
||||
// TODO: Contactss, exports an .vcf
|
||||
// TODO: ...
|
||||
// TODO: Contactss, exports an .vcf
|
||||
// TODO: ...
|
||||
|
||||
// a = t.fork().cd(`Google Pay`)
|
||||
p.collect(col).cd(`Google Pay/Google transactions`).glob(`transactions_*.csv`)
|
||||
.read()
|
||||
// .fork("a").cd(`Money sends and requests`)
|
||||
// .fork().cd(`Money sends and requests.csv`)
|
||||
// .read()
|
||||
// .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
|
||||
// TODO: One more folder, and it only has a pdf
|
||||
// a = t.fork().cd(`Google Pay`)
|
||||
yield pipe(
|
||||
cd(`Google Pay/Google transactions`),
|
||||
glob(`transactions_*.csv`),
|
||||
read(),
|
||||
// .fork("a").cd(`Money sends and requests`)
|
||||
// .fork().cd(`Money sends and requests.csv`)
|
||||
// .read()
|
||||
// .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
|
||||
// TODO: One more folder, and it only has a pdf
|
||||
);
|
||||
|
||||
// TODO: Google Play Movies _ TV - no data
|
||||
// TODO: ...
|
||||
// TODO: Google Play Movies _ TV - no data
|
||||
// TODO: ...
|
||||
|
||||
p.collect(col).cd("Location History/Location History.json")
|
||||
.read()
|
||||
// TODO: This is missing
|
||||
// "altitude" : 158,
|
||||
// "verticalAccuracy" : 68
|
||||
// and the activity models. I had no idea google tries to determine if I'm "tilting"
|
||||
.cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
|
||||
(
|
||||
.locations[]
|
||||
| [.timestampMs | todateiso8601, .latitudeE7, .longitudeE7, .accuracy]
|
||||
)
|
||||
| @csv`])
|
||||
// There's also the semantic history but that's an entire nother can of worms
|
||||
// it seems like
|
||||
yield pipe(
|
||||
cd("Location History/Location History.json"),
|
||||
read(),
|
||||
// TODO: This is missing
|
||||
// "altitude" : 158,
|
||||
// "verticalAccuracy" : 68
|
||||
// and the activity models. I had no idea google tries to determine if I'm "tilting"
|
||||
cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
|
||||
(
|
||||
.locations[]
|
||||
| [.timestampMs | todateiso8601, .latitudeE7, .longitudeE7, .accuracy]
|
||||
)
|
||||
| @csv
|
||||
`])
|
||||
);
|
||||
// There's also the semantic history but that's an entire nother can of worms
|
||||
// it seems like
|
||||
|
||||
// TODO: Needs no-headers!
|
||||
// a = t.fork().cd(`My Activity`)
|
||||
// a.fork().glob(`**/MyActivity.html`)
|
||||
// .setId(t=>`Google - ${t.basenameN(2)}`)
|
||||
// .read()
|
||||
// .pipe(()=>{
|
||||
// // Parses the MyActivity format, chunking it into pieces of HTML text
|
||||
// // and then parsing out the text
|
||||
// const dup = htmlSelectorChunkedDuplex(
|
||||
// (tag, attrs)=>{
|
||||
// // TODO: We also probably want to get and parse each
|
||||
// // ".content-cell.mdl-typography--caption" as well (it
|
||||
// // has location for websearches and sometimes a details field)
|
||||
// // but then we have to get ".mdl-grid" and parse it
|
||||
// return attrs.class?.includes("content-cell")
|
||||
// && attrs.class?.includes("mdl-typography--body-1")
|
||||
// && !attrs.class?.includes("mdl-typography--text-right")
|
||||
// },
|
||||
// (chunk)=>{
|
||||
// const text = chunk.innerText;
|
||||
// const split = text.split("\n");
|
||||
// const timestamp = split.pop(); // TODO: need to parse this
|
||||
// const rest = split.join("\n");
|
||||
// // TODO: Escape instead of replace
|
||||
// const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
|
||||
// // Return a CSV
|
||||
// return `"${restSafe}","${timestamp}"\n`;
|
||||
// }
|
||||
// );
|
||||
// return dup;
|
||||
// })
|
||||
// TODO: Needs no-headers!
|
||||
// a = t.fork().cd(`My Activity`)
|
||||
// a.fork().glob(`**/MyActivity.html`)
|
||||
// .setId(t=>`Google - ${t.basenameN(2)}`)
|
||||
// .read()
|
||||
// .pipe(()=>{
|
||||
// // Parses the MyActivity format, chunking it into pieces of HTML text
|
||||
// // and then parsing out the text
|
||||
// const dup = htmlSelectorChunkedDuplex(
|
||||
// (tag, attrs)=>{
|
||||
// // TODO: We also probably want to get and parse each
|
||||
// // ".content-cell.mdl-typography--caption" as well (it
|
||||
// // has location for websearches and sometimes a details field)
|
||||
// // but then we have to get ".mdl-grid" and parse it
|
||||
// return attrs.class?.includes("content-cell")
|
||||
// && attrs.class?.includes("mdl-typography--body-1")
|
||||
// && !attrs.class?.includes("mdl-typography--text-right")
|
||||
// },
|
||||
// (chunk)=>{
|
||||
// const text = chunk.innerText;
|
||||
// const split = text.split("\n");
|
||||
// const timestamp = split.pop(); // TODO: need to parse this
|
||||
// const rest = split.join("\n");
|
||||
// // TODO: Escape instead of replace
|
||||
// const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
|
||||
// // Return a CSV
|
||||
// return `"${restSafe}","${timestamp}"\n`;
|
||||
// }
|
||||
// );
|
||||
// return dup;
|
||||
// })
|
||||
|
||||
// TODO: News
|
||||
// TODO: Profile
|
||||
// TODO: Tasks - No data
|
||||
|
||||
return Array.from(col);
|
||||
// TODO: News
|
||||
// TODO: Profile
|
||||
// TODO: Tasks - No data
|
||||
})
|
||||
);
|
||||
};
|
||||
|
||||
|
|
|
|||
52
data-export/io.ts
Normal file
52
data-export/io.ts
Normal file
|
|
@ -0,0 +1,52 @@
|
|||
import fs from 'node:fs/promises';
|
||||
import fsSync from 'node:fs';
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
import { type ProcessOutputAggregate, type RunOutput, TaskTarget, runAll, type ProcessOutputSimple } from "./task.ts";
|
||||
import { ProcessOutput } from 'zx';
|
||||
|
||||
|
||||
/**
 * Loads one pipeline result into `db` as a regular table named after the target's id.
 *
 * The CSV text in `result.stdout` is written to a temporary file, exposed to
 * SQLite through a `csv` virtual table called `temp.intermediate`, copied into
 * a real table, and then both the virtual table and the temporary file are
 * removed again.
 *
 * @param db Database to load into; it must be able to create `csv` virtual tables
 * @param target Target whose `id` names both the temporary file and the new table
 * @param result Output whose `stdout` holds the CSV text (first row is used as the header)
 */
async function loadCSVTable(
  db: DatabaseSync,
  target: TaskTarget,
  result: ProcessOutput | ProcessOutputAggregate | ProcessOutputSimple
) {
  const id = target.id;
  const tmpPath = `/tmp/${id}.csv`;
  // Double any embedded quote so an unusual id cannot end the SQL string
  // literal / quoted identifier early
  const sqlPath = tmpPath.replace(/'/g, "''");
  const sqlTable = id.replace(/"/g, '""');

  try {
    // Writing by path opens and closes the file internally, so no file handle
    // is left open if the write fails
    await fs.writeFile(tmpPath, result.stdout, { encoding: 'utf8' });

    db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${sqlPath}', header);`);
    try {
      db.exec(`CREATE TABLE "${sqlTable}" AS SELECT * FROM intermediate;`);
    }
    finally {
      // Drop even when the copy failed; otherwise the next call cannot
      // create `temp.intermediate` again
      db.exec(`DROP TABLE IF EXISTS intermediate;`);
    }
  }
  finally {
    // The rows now live in the database (or the load failed); either way the
    // exported CSV should not linger in /tmp
    await fs.rm(tmpPath, { force: true });
  }
}
|
||||
|
||||
// TODO: This should really have the same name throughout the codebase?
// Until then, `runPipeline` is simply a second exported name for `runAll`.
|
||||
export const runPipeline = runAll;
|
||||
|
||||
/**
 * Loads every run output into `db`, one entry after another.
 * @param db Must be a DatabaseSync with the csv.so extension enabled
 * @param runOutput Target/result pairs to load; each pair is handed to loadCSVTable
 */
export async function loadIntoDb(db: DatabaseSync, runOutput: RunOutput[]) {
  // Each load is awaited before the next one starts (no Promise.all here)
  for (const entry of runOutput) {
    const { target, result } = entry;
    await loadCSVTable(db, target, result);
  }
}
|
||||
/**
 * Creates an in-memory SQLite database with the `csv` extension loaded.
 * Extension loading is switched off again once the extension is in place.
 *
 * @param csvExtensionPath Path of the compiled csv extension. Defaults to the
 *   location that used to be hard-coded here.
 * @returns The ready-to-use database
 * @throws Whatever `loadExtension` throws when the extension cannot be loaded;
 *   the database is closed before the error propagates
 */
export function getDefaultDB(
  csvExtensionPath: string = "/home/cobertos/sqlite-files/csv.so"
): DatabaseSync {
  const db = new DatabaseSync(":memory:", { allowExtension: true });
  try {
    db.loadExtension(csvExtensionPath);
  }
  catch (err: unknown) {
    // Do not leave an open connection behind when setup fails
    db.close();
    throw err;
  }
  db.enableLoadExtension(false);
  return db;
}
|
||||
/**
 * Writes the whole `main` database to a fresh SQLite file at `dumpPath`.
 *
 * @param db Database to dump
 * @param dumpPath Destination file; an existing file at this path is replaced
 */
export async function dumpDBToDisk(db: DatabaseSync, dumpPath: string) {
  // Remove any previous dump first. `force` makes a missing file a no-op, so
  // there is no separate "does it exist" check that could race with the removal.
  await fs.rm(dumpPath, { force: true });

  // Double single quotes so a path containing ' stays inside the SQL string literal
  const sqlPath = dumpPath.replace(/'/g, "''");

  // Dump it all to the path specified
  db.exec(`VACUUM main INTO '${sqlPath}'`);
}
|
||||
|
|
@ -1,15 +1,18 @@
|
|||
import { $, type ProcessOutput } from 'zx';
|
||||
import os from 'os';
|
||||
import { type TaskTarget, run } from "./task.ts";
|
||||
|
||||
$.verbose = false;
|
||||
|
||||
export async function parallel(
|
||||
targets: TaskTarget[],
|
||||
/**Generic parallel runner with optional logging
|
||||
* Runs `targets` with `runFn` up to a maximum of `maxConcurrency` amount at a time
|
||||
* Shaped in a way that expects generally something that returns zx.ProcessOutput (or
|
||||
* something with .duration and .ok built-in to the return)
|
||||
* @param runFn Should NOT throw. Return { ok: false } instead
|
||||
*/
|
||||
export async function parallel<T, R extends { duration: number, ok: boolean }>(
|
||||
targets: T[],
|
||||
runFn: (t: T)=>Promise<R>,
|
||||
quiet: boolean = false,
|
||||
maxConcurrency: number = os.cpus().length
|
||||
): Promise<ProcessOutput[]> {
|
||||
const resultMap = new Map<string, ProcessOutput>();
|
||||
): Promise<R[]> {
|
||||
const resultMap = new Map<T, R>();
|
||||
|
||||
const total = targets.length;
|
||||
let completed = 0;
|
||||
|
|
@ -40,14 +43,14 @@ export async function parallel(
|
|||
process.stderr.write(`\r${formatEta()}`.padEnd(80));
|
||||
}
|
||||
|
||||
async function runJob(t: TaskTarget): Promise<void> {
|
||||
async function runJob(t: T): Promise<void> {
|
||||
running++;
|
||||
printStatus();
|
||||
|
||||
const result = await run(t);
|
||||
const result = await runFn(t);
|
||||
completionTimes.push(result.duration);
|
||||
|
||||
resultMap.set(t.id, result);
|
||||
resultMap.set(t, result);
|
||||
|
||||
running--;
|
||||
completed++;
|
||||
|
|
@ -76,13 +79,15 @@ export async function parallel(
|
|||
process.stderr.write('\n');
|
||||
const totalSeconds = ((Date.now() - startTime) / 1000).toFixed(1);
|
||||
const failed = Array.from(resultMap.values().filter(p => !p.ok));
|
||||
process.stderr.write(
|
||||
`\nCompleted ${total} jobs in ${totalSeconds}s (${failed.length} failed)\n`
|
||||
);
|
||||
if (!quiet) {
|
||||
process.stderr.write(
|
||||
`\nCompleted ${total} jobs in ${totalSeconds}s (${failed.length} failed)\n`
|
||||
);
|
||||
}
|
||||
|
||||
const output = targets
|
||||
.map(t => {
|
||||
const r = resultMap.get(t.id)!;
|
||||
const r = resultMap.get(t)!;
|
||||
return r;
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,10 @@ import fs from 'node:fs';
|
|||
import { strict as assert } from "node:assert";
|
||||
import { ZipFS } from "./zipFs.ts";
|
||||
import { globSync } from "glob";
|
||||
import { $, ProcessPromise, quote } from "zx";
|
||||
import { $, ProcessOutput, quote } from "zx";
|
||||
import { parallel } from "./parallel.ts";
|
||||
|
||||
$.verbose = false;
|
||||
|
||||
type FSImpl = {
|
||||
isZip?: boolean;
|
||||
|
|
@ -38,19 +41,20 @@ function safe(s: string) {
|
|||
|
||||
interface TaskTargetOp {
|
||||
type: "read" | "mid";
|
||||
toShell(target: TaskTarget): string;
|
||||
toShell(target: TaskTarget): string | undefined;
|
||||
clone(): TaskTargetOp;
|
||||
}
|
||||
class TaskTargetRead implements TaskTargetOp {
|
||||
get type(){ return "read" as const; }
|
||||
toShell(target: TaskTarget) {
|
||||
if (target.fsImpl.isZip) {
|
||||
// Read the file to stdout from the target inside the zip file
|
||||
// This relies on the internals of fsImpl a bit to have the path to
|
||||
// the root zip so we can create a command against it
|
||||
assert(target.fsImpl.zipPath, "Should have a zipPath");
|
||||
// We need to be able to do this
|
||||
return `7z x ${quote(target.fsImpl.zipPath)} -so ${quote(target.path)}`;
|
||||
}
|
||||
|
||||
// TODO : Implement when reading from a zip file
|
||||
return `cat ${quote(target.path)}`;
|
||||
}
|
||||
clone() {
|
||||
|
|
@ -115,19 +119,10 @@ export const COLUMN_TYPES = {
|
|||
"TODO": {}
|
||||
};
|
||||
|
||||
// // if (type === "numeric") {
|
||||
// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
|
||||
// // }
|
||||
// // else {
|
||||
// // queryLine = `count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
|
||||
// // }
|
||||
|
||||
/**Column metadata. Just a string into the TYPES*/
|
||||
type ColumnMeta = (keyof typeof COLUMN_TYPES | undefined);
|
||||
// Make non-optional version of just the metadata values of TaskTarget
|
||||
type TaskTargetMeta = Required<Pick<TaskTarget, "idValue" | "perRowDescription" | "perRowTags" | "columnMeta">>;
|
||||
type TaskTargetMeta = Required<Pick<TaskTarget, "idValue" | "perRowDescription" | "perRowTags" | "columnMeta" | "aggregate" | "metaIdValue" | "aggregateColumns">>;
|
||||
|
||||
export class TaskTarget {
|
||||
/**The current path pointed to by this TaskTarget*/
|
||||
|
|
@ -149,15 +144,16 @@ export class TaskTarget {
|
|||
* you might do something like '"{3}" sent from {2} to {1}'
|
||||
* */
|
||||
perRowDescription?: string;
|
||||
/**For every output CSV, this defines a SQL expression evaluated per-row that
|
||||
* returns a comma-separated string of tags to assign to that row.
|
||||
* Use the items {0}, {1} to template column values, same as perRowDescription.
|
||||
* Example: A static set of tags: "'me,facebook'"
|
||||
* Example: Tags derived from a column: "'facebook,' || {2}"
|
||||
* */
|
||||
/**A CSV of tags that is added to every row of the table (TODO: no template functionality currently)*/
|
||||
perRowTags?: string;
|
||||
/**Metadata about the columns*/
|
||||
columnMeta?: ColumnMeta[];
|
||||
/**Whether or not to aggregate to a single task (everything with the id value idValue)*/
|
||||
aggregate?: boolean;
|
||||
/**Names of the columns to aggregate with*/
|
||||
aggregateColumns?: string[];
|
||||
/**A metadata TaskTarget for this TaskTarget, if one exists*/
|
||||
metaIdValue?: ValidId;
|
||||
|
||||
constructor(path: string){
|
||||
this.path = path;
|
||||
|
|
@ -194,6 +190,15 @@ export class TaskTarget {
|
|||
}
|
||||
return safe(this.idValue);
|
||||
}
|
||||
/**The resolved `metaIdValue` passed through `safe()`, or undefined when no
 * `metaIdValue` is set. A function value is called with this target first.*/
get metaId() {
  const raw = this.metaIdValue;
  if (!raw) {
    return undefined;
  }
  const resolved = typeof raw === "function" ? raw(this) : raw;
  return safe(resolved);
}
|
||||
|
||||
/**Changes the current directory of the target*/
|
||||
cd(path: string): TaskTarget {
|
||||
|
|
@ -233,6 +238,9 @@ export class TaskTarget {
|
|||
t.perRowDescription = this.perRowDescription;
|
||||
t.perRowTags = this.perRowTags;
|
||||
t.columnMeta = this.columnMeta?.slice();
|
||||
t.metaIdValue = this.metaIdValue;
|
||||
t.aggregate = this.aggregate;
|
||||
t.aggregateColumns = this.aggregateColumns?.slice();
|
||||
return t;
|
||||
}
|
||||
|
||||
|
|
@ -247,6 +255,7 @@ export class TaskTarget {
|
|||
/**Renders the pipeline as one shell command: every op's shell fragment,
 * joined with " | ". Ops that yield an empty string or undefined are left out.*/
toShell() {
  const fragments = this.pipeline.map(op => op.toShell(this));
  const nonEmpty = fragments.filter(fragment => Boolean(fragment));
  return nonEmpty.join(" | ");
}
|
||||
|
|
@ -269,42 +278,72 @@ export class TaskTarget {
|
|||
}
|
||||
}
|
||||
|
||||
export function each(targets: TaskTarget[], fn: (t: TaskTarget)=>void) {
|
||||
for (const t of targets) {
|
||||
fn(t);
|
||||
}
|
||||
export interface PipelineOp {
|
||||
(targets: TaskTarget[]): TaskTarget[] | Promise<TaskTarget[]>;
|
||||
}
|
||||
export function map(targets: TaskTarget[], fn: (t: TaskTarget)=>TaskTarget) {
|
||||
const newTargets = [];
|
||||
for (const t of targets) {
|
||||
newTargets.push(fn(t));
|
||||
}
|
||||
return newTargets;
|
||||
|
||||
export function cd(path: string): PipelineOp {
|
||||
return (targets: TaskTarget[]) => targets.map(t => t.clone().cd(path));
|
||||
}
|
||||
export function cd(targets: TaskTarget[], path: string): TaskTarget[] {
|
||||
return targets.map(t => t.clone().cd(path));
|
||||
export function glob(globPath: string): PipelineOp {
|
||||
return (targets: TaskTarget[]) => targets.map(t => t.glob(globPath)).flat();
|
||||
}
|
||||
export function glob(targets: TaskTarget[], globPath: string): TaskTarget[] {
|
||||
return targets.map(t => t.glob(globPath)).flat();
|
||||
export function unzip(): PipelineOp {
|
||||
return async (targets: TaskTarget[]) => Promise.all(targets.map(t => t.unzip()));
|
||||
}
|
||||
export async function unzip(targets: TaskTarget[]): Promise<TaskTarget[]> {
|
||||
return Promise.all(targets.map(t => t.unzip()));
|
||||
export function read(): PipelineOp {
|
||||
return (targets: TaskTarget[]) => targets.map(t => t.clone().read())
|
||||
}
|
||||
export function read(targets: TaskTarget[]): TaskTarget[] {
|
||||
return targets.map(t => t.clone().read())
|
||||
export function cmd(cmd: ValidCmd): PipelineOp {
|
||||
return (targets: TaskTarget[]) => targets.map(t => t.clone().cmd(cmd))
|
||||
}
|
||||
export function cmd(targets: TaskTarget[], cmd: ValidCmd): TaskTarget[] {
|
||||
return targets.map(t => t.clone().cmd(cmd))
|
||||
export function assignMeta(meta: Partial<TaskTargetMeta>): PipelineOp {
|
||||
return (targets: TaskTarget[]) => targets.map(t => t.clone().assignMeta(meta))
|
||||
}
|
||||
export function assignMeta(targets: TaskTarget[], meta: Partial<TaskTargetMeta>): TaskTarget[] {
|
||||
return targets.map(t => t.clone().assignMeta(meta))
|
||||
|
||||
/**Builds an op that passes the target list through `Array.prototype.map`
 * with `fn`, producing the list of returned targets.*/
export function each(fn: (t: TaskTarget)=>TaskTarget): PipelineOp {
  const applyToAll: PipelineOp = (targets: TaskTarget[]) => targets.map(fn);
  return applyToAll;
}
|
||||
/**Chains ops left to right: each op receives the (awaited) output of the one
 * before it. With no ops the input targets are returned unchanged.*/
export function pipe(...ops: PipelineOp[]): PipelineOp {
  return async (targets: TaskTarget[]) => {
    let current = targets;
    for (const step of ops) {
      // Steps may be sync or async; await covers both
      current = await step(current);
    }
    return current;
  };
}
|
||||
/**Fans out: every op is started with the same input targets, and the
 * per-op results are concatenated in op order once all have finished.*/
export function branch(...ops: PipelineOp[]): PipelineOp {
  return async (targets: TaskTarget[]) => {
    const pending = ops.map(op => op(targets));
    const perBranch = await Promise.all(pending);
    return perBranch.flat();
  };
}
|
||||
/**Same as `branch`, but the ops are supplied by a generator function.
 * The generator is drained once, when `branchGen` is called, not per run.*/
export function branchGen(genFn: ()=>Generator<PipelineOp>): PipelineOp {
  const yieldedOps = [...genFn()];
  const fanOut = branch(...yieldedOps);
  return (targets: TaskTarget[]) => fanOut(targets);
}
|
||||
|
||||
/**Runs each entry's op against a new TaskTarget created from the entry's
 * path, and returns all resulting targets as one flat list in entry order.*/
export async function execPaths(entries: ({path: string, op: PipelineOp })[]) {
  // One async job per entry: seed a single-target list from the path and
  // hand it to that entry's op
  const jobs = entries.map(async ({ path, op }) => {
    const seed = [new TaskTarget(path)];
    return await op(seed);
  });
  const perEntry = await Promise.all(jobs);
  return perEntry.flat();
}
|
||||
|
||||
|
||||
/**Verify, anything that fails is skipped and throws an error*/
|
||||
export async function verify(targets: TaskTarget[]) {
|
||||
const outTargets: TaskTarget[] = [];
|
||||
for (const t of targets) {
|
||||
// Make sure fsImpl is ready
|
||||
// TODO: DO NOT PUT THIS IN VERIFY, this should go somewhere in the task building stuff...
|
||||
if ("ready" in t.fsImpl && !t.fsImpl.ready && t.fsImpl.init) {
|
||||
await t.fsImpl.init();
|
||||
}
|
||||
|
|
@ -319,78 +358,133 @@ export async function verify(targets: TaskTarget[]) {
|
|||
|
||||
outTargets.push(t);
|
||||
}
|
||||
|
||||
return outTargets;
|
||||
}
|
||||
|
||||
function collectionSwap(a: TaskTargetPipelineHelper, b: TaskTargetPipelineHelper) {
|
||||
if (!a.__collection) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Remove a, add b
|
||||
const collection = a.__collection;
|
||||
delete a.__collection;
|
||||
collection.delete(a);
|
||||
b.__collection = collection;
|
||||
collection.add(b);
|
||||
export interface ProcessOutputAggregate {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
exitCodes: (number | null)[];
|
||||
duration: number;
|
||||
ok: boolean;
|
||||
}
|
||||
export interface ProcessOutputSimple {
|
||||
stdout: string;
|
||||
stderr: string;
|
||||
exitCode: number;
|
||||
duration: number;
|
||||
ok: boolean;
|
||||
}
|
||||
|
||||
export class TaskTargetPipelineHelper extends Array<TaskTarget> {
|
||||
__collection?: Set<TaskTargetPipelineHelper>;
|
||||
|
||||
static pipeline(t: TaskTarget[]): TaskTargetPipelineHelper {
|
||||
if (Object.getPrototypeOf(t) === TaskTargetPipelineHelper.prototype) {
|
||||
return t as any; // Already done
|
||||
}
|
||||
Object.setPrototypeOf(t, TaskTargetPipelineHelper.prototype);
|
||||
return t as any;
|
||||
function combineProcessOutputAggregate(poa: ProcessOutputAggregate | undefined, t: TaskTarget, po: ProcessOutput) {
|
||||
if (!poa) {
|
||||
assert(t.aggregateColumns, "aggregate TaskTarget must have aggregateColumns");
|
||||
const headers = t.aggregateColumns.join(",") + "\n";
|
||||
return {
|
||||
stdout: headers + po.stdout,
|
||||
stderr: po.stderr,
|
||||
exitCodes: [po.exitCode],
|
||||
duration: po.duration,
|
||||
ok: po.ok
|
||||
};
|
||||
}
|
||||
|
||||
_fn(fn: (t: TaskTarget[])=>TaskTarget[]): TaskTargetPipelineHelper {
|
||||
const p = TaskTargetPipelineHelper.pipeline(this);
|
||||
const t = fn(p);
|
||||
const p2 = TaskTargetPipelineHelper.pipeline(t);
|
||||
collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection
|
||||
return p2;
|
||||
}
|
||||
async _afn(fn: (t: TaskTarget[])=>Promise<TaskTarget[]>): Promise<TaskTargetPipelineHelper> {
|
||||
const p = TaskTargetPipelineHelper.pipeline(this);
|
||||
const t = await fn(p);
|
||||
const p2 = TaskTargetPipelineHelper.pipeline(t);
|
||||
collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection
|
||||
return p2;
|
||||
}
|
||||
|
||||
cd(path: string): TaskTargetPipelineHelper {
|
||||
return this._fn(t => cd(t, path));
|
||||
}
|
||||
glob(globPath: string): TaskTargetPipelineHelper {
|
||||
return this._fn(t => glob(t, globPath));
|
||||
}
|
||||
async unzip(): Promise<TaskTargetPipelineHelper> {
|
||||
return this._afn(unzip);
|
||||
}
|
||||
read(): TaskTargetPipelineHelper {
|
||||
return this._fn(read);
|
||||
}
|
||||
cmd(_cmd: ValidCmd): TaskTargetPipelineHelper {
|
||||
return this._fn(t => cmd(t, _cmd));
|
||||
}
|
||||
assignMeta(meta: Partial<TaskTargetMeta>): TaskTargetPipelineHelper {
|
||||
return this._fn(t => assignMeta(t, meta));
|
||||
}
|
||||
|
||||
/**
|
||||
* @todo Nested versions of this don't currently work, but they could if we
|
||||
* turn __collection into an array of collections
|
||||
*/
|
||||
collect(_c: Set<TaskTargetPipelineHelper>) {
|
||||
this.__collection = _c;
|
||||
return this;
|
||||
}
|
||||
// Comes with a builtin "\n" from jq on stdout and stderr, no need to add
|
||||
// a trailing one
|
||||
poa.stdout += po.stdout;
|
||||
poa.stderr += po.stderr;
|
||||
poa.exitCodes.push(po.exitCode);
|
||||
poa.duration += po.duration;
|
||||
poa.ok &&= po.ok;
|
||||
return poa;
|
||||
}
|
||||
|
||||
export async function run(target: TaskTarget): Promise<ProcessPromise> {
|
||||
export interface RunOutput {
|
||||
target: TaskTarget,
|
||||
result: ProcessOutput | ProcessOutputAggregate | ProcessOutputSimple
|
||||
}
|
||||
|
||||
/**Executes the target's shell pipeline via `bash -c` and resolves with the
 * process output. `nothrow` is set, so a failing command does not reject.*/
export async function run(target: TaskTarget): Promise<ProcessOutput> {
  const shellCommand = target.toShell();
  const execute = $({ nothrow: true });
  return await execute`bash -c ${shellCommand}`;
}
|
||||
|
||||
/**
 * Verifies and runs every target, then groups the raw results.
 *
 * The returned list holds, in this order:
 *  1. one combined entry per id for the targets flagged `aggregate`,
 *  2. one entry per remaining target, in target order,
 *  3. a generated `base_data_manager_metadata` entry whose stdout is a CSV
 *     with one row per non-aggregate target
 *     (id, perRowDescription, perRowTags, columnMeta, metaId).
 *
 * @param targets Targets to run; `verify` decides which of them are actually run
 */
export async function runAll(targets: TaskTarget[]): Promise<RunOutput[]> {
  const finalTargets = await verify(targets);
  const results = await parallel(finalTargets, run, true);

  const nonAggregateTargets: TaskTarget[] = finalTargets.filter(t => !t.aggregate);
  const nonAggregateResults: RunOutput[] = [];
  // A Map rather than a plain object: it keeps first-seen order for every id
  // (objects move integer-like keys to the front) and treats ids such as
  // "__proto__" as ordinary keys instead of silently dropping them
  const aggregateResultsById = new Map<string, RunOutput>();

  // == Aggregate tables ==
  // Some TaskTargets have .aggregate: true, which means they should all be combined
  // into a single task with the id of the .id property
  for (const [idx, r] of results.entries()) {
    const t = finalTargets[idx];
    if (!t.aggregate) {
      nonAggregateResults.push({
        target: t,
        result: r
      });
      continue;
    }
    const aggregateId = t.id;
    const prevResult = aggregateResultsById.get(aggregateId)?.result;
    aggregateResultsById.set(aggregateId, {
      target: t, // Use target t for metadata, so it will use the last target
      result: combineProcessOutputAggregate(prevResult as (ProcessOutputAggregate | undefined), t, r)
    });
  }

  // == Metadata table ==
  // Each TaskTarget has things like perRowDescription and other things we want to store
  // and output. This creates a single TaskTarget for all that per-table metadata

  /**Quotes a CSV field when it contains a quote, comma or line break; undefined becomes ""*/
  function csvEscape(s: string | undefined) {
    if (s === undefined) {
      return "";
    }
    if (s.includes("\"") || s.includes(",") || s.includes("\n") || s.includes("\r")) {
      return `"${s.replace(/\"/g, "\"\"")}"`;
    }
    return s;
  }

  let metadataCSV = "id,perRowDescription,perRowTags,columnMeta,metaId\n";
  for (const t of nonAggregateTargets) {
    const row = [
      t.id,
      t.perRowDescription,
      t.perRowTags,
      t.columnMeta?.join(",") ?? "",
      t.metaId
    ];
    metadataCSV += row.map(csvEscape).join(",") + "\n";
  }

  // Won't be removed by verify() because we're adding it after that's used
  // TODO: Would be nice to bake this into TaskTarget/verify for tasks that dont point
  // to a real path
  const metadataTarget = new TaskTarget("<none>");
  metadataTarget
    // id, perRowDescription, perRowTags, columnMeta, metaId
    .assignMeta({
      idValue: "base_data_manager_metadata",
      columnMeta: ["any", "any", "any", "any", "any"],
      perRowTags: "internal",
    });
  const metadataResult = {
    stdout: metadataCSV,
    stderr: "",
    exitCode: 0,
    duration: 0, // TODO
    ok: true
  };
  const metadataRunOutput: RunOutput = { target: metadataTarget, result: metadataResult };

  const aggregateResults: RunOutput[] = Array.from(aggregateResultsById.values());
  return aggregateResults.concat(nonAggregateResults).concat(metadataRunOutput);
}
|
||||
242
main.ts
242
main.ts
|
|
@ -1,192 +1,90 @@
|
|||
import fs from 'node:fs/promises';
|
||||
import fsSync from 'node:fs';
|
||||
import nodePath from "node:path";
|
||||
import { DatabaseSync } from "node:sqlite";
|
||||
import "./data-export/facebook.ts";
|
||||
import { type DatabaseSync } from "node:sqlite";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { google } from "./data-export/google.ts";
|
||||
import { TaskTargetPipelineHelper, TaskTarget, verify } from "./data-export/task.ts";
|
||||
import { parallel } from "./data-export/parallel.ts";
|
||||
import { ProcessOutput } from 'zx';
|
||||
import { facebook, facebook_v2 } from "./data-export/facebook.ts";
|
||||
import { type TaskTarget, execPaths } from "./data-export/task.ts";
|
||||
import * as DataIO from "./data-export/io.ts";
|
||||
|
||||
declare module "./data-export/task.ts" {
|
||||
interface TaskTargetPipelineHelper {
|
||||
google: typeof google;
|
||||
}
|
||||
}
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
|
||||
Object.assign(TaskTargetPipelineHelper.prototype, {
|
||||
google
|
||||
});
|
||||
export const startTime = Date.now();
|
||||
export const elapsed = ()=>`${((Date.now() - startTime) / 1000).toFixed(2)}s`;
|
||||
|
||||
async function loadCSVTable(
|
||||
db: DatabaseSync,
|
||||
target: TaskTarget,
|
||||
result: ProcessOutput
|
||||
) {
|
||||
const id = target.id;
|
||||
const table = id;
|
||||
const tmpPath = `/tmp/${id}.csv`;
|
||||
console.log(`Writing ${tmpPath}`);
|
||||
const fd = await fs.open(tmpPath, 'w');
|
||||
await fs.writeFile(fd, result.stdout, { encoding: 'utf8' });
|
||||
await fd.close();
|
||||
console.log(`Loading ${tmpPath} → table ${table}`);
|
||||
export async function loadTaskInNewDb(targets: TaskTarget[]): Promise<DatabaseSync> {
|
||||
console.log(`${elapsed()} - Run all targets`);
|
||||
const out = await DataIO.runPipeline(targets);
|
||||
console.log(`${elapsed()} - Final targets exported to CSV. Got ${out.length} targets`);
|
||||
|
||||
// const headers = lines[0].split(",");
|
||||
// const columnsSql = headers.map(h => `"${h}" TEXT`).join(", ");
|
||||
db.exec(`CREATE VIRTUAL TABLE temp.tmp_${table} USING csv(filename='${tmpPath}');`);
|
||||
// db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`);
|
||||
// db.exec(`DROP TABLE IF EXISTS intermediate;`);
|
||||
return `tmp_${table}`;
|
||||
}
|
||||
/**
 * List the column names of a table, ordered by column id (`cid`).
 *
 * The table name is passed as a bound parameter to the `pragma_table_info`
 * table-valued function instead of being spliced into the statement text, so
 * names containing characters that are special in SQL cannot alter the query.
 *
 * @param db - Open database handle to query.
 * @param tableName - Unqualified name of the table (or virtual table) to inspect.
 * @returns The `name` field of every row reported for the table; empty if the
 *   table does not exist.
 */
function getColumnNames(db: DatabaseSync, tableName: string): string[] {
  return db
    .prepare(`SELECT name FROM pragma_table_info(?) ORDER BY cid`)
    .all(tableName)
    .map((column) => String(column.name));
}
|
||||
/**
 * Build a SQLite `printf(...)` expression from a positional template.
 *
 * Every `{N}` placeholder in `template` becomes a `%s` format specifier whose
 * argument is `columns[N]`; e.g. `'{0} sent {1}'` with `['c0', 'c1']` yields
 * `printf('%s sent %s', c0, c1)`.
 *
 * @param template - Text containing zero or more `{N}` placeholders.
 * @param columns - Column names addressed by the placeholder indices. They are
 *   emitted verbatim (unquoted), exactly as passed in.
 * @returns A SQL expression string.
 * @throws RangeError if a placeholder index has no entry in `columns`.
 */
function templateToSql(template: string, columns: string[]): string {
  const args: string[] = [];
  // One pass over placeholders, `%` and `'` so that literal text is escaped
  // without touching the `%s` specifiers we emit ourselves.
  const format = template.replace(/\{(\d+)\}|%|'/g, (match: string, index: string | undefined) => {
    if (index === undefined) {
      // `%` must be doubled for printf, `'` must be doubled for the SQL string literal.
      return match + match;
    }
    const column = columns[Number.parseInt(index, 10)];
    if (column === undefined) {
      throw new RangeError(`Template placeholder {${index}} has no matching column (got ${columns.length} columns)`);
    }
    args.push(column);
    return '%s';
  });
  // Joining format + args avoids the dangling comma `printf('text', )` that a
  // placeholder-free template would otherwise produce.
  return `printf(${[`'${format}'`, ...args].join(', ')})`;
}
|
||||
function templateToSqlExpr(template: string, columns: string[]) {
|
||||
// perRowTags is already a SQL expression; just substitute {N} with column names
|
||||
return template.replace(/\{(\d+)\}/g, (_match, index) => columns[parseInt(index)]);
|
||||
// TODO: Add an option to output everything plainly as CSV in a single directory
|
||||
|
||||
console.log(`${elapsed()} - Building combined database table in :memory:`);
|
||||
const db = DataIO.getDefaultDB();
|
||||
await DataIO.loadIntoDb(db, out);
|
||||
|
||||
const tableCount = db.prepare(`SELECT COUNT(*) as count FROM base_data_manager_metadata`).get()!.count;
|
||||
console.log(`${elapsed()} - Single database built with ${tableCount} tables`);
|
||||
|
||||
return db;
|
||||
}
|
||||
|
||||
async function main() {
|
||||
// Configurable stuff
|
||||
const sqlitePath = 'your.db';
|
||||
|
||||
const t = TaskTargetPipelineHelper;
|
||||
const targets = TaskTargetPipelineHelper.pipeline([
|
||||
// new TaskTarget("/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01"),
|
||||
new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json"),
|
||||
//new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
|
||||
//new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001").facebook_v2();
|
||||
])
|
||||
.facebook();
|
||||
// .facebook_v2();
|
||||
// .google();
|
||||
console.log(`${elapsed()} - Building targets`);
|
||||
const targets = await execPaths([
|
||||
{path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
|
||||
// {path: "/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01", op: facebook()}
|
||||
// {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip", op: pipe(unzip(), facebook_v2())}
|
||||
// {path: "/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001", op: facebook_v2()}
|
||||
]);
|
||||
console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`);
|
||||
|
||||
// TODO: Make this less painful in task.ts
|
||||
// let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip");
|
||||
// await (zipTask.fsImpl as any).init();
|
||||
const db = await loadTaskInNewDb(targets);
|
||||
|
||||
const finalTargets = await verify(targets);
|
||||
const results = await parallel(finalTargets, true);
|
||||
|
||||
if (fsSync.existsSync(sqlitePath)) {
|
||||
await fs.unlink(sqlitePath); // unlink the old
|
||||
}
|
||||
// Open an in-memory db for speed
|
||||
const db = new DatabaseSync(":memory:", { allowExtension: true });
|
||||
db.loadExtension("/home/cobertos/sqlite-files/csv.so")
|
||||
db.enableLoadExtension(false);
|
||||
|
||||
// New output table
|
||||
db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, sender TEXT, receiver TEXT, tags TEXT, lat REAL, lng REAL);`);
|
||||
|
||||
for (const [idx, target] of targets.entries()) {
|
||||
const result = results[idx];
|
||||
|
||||
if (!target.columnMeta) {
|
||||
continue; // No column information
|
||||
}
|
||||
|
||||
const tableName = await loadCSVTable(db, target, result);
|
||||
const columnNames = getColumnNames(db, tableName);
|
||||
|
||||
// Now find what to insert into each row of the combined
|
||||
let descriptionPart = `'An entry from the ${tableName} table'`; // Default is just kinda garbo...
|
||||
if (target.perRowDescription) {
|
||||
descriptionPart = templateToSql(target.perRowDescription, columnNames);
|
||||
}
|
||||
|
||||
let timestampPart: string | undefined;
|
||||
let senderPart = 'NULL';
|
||||
let receiverPart = 'NULL';
|
||||
let latPart = 'NULL';
|
||||
let lngPart = 'NULL';
|
||||
for (const [idx, col] of target.columnMeta.entries()) {
|
||||
const columnName = columnNames[idx];
|
||||
if (col === "isodatetime") {
|
||||
timestampPart = columnName;
|
||||
} else if (col === "sender") {
|
||||
senderPart = columnName;
|
||||
} else if (col === "receiver") {
|
||||
receiverPart = columnName;
|
||||
} else if (col === "lat") {
|
||||
latPart = columnName;
|
||||
} else if (col === "lng") {
|
||||
lngPart = columnName;
|
||||
}
|
||||
}
|
||||
if (!timestampPart) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let tagsPart = 'NULL';
|
||||
if (target.perRowTags) {
|
||||
tagsPart = templateToSqlExpr(target.perRowTags, columnNames);
|
||||
}
|
||||
|
||||
// OFFSET + LIMIT to ignore the CSV headers
|
||||
db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${senderPart}, ${receiverPart}, ${tagsPart}, ${latPart}, ${lngPart} FROM ${tableName} LIMIT -1 OFFSET 1;`);
|
||||
}
|
||||
|
||||
// Dump it all to the path specified
|
||||
db.exec(`VACUUM main INTO '${sqlitePath}'`);
|
||||
|
||||
// Now dump it as a CSV
|
||||
const rows = db.prepare(`
|
||||
SELECT timestamp || ',' || '"' || replace(description, '"', '""') || '"' as row FROM combined
|
||||
`)
|
||||
.all()
|
||||
.map(r => r.row)
|
||||
.join('\n');
|
||||
db.close();
|
||||
|
||||
await fs.writeFile('your.csv', rows, { encoding: "utf8" });
|
||||
console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`);
|
||||
DataIO.dumpDBToDisk(db, sqlitePath);
|
||||
|
||||
console.log(`${elapsed()} - Database written to disk`);
|
||||
}
|
||||
|
||||
main();
|
||||
if (process.argv[1] === __filename) {
|
||||
main();
|
||||
}
|
||||
|
||||
// TODO: Move this into here
|
||||
// csvSink(
|
||||
// summarization?: [string, string][]
|
||||
// ) {
|
||||
// // TODO:
|
||||
// return this;
|
||||
// csvSink(
|
||||
// summarization?: [string, string][]
|
||||
// ) {
|
||||
// // TODO:
|
||||
// return this;
|
||||
|
||||
// // Ingest this csv into the database at the given id
|
||||
// // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
|
||||
// // Add a post processing function for these targets that prints out the summarization
|
||||
// // stats
|
||||
// // this.post(async (t: TaskTarget)=>{
|
||||
// // // We only do the first one so far for the summarization
|
||||
// // let queryLine: string;
|
||||
// // let formatFn: (r: any)=>string;
|
||||
// // const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
|
||||
// // if (type === "numeric") {
|
||||
// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
|
||||
// // }
|
||||
// // else {
|
||||
// // queryLine = `count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
|
||||
// // }
|
||||
// // Ingest this csv into the database at the given id
|
||||
// // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
|
||||
// // Add a post processing function for these targets that prints out the summarization
|
||||
// // stats
|
||||
// // this.post(async (t: TaskTarget)=>{
|
||||
// // // We only do the first one so far for the summarization
|
||||
// // let queryLine: string;
|
||||
// // let formatFn: (r: any)=>string;
|
||||
// // const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
|
||||
// // if (type === "numeric") {
|
||||
// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
|
||||
// // }
|
||||
// // else {
|
||||
// // queryLine = `count(*) as n`;
|
||||
// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
|
||||
// // }
|
||||
|
||||
// // const cmd = "sqlite-utils";
|
||||
// // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
|
||||
// // const { stdout, stderr } = await execFile(cmd, args);
|
||||
// // const results = JSON.parse(stdout);
|
||||
// // const result = results[0]; // should only be one result in the array for this type of query
|
||||
// // const logLine = formatFn(result);
|
||||
// // (t as any).log = logLine;
|
||||
// // });
|
||||
// // const cmd = "sqlite-utils";
|
||||
// // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
|
||||
// // const { stdout, stderr } = await execFile(cmd, args);
|
||||
// // const results = JSON.parse(stdout);
|
||||
// // const result = results[0]; // should only be one result in the array for this type of query
|
||||
// // const logLine = formatFn(result);
|
||||
// // (t as any).log = logLine;
|
||||
// // });
|
||||
|
||||
// // return this;
|
||||
// }
|
||||
// // return this;
|
||||
// }
|
||||
|
|
@ -27,6 +27,7 @@
|
|||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^24.1.0",
|
||||
"csv-parse": "^6.1.0",
|
||||
"typescript": "^5.9.3"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
8
pnpm-lock.yaml
generated
8
pnpm-lock.yaml
generated
|
|
@ -33,6 +33,9 @@ importers:
|
|||
'@types/node':
|
||||
specifier: ^24.1.0
|
||||
version: 24.10.0
|
||||
csv-parse:
|
||||
specifier: ^6.1.0
|
||||
version: 6.1.0
|
||||
typescript:
|
||||
specifier: ^5.9.3
|
||||
version: 5.9.3
|
||||
|
|
@ -59,6 +62,9 @@ packages:
|
|||
buffer-crc32@0.2.13:
|
||||
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
|
||||
|
||||
csv-parse@6.1.0:
|
||||
resolution: {integrity: sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw==}
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
|
||||
|
||||
|
|
@ -176,6 +182,8 @@ snapshots:
|
|||
|
||||
buffer-crc32@0.2.13: {}
|
||||
|
||||
csv-parse@6.1.0: {}
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
import test from "node:test";
|
||||
import nodePath from "node:path";
|
||||
import { strict as assert } from "node:assert";
|
||||
import { TaskTargetPipelineHelper, TaskTarget, verify, run } from "../data-export/task.ts";
|
||||
import { TaskTarget, verify, run, unzip, pipe } from "../data-export/task.ts";
|
||||
import { parallel } from "../data-export/parallel.ts";
|
||||
import "../data-export/facebook.ts";
|
||||
import { facebook, facebook_v2 } from "../data-export/facebook.ts";
|
||||
import * as DataIO from "../data-export/io.ts";
|
||||
import { parse } from "csv-parse/sync"; // For better diffs + error checking of CSV output
|
||||
|
||||
const THIS_FILE = import.meta.dirname;
|
||||
const FACEBOOK_V1_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01');
|
||||
|
|
@ -11,67 +13,56 @@ const FACEBOOK_V1_ZIPPED = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021
|
|||
const FACEBOOK_V2_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2025-11-29');
|
||||
|
||||
test("facebook: Can load the 2021 export", async (t) => {
|
||||
const targets = TaskTargetPipelineHelper.pipeline([
|
||||
const targets = [
|
||||
new TaskTarget(FACEBOOK_V1_DIR)
|
||||
])
|
||||
.facebook();
|
||||
|
||||
const finalTargets = await verify(targets);
|
||||
const result = await parallel(finalTargets, true);
|
||||
]
|
||||
const builtTargets = await facebook()(targets);
|
||||
const out = await DataIO.runPipeline(builtTargets);
|
||||
const idAndCSVs: [string, string][] = [];
|
||||
for (const [idx, r] of result.entries()) {
|
||||
const target = finalTargets[idx];
|
||||
assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(r.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, r.stdout]);
|
||||
for (const {target, result} of out) {
|
||||
assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(result.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, result.stdout]);
|
||||
}
|
||||
const csvs = idAndCSVs
|
||||
.sort() // Keep stable ordering for snapshots
|
||||
.map(v => v[1])
|
||||
.map(v => parse(v[1]))
|
||||
|
||||
t.assert.snapshot(csvs);
|
||||
});
|
||||
test("facebook: Can load the 2021 export zipped", async (t) => {
|
||||
const targets = await TaskTargetPipelineHelper.pipeline([
|
||||
const targets = [
|
||||
new TaskTarget(FACEBOOK_V1_ZIPPED)
|
||||
])
|
||||
.unzip();
|
||||
const targets2 = targets
|
||||
.facebook();
|
||||
|
||||
const finalTargets = await verify(targets2);
|
||||
const result = await parallel(finalTargets, true);
|
||||
];
|
||||
const builtTargets = await pipe(unzip(), facebook())(targets);
|
||||
const out = await DataIO.runPipeline(builtTargets);
|
||||
const idAndCSVs: [string, string][] = [];
|
||||
for (const [idx, r] of result.entries()) {
|
||||
const target = finalTargets[idx];
|
||||
assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(r.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, r.stdout]);
|
||||
for (const {target, result} of out) {
|
||||
assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(result.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, result.stdout]);
|
||||
}
|
||||
const csvs = idAndCSVs
|
||||
.sort() // Keep stable ordering for snapshots
|
||||
.map(v => v[1])
|
||||
.map(v => parse(v[1]))
|
||||
|
||||
t.assert.snapshot(csvs);
|
||||
});
|
||||
test("facebook: Can load the 2025 export", async (t) => {
|
||||
const targets = TaskTargetPipelineHelper.pipeline([
|
||||
const targets = [
|
||||
new TaskTarget(FACEBOOK_V2_DIR)
|
||||
])
|
||||
.facebook_v2();
|
||||
|
||||
const finalTargets = await verify(targets);
|
||||
const result = await parallel(finalTargets, true);
|
||||
]
|
||||
const builtTargets = await facebook_v2()(targets);
|
||||
const out = await DataIO.runPipeline(builtTargets);
|
||||
const idAndCSVs: [string, string][] = [];
|
||||
for (const [idx, r] of result.entries()) {
|
||||
const target = finalTargets[idx];
|
||||
assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(r.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, r.stdout]);
|
||||
for (const {target, result} of out) {
|
||||
assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`);
|
||||
assert.ok(result.ok, `Task ${target.id} should be okay`);
|
||||
idAndCSVs.push([target.id, result.stdout]);
|
||||
}
|
||||
const csvs = idAndCSVs
|
||||
.sort() // Keep stable ordering for snapshots
|
||||
.map(v => v[1])
|
||||
.map(v => parse(v[1]))
|
||||
|
||||
t.assert.snapshot(csvs);
|
||||
});
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
3
test/fixtures/README.md
vendored
3
test/fixtures/README.md
vendored
|
|
@ -11,3 +11,6 @@
|
|||
|
||||
* `facebook-json-2021-05-01` - Facebook JSON export
|
||||
* `facebook-json-2025-11-29` - Facebook JSON export
|
||||
* [`discord-chat-exporter-2026-02`](./discord-chat-exporter-2026-02.md) - Discord export with [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter) sometime around Feb 2026
|
||||
* [`discord-json-2021-01`](./discord-json-2021-01.md) - Discord JSON export
|
||||
* [`snapchat-2023-11`](./snapchat-2023-11.md) - Snapchat JSON + HTML export
|
||||
|
|
|
|||
25
test/fixtures/discord-chat-exporter-2026-02.md
vendored
Normal file
25
test/fixtures/discord-chat-exporter-2026-02.md
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# discord-chat-exporter-2026-02
|
||||
|
||||
An export from `DiscordChatExporter`, a comprehensive Discord chat export tool
|
||||
|
||||
## Export methodology
|
||||
|
||||
This uses the version of `DiscordChatExporter` that existed at the top of the releases tab on GitHub around `2026 February`. **TODO: figure out version**
|
||||
|
||||
This export used a command something like the following to try to get _everything_: `dotnet DiscordChatExporter.Cli.dll export -t xxx -o ~/DiscordChatExporter -f json --media --reuse-media --include-threads -c xxx`
|
||||
|
||||
* It uses `export` command and `-c` but it's the same for `exportguild` and `-g`
|
||||
* `-f json` so only the json export
|
||||
* `--media` download all media
|
||||
* `--reuse-media` not quite sure what this does because it puts it in a folder per channel...
|
||||
* `--include-threads` to get any threads
|
||||
|
||||
## Manual edits
|
||||
* Lots of image replacing + placeholders
|
||||
* Had to rename the folders
|
||||
|
||||
## Notes
|
||||
The export format has files and folders with similar, information-dense names. I tried to preserve that as that's the only way to correlate between the folder and the file name
|
||||
|
||||
* No exif on any media files
|
||||
* There are embeds and thumbnails in the example chat messages, but I have no other specimen
|
||||
|
|
@ -0,0 +1,145 @@
|
|||
{
|
||||
"guild": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxx",
|
||||
"iconUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png"
|
||||
},
|
||||
"channel": {
|
||||
"id": "111111111111111111",
|
||||
"type": "xxxxxxxxxxxxx",
|
||||
"categoryId": "111111111111111111",
|
||||
"category": "xxxxxxxxxxxxx",
|
||||
"name": "xxxxxxx",
|
||||
"topic": null
|
||||
},
|
||||
"dateRange": {
|
||||
"after": null,
|
||||
"before": null
|
||||
},
|
||||
"exportedAt": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"messages": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"type": "xxxxxxxxxxxxxxx",
|
||||
"timestamp": "2020-04-13T10:09:08.000000+00:00",
|
||||
"timestampEdited": null,
|
||||
"callEndedTimestamp": null,
|
||||
"isPinned": false,
|
||||
"content": "xxxxxxxxxxxxxxxxxx",
|
||||
"author": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxx",
|
||||
"discriminator": "1111",
|
||||
"nickname": "xxxxxxxx",
|
||||
"color": null,
|
||||
"isBot": false,
|
||||
"roles": [],
|
||||
"avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png"
|
||||
},
|
||||
"attachments": [],
|
||||
"embeds": [],
|
||||
"stickers": [],
|
||||
"reactions": [],
|
||||
"mentions": [],
|
||||
"inlineEmojis": []
|
||||
},
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"type": "xxxxxxx",
|
||||
"timestamp": "2020-04-13T10:09:08.000000+00:00",
|
||||
"timestampEdited": null,
|
||||
"callEndedTimestamp": null,
|
||||
"isPinned": false,
|
||||
"content": "xxxxxxxxx",
|
||||
"author": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxx",
|
||||
"discriminator": "1111",
|
||||
"nickname": "xxxxxxxx",
|
||||
"color": null,
|
||||
"isBot": false,
|
||||
"roles": [],
|
||||
"avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png"
|
||||
},
|
||||
"attachments": [],
|
||||
"embeds": [],
|
||||
"stickers": [],
|
||||
"reactions": [],
|
||||
"mentions": [],
|
||||
"inlineEmojis": []
|
||||
},
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"type": "xxxxxxx",
|
||||
"timestamp": "2020-04-13T10:09:08.000000+00:00",
|
||||
"timestampEdited": null,
|
||||
"callEndedTimestamp": null,
|
||||
"isPinned": false,
|
||||
"content": "https://example.com/example.png",
|
||||
"author": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxx",
|
||||
"discriminator": "1111",
|
||||
"nickname": "xxxxxxxx",
|
||||
"color": null,
|
||||
"isBot": false,
|
||||
"roles": [],
|
||||
"avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png"
|
||||
},
|
||||
"attachments": [],
|
||||
"embeds": [
|
||||
{
|
||||
"title": "",
|
||||
"url": "https://example.com/example.png",
|
||||
"timestamp": null,
|
||||
"description": "",
|
||||
"thumbnail": {
|
||||
"url": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/example.png",
|
||||
"width": 111,
|
||||
"height": 111
|
||||
},
|
||||
"images": [],
|
||||
"fields": [],
|
||||
"inlineEmojis": []
|
||||
}
|
||||
],
|
||||
"stickers": [],
|
||||
"reactions": [],
|
||||
"mentions": [],
|
||||
"inlineEmojis": []
|
||||
},
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"type": "xxxxxxx",
|
||||
"timestamp": "2020-04-13T10:09:08.000000+00:00",
|
||||
"timestampEdited": null,
|
||||
"callEndedTimestamp": null,
|
||||
"isPinned": false,
|
||||
"content": "xxx",
|
||||
"author": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxx",
|
||||
"discriminator": "1111",
|
||||
"nickname": "xxxxxxxx",
|
||||
"color": null,
|
||||
"isBot": false,
|
||||
"roles": [],
|
||||
"avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png"
|
||||
},
|
||||
"attachments": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"url": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/unknown-SUFFIX.png",
|
||||
"fileName": "unknown.png",
|
||||
"fileSizeBytes": 111111
|
||||
}
|
||||
],
|
||||
"embeds": [],
|
||||
"stickers": [],
|
||||
"reactions": [],
|
||||
"mentions": [],
|
||||
"inlineEmojis": []
|
||||
}
|
||||
],
|
||||
"messageCount": 111
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 1.2 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.3 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 1.3 KiB |
41
test/fixtures/discord-json-2021-01.md
vendored
Normal file
41
test/fixtures/discord-json-2021-01.md
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# discord-json-2021-01
|
||||
|
||||
## Manual edits
|
||||
* images -> placeholders
|
||||
* `accounts/avatar.png`
|
||||
* manually scrub folder names
|
||||
* `account/applications/0000000000000`
|
||||
|
||||
## Notes about files
|
||||
* `activity/`
|
||||
* All the `.json` files are NDJSON, so some JSON tools don't like them
|
||||
* _Massive_ files. They hang scrub.ts for a long long time (had to run these piecemeal)
|
||||
* These files also have an _incredible_ amount of shapes and variance.
|
||||
* Instead of outputting all the shapes I made a sort of "super-object" to capture the shape with `jq -n '[inputs] | add' events-2021-00000-of-00001.json.tmp > unique_shape.json` and then scrubbed `unique_shape.json`
|
||||
* `messages/`
|
||||
* I did these by hand to keep all the ids the same
|
||||
* There are multiple types of chats. DMs, guild channels, etc
|
||||
* I did the CSVs by hand as I have no scrubber for that
|
||||
* These are only **THE EXPORTING USER'S MESSAGES**, no other user, just fyi
|
||||
* Ids in `messages.csv` are just the id of the message, not of any user
|
||||
* There is the potential to derive missing info from a channel via `@` tags sent or possibly via attachments. Maybe...
|
||||
* `11111111111111111`
|
||||
* This one has a shorter id (it's an older one)
|
||||
* Has `type: 0` but there's no guild information in `channel.json`
|
||||
* The user name was `null` in `index.json`
|
||||
* It's a really odd one
|
||||
* `222222222222222222`
|
||||
* This was a dm channel (said `direct message with xxx#7777` in index.json)
|
||||
* Has `type: 1` and there are two recipients (just the ids) in `channel.json`
|
||||
* Unfortunately that's all the info in the export
|
||||
* `333333333333333333`
|
||||
* This was a normal guild channel
|
||||
* `type: 0` and there's guild information in `channel.json`
|
||||
* I kept a good set of messages around from this one to show how attachments and other stuff work
|
||||
* The last message seemed to be a link not as an attachment. Links just seem to be normal text
|
||||
* `programs/`
|
||||
* was empty...
|
||||
* `servers/`
|
||||
* Info about _some_ of the guilds we have ids for
|
||||
* guild.json didn't really contain anything except the name
|
||||
* I kept around the only guild I noticed an audit-log.json with info in it
|
||||
26
test/fixtures/discord-json-2021-01/README.txt
vendored
Normal file
26
test/fixtures/discord-json-2021-01/README.txt
vendored
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
__ __ ___ _ _ ___ ___ ___ _____ ___ _
|
||||
\ \ / / / _ \ | | | | | _ \ o O O | \ / \ |_ _| / \ | |
|
||||
\ V / | (_) | | |_| | | / o | |) | | - | | | | - | |_|
|
||||
_|_|_ \___/ \___/ |_|_\ TS__[O] |___/ |_|_| _|_|_ |_|_| _(_)_
|
||||
_| """ |_|"""""|_|"""""|_|"""""| <======|_|"""""|_|"""""|_|"""""|_|"""""|_| """ |
|
||||
"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'./o--000'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'
|
||||
___ ___ _ _ ___ ___ ___ _ _ _
|
||||
|_ _| / __| o O O | || | | __| | _ \ | __| | | | | | |
|
||||
| | \__ \ o | __ | | _| | / | _| |_| |_| |_|
|
||||
|___| |___/ TS__[O] |_||_| |___| |_|_\ |___| _(_)_ _(_)_ _(_)_
|
||||
_|"""""|_|"""""| <======|_|"""""|_|"""""|_|"""""|_|"""""|_| """ |_| """ |_| """ |
|
||||
"`-0-0-'"`-0-0-'./o--000'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'
|
||||
|
||||
Welcome to your Discord Data Package!
|
||||
|
||||
Inside, you'll find a few JSON (JavaScript Object Notation) and CSV (Comma Separated Values) files
|
||||
of the data we use to provide Discord's service to you. We've chosen these formats for ease of
|
||||
processing. Furthermore, the files have been organized into logical groups to make it easy to
|
||||
understand and work with (at least, we hope so)!
|
||||
|
||||
For more information, you can view our in-depth help article at the following URL:
|
||||
|
||||
https://support.discord.com/hc/articles/360004957991
|
||||
|
||||
All the best,
|
||||
Discord Team
|
||||
16
test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json
vendored
Normal file
16
test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxx",
|
||||
"icon": null,
|
||||
"description": "",
|
||||
"summary": "",
|
||||
"hook": false,
|
||||
"verify_key": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"flags": 1,
|
||||
"secret": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"redirect_uris": [],
|
||||
"rpc_application_state": 1,
|
||||
"store_application_state": 1,
|
||||
"verification_state": 1,
|
||||
"interactions_endpoint_url": null
|
||||
}
|
||||
BIN
test/fixtures/discord-json-2021-01/account/avatar.png
vendored
Normal file
BIN
test/fixtures/discord-json-2021-01/account/avatar.png
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.7 KiB |
399
test/fixtures/discord-json-2021-01/account/user.json
vendored
Normal file
399
test/fixtures/discord-json-2021-01/account/user.json
vendored
Normal file
|
|
@ -0,0 +1,399 @@
|
|||
{
|
||||
"id": "111111111111111111",
|
||||
"username": "xxxxxxxx",
|
||||
"discriminator": 1111,
|
||||
"email": "not_a_real_email@example.com",
|
||||
"verified": false,
|
||||
"avatar_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"has_mobile": false,
|
||||
"needs_email_verification": false,
|
||||
"premium_until": "2020-04-13T10:09:08.000000+00:00",
|
||||
"flags": 11111111111111,
|
||||
"phone": "xxxxxxxxxxxx",
|
||||
"temp_banned_until": null,
|
||||
"ip": "1.1.1.1",
|
||||
"settings": {
|
||||
"locale": "xxxxx",
|
||||
"show_current_game": false,
|
||||
"restricted_guilds": [],
|
||||
"default_guilds_restricted": false,
|
||||
"inline_attachment_media": false,
|
||||
"inline_embed_media": false,
|
||||
"gif_auto_play": false,
|
||||
"render_embeds": false,
|
||||
"render_reactions": false,
|
||||
"animate_emoji": false,
|
||||
"enable_tts_command": false,
|
||||
"message_display_compact": false,
|
||||
"convert_emoticons": false,
|
||||
"explicit_content_filter": 1,
|
||||
"disable_games_tab": false,
|
||||
"theme": "xxxx",
|
||||
"developer_mode": false,
|
||||
"guild_positions": [
|
||||
"111111111111111111",
|
||||
"111111111111111111"
|
||||
],
|
||||
"detect_platform_accounts": false,
|
||||
"status": "xxxxxx",
|
||||
"afk_timeout": 111,
|
||||
"timezone_offset": 111,
|
||||
"stream_notifications_enabled": false,
|
||||
"allow_accessibility_detection": false,
|
||||
"contact_sync_enabled": false,
|
||||
"native_phone_integration_enabled": false,
|
||||
"animate_stickers": 1,
|
||||
"friend_source_flags": {
|
||||
"all": false
|
||||
},
|
||||
"guild_folders": [
|
||||
{
|
||||
"guild_ids": [
|
||||
"111111111111111111"
|
||||
],
|
||||
"id": null,
|
||||
"name": null,
|
||||
"color": null
|
||||
},
|
||||
{
|
||||
"guild_ids": [
|
||||
"111111111111111111"
|
||||
],
|
||||
"id": null,
|
||||
"name": null,
|
||||
"color": null
|
||||
}
|
||||
],
|
||||
"custom_status": null
|
||||
},
|
||||
"connections": [
|
||||
{
|
||||
"type": "xxxxxxxxx",
|
||||
"id": "xxxxxxxxxxx",
|
||||
"name": "xxxxxxxxxxx",
|
||||
"revoked": false,
|
||||
"visibility": 1,
|
||||
"friend_sync": false,
|
||||
"show_activity": false,
|
||||
"verified": false
|
||||
},
|
||||
{
|
||||
"type": "xxxxxxx",
|
||||
"id": "xxxxxxxx",
|
||||
"name": "xxxxxxxx",
|
||||
"revoked": false,
|
||||
"visibility": 1,
|
||||
"friend_sync": false,
|
||||
"show_activity": false,
|
||||
"verified": false
|
||||
}
|
||||
],
|
||||
"external_friends_lists": [
|
||||
{
|
||||
"user_id": "111111111111111111",
|
||||
"platform_type": "xxxxx",
|
||||
"name": "xxxxxxxx",
|
||||
"id_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"friend_id_hashes": [
|
||||
"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"user_id": "111111111111111111",
|
||||
"platform_type": "xxxxxxxxx",
|
||||
"name": "xxxxxxxxxxx",
|
||||
"id_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"friend_id_hashes": [
|
||||
"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1"
|
||||
]
|
||||
}
|
||||
],
|
||||
"friend_suggestions": [],
|
||||
"mfa_sessions": [],
|
||||
"relationships": [
|
||||
{
|
||||
"id": "11111111111111111",
|
||||
"type": 1,
|
||||
"nickname": null,
|
||||
"user": {
|
||||
"id": "11111111111111111",
|
||||
"username": "xxxxxxxxxxxx",
|
||||
"avatar": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"discriminator": "1111",
|
||||
"public_flags": 1
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "11111111111111111",
|
||||
"type": 1,
|
||||
"nickname": null,
|
||||
"user": {
|
||||
"id": "11111111111111111",
|
||||
"username": "xxxx",
|
||||
"avatar": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"discriminator": "1111",
|
||||
"public_flags": 111
|
||||
}
|
||||
}
|
||||
],
|
||||
"payments": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"created_at": "2020-04-13T10:09:08.000000+00:00",
|
||||
"currency": "xxx",
|
||||
"tax": 111,
|
||||
"tax_inclusive": false,
|
||||
"amount": 1111,
|
||||
"amount_refunded": 1,
|
||||
"status": 1,
|
||||
"description": "xxxxxxxxxxxxxxxxxxxx",
|
||||
"flags": 1,
|
||||
"subscription": {
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"current_period_start": "2020-04-13T10:09:08.000000+00:00",
|
||||
"current_period_end": "2020-04-13T10:09:08.000000+00:00",
|
||||
"payment_gateway": null,
|
||||
"payment_gateway_plan_id": "xxxxxxxxxxxxxxxxxxx",
|
||||
"currency": "xxx",
|
||||
"plan_id": "111111111111111111",
|
||||
"items": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"plan_id": "111111111111111111",
|
||||
"quantity": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"payment_source": {
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"invalid": false,
|
||||
"brand": "xxxx",
|
||||
"last_4": "1111",
|
||||
"expires_month": 11,
|
||||
"expires_year": 1111,
|
||||
"billing_address": {
|
||||
"name": "xxxxxxxxxxxxx",
|
||||
"line_1": "xxxxxxxxxxxxxxxxx",
|
||||
"line_2": null,
|
||||
"city": "xxxxxxxx",
|
||||
"state": "xx",
|
||||
"country": "xx",
|
||||
"postal_code": "11111"
|
||||
},
|
||||
"country": "xx"
|
||||
},
|
||||
"sku_id": "111111111111111111",
|
||||
"sku_price": 1111,
|
||||
"sku_subscription_plan_id": "111111111111111111"
|
||||
},
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"created_at": "2020-04-13T10:09:08.000000+00:00",
|
||||
"currency": "xxx",
|
||||
"tax": 111,
|
||||
"tax_inclusive": false,
|
||||
"amount": 1111,
|
||||
"amount_refunded": 1,
|
||||
"status": 1,
|
||||
"description": "xxxxxxxxxxxxxxxxxxxx",
|
||||
"flags": 1,
|
||||
"subscription": {
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"current_period_start": "2020-04-13T10:09:08.000000+00:00",
|
||||
"current_period_end": "2020-04-13T10:09:08.000000+00:00",
|
||||
"payment_gateway": null,
|
||||
"payment_gateway_plan_id": "xxxxxxxxxxxxxxxxxxx",
|
||||
"currency": "xxx",
|
||||
"plan_id": "111111111111111111",
|
||||
"items": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"plan_id": "111111111111111111",
|
||||
"quantity": 1
|
||||
}
|
||||
]
|
||||
},
|
||||
"payment_source": {
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"invalid": false,
|
||||
"brand": "xxxx",
|
||||
"last_4": "1111",
|
||||
"expires_month": 11,
|
||||
"expires_year": 1111,
|
||||
"billing_address": {
|
||||
"name": "xxxxxxxxxxxxx",
|
||||
"line_1": "xxxxxxxxxxxxxxxxxx",
|
||||
"line_2": null,
|
||||
"city": "xxxxxxxxxx",
|
||||
"state": "xx",
|
||||
"country": "xx",
|
||||
"postal_code": "11111"
|
||||
},
|
||||
"country": "xx"
|
||||
},
|
||||
"sku_id": "111111111111111111",
|
||||
"sku_price": 1111,
|
||||
"sku_subscription_plan_id": "111111111111111111"
|
||||
}
|
||||
],
|
||||
"payment_sources": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"invalid": false,
|
||||
"brand": "xxxx",
|
||||
"last_4": "1111",
|
||||
"expires_month": 11,
|
||||
"expires_year": 1111,
|
||||
"billing_address": {
|
||||
"name": "xxxxxxxxxxxxx",
|
||||
"line_1": "xxxxxxxxxxxxxxxxx",
|
||||
"line_2": null,
|
||||
"city": "xxxxxxxx",
|
||||
"state": "xx",
|
||||
"country": "xx",
|
||||
"postal_code": "11111"
|
||||
},
|
||||
"country": "xx"
|
||||
}
|
||||
],
|
||||
"guild_settings": [
|
||||
{
|
||||
"guild_id": null,
|
||||
"suppress_everyone": false,
|
||||
"suppress_roles": false,
|
||||
"message_notifications": 1,
|
||||
"mobile_push": false,
|
||||
"muted": false,
|
||||
"mute_config": null,
|
||||
"channel_overrides": [
|
||||
{
|
||||
"channel_id": "111111111111111111",
|
||||
"message_notifications": 1,
|
||||
"muted": false,
|
||||
"mute_config": null
|
||||
}
|
||||
],
|
||||
"version": 11
|
||||
},
|
||||
{
|
||||
"guild_id": "11111111111111111",
|
||||
"suppress_everyone": false,
|
||||
"suppress_roles": false,
|
||||
"message_notifications": 1,
|
||||
"mobile_push": false,
|
||||
"muted": false,
|
||||
"mute_config": null,
|
||||
"channel_overrides": [
|
||||
{
|
||||
"channel_id": "111111111111111111",
|
||||
"message_notifications": 1,
|
||||
"muted": false,
|
||||
"mute_config": null
|
||||
},
|
||||
{
|
||||
"channel_id": "111111111111111111",
|
||||
"message_notifications": 1,
|
||||
"muted": false,
|
||||
"mute_config": null
|
||||
}
|
||||
],
|
||||
"version": 1
|
||||
}
|
||||
],
|
||||
"library_applications": [
|
||||
{
|
||||
"application": {
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxxxxxx",
|
||||
"icon": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"description": "xxxxxxxxxxxxxxxxxxxxx",
|
||||
"summary": "xxxxxxxxxxxxxxxxxxxxx",
|
||||
"primary_sku_id": "111111111111111111",
|
||||
"hook": false,
|
||||
"slug": "xxxxxxxxxxxx",
|
||||
"guild_id": "111111111111111111",
|
||||
"verify_key": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1",
|
||||
"publishers": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"developers": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"name": "xxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
}
|
||||
]
|
||||
},
|
||||
"branch_id": "111111111111111111",
|
||||
"sku_id": "111111111111111111",
|
||||
"sku": {
|
||||
"id": "111111111111111111",
|
||||
"type": 1,
|
||||
"premium": false,
|
||||
"preorder_release_at": null,
|
||||
"preorder_approximate_release_date": null
|
||||
},
|
||||
"flags": 1,
|
||||
"created_at": "2020-04-13T10:09:08.000000+00:00",
|
||||
"entitlements": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"sku_id": "111111111111111111",
|
||||
"application_id": "111111111111111111",
|
||||
"user_id": "111111111111111111",
|
||||
"type": 1,
|
||||
"deleted": false,
|
||||
"gift_code_flags": 1,
|
||||
"branches": [
|
||||
"111111111111111111"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"entitlements": [
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"sku_id": "111111111111111111",
|
||||
"application_id": "111111111111111111",
|
||||
"user_id": "111111111111111111",
|
||||
"type": 1,
|
||||
"deleted": false,
|
||||
"gift_code_flags": 1,
|
||||
"branches": [
|
||||
"111111111111111111"
|
||||
],
|
||||
"sku_name": "xxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"user_activity_application_statistics": [
|
||||
{
|
||||
"application_id": "111111111111111111",
|
||||
"last_played_at": "2020-04-13T10:09:08.000000+00:00",
|
||||
"total_duration": 1111,
|
||||
"total_discord_sku_duration": 1
|
||||
},
|
||||
{
|
||||
"application_id": "111111111111111111",
|
||||
"last_played_at": "2020-04-13T10:09:08.000000+00:00",
|
||||
"total_duration": 111111,
|
||||
"total_discord_sku_duration": 1
|
||||
}
|
||||
],
|
||||
"notes": {
|
||||
"111111111111111111": "xxxx"
|
||||
}
|
||||
}
|
||||
2
test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json
vendored
Normal file
2
test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json
vendored
Normal file
File diff suppressed because one or more lines are too long
2
test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json
vendored
Normal file
2
test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json
vendored
Normal file
File diff suppressed because one or more lines are too long
2
test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json
vendored
Normal file
2
test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json
vendored
Normal file
File diff suppressed because one or more lines are too long
2
test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json
vendored
Normal file
2
test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json
vendored
Normal file
File diff suppressed because one or more lines are too long
1
test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json
vendored
Normal file
1
test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"id": "11111111111111111", "type": 0}
|
||||
2
test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv
vendored
Normal file
2
test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
ID,Timestamp,Contents,Attachments
|
||||
8888888888,2022-02-22 22:22:22.222222+00:00,Heyo,
|
||||
|
1
test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json
vendored
Normal file
1
test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"id": "222222222222222222", "type": 1, "recipients": ["00000000000000000", "1111111111111111"]}
|
||||
2
test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv
vendored
Normal file
2
test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
ID,Timestamp,Contents,Attachments
|
||||
2222222222222,2022-22-22 22:22:22.22222+00:00,Heyo,
|
||||
|
1
test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json
vendored
Normal file
1
test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
{"id": "333333333333333333", "type": 0, "name": "generalchat", "guild": {"id": "333333333333333332", "name": "xxx"}}
|
||||
6
test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv
vendored
Normal file
6
test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
ID,Timestamp,Contents,Attachments
|
||||
000000000000000005,2011-02-02 02:05:02.000000+00:00,Huh what the heck is this message,
|
||||
000000000000000004,2011-02-02 02:04:02.000000+00:00,<:thonk:000000000000000000><:thonk:000000000000000000><:thonk:000000000000000000>,
|
||||
000000000000000003,2011-02-02 02:03:02.000000+00:00,"(so <@00000000000000000> who are you)",
|
||||
000000000000000002,2011-02-02 02:02:02.000000+00:00,,https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/image.png
|
||||
000000000000000001,2011-02-02 02:01:02.000000+00:00,https://google.com/whatever,
|
||||
|
5
test/fixtures/discord-json-2021-01/messages/index.json
vendored
Normal file
5
test/fixtures/discord-json-2021-01/messages/index.json
vendored
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
{
|
||||
"11111111111111111": null,
|
||||
"222222222222222222": "Direct Message with xxx#7777",
|
||||
"333333333333333333": "generalchat"
|
||||
}
|
||||
18
test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json
vendored
Normal file
18
test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[
|
||||
{
|
||||
"id": "111111111111111111",
|
||||
"user_id": "111111111111111111",
|
||||
"action_type": 11,
|
||||
"changes": [
|
||||
{
|
||||
"key": "xxxx",
|
||||
"new_value": [
|
||||
{
|
||||
"name": "xxxxxxxxxx",
|
||||
"id": "111111111111111111"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
4
test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json
vendored
Normal file
4
test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"id": "444444444444444444",
|
||||
"name": "xxx"
|
||||
}
|
||||
3
test/fixtures/discord-json-2021-01/servers/index.json
vendored
Normal file
3
test/fixtures/discord-json-2021-01/servers/index.json
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"444444444444444444": "xxx"
|
||||
}
|
||||
9
test/fixtures/facebook-json.md
vendored
Normal file
9
test/fixtures/facebook-json.md
vendored
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
# facebook-json exports
|
||||
|
||||
## `facebook-json-2021-05-01`
|
||||
* Manual edits of images -> placeholders, folder names, key names (in support cases specifically)
|
||||
* This was one of the first few datasets I scrubbed so a lot of manual work was done. Should be easier now
|
||||
* I went poking around this one and there was no exif on any of the images I looked at, only in the json was there exif
|
||||
## `facebook-json-2025-11-29`
|
||||
* Manual edits of images -> placeholders, folder names, key names
|
||||
* This was one of the first few datasets I scrubbed so a lot of manual work was done. Should be easier now
|
||||
83
test/fixtures/snapchat-2023-11.md
vendored
Normal file
83
test/fixtures/snapchat-2023-11.md
vendored
Normal file
|
|
@ -0,0 +1,83 @@
|
|||
# Snapchat
|
||||
|
||||
Exported from the web exporter
|
||||
|
||||
## Manual Edits
|
||||
|
||||
* memories and chat_media placeholders
|
||||
* Snapchat seemed to have events exported where the `+` in emails broke my parsing and the email contained a ' ' instead, so I fixed that
|
||||
* Keys use unique dates in `json/in_app_surveys.json`
|
||||
* Keys in `json/chat_history.json` use user ids, had to manually truncate and edit
|
||||
|
||||
## Notes
|
||||
|
||||
* `memories/`
|
||||
* No exif data
|
||||
* Does not seem to have any correlating .json file. It's just a dump to the disk
|
||||
* files are like `2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg`
|
||||
* Date has no time, just date
|
||||
* `aaaaa...` seems to be a guid
|
||||
* `main` | `overlay` at the end, with the same guid
|
||||
* `main` is just the image
|
||||
* `overlay` looks to be like a filter or some other applied thing that was saved with the memory
|
||||
* Images may be rotated
|
||||
* `chat_media/`
|
||||
* No exif
|
||||
* files are like `2020-01-01_b~xxxx.jpeg`
|
||||
* sometimes they have `main` | `overlay` or something
|
||||
* No idea what the `b~` means or if the xxx is an id or what. Perhaps base64 encoded protobuf, but nothing I decoded seemed to correlate to any identifier in the export
|
||||
* Only referenced from ... oh... it's broken. The `type: "MEDIA"` in Snapchat's exporter has all empty "content" fields. Amazing... So this will have to be pieced together some other way
|
||||
* This will most likely have to be manually repaired
|
||||
* `json/`
|
||||
* Scrubbed
|
||||
* See manual changes
|
||||
|
||||
|
||||
* Comes with both an html and json export (I will only keep the json after deduping)
|
||||
* NOTE: The html export has explanations which might be useful for understanding some of these fields...
|
||||
* I compared all .html to .json side by side (browser <-> text editor) and all of them were present in both and had the same data except `snap_history.html` (was empty in .html) and `faq.html` (just informational)
|
||||
* I noticed on chat history html pages it puts _every_ category, not just the ones I have. Might be useful future reference
|
||||
|
||||
```
|
||||
Frequently Asked Questions
|
||||
Login History and Account Information
|
||||
Snap History Metadata
|
||||
Chat History Metadata
|
||||
My AI
|
||||
Our Story & Spotlight Content
|
||||
Spotlight Replies
|
||||
Purchase History
|
||||
Snapchat Support History
|
||||
User Profile
|
||||
Public Profiles
|
||||
Friends
|
||||
Ranking
|
||||
Story History
|
||||
Account History
|
||||
Location
|
||||
Search History
|
||||
Terms History
|
||||
Subscriptions
|
||||
Bitmoji
|
||||
In-app Surveys
|
||||
Reported Content
|
||||
Bitmoji Kit
|
||||
Connected Apps
|
||||
Talk History
|
||||
Ads Manager
|
||||
My Lenses
|
||||
Memories
|
||||
Cameos
|
||||
Email Campaign History
|
||||
Snap Tokens
|
||||
Payouts
|
||||
Orders
|
||||
Snap Map Places
|
||||
Shopping Favorites
|
||||
Payments
|
||||
My Sounds
|
||||
Photoshoot Snaps
|
||||
Feature Emails
|
||||
AI Selfies
|
||||
```
|
||||
|
||||
38
test/fixtures/snapchat-2023-11/json/account.json
vendored
Normal file
38
test/fixtures/snapchat-2023-11/json/account.json
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
{
|
||||
"Basic Information": {
|
||||
"Username": "xxxxxxxxx",
|
||||
"Name": "xxxxx",
|
||||
"Creation Date": "2020-04-13 10:09:08 UTC",
|
||||
"Registration IP": "",
|
||||
"Country": ""
|
||||
},
|
||||
"Device Information": {
|
||||
"Make": "",
|
||||
"Model ID": "",
|
||||
"Model Name": "",
|
||||
"Language": "",
|
||||
"OS Type": "",
|
||||
"OS Version": "",
|
||||
"Connection Type": ""
|
||||
},
|
||||
"Device History": [],
|
||||
"Privacy Policy and Terms of Service Acceptance History": [],
|
||||
"Custom Creative Tools Terms": [],
|
||||
"Login History": [
|
||||
{
|
||||
"IP": "1.1.1.1",
|
||||
"Country": "xx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Status": "xxxxxxx",
|
||||
"Device": "some/path"
|
||||
},
|
||||
{
|
||||
"IP": "1.1.1.1",
|
||||
"Country": "xx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Status": "xxxxxxx",
|
||||
"Device": "some/path"
|
||||
}
|
||||
],
|
||||
"Family Center": []
|
||||
}
|
||||
47
test/fixtures/snapchat-2023-11/json/account_history.json
vendored
Normal file
47
test/fixtures/snapchat-2023-11/json/account_history.json
vendored
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
{
|
||||
"Display Name Change": [
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC",
|
||||
"Display Name": "xxxxx"
|
||||
},
|
||||
{
|
||||
"Date": "",
|
||||
"Display Name": "xxxxxx"
|
||||
}
|
||||
],
|
||||
"Email Change": [
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC",
|
||||
"Email Address": "not_a_real_email@example.com"
|
||||
}
|
||||
],
|
||||
"Mobile Number Change": [],
|
||||
"Password Change": [
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC"
|
||||
},
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC"
|
||||
}
|
||||
],
|
||||
"Snapchat Linked to Bitmoji": [
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC"
|
||||
}
|
||||
],
|
||||
"Spectacles": [],
|
||||
"Two-Factor Authentication": [],
|
||||
"Account deactivated / reactivated": [],
|
||||
"Download My Data Reports": [
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC",
|
||||
"Status": "xxxxxxx",
|
||||
"Email Address": "not_a_real_email@example.com"
|
||||
},
|
||||
{
|
||||
"Date": "2020-04-13 10:09:08 UTC",
|
||||
"Status": "xxxxxxxxx",
|
||||
"Email Address": "not_a_real_email@example.com"
|
||||
}
|
||||
]
|
||||
}
|
||||
31
test/fixtures/snapchat-2023-11/json/bitmoji.json
vendored
Normal file
31
test/fixtures/snapchat-2023-11/json/bitmoji.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"Basic Information": {
|
||||
"First Name": "",
|
||||
"Last Name": "",
|
||||
"Email": "",
|
||||
"Phone Number": "",
|
||||
"Account Creation Date": "2020-04-13 10:09:08 UTC",
|
||||
"Account Creation User Agent": ""
|
||||
},
|
||||
"Analytics": {
|
||||
"App Open Count": 1,
|
||||
"Avatar Gender": "xxxx",
|
||||
"Outfit Save Count": 1,
|
||||
"Share Count": 1
|
||||
},
|
||||
"Terms of Service Acceptance History": [
|
||||
{
|
||||
"Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Acceptance Date": "2020-04-13 10:09:08"
|
||||
},
|
||||
{
|
||||
"Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Acceptance Date": "2020-04-13 10:09:08"
|
||||
}
|
||||
],
|
||||
"Search History": [],
|
||||
"Support Cases": [],
|
||||
"Selfies": [],
|
||||
"Keyboard Enable Full Access History (iOS only)": [],
|
||||
"Connected Apps": []
|
||||
}
|
||||
8
test/fixtures/snapchat-2023-11/json/cameos_metadata.json
vendored
Normal file
8
test/fixtures/snapchat-2023-11/json/cameos_metadata.json
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"Cameos Selfie": {
|
||||
"Cameos Body Selected": "xxxxxxxxxxxx",
|
||||
"Hairstyle": "xxxxxxxxxxxx",
|
||||
"Use My Cameos Selfie": "xxxxxxx"
|
||||
},
|
||||
"Cameos Stories": []
|
||||
}
|
||||
42
test/fixtures/snapchat-2023-11/json/chat_history.json
vendored
Normal file
42
test/fixtures/snapchat-2023-11/json/chat_history.json
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
{
|
||||
"some_friend": [
|
||||
{
|
||||
"From": "xxxxxxxxx",
|
||||
"Media Type": "xxxxx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Content": "",
|
||||
"Conversation Title": null,
|
||||
"IsSender": false,
|
||||
"Created(microseconds)": 1111111111111
|
||||
},
|
||||
{
|
||||
"From": "xxxxxxxxx",
|
||||
"Media Type": "xxxx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Content": "xxxxxxxxxxxxxxxxxx",
|
||||
"Conversation Title": null,
|
||||
"IsSender": false,
|
||||
"Created(microseconds)": 1111111111111
|
||||
}
|
||||
],
|
||||
"some_friend_too": [
|
||||
{
|
||||
"From": "xxxxxxxxxxxxxx",
|
||||
"Media Type": "xxxxx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Content": "",
|
||||
"Conversation Title": "xxxxxxxxxxxxxxxx",
|
||||
"IsSender": false,
|
||||
"Created(microseconds)": 1111111111111
|
||||
},
|
||||
{
|
||||
"From": "xxxxxxxxxxxxx",
|
||||
"Media Type": "xxxx",
|
||||
"Created": "2020-04-13 10:09:08 UTC",
|
||||
"Content": "xxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Conversation Title": "xxxxxxxxxxxxxxxx",
|
||||
"IsSender": false,
|
||||
"Created(microseconds)": 1111111111111
|
||||
}
|
||||
]
|
||||
}
|
||||
11
test/fixtures/snapchat-2023-11/json/connected_apps.json
vendored
Normal file
11
test/fixtures/snapchat-2023-11/json/connected_apps.json
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"Login History": [],
|
||||
"Permissions": [
|
||||
{
|
||||
"App": "xxxxxxx",
|
||||
"Time": "2020-04-13 10:09:08 UTC",
|
||||
"Type": "xxxxxxx"
|
||||
}
|
||||
],
|
||||
"Connected Applications": []
|
||||
}
|
||||
13
test/fixtures/snapchat-2023-11/json/email_campaign_history.json
vendored
Normal file
13
test/fixtures/snapchat-2023-11/json/email_campaign_history.json
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"Email Campaign Subscriptions": [
|
||||
{
|
||||
"Email Campaign": "xxxxxxxxxxxxxxxx",
|
||||
"Opt Out Status": "xxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Email Campaign": "xxxxxxxxxxxxxxx",
|
||||
"Opt Out Status": "xxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Email Campaign History": []
|
||||
}
|
||||
100
test/fixtures/snapchat-2023-11/json/friends.json
vendored
Normal file
100
test/fixtures/snapchat-2023-11/json/friends.json
vendored
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
{
|
||||
"Friends": [
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Friend Requests Sent": [
|
||||
{
|
||||
"Username": "xxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxxx",
|
||||
"Display Name": "xxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Blocked Users": [
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Deleted Friends": [
|
||||
{
|
||||
"Username": "xxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Hidden Friend Suggestions": [],
|
||||
"Ignored Snapchatters": [
|
||||
{
|
||||
"Username": "xxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Pending Requests": [
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Username": "xxxxxxxxxxxxxx",
|
||||
"Display Name": "xxxxxxxxxxxxx",
|
||||
"Creation Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Last Modified Timestamp": "2020-04-13 10:09:08 UTC",
|
||||
"Source": "xxxxxxxxxxxxxxxx"
|
||||
}
|
||||
],
|
||||
"Shortcuts": []
|
||||
}
|
||||
26
test/fixtures/snapchat-2023-11/json/in_app_surveys.json
vendored
Normal file
26
test/fixtures/snapchat-2023-11/json/in_app_surveys.json
vendored
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"Survey 2020/04/12": [
|
||||
{
|
||||
"Time": "xxxxxxxxxxxx",
|
||||
"Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Survey Response": "xxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Time": "xxxxxxxxxxxx",
|
||||
"Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Survey Response": "xxx"
|
||||
}
|
||||
],
|
||||
"Survey 2020/04/13": [
|
||||
{
|
||||
"Time": "xxxxxxxxxxxx",
|
||||
"Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Survey Response": "xxxxxxxxxxxxxx"
|
||||
},
|
||||
{
|
||||
"Time": "xxxxxxxxxxxx",
|
||||
"Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Survey Response": "some/path"
|
||||
}
|
||||
]
|
||||
}
|
||||
23
test/fixtures/snapchat-2023-11/json/location_history.json
vendored
Normal file
23
test/fixtures/snapchat-2023-11/json/location_history.json
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
{
|
||||
"Frequent Locations": [],
|
||||
"Latest Location": [
|
||||
{
|
||||
"City": "",
|
||||
"Country": "",
|
||||
"Region": ""
|
||||
}
|
||||
],
|
||||
"Home & Work": {},
|
||||
"Daily Top Locations": [],
|
||||
"Top Locations Per Six-Day Period": [],
|
||||
"Location History": [],
|
||||
"Businesses and public places you may have visited": [],
|
||||
"Areas you may have visited in the last two years": [
|
||||
{
|
||||
"Time": "some/path",
|
||||
"City": "xxxxxx",
|
||||
"Region": "xxxxxxxx",
|
||||
"Postal Code": "11111"
|
||||
}
|
||||
]
|
||||
}
|
||||
6
test/fixtures/snapchat-2023-11/json/ranking.json
vendored
Normal file
6
test/fixtures/snapchat-2023-11/json/ranking.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"Number of Stories Viewed": [
|
||||
1
|
||||
],
|
||||
"Content Interests": []
|
||||
}
|
||||
11
test/fixtures/snapchat-2023-11/json/shared_story.json
vendored
Normal file
11
test/fixtures/snapchat-2023-11/json/shared_story.json
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
{
|
||||
"Shared Story": [],
|
||||
"Spotlight History": [
|
||||
{
|
||||
"Story Date": "2020-04-13 10:09:08 UTC",
|
||||
"Story URL": "url://somewhere",
|
||||
"Action Type": "xxxx",
|
||||
"View Time": "xxxxxxxxxxxxx"
|
||||
}
|
||||
]
|
||||
}
|
||||
4
test/fixtures/snapchat-2023-11/json/snapchat_ai.json
vendored
Normal file
4
test/fixtures/snapchat-2023-11/json/snapchat_ai.json
vendored
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
{
|
||||
"My AI Content": [],
|
||||
"My AI Memory": []
|
||||
}
|
||||
10
test/fixtures/snapchat-2023-11/json/subscriptions.json
vendored
Normal file
10
test/fixtures/snapchat-2023-11/json/subscriptions.json
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"Public Users": [
|
||||
"xxxxxxxxxxxxxxx"
|
||||
],
|
||||
"Publishers": [],
|
||||
"Stories": [],
|
||||
"Last Active Timezone": "some/path",
|
||||
"Push Notifications": [],
|
||||
"Hidden Category Sections": []
|
||||
}
|
||||
15
test/fixtures/snapchat-2023-11/json/terms_history.json
vendored
Normal file
15
test/fixtures/snapchat-2023-11/json/terms_history.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"Snap Inc. Terms of Service": [
|
||||
{
|
||||
"Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Acceptance Date": "2020-04-13 10:09:08 UTC"
|
||||
},
|
||||
{
|
||||
"Version": "xxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"Acceptance Date": "2020-04-13 10:09:08 UTC"
|
||||
}
|
||||
],
|
||||
"Custom Creative Tools Terms": [],
|
||||
"Business Services Terms": [],
|
||||
"Games Terms": []
|
||||
}
|
||||
39
test/fixtures/snapchat-2023-11/json/user_profile.json
vendored
Normal file
39
test/fixtures/snapchat-2023-11/json/user_profile.json
vendored
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
{
|
||||
"App Profile": {
|
||||
"Country": "xx",
|
||||
"Creation Time": "2020-04-13 10:09:08 UTC",
|
||||
"Account Creation Country": "xxxxxxx",
|
||||
"Platform Version": "xxxxxxx",
|
||||
"In-app Language": "xx"
|
||||
},
|
||||
"Demographics": {
|
||||
"Cohort Age": "",
|
||||
"Derived Ad Demographic": ""
|
||||
},
|
||||
"Subscriptions": [],
|
||||
"Engagement": [],
|
||||
"Discover Channels Viewed": [],
|
||||
"Breakdown of Time Spent on App": [],
|
||||
"Ads You Interacted With": [],
|
||||
"Interest Categories": [
|
||||
"xxxxxx",
|
||||
"xxxxxxxxxxxxxxxxxxx"
|
||||
],
|
||||
"Content Categories": [
|
||||
"xxxxxxxxxxxxxxxxxxxxxxxxxxxx",
|
||||
"some/path"
|
||||
],
|
||||
"Geographic Information": [],
|
||||
"Interactions": {
|
||||
"Web Interactions": [
|
||||
"xxxxxxxxxxxxx",
|
||||
"xxxxxxxxxxxxxxxxxxxxxx"
|
||||
],
|
||||
"App Interactions": [
|
||||
"url://somewhere",
|
||||
"url://somewhere"
|
||||
]
|
||||
},
|
||||
"Off-Platform Sharing": [],
|
||||
"Mobile Ad Id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
}
|
||||
BIN
test/fixtures/snapchat-2023-11/memories/2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg
vendored
Normal file
BIN
test/fixtures/snapchat-2023-11/memories/2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.2 KiB |
95
test/task.ts
95
test/task.ts
|
|
@ -9,7 +9,6 @@ import {
|
|||
cmd,
|
||||
assignMeta,
|
||||
verify,
|
||||
TaskTargetPipelineHelper,
|
||||
} from "../data-export/task.ts";
|
||||
|
||||
const THIS_FILE = import.meta.dirname;
|
||||
|
|
@ -92,7 +91,7 @@ test("TaskTarget: pushToPipeline throws if read is not the first op", () => {
|
|||
test("TaskTarget: clone produces an independent copy", () => {
|
||||
const t = new TaskTarget("/foo").assignMeta({
|
||||
idValue: "orig",
|
||||
columnMeta: ["yeag"]
|
||||
columnMeta: ["any"]
|
||||
});
|
||||
t.read();
|
||||
const c = t.clone();
|
||||
|
|
@ -155,41 +154,41 @@ test("toShell: cmd with function resolves at shell-generation time", () => {
|
|||
|
||||
// -- module-level functions ---------------------------------------------------
|
||||
|
||||
test("cd: clones and changes directory of each target", () => {
|
||||
test("cd: clones and changes directory of each target", async () => {
|
||||
const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
|
||||
const result = cd(targets, "sub");
|
||||
const result = await cd("sub")(targets);
|
||||
assert.equal(result[0].path, "/a/sub");
|
||||
assert.equal(result[1].path, "/b/sub");
|
||||
assert.equal(targets[0].path, "/a"); // originals unchanged
|
||||
});
|
||||
|
||||
test("read: clones and adds a read op to each target", () => {
|
||||
test("read: clones and adds a read op to each target", async () => {
|
||||
const targets = [new TaskTarget("/a.txt"), new TaskTarget("/b.txt")];
|
||||
const result = read(targets);
|
||||
const result = await read()(targets);
|
||||
assert.equal(result[0].pipeline[0].type, "read");
|
||||
assert.equal(result[1].pipeline[0].type, "read");
|
||||
assert.equal(targets[0].pipeline.length, 0); // originals unchanged
|
||||
});
|
||||
|
||||
test("cmd: clones and appends a cmd op to each target", () => {
|
||||
test("cmd: clones and appends a cmd op to each target", async () => {
|
||||
const targets = [new TaskTarget("/a.txt")];
|
||||
targets[0].read();
|
||||
const result = cmd(targets, "jq .");
|
||||
const result = await cmd("jq .")(targets);
|
||||
assert.equal(result[0].pipeline.length, 2);
|
||||
assert.equal(targets[0].pipeline.length, 1); // original unchanged
|
||||
});
|
||||
|
||||
test("assignMeta: clones and sets meta on each target", () => {
|
||||
test("assignMeta: clones and sets meta on each target", async () => {
|
||||
const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
|
||||
const result = assignMeta(targets, { idValue: "myid" });
|
||||
const result = await assignMeta({ idValue: "myid" })(targets);
|
||||
assert.equal(result[0].id, "myid");
|
||||
assert.equal(result[1].id, "myid");
|
||||
assert.throws(() => targets[0].id); // originals have no id
|
||||
});
|
||||
|
||||
test("taskGlob: returns matching targets across all input targets", () => {
|
||||
test("taskGlob: returns matching targets across all input targets", async () => {
|
||||
const targets = [new TaskTarget(FIXTURE_DIR)];
|
||||
const result = taskGlob(targets, "friends/*.json");
|
||||
const result = await taskGlob("friends/*.json")(targets);
|
||||
assert.ok(result.length > 0);
|
||||
assert.ok(result.every(r => r.path.endsWith(".json")));
|
||||
});
|
||||
|
|
@ -226,75 +225,3 @@ test("verify: filters a mixed list to only valid targets", async () => {
|
|||
assert.equal(result[0], good);
|
||||
});
|
||||
|
||||
// -- TaskTargetPipelineHelper -------------------------------------------------
|
||||
|
||||
test("TaskTargetPipelineHelper: pipeline() promotes a plain array", () => {
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a")]);
|
||||
assert.ok(p instanceof TaskTargetPipelineHelper);
|
||||
});
|
||||
|
||||
test("TaskTargetPipelineHelper: pipeline() is idempotent", () => {
|
||||
const arr = [new TaskTarget("/a")];
|
||||
const p1 = TaskTargetPipelineHelper.pipeline(arr);
|
||||
const p2 = TaskTargetPipelineHelper.pipeline(p1);
|
||||
assert.equal(p1, p2);
|
||||
});
|
||||
|
||||
test("TaskTargetPipelineHelper: cd returns a new helper with paths changed", () => {
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a"), new TaskTarget("/b")]);
|
||||
const p2 = p.cd("sub");
|
||||
assert.ok(p2 instanceof TaskTargetPipelineHelper);
|
||||
assert.equal(p2[0].path, "/a/sub");
|
||||
assert.equal(p2[1].path, "/b/sub");
|
||||
});
|
||||
|
||||
test("TaskTargetPipelineHelper: read returns a new helper with read ops added", () => {
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
|
||||
const p2 = p.read();
|
||||
assert.ok(p2 instanceof TaskTargetPipelineHelper);
|
||||
assert.equal(p2[0].pipeline[0].type, "read");
|
||||
});
|
||||
|
||||
test("TaskTargetPipelineHelper: cmd returns a new helper with cmd ops added", () => {
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
|
||||
const p2 = p.read().cmd("jq .");
|
||||
assert.equal(p2[0].toShell(), "cat /a.txt | jq .");
|
||||
});
|
||||
|
||||
// -- collect ------------------------------------------------------------------
|
||||
|
||||
test("collect: the final end of a chain is added to the collection set", () => {
|
||||
const collection = new Set<TaskTargetPipelineHelper>();
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
|
||||
p.collect(collection);
|
||||
|
||||
const p2 = p.cd("sub");
|
||||
assert.equal(collection.size, 1);
|
||||
assert.ok(collection.has(p2));
|
||||
});
|
||||
|
||||
test("collect: moving the chain end removes the old element and adds the new one", () => {
|
||||
const collection = new Set<TaskTargetPipelineHelper>();
|
||||
const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
|
||||
p.collect(collection);
|
||||
|
||||
const p2 = p.cd("sub");
|
||||
const p3 = p2.read();
|
||||
assert.equal(collection.size, 1);
|
||||
assert.ok(collection.has(p3));
|
||||
assert.ok(!collection.has(p2));
|
||||
});
|
||||
|
||||
test("collect: gathers the ends of multiple independent pipeline branches", () => {
|
||||
const collection = new Set<TaskTargetPipelineHelper>();
|
||||
|
||||
const b1 = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]).collect(collection).read();
|
||||
const b2 = TaskTargetPipelineHelper.pipeline([new TaskTarget("/b.txt")]).collect(collection).read();
|
||||
|
||||
assert.equal(collection.size, 2);
|
||||
assert.ok(collection.has(b1));
|
||||
assert.ok(collection.has(b2));
|
||||
|
||||
const allTargets = [...collection].flat();
|
||||
assert.equal(allTargets.length, 2);
|
||||
});
|
||||
|
|
|
|||
225
timelinize.ts
Normal file
225
timelinize.ts
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
import { type SQLOutputValue, type DatabaseSync } from "node:sqlite";
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { fileURLToPath } from "node:url";
|
||||
import "./data-export/facebook.ts";
|
||||
import { facebook } from "./data-export/facebook.ts";
|
||||
import { execPaths, COLUMN_TYPES } from "./data-export/task.ts";
|
||||
import * as DataIO from "./data-export/io.ts";
|
||||
import {
|
||||
startTime,
|
||||
elapsed,
|
||||
loadTaskInNewDb
|
||||
} from "./main.ts";
|
||||
|
||||
const __filename = fileURLToPath(import.meta.url);
|
||||
|
||||
/**
 * Writes every row of a table to a CSV file.
 *
 * The header row is taken from the keys of the first row returned, so a table
 * with no rows produces an empty file. Every field (header and data) is
 * wrapped in double quotes with embedded quotes doubled, and NULL/undefined
 * values are written as empty strings.
 *
 * @param db Open database to read from
 * @param tableName Table to dump. Interpolated into the SQL as-is, so it must
 *   be a trusted, valid identifier
 * @param outputFile Path of the CSV file to create (or overwrite)
 */
function dumpDBTableToCSV(db: DatabaseSync, tableName: string, outputFile: string) {
  // Quote every field and double embedded quotes so that commas, quotes and
  // newlines inside a value (or a column name) cannot break the row structure
  const toCsvLine = (fields: unknown[]): string =>
    fields.map(field => `"${String(field ?? '').replace(/"/g, '""')}"`).join(',') + '\n';

  const stream = createWriteStream(outputFile);
  const stmt = db.prepare(`SELECT * FROM ${tableName}`);

  let headerWritten = false;
  for (const row of stmt.iterate()) {
    if (!headerWritten) {
      // Header goes through the same escaping as the data rows
      stream.write(toCsvLine(Object.keys(row)));
      headerWritten = true;
    }
    stream.write(toCsvLine(Object.values(row)));
  }

  // All rows have been read from the database at this point; the stream
  // finishes flushing to disk asynchronously after end() is called
  stream.end();
}
|
||||
/**
 * Looks up the column names of a table through `PRAGMA table_info`.
 *
 * @param db Open database to query
 * @param tableName Table to inspect; interpolated into the pragma as-is
 * @returns The `name` field of every row the pragma returns, in result order
 */
function getColumnNames(db: DatabaseSync, tableName: string) {
  const columnInfo = db.prepare(`PRAGMA table_info(${tableName})`).all();
  const names = columnInfo.map(column => column.name);
  return names as string[];
}
|
||||
/**
 * Converts a positional template such as `'{0} sent {1}'` into a SQLite
 * `printf(...)` expression that fills each placeholder from a table column.
 *
 * Literal text is escaped so it survives both layers it passes through:
 * single quotes are doubled for the SQL string literal, and `%` is doubled so
 * printf does not read it as a format specifier. Column names are emitted as
 * double-quoted identifiers so names that are SQL keywords (e.g. "from") or
 * contain special characters still resolve.
 *
 * @param template Text with `{N}` placeholders, N being an index into `columns`
 * @param columns Column names that the placeholder indices refer to
 * @returns A SQL expression of the form `printf('<format>', "col", ...)`, or
 *   `printf('<format>')` when the template has no placeholders
 * @throws RangeError if a placeholder index has no matching entry in `columns`
 */
function templateToSql(template: string, columns: string[]) {
  const args: string[] = [];
  // One pass handles both placeholders and the characters needing escaping,
  // so escaping can never touch the '%s' inserted for a placeholder
  const sqlTemplate = template.replace(/\{(\d+)\}|['%]/g, (match: string, index: string | undefined) => {
    if (index === undefined) {
      return match === "'" ? "''" : '%%';
    }
    const column = columns[Number.parseInt(index, 10)];
    if (column === undefined) {
      throw new RangeError(`Template placeholder ${match} is out of range, only ${columns.length} column(s) available`);
    }
    args.push(`"${column.replace(/"/g, '""')}"`);
    return '%s';
  });

  // Without placeholders there are no arguments; avoid emitting a dangling comma
  return args.length > 0
    ? `printf('${sqlTemplate}', ${args.join(', ')})`
    : `printf('${sqlTemplate}')`;
}
|
||||
/**
 * Renders a value as a SQL literal suitable for direct interpolation.
 *
 * @param str Text to render; `null` and `undefined` both map to SQL NULL
 * @returns `NULL`, or the text wrapped in single quotes with every embedded
 *   single quote doubled
 */
function sqlLiteral(str: string | undefined | null): string {
  if (str == null) {
    // `== null` matches both null and undefined
    return 'NULL';
  }
  // Splitting on the quote and re-joining with two quotes doubles each one
  const doubledQuotes = str.split("'").join("''");
  return "'" + doubledQuotes + "'";
}
|
||||
|
||||
/**
 * Builds a single `combined` timeline table out of every table listed in
 * `base_data_manager_metadata`, then writes the result to disk.
 *
 * Steps:
 *  1. Resolve the export targets and load them into a new database.
 *  2. Create the `combined` output table.
 *  3. For each metadata row, validate it and copy the table's rows into
 *     `combined`, either as chat messages (when both a sender and an
 *     isodatetime column are present) or as generic notes (when only an
 *     isodatetime column is present). Tables with neither are skipped.
 *  4. Dump the whole database to `debug_your.csv.db` and the `combined` table
 *     to `your.csv`.
 */
async function main() {
  // Configure the tasks to run
  console.log(`${elapsed()} - Building targets`);
  const targets = await execPaths([
    {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()}
  ]);
  console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`);
  const db = await loadTaskInNewDb(targets);

  // New output tables
  db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`);

  // Valid values for the `type` column:
  //(message, email, note,
  // social, location, media, event, document,
  // bookmark; defaults to note)

  type ColumnMetaType = (keyof typeof COLUMN_TYPES);
  interface MetadataRow {
    id: string,
    perRowDescription?: string,
    perRowTags?: string,
    columnMeta: ColumnMetaType[],
    columnNames: string[],
    metaId?: string
  }

  /**
   * Validates one raw row of `base_data_manager_metadata`.
   * Returns the typed row (with the table's column names attached), or
   * undefined after logging why the row cannot be used.
   */
  function verifyMetdataRow(input: Record<string, SQLOutputValue>): undefined | MetadataRow {
    const { id, perRowDescription, perRowTags, columnMeta: columnMetaCSV, metaId } = input;
    if (!id) {
      console.error("Row did not have id/tableName, skipping");
      return undefined;
    }
    if (typeof id !== "string") {
      console.error(`Id must be string, got ${typeof id}, ${id}`);
      return undefined;
    }
    if (!columnMetaCSV) {
      console.warn(`${id} did not have columnMeta, nothing to do. Skipping`);
      return undefined; // No column information
    }
    if (typeof columnMetaCSV !== "string") {
      console.warn(`${id} did not have columnMeta of type string. Skipping`);
      return undefined;
    }
    const columnMeta = columnMetaCSV.split(",") as ColumnMetaType[];

    // Get the column names from the table id
    const columnNames = getColumnNames(db, id);
    if (columnNames.length !== columnMeta.length) {
      console.error(`columnNames and columnMeta did not have same length. skipping`);
      return undefined;
    }

    if (typeof perRowDescription !== "string" && perRowDescription !== undefined && perRowDescription !== null) {
      console.warn(`Invalid typeof perRowDescription, was ${typeof perRowDescription}, value ${perRowDescription}`);
      return undefined;
    }
    if (typeof perRowTags !== "string" && perRowTags !== undefined && perRowTags !== null) {
      console.warn(`Invalid typeof perRowTags, was ${typeof perRowTags}, value ${perRowTags}`);
      return undefined;
    }
    if (typeof metaId !== "string" && metaId !== undefined && metaId !== null) {
      console.warn(`Invalid typeof metaId, was ${typeof metaId}, value ${metaId}`);
      return undefined;
    }

    // SQL NULLs are normalized to undefined to match the optional fields
    return {
      id,
      perRowDescription: perRowDescription ?? undefined,
      perRowTags: perRowTags ?? undefined,
      columnMeta,
      columnNames,
      metaId: metaId ?? undefined
    };
  }

  /**Maps columnMeta names to the column names*/
  function metaToNames(meta: MetadataRow): Partial<Record<ColumnMetaType, string>> {
    const out: Partial<Record<ColumnMetaType, string>> = {};
    for (const [idx, name] of meta.columnNames.entries()) {
      const metaName = meta.columnMeta[idx];
      if (out[metaName]) {
        // First column with a given meta name wins
        console.warn(`Duplicate column with metaName "${metaName}". The current one which will be used is "${out[metaName]}". Skipping the duplicate.`);
        continue;
      }
      out[metaName] = name;
    }
    return out;
  }

  /**
   * Builds the SQL fragment to select for every known column type: the
   * bracket-quoted column name when the table has that type, otherwise NULL.
   */
  function metaParts(metaNameToColumnName: Partial<Record<ColumnMetaType, string>>): Record<ColumnMetaType, string> {
    const out: Record<ColumnMetaType, string> = {} as any;
    for (const type of Object.keys(COLUMN_TYPES) as ColumnMetaType[]) {
      if (!metaNameToColumnName[type]) {
        out[type] = "NULL";
        continue;
      }
      // Wrap in brackets so column names like "from" don't cause any issues
      out[type] = `[${metaNameToColumnName[type]}]`
    }
    return out;
  }

  // Iterate over all the tables and their metadata
  const statement = db.prepare(`SELECT id, perRowDescription, perRowTags, columnMeta, metaId FROM base_data_manager_metadata`);
  for (const row of statement.iterate()) {
    const verified = verifyMetdataRow(row);
    if (!verified) {
      continue;
    }
    const { id, perRowDescription, perRowTags, columnMeta, columnNames, metaId } = verified;
    const metaNameToColumnName = metaToNames(verified);
    const part = metaParts(metaNameToColumnName);

    // Now find what to insert into each row of the combined
    // Per row tags is a string of csv'd items but needs to be made a literal
    // TODO: Make this either a template string or have jq do something
    // tagsPart = templateToSqlExpr(target.perRowTags, columnNames);
    const tagsPart = sqlLiteral(perRowTags);

    // Choose what to do with this table based on what meta is present
    if (
      !!metaNameToColumnName.sender
      && !!metaNameToColumnName.isodatetime
    ) {
      if (!metaId) {
        console.warn(`Chat ${id} with .sender but no .metaId. Skipping`);
        continue;
      }

      // First pull the name of the conversation out of the metaId
      const receiverThreadTitle = db.prepare(`SELECT title FROM ${metaId} WHERE (id=${sqlLiteral(id)})`).get()?.title;
      if (!receiverThreadTitle || typeof receiverThreadTitle !== "string") {
        console.warn(`Chat ${id} with .metaId ${metaId} returned invalid receiverThreadTitle ${typeof receiverThreadTitle}. Skipping`);
        continue;
      }
      const receiverPart = sqlLiteral(receiverThreadTitle);

      // Put this table into the combined table
      db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${part.text}, 'message', ${part.sender}, ${receiverPart}, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`);
    }
    else if (!!metaNameToColumnName.isodatetime) {
      // Put this table into the combined table
      // The fallback description goes through sqlLiteral so that it is always
      // a well-formed string literal regardless of what the id contains
      const descriptionPart = perRowDescription
        ? templateToSql(perRowDescription, columnNames)
        : sqlLiteral(`An entry from the ${id} table`); // Default is just kinda garbo...
      // 'note' is the default entry type (see the list of valid types above)
      db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${descriptionPart}, 'note', NULL, NULL, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`);
    }
    else {
      console.warn(`Table with id ${id} had no isodatetime or anything else of value, skipping...`);
    }
  }

  const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()?.count;
  console.log(`${elapsed()} - Combined database built with ${count} rows`);

  // Dump it to the disk for debugging
  const sqlitePath = "debug_your.csv.db";
  console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`);
  await DataIO.dumpDBToDisk(db, sqlitePath);

  console.log(`${elapsed()} - Database written to disk`);

  // Dump it all to the path specified
  dumpDBTableToCSV(db, "combined", "your.csv");
  console.log(`${elapsed()} - Combined database written to disk as CSV`);
  db.close();
}
|
||||
|
||||
// Only run when this file is executed directly, not when it is imported
if (process.argv[1] === __filename) {
  // main() is async: handle a rejection explicitly instead of leaving a
  // floating promise, and signal failure through the exit code
  main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
  });
}
|
||||
|
||||
123
util/scrub.jq
123
util/scrub.jq
|
|
@ -3,46 +3,89 @@
|
|||
# fd -t f .json -0 | xargs -I % -0 -- jq -f scrub.jq "%" > "%"
|
||||
# (Though you should remove the end `> "%"` first to get just the output without
|
||||
# persisting to be sure it's what you want first)
|
||||
def scrub:
|
||||
walk(
|
||||
if type == "string" then
|
||||
if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then
|
||||
"1.1.1.1"
|
||||
elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then
|
||||
"2000:0000:0000:0000:0000:0000:0000:0000"
|
||||
elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then
|
||||
"not_a_real_email@example.com"
|
||||
elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then
|
||||
# Leave these alone, you will have to manually go through these later and replace with
|
||||
# placeholders
|
||||
# TODO: jq 1.7 adds debug(), use this instead when I can upgrade jq, otherwise
|
||||
# you need to manually grep for MANUAL REPAIR NEEDED for now
|
||||
("MANUAL REPAIR NEEDED: \(.)" | stderr) | .
|
||||
elif test("://") then
|
||||
"url://somewhere"
|
||||
elif test("/") then
|
||||
"some/path"
|
||||
else
|
||||
"xxx"
|
||||
end
|
||||
elif type == "number" then
|
||||
if 946702800 <= . and . <= 1893474000 then
|
||||
# Take modulo 1 year to get variance in the output, then add offset to bring to ~2024
|
||||
((((. % 31557600) + 1704067200) / 5000 | floor) * 5000)
|
||||
else
|
||||
69
|
||||
end
|
||||
elif type == "array" then
|
||||
# Keep only 2 elements, but scrub *those* elements
|
||||
if length > 1 then
|
||||
[ (.[0] | scrub), (.[1] | scrub) ]
|
||||
elif length > 0 then
|
||||
[ (.[0] | scrub) ]
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
def scrub_key:
|
||||
if test("^[0-9]+$") then
|
||||
("1" * length)
|
||||
else
|
||||
.
|
||||
end;
|
||||
|
||||
def scrub_primitive:
|
||||
if type == "string" then
|
||||
if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then
|
||||
# IPv4
|
||||
"1.1.1.1"
|
||||
elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then
|
||||
# IPv6
|
||||
"2000:0000:0000:0000:0000:0000:0000:0000"
|
||||
elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then
|
||||
# Email-like
|
||||
"not_a_real_email@example.com"
|
||||
elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then
|
||||
# Leave these alone, you will have to manually go through these later and replace with
|
||||
# placeholders
|
||||
# TODO: jq 1.7 adds debug(), use this instead when I can upgrade jq, otherwise
|
||||
# you need to manually grep for MANUAL REPAIR NEEDED for now
|
||||
("MANUAL REPAIR NEEDED: \(.)" | stderr) | .
|
||||
elif test("://") then
|
||||
"url://somewhere"
|
||||
elif test("/") then
|
||||
"some/path"
|
||||
elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+\\-][0-9]{2}:[0-9]{2}$") then
|
||||
# iso date time without millis with timezone
|
||||
"2020-04-13T10:09:08+00:00"
|
||||
elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?[+\\-][0-9]{2}:[0-9]{2}$") then
|
||||
# iso date time with millis with timezone
|
||||
"2020-04-13T10:09:08.000000+00:00"
|
||||
elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC") then
|
||||
# Date format from snapchat export
|
||||
"2020-04-13 10:09:08 UTC"
|
||||
elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}") then
|
||||
# Date format from snapchat export
|
||||
"2020-04-13 10:09:08"
|
||||
elif test("^[0-9]+$") then
|
||||
# preserve length of the string
|
||||
"1" * length
|
||||
elif test("^[0-9a-fA-F]+$") then #hexadecimal string
|
||||
# repeat the hex pattern and truncate to original length
|
||||
("a1" * length)[:length]
|
||||
elif . == "" then
|
||||
# prevents empty string from just returning null instead of empty string
|
||||
""
|
||||
else
|
||||
.
|
||||
# Preserve string length for other strings
|
||||
"x" * length
|
||||
end
|
||||
);
|
||||
elif type == "number" then
|
||||
if 946702800 <= . and . <= 1893474000 then
|
||||
# Take modulo 1 year to get variance in the output, then add offset to bring to ~2024
|
||||
((((. % 31557600) + 1704067200) / 5000 | floor) * 5000)
|
||||
elif . == (. | floor) then
|
||||
# Integer - preserve digit count
|
||||
(tostring | length) as $len | ("1" * $len) | tonumber
|
||||
else
|
||||
8.08
|
||||
end
|
||||
elif type == "boolean" then
|
||||
# Replace all booleans with false, this can give sensitive info away based
|
||||
# on what the key was in the data
|
||||
false
|
||||
else
|
||||
.
|
||||
end;
|
||||
|
||||
def scrub:
|
||||
if type == "object" then
|
||||
# Apply scrubbing to both keys and values
|
||||
with_entries(.key |= scrub_key | .value |= scrub)
|
||||
elif type == "array" then
|
||||
# Keep only 2 elements, but scrub *those* elements
|
||||
.[:2] | map(scrub)
|
||||
else
|
||||
# Scrub a primitive value
|
||||
scrub_primitive
|
||||
end;
|
||||
|
||||
# Call scrub
|
||||
scrub
|
||||
|
|
@ -27,9 +27,6 @@ assert(targetDir, "Usage: ./scrub.ts <directory>");
|
|||
|
||||
const targetPath = path.resolve(targetDir);
|
||||
|
||||
// const stat = await fs.stat(targetPath);
|
||||
// assert(stat.isDirectory(), "");
|
||||
|
||||
const [notADir] = await ptry($`test -d ${targetPath}`);
|
||||
assert(!notADir, `Error: '${targetPath}' is not a directory`);
|
||||
|
||||
|
|
@ -49,12 +46,16 @@ console.log("filePaths", filePaths);
|
|||
for (const file of filePaths) {
|
||||
console.log(`Processing: ${file}`);
|
||||
const tmpFile = `${file}.tmp`;
|
||||
const piiFile = `${file}.DELETE-THIS-HAS-PII`;
|
||||
|
||||
const [jqErr] = await ptry($`jq -f ${scrubJq} ${file} > ${tmpFile}`);
|
||||
assert(!jqErr, `Error processing ${file}: ${jqErr}`);
|
||||
|
||||
const [mvErr] = await ptry($`mv ${tmpFile} ${file}`);
|
||||
assert(!mvErr, `Error moving ${tmpFile} to ${file}: ${mvErr}`);
|
||||
const [mvErr] = await ptry($`mv ${file} ${piiFile}`);
|
||||
assert(!mvErr, `Error moving ${file} to ${piiFile}: ${mvErr}`);
|
||||
|
||||
const [mv2Err] = await ptry($`mv ${tmpFile} ${file}`);
|
||||
assert(!mv2Err, `Error moving ${tmpFile} to ${file}: ${mv2Err}`);
|
||||
}
|
||||
|
||||
console.log();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue