Compare commits
4 commits
20a0a6b310
...
fa07098549
| Author | SHA1 | Date | |
|---|---|---|---|
| fa07098549 | |||
| 29a7d43851 | |||
| 4a20a02ef5 | |||
| 787e281049 |
10 changed files with 604 additions and 2422 deletions
|
|
@ -168,7 +168,7 @@ function facebook_comments_generic(this: TaskTargetPipelineHelper, prop: string)
|
||||||
return this.cmd(["jq", "-r", `
|
return this.cmd(["jq", "-r", `
|
||||||
["timestamp","data", "title"],
|
["timestamp","data", "title"],
|
||||||
(
|
(
|
||||||
.comments[]?
|
.${prop}[]?
|
||||||
| [(.timestamp | todateiso8601), "TODO", .title]
|
| [(.timestamp | todateiso8601), "TODO", .title]
|
||||||
)
|
)
|
||||||
| @csv
|
| @csv
|
||||||
|
|
@ -269,7 +269,7 @@ function facebook_admin_records_generic(this: TaskTargetPipelineHelper, prop: st
|
||||||
return this.cmd(["jq", "-r", `
|
return this.cmd(["jq", "-r", `
|
||||||
["event","created_timestamp","ip_address","user_agent","datr_cookie"],
|
["event","created_timestamp","ip_address","user_agent","datr_cookie"],
|
||||||
(
|
(
|
||||||
.admin_records[]
|
.${prop}[]
|
||||||
| [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie]
|
| [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie]
|
||||||
)
|
)
|
||||||
| @csv
|
| @csv
|
||||||
|
|
@ -301,10 +301,10 @@ function facebook_authorized_logins_v2(this: TaskTargetPipelineHelper) {
|
||||||
}
|
}
|
||||||
function facebook_contact_verification_generic(this: TaskTargetPipelineHelper, prop: string) {
|
function facebook_contact_verification_generic(this: TaskTargetPipelineHelper, prop: string) {
|
||||||
return this.cmd(["jq", "-r", `
|
return this.cmd(["jq", "-r", `
|
||||||
["action", "timestamp", "site", "ip_address"],
|
["timestamp", "email", "contact_type"],
|
||||||
(
|
(
|
||||||
.${prop}[]
|
.${prop}[]
|
||||||
| [.action, (.timestamp | todateiso8601), .site, .ip_address]
|
| [(.verification_time | todateiso8601), .contact, .contact_type]
|
||||||
)
|
)
|
||||||
| @csv
|
| @csv
|
||||||
`])
|
`])
|
||||||
|
|
@ -399,7 +399,7 @@ function facebook_v2(this: TaskTargetPipelineHelper) {
|
||||||
// No corollary for your_off-facebook_activity.json
|
// No corollary for your_off-facebook_activity.json
|
||||||
p.collect(col).cd(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`).read().facebook_installed_apps_v2();
|
p.collect(col).cd(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`).read().facebook_installed_apps_v2();
|
||||||
p.collect(col).cd(`your_facebook_activity/comments_and_reactions/comments.json`).read().facebook_comments_v2();
|
p.collect(col).cd(`your_facebook_activity/comments_and_reactions/comments.json`).read().facebook_comments_v2();
|
||||||
p.collect(col).glob(`your_facebook_activity/messages/**/*.json`) // Files are message_1.json, etc
|
p.collect(col).glob(`your_facebook_activity/messages/*/**/*.json`) // Messages files are in the FOLDERS inside messages (archived_threads, e2ee_cutover, etc...)
|
||||||
.setId(t=>`Facebookv2 - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
|
.setId(t=>`Facebookv2 - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
|
||||||
.read()
|
.read()
|
||||||
.facebook_messages_generic()
|
.facebook_messages_generic()
|
||||||
|
|
@ -438,7 +438,8 @@ function facebook_v2(this: TaskTargetPipelineHelper) {
|
||||||
|
|
||||||
p.collect(col).cd(`your_facebook_activity/facebook_marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v2()
|
p.collect(col).cd(`your_facebook_activity/facebook_marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v2()
|
||||||
|
|
||||||
return Array.from(col);
|
const final = Array.from(col).flat();
|
||||||
|
return TaskTargetPipelineHelper.pipeline(final);
|
||||||
}
|
}
|
||||||
|
|
||||||
function facebook(this: TaskTargetPipelineHelper){
|
function facebook(this: TaskTargetPipelineHelper){
|
||||||
|
|
@ -606,7 +607,7 @@ function facebook(this: TaskTargetPipelineHelper){
|
||||||
p.collect(col).cd(`marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v1()
|
p.collect(col).cd(`marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v1()
|
||||||
|
|
||||||
|
|
||||||
p.collect(col).cd(`messages/**/*.json`) // Files are message_1.json, etc
|
p.collect(col).glob(`messages/**/*.json`) // Files are message_1.json, etc
|
||||||
.setId(t=>`Facebook - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
|
.setId(t=>`Facebook - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
|
||||||
.read()
|
.read()
|
||||||
.facebook_messages_generic()
|
.facebook_messages_generic()
|
||||||
|
|
@ -780,6 +781,7 @@ function facebook(this: TaskTargetPipelineHelper){
|
||||||
// `${facebookRoot}/your_places` - no data in my export
|
// `${facebookRoot}/your_places` - no data in my export
|
||||||
// `${facebookRoot}/your_topics` - no data in my export
|
// `${facebookRoot}/your_topics` - no data in my export
|
||||||
|
|
||||||
return Array.from(col);
|
const final = Array.from(col).flat();
|
||||||
|
return TaskTargetPipelineHelper.pipeline(final);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
86
data-export/parallel.ts
Normal file
86
data-export/parallel.ts
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
import { $, type ProcessOutput } from 'zx';
|
||||||
|
import os from 'os';
|
||||||
|
import { type TaskTarget, run } from "./task.ts";
|
||||||
|
|
||||||
|
$.verbose = false;
|
||||||
|
|
||||||
|
type ResultMap = Map<string, ProcessOutput>;
|
||||||
|
|
||||||
|
/**
 * Runs every target through `run()` with a bounded number of jobs in flight,
 * drawing a single-line progress/ETA indicator on stderr.
 *
 * @param targets - Targets to execute. Each result is stored under `t.id`, so
 *   two targets sharing an id will overwrite each other in the returned map.
 * @param quiet - When true the live status line is suppressed. The final
 *   summary line is still written to stderr.
 * @param maxConcurrency - Upper bound on simultaneously running jobs. Values
 *   below 1 and NaN are clamped to 1 so the scheduler can always make progress
 *   (`os.cpus()` may legitimately report zero CPUs).
 * @returns Map from target id to the output of that target's job.
 * @throws The first error raised by `run()`. No further jobs are started once
 *   an error is seen, and already-running jobs are allowed to settle first.
 */
export async function parallel(
  targets: TaskTarget[],
  quiet: boolean = false,
  maxConcurrency: number = os.cpus().length
): Promise<ResultMap> {
  const results = new Map<string, ProcessOutput>();

  // A limit of 0 (or NaN) would never start a job and never await one,
  // spinning the scheduling loop forever. Infinity is left as "unbounded".
  const limit = Number.isNaN(maxConcurrency)
    ? 1
    : Math.max(1, Math.floor(maxConcurrency));

  const total = targets.length;
  let completed = 0;
  let running = 0;
  const completionTimes: number[] = [];
  const startTime = Date.now();

  const inFlight = new Set<Promise<void>>();
  // Errors thrown by run() itself (not non-zero exits, which are reported via `ok`).
  const errors: unknown[] = [];

  /** Builds the one-line progress summary shown while jobs are running. */
  function formatEta(): string {
    const left = total - completed;
    const avgSeconds = completionTimes.length > 0
      ? completionTimes.reduce((a, b) => a + b, 0) / completionTimes.length / 1000
      : 0;
    // Jobs run `limit` at a time, so wall-clock time remaining is the serial
    // estimate divided by the number of slots that can actually be filled.
    const slots = Math.min(limit, Math.max(left, 1));
    const etaSeconds = Math.round((left * avgSeconds) / slots);
    const pct = total > 0 ? Math.round((completed / total) * 100) : 100;
    const lastDuration = completionTimes.length > 0
      ? (completionTimes[completionTimes.length - 1] / 1000).toFixed(1)
      : '0.0';

    return `ETA: ${etaSeconds}s Left: ${left} AVG: ${avgSeconds.toFixed(2)}s local:${running}/${completed}/${pct}%/${lastDuration}s`;
  }

  /** Redraws the status line in place (carriage return, no newline). */
  function printStatus(): void {
    if (quiet) {
      return;
    }
    process.stderr.write(`\r${formatEta()}`.padEnd(80));
  }

  /** Runs one target and records its output; counters are updated even on failure. */
  async function runJob(t: TaskTarget): Promise<void> {
    running++;
    printStatus();

    try {
      const result = await run(t);
      completionTimes.push(result.duration);
      results.set(t.id, result);
    } finally {
      // Keep the bookkeeping consistent whether or not run() threw.
      running--;
      completed++;
      printStatus();
    }
  }

  const queue = targets.slice();
  // Process queue with concurrency limit. After the first hard error we stop
  // scheduling new work but keep waiting until in-flight jobs have settled.
  while ((errors.length === 0 && queue.length > 0) || inFlight.size > 0) {
    // Fill up to max concurrency
    while (errors.length === 0 && queue.length > 0 && inFlight.size < limit) {
      const target = queue.shift();
      if (target === undefined) {
        break;
      }
      const promise: Promise<void> = runJob(target)
        .catch((err: unknown) => {
          // Swallow here so Promise.race below never rejects; rethrown at the end.
          errors.push(err);
        })
        .finally(() => {
          inFlight.delete(promise);
        });
      inFlight.add(promise);
    }

    // Wait for at least one to complete if at capacity
    if (inFlight.size > 0) {
      await Promise.race(inFlight);
    }
  }

  // Terminate the in-place status line
  process.stderr.write('\n');

  if (errors.length > 0) {
    throw errors[0];
  }

  const totalSeconds = ((Date.now() - startTime) / 1000).toFixed(1);
  let failedCount = 0;
  for (const output of results.values()) {
    if (!output.ok) {
      failedCount++;
    }
  }
  process.stderr.write(
    `\nCompleted ${total} jobs in ${totalSeconds}s (${failedCount} failed)\n`
  );

  return results;
}
|
||||||
|
|
@ -1,352 +0,0 @@
|
||||||
import nodePath from 'node:path';
|
|
||||||
import fs from 'node:fs';
|
|
||||||
import { strict as assert } from "node:assert";
|
|
||||||
import { execFile as _execFile } from "node:child_process";
|
|
||||||
import { promisify } from "node:util";
|
|
||||||
import { ZipFS } from "./zipFs.ts";
|
|
||||||
import { globSync } from "glob";
|
|
||||||
|
|
||||||
const execFile = promisify(_execFile);
|
|
||||||
|
|
||||||
type FSImpl = {
|
|
||||||
isZip?: boolean;
|
|
||||||
zipPath?: string;
|
|
||||||
init?(): Promise<void>;
|
|
||||||
ready?: boolean;
|
|
||||||
|
|
||||||
statSync: typeof fs["statSync"];
|
|
||||||
existsSync: typeof fs["existsSync"];
|
|
||||||
|
|
||||||
// Required by glob
|
|
||||||
lstatSync: typeof fs["lstatSync"];
|
|
||||||
// Needs to include withFileTypes DirEnt variant
|
|
||||||
readdir: typeof fs["readdir"];
|
|
||||||
readdirSync: typeof fs["readdirSync"];
|
|
||||||
readlinkSync: typeof fs["readlinkSync"];
|
|
||||||
realpathSync: typeof fs["realpathSync"];
|
|
||||||
promises: {
|
|
||||||
lstat: typeof fs.promises["lstat"];
|
|
||||||
// Needs to include withFileTypes DirEnt
|
|
||||||
readdir: typeof fs.promises["readdir"];
|
|
||||||
readlink: typeof fs.promises["readlink"];
|
|
||||||
realpath: typeof fs.promises["realpath"];
|
|
||||||
}
|
|
||||||
};
|
|
||||||
const defaultFSImpl = fs;
|
|
||||||
|
|
||||||
function safe(s: string) {
|
|
||||||
return s.replace(/[^a-zA-Z0-9_]/g, '_');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//TODO: DANGER: I doubt this is safe...
|
|
||||||
function shEscape(s: string) {
|
|
||||||
assert(!s.includes("\n"), "shEscape given new line, caller needs to handle these");
|
|
||||||
if (!s.match(/[ \$\"\'\!]/)) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
// We need to quote this string
|
|
||||||
// Single quoted strings require you to close the single quoted string, then
|
|
||||||
// use the escaped single quote, and then reopen the string... obscene
|
|
||||||
s = s.replace(/'/g, "'\\''");
|
|
||||||
s = `'${s}'`;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
abstract class TaskTargetBase {
|
|
||||||
target: TaskTarget;
|
|
||||||
constructor(target: TaskTarget) {
|
|
||||||
this.target = target;
|
|
||||||
}
|
|
||||||
abstract get type(): "read" | "mid";
|
|
||||||
abstract toShell(): string;
|
|
||||||
}
|
|
||||||
class TaskTargetRead extends TaskTargetBase {
|
|
||||||
get type(){ return "read" as const; }
|
|
||||||
toShell() {
|
|
||||||
if (this.target.fsImpl.isZip) {
|
|
||||||
assert(this.target.fsImpl.zipPath, "Should have a zipPath");
|
|
||||||
// We need to be able to do this
|
|
||||||
return `7z x ${shEscape(this.target.fsImpl.zipPath)} -so ${shEscape(this.target.path)}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO : Implement when reading from a zip file
|
|
||||||
return `cat ${shEscape(this.target.path)}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
class TaskTargetCmd extends TaskTargetBase {
|
|
||||||
get type(){ return "mid" as const; }
|
|
||||||
/**What nodejs spawn() and execFile() take
|
|
||||||
* [cmd, ...args]: string[]
|
|
||||||
*/
|
|
||||||
cmd: string[];
|
|
||||||
static parse(target: TaskTarget, v: string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[])): string[] {
|
|
||||||
if (typeof v === "function") {
|
|
||||||
v = v(target);
|
|
||||||
}
|
|
||||||
if (typeof v === "string") {
|
|
||||||
v = v.split(/\s+/);
|
|
||||||
}
|
|
||||||
return v;
|
|
||||||
}
|
|
||||||
constructor(target: TaskTarget, cmd: string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[])) {
|
|
||||||
super(target);
|
|
||||||
this.cmd = TaskTargetCmd.parse(target, cmd);
|
|
||||||
}
|
|
||||||
toShell() {
|
|
||||||
const out = this.cmd
|
|
||||||
.map(c => {
|
|
||||||
let sh = c.replace(/\n/g, "")
|
|
||||||
return shEscape(sh);
|
|
||||||
});
|
|
||||||
|
|
||||||
return out.join(" ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TaskTarget {
|
|
||||||
path: string;
|
|
||||||
fsImpl: FSImpl = defaultFSImpl;
|
|
||||||
pipeline: TaskTargetBase[];
|
|
||||||
idValue: string | ((t: TaskTarget)=>string) | undefined;
|
|
||||||
postFns: ((t: TaskTarget)=>Promise<void>)[];
|
|
||||||
|
|
||||||
constructor(path: string){
|
|
||||||
this.path = path;
|
|
||||||
this.pipeline = [];
|
|
||||||
this.postFns = [];
|
|
||||||
}
|
|
||||||
|
|
||||||
exists() {
|
|
||||||
return this.fsImpl.existsSync(this.path);
|
|
||||||
}
|
|
||||||
|
|
||||||
_joinPath(path: string) {
|
|
||||||
let finalPath = path;
|
|
||||||
if (!path.startsWith('/')) {
|
|
||||||
finalPath = nodePath.join(this.path, path)
|
|
||||||
}
|
|
||||||
return finalPath;
|
|
||||||
}
|
|
||||||
|
|
||||||
get basename() {
|
|
||||||
return safe(nodePath.basename(this.path));
|
|
||||||
}
|
|
||||||
basenameN(n: number) {
|
|
||||||
return this.path
|
|
||||||
.split("/")
|
|
||||||
.map(s => safe(s))
|
|
||||||
.slice(-n)
|
|
||||||
.join("___");
|
|
||||||
}
|
|
||||||
|
|
||||||
get id() {
|
|
||||||
assert(this.idValue, `TaskTarget for path "${this.path}" must have an id`);
|
|
||||||
if (typeof this.idValue === "function") {
|
|
||||||
return safe(this.idValue(this));
|
|
||||||
}
|
|
||||||
return safe(this.idValue);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Changes the current directory of the target*/
|
|
||||||
cd(path: string) {
|
|
||||||
this.path = this._joinPath(path);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Get a glob off of the target*/
|
|
||||||
glob(globPath: string) {
|
|
||||||
globPath = this._joinPath(globPath);
|
|
||||||
return globSync(globPath, {
|
|
||||||
cwd: '/DUMMYCWD',
|
|
||||||
fs: this.fsImpl
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
clone() {
|
|
||||||
const t = new TaskTarget(this.path);
|
|
||||||
t.fsImpl = this.fsImpl;
|
|
||||||
t.idValue = typeof this.idValue === "function" ? this.idValue : undefined;
|
|
||||||
t.postFns = t.postFns.slice();
|
|
||||||
//TODO: clone pipeline
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
pushToPipeline(v: TaskTargetBase) {
|
|
||||||
if (v.type === "read") {
|
|
||||||
assert(this.pipeline.length === 0, "A read can only be the first item in a pipeline");
|
|
||||||
}
|
|
||||||
|
|
||||||
this.pipeline.push(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
pushPostFn(fn: ((t: TaskTarget)=>Promise<void>)) {
|
|
||||||
this.postFns.push(fn);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**A very composable object*/
|
|
||||||
export class Task {
|
|
||||||
/**A serial pipeline of Streams*/
|
|
||||||
targets: TaskTarget[];
|
|
||||||
|
|
||||||
/**SHARED list of all tasks for this given tree*/
|
|
||||||
tasks: Task[];
|
|
||||||
|
|
||||||
constructor() {
|
|
||||||
this.tasks = [];
|
|
||||||
this.targets = [new TaskTarget(process.cwd())];
|
|
||||||
}
|
|
||||||
|
|
||||||
cd(path: string) {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
// TODO: opts
|
|
||||||
t.cd(path);
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Globs for all the paths that match under all targets*/
|
|
||||||
glob(globPath: string) {
|
|
||||||
// For every target, concat glob onto it, glob, and then
|
|
||||||
// replace the original set of targets with all the new ones
|
|
||||||
const newTargets: TaskTarget[] = [];
|
|
||||||
for (const t of this.targets) {
|
|
||||||
const matches = t.glob(globPath);
|
|
||||||
for (const m of matches) {
|
|
||||||
const newT = t.clone();
|
|
||||||
newT.path = m;
|
|
||||||
newTargets.push(newT);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.targets = newTargets;
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Opens all targets as zip archives*/
|
|
||||||
async zip() {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
const zfs = new ZipFS(t.path);
|
|
||||||
await zfs.init();
|
|
||||||
t.path = ""; // Each target is now rooted at the base of its respective zip
|
|
||||||
t.fsImpl = zfs.getImpl() as any;
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Returns a copy of ourself*/
|
|
||||||
clone() {
|
|
||||||
const t = new Task();
|
|
||||||
t.targets = this.targets.map(t => t.clone());
|
|
||||||
t.tasks = this.tasks; //SHARED object reference
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Returns a copy of ourself, but adds us to this tree's shared
|
|
||||||
* task list as well*/
|
|
||||||
fork() {
|
|
||||||
const c = this.clone();
|
|
||||||
this.tasks.push(c);
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd(cmd: string | string[] | ((target: TaskTarget)=>string) | ((target: TaskTarget)=>string[])) {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
t.pushToPipeline(new TaskTargetCmd(t, cmd));
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
read() {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
t.pushToPipeline(new TaskTargetRead(t));
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
setId(idValue: string | ((t: TaskTarget)=>string)) {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
t.idValue = idValue;
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
post(fn: any) {
|
|
||||||
for (const t of this.targets) {
|
|
||||||
t.pushPostFn(fn);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
types(
|
|
||||||
types: string[]
|
|
||||||
) {
|
|
||||||
// TODO:
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
csvSink(
|
|
||||||
summarization?: [string, string][]
|
|
||||||
) {
|
|
||||||
// Ingest this csv into the database at the given id
|
|
||||||
// this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
|
|
||||||
// Add a post processing function for these targets that prints out the summarization
|
|
||||||
// stats
|
|
||||||
this.post(async (t: TaskTarget)=>{
|
|
||||||
// We only do the first one so far for the summarization
|
|
||||||
let queryLine: string;
|
|
||||||
let formatFn: (r: any)=>string;
|
|
||||||
const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
|
|
||||||
if (type === "numeric") {
|
|
||||||
queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
|
|
||||||
formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
queryLine = `count(*) as n`;
|
|
||||||
formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cmd = "sqlite-utils";
|
|
||||||
const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
|
|
||||||
const { stdout, stderr } = await execFile(cmd, args);
|
|
||||||
const results = JSON.parse(stdout);
|
|
||||||
const result = results[0]; // should only be one result in the array for this type of query
|
|
||||||
const logLine = formatFn(result);
|
|
||||||
(t as any).log = logLine;
|
|
||||||
});
|
|
||||||
|
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**Collect all the TaskTargets, make sure everything is init'd and exists
|
|
||||||
* and output the targets for processing*/
|
|
||||||
async getFinalTargets() {
|
|
||||||
const targets: TaskTarget[] = [];
|
|
||||||
for (const task of this.tasks) {
|
|
||||||
for (const t of task.targets) {
|
|
||||||
// Make sure fsImpl is ready
|
|
||||||
if ("ready" in t.fsImpl && !t.fsImpl.ready && t.fsImpl.init) {
|
|
||||||
await t.fsImpl.init();
|
|
||||||
}
|
|
||||||
if (t.pipeline.length <= 0) {
|
|
||||||
continue; // Tasks with empty pipelines are no-ops, remove
|
|
||||||
}
|
|
||||||
if (!t.exists()) {
|
|
||||||
console.warn(`Missing target ${t.path}`);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
targets.push(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return targets;
|
|
||||||
}
|
|
||||||
|
|
||||||
async getTaskTSVShell() {
|
|
||||||
const targets = await this.getFinalTargets();
|
|
||||||
let out: string[] = [];
|
|
||||||
for (const t of targets) {
|
|
||||||
const shell = t.pipeline
|
|
||||||
.map(p => p.toShell())
|
|
||||||
.join(" | ")
|
|
||||||
out.push(`${t.id}\t${shell}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
return out.join("\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -3,7 +3,7 @@ import fs from 'node:fs';
|
||||||
import { strict as assert } from "node:assert";
|
import { strict as assert } from "node:assert";
|
||||||
import { ZipFS } from "./zipFs.ts";
|
import { ZipFS } from "./zipFs.ts";
|
||||||
import { globSync } from "glob";
|
import { globSync } from "glob";
|
||||||
import { $ } from "zx";
|
import { $, ProcessPromise, quote } from "zx";
|
||||||
|
|
||||||
type FSImpl = {
|
type FSImpl = {
|
||||||
isZip?: boolean;
|
isZip?: boolean;
|
||||||
|
|
@ -36,20 +36,6 @@ function safe(s: string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//TODO: DANGER: I doubt this is safe...
|
|
||||||
function shEscape(s: string) {
|
|
||||||
assert(!s.includes("\n"), "shEscape given new line, caller needs to handle these");
|
|
||||||
if (!s.match(/[ \$\"\'\!]/)) {
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
// We need to quote this string
|
|
||||||
// Single quoted strings require you to close the single quoted string, then
|
|
||||||
// use the escaped single quote, and then reopen the string... obscene
|
|
||||||
s = s.replace(/'/g, "'\\''");
|
|
||||||
s = `'${s}'`;
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface TaskTargetOp {
|
interface TaskTargetOp {
|
||||||
type: "read" | "mid";
|
type: "read" | "mid";
|
||||||
toShell(target: TaskTarget): string;
|
toShell(target: TaskTarget): string;
|
||||||
|
|
@ -61,11 +47,11 @@ class TaskTargetRead implements TaskTargetOp {
|
||||||
if (target.fsImpl.isZip) {
|
if (target.fsImpl.isZip) {
|
||||||
assert(target.fsImpl.zipPath, "Should have a zipPath");
|
assert(target.fsImpl.zipPath, "Should have a zipPath");
|
||||||
// We need to be able to do this
|
// We need to be able to do this
|
||||||
return `7z x ${shEscape(target.fsImpl.zipPath)} -so ${shEscape(target.path)}`;
|
return `7z x ${quote(target.fsImpl.zipPath)} -so ${quote(target.path)}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO : Implement when reading from a zip file
|
// TODO : Implement when reading from a zip file
|
||||||
return `cat ${shEscape(target.path)}`;
|
return `cat ${quote(target.path)}`;
|
||||||
}
|
}
|
||||||
clone() {
|
clone() {
|
||||||
return new TaskTargetRead();
|
return new TaskTargetRead();
|
||||||
|
|
@ -96,7 +82,7 @@ class TaskTargetCmd implements TaskTargetOp {
|
||||||
const out = parsedCmd
|
const out = parsedCmd
|
||||||
.map(c => {
|
.map(c => {
|
||||||
let sh = c.replace(/\n/g, "")
|
let sh = c.replace(/\n/g, "")
|
||||||
return shEscape(sh);
|
return quote(sh);
|
||||||
});
|
});
|
||||||
|
|
||||||
return out.join(" ");
|
return out.join(" ");
|
||||||
|
|
@ -183,8 +169,8 @@ export class TaskTarget {
|
||||||
const t = new TaskTarget(this.path);
|
const t = new TaskTarget(this.path);
|
||||||
t.fsImpl = this.fsImpl; // holds no state, just needs same impl
|
t.fsImpl = this.fsImpl; // holds no state, just needs same impl
|
||||||
t.idValue = this.idValue;
|
t.idValue = this.idValue;
|
||||||
t.postFns = t.postFns.slice();
|
t.postFns = this.postFns.slice();
|
||||||
t.pipeline = t.pipeline.slice()
|
t.pipeline = this.pipeline.slice()
|
||||||
.map(p => p.clone());
|
.map(p => p.clone());
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
@ -332,6 +318,16 @@ export function getTSVManifest(targets: TaskTarget[]): string {
|
||||||
return out.join("\n");
|
return out.join("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds an in-memory manifest of the given targets.
 *
 * @param targets - Targets to describe.
 * @returns One `[id, shellCommand]` pair per target, in input order, where the
 *   command is whatever `target.toShell()` returns.
 */
export function getTaskManifest(targets: TaskTarget[]): [string, string][] {
  // Annotate the callback's return type instead of using `as const`: a const
  // assertion yields a readonly tuple, which is not assignable to the mutable
  // `[string, string]` element type of the result.
  return targets.map((t): [string, string] => [t.id, t.toShell()]);
}
|
||||||
|
|
||||||
function collectionSwap(a: TaskTargetPipelineHelper, b: TaskTargetPipelineHelper) {
|
function collectionSwap(a: TaskTargetPipelineHelper, b: TaskTargetPipelineHelper) {
|
||||||
if (!a.__collection) {
|
if (!a.__collection) {
|
||||||
return;
|
return;
|
||||||
|
|
@ -408,25 +404,7 @@ export class TaskTargetPipelineHelper extends Array<TaskTarget> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Executes a single target's shell pipeline via `bash -c` and resolves with
 * the finished process output.
 *
 * The command is started with zx's `nothrow: true` option, so callers are
 * expected to inspect the returned output (for example its `ok` flag) rather
 * than rely on a rejection for a failing exit status.
 *
 * @param target - Target whose `toShell()` string is run.
 * @returns The awaited result of the zx process, i.e. the resolved output and
 *   not the pending `ProcessPromise`.
 */
export async function run(target: TaskTarget): Promise<Awaited<ProcessPromise>> {
  const command = target.toShell();
  // Passed as a single interpolated argument so zx quotes the whole pipeline for `bash -c`.
  return await $({ nothrow: true })`bash -c ${command}`;
}
|
||||||
|
|
@ -5,7 +5,9 @@
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"type": "module",
|
"type": "module",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "node --enable-source-maps --test --experimental-transform-types --no-warnings ./test/util.ts",
|
"test": "node --enable-source-maps --test --experimental-transform-types --no-warnings ./test/task.ts",
|
||||||
|
"test2": "node --enable-source-maps --test --experimental-transform-types --no-warnings ./test/facebook.ts",
|
||||||
|
"test-update-snapshots": "node --enable-source-maps --test --experimental-transform-types --no-warnings --test-update-snapshots ./test/facebook.ts",
|
||||||
"dev": "vite --port 2223",
|
"dev": "vite --port 2223",
|
||||||
"server": "node --experimental-transform-types server/server.ts",
|
"server": "node --experimental-transform-types server/server.ts",
|
||||||
"prototype": "node --import ./util/tsx-loader.js --import ./util/ignore-css-loader.js --experimental-transform-types server/prototype.ts"
|
"prototype": "node --import ./util/tsx-loader.js --import ./util/ignore-css-loader.js --experimental-transform-types server/prototype.ts"
|
||||||
|
|
|
||||||
1910
pnpm-lock.yaml
generated
1910
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load diff
164
test/facebook.ts
164
test/facebook.ts
|
|
@ -1,115 +1,73 @@
|
||||||
import test from "node:test";
|
import test from "node:test";
|
||||||
import fs from "node:fs";
|
import nodePath from "node:path";
|
||||||
import assert from "node:assert";
|
import { strict as assert } from "node:assert";
|
||||||
import { finished } from "node:stream/promises";
|
import { finished } from "node:stream/promises";
|
||||||
import { Readable, Writable } from "node:stream";
|
import { Readable, Writable } from "node:stream";
|
||||||
import { TaskTargetPipelineHelper } from "../data-export/task.ts";
|
import { TaskTargetPipelineHelper, TaskTarget, verify, getTSVManifest, getTaskManifest, run } from "../data-export/task.ts";
|
||||||
|
import { parallel } from "../data-export/parallel.ts";
|
||||||
|
import "../data-export/facebook.ts";
|
||||||
|
|
||||||
test("facebook: Can load the 2021 export", async () => {
|
const THIS_FILE = import.meta.dirname;
|
||||||
// TODO:
|
const FACEBOOK_V1_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01');
|
||||||
// const t = new Task();
|
const FACEBOOK_V1_ZIPPED = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01.zip');
|
||||||
// (await t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
|
const FACEBOOK_V2_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2025-11-29');
|
||||||
// const taskText = await t.getTaskTSVShell();
|
|
||||||
// await fs.writeFile('test.manifest', taskText);
|
|
||||||
// // Run everything with parallel
|
|
||||||
// try {
|
|
||||||
// execFileSync('/usr/bin/parallel', ['--colsep', '\t', '--jobs', '0', '--linebuffer', '--tagstring', '{1}', '--eta', '--joblog', 'out.manifest', 'bash -c {2} > OUTTEST/{1}.csv', '::::', 'test.manifest'], {
|
|
||||||
// stdio: 'inherit'
|
|
||||||
// });
|
|
||||||
// }
|
|
||||||
// catch(err: any) {
|
|
||||||
// // I'm pretty sure status is the amount that failed?
|
|
||||||
// if (err?.status >= 30) {
|
|
||||||
// throw err;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Now take the output and load it all into a single SQLITE file
|
test("facebook: Can load the 2021 export", async (t) => {
|
||||||
// const entries = await fs.readdir('OUTTEST', { withFileTypes: true });
|
const targets = TaskTargetPipelineHelper.pipeline([
|
||||||
// const csvFiles = entries
|
new TaskTarget(FACEBOOK_V1_DIR)
|
||||||
// .filter(e => e.isFile() && e.name.endsWith(".csv"))
|
])
|
||||||
// .map(e => nodePath.join('OUTTEST', e.name));
|
.facebook();
|
||||||
|
|
||||||
|
const finalTargets = await verify(targets);
|
||||||
|
const result = await parallel(finalTargets, true);
|
||||||
|
for (const [id, r] of result.entries()) {
|
||||||
|
assert.ok(!r.stderr, `Task ${id} should have no stderr output`);
|
||||||
|
assert.ok(r.ok, `Task ${id} should be okay`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const allCSV = Array.from(result.entries())
|
||||||
|
.sort() // Keep stable ordering for snapshots
|
||||||
|
.map(([id, r]) => r.stdout);
|
||||||
|
|
||||||
|
t.assert.snapshot(allCSV);
|
||||||
});
|
});
|
||||||
|
test("facebook: Can load the 2021 export zipped", async (t) => {
|
||||||
|
const targets = await TaskTargetPipelineHelper.pipeline([
|
||||||
|
new TaskTarget(FACEBOOK_V1_ZIPPED)
|
||||||
|
])
|
||||||
|
.unzip();
|
||||||
|
const targets2 = targets
|
||||||
|
.facebook();
|
||||||
|
|
||||||
// import fs from 'node:fs/promises';
|
const finalTargets = await verify(targets2);
|
||||||
// import { type SpawnOptions, execFile as _execFile, execFileSync } from "node:child_process";
|
const result = await parallel(finalTargets, true);
|
||||||
// import nodePath from "node:path";
|
for (const [id, r] of result.entries()) {
|
||||||
// import { DatabaseSync } from "node:sqlite";
|
assert.ok(!r.stderr, `Task ${id} should have no stderr output`);
|
||||||
// import { promisify } from "node:util";
|
assert.ok(r.ok, `Task ${id} should be okay`);
|
||||||
// import "../data-export/facebook.ts";
|
}
|
||||||
// import { google } from "../data-export/google.ts";
|
|
||||||
// const execFile = promisify(_execFile);
|
|
||||||
|
|
||||||
// declare module "../data-export/task.ts" {
|
const allCSV = Array.from(result.entries())
|
||||||
// interface Task {
|
.sort() // Keep stable ordering for snapshots
|
||||||
// google: typeof google;
|
.map(([id, r]) => r.stdout);
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
// Object.assign(Task.prototype, {
|
t.assert.snapshot(allCSV);
|
||||||
// google
|
});
|
||||||
// });
|
test("facebook: Can load the 2025 export", async (t) => {
|
||||||
|
const targets = TaskTargetPipelineHelper.pipeline([
|
||||||
|
new TaskTarget(FACEBOOK_V2_DIR)
|
||||||
|
])
|
||||||
|
.facebook_v2();
|
||||||
|
|
||||||
// function loadIntoSqlite(
|
const finalTargets = await verify(targets);
|
||||||
// paths: string[],
|
const result = await parallel(finalTargets, true);
|
||||||
// sqlitePath: string
|
for (const [id, r] of result.entries()) {
|
||||||
// ) {
|
assert.ok(!r.stderr, `Task ${id} should have no stderr output`);
|
||||||
// // Open an in-memory db for speed
|
assert.ok(r.ok, `Task ${id} should be okay`);
|
||||||
// const db = new DatabaseSync(":memory:", { allowExtension: true });
|
}
|
||||||
// db.loadExtension("/home/cobertos/sqlite-files/csv.so")
|
|
||||||
// db.enableLoadExtension(false);
|
|
||||||
// for (const path of paths) {
|
|
||||||
// const table = nodePath.basename(path, ".csv");
|
|
||||||
// console.log(`Loading ${path} → table ${table}`);
|
|
||||||
|
|
||||||
// // const headers = lines[0].split(",");
|
const allCSV = Array.from(result.entries())
|
||||||
// // const columnsSql = headers.map(h => `"${h}" TEXT`).join(", ");
|
.sort() // Keep stable ordering for snapshots
|
||||||
// db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${path}');`);
|
.map(([id, r]) => r.stdout);
|
||||||
// db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`);
|
|
||||||
// db.exec(`DROP TABLE IF EXISTS intermediate;`);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// // Dump it all to the path specified
|
t.assert.snapshot(allCSV);
|
||||||
// db.exec(`VACUUM main INTO '${sqlitePath}'`);
|
});
|
||||||
// db.close();
|
|
||||||
// }
|
|
||||||
|
|
||||||
// async function main() {
|
|
||||||
// const t = new Task();
|
|
||||||
// // t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json")
|
|
||||||
// // .facebook()
|
|
||||||
|
|
||||||
// (await t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
|
|
||||||
|
|
||||||
// // t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001")
|
|
||||||
// // .google()
|
|
||||||
|
|
||||||
|
|
||||||
// // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip");
|
|
||||||
// // await (zipTask.fsImpl as any).init();
|
|
||||||
|
|
||||||
// // zipTask.facebook();
|
|
||||||
// const taskText = await t.getTaskTSVShell();
|
|
||||||
// await fs.writeFile('test.manifest', taskText);
|
|
||||||
// // Run everything with parallel
|
|
||||||
// try {
|
|
||||||
// execFileSync('/usr/bin/parallel', ['--colsep', '\t', '--jobs', '0', '--linebuffer', '--tagstring', '{1}', '--eta', '--joblog', 'out.manifest', 'bash -c {2} > OUTTEST/{1}.csv', '::::', 'test.manifest'], {
|
|
||||||
// stdio: 'inherit'
|
|
||||||
// });
|
|
||||||
// }
|
|
||||||
// catch(err: any) {
|
|
||||||
// // I'm pretty sure status is the amount that failed?
|
|
||||||
// if (err?.status >= 30) {
|
|
||||||
// throw err;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// // Now take the output and load it all into a single SQLITE file
|
|
||||||
// const entries = await fs.readdir('OUTTEST', { withFileTypes: true });
|
|
||||||
// const csvFiles = entries
|
|
||||||
// .filter(e => e.isFile() && e.name.endsWith(".csv"))
|
|
||||||
// .map(e => nodePath.join('OUTTEST', e.name));
|
|
||||||
// await fs.unlink('your.db');
|
|
||||||
// loadIntoSqlite(csvFiles, 'your.db');
|
|
||||||
// }
|
|
||||||
|
|
||||||
// main();
|
|
||||||
|
|
|
||||||
116
test/facebook.ts.snapshot
Normal file
116
test/facebook.ts.snapshot
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
exports[`facebook: Can load the 2021 export 1`] = `
|
||||||
|
[
|
||||||
|
"\\"album\\",\\"uri\\",\\"creation_timestamp\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-03-07T15:23:20Z\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-07-01T07:46:40Z\\"\\n",
|
||||||
|
"[\\n \\"from\\",\\n \\"to\\",\\n \\"timestamp\\",\\n \\"body\\"\\n]\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"status\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\"\\n",
|
||||||
|
"\\"service_name\\",\\"native_app_id\\",\\"username\\",\\"email\\",\\"phone_number\\",\\"name\\"\\n\\"xxx\\",69,\\"xxx\\",\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\"\\n\\"xxx\\",1707005000,\\"xxx\\",\\"not_a_real_email@example.com\\",,\\"xxx\\"\\n",
|
||||||
|
"\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",,,\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\",,,\\n",
|
||||||
|
"\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-12-29T08:13:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-22T01:26:40Z\\",\\"2024-05-11T15:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"\\",\\"\\",\\"\\",\\"xxx\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-08T19:20:00Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-01-17T14:00:00Z\\",\\"TODO\\",\\"xxx\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-10-18T07:03:20Z\\",\\"not_a_real_email@example.com\\",69\\n\\"2024-01-21T22:10:00Z\\",\\"not_a_real_email@example.com\\",69\\n",
|
||||||
|
"\\"name\\"\\n\\"xxx\\"\\n\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-13T13:13:20Z\\"\\n\\"xxx\\",\\"2024-10-31T00:36:40Z\\"\\n",
|
||||||
|
"\\"game\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-11-03T16:06:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n",
|
||||||
|
"\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-04-23T17:56:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"amount\\",\\"currency\\",\\"type\\",\\"status\\",\\"payment_method\\",\\"created_timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-05T21:36:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-15T12:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-12T06:13:20Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"rank\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",69,\\"2024-07-22T19:03:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\",\\"reaction\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-08T16:33:20Z\\"\\n\\"xxx\\",\\"2024-09-24T19:10:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-27T15:13:20Z\\"\\n\\"xxx\\",\\"2024-08-24T00:40:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-06-23T05:20:00Z\\"\\n\\"xxx\\",\\"2024-05-25T08:16:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-04-28T20:10:00Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"subject\\",\\"message\\",\\"timestamp\\"\\n\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-16T06:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"url://somewhere\\",\\"2024-10-16T06:26:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-12-17T08:43:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"id\\",\\"type\\",\\"timestamp\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-11T12:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T19:56:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T11:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-07T21:06:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-02-27T05:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-05-16T03:26:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-10-31T06:10:00Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n"
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`facebook: Can load the 2021 export zipped 1`] = `
|
||||||
|
[
|
||||||
|
"\\"album\\",\\"uri\\",\\"creation_timestamp\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-03-07T15:23:20Z\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-07-01T07:46:40Z\\"\\n",
|
||||||
|
"[\\n \\"from\\",\\n \\"to\\",\\n \\"timestamp\\",\\n \\"body\\"\\n]\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"status\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\"\\n",
|
||||||
|
"\\"service_name\\",\\"native_app_id\\",\\"username\\",\\"email\\",\\"phone_number\\",\\"name\\"\\n\\"xxx\\",69,\\"xxx\\",\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\"\\n\\"xxx\\",1707005000,\\"xxx\\",\\"not_a_real_email@example.com\\",,\\"xxx\\"\\n",
|
||||||
|
"\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",,,\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\",,,\\n",
|
||||||
|
"\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-12-29T08:13:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-22T01:26:40Z\\",\\"2024-05-11T15:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"\\",\\"\\",\\"\\",\\"xxx\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-08T19:20:00Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-01-17T14:00:00Z\\",\\"TODO\\",\\"xxx\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-10-18T07:03:20Z\\",\\"not_a_real_email@example.com\\",69\\n\\"2024-01-21T22:10:00Z\\",\\"not_a_real_email@example.com\\",69\\n",
|
||||||
|
"\\"name\\"\\n\\"xxx\\"\\n\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-13T13:13:20Z\\"\\n\\"xxx\\",\\"2024-10-31T00:36:40Z\\"\\n",
|
||||||
|
"\\"game\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-11-03T16:06:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n",
|
||||||
|
"\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-04-23T17:56:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"amount\\",\\"currency\\",\\"type\\",\\"status\\",\\"payment_method\\",\\"created_timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-05T21:36:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-15T12:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-12T06:13:20Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"rank\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",69,\\"2024-07-22T19:03:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\",\\"reaction\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-08T16:33:20Z\\"\\n\\"xxx\\",\\"2024-09-24T19:10:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-27T15:13:20Z\\"\\n\\"xxx\\",\\"2024-08-24T00:40:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-06-23T05:20:00Z\\"\\n\\"xxx\\",\\"2024-05-25T08:16:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-04-28T20:10:00Z\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"subject\\",\\"message\\",\\"timestamp\\"\\n\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-16T06:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"url://somewhere\\",\\"2024-10-16T06:26:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-12-17T08:43:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"id\\",\\"type\\",\\"timestamp\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-11T12:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T19:56:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T11:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-07T21:06:40Z\\"\\n",
|
||||||
|
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-02-27T05:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-05-16T03:26:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-10-31T06:10:00Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n"
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`facebook: Can load the 2025 export 1`] = `
|
||||||
|
[
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"some/path\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n",
|
||||||
|
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"<other>\\",\\"1970-01-01T00:00:00Z\\",\\n",
|
||||||
|
"\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-11-22T10:06:40Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-11-21T23:00:00Z\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-13T02:06:40Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-07-12T02:06:40Z\\",\\"TODO\\",\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-01-12T00:40:00Z\\"\\n\\"xxx\\",\\"2024-06-21T17:13:20Z\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-02-07T19:43:20Z\\",\\"not_a_real_email@example.com\\",69\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-10-06T06:10:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-01-22T16:13:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-02T23:00:00Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-09-27T01:20:00Z\\",69,69,\\"xxx\\"\\n",
|
||||||
|
"\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n",
|
||||||
|
"\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-11-20T12:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-11-15T00:20:00Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-21T03:10:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-09-11T20:03:20Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-20T12:50:00Z\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-10T10:43:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n",
|
||||||
|
"\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-11T01:33:20Z\\",,,\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",,,\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-01T14:13:20Z\\"\\n\\"xxx\\",\\"2024-08-12T08:06:40Z\\"\\n",
|
||||||
|
"\\"start\\",\\"end\\"\\n",
|
||||||
|
"\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n,\\"2024-04-04T19:46:40Z\\",\\"2024-11-23T02:46:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\"\\n,\\"2024-04-05T06:53:20Z\\",\\"2024-11-22T10:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\"\\n",
|
||||||
|
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-04-01T16:46:40Z\\"\\n\\"xxx\\",\\"2024-09-07T16:03:20Z\\"\\n",
|
||||||
|
"\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-12T17:46:40Z\\"\\n\\"xxx\\",\\"2024-02-12T17:46:40Z\\"\\n",
|
||||||
|
"\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-12-08T09:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-12-28T00:16:40Z\\"\\n"
|
||||||
|
]
|
||||||
|
`;
|
||||||
BIN
test/fixtures/facebook-json-2021-05-01.zip
vendored
Normal file
BIN
test/fixtures/facebook-json-2021-05-01.zip
vendored
Normal file
Binary file not shown.
316
test/task.ts
316
test/task.ts
|
|
@ -1,10 +1,312 @@
|
||||||
import test from "node:test";
|
import test from "node:test";
|
||||||
import fs from "node:fs";
|
import nodePath from "node:path";
|
||||||
import assert from "node:assert";
|
import { strict as assert } from "node:assert/strict";
|
||||||
import { finished } from "node:stream/promises";
|
import {
|
||||||
import { Readable, Writable } from "node:stream";
|
TaskTarget,
|
||||||
import { TaskTargetPipelineHelper } from "../data-export/task.ts";
|
cd,
|
||||||
|
glob as taskGlob,
|
||||||
|
read,
|
||||||
|
cmd,
|
||||||
|
setId,
|
||||||
|
verify,
|
||||||
|
getTSVManifest,
|
||||||
|
TaskTargetPipelineHelper,
|
||||||
|
} from "../data-export/task.ts";
|
||||||
|
|
||||||
test("facebook: Can load the 2021 export", async () => {
|
const THIS_FILE = import.meta.dirname;
|
||||||
|
const FIXTURE_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01');
|
||||||
|
const FIXTURE_FILE = nodePath.join(FIXTURE_DIR, 'friends/friends.json');
|
||||||
|
|
||||||
});
|
// -- TaskTarget ---------------------------------------------------------------
|
||||||
|
|
||||||
|
test("TaskTarget: constructor initializes path, pipeline, postFns", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar");
|
||||||
|
assert.equal(t.path, "/foo/bar");
|
||||||
|
assert.deepEqual(t.pipeline, []);
|
||||||
|
assert.deepEqual(t.postFns, []);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: exists() returns true for a real file", () => {
|
||||||
|
assert.equal(new TaskTarget(FIXTURE_FILE).exists(), true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: exists() returns false for a missing file", () => {
|
||||||
|
assert.equal(new TaskTarget("/nonexistent-file-xyz").exists(), false);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: basename safe-ifies the path basename", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar/some-file.txt");
|
||||||
|
assert.equal(t.basename, "some_file_txt");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: basenameN returns last n path segments joined with ___", () => {
|
||||||
|
const t = new TaskTarget("/a/b/c/d");
|
||||||
|
assert.equal(t.basenameN(2), "c___d");
|
||||||
|
assert.equal(t.basenameN(1), "d");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: id throws when no idValue is set", () => {
|
||||||
|
assert.throws(() => new TaskTarget("/foo").id, /must have an id/);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: id with a string value is safe-ified", () => {
|
||||||
|
const t = new TaskTarget("/foo").setId("my-id");
|
||||||
|
assert.equal(t.id, "my_id");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: id with a function value is resolved against the target", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar").setId(tgt => tgt.basename);
|
||||||
|
assert.equal(t.id, "bar");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: cd with an absolute path replaces the path", () => {
|
||||||
|
const t = new TaskTarget("/foo");
|
||||||
|
t.cd("/bar/baz");
|
||||||
|
assert.equal(t.path, "/bar/baz");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: cd with a relative path joins with the current path", () => {
|
||||||
|
const t = new TaskTarget("/foo");
|
||||||
|
t.cd("bar");
|
||||||
|
assert.equal(t.path, "/foo/bar");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: read adds a read op to the pipeline", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar.txt");
|
||||||
|
t.read();
|
||||||
|
assert.equal(t.pipeline.length, 1);
|
||||||
|
assert.equal(t.pipeline[0].type, "read");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: cmd adds a mid op to the pipeline", () => {
|
||||||
|
const t = new TaskTarget("/foo");
|
||||||
|
t.cmd("jq .");
|
||||||
|
assert.equal(t.pipeline.length, 1);
|
||||||
|
assert.equal(t.pipeline[0].type, "mid");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: pushToPipeline throws if read is not the first op", () => {
|
||||||
|
const t = new TaskTarget("/foo");
|
||||||
|
t.cmd("jq .");
|
||||||
|
assert.throws(() => t.read(), /first item/);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: clone produces an independent copy", () => {
|
||||||
|
const t = new TaskTarget("/foo").setId("orig");
|
||||||
|
t.read();
|
||||||
|
const c = t.clone();
|
||||||
|
assert.equal(c.path, "/foo");
|
||||||
|
assert.equal(c.id, "orig");
|
||||||
|
assert.equal(c.pipeline.length, 1);
|
||||||
|
c.path = "/other";
|
||||||
|
assert.equal(t.path, "/foo"); // original unchanged
|
||||||
|
});
|
||||||
|
|
||||||
|
test("TaskTarget: glob returns matching TaskTargets from disk", () => {
|
||||||
|
const t = new TaskTarget(FIXTURE_DIR);
|
||||||
|
const results = t.glob("friends/*.json");
|
||||||
|
assert.ok(results.length > 0);
|
||||||
|
assert.ok(results.every(r => r instanceof TaskTarget));
|
||||||
|
assert.ok(results.every(r => r.path.endsWith(".json")));
|
||||||
|
});
|
||||||
|
|
||||||
|
// -- toShell / shEscape -------------------------------------------------------
|
||||||
|
|
||||||
|
test("toShell: a single read produces a cat command", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar.txt");
|
||||||
|
t.read();
|
||||||
|
assert.equal(t.toShell(), "cat /foo/bar.txt");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("toShell: read piped into cmd", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar.txt");
|
||||||
|
t.read();
|
||||||
|
t.cmd("jq .");
|
||||||
|
assert.equal(t.toShell(), "cat /foo/bar.txt | jq .");
|
||||||
|
});
|
||||||
|
|
||||||
|
for (const c of " $!&".split("")) {
|
||||||
|
test(`toShell: quotes paths that contain ${JSON.stringify(c)}`, () => {
|
||||||
|
const t = new TaskTarget(`/foo/bar${c}baz.txt`);
|
||||||
|
t.read();
|
||||||
|
assert.equal(t.toShell(), `cat $'/foo/bar${c}baz.txt'`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
test(`toShell: quotes and escapes paths that contain '`, () => {
|
||||||
|
const t = new TaskTarget(`/foo/bar'baz.txt`);
|
||||||
|
t.read();
|
||||||
|
assert.equal(t.toShell(), `cat $'/foo/bar\\'baz.txt'`);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("toShell: cmd with array splits tokens", () => {
|
||||||
|
const t = new TaskTarget("/foo");
|
||||||
|
t.cmd(["jq", "."]);
|
||||||
|
assert.equal(t.toShell(), "jq .");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("toShell: cmd with function resolves at shell-generation time", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar.json");
|
||||||
|
t.cmd(tgt => `jq -r .name ${tgt.path}`);
|
||||||
|
assert.equal(t.toShell(), "jq -r .name /foo/bar.json");
|
||||||
|
});
|
||||||
|
|
||||||
|
// -- module-level functions ---------------------------------------------------
|
||||||
|
|
||||||
|
test("cd: clones and changes directory of each target", () => {
|
||||||
|
const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
|
||||||
|
const result = cd(targets, "sub");
|
||||||
|
assert.equal(result[0].path, "/a/sub");
|
||||||
|
assert.equal(result[1].path, "/b/sub");
|
||||||
|
assert.equal(targets[0].path, "/a"); // originals unchanged
|
||||||
|
});
|
||||||
|
|
||||||
|
test("read: clones and adds a read op to each target", () => {
|
||||||
|
const targets = [new TaskTarget("/a.txt"), new TaskTarget("/b.txt")];
|
||||||
|
const result = read(targets);
|
||||||
|
assert.equal(result[0].pipeline[0].type, "read");
|
||||||
|
assert.equal(result[1].pipeline[0].type, "read");
|
||||||
|
assert.equal(targets[0].pipeline.length, 0); // originals unchanged
|
||||||
|
});
|
||||||
|
|
||||||
|
test("cmd: clones and appends a cmd op to each target", () => {
|
||||||
|
const targets = [new TaskTarget("/a.txt")];
|
||||||
|
targets[0].read();
|
||||||
|
const result = cmd(targets, "jq .");
|
||||||
|
assert.equal(result[0].pipeline.length, 2);
|
||||||
|
assert.equal(targets[0].pipeline.length, 1); // original unchanged
|
||||||
|
});
|
||||||
|
|
||||||
|
test("setId: clones and sets id on each target", () => {
|
||||||
|
const targets = [new TaskTarget("/a"), new TaskTarget("/b")];
|
||||||
|
const result = setId(targets, "myid");
|
||||||
|
assert.equal(result[0].id, "myid");
|
||||||
|
assert.equal(result[1].id, "myid");
|
||||||
|
assert.throws(() => targets[0].id); // originals have no id
|
||||||
|
});
|
||||||
|
|
||||||
|
test("taskGlob: returns matching targets across all input targets", () => {
|
||||||
|
const targets = [new TaskTarget(FIXTURE_DIR)];
|
||||||
|
const result = taskGlob(targets, "friends/*.json");
|
||||||
|
assert.ok(result.length > 0);
|
||||||
|
assert.ok(result.every(r => r.path.endsWith(".json")));
|
||||||
|
});
|
||||||
|
|
||||||
|
// -- verify -------------------------------------------------------------------
|
||||||
|
|
||||||
|
test("verify: removes targets with an empty pipeline", async () => {
|
||||||
|
const t = new TaskTarget(FIXTURE_FILE);
|
||||||
|
const result = await verify([t]);
|
||||||
|
assert.equal(result.length, 0);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("verify: removes targets whose file does not exist", async () => {
|
||||||
|
const t = new TaskTarget("/nonexistent-file-xyz");
|
||||||
|
t.read();
|
||||||
|
const result = await verify([t]);
|
||||||
|
assert.equal(result.length, 0);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("verify: keeps targets that exist and have a pipeline", async () => {
|
||||||
|
const t = new TaskTarget(FIXTURE_FILE);
|
||||||
|
t.read();
|
||||||
|
const result = await verify([t]);
|
||||||
|
assert.equal(result.length, 1);
|
||||||
|
assert.equal(result[0].path, FIXTURE_FILE);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("verify: filters a mixed list to only valid targets", async () => {
|
||||||
|
const good = new TaskTarget(FIXTURE_FILE); good.read();
|
||||||
|
const noPipeline = new TaskTarget(FIXTURE_FILE);
|
||||||
|
const noFile = new TaskTarget("/nonexistent-xyz"); noFile.read();
|
||||||
|
const result = await verify([good, noPipeline, noFile]);
|
||||||
|
assert.equal(result.length, 1);
|
||||||
|
assert.equal(result[0], good);
|
||||||
|
});
|
||||||
|
|
||||||
|
// -- getTSVManifest -----------------------------------------------------------
|
||||||
|
|
||||||
|
test("getTSVManifest: produces id<TAB>shell for a single target", () => {
|
||||||
|
const t = new TaskTarget("/foo/bar.txt");
|
||||||
|
t.setId("myid");
|
||||||
|
t.read();
|
||||||
|
assert.equal(getTSVManifest([t]), "myid\tcat /foo/bar.txt");
|
||||||
|
});
|
||||||
|
|
||||||
|
test("getTSVManifest: joins multiple targets with newlines", () => {
|
||||||
|
const t1 = new TaskTarget("/a.txt"); t1.setId("a"); t1.read();
|
||||||
|
const t2 = new TaskTarget("/b.txt"); t2.setId("b"); t2.read();
|
||||||
|
assert.equal(getTSVManifest([t1, t2]), "a\tcat /a.txt\nb\tcat /b.txt");
|
||||||
|
});
|
||||||
|
|
||||||
|
// -- TaskTargetPipelineHelper -------------------------------------------------
|
||||||
|
|
||||||
|
// Passing a plain array of targets to pipeline() should give back a
// TaskTargetPipelineHelper instance.
test("TaskTargetPipelineHelper: pipeline() promotes a plain array", () => {
  const plainTargets = [new TaskTarget("/a")];

  const helper = TaskTargetPipelineHelper.pipeline(plainTargets);

  assert.ok(helper instanceof TaskTargetPipelineHelper);
});
|
||||||
|
|
||||||
|
// Feeding an existing helper back into pipeline() should return that same
// object rather than wrapping it again.
test("TaskTargetPipelineHelper: pipeline() is idempotent", () => {
  const targets = [new TaskTarget("/a")];

  const promoted = TaskTargetPipelineHelper.pipeline(targets);
  const promotedAgain = TaskTargetPipelineHelper.pipeline(promoted);

  assert.equal(promoted, promotedAgain);
});
|
||||||
|
|
||||||
|
// cd() on a helper should yield a helper whose targets each have the
// sub-path appended to their own path.
test("TaskTargetPipelineHelper: cd returns a new helper with paths changed", () => {
  const roots = [new TaskTarget("/a"), new TaskTarget("/b")];
  const helper = TaskTargetPipelineHelper.pipeline(roots);

  const moved = helper.cd("sub");

  assert.ok(moved instanceof TaskTargetPipelineHelper);
  assert.equal(moved[0].path, "/a/sub");
  assert.equal(moved[1].path, "/b/sub");
});
|
||||||
|
|
||||||
|
// read() on a helper should yield a helper whose targets start their
// pipeline with a "read" operation.
test("TaskTargetPipelineHelper: read returns a new helper with read ops added", () => {
  const helper = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);

  const withRead = helper.read();

  assert.ok(withRead instanceof TaskTargetPipelineHelper);
  assert.equal(withRead[0].pipeline[0].type, "read");
});
|
||||||
|
|
||||||
|
// Chaining read() then cmd() should render as "cat <path> | <cmd>" when the
// resulting target is converted to a shell string.
test("TaskTargetPipelineHelper: cmd returns a new helper with cmd ops added", () => {
  const helper = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);

  const withRead = helper.read();
  const withCmd = withRead.cmd("jq .");

  assert.equal(withCmd[0].toShell(), "cat /a.txt | jq .");
});
|
||||||
|
|
||||||
|
// -- collect ------------------------------------------------------------------
|
||||||
|
|
||||||
|
// After collect() is attached to a chain, deriving a new helper from it should
// leave exactly one entry in the set: the newest end of the chain.
test("collect: the final end of a chain is added to the collection set", () => {
  const collection = new Set<TaskTargetPipelineHelper>();
  const head = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  head.collect(collection);

  // Extend the chain only after collect() has been registered.
  const tail = head.cd("sub");

  assert.equal(collection.size, 1);
  assert.ok(collection.has(tail));
});
|
||||||
|
|
||||||
|
// Each further step along a collected chain should replace the previous end in
// the set instead of accumulating alongside it.
test("collect: moving the chain end removes the old element and adds the new one", () => {
  const collection = new Set<TaskTargetPipelineHelper>();
  const head = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]);
  head.collect(collection);

  const middle = head.cd("sub");
  const tail = middle.read();

  assert.equal(collection.size, 1);
  assert.ok(collection.has(tail));
  // The intermediate helper must no longer be tracked.
  assert.ok(!collection.has(middle));
});
|
||||||
|
|
||||||
|
// Two unrelated chains sharing one collection set should each contribute their
// own end, and flattening the set should expose one target per branch.
test("collect: gathers the ends of multiple independent pipeline branches", () => {
  const collection = new Set<TaskTargetPipelineHelper>();

  // First branch: promote, register with the set, then extend with read().
  const headA = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]);
  const trackedA = headA.collect(collection);
  const endA = trackedA.read();

  // Second branch, built the same way against the same set.
  const headB = TaskTargetPipelineHelper.pipeline([new TaskTarget("/b.txt")]);
  const trackedB = headB.collect(collection);
  const endB = trackedB.read();

  assert.equal(collection.size, 2);
  assert.ok(collection.has(endA));
  assert.ok(collection.has(endB));

  // Flatten the collected helpers into a single list of targets.
  const allTargets = Array.from(collection).flat();
  assert.equal(allTargets.length, 2);
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue