From f6d0427a45dfa6fecd68f23b31b8fe7ca8d0e51a Mon Sep 17 00:00:00 2001 From: cobertos Date: Thu, 26 Feb 2026 00:13:39 -0500 Subject: [PATCH 1/4] Converted TaskTargetPipelineHelper to more functional style, added aggregate() functionality to bring together multiple exports (no tests, but works) * made parallel generic (not tied to TaskTarget) * pulled common higher-order/frontend operations into io.ts * split timelinize specific functionality into own file * Tests made to pass and match previous facebook export snapshots _exactly_ --- README.md | 25 + data-export/facebook.ts | 1519 +++++++++++++++++++-------------------- data-export/google.ts | 192 ++--- data-export/io.ts | 68 ++ data-export/parallel.ts | 35 +- data-export/task.ts | 220 +++--- main.ts | 224 ++---- test/facebook.ts | 64 +- test/task.ts | 95 +-- timelinize.ts | 240 +++++++ 10 files changed, 1423 insertions(+), 1259 deletions(-) create mode 100644 README.md create mode 100644 data-export/io.ts create mode 100644 timelinize.ts diff --git a/README.md b/README.md new file mode 100644 index 0000000..88eccb3 --- /dev/null +++ b/README.md @@ -0,0 +1,25 @@ +# base-data-manager + +A TypeScript project for parsing through many types of data exports to tabular formats + +**This is heavily WIP, and mostly just a toy for myself** + +### Installation + +* Install `jq` +* Install sqlite `csv.so` extension (Hardcoded to `/home/cobertos/sqlite-files/` currently) +* Install `node` + `pnpm i` +* See `main.ts` for current example usage + + +### Proposed Architecture + +The architecture runs in 2 steps. + +The first step is unopinionated in its output format. It's meant to take the source data exactly as-is and output it as csv. All source data should pass through, but will be normalized in csv + +**TODO: It's not completely unopinionated, there is some normalization for names of columns I think we want to apply? 
Or maybe we apply that later...** + +An optional second step combines everything into a single SQLite database. From here we normalize many different types of data across multiple exports into a single opinionated output. For example, message threads/channels should all have the same table format, or end up in the same table + +**TODO: No idea if the second part should be a part of this project... but it currently is** diff --git a/data-export/facebook.ts b/data-export/facebook.ts index 0a1af6e..d0d2793 100644 --- a/data-export/facebook.ts +++ b/data-export/facebook.ts @@ -1,171 +1,91 @@ -import { TaskTargetPipelineHelper } from "./task.ts"; - -declare module "../data-export/task.ts" { - interface TaskTargetPipelineHelper { - facebook: typeof facebook; - facebook_v2: typeof facebook_v2; - facebook_notifications_generic: typeof facebook_notifications_generic; - facebook_notifications_v1: typeof facebook_notifications_v1; - facebook_notifications_v2: typeof facebook_notifications_v2; - facebook_installed_apps_generic: typeof facebook_installed_apps_generic; - facebook_installed_apps_v1: typeof facebook_installed_apps_v1; - facebook_installed_apps_v2: typeof facebook_installed_apps_v2; - facebook_comments_generic: typeof facebook_comments_generic; - facebook_comments_v1: typeof facebook_comments_v1; - facebook_comments_v2: typeof facebook_comments_v2; - facebook_people_interactions_generic: typeof facebook_people_interactions_generic; - facebook_people_interactions_v1: typeof facebook_people_interactions_v1; - facebook_people_interactions_v2: typeof facebook_people_interactions_v2; - facebook_marketplace_items_sold_generic: typeof facebook_marketplace_items_sold_generic; - facebook_marketplace_items_sold_v1: typeof facebook_marketplace_items_sold_v1; - facebook_marketplace_items_sold_v2: typeof facebook_marketplace_items_sold_v2; - facebook_searches_generic: typeof facebook_searches_generic; - facebook_searches_v1: typeof facebook_searches_v1; - facebook_searches_v2: 
typeof facebook_searches_v2; - facebook_account_activity_generic: typeof facebook_account_activity_generic; - facebook_account_activity_v1: typeof facebook_account_activity_v1; - facebook_account_activity_v2: typeof facebook_account_activity_v2; - facebook_messages_generic: typeof facebook_messages_generic; - facebook_friends_generic: typeof facebook_friends_generic; - facebook_admin_records_generic: typeof facebook_admin_records_generic; - facebook_admin_records_v1: typeof facebook_admin_records_v1; - facebook_admin_records_v2: typeof facebook_admin_records_v2; - facebook_authorized_logins_generic: typeof facebook_authorized_logins_generic; - facebook_authorized_logins_v1: typeof facebook_authorized_logins_v1; - facebook_authorized_logins_v2: typeof facebook_authorized_logins_v2; - facebook_contact_verification_generic: typeof facebook_contact_verification_generic; - facebook_contact_verification_v1: typeof facebook_contact_verification_v1; - facebook_contact_verification_v2: typeof facebook_contact_verification_v2; - facebook_pages_unfollowed_generic: typeof facebook_pages_unfollowed_generic; - facebook_pages_unfollowed_v1: typeof facebook_pages_unfollowed_v1; - facebook_pages_unfollowed_v2: typeof facebook_pages_unfollowed_v2; - facebook_account_accesses_generic: typeof facebook_account_accesses_generic; - facebook_account_accesses_v1: typeof facebook_account_accesses_v1; - facebook_account_accesses_v2: typeof facebook_account_accesses_v2; - facebook_groups_joined_generic: typeof facebook_groups_joined_generic; - facebook_groups_joined_v1: typeof facebook_groups_joined_v1; - facebook_groups_joined_v2: typeof facebook_groups_joined_v2; - facebook_group_posts_v1: typeof facebook_group_posts_v1; - facebook_group_posts_v2: typeof facebook_group_posts_v2; - } -} - -Object.assign(TaskTargetPipelineHelper.prototype, { - facebook, - facebook_v2, - facebook_notifications_generic, - facebook_notifications_v1, - facebook_notifications_v2, - facebook_installed_apps_generic, 
- facebook_installed_apps_v1, - facebook_installed_apps_v2, - facebook_comments_generic, - facebook_comments_v1, - facebook_comments_v2, - facebook_people_interactions_generic, - facebook_people_interactions_v1, - facebook_people_interactions_v2, - facebook_marketplace_items_sold_generic, - facebook_marketplace_items_sold_v1, - facebook_marketplace_items_sold_v2, - facebook_searches_generic, - facebook_searches_v1, - facebook_searches_v2, - facebook_account_activity_generic, - facebook_account_activity_v1, - facebook_account_activity_v2, - facebook_admin_records_generic, - facebook_admin_records_v1, - facebook_admin_records_v2, - facebook_authorized_logins_generic, - facebook_authorized_logins_v1, - facebook_authorized_logins_v2, - facebook_contact_verification_generic, - facebook_contact_verification_v1, - facebook_contact_verification_v2, - facebook_account_accesses_generic, - facebook_account_accesses_v1, - facebook_account_accesses_v2, - facebook_pages_unfollowed_generic, - facebook_pages_unfollowed_v1, - facebook_pages_unfollowed_v2, - facebook_groups_joined_generic, - facebook_groups_joined_v1, - facebook_groups_joined_v2, - facebook_messages_generic, - facebook_friends_generic, - facebook_group_posts_v1, - facebook_group_posts_v2, -}); +import { pipe, branch, cmd, assignMeta, aggregate, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; /**Parses about_you/notifications.json in the old format * or logged_information/notifications.json in the new format*/ -function facebook_notifications_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", `["timestamp","unread","href","text"], - ( - .${prop}[] - | [(.timestamp | todateiso8601), .unread, .href, .text] - ) - | @csv`]) - .assignMeta({ +function facebook_notifications_generic(prop: string): PipelineOp { + return pipe( + cmd(["jq", "-r", `["timestamp","unread","href","text"], + ( + .${prop}[] + | [(.timestamp | todateiso8601), .unread, .href, .text] + ) + | @csv`]), + 
assignMeta({ columnMeta: ["isodatetime", "any", "url", "text"], perRowDescription: 'Notification at {0}: "{3}"', - perRowTags: "'facebook'", - }); + perRowTags: "facebook,initiated_by_third_party", + }) + ); } -function facebook_notifications_v1(this: TaskTargetPipelineHelper) { - return this.facebook_notifications_generic("notifications"); +function facebook_notifications_v1(): PipelineOp { + return facebook_notifications_generic("notifications"); } -function facebook_notifications_v2(this: TaskTargetPipelineHelper) { - return this.facebook_notifications_generic("notifications_v2"); +function facebook_notifications_v2(): PipelineOp { + return facebook_notifications_generic("notifications_v2"); } /**Installed apps*/ -function facebook_installed_apps_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", ` +function facebook_installed_apps_generic(prop: string) { + return pipe( + cmd(["jq", "-r", ` ["name","added_timestamp"], ( .${prop}[] | [.name, (.added_timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ + `]), + assignMeta({ columnMeta: ["text", "isodatetime"], perRowDescription: 'App "{0}" added on {1}', - perRowTags: "'facebook'", - }); + perRowTags: "facebook", + }) + ); } -function facebook_installed_apps_v1(this: TaskTargetPipelineHelper) { - return this.facebook_installed_apps_generic("installed_apps"); +function facebook_installed_apps_v1() { + return facebook_installed_apps_generic("installed_apps"); } -function facebook_installed_apps_v2(this: TaskTargetPipelineHelper) { +function facebook_installed_apps_v2() { // TODO: There's a few more properties in here for v2 - return this.facebook_installed_apps_generic("installed_apps_v2"); + return facebook_installed_apps_generic("installed_apps_v2"); } -function facebook_messages_generic(this: TaskTargetPipelineHelper) { - // This most assuredly does not handle certain things like pictures and such - // There are messages .type and then they have other thing in them? 
- // there's also is_unsent: false - return this.cmd(["jq", "-r", ` - ["from","to","timestamp","content"], - ( - .messages[] - | [.sender_name, "", ((.timestamp_ms / 1000) | round | todateiso8601), .content] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender", "receiver", "isodatetime", "text"], - perRowDescription: '"{3}" from {0} at {2}', - perRowTags: "'facebook,message'", - }); +function facebook_messages_generic() { + return branchGen(function*(){ + // This most assuredly does not handle certain things like pictures and such + // There are messages .type and then they have other thing in them? + + // We also want to collect another set of data that is the conversation-level + // information + // TODO: Readd + // yield pipe( + // cmd(["jq", "-r", ` + // [.title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))] + // | @csv + // `]), + // aggregate("Facebook - Messages Meta") + // ); + yield pipe( + cmd(["jq", "-r", ` + ["from","to","timestamp","content"], + ( + .messages[] + | [.sender_name, "", ((.timestamp_ms / 1000) | round | todateiso8601), .content] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["sender", "receiver", "isodatetime", "text"], + perRowDescription: '"{3}" from {0} at {2}', + perRowTags: "facebook,message", + }) + ); + }); } /**Comments*/ -function facebook_comments_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_comments_generic(prop: string) { // TODO: .data is an array that has items, but usually just one // "data": [ // { @@ -178,764 +98,835 @@ function facebook_comments_generic(this: TaskTargetPipelineHelper, prop: string) // } // ], // TODO: there's also attachments (media) - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["timestamp","data", "title"], ( .${prop}[]? 
| [(.timestamp | todateiso8601), "TODO", .title] ) | @csv - `]) - .assignMeta({ - columnMeta: ["isodatetime", "TODO", "text"], - perRowDescription: 'Comment on "{2}" at {0}', - perRowTags: "'facebook'", - }); + `]), + assignMeta({ + columnMeta: ["isodatetime", "TODO", "text"], + perRowDescription: 'Comment on "{2}" at {0}', + perRowTags: "facebook", + }) + ); } -function facebook_comments_v1(this: TaskTargetPipelineHelper) { - return this.facebook_comments_generic("comments"); +function facebook_comments_v1() { + return facebook_comments_generic("comments"); } -function facebook_comments_v2(this: TaskTargetPipelineHelper) { +function facebook_comments_v2() { // TODO: I don't see any difference between v1 and v2? Perhaps it's in the data? - return this.facebook_comments_generic("comments_v2"); + return facebook_comments_generic("comments_v2"); } -function facebook_friends_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", ` - ["name", "timestamp"], - ( - .${prop}[] - | [.name, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: '{0} at {1}', - perRowTags: "'facebook'", - }); +function facebook_friends_generic(prop: string) { + return pipe( + cmd(["jq", "-r", ` + ["name", "timestamp"], + ( + .${prop}[] + | [.name, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: '{0} at {1}', + perRowTags: "facebook", + }) + ); } -function facebook_people_interactions_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", ` - ["name", "uri", "timestamp"], - ( - .${prop}[].entries[] - | [.data.name, .data.uri, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "url", "isodatetime"], - perRowDescription: 'Interaction with {0} at {2}', - perRowTags: "'facebook'", - }); +function facebook_people_interactions_generic(prop: string) { + 
return pipe( + cmd(["jq", "-r", ` + ["name", "uri", "timestamp"], + ( + .${prop}[].entries[] + | [.data.name, .data.uri, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "url", "isodatetime"], + perRowDescription: 'Interaction with {0} at {2}', + perRowTags: "facebook", + }) + ); } -function facebook_people_interactions_v1(this: TaskTargetPipelineHelper) { - return this.facebook_people_interactions_generic("people_interactions"); +function facebook_people_interactions_v1() { + return facebook_people_interactions_generic("people_interactions"); } -function facebook_people_interactions_v2(this: TaskTargetPipelineHelper) { - return this.facebook_people_interactions_generic("people_interactions_v2"); +function facebook_people_interactions_v2() { + return facebook_people_interactions_generic("people_interactions_v2"); } -function facebook_marketplace_items_sold_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_marketplace_items_sold_generic(prop: string) { // TODO: Updated_timestamp may not exist so it's removed for now - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title", "price", "seller", "created_timestamp", "latitude", "longitude", "description"], ( .${prop}[] | [.title, .price, .seller, (.created_timestamp | todateiso8601), .location.coordinate.latitude, .location.coordinate.longitude, .description] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "numeric", "sender", "isodatetime", "lat", "lng", "text"], - perRowDescription: 'Sold "{0}" for {1} on {3}', - perRowTags: "'facebook,marketplace'", - }); + `]), + assignMeta({ + columnMeta: ["text", "numeric", "sender", "isodatetime", "lat", "lng", "text"], + perRowDescription: 'Sold "{0}" for {1} on {3}', + perRowTags: "facebook,marketplace", + }) + ); } -function facebook_marketplace_items_sold_v1(this: TaskTargetPipelineHelper) { - return this.facebook_marketplace_items_sold_generic("items_selling"); +function 
facebook_marketplace_items_sold_v1() { + return facebook_marketplace_items_sold_generic("items_selling"); } -function facebook_marketplace_items_sold_v2(this: TaskTargetPipelineHelper) { - return this.facebook_marketplace_items_sold_generic("items_selling_v2"); +function facebook_marketplace_items_sold_v2() { + return facebook_marketplace_items_sold_generic("items_selling_v2"); } -function facebook_searches_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_searches_generic(prop: string) { // TODO: Data and attachments, both only contain one "text" field inside the // first object of the array... Same data, do they ever differ? - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title","data","timestamp"], ( .${prop}[] | [.title, .data[0].text, (.timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "text", "isodatetime"], - perRowDescription: 'Searched for "{1}" at {2}', - perRowTags: "'facebook'", - }); + `]), + assignMeta({ + columnMeta: ["text", "text", "isodatetime"], + perRowDescription: 'Searched for "{1}" at {2}', + perRowTags: "facebook,initiated_by_me,content_by_me", + }) + ); } -function facebook_searches_v1(this: TaskTargetPipelineHelper) { - return this.facebook_searches_generic("searches"); +function facebook_searches_v1() { + return facebook_searches_generic("searches"); } -function facebook_searches_v2(this: TaskTargetPipelineHelper) { - return this.facebook_searches_generic("searches_v2"); +function facebook_searches_v2() { + return facebook_searches_generic("searches_v2"); } -function facebook_account_activity_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", ` +function facebook_account_activity_generic(prop: string) { + return pipe( + cmd(["jq", "-r", ` ["action", "ip", "user_agent", "datr_cookie", "city", "region", "country", "site_name","timestamp"], ( .${prop}[] | [.action, .ip_address, .user_agent, .datr_cookie, .city, .region, 
.country, .site_name, (.timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "text", "text", "text", "text", "text", "text", "text", "isodatetime"], - perRowDescription: '{0} from {4}, {6} on {8}', - perRowTags: "'facebook,security'", - }); + `]), + assignMeta({ + columnMeta: ["text", "text", "text", "text", "text", "text", "text", "text", "isodatetime"], + perRowDescription: '{0} from {4}, {6} on {8}', + perRowTags: "facebook,security", + }) + ); } -function facebook_account_activity_v1(this: TaskTargetPipelineHelper) { - return this.facebook_account_activity_generic("account_activity"); +function facebook_account_activity_v1() { + return facebook_account_activity_generic("account_activity"); } -function facebook_account_activity_v2(this: TaskTargetPipelineHelper) { - return this.facebook_account_activity_generic("account_activity_v2"); +function facebook_account_activity_v2() { + return facebook_account_activity_generic("account_activity_v2"); } -function facebook_admin_records_generic(this: TaskTargetPipelineHelper, prop: string) { - return this.cmd(["jq", "-r", ` +function facebook_admin_records_generic(prop: string) { + return pipe( + cmd(["jq", "-r", ` ["event","created_timestamp","ip_address","user_agent","datr_cookie"], ( .${prop}[] | [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime", "text", "text", "text"], - perRowDescription: '{0} at {1} from {2}', - perRowTags: "'facebook,security'", - }); + `]), + assignMeta({ + columnMeta: ["text", "isodatetime", "text", "text", "text"], + perRowDescription: '{0} at {1} from {2}', + perRowTags: "facebook,security", + }) + ); } -function facebook_admin_records_v1(this: TaskTargetPipelineHelper) { - return this.facebook_admin_records_generic("admin_records"); +function facebook_admin_records_v1() { + return facebook_admin_records_generic("admin_records"); } -function 
facebook_admin_records_v2(this: TaskTargetPipelineHelper) { - return this.facebook_admin_records_generic("admin_records_v2"); +function facebook_admin_records_v2() { + return facebook_admin_records_generic("admin_records_v2"); } -function facebook_authorized_logins_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_authorized_logins_generic(prop: string) { // I don't think .location, .app, .session_type are in v1? So I've made them nullable, but I only have // 1 v1 entry to actually compare against... - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["name","created_timestamp","updated_timestamp","ip_address","user_agent","location","app", "session_type", "datr_cookie"], ( .${prop}[] | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address, .user_agent, .location // "", .app // "", .session_type // "", .datr_cookie] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime", "isodatetime", "text", "text", "text", "text", "text", "text"], - perRowDescription: 'Session "{0}" from {5} on {1}', - perRowTags: "'facebook,security'", - }); + `]), + assignMeta({ + columnMeta: ["text", "isodatetime", "isodatetime", "text", "text", "text", "text", "text", "text"], + perRowDescription: 'Session "{0}" from {5} on {1}', + perRowTags: "facebook,security", + }) + ); } -function facebook_authorized_logins_v1(this: TaskTargetPipelineHelper) { - return this.facebook_authorized_logins_generic("recognized_devices"); +function facebook_authorized_logins_v1() { + return facebook_authorized_logins_generic("recognized_devices"); } -function facebook_authorized_logins_v2(this: TaskTargetPipelineHelper) { - return this.facebook_authorized_logins_generic("active_sessions_v2"); +function facebook_authorized_logins_v2() { + return facebook_authorized_logins_generic("active_sessions_v2"); } -function facebook_contact_verification_generic(this: TaskTargetPipelineHelper, prop: string) { - return 
this.cmd(["jq", "-r", ` +function facebook_contact_verification_generic(prop: string) { + return pipe( + cmd(["jq", "-r", ` ["timestamp", "email", "contact_type"], ( .${prop}[] | [(.verification_time | todateiso8601), .contact, .contact_type] ) | @csv - `]) - .assignMeta({ - columnMeta: ["isodatetime", "text", "text"], - perRowDescription: '{2} verification of {1} at {0}', - perRowTags: "'facebook,security'", - }); + `]), + assignMeta({ + columnMeta: ["isodatetime", "text", "text"], + perRowDescription: '{2} verification of {1} at {0}', + perRowTags: "facebook,security", + }) + ); } -function facebook_contact_verification_v1(this: TaskTargetPipelineHelper) { - return this.facebook_contact_verification_generic("contact_verifications"); +function facebook_contact_verification_v1() { + return facebook_contact_verification_generic("contact_verifications"); } -function facebook_contact_verification_v2(this: TaskTargetPipelineHelper) { - return this.facebook_contact_verification_generic("contact_verifications_v2"); +function facebook_contact_verification_v2() { + return facebook_contact_verification_generic("contact_verifications_v2"); } -function facebook_account_accesses_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_account_accesses_generic(prop: string) { // TODO: there's a updated_timestamp doesn't always exist - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["action", "timestamp", "site", "ip_address"], ( .${prop}[] | [.action, (.timestamp | todateiso8601), .site, .ip_address] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime", "text", "text"], - perRowDescription: '{0} on {2} at {1} from {3}', - perRowTags: "'facebook,security'", - }); + `]), + assignMeta({ + columnMeta: ["text", "isodatetime", "text", "text"], + perRowDescription: '{0} on {2} at {1} from {3}', + perRowTags: "facebook,security", + }) + ); } -function facebook_account_accesses_v1(this: TaskTargetPipelineHelper) { - return 
this.facebook_account_accesses_generic("account_accesses"); +function facebook_account_accesses_v1() { + return facebook_account_accesses_generic("account_accesses"); } -function facebook_account_accesses_v2(this: TaskTargetPipelineHelper) { - return this.facebook_account_accesses_generic("account_accesses_v2"); +function facebook_account_accesses_v2() { + return facebook_account_accesses_generic("account_accesses_v2"); } -function facebook_pages_unfollowed_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_pages_unfollowed_generic(prop: string) { // TODO: This is missing the .data field, but it only looks like the "name" on the only record I have - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title", "timestamp"], ( .${prop}[] | [.title, (.timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Unfollowed "{0}" at {1}', - perRowTags: "'facebook'", - }); + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Unfollowed "{0}" at {1}', + perRowTags: "facebook,initiated_by_me", + }) + ); } -function facebook_pages_unfollowed_v1(this: TaskTargetPipelineHelper) { - return this.facebook_pages_unfollowed_generic("pages_unfollowed"); +function facebook_pages_unfollowed_v1() { + return facebook_pages_unfollowed_generic("pages_unfollowed"); } -function facebook_pages_unfollowed_v2(this: TaskTargetPipelineHelper) { - return this.facebook_pages_unfollowed_generic("pages_unfollowed_v2"); +function facebook_pages_unfollowed_v2() { + return facebook_pages_unfollowed_generic("pages_unfollowed_v2"); } -function facebook_groups_joined_generic(this: TaskTargetPipelineHelper, prop: string) { +function facebook_groups_joined_generic(prop: string) { // this has a data property but it is redundant, ONLY IN v2 - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title", "timestamp"], ( .${prop}[] | [.title, (.timestamp | 
todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Joined group "{0}" at {1}', - perRowTags: "'facebook'", - }); + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Joined group "{0}" at {1}', + perRowTags: "facebook,initiated_by_me", + }) + ); } -function facebook_groups_joined_v1(this: TaskTargetPipelineHelper) { - return this.facebook_groups_joined_generic("groups_joined"); +function facebook_groups_joined_v1() { + return facebook_groups_joined_generic("groups_joined"); } -function facebook_groups_joined_v2(this: TaskTargetPipelineHelper) { - return this.facebook_groups_joined_generic("groups_joined_v2"); +function facebook_groups_joined_v2() { + return facebook_groups_joined_generic("groups_joined_v2"); } -function facebook_group_posts_v1(this: TaskTargetPipelineHelper) { +function facebook_group_posts_v1() { // TODO: Attachments metadata, maybe another timestamp in the data field too (but it looks like the same everywhere) - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title", "data", "timestamp"], ( .group_posts.activity_log_data[] | [.title, "TODO", (.timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "TODO", "isodatetime"], - perRowDescription: 'Group post "{0}" at {2}', - perRowTags: "'facebook'", - }); + `]), + assignMeta({ + columnMeta: ["text", "TODO", "isodatetime"], + perRowDescription: 'Group post "{0}" at {2}', + perRowTags: "facebook", + }) + ); } -function facebook_group_posts_v2(this: TaskTargetPipelineHelper) { +function facebook_group_posts_v2() { // TODO: Still a data and attachments to pull out - return this.cmd(["jq", "-r", ` + return pipe( + cmd(["jq", "-r", ` ["title", "data", "timestamp"], ( .group_posts_v2[] | [.title, "TODO", (.timestamp | todateiso8601)] ) | @csv - `]) - .assignMeta({ - columnMeta: ["text", "TODO", "isodatetime"], - perRowDescription: 'Group post "{0}" at {2}', - perRowTags: 
"'facebook'", - }); -} - -function facebook_v2(this: TaskTargetPipelineHelper) { - const p = this.assignMeta({ idValue: t=>`Facebookv2 - ${t.basename}` }); // Generic ID for everything in here - const col: Set = new Set(); - - // No correlary to accounts_and_profiles.json - // No correlary for your_off-facebook_activity.json - p.collect(col).cd(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`).read().facebook_installed_apps_v2(); - p.collect(col).cd(`your_facebook_activity/comments_and_reactions/comments.json`).read().facebook_comments_v2(); - p.collect(col).glob(`your_facebook_activity/messages/*/**/*.json`) // Messages files are in the FOLDERS inside messages (archived_threads, e2ee_cutover, etc...) - .assignMeta({ idValue: t=>`Facebookv2 - Messages ${t.basenameN(2)}` }) // 1, 2, etc is not specific enough, include the convo name - .read() - .facebook_messages_generic() - - p.collect(col).cd(`your_facebook_activity/other_activity/time_spent_on_facebook.json`).read() - .cmd(["jq", "-r", ` - ["start","end"], - ( - .label_values[] - | select(.label == "Intervals") - | .vec[] - | [ - (.dict[0].timestamp_value | todateiso8601), - (.dict[1].timestamp_value | todateiso8601) - ] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["isodatetime", "isodatetime"], - perRowDescription: 'Active from {0} to {1}', - perRowTags: "'facebook'", - }); - p.collect(col).cd(`your_facebook_activity/groups/your_group_membership_activity.json`).read().facebook_groups_joined_v2(); - p.collect(col).cd(`your_facebook_activity/groups/group_posts_and_comments.json`).read().facebook_group_posts_v2(); - p.collect(col).cd(`your_facebook_activity/pages/pages_and_profiles_you've_unfollowed.json`).read().facebook_pages_unfollowed_v2(); - - p.collect(col).cd(`connections/friends/your_friends.json`).read().facebook_friends_generic("friends_v2"); - p.collect(col).cd(`connections/friends/rejected_friend_requests.json`).read().facebook_friends_generic("rejected_requests_v2"); - 
p.collect(col).cd(`connections/friends/received_friend_requests.json`).read().facebook_friends_generic("received_requests_v2"); - - p.collect(col).cd(`logged_information/activity_messages/people_and_friends.json`).read().facebook_people_interactions_v2() - p.collect(col).cd(`logged_information/search/your_search_history.json`).read().facebook_searches_v2() - p.collect(col).cd(`logged_information/notifications/notifications.json`).read().facebook_notifications_v2(); - - p.collect(col).cd(`security_and_login_information/account_activity.json`).read().facebook_account_activity_v2() - p.collect(col).cd(`security_and_login_information/record_details.json`).read().facebook_admin_records_v2() - p.collect(col).cd(`security_and_login_information/where_you're_logged_in.json`).read().facebook_authorized_logins_v2() - p.collect(col).cd(`security_and_login_information/email_address_verifications.json`).read().facebook_contact_verification_v2() - p.collect(col).cd(`security_and_login_information/logins_and_logouts.json`).read().facebook_account_accesses_v2() - - p.collect(col).cd(`your_facebook_activity/facebook_marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v2() - - const final = Array.from(col).flat(); - return TaskTargetPipelineHelper.pipeline(final); -} - -function facebook(this: TaskTargetPipelineHelper){ - const p = this.assignMeta({ idValue: t=>`Facebook - ${t.basename}` }); // Generic ID for everything in here - const col: Set = new Set(); - - p.collect(col).cd(`about_you/notifications.json`).read().facebook_notifications_v1() - //TODO: .fork().skip('face_recognition.json').reason("Not a table, no idea how to use") - //TODO: .fork().skip('friend_peer_group.json').reason("Not a table, very small file") - //TODO:.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future") - //TODO: .fork().todo('preferences.json').reason("Too complex for now") - //TODO:.fork().todo('visited.json').reason("Too complex for now") - 
//TODO:.fork().todo('viewed.json').reason("Too complex for now") - - p.collect(col).cd(`accounts_center/accounts_and_profiles.json`).read() - .cmd(["jq", "-r", `["service_name","native_app_id","username","email", "phone_number", "name"], - ( - .linked_accounts[] - | [.service_name, .native_app_id, .username, .email, .phone_number, .name] - ) - | @csv`]) - .assignMeta({ - columnMeta: ["text", "text", "text", "text", "text", "text"], - perRowDescription: '{0} account "{2}"', - perRowTags: "'facebook'", - }); - - - p.collect(col).cd(`ads_and_businesses/your_off-facebook_activity.json`).read() - .cmd(["jq", "-r", ` - ["name","id","type","timestamp"], - ( - .off_facebook_activity[] - | .name as $name - | .events[] - | [$name, .id, .type, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "any", "text", "isodatetime"], - perRowDescription: '{2} event from {0} at {3}', - perRowTags: "'facebook'", - }); - //TODO: .fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json') - - p.collect(col).cd(`apps_and_websites/apps_and_websites.json`).read().facebook_installed_apps_v1() - - // `${facebookRoot}/archive` - no data in my export - // `${facebookRoot}/campus` - no data in my export - - p.collect(col).cd(`comments/comments.json`).read().facebook_comments_v1() - - p.collect(col).glob(`dating/messages/*.json`) // Files are 0.json, 1.json, etc - .assignMeta({ idValue: t=>`Facebook - Dating Messages ${t.basename}` }) // Slightly more specific message - .read() - .cmd(["jq", "-r", ` - ["from","to","timestamp","body"], - .recipient as $to - | ( - .messages[] - | ["Me", $to, (.timestamp | todateiso8601), .body] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender", "receiver", "isodatetime", "text"], - perRowDescription: '"{3}" from {0} to {1} at {2}', - perRowTags: "'facebook,message,dating'", - }); - //todo: your_dating_activity.json, but it only has a few lines and not super useful - //todo: the other dating files 
are also just, small - - // TODO: events -// rcd(`events`); -// localCollect('event_invitations.json', json, sspawn('jq', [` -// .events_invited[] |= ( -// .start_timestamp |= todateiso8601 | -// .end_timestamp |= todateiso8601 -// ) -// `])); -// localCollect('your_event_responses.json', json, sspawn('jq', [` -// .event_responses.events_joined[] |= ( -// .start_timestamp |= todateiso8601 | -// .end_timestamp |= todateiso8601 -// ) | -// .event_responses.events_declined[] |= ( -// .start_timestamp |= todateiso8601 | -// .end_timestamp |= todateiso8601 -// ) | -// .event_responses.events_interested[] |= ( -// .start_timestamp |= todateiso8601 | -// .end_timestamp |= todateiso8601 -// ) -// `])); - - p.collect(col).cd(`facebook_gaming/instant_games.json`) - .read() - .cmd(["jq", "-r", ` - ["game", "added_timestamp"], - ( - .instant_games_played[] - | [.name, (.added_timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Played "{0}" starting {1}', - perRowTags: "'facebook,gaming'", - }); - - p.collect(col).cd(`following_and_followers/unfollowed_pages.json`).read().facebook_pages_unfollowed_v1() - p.collect(col).cd(`following_and_followers/following.json`) - .read() - .cmd(["jq", "-r", ` - ["name", "timestamp"], - ( - .following[] - | [.name, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["receiver", "isodatetime"], - perRowDescription: 'Followed "{0}" at {1}', - perRowTags: "'facebook'", - }); - p.collect(col).cd(`following_and_followers/followers.json`) - .read() - .cmd(["jq", "-r", ` - ["name"], - ( - .followers[] - | [.name] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender"], - perRowDescription: '{0} follows you', - perRowTags: "'facebook'", - }); - - p.collect(col).cd(`friends/sent_friend_requests.json`).read().facebook_friends_generic("sent_requests") - p.collect(col).cd(`friends/removed_friends.json`).read().facebook_friends_generic("deleted_friends") 
- p.collect(col).cd(`friends/rejected_friend_requests.json`).read().facebook_friends_generic("rejected_requests") - p.collect(col).cd(`friends/received_friend_requests.json`).read().facebook_friends_generic("received_requests") - p.collect(col).cd(`friends/friends.json`).read().facebook_friends_generic("friends") - - p.collect(col).cd(`groups/your_group_membership_activity.json`).read().facebook_groups_joined_v1(); - p.collect(col).cd(`groups/your_posts_and_comments_in_groups.json`).read().facebook_group_posts_v1(); - - // there's also groups.json and events.json but neither has timestamp so they're - // not really useful right now - p.collect(col).cd(`interactions/people.json`).read().facebook_people_interactions_v1() - - // `${facebookRoot}/journalist_registration` - no data in my export - - p.collect(col).cd(`likes_and_reactions/pages.json`) - .read() - .cmd(["jq", "-r", ` - ["name", "timestamp"], - ( - .page_likes[] - | [.name, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Liked page "{0}" at {1}', - perRowTags: "'facebook'", - }); - p.collect(col).cd(`likes_and_reactions/posts_and_comments.json`) - .read() - .cmd(["jq", "-r", ` - ["title", "timestamp", "reaction"], - ( - .reactions[] - | [.name, (.timestamp | todateiso8601), .data[0].reaction.reaction] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime", "text"], - perRowDescription: '{2} on "{0}" at {1}', - perRowTags: "'facebook'", - }); - - // TODO: - // rcd(`location`); - // localCollect('primary_location.json', json); - // localCollect('primary_public_location.json', json); - // localCollect('timezone.json', json); - - p.collect(col).cd(`marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v1() - - - p.collect(col).glob(`messages/**/*.json`) // Files are message_1.json, etc - .assignMeta({ idValue: t=>`Facebook - Messages ${t.basenameN(2)}` }) // 1, 2, etc is not specific enough, include the 
convo name - .read() - .facebook_messages_generic() - - - // `${facebookRoot}/music_recommendations` - no data - - // rcd(`news`); - // localCollect('your_locations.json', json); - - p.collect(col).cd(`other_activity/pokes.json`) - .read() - .cmd(["jq", "-r", ` - ["from", "to","rank","timestamp"], - ( - .pokes.data[] - | [.poker, .pokee, .rank, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender", "receiver", "numeric", "isodatetime"], - perRowDescription: '{0} poked {1} at {3}', - perRowTags: "'facebook'", - }); - p.collect(col).cd(`other_activity/support_correspondences.json`) - .read() - // TODO: I'm seeing blanks in .from and .to when the replier was Facebook - // themselves. Perhaps it's broken? - // TODO: Attachments - .cmd(["jq", "-r", ` - ["from", "to", "subject", "message", "timestamp"], - ( - .support_correspondence[].messages[] - | [.from, .to, .subject, .message, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender", "receiver", "text", "text", "isodatetime"], - perRowDescription: '"{2}" from {0} to {1} at {4}', - perRowTags: "'facebook'", - }); - - - // `${facebookRoot}/pages` - no data - - p.collect(col).cd(`payment_history/payment_history.json`) - .read() - .cmd(["jq", "-r", ` - ["from", "to","amount","currency", "type","status","payment_method", "created_timestamp"], - ( - .payments.payments[] - | [.sender, .receiver, .amount, .currency, .type, .status, .payment_method, (.created_timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["sender", "receiver", "numeric", "text", "text", "text", "text", "isodatetime"], - perRowDescription: '{2} {3} from {0} to {1} on {7}', - perRowTags: "'facebook,payment'", - }); - - // TODO: There's also photos_and_videos/your_videos.json - // TODO: There's a media_metadata in each of the images too to convert as well as external files - p.collect(col).glob(`photos_and_videos/album/*.json`) - // Could use a better name, 
currently 0.json, 1.json, etc... - .assignMeta({ idValue: t=>`Facebook - Album ${t.basename}` }) //slightly more speciifc name, it woudl be better if we could use the album name - .read() - .cmd(["jq", "-r", ` - ["album","uri","creation_timestamp"], - ( - .photos[] - | [.title, .uri, (.creation_timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "url", "isodatetime"], - perRowDescription: 'Photo in "{0}" at {2}', - perRowTags: "'facebook,photo'", - }); - - p.collect(col).cd(`posts/your_pinned_posts.json`) - .read() - .cmd(["jq", "-r", ` - ["name","uri","timestamp"], - ( - .pinned_posts[].entries[] - | [.data.name, .data.uri, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "url", "isodatetime"], - perRowDescription: 'Pinned post "{0}" at {2}', - perRowTags: "'facebook'", - }); - // TODO: Glob? I never posted a lot on FB - p.collect(col).cd(`posts/your_posts_1.json`) - .read() - // TODO: Data is an array with objects. .post, .updated_timestamp, separately?? 
- // TODO: Also attachments - .cmd(["jq", "-r", ` - ["title","data","timestamp"], - ( - .[] - | [.title, "TODO: data", (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ + `]), + assignMeta({ columnMeta: ["text", "TODO", "isodatetime"], - perRowDescription: 'Post "{0}" at {2}', - perRowTags: "'facebook'", - }); + perRowDescription: 'Group post "{0}" at {2}', + perRowTags: "facebook", + }) + ); +} - // `${facebookRoot}/privacy_checkup` - no data +export function facebook_v2() { + return pipe( + // Generic ID for everything in here + assignMeta({ idValue: t=>`Facebookv2 - ${t.basename}` }), + branchGen(function*() { + // No correlary to accounts_and_profiles.json + // No correlary for your_off-facebook_activity.json + yield pipe(cd(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`), read(), facebook_installed_apps_v2()); + yield pipe(cd(`your_facebook_activity/comments_and_reactions/comments.json`),read(),facebook_comments_v2()); + yield pipe( + glob(`your_facebook_activity/messages/*/**/*.json`), // Messages files are in the FOLDERS inside messages (archived_threads, e2ee_cutover, etc...) + assignMeta({ idValue: t=>`Facebookv2 - Messages ${t.basenameN(2)}` }), // 1, 2, etc is not specific enough, include the convo name + read(), + facebook_messages_generic() + ); - // TODO: Shape is non-tabular, but maybe we should handle it? 
- // Looks mostly like dupes from other places - // './profile_information.json': undefined, - // The minimum amount of data is just .title and .timestamp - // TODO: HAndle data and attachments - p.collect(col).cd(`profile_information/profile_update_history.json`) - .read() - .cmd(["jq", "-r", ` - ["title","timestamp"], - ( - .profile_updates[] - | [.title, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Profile update "{0}" at {1}', - perRowTags: "'facebook'", - }); + yield pipe( + cd(`your_facebook_activity/other_activity/time_spent_on_facebook.json`), + read(), + cmd(["jq", "-r", ` + ["start","end"], + ( + .label_values[] + | select(.label == "Intervals") + | .vec[] + | [ + (.dict[0].timestamp_value | todateiso8601), + (.dict[1].timestamp_value | todateiso8601) + ] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["isodatetime", "isodatetime"], + perRowDescription: 'Active from {0} to {1}', + perRowTags: "facebook", + }) + ); + yield pipe(cd(`your_facebook_activity/groups/your_group_membership_activity.json`), read(), facebook_groups_joined_v2()); + yield pipe(cd(`your_facebook_activity/groups/group_posts_and_comments.json`), read(), facebook_group_posts_v2()); + yield pipe(cd(`your_facebook_activity/pages/pages_and_profiles_you've_unfollowed.json`), read(), facebook_pages_unfollowed_v2()); - // `${facebookRoot}/rewards` - no data - // `${facebookRoot}/saved_items_and_collections` - no data + yield pipe(cd(`connections/friends/your_friends.json`), read(), facebook_friends_generic("friends_v2")); + yield pipe(cd(`connections/friends/rejected_friend_requests.json`), read(), facebook_friends_generic("rejected_requests_v2")); + yield pipe(cd(`connections/friends/received_friend_requests.json`), read(), facebook_friends_generic("received_requests_v2")); - p.collect(col).cd(`search_history/your_search_history.json`).read().facebook_searches_v1() + yield 
pipe(cd(`logged_information/activity_messages/people_and_friends.json`), read(), facebook_people_interactions_v2()); + yield pipe(cd(`logged_information/search/your_search_history.json`), read(), facebook_searches_v2()); + yield pipe(cd(`logged_information/notifications/notifications.json`), read(), facebook_notifications_v2()); - p.collect(col).cd(`security_and_login_information/account_status_changes.json`) - .read() - .cmd(["jq", "-r", ` - ["status","timestamp"], - ( - .account_status_changes[] - | [.status, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Account {0} at {1}', - perRowTags: "'facebook,security'", - }); - p.collect(col).cd(`security_and_login_information/account_activity.json`).read().facebook_account_activity_v1() - p.collect(col).cd(`security_and_login_information/administrative_records.json`).read().facebook_admin_records_v1() - p.collect(col).cd(`security_and_login_information/authorized_logins.json`).read().facebook_authorized_logins_v1() - p.collect(col).cd(`security_and_login_information/contact_verifications.json`).read().facebook_contact_verification_v1() - p.collect(col).cd(`security_and_login_information/logins_and_logouts.json`).read().facebook_account_accesses_v1() - // TODO: datr_cookie_info, looks like a bunch of timestamps - // a.fork().cd(`login_protection_data.json`) - // .read() - // // TODO: updated_timestamp doesn't always exist - // .cmd(["jq", "-r", ` - // ["name", "created_timestamp", "updated_timestamp", "ip_address"], - // ( - // .login_protection_data[] - // | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address] - // ) - // | @csv - // `]) - // TODO: mobile_devices, only a couple entries - // TODO: used_ip_addresses - // TODO: where_you've logged in - // TODO: your_facebook_activity, useless and small + yield pipe(cd(`security_and_login_information/account_activity.json`), read(), 
facebook_account_activity_v2()); + yield pipe(cd(`security_and_login_information/record_details.json`), read(), facebook_admin_records_v2()); + yield pipe(cd(`security_and_login_information/where_you're_logged_in.json`), read(), facebook_authorized_logins_v2()); + yield pipe(cd(`security_and_login_information/email_address_verifications.json`), read(), facebook_contact_verification_v2()); + yield pipe(cd(`security_and_login_information/logins_and_logouts.json`), read(), facebook_account_accesses_v2()); + + yield pipe(cd(`your_facebook_activity/facebook_marketplace/items_sold.json`), read(), facebook_marketplace_items_sold_v2()); + }) + ); +} + +export function facebook(){ + return pipe( + // Generic ID for everything in here + assignMeta({ idValue: t=>`Facebook - ${t.basename}` }), + branchGen(function*() { + + yield pipe(cd(`about_you/notifications.json`), read(), facebook_notifications_v1()); + //TODO: .fork().skip('face_recognition.json').reason("Not a table, no idea how to use") + //TODO: .fork().skip('friend_peer_group.json').reason("Not a table, very small file") + //TODO:.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future") + //TODO: .fork().todo('preferences.json').reason("Too complex for now") + //TODO:.fork().todo('visited.json').reason("Too complex for now") + //TODO:.fork().todo('viewed.json').reason("Too complex for now") + + yield pipe( + cd(`accounts_center/accounts_and_profiles.json`), + read(), + cmd(["jq", "-r", `["service_name","native_app_id","username","email", "phone_number", "name"], + ( + .linked_accounts[] + | [.service_name, .native_app_id, .username, .email, .phone_number, .name] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "text", "text", "text", "text", "text"], + perRowDescription: '{0} account "{2}"', + perRowTags: "facebook", + }) + ); - // `${facebookRoot}/short_videos` - no data in my export - // `${facebookRoot}/saved_items_and_collections` - no data in my export + yield 
pipe( + cd(`ads_and_businesses/your_off-facebook_activity.json`), + read(), + cmd(["jq", "-r", ` + ["name","id","type","timestamp"], + ( + .off_facebook_activity[] + | .name as $name + | .events[] + | [$name, .id, .type, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "any", "text", "isodatetime"], + perRowDescription: '{2} event from {0} at {3}', + perRowTags: "facebook", + }) + ); + //TODO: .fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json') - p.collect(col).cd(`stories/story_reactions.json`) - .read() - .cmd(["jq", "-r", ` - ["title", "timestamp"], - ( - .stories_feedback[] - | [.title, (.timestamp | todateiso8601)] - ) - | @csv - `]) - .assignMeta({ - columnMeta: ["text", "isodatetime"], - perRowDescription: 'Story reaction on "{0}" at {1}', - perRowTags: "'facebook'", - }); + yield pipe(cd(`apps_and_websites/apps_and_websites.json`), read(), facebook_installed_apps_v1()); - // `${facebookRoot}/trash` - no data in my export - // `${facebookRoot}/voice_recording_and_transcription` - no data in my export - // `${facebookRoot}/volunteering` - no data in my export - // `${facebookRoot}/voting_location_and_reminders` - only small 1-property things - // `${facebookRoot}/your_places` - no data in my export - // `${facebookRoot}/your_topics` - no data in my export + // `${facebookRoot}/archive` - no data in my export + // `${facebookRoot}/campus` - no data in my export - const final = Array.from(col).flat(); - return TaskTargetPipelineHelper.pipeline(final); + yield pipe(cd(`comments/comments.json`), read(), facebook_comments_v1()); + + yield pipe( + glob(`dating/messages/*.json`), // Files are 0.json, 1.json, etc + assignMeta({ idValue: t=>`Facebook - Dating Messages ${t.basename}` }), // Slightly more specific message + read(), + cmd(["jq", "-r", ` + ["from","to","timestamp","body"], + .recipient as $to + | ( + .messages[] + | ["Me", $to, (.timestamp | todateiso8601), .body] + ) + | @csv + `]), + 
assignMeta({ + columnMeta: ["sender", "receiver", "isodatetime", "text"], + perRowDescription: '"{3}" from {0} to {1} at {2}', + perRowTags: "facebook,message,dating,content_by_me", + }) + ); + //todo: your_dating_activity.json, but it only has a few lines and not super useful + //todo: the other dating files are also just, small + + // TODO: events + // rcd(`events`); + // localCollect('event_invitations.json', json, sspawn('jq', [` + // .events_invited[] |= ( + // .start_timestamp |= todateiso8601 | + // .end_timestamp |= todateiso8601 + // ) + // `])); + // localCollect('your_event_responses.json', json, sspawn('jq', [` + // .event_responses.events_joined[] |= ( + // .start_timestamp |= todateiso8601 | + // .end_timestamp |= todateiso8601 + // ) | + // .event_responses.events_declined[] |= ( + // .start_timestamp |= todateiso8601 | + // .end_timestamp |= todateiso8601 + // ) | + // .event_responses.events_interested[] |= ( + // .start_timestamp |= todateiso8601 | + // .end_timestamp |= todateiso8601 + // ) + // `])); + + yield pipe( + cd(`facebook_gaming/instant_games.json`), + read(), + cmd(["jq", "-r", ` + ["game", "added_timestamp"], + ( + .instant_games_played[] + | [.name, (.added_timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Played "{0}" starting {1}', + perRowTags: "facebook,gaming", + }) + ); + + yield pipe(cd(`following_and_followers/unfollowed_pages.json`), read(), facebook_pages_unfollowed_v1()); + yield pipe( + cd(`following_and_followers/following.json`), + read(), + cmd(["jq", "-r", ` + ["name", "timestamp"], + ( + .following[] + | [.name, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["receiver", "isodatetime"], + perRowDescription: 'Followed "{0}" at {1}', + perRowTags: "facebook", + }) + ); + yield pipe( + cd(`following_and_followers/followers.json`), + read(), + cmd(["jq", "-r", ` + ["name"], + ( + .followers[] + | [.name] + ) + | @csv 
+ `]), + assignMeta({ + columnMeta: ["sender"], + perRowDescription: '{0} follows you', + perRowTags: "facebook", + }) + ); + + yield pipe(cd(`friends/sent_friend_requests.json`), read(), facebook_friends_generic("sent_requests")); + yield pipe(cd(`friends/removed_friends.json`), read(), facebook_friends_generic("deleted_friends")); + yield pipe(cd(`friends/rejected_friend_requests.json`), read(), facebook_friends_generic("rejected_requests")); + yield pipe(cd(`friends/received_friend_requests.json`), read(), facebook_friends_generic("received_requests")); + yield pipe(cd(`friends/friends.json`), read(), facebook_friends_generic("friends")); + + yield pipe(cd(`groups/your_group_membership_activity.json`), read(), facebook_groups_joined_v1()); + yield pipe(cd(`groups/your_posts_and_comments_in_groups.json`), read(), facebook_group_posts_v1()); + + // there's also groups.json and events.json but neither has timestamp so they're + // not really useful right now + yield pipe(cd(`interactions/people.json`), read(), facebook_people_interactions_v1()); + + // `${facebookRoot}/journalist_registration` - no data in my export + + yield pipe( + cd(`likes_and_reactions/pages.json`), + read(), + cmd(["jq", "-r", ` + ["name", "timestamp"], + ( + .page_likes[] + | [.name, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Liked page "{0}" at {1}', + perRowTags: "facebook", + }) + ); + yield pipe( + cd(`likes_and_reactions/posts_and_comments.json`), + read(), + cmd(["jq", "-r", ` + ["title", "timestamp", "reaction"], + ( + .reactions[] + | [.name, (.timestamp | todateiso8601), .data[0].reaction.reaction] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime", "text"], + perRowDescription: '{2} on "{0}" at {1}', + perRowTags: "facebook", + }) + ); + + // TODO: + // rcd(`location`); + // localCollect('primary_location.json', json); + // localCollect('primary_public_location.json', json); + // 
localCollect('timezone.json', json); + + yield pipe(cd(`marketplace/items_sold.json`), read(), facebook_marketplace_items_sold_v1()); + + + yield pipe( + glob(`messages/**/*.json`), // Files are message_1.json, etc + assignMeta({ idValue: t=>`Facebook - Messages ${t.basenameN(2)}` }), // 1, 2, etc is not specific enough, include the convo name + read(), + facebook_messages_generic() + ); + + // `${facebookRoot}/music_recommendations` - no data + + // rcd(`news`); + // localCollect('your_locations.json', json); + + yield pipe( + cd(`other_activity/pokes.json`), + read(), + cmd(["jq", "-r", ` + ["from", "to","rank","timestamp"], + ( + .pokes.data[] + | [.poker, .pokee, .rank, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["sender", "receiver", "numeric", "isodatetime"], + perRowDescription: '{0} poked {1} at {3}', + perRowTags: "facebook", + }) + ); + yield pipe( + cd(`other_activity/support_correspondences.json`), + read(), + // TODO: I'm seeing blanks in .from and .to when the replier was Facebook + // themselves. Perhaps it's broken? 
+ // TODO: Attachments + cmd(["jq", "-r", ` + ["from", "to", "subject", "message", "timestamp"], + ( + .support_correspondence[].messages[] + | [.from, .to, .subject, .message, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["sender", "receiver", "text", "text", "isodatetime"], + perRowDescription: '"{2}" from {0} to {1} at {4}', + perRowTags: "facebook", + }) + ); + + + // `${facebookRoot}/pages` - no data + + yield pipe( + cd(`payment_history/payment_history.json`), + read(), + cmd(["jq", "-r", ` + ["from", "to","amount","currency", "type","status","payment_method", "created_timestamp"], + ( + .payments.payments[] + | [.sender, .receiver, .amount, .currency, .type, .status, .payment_method, (.created_timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["sender", "receiver", "numeric", "text", "text", "text", "text", "isodatetime"], + perRowDescription: '{2} {3} from {0} to {1} on {7}', + perRowTags: "facebook,payment", + }) + ); + + // TODO: There's also photos_and_videos/your_videos.json + // TODO: There's a media_metadata in each of the images too to convert as well as external files + yield pipe( + glob(`photos_and_videos/album/*.json`), + // Could use a better name, currently 0.json, 1.json, etc... 
+ assignMeta({ idValue: t=>`Facebook - Album ${t.basename}` }), //slightly more specific name, it would be better if we could use the album name + read(), + cmd(["jq", "-r", ` + ["album","uri","creation_timestamp"], + ( + .photos[] + | [.title, .uri, (.creation_timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "url", "isodatetime"], + perRowDescription: 'Photo in "{0}" at {2}', + perRowTags: "facebook,photo", + }) + ); + + yield pipe(cd(`posts/your_pinned_posts.json`), + read(), + cmd(["jq", "-r", ` + ["name","uri","timestamp"], + ( + .pinned_posts[].entries[] + | [.data.name, .data.uri, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "url", "isodatetime"], + perRowDescription: 'Pinned post "{0}" at {2}', + perRowTags: "facebook", + }) + ); + // TODO: Glob? I never posted a lot on FB + yield pipe( + cd(`posts/your_posts_1.json`), + read(), + // TODO: Data is an array with objects. .post, .updated_timestamp, separately?? + // TODO: Also attachments + cmd(["jq", "-r", ` + ["title","data","timestamp"], + ( + .[] + | [.title, "TODO: data", (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "TODO", "isodatetime"], + perRowDescription: 'Post "{0}" at {2}', + perRowTags: "facebook", + }) + ); + + // `${facebookRoot}/privacy_checkup` - no data + + // TODO: Shape is non-tabular, but maybe we should handle it? 
+ // Looks mostly like dupes from other places + // './profile_information.json': undefined, + // The minimum amount of data is just .title and .timestamp + // TODO: Handle data and attachments + yield pipe( + cd(`profile_information/profile_update_history.json`), + read(), + cmd(["jq", "-r", ` + ["title","timestamp"], + ( + .profile_updates[] + | [.title, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Profile update "{0}" at {1}', + perRowTags: "facebook", + }) + ); + + // `${facebookRoot}/rewards` - no data + // `${facebookRoot}/saved_items_and_collections` - no data + + yield pipe(cd(`search_history/your_search_history.json`), read(), facebook_searches_v1()); + + yield pipe( + cd(`security_and_login_information/account_status_changes.json`), + read(), + cmd(["jq", "-r", ` + ["status","timestamp"], + ( + .account_status_changes[] + | [.status, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Account {0} at {1}', + perRowTags: "facebook,security", + }) + ); + yield pipe(cd(`security_and_login_information/account_activity.json`), read(), facebook_account_activity_v1()); + yield pipe(cd(`security_and_login_information/administrative_records.json`), read(), facebook_admin_records_v1()); + yield pipe(cd(`security_and_login_information/authorized_logins.json`), read(), facebook_authorized_logins_v1()); + yield pipe(cd(`security_and_login_information/contact_verifications.json`), read(), facebook_contact_verification_v1()); + yield pipe(cd(`security_and_login_information/logins_and_logouts.json`), read(), facebook_account_accesses_v1()); + // TODO: datr_cookie_info, looks like a bunch of timestamps + // a.fork().cd(`login_protection_data.json`) + // .read() + // // TODO: updated_timestamp doesn't always exist + // .cmd(["jq", "-r", ` + // ["name", "created_timestamp", "updated_timestamp", "ip_address"], + // ( + // 
.login_protection_data[] + // | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address] + // ) + // | @csv + // `]) + // TODO: mobile_devices, only a couple entries + // TODO: used_ip_addresses + // TODO: where_you've logged in + // TODO: your_facebook_activity, useless and small + + + // `${facebookRoot}/short_videos` - no data in my export + // `${facebookRoot}/saved_items_and_collections` - no data in my export + + yield pipe( + cd(`stories/story_reactions.json`), + read(), + cmd(["jq", "-r", ` + ["title", "timestamp"], + ( + .stories_feedback[] + | [.title, (.timestamp | todateiso8601)] + ) + | @csv + `]), + assignMeta({ + columnMeta: ["text", "isodatetime"], + perRowDescription: 'Story reaction on "{0}" at {1}', + perRowTags: "facebook", + }) + ); + + // `${facebookRoot}/trash` - no data in my export + // `${facebookRoot}/voice_recording_and_transcription` - no data in my export + // `${facebookRoot}/volunteering` - no data in my export + // `${facebookRoot}/voting_location_and_reminders` - only small 1-property things + // `${facebookRoot}/your_places` - no data in my export + // `${facebookRoot}/your_topics` - no data in my export + }) + ); }; diff --git a/data-export/google.ts b/data-export/google.ts index 2cb4701..a6c788d 100644 --- a/data-export/google.ts +++ b/data-export/google.ts @@ -1,105 +1,115 @@ -import { TaskTargetPipelineHelper } from "./task.ts"; +import { pipe, branch, cmd, assignMeta, aggregate, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; import { htmlSelectorChunkedDuplex } from "./html.ts"; -export function google(this: TaskTargetPipelineHelper){ - const p = this.assignMeta({ idValue: t=>`Google - ${t.basename}` }); // Generic ID for everything in here - const col: Set = new Set(); - - // TODO: There is a root takeout folder +export function google(){ + return pipe( + // Generic ID for everything in here + assignMeta({ idValue: t=>`Google - ${t.basename}` }), + branchGen(function*() 
{ + // TODO: There is a root takeout folder - p.collect(col).cd('Access Log Activity/Activities - A list of Google services accessed by.csv').read() - p.collect(col).cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv').read() + yield pipe(cd('Access Log Activity/Activities - A list of Google services accessed by.csv'), read()) + yield pipe(cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv'), read()) - // Assignments - data was empty - // Business messages - GMB messages, there's some but so far outside of what I want - // TODO: Calendar, exports an .ics + // Assignments - data was empty + // Business messages - GMB messages, there's some but so far outside of what I want + // TODO: Calendar, exports an .ics - // a = t.fork().cd(`Chrome`) - // TODO: Assersses and mode.json - // TODO: Bookmarks.csv - // TODO: Device Information.json - // TODO: Dictionary.csv - // TODO: ... - p.collect(col).cd('Chrome/History.json') - .read() - // TODO: Typed Url", no data - // TODO: "session", complex data - // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something... - // TODO: time_usec IS WRONG!! Needs to be ms - .cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"], - ( - ."Browser History"[] - | [.favicon_url, .page_transition, .title, .url, (.time_usec | todateiso8601)] - ) - | @csv`]) + // a = t.fork().cd(`Chrome`) + // TODO: Assersses and mode.json + // TODO: Bookmarks.csv + // TODO: Device Information.json + // TODO: Dictionary.csv + // TODO: ... + yield pipe( + cd('Chrome/History.json'), + read(), + // TODO: Typed Url", no data + // TODO: "session", complex data + // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something... + // TODO: time_usec IS WRONG!! 
Needs to be ms + cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"], + ( + ."Browser History"[] + | [.favicon_url, .page_transition, .title, .url, (.time_usec | todateiso8601)] + ) + | @csv + `]) + ); - // TODO: Contactss, exports an .vcf - // TODO: ... + // TODO: Contactss, exports an .vcf + // TODO: ... - // a = t.fork().cd(`Google Pay`) - p.collect(col).cd(`Google Pay/Google transactions`).glob(`transactions_*.csv`) - .read() - // .fork("a").cd(`Money sends and requests`) - // .fork().cd(`Money sends and requests.csv`) - // .read() - // .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"]) - // TODO: One more folder, and it only has a pdf + // a = t.fork().cd(`Google Pay`) + yield pipe( + cd(`Google Pay/Google transactions`), + glob(`transactions_*.csv`), + read(), + // .fork("a").cd(`Money sends and requests`) + // .fork().cd(`Money sends and requests.csv`) + // .read() + // .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"]) + // TODO: One more folder, and it only has a pdf + ); - // TODO: Google Play Movies _ TV - no data - // TODO: ... + // TODO: Google Play Movies _ TV - no data + // TODO: ... - p.collect(col).cd("Location History/Location History.json") - .read() - // TODO: This is missing - // "altitude" : 158, - // "verticalAccuracy" : 68 - // and the activity models. I had no idea google tries to determine if I'm "tilting" - .cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"], - ( - .locations[] - | [.timestampMs | todateiso8601, .latitudeE7, .longitudeE7, .accuracy] - ) - | @csv`]) - // There's also the semantic history but that's an entire nother can of worms - // it seems like + yield pipe( + cd("Location History/Location History.json"), + read(), + // TODO: This is missing + // "altitude" : 158, + // "verticalAccuracy" : 68 + // and the activity models. 
I had no idea google tries to determine if I'm "tilting" + cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"], + ( + .locations[] + | [.timestampMs | todateiso8601, .latitudeE7, .longitudeE7, .accuracy] + ) + | @csv + `]) + ); + // There's also the semantic history but that's an entire nother can of worms + // it seems like - // TODO: Needs no-headers! - // a = t.fork().cd(`My Activity`) - // a.fork().glob(`**/MyActivity.html`) - // .setId(t=>`Google - ${t.basenameN(2)}`) - // .read() - // .pipe(()=>{ - // // Parses the MyActivity format, chunking it into pieces of HTML text - // // and then parsing out the text - // const dup = htmlSelectorChunkedDuplex( - // (tag, attrs)=>{ - // // TODO: We also probably want to get and parse each - // // ".content-cell.mdl-typography--caption" as well (it - // // has location for websearches and sometimes a details field) - // // but then we have to get ".mdl-grid" and parse it - // return attrs.class?.includes("content-cell") - // && attrs.class?.includes("mdl-typography--body-1") - // && !attrs.class?.includes("mdl-typography--text-right") - // }, - // (chunk)=>{ - // const text = chunk.innerText; - // const split = text.split("\n"); - // const timestamp = split.pop(); // TODO: need to parse this - // const rest = split.join("\n"); - // // TODO: Escape instead of replace - // const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes - // // Return a CSV - // return `"${restSafe}","${timestamp}"\n`; - // } - // ); - // return dup; - // }) + // TODO: Needs no-headers! 
+ // a = t.fork().cd(`My Activity`) + // a.fork().glob(`**/MyActivity.html`) + // .setId(t=>`Google - ${t.basenameN(2)}`) + // .read() + // .pipe(()=>{ + // // Parses the MyActivity format, chunking it into pieces of HTML text + // // and then parsing out the text + // const dup = htmlSelectorChunkedDuplex( + // (tag, attrs)=>{ + // // TODO: We also probably want to get and parse each + // // ".content-cell.mdl-typography--caption" as well (it + // // has location for websearches and sometimes a details field) + // // but then we have to get ".mdl-grid" and parse it + // return attrs.class?.includes("content-cell") + // && attrs.class?.includes("mdl-typography--body-1") + // && !attrs.class?.includes("mdl-typography--text-right") + // }, + // (chunk)=>{ + // const text = chunk.innerText; + // const split = text.split("\n"); + // const timestamp = split.pop(); // TODO: need to parse this + // const rest = split.join("\n"); + // // TODO: Escape instead of replace + // const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes + // // Return a CSV + // return `"${restSafe}","${timestamp}"\n`; + // } + // ); + // return dup; + // }) - // TODO: News - // TODO: Profile - // TODO: Tasks - No data - - return Array.from(col); + // TODO: News + // TODO: Profile + // TODO: Tasks - No data + }) + ); }; diff --git a/data-export/io.ts b/data-export/io.ts new file mode 100644 index 0000000..ad1abfb --- /dev/null +++ b/data-export/io.ts @@ -0,0 +1,68 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import { DatabaseSync } from "node:sqlite"; +import { type ProcessOutputAggregate, type RunOutput, TaskTarget, runAll } from "./task.ts"; +import { ProcessOutput } from 'zx'; + + +async function loadCSVTable( + db: DatabaseSync, + target: TaskTarget, + result: ProcessOutput | ProcessOutputAggregate +) { + const id = target.id; + const table = id; + const tmpPath = `/tmp/${id}.csv`; + // console.log(`Writing ${tmpPath}`); + const fd 
= await fs.open(tmpPath, 'w'); + await fs.writeFile(fd, result.stdout, { encoding: 'utf8' }); + await fd.close(); + // console.log(`Loading ${tmpPath} → table ${table}`); + + db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${tmpPath}');`); + db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`); + db.exec(`DROP TABLE IF EXISTS intermediate;`); + return table; +} + +// TODO: This should really have the same name throughout the codebase? +export const runPipeline = runAll; + +/** + * @param db Must be a DatabaseSync with the csv.so extension enabled + */ +export async function loadIntoDb(db: DatabaseSync, runOutput: RunOutput[]) { + // TODO: Metadata table should probably become a target just like CSVs so we can handle the same way as everyhing else + // Create the metadata table + db.exec(`CREATE TABLE base_data_manager_metadata (id TEXT, perRowDescription TEXT, perRowTags TEXT, columnMeta TEXT);`); + for (const {result, target} of runOutput) { + // Load result CSV into a table + const tableName = await loadCSVTable(db, target, result); + // Load the metadata + function literalIfExists(val: undefined | string) { + if (!val) { + return "NULL"; + } + return `'${val}'`; + } + const tableNamePart = `'${tableName}'`; + const columnMetaPart = literalIfExists(target.columnMeta?.join(",")); + const perRowDescriptionPart = literalIfExists(target.perRowDescription); + const perRowTags = literalIfExists(target.perRowDescription); + db.exec(`INSERT INTO base_data_manager_metadata VALUES (${tableNamePart}, ${perRowDescriptionPart}, ${perRowTags}, ${columnMetaPart});`); + } +} +export function getDefaultDB(): DatabaseSync { + const db = new DatabaseSync(":memory:", { allowExtension: true }); + db.loadExtension("/home/cobertos/sqlite-files/csv.so") + db.enableLoadExtension(false); + return db; +} +export async function dumpDBToDisk(db: DatabaseSync, dumpPath: string) { + if (fsSync.existsSync(dumpPath)) { + await fs.unlink(dumpPath); // unlink the 
old + } + + // Dump it all to the path specified + db.exec(`VACUUM main INTO '${dumpPath}'`); +} diff --git a/data-export/parallel.ts b/data-export/parallel.ts index 20f223a..4998ac1 100644 --- a/data-export/parallel.ts +++ b/data-export/parallel.ts @@ -1,15 +1,18 @@ -import { $, type ProcessOutput } from 'zx'; import os from 'os'; -import { type TaskTarget, run } from "./task.ts"; -$.verbose = false; - -export async function parallel( - targets: TaskTarget[], +/**Generic parallel runner with optional logging + * Runs `targets` with `runFn` up to a maximum of `maxConcurrency` amount at a time + * Shaped in a way that expects generally something that returns zx.ProcessOutput (or + * something with .duration and .ok built-in to the return) + * @param runFn Should NOT throw. Return { ok: false } instead + */ +export async function parallel( + targets: T[], + runFn: (t: T)=>Promise, quiet: boolean = false, maxConcurrency: number = os.cpus().length -): Promise { - const resultMap = new Map(); +): Promise { + const resultMap = new Map(); const total = targets.length; let completed = 0; @@ -40,14 +43,14 @@ export async function parallel( process.stderr.write(`\r${formatEta()}`.padEnd(80)); } - async function runJob(t: TaskTarget): Promise { + async function runJob(t: T): Promise { running++; printStatus(); - const result = await run(t); + const result = await runFn(t); completionTimes.push(result.duration); - resultMap.set(t.id, result); + resultMap.set(t, result); running--; completed++; @@ -76,13 +79,15 @@ export async function parallel( process.stderr.write('\n'); const totalSeconds = ((Date.now() - startTime) / 1000).toFixed(1); const failed = Array.from(resultMap.values().filter(p => !p.ok)); - process.stderr.write( - `\nCompleted ${total} jobs in ${totalSeconds}s (${failed.length} failed)\n` - ); + if (!quiet) { + process.stderr.write( + `\nCompleted ${total} jobs in ${totalSeconds}s (${failed.length} failed)\n` + ); + } const output = targets .map(t => { - const r 
= resultMap.get(t.id)!; + const r = resultMap.get(t)!; return r; }); diff --git a/data-export/task.ts b/data-export/task.ts index 364eec4..04e23c6 100644 --- a/data-export/task.ts +++ b/data-export/task.ts @@ -3,7 +3,10 @@ import fs from 'node:fs'; import { strict as assert } from "node:assert"; import { ZipFS } from "./zipFs.ts"; import { globSync } from "glob"; -import { $, ProcessPromise, quote } from "zx"; +import { $, ProcessOutput, quote } from "zx"; +import { parallel } from "./parallel.ts"; + +$.verbose = false; type FSImpl = { isZip?: boolean; @@ -38,19 +41,20 @@ function safe(s: string) { interface TaskTargetOp { type: "read" | "mid"; - toShell(target: TaskTarget): string; + toShell(target: TaskTarget): string | undefined; clone(): TaskTargetOp; } class TaskTargetRead implements TaskTargetOp { get type(){ return "read" as const; } toShell(target: TaskTarget) { if (target.fsImpl.isZip) { + // Read the file to stdout from the target inside the zip file + // This relies on the internals of fsImpl a bit to have the path to + // the root zip so we can create a command against it assert(target.fsImpl.zipPath, "Should have a zipPath"); - // We need to be able to do this return `7z x ${quote(target.fsImpl.zipPath)} -so ${quote(target.path)}`; } - // TODO : Implement when reading from a zip file return `cat ${quote(target.path)}`; } clone() { @@ -115,15 +119,6 @@ export const COLUMN_TYPES = { "TODO": {} }; -// // if (type === "numeric") { -// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`; -// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`; -// // } -// // else { -// // queryLine = `count(*) as n`; -// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`; -// // } - /**Column metadata. 
Just a string into the TYPES*/ type ColumnMeta = (keyof typeof COLUMN_TYPES | undefined); // Make non-optional version of just the metadata values of TaskTarget @@ -136,6 +131,8 @@ export class TaskTarget { fsImpl: FSImpl = defaultFSImpl; /**The pipeline of things to do to the above path to get an stdout of the output*/ pipeline: TaskTargetOp[]; + /**Aggregate to a specific id*/ + aggregateId?: string; // == Metadata, user configurable, no good defaults == /**Id of the TaskTarget @@ -149,12 +146,7 @@ export class TaskTarget { * you might do something like '"{3}" sent from {2} to {1}' * */ perRowDescription?: string; - /**For every output CSV, this defines a SQL expression evaluated per-row that - * returns a comma-separated string of tags to assign to that row. - * Use the items {0}, {1} to template column values, same as perRowDescription. - * Example: A static set of tags: "'me,facebook'" - * Example: Tags derived from a column: "'facebook,' || {2}" - * */ + /**A CSV of tags that is added to every row of the table (TODO: no template functionality currently)*/ perRowTags?: string; /**Metadata about the columns*/ columnMeta?: ColumnMeta[]; @@ -247,6 +239,7 @@ export class TaskTarget { toShell() { const shell = this.pipeline .map(p => p.toShell(this)) + .filter(p => !!p) // remove empty strings and undefined .join(" | ") return shell; } @@ -259,6 +252,11 @@ export class TaskTarget { this.pushToPipeline(new TaskTargetRead()); return this; } + /**Aggregate all the items into the specifically named target id*/ + aggregate(aggregateId: string) { + this.aggregateId = aggregateId; + return this; + } assignMeta(meta: Partial) { Object.assign(this, { ...meta, @@ -269,37 +267,56 @@ export class TaskTarget { } } -export function each(targets: TaskTarget[], fn: (t: TaskTarget)=>void) { - for (const t of targets) { - fn(t); - } +export interface PipelineOp { + (targets: TaskTarget[]): TaskTarget[] | Promise; } -export function map(targets: TaskTarget[], fn: (t: 
TaskTarget)=>TaskTarget) { - const newTargets = []; - for (const t of targets) { - newTargets.push(fn(t)); - } - return newTargets; + +export function cd(path: string): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.clone().cd(path)); } -export function cd(targets: TaskTarget[], path: string): TaskTarget[] { - return targets.map(t => t.clone().cd(path)); +export function glob(globPath: string): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.glob(globPath)).flat(); } -export function glob(targets: TaskTarget[], globPath: string): TaskTarget[] { - return targets.map(t => t.glob(globPath)).flat(); +export function unzip(): PipelineOp { + return async (targets: TaskTarget[]) => Promise.all(targets.map(t => t.unzip())); } -export async function unzip(targets: TaskTarget[]): Promise { - return Promise.all(targets.map(t => t.unzip())); +export function read(): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.clone().read()) } -export function read(targets: TaskTarget[]): TaskTarget[] { - return targets.map(t => t.clone().read()) +export function cmd(cmd: ValidCmd): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.clone().cmd(cmd)) } -export function cmd(targets: TaskTarget[], cmd: ValidCmd): TaskTarget[] { - return targets.map(t => t.clone().cmd(cmd)) +export function aggregate(aggregateId: string): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.clone().aggregate(aggregateId)) } -export function assignMeta(targets: TaskTarget[], meta: Partial): TaskTarget[] { - return targets.map(t => t.clone().assignMeta(meta)) +export function assignMeta(meta: Partial): PipelineOp { + return (targets: TaskTarget[]) => targets.map(t => t.clone().assignMeta(meta)) } + +export function pipe(...ops: PipelineOp[]): PipelineOp { + return async (targets: TaskTarget[]) => { + for (const op of ops) { + targets = await op(targets); + } + return targets; + }; +} +export function branch(...ops: 
PipelineOp[]): PipelineOp { + return async (targets: TaskTarget[]) => { + const targetsArrays = await Promise.all(ops.map(op => op(targets))); + return targetsArrays.flat(); + }; +} +export function branchGen(genFn: ()=>Generator): PipelineOp { + const opsToBranch = Array.from(genFn()); + return (targets: TaskTarget[]) => { + return branch(...opsToBranch)(targets); + }; +} + + + /**Verify, anything that fails is skipped and throws an error*/ export async function verify(targets: TaskTarget[]) { const outTargets: TaskTarget[] = []; @@ -319,78 +336,73 @@ export async function verify(targets: TaskTarget[]) { outTargets.push(t); } + return outTargets; } -function collectionSwap(a: TaskTargetPipelineHelper, b: TaskTargetPipelineHelper) { - if (!a.__collection) { - return; - } - - // Remove a, add b - const collection = a.__collection; - delete a.__collection; - collection.delete(a); - b.__collection = collection; - collection.add(b); +export interface ProcessOutputAggregate { + stdout: string; + stderr: string; + exitCodes: (number | null)[]; + duration: number; + ok: boolean; } -export class TaskTargetPipelineHelper extends Array { - __collection?: Set; - - static pipeline(t: TaskTarget[]): TaskTargetPipelineHelper { - if (Object.getPrototypeOf(t) === TaskTargetPipelineHelper.prototype) { - return t as any; // Already done - } - Object.setPrototypeOf(t, TaskTargetPipelineHelper.prototype); - return t as any; +function combineProcessOutputAggregate(poa: ProcessOutputAggregate | undefined, po: ProcessOutput) { + if (!poa) { + return { + stdout: po.stdout, + stderr: po.stderr, + exitCodes: [po.exitCode], + duration: po.duration, + ok: po.ok + }; } - _fn(fn: (t: TaskTarget[])=>TaskTarget[]): TaskTargetPipelineHelper { - const p = TaskTargetPipelineHelper.pipeline(this); - const t = fn(p); - const p2 = TaskTargetPipelineHelper.pipeline(t); - collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection - return p2; - } - async 
_afn(fn: (t: TaskTarget[])=>Promise): Promise { - const p = TaskTargetPipelineHelper.pipeline(this); - const t = await fn(p); - const p2 = TaskTargetPipelineHelper.pipeline(t); - collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection - return p2; - } - - cd(path: string): TaskTargetPipelineHelper { - return this._fn(t => cd(t, path)); - } - glob(globPath: string): TaskTargetPipelineHelper { - return this._fn(t => glob(t, globPath)); - } - async unzip(): Promise { - return this._afn(unzip); - } - read(): TaskTargetPipelineHelper { - return this._fn(read); - } - cmd(_cmd: ValidCmd): TaskTargetPipelineHelper { - return this._fn(t => cmd(t, _cmd)); - } - assignMeta(meta: Partial): TaskTargetPipelineHelper { - return this._fn(t => assignMeta(t, meta)); - } - - /** - * @todo Nested versions of this don't currently work, but they could if we - * turn __collection into an array of collections - */ - collect(_c: Set) { - this.__collection = _c; - return this; - } + // Comes with a builtin "\n" from jq on stdout and stderr, no need to add + // a trailing one + poa.stdout += po.stdout; + poa.stderr += po.stderr; + poa.exitCodes.push(po.exitCode); + poa.duration += po.duration; + poa.ok &&= po.ok; + return poa; } -export async function run(target: TaskTarget): Promise { +export interface RunOutput { + target: TaskTarget, + result: ProcessOutput | ProcessOutputAggregate +} + +export async function run(target: TaskTarget): Promise { const command = target.toShell(); return await $({ nothrow: true })`bash -c ${command}`; +} + +export async function runAll(targets: TaskTarget[]): Promise { + const finalTargets = await verify(targets); + const results = await parallel(finalTargets, run, true); + + const nonAggregateResults: RunOutput[] = []; + const aggregateResultsMap: Record = {}; + + // Aggregate + for (const [idx, r] of results.entries()) { + const t = finalTargets[idx]; + if (!t.aggregateId) { + nonAggregateResults.push({ + 
target: t, + result: r + }); + continue; + } + const prevResult = aggregateResultsMap[t.aggregateId]?.result; + aggregateResultsMap[t.aggregateId] = { + target: t, + result: combineProcessOutputAggregate(prevResult as (ProcessOutputAggregate | undefined), r) + }; + } + + const aggregateResults: RunOutput[] = Object.values(aggregateResultsMap); + return aggregateResults.concat(nonAggregateResults); } \ No newline at end of file diff --git a/main.ts b/main.ts index 5ab68fb..cc8fc0f 100644 --- a/main.ts +++ b/main.ts @@ -1,192 +1,88 @@ -import fs from 'node:fs/promises'; -import fsSync from 'node:fs'; -import nodePath from "node:path"; -import { DatabaseSync } from "node:sqlite"; import "./data-export/facebook.ts"; import { google } from "./data-export/google.ts"; -import { TaskTargetPipelineHelper, TaskTarget, verify } from "./data-export/task.ts"; -import { parallel } from "./data-export/parallel.ts"; -import { ProcessOutput } from 'zx'; - -declare module "./data-export/task.ts" { - interface TaskTargetPipelineHelper { - google: typeof google; - } -} - -Object.assign(TaskTargetPipelineHelper.prototype, { - google -}); - -async function loadCSVTable( - db: DatabaseSync, - target: TaskTarget, - result: ProcessOutput -) { - const id = target.id; - const table = id; - const tmpPath = `/tmp/${id}.csv`; - console.log(`Writing ${tmpPath}`); - const fd = await fs.open(tmpPath, 'w'); - await fs.writeFile(fd, result.stdout, { encoding: 'utf8' }); - await fd.close(); - console.log(`Loading ${tmpPath} → table ${table}`); - - // const headers = lines[0].split(","); - // const columnsSql = headers.map(h => `"${h}" TEXT`).join(", "); - db.exec(`CREATE VIRTUAL TABLE temp.tmp_${table} USING csv(filename='${tmpPath}');`); - // db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`); - // db.exec(`DROP TABLE IF EXISTS intermediate;`); - return `tmp_${table}`; -} -function getColumnNames(db: DatabaseSync, tableName: string) { - return db.prepare(`PRAGMA 
table_info(${tableName})`).all().map(c => c.name) as string[]; -} -function templateToSql(template: string, columns: string[]) { - // Convert '{0}, {1}' to '%s, %s' - const args: string[] = []; - const sqlTemplate = template.replace(/\{(\d+)\}/g, (match, index) => { - args.push(columns[parseInt(index)]); - return '%s'; - }); - return `printf('${sqlTemplate}', ${args.join(', ')})`; -} -function templateToSqlExpr(template: string, columns: string[]) { - // perRowTags is already a SQL expression; just substitute {N} with column names - return template.replace(/\{(\d+)\}/g, (_match, index) => columns[parseInt(index)]); -} +import { facebook, facebook_v2 } from "./data-export/facebook.ts"; +import { TaskTarget } from "./data-export/task.ts"; +import * as DataIO from "./data-export/io.ts"; async function main() { + let time = Date.now(); + function elapsed() { + return `${((Date.now() - time) / 1000).toFixed(2)}s`; + } + const sqlitePath = 'your.db'; - const t = TaskTargetPipelineHelper; - const targets = TaskTargetPipelineHelper.pipeline([ + console.log(`${elapsed()} - Building targets`); + const unbuiltTargets = [ + new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json") // new TaskTarget("/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01"), - new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json"), //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2(); //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001").facebook_v2(); - ]) - .facebook(); + ]; + console.log(`${elapsed()} - Begin solving ${unbuiltTargets.length} input target for possible targets`); + const targets = await facebook()(unbuiltTargets); + console.log(`${elapsed()} - Found ${targets.length} possible targets`); // 
.facebook_v2(); // .google(); // TODO: Make this less painful in task.ts // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip"); // await (zipTask.fsImpl as any).init(); + console.log(`${elapsed()} - Run all targets`); + const out = await DataIO.runPipeline(targets); + console.log(`${elapsed()} - Final targets exported to CSV. Got ${out.length} targets`); - const finalTargets = await verify(targets); - const results = await parallel(finalTargets, true); + // TODO: Add an option to output everything plainly as CSV in a single directory - if (fsSync.existsSync(sqlitePath)) { - await fs.unlink(sqlitePath); // unlink the old - } - // Open an in-memory db for speed - const db = new DatabaseSync(":memory:", { allowExtension: true }); - db.loadExtension("/home/cobertos/sqlite-files/csv.so") - db.enableLoadExtension(false); + console.log(`${elapsed()} - Building combined database table in :memory:`); + const db = DataIO.getDefaultDB(); + await DataIO.loadIntoDb(db, out); + + const tableCount = db.prepare(`SELECT COUNT(*) as count FROM base_data_manager_metadata`).get()!.count; + console.log(`${elapsed()} - Single database built with ${tableCount} tables`); - // New output table - db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, sender TEXT, receiver TEXT, tags TEXT, lat REAL, lng REAL);`); - for (const [idx, target] of targets.entries()) { - const result = results[idx]; - - if (!target.columnMeta) { - continue; // No column information - } - - const tableName = await loadCSVTable(db, target, result); - const columnNames = getColumnNames(db, tableName); - - // Now find what to insert into each row of the combined - let descriptionPart = `'An entry from the ${tableName} table'`; // Default is just kinda garbo... 
- if (target.perRowDescription) { - descriptionPart = templateToSql(target.perRowDescription, columnNames); - } - - let timestampPart: string | undefined; - let senderPart = 'NULL'; - let receiverPart = 'NULL'; - let latPart = 'NULL'; - let lngPart = 'NULL'; - for (const [idx, col] of target.columnMeta.entries()) { - const columnName = columnNames[idx]; - if (col === "isodatetime") { - timestampPart = columnName; - } else if (col === "sender") { - senderPart = columnName; - } else if (col === "receiver") { - receiverPart = columnName; - } else if (col === "lat") { - latPart = columnName; - } else if (col === "lng") { - lngPart = columnName; - } - } - if (!timestampPart) { - continue; - } - - let tagsPart = 'NULL'; - if (target.perRowTags) { - tagsPart = templateToSqlExpr(target.perRowTags, columnNames); - } - - // OFFSET + LIMIT to ignore the CSV headers - db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${senderPart}, ${receiverPart}, ${tagsPart}, ${latPart}, ${lngPart} FROM ${tableName} LIMIT -1 OFFSET 1;`); - } - - // Dump it all to the path specified - db.exec(`VACUUM main INTO '${sqlitePath}'`); - - // Now dump it as a CSV - const rows = db.prepare(` - SELECT timestamp || ',' || '"' || replace(description, '"', '""') || '"' as row FROM combined - `) - .all() - .map(r => r.row) - .join('\n'); - db.close(); - - await fs.writeFile('your.csv', rows, { encoding: "utf8" }); + console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`); + DataIO.dumpDBToDisk(db, sqlitePath); + console.log(`${elapsed()} - Database written to disk`); } main(); // TODO: Move this into here - // csvSink( - // summarization?: [string, string][] - // ) { - // // TODO: - // return this; +// csvSink( +// summarization?: [string, string][] +// ) { +// // TODO: +// return this; - // // Ingest this csv into the database at the given id - // // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]); - // // Add a post 
processing function for these targets that prints out the summarization - // // stats - // // this.post(async (t: TaskTarget)=>{ - // // // We only do the first one so far for the summarization - // // let queryLine: string; - // // let formatFn: (r: any)=>string; - // // const [columnName, type] = summarization?.[0] ?? [undefined, undefined]; - // // if (type === "numeric") { - // // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`; - // // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`; - // // } - // // else { - // // queryLine = `count(*) as n`; - // // formatFn = (r: any)=>`${r.n} rows for ${t.id}`; - // // } +// // Ingest this csv into the database at the given id +// // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]); +// // Add a post processing function for these targets that prints out the summarization +// // stats +// // this.post(async (t: TaskTarget)=>{ +// // // We only do the first one so far for the summarization +// // let queryLine: string; +// // let formatFn: (r: any)=>string; +// // const [columnName, type] = summarization?.[0] ?? 
[undefined, undefined]; +// // if (type === "numeric") { +// // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`; +// // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`; +// // } +// // else { +// // queryLine = `count(*) as n`; +// // formatFn = (r: any)=>`${r.n} rows for ${t.id}`; +// // } - // // const cmd = "sqlite-utils"; - // // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`] - // // const { stdout, stderr } = await execFile(cmd, args); - // // const results = JSON.parse(stdout); - // // const result = results[0]; // should only be one result in the array for this type of query - // // const logLine = formatFn(result); - // // (t as any).log = logLine; - // // }); +// // const cmd = "sqlite-utils"; +// // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`] +// // const { stdout, stderr } = await execFile(cmd, args); +// // const results = JSON.parse(stdout); +// // const result = results[0]; // should only be one result in the array for this type of query +// // const logLine = formatFn(result); +// // (t as any).log = logLine; +// // }); - // // return this; - // } \ No newline at end of file +// // return this; +// } \ No newline at end of file diff --git a/test/facebook.ts b/test/facebook.ts index 14eb503..15b00c3 100644 --- a/test/facebook.ts +++ b/test/facebook.ts @@ -1,9 +1,10 @@ import test from "node:test"; import nodePath from "node:path"; import { strict as assert } from "node:assert"; -import { TaskTargetPipelineHelper, TaskTarget, verify, run } from "../data-export/task.ts"; +import { TaskTarget, verify, run, unzip, pipe } from "../data-export/task.ts"; import { parallel } from "../data-export/parallel.ts"; -import "../data-export/facebook.ts"; +import { facebook, facebook_v2 } from "../data-export/facebook.ts"; +import * as DataIO from "../data-export/io.ts"; const THIS_FILE = import.meta.dirname; const FACEBOOK_V1_DIR = nodePath.join(THIS_FILE, 
'fixtures/facebook-json-2021-05-01'); @@ -11,19 +12,16 @@ const FACEBOOK_V1_ZIPPED = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021 const FACEBOOK_V2_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2025-11-29'); test("facebook: Can load the 2021 export", async (t) => { - const targets = TaskTargetPipelineHelper.pipeline([ + const targets = [ new TaskTarget(FACEBOOK_V1_DIR) - ]) - .facebook(); - - const finalTargets = await verify(targets); - const result = await parallel(finalTargets, true); + ] + const builtTargets = await facebook()(targets); + const out = await DataIO.runPipeline(builtTargets); const idAndCSVs: [string, string][] = []; - for (const [idx, r] of result.entries()) { - const target = finalTargets[idx]; - assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`); - assert.ok(r.ok, `Task ${target.id} should be okay`); - idAndCSVs.push([target.id, r.stdout]); + for (const {target, result} of out) { + assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`); + assert.ok(result.ok, `Task ${target.id} should be okay`); + idAndCSVs.push([target.id, result.stdout]); } const csvs = idAndCSVs .sort() // Keep stable ordering for snapshots @@ -32,21 +30,16 @@ test("facebook: Can load the 2021 export", async (t) => { t.assert.snapshot(csvs); }); test("facebook: Can load the 2021 export zipped", async (t) => { - const targets = await TaskTargetPipelineHelper.pipeline([ + const targets = [ new TaskTarget(FACEBOOK_V1_ZIPPED) - ]) - .unzip(); - const targets2 = targets - .facebook(); - - const finalTargets = await verify(targets2); - const result = await parallel(finalTargets, true); + ]; + const builtTargets = await pipe(unzip(), facebook())(targets); + const out = await DataIO.runPipeline(builtTargets); const idAndCSVs: [string, string][] = []; - for (const [idx, r] of result.entries()) { - const target = finalTargets[idx]; - assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`); - assert.ok(r.ok, 
`Task ${target.id} should be okay`); - idAndCSVs.push([target.id, r.stdout]); + for (const {target, result} of out) { + assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`); + assert.ok(result.ok, `Task ${target.id} should be okay`); + idAndCSVs.push([target.id, result.stdout]); } const csvs = idAndCSVs .sort() // Keep stable ordering for snapshots @@ -55,19 +48,16 @@ test("facebook: Can load the 2021 export zipped", async (t) => { t.assert.snapshot(csvs); }); test("facebook: Can load the 2025 export", async (t) => { - const targets = TaskTargetPipelineHelper.pipeline([ + const targets = [ new TaskTarget(FACEBOOK_V2_DIR) - ]) - .facebook_v2(); - - const finalTargets = await verify(targets); - const result = await parallel(finalTargets, true); + ] + const builtTargets = await facebook_v2()(targets); + const out = await DataIO.runPipeline(builtTargets); const idAndCSVs: [string, string][] = []; - for (const [idx, r] of result.entries()) { - const target = finalTargets[idx]; - assert.ok(!r.stderr, `Task ${target.id} should have no stderr output`); - assert.ok(r.ok, `Task ${target.id} should be okay`); - idAndCSVs.push([target.id, r.stdout]); + for (const {target, result} of out) { + assert.ok(!result.stderr, `Task ${target.id} should have no stderr output`); + assert.ok(result.ok, `Task ${target.id} should be okay`); + idAndCSVs.push([target.id, result.stdout]); } const csvs = idAndCSVs .sort() // Keep stable ordering for snapshots diff --git a/test/task.ts b/test/task.ts index d333362..93a8f36 100644 --- a/test/task.ts +++ b/test/task.ts @@ -9,7 +9,6 @@ import { cmd, assignMeta, verify, - TaskTargetPipelineHelper, } from "../data-export/task.ts"; const THIS_FILE = import.meta.dirname; @@ -92,7 +91,7 @@ test("TaskTarget: pushToPipeline throws if read is not the first op", () => { test("TaskTarget: clone produces an independent copy", () => { const t = new TaskTarget("/foo").assignMeta({ idValue: "orig", - columnMeta: ["yeag"] + columnMeta: 
["any"] }); t.read(); const c = t.clone(); @@ -155,41 +154,41 @@ test("toShell: cmd with function resolves at shell-generation time", () => { // -- module-level functions --------------------------------------------------- -test("cd: clones and changes directory of each target", () => { +test("cd: clones and changes directory of each target", async () => { const targets = [new TaskTarget("/a"), new TaskTarget("/b")]; - const result = cd(targets, "sub"); + const result = await cd("sub")(targets); assert.equal(result[0].path, "/a/sub"); assert.equal(result[1].path, "/b/sub"); assert.equal(targets[0].path, "/a"); // originals unchanged }); -test("read: clones and adds a read op to each target", () => { +test("read: clones and adds a read op to each target", async () => { const targets = [new TaskTarget("/a.txt"), new TaskTarget("/b.txt")]; - const result = read(targets); + const result = await read()(targets); assert.equal(result[0].pipeline[0].type, "read"); assert.equal(result[1].pipeline[0].type, "read"); assert.equal(targets[0].pipeline.length, 0); // originals unchanged }); -test("cmd: clones and appends a cmd op to each target", () => { +test("cmd: clones and appends a cmd op to each target", async () => { const targets = [new TaskTarget("/a.txt")]; targets[0].read(); - const result = cmd(targets, "jq ."); + const result = await cmd("jq .")(targets); assert.equal(result[0].pipeline.length, 2); assert.equal(targets[0].pipeline.length, 1); // original unchanged }); -test("assignMeta: clones and sets meta on each target", () => { +test("assignMeta: clones and sets meta on each target", async () => { const targets = [new TaskTarget("/a"), new TaskTarget("/b")]; - const result = assignMeta(targets, { idValue: "myid" }); + const result = await assignMeta({ idValue: "myid" })(targets); assert.equal(result[0].id, "myid"); assert.equal(result[1].id, "myid"); assert.throws(() => targets[0].id); // originals have no id }); -test("taskGlob: returns matching targets across 
all input targets", () => { +test("taskGlob: returns matching targets across all input targets", async () => { const targets = [new TaskTarget(FIXTURE_DIR)]; - const result = taskGlob(targets, "friends/*.json"); + const result = await taskGlob("friends/*.json")(targets); assert.ok(result.length > 0); assert.ok(result.every(r => r.path.endsWith(".json"))); }); @@ -226,75 +225,3 @@ test("verify: filters a mixed list to only valid targets", async () => { assert.equal(result[0], good); }); -// -- TaskTargetPipelineHelper ------------------------------------------------- - -test("TaskTargetPipelineHelper: pipeline() promotes a plain array", () => { - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a")]); - assert.ok(p instanceof TaskTargetPipelineHelper); -}); - -test("TaskTargetPipelineHelper: pipeline() is idempotent", () => { - const arr = [new TaskTarget("/a")]; - const p1 = TaskTargetPipelineHelper.pipeline(arr); - const p2 = TaskTargetPipelineHelper.pipeline(p1); - assert.equal(p1, p2); -}); - -test("TaskTargetPipelineHelper: cd returns a new helper with paths changed", () => { - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a"), new TaskTarget("/b")]); - const p2 = p.cd("sub"); - assert.ok(p2 instanceof TaskTargetPipelineHelper); - assert.equal(p2[0].path, "/a/sub"); - assert.equal(p2[1].path, "/b/sub"); -}); - -test("TaskTargetPipelineHelper: read returns a new helper with read ops added", () => { - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]); - const p2 = p.read(); - assert.ok(p2 instanceof TaskTargetPipelineHelper); - assert.equal(p2[0].pipeline[0].type, "read"); -}); - -test("TaskTargetPipelineHelper: cmd returns a new helper with cmd ops added", () => { - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]); - const p2 = p.read().cmd("jq ."); - assert.equal(p2[0].toShell(), "cat /a.txt | jq ."); -}); - -// -- collect ------------------------------------------------------------------ 
- -test("collect: the final end of a chain is added to the collection set", () => { - const collection = new Set(); - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]); - p.collect(collection); - - const p2 = p.cd("sub"); - assert.equal(collection.size, 1); - assert.ok(collection.has(p2)); -}); - -test("collect: moving the chain end removes the old element and adds the new one", () => { - const collection = new Set(); - const p = TaskTargetPipelineHelper.pipeline([new TaskTarget("/foo")]); - p.collect(collection); - - const p2 = p.cd("sub"); - const p3 = p2.read(); - assert.equal(collection.size, 1); - assert.ok(collection.has(p3)); - assert.ok(!collection.has(p2)); -}); - -test("collect: gathers the ends of multiple independent pipeline branches", () => { - const collection = new Set(); - - const b1 = TaskTargetPipelineHelper.pipeline([new TaskTarget("/a.txt")]).collect(collection).read(); - const b2 = TaskTargetPipelineHelper.pipeline([new TaskTarget("/b.txt")]).collect(collection).read(); - - assert.equal(collection.size, 2); - assert.ok(collection.has(b1)); - assert.ok(collection.has(b2)); - - const allTargets = [...collection].flat(); - assert.equal(allTargets.length, 2); -}); diff --git a/timelinize.ts b/timelinize.ts new file mode 100644 index 0000000..50fef01 --- /dev/null +++ b/timelinize.ts @@ -0,0 +1,240 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import nodePath from "node:path"; +import { DatabaseSync } from "node:sqlite"; +import "./data-export/facebook.ts"; +import { google } from "./data-export/google.ts"; +import { TaskTargetPipelineHelper, TaskTarget, runAll } from "./data-export/task.ts"; +import { ProcessOutput } from 'zx'; + +declare module "./data-export/task.ts" { + interface TaskTargetPipelineHelper { + google: typeof google; + } +} + +Object.assign(TaskTargetPipelineHelper.prototype, { + google +}); + +async function loadCSVTable( + db: DatabaseSync, + target: TaskTarget, + result: 
ProcessOutput +) { + const id = target.id; + const table = id; + const tmpPath = `/tmp/${id}.csv`; + // console.log(`Writing ${tmpPath}`); + const fd = await fs.open(tmpPath, 'w'); + await fs.writeFile(fd, result.stdout, { encoding: 'utf8' }); + await fd.close(); + // console.log(`Loading ${tmpPath} → table ${table}`); + + // const headers = lines[0].split(","); + // const columnsSql = headers.map(h => `"${h}" TEXT`).join(", "); + db.exec(`CREATE VIRTUAL TABLE temp.tmp_${table} USING csv(filename='${tmpPath}');`); + // db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`); + // db.exec(`DROP TABLE IF EXISTS intermediate;`); + return `tmp_${table}`; +} +function getColumnNames(db: DatabaseSync, tableName: string) { + return db.prepare(`PRAGMA table_info(${tableName})`).all().map(c => c.name) as string[]; +} +function templateToSql(template: string, columns: string[]) { + // Convert '{0}, {1}' to '%s, %s' + const args: string[] = []; + const sqlTemplate = template.replace(/\{(\d+)\}/g, (match, index) => { + args.push(columns[parseInt(index)]); + return '%s'; + }); + return `printf('${sqlTemplate}', ${args.join(', ')})`; +} + +async function main() { + let time = Date.now(); + function elapsed() { + return `${((Date.now() - time) / 1000).toFixed(2)}s`; + } + + const sqlitePath = 'your.db'; + + console.log(`${elapsed()} - Building targets`); + const t = TaskTargetPipelineHelper; + const targets = TaskTargetPipelineHelper.pipeline([ + // new TaskTarget("/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01"), + new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json"), + //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2(); + //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001").facebook_v2(); + ]) + .facebook(); + // .facebook_v2(); + // 
.google(); + + // TODO: Make this less painful in task.ts + // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip"); + // await (zipTask.fsImpl as any).init(); + + const results = await runAll(targets); + console.log(`${elapsed()} - All ${results.length} targets converted to CSV`); + + if (fsSync.existsSync(sqlitePath)) { + await fs.unlink(sqlitePath); // unlink the old + } + // Open an in-memory db for speed + console.log(`${elapsed()} - Building combined database table in :memory:`); + const db = new DatabaseSync(":memory:", { allowExtension: true }); + db.loadExtension("/home/cobertos/sqlite-files/csv.so") + db.enableLoadExtension(false); + + // New output table + db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`); + +//(message, email, note, +// social, location, media, event, document, +// bookmark; defaults to note) + + for (const [idx, target] of targets.entries()) { + const result = results[idx]; + let time = Date.now(); + + if (!target.columnMeta) { + continue; // No column information + } + + const tableName = await loadCSVTable(db, target, result); + const columnNames = getColumnNames(db, tableName); + + // Now find what to insert into each row of the combined + let descriptionPart = `'An entry from the ${tableName} table'`; // Default is just kinda garbo... 
+ if (target.perRowDescription) { + descriptionPart = templateToSql(target.perRowDescription, columnNames); + } + + let timestampPart: string | undefined; + let senderPart = 'NULL'; + let receiverPart = 'NULL'; + let latPart = 'NULL'; + let lngPart = 'NULL'; + for (const [idx, col] of target.columnMeta.entries()) { + const columnName = columnNames[idx]; + if (col === "isodatetime") { + timestampPart = columnName; + } else if (col === "sender") { + senderPart = columnName; + } else if (col === "receiver") { + receiverPart = columnName; + } else if (col === "lat") { + latPart = columnName; + } else if (col === "lng") { + lngPart = columnName; + } + } + if (!timestampPart) { + continue; + } + + let tagsPart = 'NULL'; + if (target.perRowTags) { + // Per row tags is an string of csv'd items but needs to be made a literal + tagsPart = `'${target.perRowTags}'`; + // TODO: Make this either a template string or have jq do something + // tagsPart = templateToSqlExpr(target.perRowTags, columnNames); + } + + let typePart = "'note'"; + if (target.perRowTags) { + //message, email, note, social, location, media, event, document, bookmark + if (target.perRowTags.includes(",message")) { + typePart = "'message'"; + } + } + + // OFFSET + LIMIT to ignore the CSV headers + db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${typePart}, ${senderPart}, ${receiverPart}, ${latPart}, ${lngPart}, ${tagsPart} FROM ${tableName} LIMIT -1 OFFSET 1;`); + } + + const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()!.count; + console.log(`${elapsed()} - Combined database built with ${count} rows`); + // Dump it all to the path specified + db.exec(`VACUUM main INTO '${sqlitePath}'`); + console.log(`${elapsed()} - Combined database written to disk`); + + // Now dump it as a CSV + console.log(`${elapsed()} - Building final combined CSV`); + // const rows = db.prepare(` + // SELECT timestamp || ',' || + // '"' || replace(description, '"', '""') || '"' || ',' || 
+ // COALESCE(type, '') || ',' || + // '"' || replace(COALESCE(sender, ''), '"', '""') || '"' || ',' || + // '"' || replace(COALESCE(receiver, ''), '"', '""') || '"' || ',' || + // COALESCE(lat, '') || ',' || + // COALESCE(lng, '') || ',' || + // '"' || replace(COALESCE(tags, ''), '"', '""') || '"' as row FROM combined + // `.replace(/\n/g, '')) + // .all() + // .map(r => r.row) + // .join('\n'); + const rows = db.prepare(`SELECT * FROM combined`).all() + .map(r => [ + r.timestamp, + r.description, + r.type, + r.sender, + r.receiver, + r.lat, + r.lng, + r.tags + ].map(v => { + if (v == null || v === '') return ''; + const str = String(v); + return str.includes(',') || str.includes('"') + ? `"${str.replace(/"/g, '""')}"` + : str; + }).join(',')) + .join('\n'); + db.close(); + + console.log(`${elapsed()} - Writing final combined CSV`); + const headers = "timestamp,description,type,sender,receiver,lat,lng,tags\n"; + await fs.writeFile('your.csv', headers+rows, { encoding: "utf8" }); +} + +main(); + +// TODO: Move this into here + // csvSink( + // summarization?: [string, string][] + // ) { + // // TODO: + // return this; + + // // Ingest this csv into the database at the given id + // // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]); + // // Add a post processing function for these targets that prints out the summarization + // // stats + // // this.post(async (t: TaskTarget)=>{ + // // // We only do the first one so far for the summarization + // // let queryLine: string; + // // let formatFn: (r: any)=>string; + // // const [columnName, type] = summarization?.[0] ?? 
[undefined, undefined]; + // // if (type === "numeric") { + // // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`; + // // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`; + // // } + // // else { + // // queryLine = `count(*) as n`; + // // formatFn = (r: any)=>`${r.n} rows for ${t.id}`; + // // } + + // // const cmd = "sqlite-utils"; + // // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`] + // // const { stdout, stderr } = await execFile(cmd, args); + // // const results = JSON.parse(stdout); + // // const result = results[0]; // should only be one result in the array for this type of query + // // const logLine = formatFn(result); + // // (t as any).log = logLine; + // // }); + + // // return this; + // } \ No newline at end of file From a4fbe1618dbf73332e9fd1c2875e502cfa48625d Mon Sep 17 00:00:00 2001 From: cobertos Date: Thu, 26 Feb 2026 11:09:42 -0500 Subject: [PATCH 2/4] Fixed FB dating messages, added metadata as output table, added aggregate message thread metadata from FB * aggregateId is now metadata and it's just aggregate: boolean and uses .id instead * Use csv-parse for tests * Update test snapshots --- data-export/facebook.ts | 38 +- data-export/google.ts | 2 +- data-export/io.ts | 26 +- data-export/task.ts | 84 +- main.ts | 2 +- package.json | 1 + pnpm-lock.yaml | 8 + test/facebook.ts | 7 +- test/facebook.ts.snapshot | 2680 +++++++++++++++++++++++++++++++++++-- timelinize.ts | 3 +- 10 files changed, 2686 insertions(+), 165 deletions(-) diff --git a/data-export/facebook.ts b/data-export/facebook.ts index d0d2793..0ecf231 100644 --- a/data-export/facebook.ts +++ b/data-export/facebook.ts @@ -1,4 +1,4 @@ -import { pipe, branch, cmd, assignMeta, aggregate, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; +import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; /**Parses about_you/notifications.json in the old format * 
or logged_information/notifications.json in the new format*/ @@ -56,16 +56,20 @@ function facebook_messages_generic() { // This most assuredly does not handle certain things like pictures and such // There are messages .type and then they have other thing in them? - // We also want to collect another set of data that is the conversation-level - // information - // TODO: Readd - // yield pipe( - // cmd(["jq", "-r", ` - // [.title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))] - // | @csv - // `]), - // aggregate("Facebook - Messages Meta") - // ); + // Conversation-level information aggregated into a single place + // TODO: This will result in MULTIPLE rows for a single thread if there is multiple .jsons for a single + // chat in one directory. Ughhhhhhhhhhhhhhh. For now this is just a limiation + yield pipe( + cmd(["jq", "-r", ` + [.title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))] + | @csv + `]), + assignMeta({ + aggregate: true, + idValue: "Facebook - Messages Meta", + }) + ); + // The conversation itself yield pipe( cmd(["jq", "-r", ` ["from","to","timestamp","content"], @@ -548,11 +552,13 @@ export function facebook(){ read(), cmd(["jq", "-r", ` ["from","to","timestamp","body"], - .recipient as $to - | ( - .messages[] - | ["Me", $to, (.timestamp | todateiso8601), .body] - ) + ( + .recipient as $to + | ( + .messages[] + | ["Me", $to, (.timestamp | todateiso8601), .body] + ) + ) | @csv `]), assignMeta({ diff --git a/data-export/google.ts b/data-export/google.ts index a6c788d..05c7bfe 100644 --- a/data-export/google.ts +++ b/data-export/google.ts @@ -1,4 +1,4 @@ -import { pipe, branch, cmd, assignMeta, aggregate, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; +import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; import { htmlSelectorChunkedDuplex } from "./html.ts"; export function google(){ diff --git 
a/data-export/io.ts b/data-export/io.ts index ad1abfb..208a88d 100644 --- a/data-export/io.ts +++ b/data-export/io.ts @@ -1,14 +1,14 @@ import fs from 'node:fs/promises'; import fsSync from 'node:fs'; import { DatabaseSync } from "node:sqlite"; -import { type ProcessOutputAggregate, type RunOutput, TaskTarget, runAll } from "./task.ts"; +import { type ProcessOutputAggregate, type RunOutput, TaskTarget, runAll, type ProcessOutputSimple } from "./task.ts"; import { ProcessOutput } from 'zx'; async function loadCSVTable( db: DatabaseSync, target: TaskTarget, - result: ProcessOutput | ProcessOutputAggregate + result: ProcessOutput | ProcessOutputAggregate | ProcessOutputSimple ) { const id = target.id; const table = id; @@ -19,10 +19,10 @@ async function loadCSVTable( await fd.close(); // console.log(`Loading ${tmpPath} → table ${table}`); - db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${tmpPath}');`); + db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${tmpPath}', header);`); db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`); db.exec(`DROP TABLE IF EXISTS intermediate;`); - return table; + return; } // TODO: This should really have the same name throughout the codebase? 
@@ -32,24 +32,8 @@ export const runPipeline = runAll; * @param db Must be a DatabaseSync with the csv.so extension enabled */ export async function loadIntoDb(db: DatabaseSync, runOutput: RunOutput[]) { - // TODO: Metadata table should probably become a target just like CSVs so we can handle the same way as everyhing else - // Create the metadata table - db.exec(`CREATE TABLE base_data_manager_metadata (id TEXT, perRowDescription TEXT, perRowTags TEXT, columnMeta TEXT);`); for (const {result, target} of runOutput) { - // Load result CSV into a table - const tableName = await loadCSVTable(db, target, result); - // Load the metadata - function literalIfExists(val: undefined | string) { - if (!val) { - return "NULL"; - } - return `'${val}'`; - } - const tableNamePart = `'${tableName}'`; - const columnMetaPart = literalIfExists(target.columnMeta?.join(",")); - const perRowDescriptionPart = literalIfExists(target.perRowDescription); - const perRowTags = literalIfExists(target.perRowDescription); - db.exec(`INSERT INTO base_data_manager_metadata VALUES (${tableNamePart}, ${perRowDescriptionPart}, ${perRowTags}, ${columnMetaPart});`); + await loadCSVTable(db, target, result); } } export function getDefaultDB(): DatabaseSync { diff --git a/data-export/task.ts b/data-export/task.ts index 04e23c6..3597bb7 100644 --- a/data-export/task.ts +++ b/data-export/task.ts @@ -122,7 +122,7 @@ export const COLUMN_TYPES = { /**Column metadata. 
Just a string into the TYPES*/ type ColumnMeta = (keyof typeof COLUMN_TYPES | undefined); // Make non-optional version of just the metadata values of TaskTarget -type TaskTargetMeta = Required>; +type TaskTargetMeta = Required>; export class TaskTarget { /**The current path pointed to by this TaskTarget*/ @@ -131,8 +131,6 @@ export class TaskTarget { fsImpl: FSImpl = defaultFSImpl; /**The pipeline of things to do to the above path to get an stdout of the output*/ pipeline: TaskTargetOp[]; - /**Aggregate to a specific id*/ - aggregateId?: string; // == Metadata, user configurable, no good defaults == /**Id of the TaskTarget @@ -150,6 +148,8 @@ export class TaskTarget { perRowTags?: string; /**Metadata about the columns*/ columnMeta?: ColumnMeta[]; + /**Whether or not to aggregate to a single task (everything with the id value idValue)*/ + aggregate?: boolean; constructor(path: string){ this.path = path; @@ -252,11 +252,6 @@ export class TaskTarget { this.pushToPipeline(new TaskTargetRead()); return this; } - /**Aggregate all the items into the specifically named target id*/ - aggregate(aggregateId: string) { - this.aggregateId = aggregateId; - return this; - } assignMeta(meta: Partial) { Object.assign(this, { ...meta, @@ -286,9 +281,6 @@ export function read(): PipelineOp { export function cmd(cmd: ValidCmd): PipelineOp { return (targets: TaskTarget[]) => targets.map(t => t.clone().cmd(cmd)) } -export function aggregate(aggregateId: string): PipelineOp { - return (targets: TaskTarget[]) => targets.map(t => t.clone().aggregate(aggregateId)) -} export function assignMeta(meta: Partial): PipelineOp { return (targets: TaskTarget[]) => targets.map(t => t.clone().assignMeta(meta)) } @@ -347,6 +339,13 @@ export interface ProcessOutputAggregate { duration: number; ok: boolean; } +export interface ProcessOutputSimple { + stdout: string; + stderr: string; + exitCode: number; + duration: number; + ok: boolean; +} function combineProcessOutputAggregate(poa: 
ProcessOutputAggregate | undefined, po: ProcessOutput) { if (!poa) { @@ -371,7 +370,7 @@ function combineProcessOutputAggregate(poa: ProcessOutputAggregate | undefined, export interface RunOutput { target: TaskTarget, - result: ProcessOutput | ProcessOutputAggregate + result: ProcessOutput | ProcessOutputAggregate | ProcessOutputSimple } export async function run(target: TaskTarget): Promise { @@ -383,26 +382,75 @@ export async function runAll(targets: TaskTarget[]): Promise { const finalTargets = await verify(targets); const results = await parallel(finalTargets, run, true); + const nonAggregateTargets: TaskTarget[] = finalTargets.filter(t => !t.aggregate); const nonAggregateResults: RunOutput[] = []; const aggregateResultsMap: Record = {}; - // Aggregate + // == Aggregate tables == + // Some TaskTargets have .aggregate: true, which means they should all be combined + // into a single task with the id of the .id property for (const [idx, r] of results.entries()) { const t = finalTargets[idx]; - if (!t.aggregateId) { + if (!t.aggregate) { nonAggregateResults.push({ target: t, result: r }); continue; } - const prevResult = aggregateResultsMap[t.aggregateId]?.result; - aggregateResultsMap[t.aggregateId] = { - target: t, + const aggregateId = t.id; + const prevResult = aggregateResultsMap[aggregateId]?.result; + aggregateResultsMap[aggregateId] = { + target: t, // Use target t for metadata, so it will use the last target result: combineProcessOutputAggregate(prevResult as (ProcessOutputAggregate | undefined), r) }; } + // == Metadata table == + // Each TaskTarget has things like perRowDescription and other things we want to store + // and output. 
this creates a single TaskTarget for all that perTable metadata + function csvEscape(s: string | undefined) { + if (s === undefined) { + return ""; + } + if (s.includes("\"") || s.includes(",") || s.includes("\n")) { + return `"${s.replace(/\"/g, "\"\"")}"`; + } + return s; + } + let metadataCSV = "id,perRowDescription,perRowTags,columnMeta\n"; + for (const t of nonAggregateTargets) { + const tableNamePart = t.id; + const perRowDescriptionPart = t.perRowDescription; + const perRowTagsPart = t.perRowTags; + const columnMetaPart = t.columnMeta?.join(",") ?? ""; + metadataCSV += [ + csvEscape(tableNamePart), + csvEscape(perRowDescriptionPart), + csvEscape(perRowTagsPart), + csvEscape(columnMetaPart) + ].join(",") + "\n"; + } + // Won't be removed by verify() because we're adding it after that's used + // TODO: Would be nice to bake this into TaskTarget/verify for tasks that dont point + // to a real path + const metadataTarget = new TaskTarget(""); + metadataTarget + // id, perRowDescription, perRowTags, columnMeta + .assignMeta({ + idValue: "base_data_manager_metadata", + columnMeta: ["any", "any", "any", "any"], + perRowTags: "internal", + }); + const metadataResult= { + stdout: metadataCSV, + stderr: "", + exitCode: 0, + duration: 0, // TODO + ok: true + }; + const metadataRunOutput: RunOutput = { target: metadataTarget, result: metadataResult }; + const aggregateResults: RunOutput[] = Object.values(aggregateResultsMap); - return aggregateResults.concat(nonAggregateResults); + return aggregateResults.concat(nonAggregateResults).concat(metadataRunOutput); } \ No newline at end of file diff --git a/main.ts b/main.ts index cc8fc0f..fbe171f 100644 --- a/main.ts +++ b/main.ts @@ -21,7 +21,7 @@ async function main() { ]; console.log(`${elapsed()} - Begin solving ${unbuiltTargets.length} input target for possible targets`); const targets = await facebook()(unbuiltTargets); - console.log(`${elapsed()} - Found ${targets.length} possible targets`); + 
console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`); // .facebook_v2(); // .google(); diff --git a/package.json b/package.json index 586e3ff..a8efa82 100644 --- a/package.json +++ b/package.json @@ -27,6 +27,7 @@ }, "devDependencies": { "@types/node": "^24.1.0", + "csv-parse": "^6.1.0", "typescript": "^5.9.3" } } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2538877..49c20f3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -33,6 +33,9 @@ importers: '@types/node': specifier: ^24.1.0 version: 24.10.0 + csv-parse: + specifier: ^6.1.0 + version: 6.1.0 typescript: specifier: ^5.9.3 version: 5.9.3 @@ -59,6 +62,9 @@ packages: buffer-crc32@0.2.13: resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} + csv-parse@6.1.0: + resolution: {integrity: sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw==} + dom-serializer@2.0.0: resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} @@ -176,6 +182,8 @@ snapshots: buffer-crc32@0.2.13: {} + csv-parse@6.1.0: {} + dom-serializer@2.0.0: dependencies: domelementtype: 2.3.0 diff --git a/test/facebook.ts b/test/facebook.ts index 15b00c3..e14d181 100644 --- a/test/facebook.ts +++ b/test/facebook.ts @@ -5,6 +5,7 @@ import { TaskTarget, verify, run, unzip, pipe } from "../data-export/task.ts"; import { parallel } from "../data-export/parallel.ts"; import { facebook, facebook_v2 } from "../data-export/facebook.ts"; import * as DataIO from "../data-export/io.ts"; +import { parse } from "csv-parse/sync"; // For better diffs + error checking of CSV output const THIS_FILE = import.meta.dirname; const FACEBOOK_V1_DIR = nodePath.join(THIS_FILE, 'fixtures/facebook-json-2021-05-01'); @@ -25,7 +26,7 @@ test("facebook: Can load the 2021 export", async (t) => { } const csvs = idAndCSVs .sort() // Keep stable ordering for 
snapshots - .map(v => v[1]) + .map(v => parse(v[1])) t.assert.snapshot(csvs); }); @@ -43,7 +44,7 @@ test("facebook: Can load the 2021 export zipped", async (t) => { } const csvs = idAndCSVs .sort() // Keep stable ordering for snapshots - .map(v => v[1]) + .map(v => parse(v[1])) t.assert.snapshot(csvs); }); @@ -61,7 +62,7 @@ test("facebook: Can load the 2025 export", async (t) => { } const csvs = idAndCSVs .sort() // Keep stable ordering for snapshots - .map(v => v[1]) + .map(v => parse(v[1])) t.assert.snapshot(csvs); }); diff --git a/test/facebook.ts.snapshot b/test/facebook.ts.snapshot index 8b6e62a..faf0373 100644 --- a/test/facebook.ts.snapshot +++ b/test/facebook.ts.snapshot @@ -1,117 +1,2591 @@ exports[`facebook: Can load the 2021 export 1`] = ` [ - "\\"album\\",\\"uri\\",\\"creation_timestamp\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-03-07T15:23:20Z\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-07-01T07:46:40Z\\"\\n", - "[\\n \\"from\\",\\n \\"to\\",\\n \\"timestamp\\",\\n \\"body\\"\\n]\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - 
"\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"status\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\"\\n", - "\\"service_name\\",\\"native_app_id\\",\\"username\\",\\"email\\",\\"phone_number\\",\\"name\\"\\n\\"xxx\\",69,\\"xxx\\",\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\"\\n\\"xxx\\",1707005000,\\"xxx\\",\\"not_a_real_email@example.com\\",,\\"xxx\\"\\n", - "\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",,,\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\",,,\\n", - "\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-12-29T08:13:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n", - "\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-22T01:26:40Z\\",\\"2024-05-11T15:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"\\",\\"\\",\\"\\",\\"xxx\\"\\n", - "\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-08T19:20:00Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-01-17T14:00:00Z\\",\\"TODO\\",\\"xxx\\"\\n", - "\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-10-18T07:03:20Z\\",\\"not_a_real_email@example.com\\",69\\n\\"2024-01-21T22:10:00Z\\",\\"not_a_real_email@example.com\\",69\\n", - "\\"name\\"\\n\\"xxx\\"\\n\\"xxx\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-13T13:13:20Z\\"\\n\\"xxx\\",\\"2024-10-31T00:36:40Z\\"\\n", - 
"\\"game\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-11-03T16:06:40Z\\"\\n", - "\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n", - "\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-04-23T17:56:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n", - "\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"from\\",\\"to\\",\\"amount\\",\\"currency\\",\\"type\\",\\"status\\",\\"payment_method\\",\\"created_timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-05T21:36:40Z\\"\\n", - "\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-15T12:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-12T06:13:20Z\\"\\n", - "\\"from\\",\\"to\\",\\"rank\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",69,\\"2024-07-22T19:03:20Z\\"\\n", - "\\"title\\",\\"timestamp\\",\\"reaction\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n", - "\\"title\\",\\"timestamp\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-08T16:33:20Z\\"\\n\\"xxx\\",\\"2024-09-24T19:10:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-27T15:13:20Z\\"\\n\\"xxx\\",\\"2024-08-24T00:40:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n", - 
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-06-23T05:20:00Z\\"\\n\\"xxx\\",\\"2024-05-25T08:16:40Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-04-28T20:10:00Z\\"\\n", - "\\"from\\",\\"to\\",\\"subject\\",\\"message\\",\\"timestamp\\"\\n\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-16T06:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"url://somewhere\\",\\"2024-10-16T06:26:40Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-12-17T08:43:20Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n", - "\\"name\\",\\"id\\",\\"type\\",\\"timestamp\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-11T12:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T19:56:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T11:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-07T21:06:40Z\\"\\n", - "\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-02-27T05:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-05-16T03:26:40Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-10-31T06:10:00Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n" + [ + [ + "album", + "uri", + "creation_timestamp" + ], + [ + "xxx", + "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png", + "2024-03-07T15:23:20Z" + ], + [ + "xxx", + "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png", + "2024-07-01T07:46:40Z" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "body" + ], + [ + "Me", + "xxx", + "2024-01-13T07:13:20Z", + "xxx" + ], + [ + "Me", + "xxx", + "2024-01-13T07:13:20Z", + "xxx" + ] + ], + [ + [ + "xxx", + 
"true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "action", + "ip", + "user_agent", + "datr_cookie", + "city", + "region", + "country", + "site_name", + "timestamp" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "status", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-02-13T14:36:40Z" + ] + ], + [ + [ + "service_name", + "native_app_id", + "username", + "email", + "phone_number", + "name" + ], + [ + "xxx", + "69", + "xxx", + "not_a_real_email@example.com", + "xxx", + "xxx" + ], + [ + "xxx", + "1707005000", + "xxx", + "not_a_real_email@example.com", + "", + "xxx" + ] + ], + [ + [ + "event", + "created_timestamp", + "ip_address", + "user_agent", + "datr_cookie" + ], + [ + "xxx", + "2024-05-01T07:53:20Z", + "", + "", + "" + ], + [ + "xxx", + "2024-02-13T14:36:40Z", + "", + "", + "" + ] + ], + [ + [ + "name", + "added_timestamp" + ], + [ + "xxx", + "2024-12-29T08:13:20Z" + ], + [ + "xxx", + "2024-09-02T12:26:40Z" + ] + ], + [ + [ + "name", + 
"created_timestamp", + "updated_timestamp", + "ip_address", + "user_agent", + "location", + "app", + "session_type", + "datr_cookie" + ], + [ + "xxx", + "2024-08-22T01:26:40Z", + "2024-05-11T15:06:40Z", + "1.1.1.1", + "some/path", + "", + "", + "", + "xxx" + ] + ], + [ + [ + "timestamp", + "data", + "title" + ], + [ + "2024-02-08T19:20:00Z", + "TODO", + "xxx" + ], + [ + "2024-01-17T14:00:00Z", + "TODO", + "xxx" + ] + ], + [ + [ + "timestamp", + "email", + "contact_type" + ], + [ + "2024-10-18T07:03:20Z", + "not_a_real_email@example.com", + "69" + ], + [ + "2024-01-21T22:10:00Z", + "not_a_real_email@example.com", + "69" + ] + ], + [ + [ + "name" + ], + [ + "xxx" + ], + [ + "xxx" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-02-13T13:13:20Z" + ], + [ + "xxx", + "2024-10-31T00:36:40Z" + ] + ], + [ + [ + "game", + "added_timestamp" + ], + [ + "xxx", + "2024-11-03T16:06:40Z" + ] + ], + [ + [ + "title", + "price", + "seller", + "created_timestamp", + "latitude", + "longitude", + "description" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-12-18T05:33:20Z", + "69", + "69", + "xxx" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-12-18T05:33:20Z", + "69", + "69", + "xxx" + ] + ], + [ + [ + "action", + "timestamp", + "site", + "ip_address" + ], + [ + "xxx", + "2024-05-01T07:53:20Z", + "xxx", + "1.1.1.1" + ], + [ + "xxx", + "2024-04-23T17:56:40Z", + "xxx", + "1.1.1.1" + ] + ], + [ + [ + "timestamp", + "unread", + "href", + "text" + ], + [ + "2024-04-30T08:16:40Z", + "true", + "url://somewhere", + "xxx" + ], + [ + "2024-04-30T08:16:40Z", + "true", + "url://somewhere", + "xxx" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "from", + "to", + "amount", + "currency", + "type", + "status", + "payment_method", + "created_timestamp" + ], + [ + 
"xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-05T21:36:40Z" + ] + ], + [ + [ + "name", + "uri", + "timestamp" + ], + [ + "xxx", + "url://somewhere", + "2024-01-15T12:00:00Z" + ], + [ + "xxx", + "url://somewhere", + "2024-01-12T06:13:20Z" + ] + ], + [ + [ + "from", + "to", + "rank", + "timestamp" + ], + [ + "xxx", + "xxx", + "69", + "2024-07-22T19:03:20Z" + ] + ], + [ + [ + "title", + "timestamp", + "reaction" + ], + [ + "", + "2024-01-14T06:50:00Z", + "xxx" + ], + [ + "", + "2024-01-14T06:50:00Z", + "xxx" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "", + "2024-10-06T08:56:40Z" + ], + [ + "", + "2024-10-06T08:56:40Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-02-08T16:33:20Z" + ], + [ + "xxx", + "2024-09-24T19:10:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-09-27T15:13:20Z" + ], + [ + "xxx", + "2024-08-24T00:40:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-06-23T05:20:00Z" + ], + [ + "xxx", + "2024-05-25T08:16:40Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-04-28T20:10:00Z" + ] + ], + [ + [ + "from", + "to", + "subject", + "message", + "timestamp" + ], + [ + "not_a_real_email@example.com", + "xxx", + "xxx", + "xxx", + "2024-10-16T06:26:40Z" + ], + [ + "xxx", + "xxx", + "xxx", + "url://somewhere", + "2024-10-16T06:26:40Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-12-17T08:43:20Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ] + ], + [ + [ + "name", + "id", + "type", + "timestamp" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-11T12:36:40Z" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-10T19:56:40Z" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-10T11:36:40Z" 
+ ], + [ + "xxx", + "69", + "xxx", + "2024-02-07T21:06:40Z" + ] + ], + [ + [ + "name", + "uri", + "timestamp" + ], + [ + "xxx", + "url://somewhere", + "2024-02-27T05:00:00Z" + ], + [ + "xxx", + "url://somewhere", + "2024-05-16T03:26:40Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "TODO: data", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "TODO: data", + "2024-10-31T06:10:00Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "TODO", + "2024-02-08T19:20:00Z" + ], + [ + "xxx", + "TODO", + "2024-02-08T19:20:00Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "xxx", + "2024-11-17T06:30:00Z" + ], + [ + "xxx", + "xxx", + "2024-11-17T06:30:00Z" + ] + ], + [ + [ + "id", + "perRowDescription", + "perRowTags", + "columnMeta" + ], + [ + "Facebook___notifications_json", + "Notification at {0}: \\"{3}\\"", + "facebook,initiated_by_third_party", + "isodatetime,any,url,text" + ], + [ + "Facebook___accounts_and_profiles_json", + "{0} account \\"{2}\\"", + "facebook", + "text,text,text,text,text,text" + ], + [ + "Facebook___your_off_facebook_activity_json", + "{2} event from {0} at {3}", + "facebook", + "text,any,text,isodatetime" + ], + [ + "Facebook___apps_and_websites_json", + "App \\"{0}\\" added on {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___comments_json", + "Comment on \\"{2}\\" at {0}", + "facebook", + "isodatetime,TODO,text" + ], + [ + "Facebook___Dating_Messages_0_json", + "\\"{3}\\" from {0} to {1} at {2}", + "facebook,message,dating,content_by_me", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___instant_games_json", + "Played \\"{0}\\" starting {1}", + "facebook,gaming", + "text,isodatetime" + ], + [ + "Facebook___unfollowed_pages_json", + "Unfollowed \\"{0}\\" at {1}", + "facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebook___following_json", + "Followed \\"{0}\\" at {1}", + "facebook", + "receiver,isodatetime" + ], + [ + 
"Facebook___followers_json", + "{0} follows you", + "facebook", + "sender" + ], + [ + "Facebook___sent_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___removed_friends_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___rejected_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___received_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___friends_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___your_group_membership_activity_json", + "Joined group \\"{0}\\" at {1}", + "facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebook___your_posts_and_comments_in_groups_json", + "Group post \\"{0}\\" at {2}", + "facebook", + "text,TODO,isodatetime" + ], + [ + "Facebook___people_json", + "Interaction with {0} at {2}", + "facebook", + "text,url,isodatetime" + ], + [ + "Facebook___pages_json", + "Liked page \\"{0}\\" at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___posts_and_comments_json", + "{2} on \\"{0}\\" at {1}", + "facebook", + "text,isodatetime,text" + ], + [ + "Facebook___items_sold_json", + "Sold \\"{0}\\" for {1} on {3}", + "facebook,marketplace", + "text,numeric,sender,isodatetime,lat,lng,text" + ], + [ + "Facebook___Messages_randomuser4_xxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser3_xxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser2_xxxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser_xxxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + 
"Facebook___pokes_json", + "{0} poked {1} at {3}", + "facebook", + "sender,receiver,numeric,isodatetime" + ], + [ + "Facebook___support_correspondences_json", + "\\"{2}\\" from {0} to {1} at {4}", + "facebook", + "sender,receiver,text,text,isodatetime" + ], + [ + "Facebook___payment_history_json", + "{2} {3} from {0} to {1} on {7}", + "facebook,payment", + "sender,receiver,numeric,text,text,text,text,isodatetime" + ], + [ + "Facebook___Album_0_json", + "Photo in \\"{0}\\" at {2}", + "facebook,photo", + "text,url,isodatetime" + ], + [ + "Facebook___your_pinned_posts_json", + "Pinned post \\"{0}\\" at {2}", + "facebook", + "text,url,isodatetime" + ], + [ + "Facebook___your_posts_1_json", + "Post \\"{0}\\" at {2}", + "facebook", + "text,TODO,isodatetime" + ], + [ + "Facebook___profile_update_history_json", + "Profile update \\"{0}\\" at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___your_search_history_json", + "Searched for \\"{1}\\" at {2}", + "facebook,initiated_by_me,content_by_me", + "text,text,isodatetime" + ], + [ + "Facebook___account_status_changes_json", + "Account {0} at {1}", + "facebook,security", + "text,isodatetime" + ], + [ + "Facebook___account_activity_json", + "{0} from {4}, {6} on {8}", + "facebook,security", + "text,text,text,text,text,text,text,text,isodatetime" + ], + [ + "Facebook___administrative_records_json", + "{0} at {1} from {2}", + "facebook,security", + "text,isodatetime,text,text,text" + ], + [ + "Facebook___authorized_logins_json", + "Session \\"{0}\\" from {5} on {1}", + "facebook,security", + "text,isodatetime,isodatetime,text,text,text,text,text,text" + ], + [ + "Facebook___contact_verifications_json", + "{2} verification of {1} at {0}", + "facebook,security", + "isodatetime,text,text" + ], + [ + "Facebook___logins_and_logouts_json", + "{0} on {2} at {1} from {3}", + "facebook,security", + "text,isodatetime,text,text" + ], + [ + "Facebook___story_reactions_json", + "Story reaction on \\"{0}\\" at {1}", + 
"facebook", + "text,isodatetime" + ] + ] ] `; exports[`facebook: Can load the 2021 export zipped 1`] = ` [ - "\\"album\\",\\"uri\\",\\"creation_timestamp\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-03-07T15:23:20Z\\"\\n\\"xxx\\",\\"photos_and_videos/CoverPhotos_yyyyyy/200x200png.png\\",\\"2024-07-01T07:46:40Z\\"\\n", - "[\\n \\"from\\",\\n \\"to\\",\\n \\"timestamp\\",\\n \\"body\\"\\n]\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n\\"Me\\",\\"xxx\\",\\"2024-01-13T07:13:20Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"status\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\"\\n", - "\\"service_name\\",\\"native_app_id\\",\\"username\\",\\"email\\",\\"phone_number\\",\\"name\\"\\n\\"xxx\\",69,\\"xxx\\",\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\"\\n\\"xxx\\",1707005000,\\"xxx\\",\\"not_a_real_email@example.com\\",,\\"xxx\\"\\n", - 
"\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",,,\\n\\"xxx\\",\\"2024-02-13T14:36:40Z\\",,,\\n", - "\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-12-29T08:13:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n", - "\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-22T01:26:40Z\\",\\"2024-05-11T15:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"\\",\\"\\",\\"\\",\\"xxx\\"\\n", - "\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-08T19:20:00Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-01-17T14:00:00Z\\",\\"TODO\\",\\"xxx\\"\\n", - "\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-10-18T07:03:20Z\\",\\"not_a_real_email@example.com\\",69\\n\\"2024-01-21T22:10:00Z\\",\\"not_a_real_email@example.com\\",69\\n", - "\\"name\\"\\n\\"xxx\\"\\n\\"xxx\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-13T13:13:20Z\\"\\n\\"xxx\\",\\"2024-10-31T00:36:40Z\\"\\n", - "\\"game\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-11-03T16:06:40Z\\"\\n", - "\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-12-18T05:33:20Z\\",69,69,\\"xxx\\"\\n", - "\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-04-23T17:56:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n", - "\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-04-30T08:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n", - 
"\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"2024-05-01T07:53:20Z\\"\\n", - "\\"from\\",\\"to\\",\\"amount\\",\\"currency\\",\\"type\\",\\"status\\",\\"payment_method\\",\\"created_timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-05-05T21:36:40Z\\"\\n", - "\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-15T12:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-12T06:13:20Z\\"\\n", - "\\"from\\",\\"to\\",\\"rank\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",69,\\"2024-07-22T19:03:20Z\\"\\n", - "\\"title\\",\\"timestamp\\",\\"reaction\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n,\\"2024-01-14T06:50:00Z\\",\\"xxx\\"\\n", - "\\"title\\",\\"timestamp\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n,\\"2024-10-06T08:56:40Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-08T16:33:20Z\\"\\n\\"xxx\\",\\"2024-09-24T19:10:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-27T15:13:20Z\\"\\n\\"xxx\\",\\"2024-08-24T00:40:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-06-23T05:20:00Z\\"\\n\\"xxx\\",\\"2024-05-25T08:16:40Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-04-28T20:10:00Z\\"\\n", - "\\"from\\",\\"to\\",\\"subject\\",\\"message\\",\\"timestamp\\"\\n\\"not_a_real_email@example.com\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-16T06:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"url://somewhere\\",\\"2024-10-16T06:26:40Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-12-17T08:43:20Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n\\"xxx\\",\\"2024-01-14T06:50:00Z\\"\\n", - 
"\\"name\\",\\"id\\",\\"type\\",\\"timestamp\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-11T12:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T19:56:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-10T11:36:40Z\\"\\n\\"xxx\\",69,\\"xxx\\",\\"2024-02-07T21:06:40Z\\"\\n", - "\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-02-27T05:00:00Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-05-16T03:26:40Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-05-01T07:53:20Z\\"\\n\\"xxx\\",\\"TODO: data\\",\\"2024-10-31T06:10:00Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-02-08T19:20:00Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-11-17T06:30:00Z\\"\\n" + [ + [ + "album", + "uri", + "creation_timestamp" + ], + [ + "xxx", + "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png", + "2024-03-07T15:23:20Z" + ], + [ + "xxx", + "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png", + "2024-07-01T07:46:40Z" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "body" + ], + [ + "Me", + "xxx", + "2024-01-13T07:13:20Z", + "xxx" + ], + [ + "Me", + "xxx", + "2024-01-13T07:13:20Z", + "xxx" + ] + ], + [ + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "xxx", + "some/path", + "xxx, xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + 
], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "action", + "ip", + "user_agent", + "datr_cookie", + "city", + "region", + "country", + "site_name", + "timestamp" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "status", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-02-13T14:36:40Z" + ] + ], + [ + [ + "service_name", + "native_app_id", + "username", + "email", + "phone_number", + "name" + ], + [ + "xxx", + "69", + "xxx", + "not_a_real_email@example.com", + "xxx", + "xxx" + ], + [ + "xxx", + "1707005000", + "xxx", + "not_a_real_email@example.com", + "", + "xxx" + ] + ], + [ + [ + "event", + "created_timestamp", + "ip_address", + "user_agent", + "datr_cookie" + ], + [ + "xxx", + "2024-05-01T07:53:20Z", + "", + "", + "" + ], + [ + "xxx", + "2024-02-13T14:36:40Z", + "", + "", + "" + ] + ], + [ + [ + "name", + "added_timestamp" + ], + [ + "xxx", + "2024-12-29T08:13:20Z" + ], + [ + "xxx", + "2024-09-02T12:26:40Z" + ] + ], + [ + [ + "name", + "created_timestamp", + "updated_timestamp", + "ip_address", + "user_agent", + "location", + "app", + "session_type", + "datr_cookie" + ], + [ + "xxx", + "2024-08-22T01:26:40Z", + "2024-05-11T15:06:40Z", + "1.1.1.1", + "some/path", + "", + "", + "", + "xxx" + ] + ], + [ + [ + "timestamp", + "data", + "title" + ], + [ + "2024-02-08T19:20:00Z", + "TODO", + "xxx" + ], + [ + "2024-01-17T14:00:00Z", + "TODO", + "xxx" + ] + ], + [ + [ + "timestamp", + "email", + "contact_type" + ], + [ + "2024-10-18T07:03:20Z", + "not_a_real_email@example.com", + "69" + ], + [ + "2024-01-21T22:10:00Z", + "not_a_real_email@example.com", + "69" + ] + ], + [ + [ + "name" + ], + [ + 
"xxx" + ], + [ + "xxx" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-02-13T13:13:20Z" + ], + [ + "xxx", + "2024-10-31T00:36:40Z" + ] + ], + [ + [ + "game", + "added_timestamp" + ], + [ + "xxx", + "2024-11-03T16:06:40Z" + ] + ], + [ + [ + "title", + "price", + "seller", + "created_timestamp", + "latitude", + "longitude", + "description" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-12-18T05:33:20Z", + "69", + "69", + "xxx" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-12-18T05:33:20Z", + "69", + "69", + "xxx" + ] + ], + [ + [ + "action", + "timestamp", + "site", + "ip_address" + ], + [ + "xxx", + "2024-05-01T07:53:20Z", + "xxx", + "1.1.1.1" + ], + [ + "xxx", + "2024-04-23T17:56:40Z", + "xxx", + "1.1.1.1" + ] + ], + [ + [ + "timestamp", + "unread", + "href", + "text" + ], + [ + "2024-04-30T08:16:40Z", + "true", + "url://somewhere", + "xxx" + ], + [ + "2024-04-30T08:16:40Z", + "true", + "url://somewhere", + "xxx" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "2024-05-01T07:53:20Z" + ] + ], + [ + [ + "from", + "to", + "amount", + "currency", + "type", + "status", + "payment_method", + "created_timestamp" + ], + [ + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-05-05T21:36:40Z" + ] + ], + [ + [ + "name", + "uri", + "timestamp" + ], + [ + "xxx", + "url://somewhere", + "2024-01-15T12:00:00Z" + ], + [ + "xxx", + "url://somewhere", + "2024-01-12T06:13:20Z" + ] + ], + [ + [ + "from", + "to", + "rank", + "timestamp" + ], + [ + "xxx", + "xxx", + "69", + "2024-07-22T19:03:20Z" + ] + ], + [ + [ + "title", + "timestamp", + "reaction" + ], + [ + "", + "2024-01-14T06:50:00Z", + "xxx" + ], + [ + "", + "2024-01-14T06:50:00Z", + "xxx" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "", + "2024-10-06T08:56:40Z" + ], + [ + "", + "2024-10-06T08:56:40Z" + ] + ], 
+ [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-02-08T16:33:20Z" + ], + [ + "xxx", + "2024-09-24T19:10:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-09-27T15:13:20Z" + ], + [ + "xxx", + "2024-08-24T00:40:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-06-23T05:20:00Z" + ], + [ + "xxx", + "2024-05-25T08:16:40Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-04-28T20:10:00Z" + ] + ], + [ + [ + "from", + "to", + "subject", + "message", + "timestamp" + ], + [ + "not_a_real_email@example.com", + "xxx", + "xxx", + "xxx", + "2024-10-16T06:26:40Z" + ], + [ + "xxx", + "xxx", + "xxx", + "url://somewhere", + "2024-10-16T06:26:40Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-12-17T08:43:20Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ], + [ + "xxx", + "2024-01-14T06:50:00Z" + ] + ], + [ + [ + "name", + "id", + "type", + "timestamp" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-11T12:36:40Z" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-10T19:56:40Z" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-10T11:36:40Z" + ], + [ + "xxx", + "69", + "xxx", + "2024-02-07T21:06:40Z" + ] + ], + [ + [ + "name", + "uri", + "timestamp" + ], + [ + "xxx", + "url://somewhere", + "2024-02-27T05:00:00Z" + ], + [ + "xxx", + "url://somewhere", + "2024-05-16T03:26:40Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "TODO: data", + "2024-05-01T07:53:20Z" + ], + [ + "xxx", + "TODO: data", + "2024-10-31T06:10:00Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "TODO", + "2024-02-08T19:20:00Z" + ], + [ + "xxx", + "TODO", + "2024-02-08T19:20:00Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "xxx", + 
"2024-11-17T06:30:00Z" + ], + [ + "xxx", + "xxx", + "2024-11-17T06:30:00Z" + ] + ], + [ + [ + "id", + "perRowDescription", + "perRowTags", + "columnMeta" + ], + [ + "Facebook___notifications_json", + "Notification at {0}: \\"{3}\\"", + "facebook,initiated_by_third_party", + "isodatetime,any,url,text" + ], + [ + "Facebook___accounts_and_profiles_json", + "{0} account \\"{2}\\"", + "facebook", + "text,text,text,text,text,text" + ], + [ + "Facebook___your_off_facebook_activity_json", + "{2} event from {0} at {3}", + "facebook", + "text,any,text,isodatetime" + ], + [ + "Facebook___apps_and_websites_json", + "App \\"{0}\\" added on {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___comments_json", + "Comment on \\"{2}\\" at {0}", + "facebook", + "isodatetime,TODO,text" + ], + [ + "Facebook___Dating_Messages_0_json", + "\\"{3}\\" from {0} to {1} at {2}", + "facebook,message,dating,content_by_me", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___instant_games_json", + "Played \\"{0}\\" starting {1}", + "facebook,gaming", + "text,isodatetime" + ], + [ + "Facebook___unfollowed_pages_json", + "Unfollowed \\"{0}\\" at {1}", + "facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebook___following_json", + "Followed \\"{0}\\" at {1}", + "facebook", + "receiver,isodatetime" + ], + [ + "Facebook___followers_json", + "{0} follows you", + "facebook", + "sender" + ], + [ + "Facebook___sent_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___removed_friends_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___rejected_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___received_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___friends_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___your_group_membership_activity_json", + "Joined group \\"{0}\\" at {1}", + 
"facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebook___your_posts_and_comments_in_groups_json", + "Group post \\"{0}\\" at {2}", + "facebook", + "text,TODO,isodatetime" + ], + [ + "Facebook___people_json", + "Interaction with {0} at {2}", + "facebook", + "text,url,isodatetime" + ], + [ + "Facebook___pages_json", + "Liked page \\"{0}\\" at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___posts_and_comments_json", + "{2} on \\"{0}\\" at {1}", + "facebook", + "text,isodatetime,text" + ], + [ + "Facebook___items_sold_json", + "Sold \\"{0}\\" for {1} on {3}", + "facebook,marketplace", + "text,numeric,sender,isodatetime,lat,lng,text" + ], + [ + "Facebook___Messages_randomuser4_xxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser3_xxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser2_xxxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___Messages_randomuser_xxxxxxxx___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebook___pokes_json", + "{0} poked {1} at {3}", + "facebook", + "sender,receiver,numeric,isodatetime" + ], + [ + "Facebook___support_correspondences_json", + "\\"{2}\\" from {0} to {1} at {4}", + "facebook", + "sender,receiver,text,text,isodatetime" + ], + [ + "Facebook___payment_history_json", + "{2} {3} from {0} to {1} on {7}", + "facebook,payment", + "sender,receiver,numeric,text,text,text,text,isodatetime" + ], + [ + "Facebook___Album_0_json", + "Photo in \\"{0}\\" at {2}", + "facebook,photo", + "text,url,isodatetime" + ], + [ + "Facebook___your_pinned_posts_json", + "Pinned post \\"{0}\\" at {2}", + "facebook", + "text,url,isodatetime" + ], + [ + 
"Facebook___your_posts_1_json", + "Post \\"{0}\\" at {2}", + "facebook", + "text,TODO,isodatetime" + ], + [ + "Facebook___profile_update_history_json", + "Profile update \\"{0}\\" at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebook___your_search_history_json", + "Searched for \\"{1}\\" at {2}", + "facebook,initiated_by_me,content_by_me", + "text,text,isodatetime" + ], + [ + "Facebook___account_status_changes_json", + "Account {0} at {1}", + "facebook,security", + "text,isodatetime" + ], + [ + "Facebook___account_activity_json", + "{0} from {4}, {6} on {8}", + "facebook,security", + "text,text,text,text,text,text,text,text,isodatetime" + ], + [ + "Facebook___administrative_records_json", + "{0} at {1} from {2}", + "facebook,security", + "text,isodatetime,text,text,text" + ], + [ + "Facebook___authorized_logins_json", + "Session \\"{0}\\" from {5} on {1}", + "facebook,security", + "text,isodatetime,isodatetime,text,text,text,text,text,text" + ], + [ + "Facebook___contact_verifications_json", + "{2} verification of {1} at {0}", + "facebook,security", + "isodatetime,text,text" + ], + [ + "Facebook___logins_and_logouts_json", + "{0} on {2} at {1} from {3}", + "facebook,security", + "text,isodatetime,text,text" + ], + [ + "Facebook___story_reactions_json", + "Story reaction on \\"{0}\\" at {1}", + "facebook", + "text,isodatetime" + ] + ] ] `; exports[`facebook: Can load the 2025 export 1`] = ` [ - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"some/path\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - 
"\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\"xxx\\"\\n", - "\\"from\\",\\"to\\",\\"timestamp\\",\\"content\\"\\n\\"xxx\\",\\"\\",\\"1970-01-01T00:00:00Z\\",\\n", - "\\"action\\",\\"ip\\",\\"user_agent\\",\\"datr_cookie\\",\\"city\\",\\"region\\",\\"country\\",\\"site_name\\",\\"timestamp\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-11-22T10:06:40Z\\"\\n\\"xxx\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-11-21T23:00:00Z\\"\\n", - "\\"timestamp\\",\\"data\\",\\"title\\"\\n\\"2024-02-13T02:06:40Z\\",\\"TODO\\",\\"xxx\\"\\n\\"2024-07-12T02:06:40Z\\",\\"TODO\\",\\"xxx\\"\\n", - "\\"name\\",\\"added_timestamp\\"\\n\\"xxx\\",\\"2024-01-12T00:40:00Z\\"\\n\\"xxx\\",\\"2024-06-21T17:13:20Z\\"\\n", - "\\"timestamp\\",\\"email\\",\\"contact_type\\"\\n\\"2024-02-07T19:43:20Z\\",\\"not_a_real_email@example.com\\",69\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-10-06T06:10:00Z\\"\\n\\"xxx\\",\\"TODO\\",\\"2024-01-22T16:13:20Z\\"\\n", - "\\"title\\",\\"price\\",\\"seller\\",\\"created_timestamp\\",\\"latitude\\",\\"longitude\\",\\"description\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-10-02T23:00:00Z\\",69,69,\\"xxx\\"\\n\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"2024-09-27T01:20:00Z\\",69,69,\\"xxx\\"\\n", - "\\"action\\",\\"timestamp\\",\\"site\\",\\"ip_address\\"\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",\\"xxx\\",\\"1.1.1.1\\"\\n", - "\\"timestamp\\",\\"unread\\",\\"href\\",\\"text\\"\\n\\"2024-11-20T12:16:40Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n\\"2024-11-15T00:20:00Z\\",true,\\"url://somewhere\\",\\"xxx\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-21T03:10:00Z\\"\\n", - 
"\\"name\\",\\"uri\\",\\"timestamp\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-09-11T20:03:20Z\\"\\n\\"xxx\\",\\"url://somewhere\\",\\"2024-01-20T12:50:00Z\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-10T10:43:20Z\\"\\n\\"xxx\\",\\"2024-09-02T12:26:40Z\\"\\n", - "\\"event\\",\\"created_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"datr_cookie\\"\\n\\"xxx\\",\\"2024-08-11T01:33:20Z\\",,,\\n\\"xxx\\",\\"2024-08-10T14:26:40Z\\",,,\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-09-01T14:13:20Z\\"\\n\\"xxx\\",\\"2024-08-12T08:06:40Z\\"\\n", - "\\"start\\",\\"end\\"\\n", - "\\"name\\",\\"created_timestamp\\",\\"updated_timestamp\\",\\"ip_address\\",\\"user_agent\\",\\"location\\",\\"app\\",\\"session_type\\",\\"datr_cookie\\"\\n,\\"2024-04-04T19:46:40Z\\",\\"2024-11-23T02:46:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\"\\n,\\"2024-04-05T06:53:20Z\\",\\"2024-11-22T10:06:40Z\\",\\"1.1.1.1\\",\\"some/path\\",\\"xxx\\",\\"xxx\\",\\"xxx\\",\\"xxx\\"\\n", - "\\"name\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-04-01T16:46:40Z\\"\\n\\"xxx\\",\\"2024-09-07T16:03:20Z\\"\\n", - "\\"title\\",\\"timestamp\\"\\n\\"xxx\\",\\"2024-02-12T17:46:40Z\\"\\n\\"xxx\\",\\"2024-02-12T17:46:40Z\\"\\n", - "\\"title\\",\\"data\\",\\"timestamp\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-12-08T09:26:40Z\\"\\n\\"xxx\\",\\"xxx\\",\\"2024-12-28T00:16:40Z\\"\\n" + [ + [ + "xxx", + "true", + "", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "", + "some/path", + "xxx, xxx" + ], + [ + "xxx", + "true", + "", + "some/path", + "xxx, xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "some/path" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "" 
+ ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "xxx" + ] + ], + [ + [ + "from", + "to", + "timestamp", + "content" + ], + [ + "xxx", + "", + "1970-01-01T00:00:00Z", + "" + ] + ], + [ + [ + "action", + "ip", + "user_agent", + "datr_cookie", + "city", + "region", + "country", + "site_name", + "timestamp" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-11-22T10:06:40Z" + ], + [ + "xxx", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx", + "xxx", + "2024-11-21T23:00:00Z" + ] + ], + [ + [ + "timestamp", + "data", + "title" + ], + [ + "2024-02-13T02:06:40Z", + "TODO", + "xxx" + ], + [ + "2024-07-12T02:06:40Z", + "TODO", + "xxx" + ] + ], + [ + [ + "name", + "added_timestamp" + ], + [ + "xxx", + "2024-01-12T00:40:00Z" + ], + [ + "xxx", + "2024-06-21T17:13:20Z" + ] + ], + [ + [ + "timestamp", + "email", + "contact_type" + ], + [ + "2024-02-07T19:43:20Z", + "not_a_real_email@example.com", + "69" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "TODO", + "2024-10-06T06:10:00Z" + ], + [ + "xxx", + "TODO", + "2024-01-22T16:13:20Z" + ] + ], + [ + [ + "title", + "price", + "seller", + "created_timestamp", + "latitude", + "longitude", + "description" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-10-02T23:00:00Z", + "69", + "69", + "xxx" + ], + [ + "xxx", + "xxx", + "xxx", + "2024-09-27T01:20:00Z", + "69", + "69", + "xxx" + ] + ], + [ + [ + "action", + "timestamp", + "site", + "ip_address" + ], + [ + "xxx", + "2024-08-10T14:26:40Z", + "xxx", + "1.1.1.1" + ], + [ + "xxx", + "2024-08-10T14:26:40Z", + "xxx", + "1.1.1.1" + ] + ], + [ + [ + 
"timestamp", + "unread", + "href", + "text" + ], + [ + "2024-11-20T12:16:40Z", + "true", + "url://somewhere", + "xxx" + ], + [ + "2024-11-15T00:20:00Z", + "true", + "url://somewhere", + "xxx" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-02-21T03:10:00Z" + ] + ], + [ + [ + "name", + "uri", + "timestamp" + ], + [ + "xxx", + "url://somewhere", + "2024-09-11T20:03:20Z" + ], + [ + "xxx", + "url://somewhere", + "2024-01-20T12:50:00Z" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-09-10T10:43:20Z" + ], + [ + "xxx", + "2024-09-02T12:26:40Z" + ] + ], + [ + [ + "event", + "created_timestamp", + "ip_address", + "user_agent", + "datr_cookie" + ], + [ + "xxx", + "2024-08-11T01:33:20Z", + "", + "", + "" + ], + [ + "xxx", + "2024-08-10T14:26:40Z", + "", + "", + "" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-09-01T14:13:20Z" + ], + [ + "xxx", + "2024-08-12T08:06:40Z" + ] + ], + [ + [ + "start", + "end" + ] + ], + [ + [ + "name", + "created_timestamp", + "updated_timestamp", + "ip_address", + "user_agent", + "location", + "app", + "session_type", + "datr_cookie" + ], + [ + "", + "2024-04-04T19:46:40Z", + "2024-11-23T02:46:40Z", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx" + ], + [ + "", + "2024-04-05T06:53:20Z", + "2024-11-22T10:06:40Z", + "1.1.1.1", + "some/path", + "xxx", + "xxx", + "xxx", + "xxx" + ] + ], + [ + [ + "name", + "timestamp" + ], + [ + "xxx", + "2024-04-01T16:46:40Z" + ], + [ + "xxx", + "2024-09-07T16:03:20Z" + ] + ], + [ + [ + "title", + "timestamp" + ], + [ + "xxx", + "2024-02-12T17:46:40Z" + ], + [ + "xxx", + "2024-02-12T17:46:40Z" + ] + ], + [ + [ + "title", + "data", + "timestamp" + ], + [ + "xxx", + "xxx", + "2024-12-08T09:26:40Z" + ], + [ + "xxx", + "xxx", + "2024-12-28T00:16:40Z" + ] + ], + [ + [ + "id", + "perRowDescription", + "perRowTags", + "columnMeta" + ], + [ + "Facebookv2___connected_apps_and_websites_json", + "App \\"{0}\\" added on {1}", + "facebook", + 
"text,isodatetime" + ], + [ + "Facebookv2___comments_json", + "Comment on \\"{2}\\" at {0}", + "facebook", + "isodatetime,TODO,text" + ], + [ + "Facebookv2___Messages_chatname_000000000000000000___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebookv2___Messages_chatname_000000000000000___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebookv2___Messages_chatname_00000000000000000___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebookv2___Messages_archived_threads___chatnametype2_000000000000000_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebookv2___Messages_chatname_00000000000000000___message_1_json", + "\\"{3}\\" from {0} at {2}", + "facebook,message", + "sender,receiver,isodatetime,text" + ], + [ + "Facebookv2___time_spent_on_facebook_json", + "Active from {0} to {1}", + "facebook", + "isodatetime,isodatetime" + ], + [ + "Facebookv2___your_group_membership_activity_json", + "Joined group \\"{0}\\" at {1}", + "facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebookv2___group_posts_and_comments_json", + "Group post \\"{0}\\" at {2}", + "facebook", + "text,TODO,isodatetime" + ], + [ + "Facebookv2___pages_and_profiles_you_ve_unfollowed_json", + "Unfollowed \\"{0}\\" at {1}", + "facebook,initiated_by_me", + "text,isodatetime" + ], + [ + "Facebookv2___your_friends_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebookv2___rejected_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebookv2___received_friend_requests_json", + "{0} at {1}", + "facebook", + "text,isodatetime" + ], + [ + "Facebookv2___people_and_friends_json", + "Interaction with {0} at {2}", + "facebook", + "text,url,isodatetime" + ], + [ + 
"Facebookv2___your_search_history_json", + "Searched for \\"{1}\\" at {2}", + "facebook,initiated_by_me,content_by_me", + "text,text,isodatetime" + ], + [ + "Facebookv2___notifications_json", + "Notification at {0}: \\"{3}\\"", + "facebook,initiated_by_third_party", + "isodatetime,any,url,text" + ], + [ + "Facebookv2___account_activity_json", + "{0} from {4}, {6} on {8}", + "facebook,security", + "text,text,text,text,text,text,text,text,isodatetime" + ], + [ + "Facebookv2___record_details_json", + "{0} at {1} from {2}", + "facebook,security", + "text,isodatetime,text,text,text" + ], + [ + "Facebookv2___where_you_re_logged_in_json", + "Session \\"{0}\\" from {5} on {1}", + "facebook,security", + "text,isodatetime,isodatetime,text,text,text,text,text,text" + ], + [ + "Facebookv2___email_address_verifications_json", + "{2} verification of {1} at {0}", + "facebook,security", + "isodatetime,text,text" + ], + [ + "Facebookv2___logins_and_logouts_json", + "{0} on {2} at {1} from {3}", + "facebook,security", + "text,isodatetime,text,text" + ], + [ + "Facebookv2___items_sold_json", + "Sold \\"{0}\\" for {1} on {3}", + "facebook,marketplace", + "text,numeric,sender,isodatetime,lat,lng,text" + ] + ] ] `; diff --git a/timelinize.ts b/timelinize.ts index 50fef01..7e0675c 100644 --- a/timelinize.ts +++ b/timelinize.ts @@ -150,8 +150,7 @@ async function main() { } } - // OFFSET + LIMIT to ignore the CSV headers - db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${typePart}, ${senderPart}, ${receiverPart}, ${latPart}, ${lngPart}, ${tagsPart} FROM ${tableName} LIMIT -1 OFFSET 1;`); + db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${typePart}, ${senderPart}, ${receiverPart}, ${latPart}, ${lngPart}, ${tagsPart} FROM ${tableName};`); } const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()!.count; From 7d815833e611bb02357ffc25e16a6399f3208def Mon Sep 17 00:00:00 2001 From: cobertos Date: Thu, 26 Feb 2026 16:32:33 
-0500 Subject: [PATCH 3/4] Rewrote timelinize.ts to work, and added new features for it. Added aggregateColumns for aggregated header, added metaIdValue to track which aggregate has metadata for another TaskTarget, added each() to allow a method of using cmd() with .id and other properties, added execPaths to make the initial definition of TaskTarget array to be a little more succinct --- .gitignore | 2 +- data-export/facebook.ts | 14 +- data-export/task.ts | 52 ++++- main.ts | 52 ++--- timelinize.ts | 418 +++++++++++++++++++--------------------- 5 files changed, 282 insertions(+), 256 deletions(-) diff --git a/.gitignore b/.gitignore index 48e832b..3f6a571 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ node_modules/ -your.db +*.db your.csv .gitSAFE diff --git a/data-export/facebook.ts b/data-export/facebook.ts index 0ecf231..54b5cae 100644 --- a/data-export/facebook.ts +++ b/data-export/facebook.ts @@ -1,4 +1,4 @@ -import { pipe, branch, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; +import { pipe, branch, each, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts"; /**Parses about_you/notifications.json in the old format * or logged_information/notifications.json in the new format*/ @@ -60,12 +60,15 @@ function facebook_messages_generic() { // TODO: This will result in MULTIPLE rows for a single thread if there is multiple .jsons for a single // chat in one directory. Ughhhhhhhhhhhhhhh. 
For now this is just a limiation yield pipe( - cmd(["jq", "-r", ` - [.title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))] - | @csv - `]), + each(t => + t.clone().cmd(["jq", "-r", ` + ["${t.id}", .title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))] + | @csv + `]) + ), assignMeta({ aggregate: true, + aggregateColumns: ["id", "title", "is_still_participant", "thread_type", "thread_path", "participants"], idValue: "Facebook - Messages Meta", }) ); @@ -80,6 +83,7 @@ function facebook_messages_generic() { | @csv `]), assignMeta({ + metaIdValue: "Facebook - Messages Meta", columnMeta: ["sender", "receiver", "isodatetime", "text"], perRowDescription: '"{3}" from {0} at {2}', perRowTags: "facebook,message", diff --git a/data-export/task.ts b/data-export/task.ts index 3597bb7..bcbeed0 100644 --- a/data-export/task.ts +++ b/data-export/task.ts @@ -122,7 +122,7 @@ export const COLUMN_TYPES = { /**Column metadata. 
Just a string into the TYPES*/ type ColumnMeta = (keyof typeof COLUMN_TYPES | undefined); // Make non-optional version of just the metadata values of TaskTarget -type TaskTargetMeta = Required>; +type TaskTargetMeta = Required>; export class TaskTarget { /**The current path pointed to by this TaskTarget*/ @@ -150,6 +150,10 @@ export class TaskTarget { columnMeta?: ColumnMeta[]; /**Whether or not to aggregate to a single task (everything with the id value idValue)*/ aggregate?: boolean; + /**Names of the columns to aggregate with*/ + aggregateColumns?: string[]; + /**A metadata TaskTarget for this TaskTarget, if one exists*/ + metaIdValue?: ValidId; constructor(path: string){ this.path = path; @@ -186,6 +190,15 @@ export class TaskTarget { } return safe(this.idValue); } + get metaId() { + if (!this.metaIdValue) { + return undefined; + } + if (typeof this.metaIdValue === "function") { + return safe(this.metaIdValue(this)); + } + return safe(this.metaIdValue); + } /**Changes the current directory of the target*/ cd(path: string): TaskTarget { @@ -225,6 +238,9 @@ export class TaskTarget { t.perRowDescription = this.perRowDescription; t.perRowTags = this.perRowTags; t.columnMeta = this.columnMeta?.slice(); + t.metaIdValue = this.metaIdValue; + t.aggregate = this.aggregate; + t.aggregateColumns = this.aggregateColumns?.slice(); return t; } @@ -285,7 +301,9 @@ export function assignMeta(meta: Partial): PipelineOp { return (targets: TaskTarget[]) => targets.map(t => t.clone().assignMeta(meta)) } - +export function each(fn: (t: TaskTarget)=>TaskTarget): PipelineOp { + return (targets: TaskTarget[])=> targets.map(fn); +} export function pipe(...ops: PipelineOp[]): PipelineOp { return async (targets: TaskTarget[]) => { for (const op of ops) { @@ -307,6 +325,17 @@ export function branchGen(genFn: ()=>Generator): PipelineOp { }; } +export async function execPaths(entries: ({path: string, op: PipelineOp })[]) { + return (await Promise.all( + // Map every entry path into a 
TaskTarget and run the PipelineOp with + // that TaskTarget + entries + .map(async ({path,op})=>{ + const targets = [new TaskTarget(path)]; + return await op(targets); + }) + )).flat(); +} /**Verify, anything that fails is skipped and throws an error*/ @@ -314,6 +343,7 @@ export async function verify(targets: TaskTarget[]) { const outTargets: TaskTarget[] = []; for (const t of targets) { // Make sure fsImpl is ready + // TODO: DO NOT PUT THIS IN VERIFY, this should go somewhere in the task building stuff... if ("ready" in t.fsImpl && !t.fsImpl.ready && t.fsImpl.init) { await t.fsImpl.init(); } @@ -347,10 +377,12 @@ export interface ProcessOutputSimple { ok: boolean; } -function combineProcessOutputAggregate(poa: ProcessOutputAggregate | undefined, po: ProcessOutput) { +function combineProcessOutputAggregate(poa: ProcessOutputAggregate | undefined, t: TaskTarget, po: ProcessOutput) { if (!poa) { + assert(t.aggregateColumns, "aggregate TaskTarget must have aggregateColumns"); + const headers = t.aggregateColumns.join(",") + "\n"; return { - stdout: po.stdout, + stdout: headers + po.stdout, stderr: po.stderr, exitCodes: [po.exitCode], duration: po.duration, @@ -402,7 +434,7 @@ export async function runAll(targets: TaskTarget[]): Promise { const prevResult = aggregateResultsMap[aggregateId]?.result; aggregateResultsMap[aggregateId] = { target: t, // Use target t for metadata, so it will use the last target - result: combineProcessOutputAggregate(prevResult as (ProcessOutputAggregate | undefined), r) + result: combineProcessOutputAggregate(prevResult as (ProcessOutputAggregate | undefined), t, r) }; } @@ -418,17 +450,19 @@ export async function runAll(targets: TaskTarget[]): Promise { } return s; } - let metadataCSV = "id,perRowDescription,perRowTags,columnMeta\n"; + let metadataCSV = "id,perRowDescription,perRowTags,columnMeta,metaId\n"; for (const t of nonAggregateTargets) { const tableNamePart = t.id; const perRowDescriptionPart = t.perRowDescription; const 
perRowTagsPart = t.perRowTags; const columnMetaPart = t.columnMeta?.join(",") ?? ""; + const metaIdPart = t.metaId; metadataCSV += [ csvEscape(tableNamePart), csvEscape(perRowDescriptionPart), csvEscape(perRowTagsPart), - csvEscape(columnMetaPart) + csvEscape(columnMetaPart), + csvEscape(metaIdPart) ].join(",") + "\n"; } // Won't be removed by verify() because we're adding it after that's used @@ -436,10 +470,10 @@ export async function runAll(targets: TaskTarget[]): Promise { // to a real path const metadataTarget = new TaskTarget(""); metadataTarget - // id, perRowDescription, perRowTags, columnMeta + // id, perRowDescription, perRowTags, columnMeta, metaId .assignMeta({ idValue: "base_data_manager_metadata", - columnMeta: ["any", "any", "any", "any"], + columnMeta: ["any", "any", "any", "any", "any"], perRowTags: "internal", }); const metadataResult= { diff --git a/main.ts b/main.ts index fbe171f..914eef9 100644 --- a/main.ts +++ b/main.ts @@ -1,33 +1,16 @@ -import "./data-export/facebook.ts"; +import { type DatabaseSync } from "node:sqlite"; +import { fileURLToPath } from "node:url"; import { google } from "./data-export/google.ts"; import { facebook, facebook_v2 } from "./data-export/facebook.ts"; -import { TaskTarget } from "./data-export/task.ts"; +import { type TaskTarget, execPaths } from "./data-export/task.ts"; import * as DataIO from "./data-export/io.ts"; -async function main() { - let time = Date.now(); - function elapsed() { - return `${((Date.now() - time) / 1000).toFixed(2)}s`; - } +const __filename = fileURLToPath(import.meta.url); - const sqlitePath = 'your.db'; +export const startTime = Date.now(); +export const elapsed = ()=>`${((Date.now() - startTime) / 1000).toFixed(2)}s`; - console.log(`${elapsed()} - Building targets`); - const unbuiltTargets = [ - new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json") - // new 
TaskTarget("/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01"), - //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2(); - //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001").facebook_v2(); - ]; - console.log(`${elapsed()} - Begin solving ${unbuiltTargets.length} input target for possible targets`); - const targets = await facebook()(unbuiltTargets); - console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`); - // .facebook_v2(); - // .google(); - - // TODO: Make this less painful in task.ts - // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip"); - // await (zipTask.fsImpl as any).init(); +export async function loadTaskInNewDb(targets: TaskTarget[]): Promise { console.log(`${elapsed()} - Run all targets`); const out = await DataIO.runPipeline(targets); console.log(`${elapsed()} - Final targets exported to CSV. 
Got ${out.length} targets`); @@ -41,6 +24,23 @@ async function main() { const tableCount = db.prepare(`SELECT COUNT(*) as count FROM base_data_manager_metadata`).get()!.count; console.log(`${elapsed()} - Single database built with ${tableCount} tables`); + return db; +} + +async function main() { + // Configurable stuff + const sqlitePath = 'your.db'; + + console.log(`${elapsed()} - Building targets`); + const targets = await execPaths([ + {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()} + // {path: "/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01", op: facebook()} + // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip", op: pipe(unzip(), facebook_v2())} + // {path: "/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001", op: facebook_v2()} + ]); + console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`); + + const db = await loadTaskInNewDb(targets); console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`); DataIO.dumpDBToDisk(db, sqlitePath); @@ -48,7 +48,9 @@ async function main() { console.log(`${elapsed()} - Database written to disk`); } -main(); +if (process.argv[1] === __filename) { + main(); +} // TODO: Move this into here // csvSink( diff --git a/timelinize.ts b/timelinize.ts index 7e0675c..a0dc95f 100644 --- a/timelinize.ts +++ b/timelinize.ts @@ -1,42 +1,32 @@ -import fs from 'node:fs/promises'; -import fsSync from 'node:fs'; -import nodePath from "node:path"; -import { DatabaseSync } from "node:sqlite"; +import { type SQLOutputValue, type DatabaseSync } from "node:sqlite"; +import { createWriteStream } from 'node:fs'; +import { fileURLToPath } from "node:url"; import "./data-export/facebook.ts"; -import { google } from "./data-export/google.ts"; -import { TaskTargetPipelineHelper, TaskTarget, runAll } from 
"./data-export/task.ts"; -import { ProcessOutput } from 'zx'; +import { facebook } from "./data-export/facebook.ts"; +import { execPaths, COLUMN_TYPES } from "./data-export/task.ts"; +import * as DataIO from "./data-export/io.ts"; +import { + startTime, + elapsed, + loadTaskInNewDb +} from "./main.ts"; -declare module "./data-export/task.ts" { - interface TaskTargetPipelineHelper { - google: typeof google; +const __filename = fileURLToPath(import.meta.url); + +function dumpDBTableToCSV(db: DatabaseSync, tableName: string, outputFile: string) { + const stream = createWriteStream(outputFile); + const stmt = db.prepare(`SELECT * FROM ${tableName}`); + + let headerWritten = false; + for (const row of stmt.iterate()) { + if (!headerWritten) { + stream.write(Object.keys(row).join(',') + '\n'); + headerWritten = true; + } + stream.write(Object.values(row).map(v => `"${String(v ?? '').replace(/"/g, '""')}"`).join(',') + '\n'); } -} - -Object.assign(TaskTargetPipelineHelper.prototype, { - google -}); - -async function loadCSVTable( - db: DatabaseSync, - target: TaskTarget, - result: ProcessOutput -) { - const id = target.id; - const table = id; - const tmpPath = `/tmp/${id}.csv`; - // console.log(`Writing ${tmpPath}`); - const fd = await fs.open(tmpPath, 'w'); - await fs.writeFile(fd, result.stdout, { encoding: 'utf8' }); - await fd.close(); - // console.log(`Loading ${tmpPath} → table ${table}`); - - // const headers = lines[0].split(","); - // const columnsSql = headers.map(h => `"${h}" TEXT`).join(", "); - db.exec(`CREATE VIRTUAL TABLE temp.tmp_${table} USING csv(filename='${tmpPath}');`); - // db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`); - // db.exec(`DROP TABLE IF EXISTS intermediate;`); - return `tmp_${table}`; + + stream.end(); } function getColumnNames(db: DatabaseSync, tableName: string) { return db.prepare(`PRAGMA table_info(${tableName})`).all().map(c => c.name) as string[]; @@ -50,190 +40,186 @@ function templateToSql(template: string, 
columns: string[]) { }); return `printf('${sqlTemplate}', ${args.join(', ')})`; } - -async function main() { - let time = Date.now(); - function elapsed() { - return `${((Date.now() - time) / 1000).toFixed(2)}s`; +function sqlLiteral(str: string | undefined | null): string { + if (str === null || str === undefined) { + return 'NULL'; } - - const sqlitePath = 'your.db'; - - console.log(`${elapsed()} - Building targets`); - const t = TaskTargetPipelineHelper; - const targets = TaskTargetPipelineHelper.pipeline([ - // new TaskTarget("/home/cobertos/Seafile/projects/base-data-manager/test/fixtures/facebook-json-2021-05-01"), - new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json"), - //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2(); - //new TaskTarget("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001").facebook_v2(); - ]) - .facebook(); - // .facebook_v2(); - // .google(); - - // TODO: Make this less painful in task.ts - // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip"); - // await (zipTask.fsImpl as any).init(); - - const results = await runAll(targets); - console.log(`${elapsed()} - All ${results.length} targets converted to CSV`); - - if (fsSync.existsSync(sqlitePath)) { - await fs.unlink(sqlitePath); // unlink the old - } - // Open an in-memory db for speed - console.log(`${elapsed()} - Building combined database table in :memory:`); - const db = new DatabaseSync(":memory:", { allowExtension: true }); - db.loadExtension("/home/cobertos/sqlite-files/csv.so") - db.enableLoadExtension(false); - - // New output table - db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`); - -//(message, email, note, -// social, location, media, event, 
document, -// bookmark; defaults to note) - - for (const [idx, target] of targets.entries()) { - const result = results[idx]; - let time = Date.now(); - - if (!target.columnMeta) { - continue; // No column information - } - - const tableName = await loadCSVTable(db, target, result); - const columnNames = getColumnNames(db, tableName); - - // Now find what to insert into each row of the combined - let descriptionPart = `'An entry from the ${tableName} table'`; // Default is just kinda garbo... - if (target.perRowDescription) { - descriptionPart = templateToSql(target.perRowDescription, columnNames); - } - - let timestampPart: string | undefined; - let senderPart = 'NULL'; - let receiverPart = 'NULL'; - let latPart = 'NULL'; - let lngPart = 'NULL'; - for (const [idx, col] of target.columnMeta.entries()) { - const columnName = columnNames[idx]; - if (col === "isodatetime") { - timestampPart = columnName; - } else if (col === "sender") { - senderPart = columnName; - } else if (col === "receiver") { - receiverPart = columnName; - } else if (col === "lat") { - latPart = columnName; - } else if (col === "lng") { - lngPart = columnName; - } - } - if (!timestampPart) { - continue; - } - - let tagsPart = 'NULL'; - if (target.perRowTags) { - // Per row tags is an string of csv'd items but needs to be made a literal - tagsPart = `'${target.perRowTags}'`; - // TODO: Make this either a template string or have jq do something - // tagsPart = templateToSqlExpr(target.perRowTags, columnNames); - } - - let typePart = "'note'"; - if (target.perRowTags) { - //message, email, note, social, location, media, event, document, bookmark - if (target.perRowTags.includes(",message")) { - typePart = "'message'"; - } - } - - db.exec(`INSERT INTO combined SELECT ${timestampPart}, ${descriptionPart}, ${typePart}, ${senderPart}, ${receiverPart}, ${latPart}, ${lngPart}, ${tagsPart} FROM ${tableName};`); - } - - const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()!.count; - 
console.log(`${elapsed()} - Combined database built with ${count} rows`); - // Dump it all to the path specified - db.exec(`VACUUM main INTO '${sqlitePath}'`); - console.log(`${elapsed()} - Combined database written to disk`); - - // Now dump it as a CSV - console.log(`${elapsed()} - Building final combined CSV`); - // const rows = db.prepare(` - // SELECT timestamp || ',' || - // '"' || replace(description, '"', '""') || '"' || ',' || - // COALESCE(type, '') || ',' || - // '"' || replace(COALESCE(sender, ''), '"', '""') || '"' || ',' || - // '"' || replace(COALESCE(receiver, ''), '"', '""') || '"' || ',' || - // COALESCE(lat, '') || ',' || - // COALESCE(lng, '') || ',' || - // '"' || replace(COALESCE(tags, ''), '"', '""') || '"' as row FROM combined - // `.replace(/\n/g, '')) - // .all() - // .map(r => r.row) - // .join('\n'); - const rows = db.prepare(`SELECT * FROM combined`).all() - .map(r => [ - r.timestamp, - r.description, - r.type, - r.sender, - r.receiver, - r.lat, - r.lng, - r.tags - ].map(v => { - if (v == null || v === '') return ''; - const str = String(v); - return str.includes(',') || str.includes('"') - ? 
`"${str.replace(/"/g, '""')}"` - : str; - }).join(',')) - .join('\n'); - db.close(); - - console.log(`${elapsed()} - Writing final combined CSV`); - const headers = "timestamp,description,type,sender,receiver,lat,lng,tags\n"; - await fs.writeFile('your.csv', headers+rows, { encoding: "utf8" }); + + // Escape single quotes by doubling them + const escaped = str.replace(/'/g, "''"); + + // Wrap in single quotes + return `'${escaped}'`; } -main(); +async function main() { + // Configure the tasks to run + console.log(`${elapsed()} - Building targets`); + const targets = await execPaths([ + {path: "/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json", op: facebook()} + ]); + console.log(`${elapsed()} - Found ${targets.filter(t => !t.aggregate).length} possible targets`); + const db = await loadTaskInNewDb(targets); -// TODO: Move this into here - // csvSink( - // summarization?: [string, string][] - // ) { - // // TODO: - // return this; + // New output tables + db.exec(`CREATE TABLE combined (timestamp TEXT, description TEXT, type TEXT, sender TEXT, receiver TEXT, lat REAL, lng REAL, tags TEXT);`); - // // Ingest this csv into the database at the given id - // // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]); - // // Add a post processing function for these targets that prints out the summarization - // // stats - // // this.post(async (t: TaskTarget)=>{ - // // // We only do the first one so far for the summarization - // // let queryLine: string; - // // let formatFn: (r: any)=>string; - // // const [columnName, type] = summarization?.[0] ?? 
[undefined, undefined]; - // // if (type === "numeric") { - // // queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`; - // // formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`; - // // } - // // else { - // // queryLine = `count(*) as n`; - // // formatFn = (r: any)=>`${r.n} rows for ${t.id}`; - // // } + //(message, email, note, + // social, location, media, event, document, + // bookmark; defaults to note) - // // const cmd = "sqlite-utils"; - // // const args = ["query", "your.db", `select ${queryLine} from ${t.id}`] - // // const { stdout, stderr } = await execFile(cmd, args); - // // const results = JSON.parse(stdout); - // // const result = results[0]; // should only be one result in the array for this type of query - // // const logLine = formatFn(result); - // // (t as any).log = logLine; - // // }); + type ColumnMetaType = (keyof typeof COLUMN_TYPES); + interface MetadataRow { + id: string, + perRowDescription?: string, + perRowTags?: string, + columnMeta: ColumnMetaType[], + columnNames: string[], + metaId?: string + } + function verifyMetdataRow(input: Record): undefined | MetadataRow { + const { id, perRowDescription, perRowTags, columnMeta: columnMetaCSV, metaId } = input; + if (!id) { + console.error("Row did not have id/tableName, skipping"); + return undefined; + } + if (typeof id !== "string") { + console.error(`Id must be string, got ${typeof id}, ${id}`); + return undefined; + } + if (!columnMetaCSV) { + console.warn(`${id} did not have columnMeta, nothing to do. Skipping`); + return undefined; // No column information + } + if (typeof columnMetaCSV !== "string") { + console.warn(`${id} did not have columnMeta of type string. 
Skipping`); + return undefined; + } + const columnMeta = columnMetaCSV.split(",") as ColumnMetaType[]; + + // Get the column names from the table id + const columnNames = getColumnNames(db, id); + if (columnNames.length !== columnMeta.length) { + console.error(`columnNames and columnMeta did not have same length. skipping`); + return undefined; + } + + if (typeof perRowDescription !== "string" && perRowDescription !== undefined && perRowDescription !== null) { + console.warn(`Invalid typeof perRowDescription, was ${typeof perRowDescription}, value ${perRowDescription}`); + return undefined; + } + if (typeof perRowTags !== "string" && perRowTags !== undefined && perRowTags !== null) { + console.warn(`Invalid typeof perRowTags, was ${typeof perRowTags}, value ${perRowTags}`); + return undefined; + } + if (typeof metaId !== "string" && metaId !== undefined && metaId !== null) { + console.warn(`Invalid typeof metaId, was ${typeof metaId}, value ${metaId}`); + return undefined; + } + + return { + id, + perRowDescription: perRowDescription ?? undefined, + perRowTags: perRowTags ?? undefined, + columnMeta, + columnNames, + metaId: metaId ?? undefined + }; + } + + /**Maps columnMeta names to the column names*/ + function metaToNames(meta: MetadataRow): Partial> { + const out: Partial> = {}; + for (const [idx, name] of meta.columnNames.entries()) { + const metaName = meta.columnMeta[idx]; + if (out[metaName]) { + console.warn(`Duplicate column with metaName "${metaName}". The current one which will be used is "${out[metaName]}". 
Skipping the duplicate.`); + continue; + } + out[metaName] = name; + } + return out; + } + function metaParts(metaNameToColumnName: Partial>): Record { + const out: Record = {} as any; + for (const type of Object.keys(COLUMN_TYPES) as ColumnMetaType[]) { + if (!metaNameToColumnName[type]) { + out[type] = "NULL"; + continue; + } + // Wrap in brackets so column names like "from" don't cause any issues + out[type] = `[${metaNameToColumnName[type]}]` + } + return out; + } + + // Iterate over all the tables and their metadata + const statement = db.prepare(`SELECT id, perRowDescription, perRowTags, columnMeta, metaId FROM base_data_manager_metadata`); + for (const row of statement.iterate()) { + const verified = verifyMetdataRow(row); + if (!verified) { + continue; + } + const { id, perRowDescription, perRowTags, columnMeta, columnNames, metaId } = verified; + const metaNameToColumnName = metaToNames(verified); + const part = metaParts(metaNameToColumnName); + + // Now find what to insert into each row of the combined + // Per row tags is an string of csv'd items but needs to be made a literal + // TODO: Make this either a template string or have jq do something + // tagsPart = templateToSqlExpr(target.perRowTags, columnNames); + const tagsPart = sqlLiteral(perRowTags); + + // Choose what to do with this table based on what meta is present + if ( + !!metaNameToColumnName.sender + && !!metaNameToColumnName.isodatetime + ) { + if (!metaId) { + console.warn(`Chat ${id} with .sender but no .metaId. Skipping`); + continue; + } + + // First pull the name of the conversation out of the metaId + const receiverThreadTitle = db.prepare(`SELECT title FROM ${metaId} WHERE (id=${sqlLiteral(id)})`).get()?.title; + if (!receiverThreadTitle || typeof receiverThreadTitle !== "string") { + console.warn(`Chat ${id} with .metaId ${metaId} returned invalid receiverThreadTitle ${typeof receiverThreadTitle}. 
Skipping`); + continue; + } + const receiverPart = sqlLiteral(receiverThreadTitle); + + // Put this table into the combined table + db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${part.text}, 'message', ${part.sender}, ${receiverPart}, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`); + } + else if (!!metaNameToColumnName.isodatetime) { + // Put this table into the combined table + let descriptionPart = perRowDescription + ? templateToSql(perRowDescription, columnNames) + : `'An entry from the ${id} table'`; // Default is just kinda garbo... + db.exec(`INSERT INTO combined SELECT ${part.isodatetime}, ${descriptionPart}, 'node', NULL, NULL, ${part.lat}, ${part.lng}, ${tagsPart} FROM ${id};`); + } + else { + console.warn(`Table with id ${id} had no isodatetime or anything else of value, skipping...`); + } + } + + const count = db.prepare(`SELECT COUNT(*) as count FROM combined`).get()?.count; + console.log(`${elapsed()} - Combined database built with ${count} rows`); + + // Dump it to the disk for debugging + const sqlitePath = "debug_your.csv.db"; + console.log(`${elapsed()} - Writing database to disk at "${sqlitePath}"`); + await DataIO.dumpDBToDisk(db, sqlitePath); + + console.log(`${elapsed()} - Database written to disk`); + + // Dump it all to the path specified + dumpDBTableToCSV(db, "combined", "your.csv"); + console.log(`${elapsed()} - Combined database written to disk as CSV`); + db.close(); +} + +if (process.argv[1] === __filename) { + main(); +} - // // return this; - // } \ No newline at end of file From c093fbfceeeef52ec6ffcb52f4b2e71360a2b9cd Mon Sep 17 00:00:00 2001 From: cobertos Date: Fri, 27 Feb 2026 03:39:42 -0500 Subject: [PATCH 4/4] Added 3 more scrubbed fixtures for new exports, scrub added boolean and numeric key scrubbing --- .gitignore | 1 + test/fixtures/README.md | 3 + .../fixtures/discord-chat-exporter-2026-02.md | 25 ++ ...nels - ChannelName [0000000000000000].json | 145 +++++++ .../avatar.png | Bin 0 -> 1268 bytes 
.../example.png | Bin 0 -> 1341 bytes .../unknown-SUFFIX.png | Bin 0 -> 1341 bytes test/fixtures/discord-json-2021-01.md | 41 ++ test/fixtures/discord-json-2021-01/README.txt | 26 ++ .../0000000000000000/application.json | 16 + .../discord-json-2021-01/account/avatar.png | Bin 0 -> 1787 bytes .../discord-json-2021-01/account/user.json | 399 ++++++++++++++++++ .../analytics/events-2021-00000-of-00001.json | 2 + .../modeling/events-2021-00000-of-00001.json | 2 + .../reporting/events-2021-00000-of-00001.json | 2 + .../tns/events-2021-00000-of-00001.json | 2 + .../messages/11111111111111111/channel.json | 1 + .../messages/11111111111111111/messages.csv | 2 + .../messages/222222222222222222/channel.json | 1 + .../messages/222222222222222222/messages.csv | 2 + .../messages/333333333333333333/channel.json | 1 + .../messages/333333333333333333/messages.csv | 6 + .../discord-json-2021-01/messages/index.json | 5 + .../servers/444444444444444444/audit-log.json | 18 + .../servers/444444444444444444/guild.json | 4 + .../discord-json-2021-01/servers/index.json | 3 + test/fixtures/facebook-json.md | 9 + test/fixtures/snapchat-2023-11.md | 83 ++++ .../snapchat-2023-11/json/account.json | 38 ++ .../json/account_history.json | 47 +++ .../snapchat-2023-11/json/bitmoji.json | 31 ++ .../json/cameos_metadata.json | 8 + .../snapchat-2023-11/json/chat_history.json | 42 ++ .../snapchat-2023-11/json/connected_apps.json | 11 + .../json/email_campaign_history.json | 13 + .../snapchat-2023-11/json/friends.json | 100 +++++ .../snapchat-2023-11/json/in_app_surveys.json | 26 ++ .../json/location_history.json | 23 + .../snapchat-2023-11/json/ranking.json | 6 + .../snapchat-2023-11/json/shared_story.json | 11 + .../snapchat-2023-11/json/snapchat_ai.json | 4 + .../snapchat-2023-11/json/subscriptions.json | 10 + .../snapchat-2023-11/json/terms_history.json | 15 + .../snapchat-2023-11/json/user_profile.json | 39 ++ ...aaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg | Bin 0 -> 2294 bytes util/scrub.jq | 125 
++++-- util/scrub.ts | 11 +- 47 files changed, 1313 insertions(+), 46 deletions(-) create mode 100644 test/fixtures/discord-chat-exporter-2026-02.md create mode 100644 test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json create mode 100644 test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png create mode 100644 test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/example.png create mode 100644 test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/unknown-SUFFIX.png create mode 100644 test/fixtures/discord-json-2021-01.md create mode 100644 test/fixtures/discord-json-2021-01/README.txt create mode 100644 test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json create mode 100644 test/fixtures/discord-json-2021-01/account/avatar.png create mode 100644 test/fixtures/discord-json-2021-01/account/user.json create mode 100644 test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json create mode 100644 test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json create mode 100644 test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json create mode 100644 test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json create mode 100644 test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json create mode 100644 test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv create mode 100644 test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json create mode 100644 test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv create mode 100644 test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json create mode 100644 
test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv create mode 100644 test/fixtures/discord-json-2021-01/messages/index.json create mode 100644 test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json create mode 100644 test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json create mode 100644 test/fixtures/discord-json-2021-01/servers/index.json create mode 100644 test/fixtures/facebook-json.md create mode 100644 test/fixtures/snapchat-2023-11.md create mode 100644 test/fixtures/snapchat-2023-11/json/account.json create mode 100644 test/fixtures/snapchat-2023-11/json/account_history.json create mode 100644 test/fixtures/snapchat-2023-11/json/bitmoji.json create mode 100644 test/fixtures/snapchat-2023-11/json/cameos_metadata.json create mode 100644 test/fixtures/snapchat-2023-11/json/chat_history.json create mode 100644 test/fixtures/snapchat-2023-11/json/connected_apps.json create mode 100644 test/fixtures/snapchat-2023-11/json/email_campaign_history.json create mode 100644 test/fixtures/snapchat-2023-11/json/friends.json create mode 100644 test/fixtures/snapchat-2023-11/json/in_app_surveys.json create mode 100644 test/fixtures/snapchat-2023-11/json/location_history.json create mode 100644 test/fixtures/snapchat-2023-11/json/ranking.json create mode 100644 test/fixtures/snapchat-2023-11/json/shared_story.json create mode 100644 test/fixtures/snapchat-2023-11/json/snapchat_ai.json create mode 100644 test/fixtures/snapchat-2023-11/json/subscriptions.json create mode 100644 test/fixtures/snapchat-2023-11/json/terms_history.json create mode 100644 test/fixtures/snapchat-2023-11/json/user_profile.json create mode 100644 test/fixtures/snapchat-2023-11/memories/2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg diff --git a/.gitignore b/.gitignore index 3f6a571..7e254c9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ node_modules/ *.db your.csv .gitSAFE +*.DELETE-THIS-HAS-PII \ No newline at end 
of file diff --git a/test/fixtures/README.md b/test/fixtures/README.md index 75fd3a9..f0b836d 100644 --- a/test/fixtures/README.md +++ b/test/fixtures/README.md @@ -11,3 +11,6 @@ * `facebook-json-2021-05-01` - Facebook JSON export * `facebook-json-2025-11-29` - Facebook JSON export +* [`discord-chat-exporter-2026-02`](./discord-chat-exporter-2026-02.md) - Discord export with [DiscordChatExporter](https://github.com/Tyrrrz/DiscordChatExporter) sometime around Feb 2026 +* [`discord-json-2021-01`](./discord-json-2021-01.md) - Discord JSON export +* [`snapchat-2023-11`](./snapchat-2023-11.md) - Snapchat JSON + HTML export diff --git a/test/fixtures/discord-chat-exporter-2026-02.md b/test/fixtures/discord-chat-exporter-2026-02.md new file mode 100644 index 0000000..3f8982f --- /dev/null +++ b/test/fixtures/discord-chat-exporter-2026-02.md @@ -0,0 +1,25 @@ +# discord-chat-exporter-2026-02 + +An export from `DiscordChatExporter`, a comprehensive Discord chat exporter + +## Export methodology + +This uses the version of `DiscordChatExporter` that existed at the top of the releases tab on GitHub around `2026 February`. **TODO: figure out version** + +This export used a command something like the following to try to get _everything_: `dotnet DiscordChatExporter.Cli.dll export -t xxx -o ~/DiscordChatExporter -f json --media --reuse-media --include-threads -c xxx` + +* It uses the `export` command and `-c` but it's the same for `exportguild` and `-g` +* `-f json` so only the json export +* `--media` download all media +* `--reuse-media` not quite sure what this does because it puts it in a folder per channel... +* `--include-threads` to get any threads + +## Manual edits +* Lots of image replacing + placeholders +* Had to rename the folders + +## Notes +The export format has files and folders with similar, information-dense names.
I tried to preserve that as that's the only way to correlate between the folder and the file name + +* No exif on any media files +* There's embeds, thumbnails in the example chat messages but I have no other specimen \ No newline at end of file diff --git a/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json b/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json new file mode 100644 index 0000000..653631d --- /dev/null +++ b/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json @@ -0,0 +1,145 @@ +{ + "guild": { + "id": "111111111111111111", + "name": "xxxxxxxx", + "iconUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png" + }, + "channel": { + "id": "111111111111111111", + "type": "xxxxxxxxxxxxx", + "categoryId": "111111111111111111", + "category": "xxxxxxxxxxxxx", + "name": "xxxxxxx", + "topic": null + }, + "dateRange": { + "after": null, + "before": null + }, + "exportedAt": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "messages": [ + { + "id": "111111111111111111", + "type": "xxxxxxxxxxxxxxx", + "timestamp": "2020-04-13T10:09:08.000000+00:00", + "timestampEdited": null, + "callEndedTimestamp": null, + "isPinned": false, + "content": "xxxxxxxxxxxxxxxxxx", + "author": { + "id": "111111111111111111", + "name": "xxxxxxxx", + "discriminator": "1111", + "nickname": "xxxxxxxx", + "color": null, + "isBot": false, + "roles": [], + "avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png" + }, + "attachments": [], + "embeds": [], + "stickers": [], + "reactions": [], + "mentions": [], + "inlineEmojis": [] + }, + { + "id": "111111111111111111", + "type": "xxxxxxx", + "timestamp": "2020-04-13T10:09:08.000000+00:00", + "timestampEdited": null, + "callEndedTimestamp": null, + "isPinned": false, + "content": "xxxxxxxxx", + "author": { + "id": 
"111111111111111111", + "name": "xxxxxxxx", + "discriminator": "1111", + "nickname": "xxxxxxxx", + "color": null, + "isBot": false, + "roles": [], + "avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png" + }, + "attachments": [], + "embeds": [], + "stickers": [], + "reactions": [], + "mentions": [], + "inlineEmojis": [] + }, + { + "id": "111111111111111111", + "type": "xxxxxxx", + "timestamp": "2020-04-13T10:09:08.000000+00:00", + "timestampEdited": null, + "callEndedTimestamp": null, + "isPinned": false, + "content": "https://example.com/example.png", + "author": { + "id": "111111111111111111", + "name": "xxxxxxxx", + "discriminator": "1111", + "nickname": "xxxxxxxx", + "color": null, + "isBot": false, + "roles": [], + "avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png" + }, + "attachments": [], + "embeds": [ + { + "title": "", + "url": "https://example.com/example.png", + "timestamp": null, + "description": "", + "thumbnail": { + "url": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/example.png", + "width": 111, + "height": 111 + }, + "images": [], + "fields": [], + "inlineEmojis": [] + } + ], + "stickers": [], + "reactions": [], + "mentions": [], + "inlineEmojis": [] + }, + { + "id": "111111111111111111", + "type": "xxxxxxx", + "timestamp": "2020-04-13T10:09:08.000000+00:00", + "timestampEdited": null, + "callEndedTimestamp": null, + "isPinned": false, + "content": "xxx", + "author": { + "id": "111111111111111111", + "name": "xxxxxxxx", + "discriminator": "1111", + "nickname": "xxxxxxxx", + "color": null, + "isBot": false, + "roles": [], + "avatarUrl": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png" + }, + "attachments": [ + { + "id": "111111111111111111", + "url": "GuildName - Text Channels - ChannelName [0000000000000000].json_Files/unknown-SUFFIX.png", + "fileName": "unknown.png", + "fileSizeBytes": 111111 + } + ], + 
"embeds": [], + "stickers": [], + "reactions": [], + "mentions": [], + "inlineEmojis": [] + } + ], + "messageCount": 111 +} diff --git a/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png b/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/avatar.png new file mode 100644 index 0000000000000000000000000000000000000000..16559966c9622d7b67885f7f60183ccc496f3788 GIT binary patch literal 1268 zcmV_5N*m^y=*Ww!Hb(*!-)q_^Yz{cY^fv_5Q%c z`gei!ZFuu+c=O=m{kFXNw!8X*i}j(X_}}CF-{Sq3pZBb?_{+}wm!9`qiP2L400c}) zL_t(|0qntn00000fS~=U7pVdO0000000000000000001h*}2m#S5+kr;Dtn~Rv?2d87; zpYWm?;9G-zC3q^2`-Lv>HZ}+CZOpvahqisAzZw2HcOYo}G4P^j<9pzr3a;~!7Y*^1 zP_>RX(X|BWgE#X(0(9UFC4$o%-q4^3P2{Y~L2%-%&Ou;!V*^1s^5#4N>3MrEua(Y# znIjkt?;=$?17`0yd%&H1A_&UxPF_@{OJL^fI|KlBfyg`Zb?$+nx2@yg*B z`jLzLU>kV_4L1c0uk<4-@XyIZvIw^ip6SPYK?urDpqJd^6SU_-2-Z%ZDe988AY?&9 z+wn#JPbv%nI1V_zPSMXOc_&t2TcJ>Pi_ysUG6rfERE4E#R}iv5K@~%~(geu&LL<~s zFqS4jzVkZJRy^hquvi;m`HVIc7N8)3ya65G4UsSf`BzArR}} zSa@y06omjBhuX4h3x~20fVU$JmvjMT#37VJFg60Z==Od9OaEE56L240yw@BW&Nc*1R(*AC1Sh$>q_MzS%;N?EVvkB7Yaha z?h86b^i04r0nY?H6YxyH2A&CcCg7QX$e`jg0o$+)%0g6xD)@R#GboL8PuOY0H#6un zQc<*BIULi4Z^9)ant|R6jH|1m{cN~Um{lHlFgGNoz*h{pH>OySgAh|5cY&`6nl{s) zc|{5ea}Z)ug5PX*ue7C^B`Kif9gCc8qO5fc)BIU)WGNt(ybHWSaJEq-c>IlxA&-y4T@>6z7lef{iv-yW}G5V=_RrHPa5*W1k1z$nA z`yRZ9hW0eyb#+ebw+R{CIEeSGy~J1AU+?aBap!bxkBlLiyPeT3_Ty)h6MQ)(p#u2h zXUQ#`iVA4J#}`b8@h&y!E)k~w+fz??rDAvkLCpVr~YUU4rBlT000L0Ti@fW000000000000000 e0000009FEcbfa0QeS}v4000057wW_mbFGr=lCLg_5??i*_vny~mU`or^l?w+5XAD(;4Lvd(bh%p2J0NntL z?==-~o=WSC%Cr7@8li%A7KZRW0Gu^Al^RgrUV|9dy94qR1pZSuSiSZNwDKQk=Zk4{gYS1J@!d_KR| z6NxmjwzmEa>4iXOGMUWX@gL2XZclr8d9Cg2kR<-xq@<+jv9YnNU3TwN3J_}U{S zpC5pN=a{i6g@uu;JTCXOL~?zkyRfkEK%wAB%+1Zanal_MdIRq%6v{08I83Xuo=!K- z@t&BNh$a#%#bR+jnfwEf$J@9^qtWEs+q#2;gWW?zCLuW74D}?IkRj5`r`yorOUR^jjxx29eVX;`>X!C`_ zs2mguH9JsRdL|NmZ)Iggw94hWrBjNEcxM)Fsvea}O)Gld 
z*?EF!VX@F6nM_95OiWCSSlrs;)SI=>h(rV_olX~FTT+GlvDu8C9;eutnDA?uQGwu* zF)?|*y1F`oRaRDZfIwWjgca$YI|r|FLn23|(wM$~!U)mPS9;&h1pk?`c;n(|aoywc za_z|4hK5_unbaq5j*i@Ob932~h`p6eVq6@%L$HM;lgWV)EEVyf<%IZ^lclp?SU1tY z7WVYx$LzhlyLT6+NOkH4`ue_Fik8+^KT}ntkO>1y6lcRP012t79mYng6_u`U=22VP z+Kx}!I2?}spu+~2%Pn;oaV(tPStM`=}r#rM=+A8VKTHixLT& z_m=k4%U4pVv`rdUklVBZgTb)lHZ-x@%Q>D^({!Ne+Am_S-t$el#^fc75uT5xo~3*VMFG@&wH*CFAKaE* XCR@_H>tn}HZxIOa!}-4ONhtmwDj`L} literal 0 HcmV?d00001 diff --git a/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/unknown-SUFFIX.png b/test/fixtures/discord-chat-exporter-2026-02/GuildName - Text Channels - ChannelName [0000000000000000].json_Files/unknown-SUFFIX.png new file mode 100644 index 0000000000000000000000000000000000000000..abc6b4a59d84a79c69d8b896bb3ca30ff7913e70 GIT binary patch literal 1341 zcmd6n{WIGK0LH&wu1CsMSsKK<+~`s5dD*;#l_2qwpk5*+D4lt!kj`7Bv{P}kSZf;P zBt{Ujv>57wW_mbFGr=lCLg_5??i*_vny~mU`or^l?w+5XAD(;4Lvd(bh%p2J0NntL z?==-~o=WSC%Cr7@8li%A7KZRW0Gu^Al^RgrUV|9dy94qR1pZSuSiSZNwDKQk=Zk4{gYS1J@!d_KR| z6NxmjwzmEa>4iXOGMUWX@gL2XZclr8d9Cg2kR<-xq@<+jv9YnNU3TwN3J_}U{S zpC5pN=a{i6g@uu;JTCXOL~?zkyRfkEK%wAB%+1Zanal_MdIRq%6v{08I83Xuo=!K- z@t&BNh$a#%#bR+jnfwEf$J@9^qtWEs+q#2;gWW?zCLuW74D}?IkRj5`r`yorOUR^jjxx29eVX;`>X!C`_ zs2mguH9JsRdL|NmZ)Iggw94hWrBjNEcxM)Fsvea}O)Gld z*?EF!VX@F6nM_95OiWCSSlrs;)SI=>h(rV_olX~FTT+GlvDu8C9;eutnDA?uQGwu* zF)?|*y1F`oRaRDZfIwWjgca$YI|r|FLn23|(wM$~!U)mPS9;&h1pk?`c;n(|aoywc za_z|4hK5_unbaq5j*i@Ob932~h`p6eVq6@%L$HM;lgWV)EEVyf<%IZ^lclp?SU1tY z7WVYx$LzhlyLT6+NOkH4`ue_Fik8+^KT}ntkO>1y6lcRP012t79mYng6_u`U=22VP z+Kx}!I2?}spu+~2%Pn;oaV(tPStM`=}r#rM=+A8VKTHixLT& z_m=k4%U4pVv`rdUklVBZgTb)lHZ-x@%Q>D^({!Ne+Am_S-t$el#^fc75uT5xo~3*VMFG@&wH*CFAKaE* XCR@_H>tn}HZxIOa!}-4ONhtmwDj`L} literal 0 HcmV?d00001 diff --git a/test/fixtures/discord-json-2021-01.md b/test/fixtures/discord-json-2021-01.md new file mode 100644 index 0000000..c7fca9e --- /dev/null +++ b/test/fixtures/discord-json-2021-01.md @@ 
-0,0 +1,41 @@ +# discord-json-2021-01 + +## Manual edits +* images -> placeholders + * `account/avatar.png` +* manually scrub folder names + * `account/applications/0000000000000` + +## Notes about files +* `activity/` + * All the .json are NDJSON so some json tools don't like them + * _Massive_ files. They hang scrub.ts for a long long time (had to run these piecemeal) + * These files also have an _incredible_ amount of shapes and variance. + * Instead of outputting all the shapes I made a sort of "super-object" to capture the shape with `jq -n '[inputs] | add' events-2021-00000-of-00001.json.tmp > unique_shape.json` and then scrubbing `unique_shape.json` +* `messages/` + * I hand did these to keep all the ids the same + * There are multiple types of chats. DMs, guild channels, etc + * I hand did the csvs as I have no scrubber for that + * These are only **THE EXPORTING USER'S MESSAGES**, no other user, just fyi + * Ids in `messages.csv` are just the id of the message, not of any user + * There is the potential to derive missing info from a channel via `@` tags sent or possibly via attachments. Maybe... + * `11111111111111111` + * This one has a shorter id (it's an older one) + * Has `type: 0` but there's no guild information in `channel.json` + * The user name was `null` in `index.json` + * It's a really odd one + * `222222222222222222` + * This was a dm channel (said `direct message with xxx#7777` in index.json) + * Has `type: 1` and there are two recipients (just the ids) in `channel.json` + * Unfortunately that's all the info in the export + * `333333333333333333` + * This was a normal guild channel + * `type: 0` and there's guild information in `channel.json` + * I kept a good set of messages around from this one to show how attachments and other stuff work + * The last message seemed to be a link not as an attachment. Links just seem to be normal text +* `programs/` + * was empty...
+* `servers/`` + * Info about _some_ of the guilds we have ids for + * guild.json didn't really contain anything except the name + * I kept around the only guild I noticed an audit-log.json with info in it \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/README.txt b/test/fixtures/discord-json-2021-01/README.txt new file mode 100644 index 0000000..e13a74b --- /dev/null +++ b/test/fixtures/discord-json-2021-01/README.txt @@ -0,0 +1,26 @@ + __ __ ___ _ _ ___ ___ ___ _____ ___ _ + \ \ / / / _ \ | | | | | _ \ o O O | \ / \ |_ _| / \ | | + \ V / | (_) | | |_| | | / o | |) | | - | | | | - | |_| + _|_|_ \___/ \___/ |_|_\ TS__[O] |___/ |_|_| _|_|_ |_|_| _(_)_ +_| """ |_|"""""|_|"""""|_|"""""| <======|_|"""""|_|"""""|_|"""""|_|"""""|_| """ | +"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'./o--000'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-' + ___ ___ _ _ ___ ___ ___ _ _ _ + |_ _| / __| o O O | || | | __| | _ \ | __| | | | | | | + | | \__ \ o | __ | | _| | / | _| |_| |_| |_| + |___| |___/ TS__[O] |_||_| |___| |_|_\ |___| _(_)_ _(_)_ _(_)_ +_|"""""|_|"""""| <======|_|"""""|_|"""""|_|"""""|_|"""""|_| """ |_| """ |_| """ | +"`-0-0-'"`-0-0-'./o--000'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-'"`-0-0-' + +Welcome to your Discord Data Package! + +Inside, you'll find a few JSON (JavaScript Object Notation) and CSV (Comma Separated Values) files +of the data we use to provide Discord's service to you. We've chosen these formats for ease of +processing. Furthermore, the files have been organized into logical groups to make it easy to +understand and work with (at least, we hope so)! 
+ +For more information, you can view our in-depth help article at the following URL: + +https://support.discord.com/hc/articles/360004957991 + +All the best, +Discord Team diff --git a/test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json b/test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json new file mode 100644 index 0000000..58dc565 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/account/applications/0000000000000000/application.json @@ -0,0 +1,16 @@ +{ + "id": "111111111111111111", + "name": "xxxxxxx", + "icon": null, + "description": "", + "summary": "", + "hook": false, + "verify_key": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "flags": 1, + "secret": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "redirect_uris": [], + "rpc_application_state": 1, + "store_application_state": 1, + "verification_state": 1, + "interactions_endpoint_url": null +} diff --git a/test/fixtures/discord-json-2021-01/account/avatar.png b/test/fixtures/discord-json-2021-01/account/avatar.png new file mode 100644 index 0000000000000000000000000000000000000000..0ada82819be1ca62ac5b19f4cb883a3838b66d0f GIT binary patch literal 1787 zcmbtVeKga182?RJ>xJ@`)ZPxwu_xo%3;t zD*6b2;%?t5auYAcF%l}_Tmpkj62YOsJrBZ@QNx`(A*y>aGgY@00xDV_9x5AA00j2` zFYgrsd+9xppMqM0s@0w8c-a~={np^nB@zXbXW&pD+;(ZQ3aR-lsQCgbS^VXF^CxOb z@J$V4YYU57e^%(Tfc>%V&lkTOjF}q$CK2FdGFd>4PwB9W?mqt}z5S<8(bCuJ38K%H z%w$M>LV}>5s6E(^n?MFpV#?FjR(d_ZeO)ZsAM|q1Q!EIMmoCjn7KRFvs20a7r|#K? 
zE2HIIR~F9|n7T`7hf*Z`-BR9U1Ma4#DdmBC#vog~^4AeVmn#?js1gC2U309=+LjDY zWUwgs3aZMuq~cKQV5pN%rrKt`HEQ5C9HM;u~gAAwg6*7?{ zVKpMS{qmz@A~7$J$7qAPJZ^$wJ< zJWE^Vu-jtWh}ssc#*i-83@*d?+3D7332gjXpiPQ!{s!&uY^dgLwYj$|pT`VM-AKwK zviX7A3ueE|d`kCC%caZI`V&=0g~^IJ-wH=O-O1q)p4EaP)MChXl#EZ^1Oz)(E*Ud% zOV+Uj2EscNmn%<#oMSQQ@->su0s{xSsLUo5libtPG(I8+xlB_Ty=U78zxxi3jK2dM zbl1B*-5$44OfQe@ye7EM?U-z~UrWi=^`v^=)+12&2UEN#kIoI;x+eZ|vMWV;saW;J zsnHA^{f=c3GcLl^EqMY^oRt{#HZBq%=&)BRC+z`d;|GWAJ8Q$-t~8ou&iv!l5y z=H{qv^7pbG2hI%ZPn=+|T5)n3_!wh?;rZ@Dgc*HZWSQGK?X+a0uxWOEXA-sMelb1sbBhMSsrz1E?q}l_&-4 zeqHCa=pO5rgX|IM<2xhdYlv8kZSmb#!!?TrTy$ zKCilw%oJ@_&wDCmlAITq%=->jPg9i&S<&*zMpYDFA?AHRBA6G+#KWQUmWJ8ZdoP(+ zVcsV_9=_^$1(99~g$2}~64KQEnWfPZ5w*&&zCX^Vrh{XK=70kyTwRw-BWnTey}nJ0 z?`nra@9<1SVe$cud5DjbQ&`~;HGXdDMYsvd)*!NFwBIFH^X`F=FYJR)9B`A>_#+^U z*@m8cWB3KvTVo?5B1$pX)Y`0b(#k__Ut?7yvr?G7d&#c-`|+3zr!gbDh#aP0t~&+K z{@kpk+3jRsG+}OrQ=NmL{m%^vm21LJ(D^hsF>d)H|b702)se zyCrrO@w^ZBL`UQ0>$7tQ_Sgz@2>huRHPf;hE05+2V+!{ZelTRU>jbg$0VH0=qfvj_Oc8+4}sGPvsx4JnD&ULj!zo(bw)g_L&Kvb39^zNT?*8h4u40iT0|L2xK b)~EvdMz?^hcQwKm+-3mU8{^gJdG`9hNB}+v literal 0 HcmV?d00001 diff --git a/test/fixtures/discord-json-2021-01/account/user.json b/test/fixtures/discord-json-2021-01/account/user.json new file mode 100644 index 0000000..d26819a --- /dev/null +++ b/test/fixtures/discord-json-2021-01/account/user.json @@ -0,0 +1,399 @@ +{ + "id": "111111111111111111", + "username": "xxxxxxxx", + "discriminator": 1111, + "email": "not_a_real_email@example.com", + "verified": false, + "avatar_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "has_mobile": false, + "needs_email_verification": false, + "premium_until": "2020-04-13T10:09:08.000000+00:00", + "flags": 11111111111111, + "phone": "xxxxxxxxxxxx", + "temp_banned_until": null, + "ip": "1.1.1.1", + "settings": { + "locale": "xxxxx", + "show_current_game": false, + "restricted_guilds": [], + "default_guilds_restricted": false, + "inline_attachment_media": false, + "inline_embed_media": false, + "gif_auto_play": false, 
+ "render_embeds": false, + "render_reactions": false, + "animate_emoji": false, + "enable_tts_command": false, + "message_display_compact": false, + "convert_emoticons": false, + "explicit_content_filter": 1, + "disable_games_tab": false, + "theme": "xxxx", + "developer_mode": false, + "guild_positions": [ + "111111111111111111", + "111111111111111111" + ], + "detect_platform_accounts": false, + "status": "xxxxxx", + "afk_timeout": 111, + "timezone_offset": 111, + "stream_notifications_enabled": false, + "allow_accessibility_detection": false, + "contact_sync_enabled": false, + "native_phone_integration_enabled": false, + "animate_stickers": 1, + "friend_source_flags": { + "all": false + }, + "guild_folders": [ + { + "guild_ids": [ + "111111111111111111" + ], + "id": null, + "name": null, + "color": null + }, + { + "guild_ids": [ + "111111111111111111" + ], + "id": null, + "name": null, + "color": null + } + ], + "custom_status": null + }, + "connections": [ + { + "type": "xxxxxxxxx", + "id": "xxxxxxxxxxx", + "name": "xxxxxxxxxxx", + "revoked": false, + "visibility": 1, + "friend_sync": false, + "show_activity": false, + "verified": false + }, + { + "type": "xxxxxxx", + "id": "xxxxxxxx", + "name": "xxxxxxxx", + "revoked": false, + "visibility": 1, + "friend_sync": false, + "show_activity": false, + "verified": false + } + ], + "external_friends_lists": [ + { + "user_id": "111111111111111111", + "platform_type": "xxxxx", + "name": "xxxxxxxx", + "id_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "friend_id_hashes": [ + "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1" + ] + }, + { + "user_id": "111111111111111111", + "platform_type": "xxxxxxxxx", + "name": "xxxxxxxxxxx", + "id_hash": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "friend_id_hashes": [ + "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + 
"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1" + ] + } + ], + "friend_suggestions": [], + "mfa_sessions": [], + "relationships": [ + { + "id": "11111111111111111", + "type": 1, + "nickname": null, + "user": { + "id": "11111111111111111", + "username": "xxxxxxxxxxxx", + "avatar": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "discriminator": "1111", + "public_flags": 1 + } + }, + { + "id": "11111111111111111", + "type": 1, + "nickname": null, + "user": { + "id": "11111111111111111", + "username": "xxxx", + "avatar": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "discriminator": "1111", + "public_flags": 111 + } + } + ], + "payments": [ + { + "id": "111111111111111111", + "created_at": "2020-04-13T10:09:08.000000+00:00", + "currency": "xxx", + "tax": 111, + "tax_inclusive": false, + "amount": 1111, + "amount_refunded": 1, + "status": 1, + "description": "xxxxxxxxxxxxxxxxxxxx", + "flags": 1, + "subscription": { + "id": "111111111111111111", + "type": 1, + "current_period_start": "2020-04-13T10:09:08.000000+00:00", + "current_period_end": "2020-04-13T10:09:08.000000+00:00", + "payment_gateway": null, + "payment_gateway_plan_id": "xxxxxxxxxxxxxxxxxxx", + "currency": "xxx", + "plan_id": "111111111111111111", + "items": [ + { + "id": "111111111111111111", + "plan_id": "111111111111111111", + "quantity": 1 + } + ] + }, + "payment_source": { + "id": "111111111111111111", + "type": 1, + "invalid": false, + "brand": "xxxx", + "last_4": "1111", + "expires_month": 11, + "expires_year": 1111, + "billing_address": { + "name": "xxxxxxxxxxxxx", + "line_1": "xxxxxxxxxxxxxxxxx", + "line_2": null, + "city": "xxxxxxxx", + "state": "xx", + "country": "xx", + "postal_code": "11111" + }, + "country": "xx" + }, + "sku_id": "111111111111111111", + "sku_price": 1111, + "sku_subscription_plan_id": "111111111111111111" + }, + { + "id": "111111111111111111", + "created_at": "2020-04-13T10:09:08.000000+00:00", + "currency": "xxx", + "tax": 111, + "tax_inclusive": false, + "amount": 1111, + 
"amount_refunded": 1, + "status": 1, + "description": "xxxxxxxxxxxxxxxxxxxx", + "flags": 1, + "subscription": { + "id": "111111111111111111", + "type": 1, + "current_period_start": "2020-04-13T10:09:08.000000+00:00", + "current_period_end": "2020-04-13T10:09:08.000000+00:00", + "payment_gateway": null, + "payment_gateway_plan_id": "xxxxxxxxxxxxxxxxxxx", + "currency": "xxx", + "plan_id": "111111111111111111", + "items": [ + { + "id": "111111111111111111", + "plan_id": "111111111111111111", + "quantity": 1 + } + ] + }, + "payment_source": { + "id": "111111111111111111", + "type": 1, + "invalid": false, + "brand": "xxxx", + "last_4": "1111", + "expires_month": 11, + "expires_year": 1111, + "billing_address": { + "name": "xxxxxxxxxxxxx", + "line_1": "xxxxxxxxxxxxxxxxxx", + "line_2": null, + "city": "xxxxxxxxxx", + "state": "xx", + "country": "xx", + "postal_code": "11111" + }, + "country": "xx" + }, + "sku_id": "111111111111111111", + "sku_price": 1111, + "sku_subscription_plan_id": "111111111111111111" + } + ], + "payment_sources": [ + { + "id": "111111111111111111", + "type": 1, + "invalid": false, + "brand": "xxxx", + "last_4": "1111", + "expires_month": 11, + "expires_year": 1111, + "billing_address": { + "name": "xxxxxxxxxxxxx", + "line_1": "xxxxxxxxxxxxxxxxx", + "line_2": null, + "city": "xxxxxxxx", + "state": "xx", + "country": "xx", + "postal_code": "11111" + }, + "country": "xx" + } + ], + "guild_settings": [ + { + "guild_id": null, + "suppress_everyone": false, + "suppress_roles": false, + "message_notifications": 1, + "mobile_push": false, + "muted": false, + "mute_config": null, + "channel_overrides": [ + { + "channel_id": "111111111111111111", + "message_notifications": 1, + "muted": false, + "mute_config": null + } + ], + "version": 11 + }, + { + "guild_id": "11111111111111111", + "suppress_everyone": false, + "suppress_roles": false, + "message_notifications": 1, + "mobile_push": false, + "muted": false, + "mute_config": null, + "channel_overrides": [ + 
{ + "channel_id": "111111111111111111", + "message_notifications": 1, + "muted": false, + "mute_config": null + }, + { + "channel_id": "111111111111111111", + "message_notifications": 1, + "muted": false, + "mute_config": null + } + ], + "version": 1 + } + ], + "library_applications": [ + { + "application": { + "id": "111111111111111111", + "name": "xxxxxxxxxxxx", + "icon": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "description": "xxxxxxxxxxxxxxxxxxxxx", + "summary": "xxxxxxxxxxxxxxxxxxxxx", + "primary_sku_id": "111111111111111111", + "hook": false, + "slug": "xxxxxxxxxxxx", + "guild_id": "111111111111111111", + "verify_key": "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1", + "publishers": [ + { + "id": "111111111111111111", + "name": "xxxxxxxxxxx" + } + ], + "developers": [ + { + "id": "111111111111111111", + "name": "xxxxxxxxxxx" + }, + { + "id": "111111111111111111", + "name": "xxxxxxxxxxxxxxxxxxxxxxxx" + } + ] + }, + "branch_id": "111111111111111111", + "sku_id": "111111111111111111", + "sku": { + "id": "111111111111111111", + "type": 1, + "premium": false, + "preorder_release_at": null, + "preorder_approximate_release_date": null + }, + "flags": 1, + "created_at": "2020-04-13T10:09:08.000000+00:00", + "entitlements": [ + { + "id": "111111111111111111", + "sku_id": "111111111111111111", + "application_id": "111111111111111111", + "user_id": "111111111111111111", + "type": 1, + "deleted": false, + "gift_code_flags": 1, + "branches": [ + "111111111111111111" + ] + } + ] + } + ], + "entitlements": [ + { + "id": "111111111111111111", + "sku_id": "111111111111111111", + "application_id": "111111111111111111", + "user_id": "111111111111111111", + "type": 1, + "deleted": false, + "gift_code_flags": 1, + "branches": [ + "111111111111111111" + ], + "sku_name": "xxxxxxxxxxxx" + } + ], + "user_activity_application_statistics": [ + { + "application_id": "111111111111111111", + "last_played_at": "2020-04-13T10:09:08.000000+00:00", + "total_duration": 1111, + 
"total_discord_sku_duration": 1 + }, + { + "application_id": "111111111111111111", + "last_played_at": "2020-04-13T10:09:08.000000+00:00", + "total_duration": 111111, + "total_discord_sku_duration": 1 + } + ], + "notes": { + "111111111111111111": "xxxx" + } +} diff --git a/test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json b/test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json new file mode 100644 index 0000000..1660835 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/activity/analytics/events-2021-00000-of-00001.json @@ -0,0 +1,2 @@ +{"event_type":"xxxxxxxxxxxxxxxx","event_id":"some/path","user_id":"111111111111111111","domain":"xxxxxxxxx","ip":"1.1.1.1","day":"1111","variant":"a","browser":"xxxxxxxxxxxxxx","os":"xxxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","city":"xxxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","chosen_locale":"xxxxx","detected_locale":"xxxxx","os_version":"xxxxxxxxxx","release_channel":"xxxxxx","client_version":"xxxxxxx","event_source":"xxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","client_build_number":"11111","is_refresh":false,"success":false,"method":"xxxxxxxxxxxxxxxxxxx","user_is_authenticated":false,"isp":"xxxxxxxxxxxxx","message_id":"111111111111111111","channel":"111111111111111111","channel_type":"1","is_friend":false,"private":false,"server":"111111111111111111","num_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":false,"emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":fa
lse,"user_is_bot":false,"sticker_ids":[],"message_type":"1","device":"xxxxxxxxxxxxxxxxx","browser_user_agent":"some/path","os_sdk_version":"11","accessibility_support_enabled":false,"accessibility_features":"111","os_arch":"xxx","system_locale":"xxxxx","payment_source_id":"111111111111111111","payment_source_type":"1","payment_gateway":"1","is_default":false,"search_engine":"xxxxxx","location":"xxxxxxxxxxxxxxxxxxxxxxxx","channel_id":"111111111111111111","guild_id":"111111111111111111","nonce":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","voice_state_count":"1","video_stream_count":"1","video_enabled":false,"rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","connection_type":"xxxxxxx","game_name":"xxxxxxxxx","game_platform":"xxxxxxx","game_id":"111111111111111111","client_performance_cpu":8.08,"client_performance_memory":"111111","custom_status_count":"1","effective_connection_speed":"xx","browser_version":"xxxx","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxxx","guild_size_total":"1111","guild_member_num_roles":"1","guild_member_perms":"11111111","guild_num_channels":"11","guild_num_text_channels":"11","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","channel_size_total":"1","channel_member_perms":"111111111","channel_hidden":false,"num_users_visible":"1","num_users_visible_with_mobile_indicator":"1","num_users_visible_with_game_activity":"1","num_users_visible_with_activity":"1","search_engine_current":"xxxxxxxxxx","type":"xxxxxxxxxx","action":"xxxxxxx","previous_action":"xxxx","channel_size_online":"11","command":"xxx","source":"xxxxxxxxxxxx","action_type":"1","mime_type":"some/path","total_attachments":"1","platform_type":"xxxxxxxxxxxxxxx","display_type":"xxxxx","payment_id":"111111111111111111","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","payment_type":"xxxxxxxxxxxx","price":"1111","currency":"xxx","amount":"1111","amount_refunded":"1","tax":"111","tax_inclusive":
false,"sku_id":"111111111111111111","sku_type":"1","sku_subscription_plan_id":"111111111111111111","subscription_id":"111111111111111111","subscription_type":"1","subscription_payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_plan_id":"111111111111111111","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","failure_message":"some/path","is_gift":false,"average_ping":"11","duration":"1111","maximum_ping":"11","minimum_ping":"11","previous_tier":"xx","quality":8.08,"session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","speaker":"111111111111111111","tier":"1","guild_affinity_score":8.08,"guild_affinity_index":"11","is_pending":false,"preview_enabled":false,"location_page":"xxxxxxxxxxxxx","location_section":"xxxxxxxxxxxxx","location_object":"xxxxxxxxxx","location_object_type":"xxx","subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","regular_price":"1111","to_step":"xxxxxxx","from_step":"xxxxxx","load_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","step_duration_ms":"1111","flow_duration_ms":"1111","eligible_for_trial":false,"session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"marketing_variant":"a","platform":"xxxxxxx","ptb":false,"referring_location":"xxxx","released":false,"num_failed":"1","num_delta_installed":"1","num_full_installed":"1","foreground_bytes_total":"11111111","background_bytes_total":"1","foreground_download_ms_total":"111","background_download_ms_total":"1","foreground_install_ms_total":"1111","background_install_ms_total":"1","foreground_install_ms_discord_voice":"1111","min_version_discord_voice":"1","max_version_discord_voice":"1","foreground_install_ms_discord_rpc":"111","min_version_discord_rpc":"1","max_version_discord_rpc":"1","foreground_install_ms_discord_desktop_core":"111","min_version_discord_desktop_core":"1","max_version_discord_desktop_core":"1",
"background_bytes_discord_rpc":"11111","background_download_ms_discord_rpc":"111","background_bytes_discord_cloudsync":"1111111","background_bytes_discord_dispatch":"1111111","background_bytes_discord_game_utils":"111111","background_bytes_discord_media":"111111","background_bytes_discord_modules":"1111111","background_bytes_discord_overlay2":"111111","background_download_ms_discord_cloudsync":"111","background_download_ms_discord_dispatch":"111","background_download_ms_discord_game_utils":"111","background_download_ms_discord_media":"111","background_download_ms_discord_modules":"111","background_download_ms_discord_overlay2":"111","background_install_ms_discord_cloudsync":"111","background_install_ms_discord_dispatch":"111","background_install_ms_discord_game_utils":"111","background_install_ms_discord_media":"11","background_install_ms_discord_modules":"111","background_install_ms_discord_overlay2":"111","background_install_ms_discord_rpc":"111","max_version_discord_cloudsync":"1","max_version_discord_dispatch":"1","max_version_discord_game_utils":"1","max_version_discord_media":"1","max_version_discord_modules":"1","max_version_discord_overlay2":"1","background_bytes_discord_krisp":"11111111","background_download_ms_discord_krisp":"1111","background_install_ms_discord_krisp":"1111","max_version_discord_krisp":"1","background_bytes_discord_voice":"1111111","background_download_ms_discord_voice":"111","background_bytes_discord_desktop_core":"1111111","background_download_ms_discord_desktop_core":"111","background_bytes_discord_hook":"1111111","background_download_ms_discord_hook":"111","background_install_ms_discord_hook":"111","max_version_discord_hook":"1","foreground_bytes_discord_game_utils":"111111","foreground_download_ms_discord_game_utils":"111","foreground_install_ms_discord_dispatch":"111","foreground_install_ms_discord_game_utils":"11","min_version_discord_dispatch":"1","min_version_discord_game_utils":"1","foreground_bytes_discord_voice":"1111111","for
eground_download_ms_discord_voice":"111","foreground_install_ms_discord_overlay2":"111","min_version_discord_overlay2":"1","foreground_bytes_discord_desktop_core":"1111111","foreground_bytes_discord_erlpack":"111111","foreground_bytes_discord_spellcheck":"1111111","foreground_bytes_discord_utils":"1111111","foreground_download_ms_discord_desktop_core":"111","foreground_download_ms_discord_erlpack":"11","foreground_download_ms_discord_spellcheck":"111","foreground_download_ms_discord_utils":"111","foreground_install_ms_discord_erlpack":"11","foreground_install_ms_discord_spellcheck":"1111","foreground_install_ms_discord_utils":"1111","min_version_discord_erlpack":"1","min_version_discord_spellcheck":"1","min_version_discord_utils":"1","max_version_discord_erlpack":"1","max_version_discord_spellcheck":"1","max_version_discord_utils":"1","foreground_bytes_discord_krisp":"11111111","foreground_bytes_discord_rpc":"11111","foreground_download_ms_discord_krisp":"1111","foreground_download_ms_discord_rpc":"111","foreground_install_ms_discord_krisp":"1111","min_version_discord_krisp":"1","foreground_bytes_discord_dispatch":"1111111","foreground_download_ms_discord_dispatch":"1111","setup_type":"xxxxxxxxxxxxxxxxxxx","temporary":false,"max_uses":"1","max_age":"11111","regenerate":false,"unique":false,"code":"xxxxxxxx","notice_type":"xxxxxxxxxxxxxxxx","survey_id":"xxxxxxxxxxxxxxxxxxxxxxxxxx","dismissed":false,"mode":"xxxxxxxxxxxxxx","mute":false,"anyone_priority":false,"game_exe_name":"xxxxxxxxxxxxx","media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","source_page":"xxxxxxxxxxxx","source_section":"xxxxxxxxxx","source_object":"xxxxxxxx","change_log_id":"xxxxxxxxxxxx","duration_ms":"11111","num_applications_total":"1","num_applications_battlenet":"1","num_applications_discord":"1","num_applications_steam":"1","num_applications_twitch":"1","num_applications_uplay":"1","num_applications_origin":"1","num_applications_gog":"1","num_applications_epic":"1","cpu":"xxxxxxx","gpu":"xxxx
xxx","name":"xxxxxxxxxxxxxxxxxxxxx","bucket":"1","revision":"1","game_ids":["111111111111111111","111111111111111111"],"num_cards_game_news":"1","num_users_subscribed":"11","window_width":"1111","window_height":"1111","feed_layout":"xxxxxxxxx","subscribed_games":[],"num_items_now_playing":"1","num_items_recently_played":"11","news_ids_viewed":[],"guild_ids_viewed":[],"num_cards":"11","num_cards_visible":"1","num_cards_game_playable":"1","num_game_parties":"1","num_game_parties_voice":"1","num_game_parties_solo":"1","num_game_parties_recently_played":"11","num_game_parties_rich_presence":"1","num_game_parties_collapsed":"1","num_launcher_applications":"1","is_premium":false,"application_id":"111111111111111111","branch_id":"111111111111111111","manifest_ids":[],"target_build_id":"111111111111111111","target_manifest_ids":[],"patch_type":"xxxxxxx","num_guilds":"111","num_guilds_recommended":"1","num_guilds_popular":"111","recommended_guild_ids":["11111111111111111","111111111111111111"],"notif_type":"xxxxxxxxxxxxxx","notif_in_app":false,"rel_type":"1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","channel_is_nsfw":false,"context":"xxxxxx","parent_media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","identity_type":"xxxxx","stacktrace":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","notifications_in_app_enabled":false,"emoji_id":"111111111111111111","emoji_name":"x","full":false,"instant_invite":false,"has_images":false,"party_platform":"xxxxxxx","track_id":"xxxxxxxxxxxxxxxxxxxxxx","has_match_secret":false,"has_spectate_secret":false,"has_join_secret":false,"party_max":"1"} +{"event_type":"xxxxxxxxxxxxxxxx","event_id":"some/path","user_id":"111111111111111111","domain":"xxxxxxxxx","ip":"1.1.1.1","day":"1111","variant":"a","browser":"xxxxxxxxxxxxxx","os":"xxxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","city":"xxxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","chosen_locale":"xxxxx","detected_locale":"xxxxx","os_version":"xxxxxxxxxx","release_channel":"xxxxxx","client_version":"xxxxxxx","event_source":"xxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","client_build_number":"11111","is_refresh":false,"success":false,"method":"xxxxxxxxxxxxxxxxxxx","user_is_authenticated":false,"isp":"xxxxxxxxxxxxx","message_id":"111111111111111111","channel":"111111111111111111","channel_type":"1","is_friend":false,"private":false,"server":"111111111111111111","num
_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":false,"emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","device":"xxxxxxxxxxxxxxxxx","browser_user_agent":"some/path","os_sdk_version":"11","accessibility_support_enabled":false,"accessibility_features":"111","os_arch":"xxx","system_locale":"xxxxx","payment_source_id":"111111111111111111","payment_source_type":"1","payment_gateway":"1","is_default":false,"search_engine":"xxxxxx","location":"xxxxxxxxxxxxxxxxxxxxxxxx","channel_id":"111111111111111111","guild_id":"111111111111111111","nonce":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","voice_state_count":"1","video_stream_count":"1","video_enabled":false,"rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","connection_type":"xxxxxxx","game_name":"xxxxxxxxx","game_platform":"xxxxxxx","game_id":"111111111111111111","client_performance_cpu":8.08,"client_performance_memory":"111111","custom_status_count":"1","effective_connection_speed":"xx","browser_version":"xxxx","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxxx","guild_size_total":"1111","guild_member_num_roles":"1","guild_member_perms":"11111111","guild_num_channels":"11","guild_num_text_channels":"11","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","channel_size_total":"1","channel_member_perms":"111111111","channel_hidden":false,"num_users_visible":"1","num_users_visible_with_mobile_indicator":"1","num_users_visible_with_game_activity":"1","num_users_visible_with_activity":"1","search_engine_current":"xxxxxxxxxx","type":"xxxxxxxxxx","action":"xxxxxxx","previous_action":"xxxx","channel_size_online":"11","command":"xxx","source
":"xxxxxxxxxxxx","action_type":"1","mime_type":"some/path","total_attachments":"1","platform_type":"xxxxxxxxxxxxxxx","display_type":"xxxxx","payment_id":"111111111111111111","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","payment_type":"xxxxxxxxxxxx","price":"1111","currency":"xxx","amount":"1111","amount_refunded":"1","tax":"111","tax_inclusive":false,"sku_id":"111111111111111111","sku_type":"1","sku_subscription_plan_id":"111111111111111111","subscription_id":"111111111111111111","subscription_type":"1","subscription_payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_plan_id":"111111111111111111","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","failure_message":"some/path","is_gift":false,"average_ping":"11","duration":"1111","maximum_ping":"11","minimum_ping":"11","previous_tier":"xx","quality":8.08,"session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","speaker":"111111111111111111","tier":"1","guild_affinity_score":8.08,"guild_affinity_index":"11","is_pending":false,"preview_enabled":false,"location_page":"xxxxxxxxxxxxx","location_section":"xxxxxxxxxxxxx","location_object":"xxxxxxxxxx","location_object_type":"xxx","subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","regular_price":"1111","to_step":"xxxxxxx","from_step":"xxxxxx","load_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","step_duration_ms":"1111","flow_duration_ms":"1111","eligible_for_trial":false,"session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"marketing_variant":"a","platform":"xxxxxxx","ptb":false,"referring_location":"xxxx","released":false,"num_failed":"1","num_delta_installed":"1","num_full_installed":"1","foreground_bytes_total":"11111111","background_bytes_total":"1","foreground_download_ms_total":"111","background_download_ms_total":"1","foreground_install_ms_total":"1111","background_install_ms_total":"1"
,"foreground_install_ms_discord_voice":"1111","min_version_discord_voice":"1","max_version_discord_voice":"1","foreground_install_ms_discord_rpc":"111","min_version_discord_rpc":"1","max_version_discord_rpc":"1","foreground_install_ms_discord_desktop_core":"111","min_version_discord_desktop_core":"1","max_version_discord_desktop_core":"1","background_bytes_discord_rpc":"11111","background_download_ms_discord_rpc":"111","background_bytes_discord_cloudsync":"1111111","background_bytes_discord_dispatch":"1111111","background_bytes_discord_game_utils":"111111","background_bytes_discord_media":"111111","background_bytes_discord_modules":"1111111","background_bytes_discord_overlay2":"111111","background_download_ms_discord_cloudsync":"111","background_download_ms_discord_dispatch":"111","background_download_ms_discord_game_utils":"111","background_download_ms_discord_media":"111","background_download_ms_discord_modules":"111","background_download_ms_discord_overlay2":"111","background_install_ms_discord_cloudsync":"111","background_install_ms_discord_dispatch":"111","background_install_ms_discord_game_utils":"111","background_install_ms_discord_media":"11","background_install_ms_discord_modules":"111","background_install_ms_discord_overlay2":"111","background_install_ms_discord_rpc":"111","max_version_discord_cloudsync":"1","max_version_discord_dispatch":"1","max_version_discord_game_utils":"1","max_version_discord_media":"1","max_version_discord_modules":"1","max_version_discord_overlay2":"1","background_bytes_discord_krisp":"11111111","background_download_ms_discord_krisp":"1111","background_install_ms_discord_krisp":"1111","max_version_discord_krisp":"1","background_bytes_discord_voice":"1111111","background_download_ms_discord_voice":"111","background_bytes_discord_desktop_core":"1111111","background_download_ms_discord_desktop_core":"111","background_bytes_discord_hook":"1111111","background_download_ms_discord_hook":"111","background_install_ms_discord_hook":"111","
max_version_discord_hook":"1","foreground_bytes_discord_game_utils":"111111","foreground_download_ms_discord_game_utils":"111","foreground_install_ms_discord_dispatch":"111","foreground_install_ms_discord_game_utils":"11","min_version_discord_dispatch":"1","min_version_discord_game_utils":"1","foreground_bytes_discord_voice":"1111111","foreground_download_ms_discord_voice":"111","foreground_install_ms_discord_overlay2":"111","min_version_discord_overlay2":"1","foreground_bytes_discord_desktop_core":"1111111","foreground_bytes_discord_erlpack":"111111","foreground_bytes_discord_spellcheck":"1111111","foreground_bytes_discord_utils":"1111111","foreground_download_ms_discord_desktop_core":"111","foreground_download_ms_discord_erlpack":"11","foreground_download_ms_discord_spellcheck":"111","foreground_download_ms_discord_utils":"111","foreground_install_ms_discord_erlpack":"11","foreground_install_ms_discord_spellcheck":"1111","foreground_install_ms_discord_utils":"1111","min_version_discord_erlpack":"1","min_version_discord_spellcheck":"1","min_version_discord_utils":"1","max_version_discord_erlpack":"1","max_version_discord_spellcheck":"1","max_version_discord_utils":"1","foreground_bytes_discord_krisp":"11111111","foreground_bytes_discord_rpc":"11111","foreground_download_ms_discord_krisp":"1111","foreground_download_ms_discord_rpc":"111","foreground_install_ms_discord_krisp":"1111","min_version_discord_krisp":"1","foreground_bytes_discord_dispatch":"1111111","foreground_download_ms_discord_dispatch":"1111","setup_type":"xxxxxxxxxxxxxxxxxxx","temporary":false,"max_uses":"1","max_age":"11111","regenerate":false,"unique":false,"code":"xxxxxxxx","notice_type":"xxxxxxxxxxxxxxxx","survey_id":"xxxxxxxxxxxxxxxxxxxxxxxxxx","dismissed":false,"mode":"xxxxxxxxxxxxxx","mute":false,"anyone_priority":false,"game_exe_name":"xxxxxxxxxxxxx","media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","source_page":"xxxxxxxxxxxx","source_section":"xxxxxxxxxx","source_object":"xxxxxxxx","chan
ge_log_id":"xxxxxxxxxxxx","duration_ms":"11111","num_applications_total":"1","num_applications_battlenet":"1","num_applications_discord":"1","num_applications_steam":"1","num_applications_twitch":"1","num_applications_uplay":"1","num_applications_origin":"1","num_applications_gog":"1","num_applications_epic":"1","cpu":"xxxxxxx","gpu":"xxxxxxx","name":"xxxxxxxxxxxxxxxxxxxxx","bucket":"1","revision":"1","game_ids":["111111111111111111","111111111111111111"],"num_cards_game_news":"1","num_users_subscribed":"11","window_width":"1111","window_height":"1111","feed_layout":"xxxxxxxxx","subscribed_games":[],"num_items_now_playing":"1","num_items_recently_played":"11","news_ids_viewed":[],"guild_ids_viewed":[],"num_cards":"11","num_cards_visible":"1","num_cards_game_playable":"1","num_game_parties":"1","num_game_parties_voice":"1","num_game_parties_solo":"1","num_game_parties_recently_played":"11","num_game_parties_rich_presence":"1","num_game_parties_collapsed":"1","num_launcher_applications":"1","is_premium":false,"application_id":"111111111111111111","branch_id":"111111111111111111","manifest_ids":[],"target_build_id":"111111111111111111","target_manifest_ids":[],"patch_type":"xxxxxxx","num_guilds":"111","num_guilds_recommended":"1","num_guilds_popular":"111","recommended_guild_ids":["11111111111111111","111111111111111111"],"notif_type":"xxxxxxxxxxxxxx","notif_in_app":false,"rel_type":"1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","channel_is_nsfw":false,"context":"xxxxxx","parent_media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","identity_type":"xxxxx","stacktrace":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","notifications_in_app_enabled":false,"emoji_id":"111111111111111111","emoji_name":"x","full":false,"instant_invite":false,"has_images":false,"party_platform":"xxxxxxx","track_id":"xxxxxxxxxxxxxxxxxxxxxx","has_match_secret":false,"has_spectate_secret":false,"has_join_secret":false,"party_max":"1"} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json b/test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json new file mode 100644 index 0000000..48f3f5a --- /dev/null +++ b/test/fixtures/discord-json-2021-01/activity/modeling/events-2021-00000-of-00001.json @@ -0,0 +1,2 @@ 
+{"event_type":"xxxxxxxxxxxx","event_id":"some/path","event_source":"xxx","user_id":"111111111111111111","domain":"xxxxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","user_is_authenticated":false,"accessibility_support_enabled":false,"browser_user_agent":"some/path","browser":"xxxxxxxxxxxxxx","browser_version":"xxxxxxxxxxxxx","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","os":"xxxxxxx","os_version":"xxxxxxxxxx","client_build_number":"11111","release_channel":"xxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","search_engine":"xxxxxx","location":"xxxxxxxxxxxxxxxxxxxxxxxx","city":"xxxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","isp":"xxxxxxxxxxxx","location_page":"xxxxxxxxxxxxx","location_section":"xxxxxxx","location_object":"xxxxxxxxxx","location_object_type":"xxx","subscription_type":"1","subscription_plan_id":"111111111111111111","payment_type":"xxxxxxxxxxxx","price":"1111","regular_price":"1111","currency":"xxx","is_gift":false,"to_step":"xxxxxxx","from_step":"xxxxxx","load_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","step_duration_ms":"1111","flow_duration_ms":"1111","accessibility_features":"111","eligible_for_trial":false,"client_send_timestamp":"xxxx","client_track_timestamp":"xxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","device":"xxxxxxxxxxxxxxx","os_sdk_version":"11","client_version":"xxxxxxx","channel_type":"1","channel":"111111111111111111","mode":"xxxxxxxxxxxxxx","server":"111111111111111111","system_locale":"xxxxx","game_platform":"xxxxxxx","game_name":"xxxxxx","variant":"a","mute":false,"os_arch":"xxx","client_performance_cpu":8.08,"client_performance_memory":"111111","channel_id":"111111111111111111","channel_size_total":"1","channel_member_perms":"1111111111","channel_hidden":false,"anyone_priority":false,"game_exe_name":"xxxxxxxxxxxxx","game_id":"111111111111111111","guild_id":"111111111111111111","guild_size_total":
"1","guild_member_num_roles":"1","guild_member_perms":"1111111111","guild_num_channels":"1","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","voice_state_count":"1","channel_size_online":"1","message_id":"111111111111111111","is_friend":false,"private":false,"num_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":"1","emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","subscription_id":"111111111111111111","payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","removal_type":"xxxxxxxxxxxxxx","plan_id":"111111111111111111","is_initiator":false,"mutual_guilds":"1","type":"xxxxxxxxxx","user_guilds":"11","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","is_pending":false,"preview_enabled":false,"guild_affinity_score":8.08,"guild_affinity_index":"11","emoji_name":"xxxx","emoji_id":"111111111111111111","sku_id":"111111111111111111","quantity":"1","hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","port":"11111","connect_time":"111","connect_count":"1","audio_subsystem":"xxxxxxxx","audio_layer":"xxxxxxxxxxxxxxxx","cloudflare_best_region":"xxxxxxxxxx","context":"xxxxxxx","application_id":"111111111111111111","activity_duration_s":"111","total_duration_s":"1111","total_discord_sku_duration_s":"1","distributor":"xxxxx","priority":false,"packets_sent":"11","packets_sent_lost":"1","packets_received":"11","packets_received_lost":"1","channel_is_nsfw":false,"payment_id":"111
111111111111111","transaction_id":"111111111111111111","transaction_type":"1","exchange_rate":1,"presentment_currency":"xxx","settlement_currency":"xxx","net_presentment_amount":"111","net_presentment_fees":"11","net_presentment_tax":"1","net_settlement_amount":"111","net_settlement_fees":"11","net_settlement_tax":"1","payment_gateway":"1","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","presentment_amount":"1111","presentment_fees":"111","presentment_tax":"1","settlement_amount":"1111","settlement_fees":"111","settlement_tax":"1","invite_code":"xxxxxx","invite_guild_id":"111111111111111111","invite_channel_id":"111111111111111111","invite_channel_type":"1","invite_type":"xxxxxxx","is_suggested":false,"row_num":"1","num_total":"111","is_filtered":false,"num_affinity_connections":"111","amount":"1111","amount_refunded":"1","tax":"1","tax_inclusive":false,"sku_type":"1","sku_subscription_plan_id":"111111111111111111","subscription_payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","payment_source_id":"111111111111111111","failure_message":"some/path","nonce":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","video_stream_count":"1","video_enabled":false,"connection_type":"xxxxxxx","custom_status_count":"1","effective_connection_speed":"xx","store_title":"xxxxx","gift_code":"xxxxxxxxxxxxxxxx","gift_code_max_uses":"1","resolved":false,"subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","scheduled_start_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","scheduled_end_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","access_type":"xxxx","expected_to_autorenew":false,"duration":"11111","full":false,"instant_invite":false,"has_images":false,"profile_user_status":"xxxxxxx","is_streaming":false,"has_custom_status":false,"application_name":"xxxxxxxxxxxxx","source":"xxxxxxxxxxxx","party_max":"1","protocol":"xxx","reconnect":false,"reason":"xxxxxxxxxxx","channel_bitrate":"111111","ping_ave
rage":"11","ping_bad_count":"1","input_detected":false,"no_input_detected_notice":false,"audio_jitter_buffer_mean":"1","audio_jitter_buffer_p75":"1","audio_jitter_buffer_p95":"1","audio_jitter_buffer_p99":"1","audio_jitter_buffer_max":"1","audio_jitter_delay_mean":"1","audio_jitter_delay_p75":"1","audio_jitter_delay_p95":"1","audio_jitter_delay_p99":"1","audio_jitter_delay_max":"1","audio_jitter_target_mean":"1","audio_jitter_target_p75":"1","audio_jitter_target_p95":"1","audio_jitter_target_p99":"1","audio_jitter_target_max":"1","relative_reception_delay_mean":"1","relative_reception_delay_p75":"1","relative_reception_delay_p95":"1","relative_reception_delay_p99":"1","relative_reception_delay_max":"1","relative_playout_delay_mean":"1","relative_playout_delay_p75":"1","relative_playout_delay_p95":"1","relative_playout_delay_p99":"1","relative_playout_delay_max":"1","mos_mean":1,"mos_1":"1","mos_2":"1","mos_3":"1","mos_4":"1","duration_connection_type_wifi":"1","duration_connection_type_cellular":"1","duration_connection_type_ethernet":"1","duration_connection_type_bluetooth":"1","duration_connection_type_other":"1","duration_connection_type_unknown":"111","duration_connection_type_none":"1","duration_effective_connection_speed_2g":"1","duration_effective_connection_speed_3g":"1","duration_effective_connection_speed_4g":"111","duration_effective_connection_speed_unknown":"1","ping_timeout":"1","audio_input_mode":"xxxxxxxxxxxx","automatic_audio_input_sensitivity_enabled":false,"audio_input_sensitivity":111,"echo_cancellation_enabled":false,"noise_suppression_enabled":false,"automatic_gain_control_enabled":false,"voice_output_volume":111,"frame_op_silent":"1","frame_op_normal":"1","frame_op_merged":"1","frame_op_expanded":"1","frame_op_accelerated":"1","frame_op_preemptive_expanded":"1","frame_op_cng":"1","max_voice_state_count":"1","duration_listening":"1","duration_speaking":"1","duration_participation":"1","duration_connected":"1","noise_cancellation_enabled":false,
"decryption_failures":"1","encryption_mode":"xxxxxxxxxxxxxxx","audio_decoded_normal":"11","audio_decoded_plc":"11","audio_decoded_plccng":"11111","audio_decoded_cng":"1","audio_decoded_muted_output":"11111","search_type":"xxxxx","search_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","is_error":false,"limit":"11","offset":"1","page":"1","total_results":"1111","page_results":"11","is_indexing":false,"page_num_messages":"11","page_num_links":"11","page_num_embeds":"11","page_num_attach":"1","guild_ids":[],"search_engine_current":"xxxxxxxxxx","load_duration_ms":"111","num_modifiers":"1","custom":false,"guild":"111111111111111111","invite":"xxxxxx","user_day":"1111","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","size_total":"11","size_online":"11","guild_verification_level":"1","location_message_id":"111111111111111111","list_sort":"xxxxxxxxxxxxxxx","list_index":"1","game":"xxxxxxxxxxxxx","verified":false,"elevated":false,"duration_ms":"11111","party_platform":"xxxxxxx","activity_action":"1","activity_party_platform":"xxxxxxx","old_nsfw":false,"new_nsfw":false,"settings_type":"xxxxxxx","destination_pane":"xxxxxxxx","origin_pane":"xxxxxxxxxxxxxxx"} 
+{"event_type":"xxxxxxxxxxxx","event_id":"some/path","event_source":"xxx","user_id":"111111111111111111","domain":"xxxxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","user_is_authenticated":false,"accessibility_support_enabled":false,"browser_user_agent":"some/path","browser":"xxxxxxxxxxxxxx","browser_version":"xxxxxxxxxxxxx","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","os":"xxxxxxx","os_version":"xxxxxxxxxx","client_build_number":"11111","release_channel":"xxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","search_engine":"xxxxxx","location":"xxxxxxxxxxxxxxxxxxxxxxxx","city":"xxxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","isp":"xxxxxxxxxxxx","location_page":"xxxxxxxxxxxxx","location_section":"xxxxxxx","location_object":"xxxxxxxxxx","location_object_type":"xxx","subscription_type":"1","subscription_plan_id":"111111111111111111","payment_type":"xxxxxxxxxxxx","price":"1111","regular_price":"1111","currency":"xxx","is_gift":false,"to_step":"xxxxxxx","from_step":"xxxxxx","load_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","step_duration_ms":"1111","flow_duration_ms":"1111","accessibility_features":"111","eligible_for_trial":false,"client_send_timestamp":"xxxx","client_track_timestamp":"xxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","device":"xxxxxxxxxxxxxxx","os_sdk_version":"11","client_version":"xxxxxxx","channel_type":"1","channel":"111111111111111111","mode":"xxxxxxxxxxxxxx","server":"111111111111111111","system_locale":"xxxxx","game_platform":"xxxxxxx","game_name":"xxxxxx","variant":"a","mute":false,"os_arch":"xxx","client_performance_cpu":8.08,"client_performance_memory":"111111","channel_id":"111111111111111111","channel_size_total":"1","channel_member_perms":"1111111111","channel_hidden":false,"anyone_priority":false,"game_exe_name":"xxxxxxxxxxxxx","game_id":"111111111111111111","guild_id":"111111111111111111","guild_size_total":
"1","guild_member_num_roles":"1","guild_member_perms":"1111111111","guild_num_channels":"1","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","voice_state_count":"1","channel_size_online":"1","message_id":"111111111111111111","is_friend":false,"private":false,"num_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":"1","emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","subscription_id":"111111111111111111","payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","removal_type":"xxxxxxxxxxxxxx","plan_id":"111111111111111111","is_initiator":false,"mutual_guilds":"1","type":"xxxxxxxxxx","user_guilds":"11","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","is_pending":false,"preview_enabled":false,"guild_affinity_score":8.08,"guild_affinity_index":"11","emoji_name":"xxxx","emoji_id":"111111111111111111","sku_id":"111111111111111111","quantity":"1","hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","port":"11111","connect_time":"111","connect_count":"1","audio_subsystem":"xxxxxxxx","audio_layer":"xxxxxxxxxxxxxxxx","cloudflare_best_region":"xxxxxxxxxx","context":"xxxxxxx","application_id":"111111111111111111","activity_duration_s":"111","total_duration_s":"1111","total_discord_sku_duration_s":"1","distributor":"xxxxx","priority":false,"packets_sent":"11","packets_sent_lost":"1","packets_received":"11","packets_received_lost":"1","channel_is_nsfw":false,"payment_id":"111
111111111111111","transaction_id":"111111111111111111","transaction_type":"1","exchange_rate":1,"presentment_currency":"xxx","settlement_currency":"xxx","net_presentment_amount":"111","net_presentment_fees":"11","net_presentment_tax":"1","net_settlement_amount":"111","net_settlement_fees":"11","net_settlement_tax":"1","payment_gateway":"1","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","presentment_amount":"1111","presentment_fees":"111","presentment_tax":"1","settlement_amount":"1111","settlement_fees":"111","settlement_tax":"1","invite_code":"xxxxxx","invite_guild_id":"111111111111111111","invite_channel_id":"111111111111111111","invite_channel_type":"1","invite_type":"xxxxxxx","is_suggested":false,"row_num":"1","num_total":"111","is_filtered":false,"num_affinity_connections":"111","amount":"1111","amount_refunded":"1","tax":"1","tax_inclusive":false,"sku_type":"1","sku_subscription_plan_id":"111111111111111111","subscription_payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","payment_source_id":"111111111111111111","failure_message":"some/path","nonce":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","video_stream_count":"1","video_enabled":false,"connection_type":"xxxxxxx","custom_status_count":"1","effective_connection_speed":"xx","store_title":"xxxxx","gift_code":"xxxxxxxxxxxxxxxx","gift_code_max_uses":"1","resolved":false,"subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","scheduled_start_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","scheduled_end_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","access_type":"xxxx","expected_to_autorenew":false,"duration":"11111","full":false,"instant_invite":false,"has_images":false,"profile_user_status":"xxxxxxx","is_streaming":false,"has_custom_status":false,"application_name":"xxxxxxxxxxxxx","source":"xxxxxxxxxxxx","party_max":"1","protocol":"xxx","reconnect":false,"reason":"xxxxxxxxxxx","channel_bitrate":"111111","ping_ave
rage":"11","ping_bad_count":"1","input_detected":false,"no_input_detected_notice":false,"audio_jitter_buffer_mean":"1","audio_jitter_buffer_p75":"1","audio_jitter_buffer_p95":"1","audio_jitter_buffer_p99":"1","audio_jitter_buffer_max":"1","audio_jitter_delay_mean":"1","audio_jitter_delay_p75":"1","audio_jitter_delay_p95":"1","audio_jitter_delay_p99":"1","audio_jitter_delay_max":"1","audio_jitter_target_mean":"1","audio_jitter_target_p75":"1","audio_jitter_target_p95":"1","audio_jitter_target_p99":"1","audio_jitter_target_max":"1","relative_reception_delay_mean":"1","relative_reception_delay_p75":"1","relative_reception_delay_p95":"1","relative_reception_delay_p99":"1","relative_reception_delay_max":"1","relative_playout_delay_mean":"1","relative_playout_delay_p75":"1","relative_playout_delay_p95":"1","relative_playout_delay_p99":"1","relative_playout_delay_max":"1","mos_mean":1,"mos_1":"1","mos_2":"1","mos_3":"1","mos_4":"1","duration_connection_type_wifi":"1","duration_connection_type_cellular":"1","duration_connection_type_ethernet":"1","duration_connection_type_bluetooth":"1","duration_connection_type_other":"1","duration_connection_type_unknown":"111","duration_connection_type_none":"1","duration_effective_connection_speed_2g":"1","duration_effective_connection_speed_3g":"1","duration_effective_connection_speed_4g":"111","duration_effective_connection_speed_unknown":"1","ping_timeout":"1","audio_input_mode":"xxxxxxxxxxxx","automatic_audio_input_sensitivity_enabled":false,"audio_input_sensitivity":111,"echo_cancellation_enabled":false,"noise_suppression_enabled":false,"automatic_gain_control_enabled":false,"voice_output_volume":111,"frame_op_silent":"1","frame_op_normal":"1","frame_op_merged":"1","frame_op_expanded":"1","frame_op_accelerated":"1","frame_op_preemptive_expanded":"1","frame_op_cng":"1","max_voice_state_count":"1","duration_listening":"1","duration_speaking":"1","duration_participation":"1","duration_connected":"1","noise_cancellation_enabled":false,
"decryption_failures":"1","encryption_mode":"xxxxxxxxxxxxxxx","audio_decoded_normal":"11","audio_decoded_plc":"11","audio_decoded_plccng":"11111","audio_decoded_cng":"1","audio_decoded_muted_output":"11111","search_type":"xxxxx","search_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","is_error":false,"limit":"11","offset":"1","page":"1","total_results":"1111","page_results":"11","is_indexing":false,"page_num_messages":"11","page_num_links":"11","page_num_embeds":"11","page_num_attach":"1","guild_ids":[],"search_engine_current":"xxxxxxxxxx","load_duration_ms":"111","num_modifiers":"1","custom":false,"guild":"111111111111111111","invite":"xxxxxx","user_day":"1111","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","size_total":"11","size_online":"11","guild_verification_level":"1","location_message_id":"111111111111111111","list_sort":"xxxxxxxxxxxxxxx","list_index":"1","game":"xxxxxxxxxxxxx","verified":false,"elevated":false,"duration_ms":"11111","party_platform":"xxxxxxx","activity_action":"1","activity_party_platform":"xxxxxxx","old_nsfw":false,"new_nsfw":false,"settings_type":"xxxxxxx","destination_pane":"xxxxxxxx","origin_pane":"xxxxxxxxxxxxxxx"} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json b/test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json new file mode 100644 index 0000000..a289f7c --- /dev/null +++ b/test/fixtures/discord-json-2021-01/activity/reporting/events-2021-00000-of-00001.json @@ -0,0 +1,2 @@ 
+{"event_type":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx","event_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_id":"111111111111111111","domain":"xxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","variant":"a","browser":"xxxxxxxxxxxxxx","device":"xxxxxxx","os":"xxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","guild_id":"111111111111111111","guild_size_total":"1","guild_member_num_roles":"1","guild_member_perms":"111111111","guild_num_channels":"1","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"channel_id":"111111111111111111","channel_type":"1","channel_size_total":"1","channel_size_online":"1","channel_member_perms":"111111111","channel_hidden":false,"client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","search_engine":"xxxxxx","city":"xxxxxxx","event_source":"xxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","browser_user_agent":"some/path","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","os_version":"xxxxxxxxxx","os_sdk_version":"11","client_build_number":"11111","client_version":"xxxxxxx","user_is_authenticated":false,"isp":"xxxxxxxxxxxx","accessibility_support_enabled":false,"accessibility_features":"111","preview_enabled":false,"browser_version":"xxxxxxxxxxxxx","release_channel":"xxxxxx","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","os_arch":"xxx","is_member":false,"client_performance_cpu":8.08,"client_performance_memory":"111111","num_voice_channels_active":"1","location_section":"xxxxxxx","system_locale":"xxxxx","type":"xxxxxxxxxx","game_name":"xxxxxxxxxxxxxxxxx","game_platform":"xxxxxxx","location_object":"xxxxxxxxxx","has_custom_status":false,"is_friend":false,"has_images":false,"profile_user_status":"xxxxxxx","is_streaming":false,"application_na
me":"xxx","application_id":"111111111111111111","party_max":"1","source":"xxxxxxxx","payment_source_id":"111111111111111111","payment_source_type":"1","payment_gateway":"xxxxxx","is_default":false,"text_len":"11","clear_after":"xxxxx","message_id":"111111111111111111","channel":"111111111111111111","private":false,"server":"111111111111111111","num_attachments":"1","max_attachment_size":"1","length":"1","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":"1","emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","location":"xxxxxxxxxxxxxxxxxxxx","old_nsfw":false,"new_nsfw":false,"captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"source_page":"xxxxxxxxxxxx","source_section":"xxxxxxxxxx","source_object":"xxxxxxxx","sku_id":"111111111111111111","sku_type":"1","store_title":"xxxxxxx","distribution_type":"xxxxxxxxxxxx","price":"1111","regular_price":"1111","currency":"xxx","load_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","has_description":false,"has_staff_review":false,"carousel_image_count":"1","carousel_video_count":"1","has_single_player":false,"has_online_multiplayer":false,"has_local_multiplayer":false,"has_pvp_features":false,"has_local_coop":false,"has_online_coop":false,"has_cross_platform":false,"has_rich_presence":false,"has_game_invites":false,"has_spectator_mode":false,"has_controller_support":false,"has_cloud_saves":false,"has_secure_networking":false,"payment_id":"111111111111111111","transaction_id":"111111111111111111","transaction_type":"1","exchange_rate":1,"presentment_currency":"xxx","settlement_currency":"xxx","net_presentment_amount":"111","net_presentment_fees":"11","net_presentment_tax":"1","net_settlement_amount":"111","net_settlement_fees":"11","net_s
ettlement_tax":"1","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","presentment_amount":"1111","presentment_fees":"111","presentment_tax":"1","settlement_amount":"1111","settlement_fees":"111","settlement_tax":"1","subscription_id":"111111111111111111","subscription_type":"1","payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","removal_type":"xxxxxxxxxxxxxx","plan_id":"111111111111111111","connected":false,"platform_type":"xxxxxxx","visibility":"1","friend_sync":false,"partner":false,"link_method":"xxxxx","verified":false,"distributor":"xxxxxxx","elevated":false,"mode":"xxxxxxxxxxxxxx","priority":false,"game_ids_viewed":[],"store_application_ids_viewed":[],"store_sku_ids_viewed":[],"num_games_viewed":"11","num_cards_viewed":"11","seconds_spent":"1","subscribed_games":["111111111111111111","111111111111111111"],"num_cards_total":"11","news_ids_viewed":[],"feed_layout":"xxxxxxxxxxxx","window_width":"111","window_height":"111","plan":"xxxxxxxxxxxxx","has_auth_token":false,"invite_code":"xxxxxxx","is_backgrounded":false,"mute":false,"anyone_priority":false,"party_platform":"xxxxxxx","emoji_id":"111111111111111111","join_method":"xxxxxxxxxxxxxxxxx","user_guilds":"11","join_type":"xxxx","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","location_message_id":"111111111111111111","location_page":"xxxxxxxxxxxxx","location_object_type":"xxx","payment_type":"xxxxxxxxxxxx","is_gift":false,"eligible_for_trial":false,"subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_plan_id":"111111111111111111","quantity":"1","device_name":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx","num_guild_permissions":"1","source_object_type":"xxx","error_message":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","request_status":"111","price_shown":"1111","session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","is_premium":false,"amount":"1111","amount_refunded":"1","tax":"111","tax_inclusive":false,"subscription_payme
nt_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","sku_subscription_plan_id":"111111111111111111","failure_message":"some/path","search_engine_current":"xxxxxxxxxx","card_index":"1","card_type":"xxxxxxxxxxx","opened_from":"xxxxxxxxxxxx","uri_host":"xxx","uri_scheme":"xxxxxxx","theme":"xxxx","application_ids_viewed":[],"sku_ids_viewed":[],"duration_ms":"1111","game_id":"111111111111111111","is_new_user":false,"scheduled_start_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","scheduled_end_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","access_type":"xxxx","expected_to_autorenew":false,"adjust_tracker_token":"xxxxxx","adjust_tracker_name":"xxxxxxx","attribution_network":"xxxxxxx","num_applications_total":"1","num_applications_battlenet":"1","num_applications_discord":"1","num_applications_steam":"1","num_applications_twitch":"1","num_applications_uplay":"1","num_applications_origin":"1","num_applications_gog":"1","num_applications_epic":"1","gift_code":"xxxxxxxxxxxxxxxx","gift_code_max_uses":"1","resolved":false,"harvest_id":"111111111111111111","full":false,"instant_invite":false,"activity_duration_s":"111","total_duration_s":"1111","total_discord_sku_duration_s":"1","promotion_id":"1111111","list_index":"1","promotion_type":"xxxxxxxxxxxxxxx","promotion_url":"url://somewhere","custom":false,"guild":"111111111111111111","invite":"xxxxxxx","user_day":"1111","size_total":"111","size_online":"11","invite_type":"xxxxxxx","guild_verification_level":"1","guild_size_online":"11","list_sort":"xxxxxxxxxxxxxxx","game_ids":[],"num_cards":"1","num_cards_visible":"1","num_cards_game_news":"1","num_cards_game_playable":"1","num_game_parties":"1","num_game_parties_voice":"1","num_game_parties_solo":"1","num_game_parties_recently_played":"1","num_game_parties_rich_presence":"1","num_game_parties_collapsed":"1","num_users_subscribed":"11","num_launcher_applications":"1","guild_ids_v
iewed":[],"num_items_now_playing":"1","num_items_recently_played":"1","name":"xxxxxxx","has_bot":false,"has_redirect_uri":false,"activity_action":"1","activity_party_platform":"xxxxxxx","success":false} +{"event_type":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx","event_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_id":"111111111111111111","domain":"xxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","variant":"a","browser":"xxxxxxxxxxxxxx","device":"xxxxxxx","os":"xxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","guild_id":"111111111111111111","guild_size_total":"1","guild_member_num_roles":"1","guild_member_perms":"111111111","guild_num_channels":"1","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"1","guild_is_vip":false,"channel_id":"111111111111111111","channel_type":"1","channel_size_total":"1","channel_size_online":"1","channel_member_perms":"111111111","channel_hidden":false,"client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","search_engine":"xxxxxx","city":"xxxxxxx","event_source":"xxxxxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","browser_user_agent":"some/path","cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","os_version":"xxxxxxxxxx","os_sdk_version":"11","client_build_number":"11111","client_version":"xxxxxxx","user_is_authenticated":false,"isp":"xxxxxxxxxxxx","accessibility_support_enabled":false,"accessibility_features":"111","preview_enabled":false,"browser_version":"xxxxxxxxxxxxx","release_channel":"xxxxxx","referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","os_arch":"xxx","is_member":false,"client_performance_cpu":8.08,"client_performance_memory":"111111","num_voice_channels_active":"1","location_section":"xxxxxxx","system_locale":"xxxxx","type":"xxxxxxxxxx","game_name":"xxxx
xxxxxxxxxxxxx","game_platform":"xxxxxxx","location_object":"xxxxxxxxxx","has_custom_status":false,"is_friend":false,"has_images":false,"profile_user_status":"xxxxxxx","is_streaming":false,"application_name":"xxx","application_id":"111111111111111111","party_max":"1","source":"xxxxxxxx","payment_source_id":"111111111111111111","payment_source_type":"1","payment_gateway":"xxxxxx","is_default":false,"text_len":"11","clear_after":"xxxxx","message_id":"111111111111111111","channel":"111111111111111111","private":false,"server":"111111111111111111","num_attachments":"1","max_attachment_size":"1","length":"1","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":"1","emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","location":"xxxxxxxxxxxxxxxxxxxx","old_nsfw":false,"new_nsfw":false,"captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"source_page":"xxxxxxxxxxxx","source_section":"xxxxxxxxxx","source_object":"xxxxxxxx","sku_id":"111111111111111111","sku_type":"1","store_title":"xxxxxxx","distribution_type":"xxxxxxxxxxxx","price":"1111","regular_price":"1111","currency":"xxx","load_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","has_description":false,"has_staff_review":false,"carousel_image_count":"1","carousel_video_count":"1","has_single_player":false,"has_online_multiplayer":false,"has_local_multiplayer":false,"has_pvp_features":false,"has_local_coop":false,"has_online_coop":false,"has_cross_platform":false,"has_rich_presence":false,"has_game_invites":false,"has_spectator_mode":false,"has_controller_support":false,"has_cloud_saves":false,"has_secure_networking":false,"payment_id":"111111111111111111","transaction_id":"111111111111111111","transaction_type":"1","exchange_rate":1,"p
resentment_currency":"xxx","settlement_currency":"xxx","net_presentment_amount":"111","net_presentment_fees":"11","net_presentment_tax":"1","net_settlement_amount":"111","net_settlement_fees":"11","net_settlement_tax":"1","created_at":"xxxxxxxxxxxxxxxxxxxxxxx","presentment_amount":"1111","presentment_fees":"111","presentment_tax":"1","settlement_amount":"1111","settlement_fees":"111","settlement_tax":"1","subscription_id":"111111111111111111","subscription_type":"1","payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","removal_type":"xxxxxxxxxxxxxx","plan_id":"111111111111111111","connected":false,"platform_type":"xxxxxxx","visibility":"1","friend_sync":false,"partner":false,"link_method":"xxxxx","verified":false,"distributor":"xxxxxxx","elevated":false,"mode":"xxxxxxxxxxxxxx","priority":false,"game_ids_viewed":[],"store_application_ids_viewed":[],"store_sku_ids_viewed":[],"num_games_viewed":"11","num_cards_viewed":"11","seconds_spent":"1","subscribed_games":["111111111111111111","111111111111111111"],"num_cards_total":"11","news_ids_viewed":[],"feed_layout":"xxxxxxxxxxxx","window_width":"111","window_height":"111","plan":"xxxxxxxxxxxxx","has_auth_token":false,"invite_code":"xxxxxxx","is_backgrounded":false,"mute":false,"anyone_priority":false,"party_platform":"xxxxxxx","emoji_id":"111111111111111111","join_method":"xxxxxxxxxxxxxxxxx","user_guilds":"11","join_type":"xxxx","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","location_message_id":"111111111111111111","location_page":"xxxxxxxxxxxxx","location_object_type":"xxx","payment_type":"xxxxxxxxxxxx","is_gift":false,"eligible_for_trial":false,"subscription_plan_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_plan_id":"111111111111111111","quantity":"1","device_name":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx","num_guild_permissions":"1","source_object_type":"xxx","error_message":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","request_status":"111","price_shown":"1111","s
ession":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","utm_medium":"xxxxxxx","utm_source":"xxxxxxxxxxx","is_premium":false,"amount":"1111","amount_refunded":"1","tax":"111","tax_inclusive":false,"subscription_payment_gateway_plan_id":"xxxxxxxxxxxxxxxxxxx","subscription_current_period_start":"xxxxxxxxxxxxxxxxxxxxxxxxxx","subscription_current_period_end":"xxxxxxxxxxxxxxxxxxxxxxxxxx","sku_subscription_plan_id":"111111111111111111","failure_message":"some/path","search_engine_current":"xxxxxxxxxx","card_index":"1","card_type":"xxxxxxxxxxx","opened_from":"xxxxxxxxxxxx","uri_host":"xxx","uri_scheme":"xxxxxxx","theme":"xxxx","application_ids_viewed":[],"sku_ids_viewed":[],"duration_ms":"1111","game_id":"111111111111111111","is_new_user":false,"scheduled_start_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","scheduled_end_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","access_type":"xxxx","expected_to_autorenew":false,"adjust_tracker_token":"xxxxxx","adjust_tracker_name":"xxxxxxx","attribution_network":"xxxxxxx","num_applications_total":"1","num_applications_battlenet":"1","num_applications_discord":"1","num_applications_steam":"1","num_applications_twitch":"1","num_applications_uplay":"1","num_applications_origin":"1","num_applications_gog":"1","num_applications_epic":"1","gift_code":"xxxxxxxxxxxxxxxx","gift_code_max_uses":"1","resolved":false,"harvest_id":"111111111111111111","full":false,"instant_invite":false,"activity_duration_s":"111","total_duration_s":"1111","total_discord_sku_duration_s":"1","promotion_id":"1111111","list_index":"1","promotion_type":"xxxxxxxxxxxxxxx","promotion_url":"url://somewhere","custom":false,"guild":"111111111111111111","invite":"xxxxxxx","user_day":"1111","size_total":"111","size_online":"11","invite_type":"xxxxxxx","guild_verification_level":"1","guild_size_online":"11","list_sort":"xxxxxxxxxxxxxxx","game_ids":[],"num_cards":"1","num_cards_visible":"1","num_cards_game_news":"1","num_cards_game_playable":"1","num_game_parties":"1","num_game_parties_voice":"1","num_g
ame_parties_solo":"1","num_game_parties_recently_played":"1","num_game_parties_rich_presence":"1","num_game_parties_collapsed":"1","num_users_subscribed":"11","num_launcher_applications":"1","guild_ids_viewed":[],"num_items_now_playing":"1","num_items_recently_played":"1","name":"xxxxxxx","has_bot":false,"has_redirect_uri":false,"activity_action":"1","activity_party_platform":"xxxxxxx","success":false} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json b/test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json new file mode 100644 index 0000000..dc5db5f --- /dev/null +++ b/test/fixtures/discord-json-2021-01/activity/tns/events-2021-00000-of-00001.json @@ -0,0 +1,2 @@ +{"event_type":"xxxxxxxxxxxxxx","event_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","event_source":"xxxxxx","user_id":"111111111111111111","domain":"xxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","freight_id":"xxxxxxxxxxxxxxxxxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","user_is_authenticated":false,"cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","city":"xxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","isp":"xxxxxxxxxxxx","message_id":"111111111111111111","channel":"111111111111111111","channel_type":"1","is_friend":false,"private":false,"num_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":false,"emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","browser":"xxxxxxxxxxxxxx","os":"xxxxxxx","os_version":"xxx
xxxxxxx","os_arch":"xxx","client_build_number":"11111","release_channel":"xxxxxx","client_version":"xxxxxxx","server":"111111111111111111","browser_user_agent":"some/path","device":"xxxxxxxxxxxxxxxxx","device_advertiser_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","os_sdk_version":"11","accessibility_support_enabled":false,"accessibility_features":"111","system_locale":"xxxxx","browser_version":"xxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","is_new_user":false,"referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","search_engine_current":"xxxxxxxxxx","client_uuid":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","client_performance_cpu":8.08,"client_performance_memory":"111111","guild_id":"111111111111111111","guild_size_total":"111","guild_member_num_roles":"1","guild_member_perms":"111111111","guild_num_channels":"11","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"11","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","channel_id":"111111111111111111","channel_size_total":"1","channel_member_perms":"111111111","channel_hidden":false,"mode":"xxxxxxxxxxxxxx","priority":false,"media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","packets_sent":"111","packets_sent_lost":"1","packets_received":"1111","packets_received_lost":"1","voice_state_count":"1","game_name":"xxxxxxxxxxxxxxxxx","game_exe_name":"xxxxxxxxx","is_initiator":false,"mutual_guilds":"1","type":"xxxxxxxxxx","user_guilds":"11","location":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","activity_action":"1","activity_party_platform":"xxxxxxx","invite_type":"xxxxxxx","invite_code":"xxxxxx","invite_channel_id":"111111111111111111","invite_channel_type":"1","is_suggested":false,"row_num":"1","num_total":"111","is_filtered":false,"num_affinity_connections":"111","invite_guild_id":"111111111111111111","app_id":"111111111111111111","transport":"xxx","resolved":false,"code":"xxxxxx",
"authenticated":false,"user_banned":false,"error_code":"11111","error_message":"xxxxxxxxxxxxxx","size_total":"1","size_online":"1","preview_enabled":false,"location_section":"xxxxxxx","channel_is_nsfw":false,"hostname":"xxxxxxxxxxxxxxxxxxxxxxxx","port":"11111","protocol":"xxx","reconnect":false,"reason":"xxxxxxxxxxx","duration":"1111111","channel_bitrate":"11111","connect_count":"1","ping_average":"11","ping_bad_count":"1","audio_jitter_buffer_mean":"1","audio_jitter_buffer_p75":"1","audio_jitter_buffer_p95":"1","audio_jitter_buffer_p99":"1","audio_jitter_buffer_max":"1","audio_jitter_delay_mean":"1","audio_jitter_delay_p75":"1","audio_jitter_delay_p95":"1","audio_jitter_delay_p99":"1","audio_jitter_delay_max":"1","audio_jitter_target_mean":"1","audio_jitter_target_p75":"1","audio_jitter_target_p95":"1","audio_jitter_target_p99":"1","audio_jitter_target_max":"1","relative_reception_delay_mean":"1","relative_reception_delay_p75":"1","relative_reception_delay_p95":"1","relative_reception_delay_p99":"1","relative_reception_delay_max":"1","relative_playout_delay_mean":"1","relative_playout_delay_p75":"1","relative_playout_delay_p95":"1","relative_playout_delay_p99":"1","relative_playout_delay_max":"1","mos_mean":8.08,"mos_1":"1","mos_2":"1","mos_3":"11","mos_4":"111","audio_input_mode":"xxxxxxxxxxxxxx","frame_op_silent":"1111111","frame_op_normal":"1111","frame_op_merged":"11","frame_op_expanded":"111","frame_op_accelerated":"111","frame_op_preemptive_expanded":"1111","frame_op_cng":"1","automatic_audio_input_sensitivity_enabled":false,"audio_input_sensitivity":8.08,"echo_cancellation_enabled":false,"noise_suppression_enabled":false,"automatic_gain_control_enabled":false,"voice_output_volume":111,"duration_listening":"111","duration_speaking":"11","duration_participation":"111","duration_connected":"1111","noise_cancellation_enabled":false,"duration_connection_type_wifi":"1","duration_connection_type_cellular":"1","duration_connection_type_ethernet":"1","duration_connec
tion_type_bluetooth":"1","duration_connection_type_other":"1","duration_connection_type_unknown":"1111","duration_connection_type_none":"1","duration_effective_connection_speed_2g":"1","duration_effective_connection_speed_3g":"1","duration_effective_connection_speed_4g":"1111","duration_effective_connection_speed_unknown":"1","context":"xxxxxxx","ping_timeout":"1","input_detected":false,"no_input_detected_notice":false,"max_voice_state_count":"1","cloudflare_best_region":"xxxxxxxxxx","decryption_failures":"1","encryption_mode":"xxxxxxxxxxxxxxx","mute":false,"anyone_priority":false,"convert_emoticons":false,"developer_mode":false,"enable_tts_command":false,"friend_source_flags":"11","guild_positions":["111111111111111111","111111111111111111"],"inline_attachment_media":false,"inline_embed_media":false,"gif_auto_play":false,"locale":"xxxxx","message_display_compact":false,"render_embeds":false,"render_reactions":false,"animate_emoji":false,"restricted_guilds":[],"show_current_game":false,"theme":"xxxx","detect_platform_accounts":false,"status":"xxxxxx","default_guilds_restricted":false,"explicit_content_filter":"1","afk_timeout":"111","timezone_offset":"111","disable_games_tab":false,"num_server_folders":"1","stream_notifications_enabled":false,"has_custom_status":false,"allow_accessibility_detection":false,"contact_sync_enabled":false,"native_phone_integration_enabled":false,"animate_stickers":"1","guild_name":"xxxxxxxxxxx","captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"animated":false,"application_name":"xxxxxxxxxxxx","is_streaming":false,"has_images":false,"profile_user_status":"xxxxxxxxxxxxxx","location_object":"xxxxxx","game_platform":"xxxxxxx","custom":false,"guild":"111111111111111111","invite":"xxxxxx","user_day":"1111","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","guild_verification_level":"1","search_engine":"xxxxxx","l
ocation_message_id":"111111111111111111","session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","emoji_id":"111111111111111111","application_id":"111111111111111111","name":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","bucket":"1","revision":"1","population":"xx","client_event_source":"xxxxxxx","join_method":"xxxxxxxxxxxxxxxxx","join_type":"xxxx","account_id":"xxxxxxxx","account_name":"xxxxxxxx","connected":false,"platform_type":"xxxxxxx","visibility":"1","friend_sync":false,"partner":false,"link_method":"xxxxx","temporary":false,"max_uses":"1","max_age":"11111","regenerate":false,"unique":false,"video_stream_count":"1","video_enabled":false,"video_input_type":"xxxx","enabled_inputs":[],"source":"xxxxxxxxxxxx","owner":false,"harvest_id":"111111111111111111","channel_name":"xxxxxxxxxxxxxxxxx","is_nsfw":false,"custom_status_count":"1","game_id":"111111111111111111","connection_type":"xxxxxxxx","emoji_name":"xxxx","party_id":"xxxxxxxxxxxxxxxxxxxxxxxxxx","party_platform":"xxxxxxx","sku_id":"111111111111111111","identity_type":"xxxxx"} 
+{"event_type":"xxxxxxxxxxxxxx","event_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","event_source":"xxxxxx","user_id":"111111111111111111","domain":"xxx","freight_hostname":"xxxxxxxxxxxxxxxxxxxxxxxxx","freight_id":"xxxxxxxxxxxxxxxxxxxxxxxx","ip":"1.1.1.1","day":"1111","chosen_locale":"xxxxx","detected_locale":"xxxxx","user_is_authenticated":false,"cfduid":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a","city":"xxxxxxxxx","country_code":"xx","region_code":"xx","time_zone":"some/path","isp":"xxxxxxxxxxxx","message_id":"111111111111111111","channel":"111111111111111111","channel_type":"1","is_friend":false,"private":false,"num_attachments":"1","max_attachment_size":"1","length":"11","word_count":"1","mention_everyone":false,"emoji_unicode":"1","emoji_custom":"1","emoji_custom_external":"1","emoji_managed":"1","emoji_managed_external":"1","emoji_animated":false,"emoji_only":false,"num_embeds":"1","attachment_ids":[],"has_spoiler":false,"probably_has_markdown":false,"user_is_bot":false,"sticker_ids":[],"message_type":"1","client_send_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","client_track_timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","timestamp":"xxxxxxxxxxxxxxxxxxxxxxxxxx","browser":"xxxxxxxxxxxxxx","os":"xxxxxxx","os_version":"xxxxxxxxxx","os_arch":"xxx","client_build_number":"11111","release_channel":"xxxxxx","client_version":"xxxxxxx","server":"111111111111111111","browser_user_agent":"some/path","device":"xxxxxxxxxxxxxxxxx","device_advertiser_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","os_sdk_version":"11","accessibility_support_enabled":false,"accessibility_features":"111","system_locale":"xxxxx","browser_version":"xxxx","referrer":"url://somewhere","referring_domain":"xxxxxxxxxxxxxx","is_new_user":false,"referrer_current":"url://somewhere","referring_domain_current":"xxxxxxxxxxxxxx","search_engine_current":"xxxxxxxxxx","client_uuid":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","client_performance_cpu":8.08,"client_performance_memory":"111111","guild_id":"111111111111111111","guild_size_tot
al":"111","guild_member_num_roles":"1","guild_member_perms":"111111111","guild_num_channels":"11","guild_num_text_channels":"1","guild_num_voice_channels":"1","guild_num_roles":"11","guild_is_vip":false,"is_member":false,"num_voice_channels_active":"1","channel_id":"111111111111111111","channel_size_total":"1","channel_member_perms":"111111111","channel_hidden":false,"mode":"xxxxxxxxxxxxxx","priority":false,"media_session_id":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","rtc_connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","packets_sent":"111","packets_sent_lost":"1","packets_received":"1111","packets_received_lost":"1","voice_state_count":"1","game_name":"xxxxxxxxxxxxxxxxx","game_exe_name":"xxxxxxxxx","is_initiator":false,"mutual_guilds":"1","type":"xxxxxxxxxx","user_guilds":"11","location":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","activity_action":"1","activity_party_platform":"xxxxxxx","invite_type":"xxxxxxx","invite_code":"xxxxxx","invite_channel_id":"111111111111111111","invite_channel_type":"1","is_suggested":false,"row_num":"1","num_total":"111","is_filtered":false,"num_affinity_connections":"111","invite_guild_id":"111111111111111111","app_id":"111111111111111111","transport":"xxx","resolved":false,"code":"xxxxxx","authenticated":false,"user_banned":false,"error_code":"11111","error_message":"xxxxxxxxxxxxxx","size_total":"1","size_online":"1","preview_enabled":false,"location_section":"xxxxxxx","channel_is_nsfw":false,"hostname":"xxxxxxxxxxxxxxxxxxxxxxxx","port":"11111","protocol":"xxx","reconnect":false,"reason":"xxxxxxxxxxx","duration":"1111111","channel_bitrate":"11111","connect_count":"1","ping_average":"11","ping_bad_count":"1","audio_jitter_buffer_mean":"1","audio_jitter_buffer_p75":"1","audio_jitter_buffer_p95":"1","audio_jitter_buffer_p99":"1","audio_jitter_buffer_max":"1","audio_jitter_delay_mean":"1","audio_jitter_delay_p75":"1","audio_jitter_delay_p95":"1","audio_jitter_delay_p99":"1","audio_jitter_delay_max":"1","audio_jitter_target_mean":"1","audio_jitter_
target_p75":"1","audio_jitter_target_p95":"1","audio_jitter_target_p99":"1","audio_jitter_target_max":"1","relative_reception_delay_mean":"1","relative_reception_delay_p75":"1","relative_reception_delay_p95":"1","relative_reception_delay_p99":"1","relative_reception_delay_max":"1","relative_playout_delay_mean":"1","relative_playout_delay_p75":"1","relative_playout_delay_p95":"1","relative_playout_delay_p99":"1","relative_playout_delay_max":"1","mos_mean":8.08,"mos_1":"1","mos_2":"1","mos_3":"11","mos_4":"111","audio_input_mode":"xxxxxxxxxxxxxx","frame_op_silent":"1111111","frame_op_normal":"1111","frame_op_merged":"11","frame_op_expanded":"111","frame_op_accelerated":"111","frame_op_preemptive_expanded":"1111","frame_op_cng":"1","automatic_audio_input_sensitivity_enabled":false,"audio_input_sensitivity":8.08,"echo_cancellation_enabled":false,"noise_suppression_enabled":false,"automatic_gain_control_enabled":false,"voice_output_volume":111,"duration_listening":"111","duration_speaking":"11","duration_participation":"111","duration_connected":"1111","noise_cancellation_enabled":false,"duration_connection_type_wifi":"1","duration_connection_type_cellular":"1","duration_connection_type_ethernet":"1","duration_connection_type_bluetooth":"1","duration_connection_type_other":"1","duration_connection_type_unknown":"1111","duration_connection_type_none":"1","duration_effective_connection_speed_2g":"1","duration_effective_connection_speed_3g":"1","duration_effective_connection_speed_4g":"1111","duration_effective_connection_speed_unknown":"1","context":"xxxxxxx","ping_timeout":"1","input_detected":false,"no_input_detected_notice":false,"max_voice_state_count":"1","cloudflare_best_region":"xxxxxxxxxx","decryption_failures":"1","encryption_mode":"xxxxxxxxxxxxxxx","mute":false,"anyone_priority":false,"convert_emoticons":false,"developer_mode":false,"enable_tts_command":false,"friend_source_flags":"11","guild_positions":["111111111111111111","111111111111111111"],"inline_attachme
nt_media":false,"inline_embed_media":false,"gif_auto_play":false,"locale":"xxxxx","message_display_compact":false,"render_embeds":false,"render_reactions":false,"animate_emoji":false,"restricted_guilds":[],"show_current_game":false,"theme":"xxxx","detect_platform_accounts":false,"status":"xxxxxx","default_guilds_restricted":false,"explicit_content_filter":"1","afk_timeout":"111","timezone_offset":"111","disable_games_tab":false,"num_server_folders":"1","stream_notifications_enabled":false,"has_custom_status":false,"allow_accessibility_detection":false,"contact_sync_enabled":false,"native_phone_integration_enabled":false,"animate_stickers":"1","guild_name":"xxxxxxxxxxx","captcha_service":"xxxxxxxx","sitekey":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","user_flow":"xxxxxxxxx","force_bad":false,"animated":false,"application_name":"xxxxxxxxxxxx","is_streaming":false,"has_images":false,"profile_user_status":"xxxxxxxxxxxxxx","location_object":"xxxxxx","game_platform":"xxxxxxx","custom":false,"guild":"111111111111111111","invite":"xxxxxx","user_day":"1111","location_guild_id":"111111111111111111","location_channel_id":"111111111111111111","location_channel_type":"1","guild_verification_level":"1","search_engine":"xxxxxx","location_message_id":"111111111111111111","session":"a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1","emoji_id":"111111111111111111","application_id":"111111111111111111","name":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","bucket":"1","revision":"1","population":"xx","client_event_source":"xxxxxxx","join_method":"xxxxxxxxxxxxxxxxx","join_type":"xxxx","account_id":"xxxxxxxx","account_name":"xxxxxxxx","connected":false,"platform_type":"xxxxxxx","visibility":"1","friend_sync":false,"partner":false,"link_method":"xxxxx","temporary":false,"max_uses":"1","max_age":"11111","regenerate":false,"unique":false,"video_stream_count":"1","video_enabled":false,"video_input_type":"xxxx","enabled_inputs":[],"source":"xxxxxxxxxxxx","owner":false,"harvest_id":"111111111111111111","channel_name":"xx
xxxxxxxxxxxxxxx","is_nsfw":false,"custom_status_count":"1","game_id":"111111111111111111","connection_type":"xxxxxxxx","emoji_name":"xxxx","party_id":"xxxxxxxxxxxxxxxxxxxxxxxxxx","party_platform":"xxxxxxx","sku_id":"111111111111111111","identity_type":"xxxxx"} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json b/test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json new file mode 100644 index 0000000..685450c --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/11111111111111111/channel.json @@ -0,0 +1 @@ +{"id": "11111111111111111", "type": 0} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv b/test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv new file mode 100644 index 0000000..e5a1674 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/11111111111111111/messages.csv @@ -0,0 +1,2 @@ +ID,Timestamp,Contents,Attachments +8888888888,2022-02-22 22:22:22.222222+00:00,Heyo, diff --git a/test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json b/test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json new file mode 100644 index 0000000..d461056 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/222222222222222222/channel.json @@ -0,0 +1 @@ +{"id": "222222222222222222", "type": 1, "recipients": ["00000000000000000", "1111111111111111"]} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv b/test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv new file mode 100644 index 0000000..9c1324a --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/222222222222222222/messages.csv @@ -0,0 +1,2 @@ +ID,Timestamp,Contents,Attachments +2222222222222,2022-22-22 22:22:22.22222+00:00,Heyo, diff --git 
a/test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json b/test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json new file mode 100644 index 0000000..5892587 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/333333333333333333/channel.json @@ -0,0 +1 @@ +{"id": "333333333333333333", "type": 0, "name": "generalchat", "guild": {"id": "333333333333333332", "name": "xxx"}} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv b/test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv new file mode 100644 index 0000000..5603929 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/333333333333333333/messages.csv @@ -0,0 +1,6 @@ +ID,Timestamp,Contents,Attachments +000000000000000005,2011-02-02 02:05:02.000000+00:00,Huh what the heck is this message, +000000000000000004,2011-02-02 02:04:02.000000+00:00,<:thonk:000000000000000000><:thonk:000000000000000000><:thonk:000000000000000000>, +000000000000000003,2011-02-02 02:03:02.000000+00:00,"(so <@00000000000000000> who are you)", +000000000000000002,2011-02-02 02:02:02.000000+00:00,,https://cdn.discordapp.com/attachments/000000000000000000/000000000000000000/image.png +000000000000000001,2011-02-02 02:01:02.000000+00:00,https://google.com/whatever, diff --git a/test/fixtures/discord-json-2021-01/messages/index.json b/test/fixtures/discord-json-2021-01/messages/index.json new file mode 100644 index 0000000..10c1f13 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/messages/index.json @@ -0,0 +1,5 @@ +{ + "11111111111111111": null, + "222222222222222222": "Direct Message with xxx#7777", + "333333333333333333": "generalchat" +} \ No newline at end of file diff --git a/test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json b/test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json new file mode 100644 index 0000000..2ed58ec --- /dev/null 
+++ b/test/fixtures/discord-json-2021-01/servers/444444444444444444/audit-log.json @@ -0,0 +1,18 @@ +[ + { + "id": "111111111111111111", + "user_id": "111111111111111111", + "action_type": 11, + "changes": [ + { + "key": "xxxx", + "new_value": [ + { + "name": "xxxxxxxxxx", + "id": "111111111111111111" + } + ] + } + ] + } +] diff --git a/test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json b/test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json new file mode 100644 index 0000000..fd56ec8 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/servers/444444444444444444/guild.json @@ -0,0 +1,4 @@ +{ + "id": "444444444444444444", + "name": "xxx" +} diff --git a/test/fixtures/discord-json-2021-01/servers/index.json b/test/fixtures/discord-json-2021-01/servers/index.json new file mode 100644 index 0000000..8b6c150 --- /dev/null +++ b/test/fixtures/discord-json-2021-01/servers/index.json @@ -0,0 +1,3 @@ +{ + "444444444444444444": "xxx" +} \ No newline at end of file diff --git a/test/fixtures/facebook-json.md b/test/fixtures/facebook-json.md new file mode 100644 index 0000000..3acaf76 --- /dev/null +++ b/test/fixtures/facebook-json.md @@ -0,0 +1,9 @@ +# facebook-json exports + +## `facebook-json-2021-05-01` + * Manual edits of images -> placeholders, folder names, key names (in support cases specifically) + * This was one of the first few datasets I scrubbed so a lot of manual work was done. Should be easier now + * I went poking around this one and there was no exif on any of the images I looked at, only in the json was there exif +## `facebook-json-2025-11-29` + * Manual edits of images -> placeholders, folder names, key names + * This was one of the first few datasets I scrubbed so a lot of manual work was done. 
Should be easier now \ No newline at end of file diff --git a/test/fixtures/snapchat-2023-11.md b/test/fixtures/snapchat-2023-11.md new file mode 100644 index 0000000..7cd92da --- /dev/null +++ b/test/fixtures/snapchat-2023-11.md @@ -0,0 +1,83 @@ +# Snapchat + +Exported from the web exporter + +## Manual Edits + +* memories and chat_media placeholders +* Snapchat seemed to have events exported where the `+` in emails broke my parsing and the email contained a ' ' instead, so I fixed that +* Keys use unique dates in `json/in_app_surveys.json` +* Keys in `json/chat_history.json` use user ids, had to manually truncate and edit + +## Notes + +* `memories/` + * No exif data + * Does not seem to have any correlating .json file. It's just a dump to the disk + * files are like `2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg` + * Date has no time, just date + * `aaaaa...` seems to be a guid + * `main` | `overlay` at the end, with the same guid + * `main` is just the image + * `overlay` looks to be like a filter or some other applied thing that was saved with the memory + * Images may be rotated +* `chat_media/` + * No exif + * files are like `2020-01-01_b~xxxx.jpeg` + * sometimes they have `main` | `overlay` or something + * No idea what the `b~` means or if the xxx is an id or what. Perhaps base64 encoded protobuf, but nothing I decoded seemed to correlate to any identifier in the export + * Only referenced from ... oh... it's broken. The `type: "MEDIA"` in Snapchat's exporter has all empty "content" fields. Amazing... So this will have to be pieced together some other way + * This will most likely have to be manually repaired +* `json/` + * Scrubbed + * See manual changes + + +* Comes with both an html and json export (I will only keep the json after deduping) + * NOTE: That the html export has explanations which might be useful to explain some of these fields... 
+ * I compared all .html to .json side by side (browser <-> text editor) and all of them were present in both and had the same data except `snap_history.html` (was empty in .html) and `faq.html` (just informational) +* I noticed on chat history html pages it puts _every_ category, not just the ones I have. Might be useful future reference + +``` +Frequently Asked Questions +Login History and Account Information +Snap History Metadata +Chat History Metadata +My AI +Our Story & Spotlight Content +Spotlight Replies +Purchase History +Snapchat Support History +User Profile +Public Profiles +Friends +Ranking +Story History +Account History +Location +Search History +Terms History +Subscriptions +Bitmoji +In-app Surveys +Reported Content +Bitmoji Kit +Connected Apps +Talk History +Ads Manager +My Lenses +Memories +Cameos +Email Campaign History +Snap Tokens +Payouts +Orders +Snap Map Places +Shopping Favorites +Payments +My Sounds +Photoshoot Snaps +Feature Emails +AI Selfies +``` + diff --git a/test/fixtures/snapchat-2023-11/json/account.json b/test/fixtures/snapchat-2023-11/json/account.json new file mode 100644 index 0000000..5c6c8cd --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/account.json @@ -0,0 +1,38 @@ +{ + "Basic Information": { + "Username": "xxxxxxxxx", + "Name": "xxxxx", + "Creation Date": "2020-04-13 10:09:08 UTC", + "Registration IP": "", + "Country": "" + }, + "Device Information": { + "Make": "", + "Model ID": "", + "Model Name": "", + "Language": "", + "OS Type": "", + "OS Version": "", + "Connection Type": "" + }, + "Device History": [], + "Privacy Policy and Terms of Service Acceptance History": [], + "Custom Creative Tools Terms": [], + "Login History": [ + { + "IP": "1.1.1.1", + "Country": "xx", + "Created": "2020-04-13 10:09:08 UTC", + "Status": "xxxxxxx", + "Device": "some/path" + }, + { + "IP": "1.1.1.1", + "Country": "xx", + "Created": "2020-04-13 10:09:08 UTC", + "Status": "xxxxxxx", + "Device": "some/path" + } + ], + "Family Center": 
[] +} diff --git a/test/fixtures/snapchat-2023-11/json/account_history.json b/test/fixtures/snapchat-2023-11/json/account_history.json new file mode 100644 index 0000000..634fdf9 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/account_history.json @@ -0,0 +1,47 @@ +{ + "Display Name Change": [ + { + "Date": "2020-04-13 10:09:08 UTC", + "Display Name": "xxxxx" + }, + { + "Date": "", + "Display Name": "xxxxxx" + } + ], + "Email Change": [ + { + "Date": "2020-04-13 10:09:08 UTC", + "Email Address": "not_a_real_email@example.com" + } + ], + "Mobile Number Change": [], + "Password Change": [ + { + "Date": "2020-04-13 10:09:08 UTC" + }, + { + "Date": "2020-04-13 10:09:08 UTC" + } + ], + "Snapchat Linked to Bitmoji": [ + { + "Date": "2020-04-13 10:09:08 UTC" + } + ], + "Spectacles": [], + "Two-Factor Authentication": [], + "Account deactivated / reactivated": [], + "Download My Data Reports": [ + { + "Date": "2020-04-13 10:09:08 UTC", + "Status": "xxxxxxx", + "Email Address": "not_a_real_email@example.com" + }, + { + "Date": "2020-04-13 10:09:08 UTC", + "Status": "xxxxxxxxx", + "Email Address": "not_a_real_email@example.com" + } + ] +} diff --git a/test/fixtures/snapchat-2023-11/json/bitmoji.json b/test/fixtures/snapchat-2023-11/json/bitmoji.json new file mode 100644 index 0000000..ff8ddde --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/bitmoji.json @@ -0,0 +1,31 @@ +{ + "Basic Information": { + "First Name": "", + "Last Name": "", + "Email": "", + "Phone Number": "", + "Account Creation Date": "2020-04-13 10:09:08 UTC", + "Account Creation User Agent": "" + }, + "Analytics": { + "App Open Count": 1, + "Avatar Gender": "xxxx", + "Outfit Save Count": 1, + "Share Count": 1 + }, + "Terms of Service Acceptance History": [ + { + "Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Acceptance Date": "2020-04-13 10:09:08" + }, + { + "Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Acceptance Date": 
"2020-04-13 10:09:08" + } + ], + "Search History": [], + "Support Cases": [], + "Selfies": [], + "Keyboard Enable Full Access History (iOS only)": [], + "Connected Apps": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/cameos_metadata.json b/test/fixtures/snapchat-2023-11/json/cameos_metadata.json new file mode 100644 index 0000000..9afbe54 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/cameos_metadata.json @@ -0,0 +1,8 @@ +{ + "Cameos Selfie": { + "Cameos Body Selected": "xxxxxxxxxxxx", + "Hairstyle": "xxxxxxxxxxxx", + "Use My Cameos Selfie": "xxxxxxx" + }, + "Cameos Stories": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/chat_history.json b/test/fixtures/snapchat-2023-11/json/chat_history.json new file mode 100644 index 0000000..7c5ed49 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/chat_history.json @@ -0,0 +1,42 @@ +{ + "some_friend": [ + { + "From": "xxxxxxxxx", + "Media Type": "xxxxx", + "Created": "2020-04-13 10:09:08 UTC", + "Content": "", + "Conversation Title": null, + "IsSender": false, + "Created(microseconds)": 1111111111111 + }, + { + "From": "xxxxxxxxx", + "Media Type": "xxxx", + "Created": "2020-04-13 10:09:08 UTC", + "Content": "xxxxxxxxxxxxxxxxxx", + "Conversation Title": null, + "IsSender": false, + "Created(microseconds)": 1111111111111 + } + ], + "some_friend_too": [ + { + "From": "xxxxxxxxxxxxxx", + "Media Type": "xxxxx", + "Created": "2020-04-13 10:09:08 UTC", + "Content": "", + "Conversation Title": "xxxxxxxxxxxxxxxx", + "IsSender": false, + "Created(microseconds)": 1111111111111 + }, + { + "From": "xxxxxxxxxxxxx", + "Media Type": "xxxx", + "Created": "2020-04-13 10:09:08 UTC", + "Content": "xxxxxxxxxxxxxxxxxxxxxx", + "Conversation Title": "xxxxxxxxxxxxxxxx", + "IsSender": false, + "Created(microseconds)": 1111111111111 + } + ] +} diff --git a/test/fixtures/snapchat-2023-11/json/connected_apps.json b/test/fixtures/snapchat-2023-11/json/connected_apps.json new file mode 100644 index 0000000..6b20ab9 --- 
/dev/null +++ b/test/fixtures/snapchat-2023-11/json/connected_apps.json @@ -0,0 +1,11 @@ +{ + "Login History": [], + "Permissions": [ + { + "App": "xxxxxxx", + "Time": "2020-04-13 10:09:08 UTC", + "Type": "xxxxxxx" + } + ], + "Connected Applications": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/email_campaign_history.json b/test/fixtures/snapchat-2023-11/json/email_campaign_history.json new file mode 100644 index 0000000..2e03ff2 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/email_campaign_history.json @@ -0,0 +1,13 @@ +{ + "Email Campaign Subscriptions": [ + { + "Email Campaign": "xxxxxxxxxxxxxxxx", + "Opt Out Status": "xxxxxxxxxxxx" + }, + { + "Email Campaign": "xxxxxxxxxxxxxxx", + "Opt Out Status": "xxxxxxxxxxxx" + } + ], + "Email Campaign History": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/friends.json b/test/fixtures/snapchat-2023-11/json/friends.json new file mode 100644 index 0000000..604d5cb --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/friends.json @@ -0,0 +1,100 @@ +{ + "Friends": [ + { + "Username": "xxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxxxx" + } + ], + "Friend Requests Sent": [ + { + "Username": "xxxxxxxxxx", + "Display Name": "xxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxxx", + "Display Name": "xxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxxxxxxxx" + } + ], + "Blocked Users": [ + { + "Username": 
"xxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + } + ], + "Deleted Friends": [ + { + "Username": "xxxxxx", + "Display Name": "xxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + } + ], + "Hidden Friend Suggestions": [], + "Ignored Snapchatters": [ + { + "Username": "xxxxxxxxx", + "Display Name": "xxxxxxxxxxxxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxx", + "Display Name": "xxxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + } + ], + "Pending Requests": [ + { + "Username": "xxxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + }, + { + "Username": "xxxxxxxxxxxxxx", + "Display Name": "xxxxxxxxxxxxx", + "Creation Timestamp": "2020-04-13 10:09:08 UTC", + "Last Modified Timestamp": "2020-04-13 10:09:08 UTC", + "Source": "xxxxxxxxxxxxxxxx" + } + ], + "Shortcuts": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/in_app_surveys.json b/test/fixtures/snapchat-2023-11/json/in_app_surveys.json new file mode 100644 
index 0000000..6ffd77c --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/in_app_surveys.json @@ -0,0 +1,26 @@ +{ + "Survey 2020/04/12": [ + { + "Time": "xxxxxxxxxxxx", + "Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Survey Response": "xxxxxxxxxx" + }, + { + "Time": "xxxxxxxxxxxx", + "Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Survey Response": "xxx" + } + ], + "Survey 2020/04/13": [ + { + "Time": "xxxxxxxxxxxx", + "Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Survey Response": "xxxxxxxxxxxxxx" + }, + { + "Time": "xxxxxxxxxxxx", + "Survey Question": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Survey Response": "some/path" + } + ] +} diff --git a/test/fixtures/snapchat-2023-11/json/location_history.json b/test/fixtures/snapchat-2023-11/json/location_history.json new file mode 100644 index 0000000..6754237 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/location_history.json @@ -0,0 +1,23 @@ +{ + "Frequent Locations": [], + "Latest Location": [ + { + "City": "", + "Country": "", + "Region": "" + } + ], + "Home & Work": {}, + "Daily Top Locations": [], + "Top Locations Per Six-Day Period": [], + "Location History": [], + "Businesses and public places you may have visited": [], + "Areas you may have visited in the last two years": [ + { + "Time": "some/path", + "City": "xxxxxx", + "Region": "xxxxxxxx", + "Postal Code": "11111" + } + ] +} diff --git a/test/fixtures/snapchat-2023-11/json/ranking.json b/test/fixtures/snapchat-2023-11/json/ranking.json new file mode 100644 index 0000000..ca60e14 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/ranking.json @@ -0,0 +1,6 @@ +{ + "Number of Stories Viewed": [ + 1 + ], + "Content Interests": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/shared_story.json b/test/fixtures/snapchat-2023-11/json/shared_story.json new file 
mode 100644 index 0000000..bb15c44 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/shared_story.json @@ -0,0 +1,11 @@ +{ + "Shared Story": [], + "Spotlight History": [ + { + "Story Date": "2020-04-13 10:09:08 UTC", + "Story URL": "url://somewhere", + "Action Type": "xxxx", + "View Time": "xxxxxxxxxxxxx" + } + ] +} diff --git a/test/fixtures/snapchat-2023-11/json/snapchat_ai.json b/test/fixtures/snapchat-2023-11/json/snapchat_ai.json new file mode 100644 index 0000000..892aecd --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/snapchat_ai.json @@ -0,0 +1,4 @@ +{ + "My AI Content": [], + "My AI Memory": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/subscriptions.json b/test/fixtures/snapchat-2023-11/json/subscriptions.json new file mode 100644 index 0000000..30eb8ab --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/subscriptions.json @@ -0,0 +1,10 @@ +{ + "Public Users": [ + "xxxxxxxxxxxxxxx" + ], + "Publishers": [], + "Stories": [], + "Last Active Timezone": "some/path", + "Push Notifications": [], + "Hidden Category Sections": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/terms_history.json b/test/fixtures/snapchat-2023-11/json/terms_history.json new file mode 100644 index 0000000..d6321d7 --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/terms_history.json @@ -0,0 +1,15 @@ +{ + "Snap Inc. 
Terms of Service": [ + { + "Version": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "Acceptance Date": "2020-04-13 10:09:08 UTC" + }, + { + "Version": "xxxxxxxxxxxxxxxxxxxxxxxxx", + "Acceptance Date": "2020-04-13 10:09:08 UTC" + } + ], + "Custom Creative Tools Terms": [], + "Business Services Terms": [], + "Games Terms": [] +} diff --git a/test/fixtures/snapchat-2023-11/json/user_profile.json b/test/fixtures/snapchat-2023-11/json/user_profile.json new file mode 100644 index 0000000..d5cde8c --- /dev/null +++ b/test/fixtures/snapchat-2023-11/json/user_profile.json @@ -0,0 +1,39 @@ +{ + "App Profile": { + "Country": "xx", + "Creation Time": "2020-04-13 10:09:08 UTC", + "Account Creation Country": "xxxxxxx", + "Platform Version": "xxxxxxx", + "In-app Language": "xx" + }, + "Demographics": { + "Cohort Age": "", + "Derived Ad Demographic": "" + }, + "Subscriptions": [], + "Engagement": [], + "Discover Channels Viewed": [], + "Breakdown of Time Spent on App": [], + "Ads You Interacted With": [], + "Interest Categories": [ + "xxxxxx", + "xxxxxxxxxxxxxxxxxxx" + ], + "Content Categories": [ + "xxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "some/path" + ], + "Geographic Information": [], + "Interactions": { + "Web Interactions": [ + "xxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxx" + ], + "App Interactions": [ + "url://somewhere", + "url://somewhere" + ] + }, + "Off-Platform Sharing": [], + "Mobile Ad Id": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +} diff --git a/test/fixtures/snapchat-2023-11/memories/2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg b/test/fixtures/snapchat-2023-11/memories/2020-01-01_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-main.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2762dcad1eb6ad77e35090839c46978151217330 GIT binary patch literal 2294 zcmdT@X*8RO8vY`LW@;-!?W46*wAQNXGCECFVo7NmWm;S6v^XM&;9bQ&wJkOInQ~{`+{#lK5!I($;!dx zWMMEln7lkp0j>gvZ`%e}S5n@tqN%Q>rKyfUXzLjpXzT3KMIa2V40oBBnp>FfG_ba{ 
zHnTN0GdKH_1R^gl4_APz!QpCV+6Zm4{~KT%psE0LL9Cz z&uZRkiVy@^+SbCB^dIJiT&MdZV~K$Yo+7~|Zex_&;g{2VsnP7DalNr1)z#y4`Podn zM?Ww-&{s6(x^l`xj-H%45pumHtqkemwu)N;0oId*ya>B!uiZ^_v1-i}&apyQBfmZx zvsc~@Px{e3$8Wi4TwWIRQn9(AQCVI*eDVCr_Q_H0TFR-FV?BXsO?1)3q~OrN zdpDizM>g5aQU~k=Go32ojVjGR^{XB$Iw_q|%P6f!9y`d^e5-i%_iIHcf<3;o@TPM4 z=f@N~X~Hex#drQx7dP&MB&rR1UFM0bgwRqE-xKkQBtj2Mkg;C|x38yql!l!CMCj)1 zLHCg6h!RC@LQPf~vQvLzNz+}H{JRuctf|EHx4G>@MH``4IG>-fFX*@aVE-_dalM7$ zL$gCu#+JotZD06E$8k=A(M-&qhh8K&XP99!smB}a>u91;U@ggf)as_!ma$_7Or^{l z4I`D^C64?k48JgR!&nRnMi1IG!bj_He*AI)uTMMNnYRrU>yzQXz@QzUj5AzIzj59| z2h&QWOMcp@sTZp|Pdg%gvI6P13X9yDt0%hDaZ_oIh(yeMv52DNalooKbtJVH8xp$T zjajqhD|amgUa57|=*Dn7rRDve=1n&RE!0_4HK`tw8Rj6*PVAvb7*OVa{Z9VtCYO7~ z=?{M%h$*CRx;C_W8+gUG9_dZA-W!2PY^}Z&O<#LZeg6Vi*dQzpxB4g7ABA1Rf0mDi2-BqOQL%uzs@pI^DTt3?zAWQ-vc`C2JDfk@ z`^<#P0U&U4Jw^!x1~F?AOQ{?!83cBC3~A1okQd^A{s;oZbiV}M^L=MfxJzrk%d@<2 z)|{m66G`U=yU%-3xD+6q32vXV(z#5*i8`7wX)o=!PVeKPfoZq!Y--_TM48uV(R|@t zaF__%bYM8py4ss&;hi^J(qx}(8d=*{kG^X^TTnH>KEL{`S__S)*vj~1=(5Uz~FlH&NZh0SY!=-Oz2z`duFpBfG}7p_RZ|CIq#>tZ>KX)QSSq>0v&f3dyW z$THnLl7s1pz!Up#dx+=dpKk8K<$cT z+yUve9EVRz^J-J(7@IZjE>0~@>*6iGxsM}7H1XI@TArb2fv~o=j<%|H1XXr}Rkb<% zta}m{n|a4CgZ#!ix<~*~dZWb+3(r2%RYTY}bE^F{MXA8wA*Nc7a88PE5`7i4J(>LY z7zJa=@r}t@viXi*R0~nw2=)?LgFuSGzy`hvFCM-OnN_(Jk$Wk{Jdr@mNF`}=tLyp# z_KRi*jcFT*t2^x_?mFnIP6So)tBR7#xxd^s)i6Uwaj_-04=IorJMOz4IDt5vQuo;C e5kmi<#=#(KpK86JvEV+D{a "%" # (Though you should remove the end `> "%"` first to get just the output without # persisting to be sure it's what you want first) -def scrub: - walk( - if type == "string" then - if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then - "1.1.1.1" - elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then - "2000:0000:0000:0000:0000:0000:0000:0000" - elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then - "not_a_real_email@example.com" - elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then - # Leave these alone, you 
will have to manually go through these later and replace with - # placeholders - # TODO: jq 1.7 adds debug(), use this instead when I can upgrade jq, otherwise - # you need to manually grep for MANUAL REPAIR NEEDED for now - ("MANUAL REPAIR NEEDED: \(.)" | stderr) | . - elif test("://") then - "url://somewhere" - elif test("/") then - "some/path" - else - "xxx" - end - elif type == "number" then - if 946702800 <= . and . <= 1893474000 then - # Take modulo 1 year to get variance in the output, then add offset to bring to ~2024 - ((((. % 31557600) + 1704067200) / 5000 | floor) * 5000) - else - 69 - end - elif type == "array" then - # Keep only 2 elements, but scrub *those* elements - if length > 1 then - [ (.[0] | scrub), (.[1] | scrub) ] - elif length > 0 then - [ (.[0] | scrub) ] - else - [] - end + +def scrub_key: + if test("^[0-9]+$") then + ("1" * length) + else + . + end; + +def scrub_primitive: + if type == "string" then + if test("^(([0-9]{1,3}\\.){3}[0-9]{1,3})$") then + # IPv4 + "1.1.1.1" + elif test("^([0-9a-fA-F]{0,4}:){2,7}[0-9a-fA-F]{0,4}$") then + # IPv6 + "2000:0000:0000:0000:0000:0000:0000:0000" + elif test("^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$") then + # Email-like + "not_a_real_email@example.com" + elif test("\\.(jpg|jpeg|png|gif|bmp|webp|svg|ico|tiff|mp3|wav|flac|aac|ogg|wma|m4a|mp4|avi|mkv|mov|wmv|flv|webm)$"; "i") then + # Leave these alone, you will have to manually go through these later and replace with + # placeholders + # TODO: jq 1.7 adds debug(), use this instead when I can upgrade jq, otherwise + # you need to manually grep for MANUAL REPAIR NEEDED for now + ("MANUAL REPAIR NEEDED: \(.)" | stderr) | . 
+ elif test("://") then + "url://somewhere" + elif test("/") then + "some/path" + elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}[+\\-][0-9]{2}:[0-9]{2}$") then + # iso date time without millis with timezone + "2020-04-13T10:09:08+00:00" + elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]{1,6})?[+\\-][0-9]{2}:[0-9]{2}$") then + # iso date time with millis with timezone + "2020-04-13T10:09:08.000000+00:00" + elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2} UTC") then + # Date format from snapchat export + "2020-04-13 10:09:08 UTC" + elif test("^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}") then + # Date format from snapchat export + "2020-04-13 10:09:08" + elif test("^[0-9]+$") then + # preserve length of the string + "1" * length + elif test("^[0-9a-fA-F]+$") then #hexadecimal string + # repeat the hex pattern and truncate to original length + ("a1" * length)[:length] + elif . == "" then + # prevents empty string from just returning null instead of empty string + "" else - . + # Preserve string length for other strings + "x" * length end - ); -scrub \ No newline at end of file + elif type == "number" then + if 946702800 <= . and . <= 1893474000 then + # Take modulo 1 year to get variance in the output, then add offset to bring to ~2024 + ((((. % 31557600) + 1704067200) / 5000 | floor) * 5000) + elif . == (. | floor) then + # Integer - preserve digit count + (tostring | length) as $len | ("1" * $len) | tonumber + else + 8.08 + end + elif type == "boolean" then + # Replace all booleans with false, this can give sensative info away based + # on what the key was in the data + false + else + . 
+ end; + +def scrub: + if type == "object" then + # Apply scrubbing to both keys and values + with_entries(.key |= scrub_key | .value |= scrub) + elif type == "array" then + # Keep only 2 elements, but scrub *those* elements + .[:2] | map(scrub) + else + # Scrub a primitive value + scrub_primitive + end; + +# Call scrub +scrub diff --git a/util/scrub.ts b/util/scrub.ts index 07024c4..7b1cade 100755 --- a/util/scrub.ts +++ b/util/scrub.ts @@ -27,9 +27,6 @@ assert(targetDir, "Usage: ./scrub.ts "); const targetPath = path.resolve(targetDir); -// const stat = await fs.stat(targetPath); -// assert(stat.isDirectory(), ""); - const [notADir] = await ptry($`test -d ${targetPath}`); assert(!notADir, `Error: '${targetPath}' is not a directory`); @@ -49,12 +46,16 @@ console.log("filePaths", filePaths); for (const file of filePaths) { console.log(`Processing: ${file}`); const tmpFile = `${file}.tmp`; + const piiFile = `${file}.DELETE-THIS-HAS-PII`; const [jqErr] = await ptry($`jq -f ${scrubJq} ${file} > ${tmpFile}`); assert(!jqErr, `Error processing ${file}: ${jqErr}`); - const [mvErr] = await ptry($`mv ${tmpFile} ${file}`); - assert(!mvErr, `Error moving ${tmpFile} to ${file}: ${mvErr}`); + const [mvErr] = await ptry($`mv ${file} ${piiFile}`); + assert(!mvErr, `Error moving ${file} to ${piiFile}: ${mvErr}`); + + const [mv2Err] = await ptry($`mv ${tmpFile} ${file}`); + assert(!mv2Err, `Error moving ${tmpFile} to ${file}: ${mv2Err}`); } console.log();