Compare commits

...

10 commits

306 changed files with 8549 additions and 2040 deletions

6
.gitignore vendored
View file

@ -1 +1,7 @@
node_modules/
your.db
data-export/oldfacebook.ts
OUTTEST
.gitSAFE
out.manifest
test.manifest

View file

@ -1,20 +0,0 @@
# AI Requirements Document
Below are the requirements for AI.
## General guidelines
* Use Typescript throughout. Therefore, all code files should be .ts or .tsx
* When importing use the exact file extension. Import .ts directly (do not convert it to .js)
## Server guidelines
* Keep things simple, use the express ecosystem
## UI guidelines
* Components should use JSX, so .tsx extension.
* Use Preact and Preact signals.
* Use function components, never class components
* Components should be in CamelCase
* Components have a default function export the same name as their file
* When components have props, name it `ComponentNameProps` as an interface at the top of the file
* Any styles should go in a separate `ComponentName.css` file if necessary

File diff suppressed because it is too large Load diff

785
data-export/facebook.ts Normal file
View file

@ -0,0 +1,785 @@
import { TaskTargetPipelineHelper } from "./task.ts";
// Module augmentation: declares every Facebook pipeline step defined in this
// file as a member of TaskTargetPipelineHelper so that fluent chains such as
// `.read().facebook_comments_v1()` type-check. Each member's type is taken
// from the function of the same name below via `typeof`.
// NOTE(review): the augmentation targets "../data-export/task.ts" while the
// import above uses "./task.ts" — presumably the same module; confirm both
// specifiers resolve to one file, otherwise the merge will not apply.
declare module "../data-export/task.ts" {
interface TaskTargetPipelineHelper {
// Top-level entry points (one per export layout).
facebook: typeof facebook;
facebook_v2: typeof facebook_v2;
// Per-table steps. `_generic` takes the JSON property name; `_v1` / `_v2`
// call the generic with the key used by that export layout.
facebook_notifications_generic: typeof facebook_notifications_generic;
facebook_notifications_v1: typeof facebook_notifications_v1;
facebook_notifications_v2: typeof facebook_notifications_v2;
facebook_installed_apps_generic: typeof facebook_installed_apps_generic;
facebook_installed_apps_v1: typeof facebook_installed_apps_v1;
facebook_installed_apps_v2: typeof facebook_installed_apps_v2;
facebook_comments_generic: typeof facebook_comments_generic;
facebook_comments_v1: typeof facebook_comments_v1;
facebook_comments_v2: typeof facebook_comments_v2;
facebook_people_interactions_generic: typeof facebook_people_interactions_generic;
facebook_people_interactions_v1: typeof facebook_people_interactions_v1;
facebook_people_interactions_v2: typeof facebook_people_interactions_v2;
facebook_marketplace_items_sold_generic: typeof facebook_marketplace_items_sold_generic;
facebook_marketplace_items_sold_v1: typeof facebook_marketplace_items_sold_v1;
facebook_marketplace_items_sold_v2: typeof facebook_marketplace_items_sold_v2;
facebook_searches_generic: typeof facebook_searches_generic;
facebook_searches_v1: typeof facebook_searches_v1;
facebook_searches_v2: typeof facebook_searches_v2;
facebook_account_activity_generic: typeof facebook_account_activity_generic;
facebook_account_activity_v1: typeof facebook_account_activity_v1;
facebook_account_activity_v2: typeof facebook_account_activity_v2;
// Messages and friends only have a generic form (no _v1/_v2 wrappers).
facebook_messages_generic: typeof facebook_messages_generic;
facebook_friends_generic: typeof facebook_friends_generic;
facebook_admin_records_generic: typeof facebook_admin_records_generic;
facebook_admin_records_v1: typeof facebook_admin_records_v1;
facebook_admin_records_v2: typeof facebook_admin_records_v2;
facebook_authorized_logins_generic: typeof facebook_authorized_logins_generic;
facebook_authorized_logins_v1: typeof facebook_authorized_logins_v1;
facebook_authorized_logins_v2: typeof facebook_authorized_logins_v2;
facebook_contact_verification_generic: typeof facebook_contact_verification_generic;
facebook_contact_verification_v1: typeof facebook_contact_verification_v1;
facebook_contact_verification_v2: typeof facebook_contact_verification_v2;
facebook_pages_unfollowed_generic: typeof facebook_pages_unfollowed_generic;
facebook_pages_unfollowed_v1: typeof facebook_pages_unfollowed_v1;
facebook_pages_unfollowed_v2: typeof facebook_pages_unfollowed_v2;
facebook_account_accesses_generic: typeof facebook_account_accesses_generic;
facebook_account_accesses_v1: typeof facebook_account_accesses_v1;
facebook_account_accesses_v2: typeof facebook_account_accesses_v2;
facebook_groups_joined_generic: typeof facebook_groups_joined_generic;
facebook_groups_joined_v1: typeof facebook_groups_joined_v1;
facebook_groups_joined_v2: typeof facebook_groups_joined_v2;
// Group posts have distinct v1/v2 implementations and no shared generic.
facebook_group_posts_v1: typeof facebook_group_posts_v1;
facebook_group_posts_v2: typeof facebook_group_posts_v2;
}
}
// Runtime half of the augmentation above: copies every Facebook step onto
// TaskTargetPipelineHelper.prototype so the methods exist on helper instances.
// This runs at module load; the functions are referenced before their textual
// definitions, which works because function declarations are hoisted.
// Keep this list in sync with the interface declaration — a name missing here
// type-checks but is undefined at runtime.
Object.assign(TaskTargetPipelineHelper.prototype, {
facebook,
facebook_v2,
facebook_notifications_generic,
facebook_notifications_v1,
facebook_notifications_v2,
facebook_installed_apps_generic,
facebook_installed_apps_v1,
facebook_installed_apps_v2,
facebook_comments_generic,
facebook_comments_v1,
facebook_comments_v2,
facebook_people_interactions_generic,
facebook_people_interactions_v1,
facebook_people_interactions_v2,
facebook_marketplace_items_sold_generic,
facebook_marketplace_items_sold_v1,
facebook_marketplace_items_sold_v2,
facebook_searches_generic,
facebook_searches_v1,
facebook_searches_v2,
facebook_account_activity_generic,
facebook_account_activity_v1,
facebook_account_activity_v2,
facebook_admin_records_generic,
facebook_admin_records_v1,
facebook_admin_records_v2,
facebook_authorized_logins_generic,
facebook_authorized_logins_v1,
facebook_authorized_logins_v2,
facebook_contact_verification_generic,
facebook_contact_verification_v1,
facebook_contact_verification_v2,
facebook_account_accesses_generic,
facebook_account_accesses_v1,
facebook_account_accesses_v2,
facebook_pages_unfollowed_generic,
facebook_pages_unfollowed_v1,
facebook_pages_unfollowed_v2,
facebook_groups_joined_generic,
facebook_groups_joined_v1,
facebook_groups_joined_v2,
facebook_messages_generic,
facebook_friends_generic,
facebook_group_posts_v1,
facebook_group_posts_v2,
});
/**
 * Notifications table.
 *
 * Handles about_you/notifications.json (old layout) as well as
 * logged_information/notifications.json (new layout); the two differ only in
 * the name of the top-level array, which the caller supplies.
 *
 * @param prop Name of the top-level JSON property holding the notification array.
 * @returns Whatever `.types(...)` returns for the queued `jq -r` step.
 */
function facebook_notifications_generic(this: TaskTargetPipelineHelper, prop: string) {
  // Header row first, then one CSV row per entry; the epoch timestamp is
  // rendered as ISO 8601 by jq's todateiso8601.
  const jqProgram = `["timestamp","unread","href","text"],
(
.${prop}[]
| [(.timestamp | todateiso8601), .unread, .href, .text]
)
| @csv`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step.types(["time", "text", "text", "text"]);
}
/** v1 flavour: delegates to the generic step with the `notifications` key. */
function facebook_notifications_v1(this: TaskTargetPipelineHelper) {
  const prop = "notifications";
  return this.facebook_notifications_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `notifications_v2` key. */
function facebook_notifications_v2(this: TaskTargetPipelineHelper) {
  const prop = "notifications_v2";
  return this.facebook_notifications_generic(prop);
}
/**
 * Installed apps table: app name plus the time it was added.
 *
 * @param prop Name of the top-level JSON property holding the app array.
 * @returns Whatever `.types(...)` returns for the queued `jq -r` step.
 */
function facebook_installed_apps_generic(this: TaskTargetPipelineHelper, prop: string) {
  // added_timestamp is converted with todateiso8601; name is passed through.
  const jqProgram = `
["name","added_timestamp"],
(
.${prop}[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step.types(["text", "time"]);
}
/** v1 flavour: delegates to the generic step with the `installed_apps` key. */
function facebook_installed_apps_v1(this: TaskTargetPipelineHelper) {
  const prop = "installed_apps";
  return this.facebook_installed_apps_generic(prop);
}
/**
 * v2 flavour: delegates to the generic step with the `installed_apps_v2` key.
 * TODO: the v2 records carry a few more properties than are extracted here.
 */
function facebook_installed_apps_v2(this: TaskTargetPipelineHelper) {
  const prop = "installed_apps_v2";
  return this.facebook_installed_apps_generic(prop);
}
/**
 * Messages table for one conversation file (`.messages[]`).
 *
 * Known gaps: pictures and other non-text payloads are not handled; messages
 * also carry a `.type` and an `is_unsent` flag that are ignored. The "to"
 * column is the fixed placeholder "<other>".
 *
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_messages_generic(this: TaskTargetPipelineHelper) {
  // timestamp_ms is in milliseconds: divide by 1000 and round before
  // handing it to todateiso8601.
  const jqProgram = `
["from","to","timestamp","content"],
(
.messages[]
| [.sender_name, "<other>", ((.timestamp_ms / 1000) | round | todateiso8601), .content]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/**
 * Comments table.
 *
 * Fix: the jq filter previously hard-coded `.comments[]?` and ignored `prop`,
 * so the v2 wrapper (which passes "comments_v2") never matched any rows.
 *
 * @param prop Name of the top-level JSON property holding the comment array
 *             (e.g. "comments" or "comments_v2").
 * @returns Whatever `.types(...)` returns for the queued `jq -r` step.
 */
function facebook_comments_generic(this: TaskTargetPipelineHelper, prop: string) {
  // TODO: .data is an array that has items, but usually just one
  // "data": [
  //   {
  //     "comment": {
  //       "timestamp": 1612923641,
  //       "comment": "xxx",
  //       "author": "xxx xxx",
  //       "group": "xxx"
  //     }
  //   }
  // ],
  // TODO: there's also attachments (media)
  // The trailing `?` keeps jq from erroring when the property is not iterable;
  // the "data" column is still a literal "TODO" placeholder.
  return this.cmd(["jq", "-r", `
["timestamp","data", "title"],
(
.${prop}[]?
| [(.timestamp | todateiso8601), "TODO", .title]
)
| @csv
`])
    .types(["time", "text", "text"]);
}
/** v1 flavour: delegates to the generic step with the `comments` key. */
function facebook_comments_v1(this: TaskTargetPipelineHelper) {
  const prop = "comments";
  return this.facebook_comments_generic(prop);
}
/**
 * v2 flavour: delegates to the generic step with the `comments_v2` key.
 * TODO: no visible difference between v1 and v2 so far; perhaps it is in the data?
 */
function facebook_comments_v2(this: TaskTargetPipelineHelper) {
  const prop = "comments_v2";
  return this.facebook_comments_generic(prop);
}
/**
 * Friend-list style table (name + timestamp). Used for friends, sent /
 * received / rejected requests and removed friends — the caller picks the array.
 *
 * @param prop Name of the top-level JSON property holding the array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_friends_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["name", "timestamp"],
(
.${prop}[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/**
 * People-interactions table: flattens every `.entries[]` item found under
 * each element of the given array into name / uri / timestamp rows.
 *
 * @param prop Name of the top-level JSON property holding the array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_people_interactions_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["name", "uri", "timestamp"],
(
.${prop}[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `people_interactions` key. */
function facebook_people_interactions_v1(this: TaskTargetPipelineHelper) {
  const prop = "people_interactions";
  return this.facebook_people_interactions_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `people_interactions_v2` key. */
function facebook_people_interactions_v2(this: TaskTargetPipelineHelper) {
  const prop = "people_interactions_v2";
  return this.facebook_people_interactions_generic(prop);
}
/**
 * Marketplace listings table: title, price, seller, creation time,
 * coordinates and description.
 *
 * TODO: updated_timestamp may not exist on every record, so it is left out for now.
 *
 * @param prop Name of the top-level JSON property holding the listing array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_marketplace_items_sold_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["title", "price", "seller", "created_timestamp", "latitude", "longitude", "description"],
(
.${prop}[]
| [.title, .price, .seller, (.created_timestamp | todateiso8601), .location.coordinate.latitude, .location.coordinate.longitude, .description]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `items_selling` key. */
function facebook_marketplace_items_sold_v1(this: TaskTargetPipelineHelper) {
  const prop = "items_selling";
  return this.facebook_marketplace_items_sold_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `items_selling_v2` key. */
function facebook_marketplace_items_sold_v2(this: TaskTargetPipelineHelper) {
  const prop = "items_selling_v2";
  return this.facebook_marketplace_items_sold_generic(prop);
}
/**
 * Search-history table: title, the text of the first `.data` element, and
 * the timestamp.
 *
 * TODO: `data` and `attachments` each hold a single "text" field inside the
 * first array element and appear to carry the same value — do they ever differ?
 *
 * @param prop Name of the top-level JSON property holding the search array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_searches_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["title","data","timestamp"],
(
.${prop}[]
| [.title, .data[0].text, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `searches` key. */
function facebook_searches_v1(this: TaskTargetPipelineHelper) {
  const prop = "searches";
  return this.facebook_searches_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `searches_v2` key. */
function facebook_searches_v2(this: TaskTargetPipelineHelper) {
  const prop = "searches_v2";
  return this.facebook_searches_generic(prop);
}
/**
 * Account-activity table: action, network/browser details, location fields,
 * site name and timestamp. The `ip_address` field is emitted under the
 * shorter "ip" column header.
 *
 * @param prop Name of the top-level JSON property holding the activity array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_account_activity_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["action", "ip", "user_agent", "datr_cookie", "city", "region", "country", "site_name","timestamp"],
(
.${prop}[]
| [.action, .ip_address, .user_agent, .datr_cookie, .city, .region, .country, .site_name, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `account_activity` key. */
function facebook_account_activity_v1(this: TaskTargetPipelineHelper) {
  const prop = "account_activity";
  return this.facebook_account_activity_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `account_activity_v2` key. */
function facebook_account_activity_v2(this: TaskTargetPipelineHelper) {
  const prop = "account_activity_v2";
  return this.facebook_account_activity_generic(prop);
}
/**
 * Administrative-records table: event, session creation time, IP address,
 * user agent and datr cookie.
 *
 * Fix: the jq filter previously hard-coded `.admin_records[]` and ignored
 * `prop`, so the v2 wrapper (which passes "admin_records_v2") read the wrong key.
 *
 * @param prop Name of the top-level JSON property holding the record array
 *             (e.g. "admin_records" or "admin_records_v2").
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_admin_records_generic(this: TaskTargetPipelineHelper, prop: string) {
  // Only created_timestamp is read from the nested .session object; the other
  // fields are taken from the record itself, as in the original filter.
  return this.cmd(["jq", "-r", `
["event","created_timestamp","ip_address","user_agent","datr_cookie"],
(
.${prop}[]
| [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie]
)
| @csv
`]);
}
/** v1 flavour: delegates to the generic step with the `admin_records` key. */
function facebook_admin_records_v1(this: TaskTargetPipelineHelper) {
  const prop = "admin_records";
  return this.facebook_admin_records_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `admin_records_v2` key. */
function facebook_admin_records_v2(this: TaskTargetPipelineHelper) {
  const prop = "admin_records_v2";
  return this.facebook_admin_records_generic(prop);
}
/**
 * Authorized-logins / active-sessions table.
 *
 * `.location`, `.app` and `.session_type` do not seem to exist in v1 records,
 * so they fall back to an empty string via jq's `//` operator. (Only a single
 * v1 entry was available to compare against.)
 *
 * @param prop Name of the top-level JSON property holding the session array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_authorized_logins_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["name","created_timestamp","updated_timestamp","ip_address","user_agent","location","app", "session_type", "datr_cookie"],
(
.${prop}[]
| [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address, .user_agent, .location // "", .app // "", .session_type // "", .datr_cookie]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `recognized_devices` key. */
function facebook_authorized_logins_v1(this: TaskTargetPipelineHelper) {
  const prop = "recognized_devices";
  return this.facebook_authorized_logins_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `active_sessions_v2` key. */
function facebook_authorized_logins_v2(this: TaskTargetPipelineHelper) {
  const prop = "active_sessions_v2";
  return this.facebook_authorized_logins_generic(prop);
}
/**
 * Contact-verification table: action, timestamp, site and IP address.
 *
 * @param prop Name of the top-level JSON property holding the verification array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_contact_verification_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["action", "timestamp", "site", "ip_address"],
(
.${prop}[]
| [.action, (.timestamp | todateiso8601), .site, .ip_address]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `contact_verifications` key. */
function facebook_contact_verification_v1(this: TaskTargetPipelineHelper) {
  const prop = "contact_verifications";
  return this.facebook_contact_verification_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `contact_verifications_v2` key. */
function facebook_contact_verification_v2(this: TaskTargetPipelineHelper) {
  const prop = "contact_verifications_v2";
  return this.facebook_contact_verification_generic(prop);
}
/**
 * Logins-and-logouts table: action, timestamp, site and IP address.
 *
 * TODO: records may also carry an updated_timestamp, but it does not always exist.
 *
 * @param prop Name of the top-level JSON property holding the access array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_account_accesses_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["action", "timestamp", "site", "ip_address"],
(
.${prop}[]
| [.action, (.timestamp | todateiso8601), .site, .ip_address]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `account_accesses` key. */
function facebook_account_accesses_v1(this: TaskTargetPipelineHelper) {
  const prop = "account_accesses";
  return this.facebook_account_accesses_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `account_accesses_v2` key. */
function facebook_account_accesses_v2(this: TaskTargetPipelineHelper) {
  const prop = "account_accesses_v2";
  return this.facebook_account_accesses_generic(prop);
}
/**
 * Unfollowed-pages table: title and timestamp.
 *
 * TODO: the `.data` field is not extracted; on the only record available it
 * looked like nothing more than the page "name".
 *
 * @param prop Name of the top-level JSON property holding the array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_pages_unfollowed_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `pages_unfollowed` key. */
function facebook_pages_unfollowed_v1(this: TaskTargetPipelineHelper) {
  const prop = "pages_unfollowed";
  return this.facebook_pages_unfollowed_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `pages_unfollowed_v2` key. */
function facebook_pages_unfollowed_v2(this: TaskTargetPipelineHelper) {
  const prop = "pages_unfollowed_v2";
  return this.facebook_pages_unfollowed_generic(prop);
}
/**
 * Group-membership table: title and timestamp.
 *
 * Records also have a `data` property (v2 only), but it is redundant and is
 * not extracted.
 *
 * @param prop Name of the top-level JSON property holding the array.
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_groups_joined_generic(this: TaskTargetPipelineHelper, prop: string) {
  const jqProgram = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/** v1 flavour: delegates to the generic step with the `groups_joined` key. */
function facebook_groups_joined_v1(this: TaskTargetPipelineHelper) {
  const prop = "groups_joined";
  return this.facebook_groups_joined_generic(prop);
}
/** v2 flavour: delegates to the generic step with the `groups_joined_v2` key. */
function facebook_groups_joined_v2(this: TaskTargetPipelineHelper) {
  const prop = "groups_joined_v2";
  return this.facebook_groups_joined_generic(prop);
}
/**
 * Group posts (v1 layout): rows come from `.group_posts.activity_log_data[]`.
 *
 * TODO: attachment metadata is not extracted, and the data field may hold
 * another timestamp (it looked identical everywhere). The "data" column is a
 * literal "TODO" placeholder for now.
 *
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_group_posts_v1(this: TaskTargetPipelineHelper) {
  const jqProgram = `
["title", "data", "timestamp"],
(
.group_posts.activity_log_data[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/**
 * Group posts (v2 layout): rows come from `.group_posts_v2[]`.
 *
 * TODO: `data` and `attachments` still need to be pulled out; the "data"
 * column is a literal "TODO" placeholder for now.
 *
 * @returns Whatever `this.cmd(...)` returns for the queued `jq -r` step.
 */
function facebook_group_posts_v2(this: TaskTargetPipelineHelper) {
  const jqProgram = `
["title", "data", "timestamp"],
(
.group_posts_v2[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  const step = this.cmd(["jq", "-r", jqProgram]);
  return step;
}
/**
 * Entry point for the newer ("v2") Facebook export layout.
 *
 * Walks the known JSON files of the export, attaching the matching table step
 * to each one. Every branch is started with `collect(gathered)`, and the
 * contents of that set are returned as an array.
 *
 * There is no counterpart here for accounts_and_profiles.json or
 * your_off-facebook_activity.json from the older layout.
 *
 * @returns The helpers accumulated in the set passed to `collect`, in insertion order.
 */
function facebook_v2(this: TaskTargetPipelineHelper) {
  // Generic ID for everything in here; more specific IDs are set per branch where needed.
  const root = this.setId(t=>`Facebookv2 - ${t.basename}`);
  const gathered: Set<TaskTargetPipelineHelper> = new Set();
  // Shared prefix of almost every branch: register it, point at one file, read it.
  const fromFile = (relPath: string) => root.collect(gathered).cd(relPath).read();

  fromFile(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`).facebook_installed_apps_v2();
  fromFile(`your_facebook_activity/comments_and_reactions/comments.json`).facebook_comments_v2();

  // Conversation files are message_1.json, message_2.json, ... — the file name
  // alone is not specific enough, so the ID also includes the conversation folder.
  const conversations = root.collect(gathered).glob(`your_facebook_activity/messages/**/*.json`);
  conversations
    .setId(t=>`Facebookv2 - Messages ${t.basenameN(2)}`)
    .read()
    .facebook_messages_generic();

  // Time spent: only the "Intervals" label is used; each vec entry yields a start/end pair.
  fromFile(`your_facebook_activity/other_activity/time_spent_on_facebook.json`)
    .cmd(["jq", "-r", `
["start","end"],
(
.label_values[]
| select(.label == "Intervals")
| .vec[]
| [
(.dict[0].timestamp_value | todateiso8601),
(.dict[1].timestamp_value | todateiso8601)
]
)
| @csv
`]);

  fromFile(`your_facebook_activity/groups/your_group_membership_activity.json`).facebook_groups_joined_v2();
  fromFile(`your_facebook_activity/groups/group_posts_and_comments.json`).facebook_group_posts_v2();
  fromFile(`your_facebook_activity/pages/pages_and_profiles_you've_unfollowed.json`).facebook_pages_unfollowed_v2();

  fromFile(`connections/friends/your_friends.json`).facebook_friends_generic("friends_v2");
  fromFile(`connections/friends/rejected_friend_requests.json`).facebook_friends_generic("rejected_requests_v2");
  fromFile(`connections/friends/received_friend_requests.json`).facebook_friends_generic("received_requests_v2");

  fromFile(`logged_information/activity_messages/people_and_friends.json`).facebook_people_interactions_v2();
  fromFile(`logged_information/search/your_search_history.json`).facebook_searches_v2();
  fromFile(`logged_information/notifications/notifications.json`).facebook_notifications_v2();

  fromFile(`security_and_login_information/account_activity.json`).facebook_account_activity_v2();
  fromFile(`security_and_login_information/record_details.json`).facebook_admin_records_v2();
  fromFile(`security_and_login_information/where_you're_logged_in.json`).facebook_authorized_logins_v2();
  fromFile(`security_and_login_information/email_address_verifications.json`).facebook_contact_verification_v2();
  fromFile(`security_and_login_information/logins_and_logouts.json`).facebook_account_accesses_v2();

  fromFile(`your_facebook_activity/facebook_marketplace/items_sold.json`).facebook_marketplace_items_sold_v2();

  return Array.from(gathered);
}
/**
 * Entry point for the older ("v1") Facebook export layout.
 *
 * Walks the known JSON files of the export and attaches a jq-to-CSV step (or
 * one of the shared `facebook_*_v1` steps) to each. Every branch is started
 * with `collect(col)`, and the contents of that set are returned as an array.
 *
 * Fixes relative to the previous revision:
 *  - `messages/**\/*.json` is a wildcard pattern and is now passed to `glob`
 *    (as the dating, album and v2 message branches already do) instead of `cd`.
 *  - The reactions table selected `.name` for the column headed "title"; it
 *    now selects `.title`.
 *
 * @returns The helpers accumulated in the set passed to `collect`, in insertion order.
 */
function facebook(this: TaskTargetPipelineHelper){
  const p = this.setId(t=>`Facebook - ${t.basename}`); // Generic ID for everything in here
  const col: Set<TaskTargetPipelineHelper> = new Set();

  p.collect(col).cd(`about_you/notifications.json`).read().facebook_notifications_v1();
  //TODO: .fork().skip('face_recognition.json').reason("Not a table, no idea how to use")
  //TODO: .fork().skip('friend_peer_group.json').reason("Not a table, very small file")
  //TODO:.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future")
  //TODO: .fork().todo('preferences.json').reason("Too complex for now")
  //TODO:.fork().todo('visited.json').reason("Too complex for now")
  //TODO:.fork().todo('viewed.json').reason("Too complex for now")

  p.collect(col).cd(`accounts_center/accounts_and_profiles.json`).read()
    .cmd(["jq", "-r", `["service_name","native_app_id","username","email", "phone_number", "name"],
(
.linked_accounts[]
| [.service_name, .native_app_id, .username, .email, .phone_number, .name]
)
| @csv`])
    .csvSink();

  // Each advertiser (.name) owns a list of events; the name is repeated on every event row.
  p.collect(col).cd(`ads_and_businesses/your_off-facebook_activity.json`).read()
    .cmd(["jq", "-r", `
["name","id","type","timestamp"],
(
.off_facebook_activity[]
| .name as $name
| .events[]
| [$name, .id, .type, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  //TODO: .fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json')

  p.collect(col).cd(`apps_and_websites/apps_and_websites.json`).read().facebook_installed_apps_v1();
  // `${facebookRoot}/archive` - no data in my export
  // `${facebookRoot}/campus` - no data in my export
  p.collect(col).cd(`comments/comments.json`).read().facebook_comments_v1();

  p.collect(col).glob(`dating/messages/*.json`) // Files are 0.json, 1.json, etc
    .setId(t=>`Facebook - Dating Messages ${t.basename}`) // Slightly more specific message
    .read()
    .cmd(["jq", "-r", `
["from","to","timestamp","body"],
.recipient as $to
| (
.messages[]
| ["Me", $to, (.timestamp | todateiso8601), .body]
)
| @csv
`])
    .csvSink();//[["timestamp", "numeric"]])
  //todo: your_dating_activity.json, but it only has a few lines and not super useful
  //todo: the other dating files are also just, small

  // TODO: events
  // rcd(`events`);
  // localCollect('event_invitations.json', json, sspawn('jq', [`
  // .events_invited[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // )
  // `]));
  // localCollect('your_event_responses.json', json, sspawn('jq', [`
  // .event_responses.events_joined[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // ) |
  // .event_responses.events_declined[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // ) |
  // .event_responses.events_interested[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // )
  // `]));

  p.collect(col).cd(`facebook_gaming/instant_games.json`)
    .read()
    .cmd(["jq", "-r", `
["game", "added_timestamp"],
(
.instant_games_played[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["added_timestamp", "numeric"]]);

  p.collect(col).cd(`following_and_followers/unfollowed_pages.json`).read().facebook_pages_unfollowed_v1();
  p.collect(col).cd(`following_and_followers/following.json`)
    .read()
    .cmd(["jq", "-r", `
["name", "timestamp"],
(
.following[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  p.collect(col).cd(`following_and_followers/followers.json`)
    .read()
    .cmd(["jq", "-r", `
["name"],
(
.followers[]
| [.name]
)
| @csv
`])
    .csvSink();

  p.collect(col).cd(`friends/sent_friend_requests.json`).read().facebook_friends_generic("sent_requests");
  p.collect(col).cd(`friends/removed_friends.json`).read().facebook_friends_generic("deleted_friends");
  p.collect(col).cd(`friends/rejected_friend_requests.json`).read().facebook_friends_generic("rejected_requests");
  p.collect(col).cd(`friends/received_friend_requests.json`).read().facebook_friends_generic("received_requests");
  p.collect(col).cd(`friends/friends.json`).read().facebook_friends_generic("friends");

  p.collect(col).cd(`groups/your_group_membership_activity.json`).read().facebook_groups_joined_v1();
  p.collect(col).cd(`groups/your_posts_and_comments_in_groups.json`).read().facebook_group_posts_v1();
  // there's also groups.json and events.json but neither has timestamp so they're
  // not really useful right now

  p.collect(col).cd(`interactions/people.json`).read().facebook_people_interactions_v1();
  // `${facebookRoot}/journalist_registration` - no data in my export

  p.collect(col).cd(`likes_and_reactions/pages.json`)
    .read()
    .cmd(["jq", "-r", `
["name", "timestamp"],
(
.page_likes[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  // NOTE(review): the first column is headed "title", so it reads .title
  // (previously .name) — confirm against a real posts_and_comments.json.
  p.collect(col).cd(`likes_and_reactions/posts_and_comments.json`)
    .read()
    .cmd(["jq", "-r", `
["title", "timestamp", "reaction"],
(
.reactions[]
| [.title, (.timestamp | todateiso8601), .data[0].reaction.reaction]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // TODO:
  // rcd(`location`);
  // localCollect('primary_location.json', json);
  // localCollect('primary_public_location.json', json);
  // localCollect('timezone.json', json);

  p.collect(col).cd(`marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v1();

  // Wildcard pattern, so this must go through glob() rather than cd().
  p.collect(col).glob(`messages/**/*.json`) // Files are message_1.json, etc
    .setId(t=>`Facebook - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
    .read()
    .facebook_messages_generic();

  // `${facebookRoot}/music_recommendations` - no data
  // rcd(`news`);
  // localCollect('your_locations.json', json);

  p.collect(col).cd(`other_activity/pokes.json`)
    .read()
    .cmd(["jq", "-r", `
["from", "to","rank","timestamp"],
(
.pokes.data[]
| [.poker, .pokee, .rank, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  p.collect(col).cd(`other_activity/support_correspondences.json`)
    .read()
    // TODO: I'm seeing blanks in .from and .to when the replier was Facebook
    // themselves. Perhaps it's broken?
    // TODO: Attachments
    .cmd(["jq", "-r", `
["from", "to", "subject", "message", "timestamp"],
(
.support_correspondence[].messages[]
| [.from, .to, .subject, .message, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/pages` - no data
  p.collect(col).cd(`payment_history/payment_history.json`)
    .read()
    .cmd(["jq", "-r", `
["from", "to","amount","currency", "type","status","payment_method", "created_timestamp"],
(
.payments.payments[]
| [.sender, .receiver, .amount, .currency, .type, .status, .payment_method, (.created_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["created_timestamp", "numeric"]]);

  // TODO: There's also photos_and_videos/your_videos.json
  // TODO: There's a media_metadata in each of the images too to convert as well as external files
  p.collect(col).glob(`photos_and_videos/album/*.json`)
    // Could use a better name, currently 0.json, 1.json, etc...
    .setId(t=>`Facebook - Album ${t.basename}`) // slightly more specific name; it would be better if we could use the album name
    .read()
    .cmd(["jq", "-r", `
["album","uri","creation_timestamp"],
(
.photos[]
| [.title, .uri, (.creation_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["creation_timestamp", "numeric"]]);

  p.collect(col).cd(`posts/your_pinned_posts.json`)
    .read()
    .cmd(["jq", "-r", `
["name","uri","timestamp"],
(
.pinned_posts[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  // TODO: Glob? I never posted a lot on FB
  p.collect(col).cd(`posts/your_posts_1.json`)
    .read()
    // TODO: Data is an array with objects. .post, .updated_timestamp, separately??
    // TODO: Also attachments
    .cmd(["jq", "-r", `
["title","data","timestamp"],
(
.[]
| [.title, "TODO: data", (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/privacy_checkup` - no data
  // TODO: Shape is non-tabular, but maybe we should handle it?
  // Looks mostly like dupes from other places
  // './profile_information.json': undefined,
  // The minimum amount of data is just .title and .timestamp
  // TODO: Handle data and attachments
  p.collect(col).cd(`profile_information/profile_update_history.json`)
    .read()
    .cmd(["jq", "-r", `
["title","timestamp"],
(
.profile_updates[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/rewards` - no data
  // `${facebookRoot}/saved_items_and_collections` - no data
  p.collect(col).cd(`search_history/your_search_history.json`).read().facebook_searches_v1();

  p.collect(col).cd(`security_and_login_information/account_status_changes.json`)
    .read()
    .cmd(["jq", "-r", `
["status","timestamp"],
(
.account_status_changes[]
| [.status, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  p.collect(col).cd(`security_and_login_information/account_activity.json`).read().facebook_account_activity_v1();
  p.collect(col).cd(`security_and_login_information/administrative_records.json`).read().facebook_admin_records_v1();
  p.collect(col).cd(`security_and_login_information/authorized_logins.json`).read().facebook_authorized_logins_v1();
  p.collect(col).cd(`security_and_login_information/contact_verifications.json`).read().facebook_contact_verification_v1();
  p.collect(col).cd(`security_and_login_information/logins_and_logouts.json`).read().facebook_account_accesses_v1();
  // TODO: datr_cookie_info, looks like a bunch of timestamps
  // a.fork().cd(`login_protection_data.json`)
  // .read()
  // // TODO: updated_timestamp doesn't always exist
  // .cmd(["jq", "-r", `
  // ["name", "created_timestamp", "updated_timestamp", "ip_address"],
  // (
  // .login_protection_data[]
  // | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address]
  // )
  // | @csv
  // `])
  // TODO: mobile_devices, only a couple entries
  // TODO: used_ip_addresses
  // TODO: where_you've logged in
  // TODO: your_facebook_activity, useless and small
  // `${facebookRoot}/short_videos` - no data in my export
  // `${facebookRoot}/saved_items_and_collections` - no data in my export

  p.collect(col).cd(`stories/story_reactions.json`)
    .read()
    .cmd(["jq", "-r", `
["title", "timestamp"],
(
.stories_feedback[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/trash` - no data in my export
  // `${facebookRoot}/voice_recording_and_transcription` - no data in my export
  // `${facebookRoot}/volunteering` - no data in my export
  // `${facebookRoot}/voting_location_and_reminders` - only small 1-property things
  // `${facebookRoot}/your_places` - no data in my export
  // `${facebookRoot}/your_topics` - no data in my export
  return Array.from(col);
}

View file

@ -1,59 +0,0 @@
import { Task } from "./task.ts";
/**
 * Older Task-based Facebook importer (this file is removed by the diff).
 *
 * Forks the current task, changes into `path`, and then wires three
 * sub-directories (about_you, accounts_center, ads_and_businesses) into
 * jq -> sqlite-utils pipelines that insert into your.db. Files that are not
 * handled are marked via skip()/todo() with a reason.
 *
 * NOTE(review): each `.fork()` below is called on the result of the previous
 * chain step — presumably fork() returns a sibling rooted at the same
 * directory; confirm against the Task implementation.
 *
 * @param path Directory passed to `cd` on the forked task (presumably the root of the export — confirm).
 * @returns The forked task `t` that was pointed at `path`.
 */
export function facebook(this: Task, path: string){
const t = this.fork();
t.cd(path);
// about_you: only notifications.json is imported; the rest is skipped or deferred.
t.fork().cd(`about_you`)
.fork().skip('face_recognition.json').reason("Not a table, no idea how to use")
.fork().skip('friend_peer_group.json').reason("Not a table, very small file")
.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future")
.fork().read('notifications.json')
.jq(["-r", `["timestamp","unread","href","text"],
(
.notifications[]
| [(.timestamp | todateiso8601), .unread, .href, .text]
)
| @csv`])
// .write(`${process.cwd()}/probe.csv`)
// This insert does not pass --no-headers, unlike the two inserts further down.
.sqlite_utils(["insert", "your.db", "notifications.json", "-", "--csv", "--detect-types"])
.sink()
.fork().todo('preferences.json').reason("Too complex for now")
.fork().todo('visited.json').reason("Too complex for now")
.fork().todo('viewed.json').reason("Too complex for now")
// accounts_center: linked accounts table.
t.fork().cd(`accounts_center`)
.fork().read('accounts_and_profiles.json')
.jq(["-r", `["service_name","native_app_id","username","email", "phone_number", "name"],
(
.linked_accounts[]
| [.service_name, .native_app_id, .username, .email, .phone_number, .name]
)
| @csv`])
// NOTE(review): the jq program emits a header row, yet --no-headers is passed
// here — the header line would presumably be inserted as data; confirm intent.
.sqlite_utils(["insert", "your.db", "accounts_and_profiles.json", "-", "--csv", "--no-headers", "--detect-types"])
.sink()
// ads_and_businesses: off-Facebook activity, one row per event with the advertiser name repeated.
t.fork().cd(`ads_and_businesses`)
.fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json')
.fork().read('your_off-facebook_activity.json')
.jq(["-r", `
["name","id","type","timestamp"],
(
.off_facebook_activity[]
| .name as $name
| .events[]
| [$name, .id, .type, (.timestamp | todateiso8601)]
)
| @csv
`])
// NOTE(review): same --no-headers / header-row question as above.
.sqlite_utils(["insert", "your.db", "your_off-facebook_activity.json", "-", "--csv", "--no-headers", "--detect-types"])
.sink()
return t;
};

107
data-export/google.ts Normal file
View file

@ -0,0 +1,107 @@
import { TaskTargetPipelineHelper } from "./task.ts";
import { htmlSelectorChunkedDuplex } from "./html.ts";
/**
 * Declares the extraction pipelines for a Google Takeout export.
 *
 * Every `p.collect(col)…` chain registers `col` on the helper; each chained
 * call then moves that registration to the helper it returns, so `col` ends up
 * holding the last stage of every chain declared here.
 *
 * @returns The final pipeline helper of each chain, in declaration order.
 */
export function google(this: TaskTargetPipelineHelper){
  const p = this.setId(t=>`Google - ${t.basename}`); // Generic ID for everything in here
  const col: Set<TaskTargetPipelineHelper> = new Set();
  // TODO: There is a root takeout folder
  p.collect(col).cd('Access Log Activity/Activities - A list of Google services accessed by.csv').read()
  p.collect(col).cd('Devices - A list of devices (i.e. Nest, Pixel, iPh.csv').read()
  // Assignments - data was empty
  // Business messages - GMB messages, there's some but so far outside of what I want
  // TODO: Calendar, exports an .ics
  // a = t.fork().cd(`Chrome`)
  // TODO: Addresses and more.json
  // TODO: Bookmarks.csv
  // TODO: Device Information.json
  // TODO: Dictionary.csv
  // TODO: ...
  p.collect(col).cd('Chrome/History.json')
    .read()
    // TODO: "Typed Url", no data
    // TODO: "session", complex data
    // Omitted .ptoken and .client_id for now. I think ptoken is maybe for the history API? client_id is base64 something...
    // jq's todateiso8601 takes SECONDS since the Unix epoch, so the
    // microsecond value has to be scaled down first.
    // NOTE(review): assumes time_usec is Unix-epoch microseconds - confirm against an export
    .cmd(["jq", "-r", `["favicon_url","page_transition","title","url","time_usec"],
      (
        ."Browser History"[]
        | [.favicon_url, .page_transition, .title, .url, (.time_usec / 1000000 | floor | todateiso8601)]
      )
      | @csv`])
  // TODO: Contacts, exports a .vcf
  // TODO: ...
  // a = t.fork().cd(`Google Pay`)
  p.collect(col).cd(`Google Pay/Google transactions`).glob(`transactions_*.csv`)
    .read()
    .csvSink()
  // .fork("a").cd(`Money sends and requests`)
  // .fork().cd(`Money sends and requests.csv`)
  // .read()
  // .cmd(t=>["sqlite-utils", "insert", "your.db", t.basename, "-", "--csv", "--detect-types"])
  // TODO: One more folder, and it only has a pdf
  // TODO: Google Play Movies _ TV - no data
  // TODO: ...
  p.collect(col).cd("Location History/Location History.json")
    .read()
    // TODO: This is missing
    // "altitude" : 158,
    // "verticalAccuracy" : 68
    // and the activity models. I had no idea google tries to determine if I'm "tilting"
    //
    // The timestamp conversion is parenthesised on purpose: in jq `|` binds
    // looser than `,`, so without the parentheses every following field would
    // be evaluated against the timestamp instead of the location object.
    // NOTE(review): assumes timestampMs is epoch milliseconds (string or number) - confirm
    .cmd(["jq", "-r", `["timestamp","latitudeE7","longitudeE7","accuracy"],
      (
        .locations[]
        | [((.timestampMs | tonumber) / 1000 | floor | todateiso8601), .latitudeE7, .longitudeE7, .accuracy]
      )
      | @csv`])
    .csvSink()
  // There's also the semantic history but that's an entire nother can of worms
  // it seems like
  // TODO: Needs no-headers!
  // a = t.fork().cd(`My Activity`)
  // a.fork().glob(`**/MyActivity.html`)
  // .setId(t=>`Google - ${t.basenameN(2)}`)
  // .read()
  // .pipe(()=>{
  // // Parses the MyActivity format, chunking it into pieces of HTML text
  // // and then parsing out the text
  // const dup = htmlSelectorChunkedDuplex(
  // (tag, attrs)=>{
  // // TODO: We also probably want to get and parse each
  // // ".content-cell.mdl-typography--caption" as well (it
  // // has location for websearches and sometimes a details field)
  // // but then we have to get ".mdl-grid" and parse it
  // return attrs.class?.includes("content-cell")
  // && attrs.class?.includes("mdl-typography--body-1")
  // && !attrs.class?.includes("mdl-typography--text-right")
  // },
  // (chunk)=>{
  // const text = chunk.innerText;
  // const split = text.split("\n");
  // const timestamp = split.pop(); // TODO: need to parse this
  // const rest = split.join("\n");
  // // TODO: Escape instead of replace
  // const restSafe = rest.replace(/"/g, "'").replace(/\n/g,"\\n"); // escape newlines and quotes
  // // Return a CSV
  // return `"${restSafe}","${timestamp}"\n`;
  // }
  // );
  // return dup;
  // })
  // TODO: News
  // TODO: Profile
  // TODO: Tasks - No data
  return Array.from(col);
};

152
data-export/html.ts Normal file
View file

@ -0,0 +1,152 @@
import { strict as assert } from "node:assert";
import { Parser as HTMLParser2 } from "htmlparser2";
import { WritableStream } from "htmlparser2/WritableStream";
import { Duplex, Readable, Writable } from 'node:stream';
import duplexify from "duplexify";
// Types derived from the htmlparser2 `Parser` constructor signature.
/** First constructor argument: the callback/handler object. */
type HTMLParser2CBs = ConstructorParameters<typeof HTMLParser2>[0];
/** Second constructor argument: the parser options. */
type HTMLParser2Opts = ConstructorParameters<typeof HTMLParser2>[1];
/** The full constructor argument tuple. */
type HTMLParser2Args = ConstructorParameters<typeof HTMLParser2>;
/** Element names that are written self-closed and never get a closing tag. */
const htmlVoidElements = [
  'area',
  'base',
  'basefont',
  'bgsound',
  'br',
  'col',
  'command',
  'embed',
  'frame',
  'hr',
  'image',
  'img',
  'input',
  'isindex',
  'keygen',
  'link',
  'menuitem',
  'meta',
  'nextid',
  'param',
  'source',
  'track',
  'wbr',
];

/**
 * Serializes an opening tag.
 *
 * @param tagName Element name.
 * @param attributes Attribute name/value pairs, written in iteration order.
 * @returns `<tag a="b">`, or `<tag a="b" />` for void elements.
 */
export function openTag(tagName: string, attributes: {[k: string]: string}) {
  const rendered: string[] = [];
  for (const [key, value] of Object.entries(attributes)) {
    // Values are always written inside double quotes, so a literal double
    // quote in the value (possible when the source used single quotes) must
    // be turned into an entity.
    const quoted = value.replace(/"/g, '&quot;');
    rendered.push(`${key}="${quoted}"`);
  }
  const attrText = rendered.length > 0 ? ' ' + rendered.join(' ') : '';
  // Void elements are self-closed because JSX requires every tag to be
  // closed (no bare <br>).
  if (htmlVoidElements.includes(tagName)) {
    return `<${tagName}${attrText} />`;
  }
  return `<${tagName}${attrText}>`;
}

/**
 * Serializes a closing tag.
 *
 * @param tagName Element name.
 * @returns `</tag>`, or the empty string for void elements, which have none.
 */
export function closeTag(tagName: string) {
  return htmlVoidElements.includes(tagName) ? '' : `</${tagName}>`;
}
/** Accumulated content of one element selected by a chunker matcher. */
interface HTMLChunk {
// Text accumulated for the element (a "\n" is added for each nested <br>)
innerText: string;
// Markup accumulated for the element from its nested tags
innerHTML: string;
}
/** Escapes decoded text so it can be embedded in markup as character data. */
function escapeHTMLText(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
}

/**
 * Builds htmlparser2 callbacks that capture every element accepted by
 * `matcher` and hand its accumulated content to `cb` when it closes.
 *
 * Only one element may be captured at a time; a match nested inside another
 * match trips an assertion.
 *
 * @param matcher Decides, from tag name and attributes, whether to capture.
 * @param cb Called once per captured element, when its closing tag is seen.
 * @returns A callback object suitable for the htmlparser2 parser.
 */
function htmlSelectorChunker(matcher: (tag: string, attrs:{ [s: string]: string })=>boolean, cb: (chunk: HTMLChunk)=>void): HTMLParser2CBs {
  type TagState = {
    tag: string,
    attrs: { [s: string]: string },
    marked: boolean,
    innerText: string,
    innerHTML: string
  };
  // Mirrors the parser's stack of currently open elements
  const tagStateStack: TagState[] = [];
  const htmlParser2CBs: HTMLParser2CBs = {
    onopentag(tag, attrs) {
      // Looked up BEFORE pushing, so a captured element never records its own
      // opening tag
      const marked = tagStateStack.find(t => t.marked);
      const tagStackItem: TagState = {
        tag, attrs, marked: false, innerText: "", innerHTML: ""
      };
      if (matcher(tag, attrs)) {
        assert(!marked, "Nested tag marking encountered, not implemented/no sane implementation");
        tagStackItem.marked = true;
      }
      tagStateStack.push(tagStackItem);
      if (marked) {
        marked.innerHTML += openTag(tag, attrs);
        const str = tag === "br" ? "\n" : "";
        marked.innerText += str;
      }
    },
    ontext(text) {
      const marked = tagStateStack.find(t => t.marked);
      if (!marked) {
        return; // nothing to do
      }
      marked.innerText += text;
      // The markup view needs the text too, otherwise it is only a skeleton
      // of empty tags
      marked.innerHTML += escapeHTMLText(text);
    },
    onclosetag(tag) {
      // Always pop, even outside a capture: otherwise every element seen
      // before the first match stays on the stack for the rest of the
      // document
      const popped = tagStateStack.pop();
      if (popped?.marked) {
        // The captured element itself closed. Its own closing tag is not
        // part of its inner content.
        cb(popped);
        return;
      }
      const marked = tagStateStack.find(t => t.marked);
      if (!marked) {
        return;
      }
      marked.innerHTML += closeTag(tag);
    },
  };
  return htmlParser2CBs;
}
/**
 * Readable whose data is supplied from outside through `push()`; it has
 * nothing to fetch on demand.
 */
class ExternalReadable extends Readable {
  _read(): void {
    // Intentionally empty
  }
}
/**
 * Creates a Duplex stream: HTML is written in, and for every element accepted
 * by `matcher` the string returned by `postProcess` is emitted on the
 * readable side.
 *
 * @param matcher Selects the elements to capture.
 * @param postProcess Turns one captured chunk into the text to emit.
 * @returns A Duplex joining the HTML parser (writable) and the output (readable).
 */
export function htmlSelectorChunkedDuplex(
  matcher: (tag: string, attrs:{ [s: string]: string })=>boolean,
  postProcess: (chunk: HTMLChunk)=>string
): Duplex {
  const output = new ExternalReadable();
  const emitChunk = (chunk: HTMLChunk): void => {
    output.push(postProcess(chunk));
  };
  const parserCallbacks = htmlSelectorChunker(matcher, emitChunk);
  const input = new WritableStream({
    ...parserCallbacks,
    // Parser failures surface on the readable side
    onerror(error){
      output.emit("error", error);
    },
    // End of input: finish the readable side and announce it closed
    onend() {
      output.push(null);
      output.emit("close");
    }
  });
  return duplexify(input, output);
}

View file

@ -0,0 +1,352 @@
import nodePath from 'node:path';
import fs from 'node:fs';
import { strict as assert } from "node:assert";
import { execFile as _execFile } from "node:child_process";
import { promisify } from "node:util";
import { ZipFS } from "./zipFs.ts";
import { globSync } from "glob";
const execFile = promisify(_execFile);
/**
 * The subset of the `node:fs` surface a TaskTarget needs. Both the real `fs`
 * module and a zip-backed implementation are used through this shape.
 */
type FSImpl = {
// Set by zip-backed implementations; zipPath is then the archive to extract from
isZip?: boolean;
zipPath?: string;
// Optional async setup; `ready` reports whether it has completed
init?(): Promise<void>;
ready?: boolean;
statSync: typeof fs["statSync"];
existsSync: typeof fs["existsSync"];
// Required by glob
lstatSync: typeof fs["lstatSync"];
// Needs to include withFileTypes DirEnt variant
readdir: typeof fs["readdir"];
readdirSync: typeof fs["readdirSync"];
readlinkSync: typeof fs["readlinkSync"];
realpathSync: typeof fs["realpathSync"];
promises: {
lstat: typeof fs.promises["lstat"];
// Needs to include withFileTypes DirEnt
readdir: typeof fs.promises["readdir"];
readlink: typeof fs.promises["readlink"];
realpath: typeof fs.promises["realpath"];
}
};
// Targets use the real filesystem unless given another implementation
const defaultFSImpl = fs;
/**
 * Makes a string usable as an identifier by replacing every character other
 * than ASCII letters, digits and underscore with an underscore.
 */
function safe(s: string) {
  const sanitized = s.replace(/[^a-zA-Z0-9_]/g, '_');
  return sanitized;
}
/**
 * Quotes one argument so a POSIX shell passes it through verbatim.
 *
 * Arguments made only of characters with no special meaning to the shell are
 * returned unchanged; anything else is wrapped in single quotes. The earlier
 * version only quoted on space, `$`, `"`, `'` and `!`, which let characters
 * such as `|`, `;`, `&`, `(`, `*`, `<`, `>` and backticks reach the shell
 * unquoted.
 *
 * The empty string is returned unchanged, as before, so it disappears from
 * the command line instead of becoming an empty argument.
 *
 * @param s The argument; must not contain a newline.
 * @returns The argument, quoted if necessary.
 * @throws AssertionError when `s` contains a newline.
 */
function shEscape(s: string) {
  assert(!s.includes("\n"), "shEscape given new line, caller needs to handle these");
  // Allow-list rather than deny-list: anything not known to be harmless gets quoted
  if (/^[A-Za-z0-9_\/.,:=@%+-]*$/.test(s)) {
    return s;
  }
  // Nothing can be escaped inside single quotes, so an embedded single quote
  // is written by closing the string, emitting \' and reopening it
  return `'${s.replace(/'/g, "'\\''")}'`;
}
/** Base class for one stage of a TaskTarget's pipeline. */
abstract class TaskTargetBase {
  /**
   * @param target The target this stage belongs to.
   */
  constructor(public target: TaskTarget) {}
  /** "read" for a stage that starts a pipeline, "mid" for any other stage. */
  abstract get type(): "read" | "mid";
  /** Renders this stage as a shell command. */
  abstract toShell(): string;
}
/** Pipeline stage that writes the target's file to stdout. */
class TaskTargetRead extends TaskTargetBase {
  get type(){ return "read" as const; }
  /**
   * @returns `cat <path>` for ordinary files, or a `7z x <zip> -so <path>`
   * extraction to stdout when the target lives inside a zip archive.
   */
  toShell() {
    const { fsImpl, path } = this.target;
    if (!fsImpl.isZip) {
      return `cat ${shEscape(path)}`;
    }
    assert(fsImpl.zipPath, "Should have a zipPath");
    return `7z x ${shEscape(fsImpl.zipPath)} -so ${shEscape(path)}`;
  }
}
/** Pipeline stage that runs an external command. */
class TaskTargetCmd extends TaskTargetBase {
  get type(){ return "mid" as const; }
  /**What nodejs spawn() and execFile() take
   * [cmd, ...args]: string[]
   */
  cmd: string[];
  /**
   * Normalizes any accepted command form to an argv array.
   * Functions are called with the target; strings are split on whitespace.
   */
  static parse(target: TaskTarget, v: string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[])): string[] {
    const resolved = typeof v === "function" ? v(target) : v;
    return typeof resolved === "string" ? resolved.split(/\s+/) : resolved;
  }
  constructor(target: TaskTarget, cmd: string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[])) {
    super(target);
    this.cmd = TaskTargetCmd.parse(target, cmd);
  }
  /** Renders the argv as one shell command line, quoting each argument. */
  toShell() {
    const escaped: string[] = [];
    for (const arg of this.cmd) {
      // shEscape rejects newlines, so they are stripped first (multi-line jq
      // filters arrive here)
      const singleLine = arg.replace(/\n/g, "");
      escaped.push(shEscape(singleLine));
    }
    return escaped.join(" ");
  }
}
/**
 * One input path together with the pipeline of shell stages to run against
 * it, the id used to name its output, and functions to run afterwards.
 */
class TaskTarget {
  /** Current path of the target; changed by cd() */
  path: string;
  /** Filesystem used for existence checks and globbing */
  fsImpl: FSImpl = defaultFSImpl;
  /** Stages, in execution order */
  pipeline: TaskTargetBase[];
  /** Fixed id, or a function deriving the id from the target */
  idValue: string | ((t: TaskTarget)=>string) | undefined;
  /** Functions registered through pushPostFn() */
  postFns: ((t: TaskTarget)=>Promise<void>)[];
  constructor(path: string){
    this.path = path;
    this.pipeline = [];
    this.postFns = [];
  }
  /** Whether the current path exists in this target's filesystem */
  exists() {
    return this.fsImpl.existsSync(this.path);
  }
  /** Resolves `path` against the current path; absolute paths are returned as-is */
  _joinPath(path: string) {
    let finalPath = path;
    if (!path.startsWith('/')) {
      finalPath = nodePath.join(this.path, path)
    }
    return finalPath;
  }
  /** Identifier-safe form of the last path segment */
  get basename() {
    return safe(nodePath.basename(this.path));
  }
  /** Identifier-safe form of the last `n` path segments, joined with "___" */
  basenameN(n: number) {
    return this.path
      .split("/")
      .map(s => safe(s))
      .slice(-n)
      .join("___");
  }
  /**
   * Identifier-safe id of this target.
   * @throws AssertionError when no id has been set.
   */
  get id() {
    assert(this.idValue, `TaskTarget for path "${this.path}" must have an id`);
    if (typeof this.idValue === "function") {
      return safe(this.idValue(this));
    }
    return safe(this.idValue);
  }
  /**Changes the current directory of the target*/
  cd(path: string) {
    this.path = this._joinPath(path);
  }
  /**Get a glob off of the target*/
  glob(globPath: string) {
    globPath = this._joinPath(globPath);
    return globSync(globPath, {
      cwd: '/DUMMYCWD',
      fs: this.fsImpl
    });
  }
  /**
   * Copies this target. A function id is carried over, a fixed string id is
   * not; the pipeline is not copied yet.
   */
  clone() {
    const t = new TaskTarget(this.path);
    t.fsImpl = this.fsImpl;
    t.idValue = typeof this.idValue === "function" ? this.idValue : undefined;
    // Copy OUR post functions. This previously sliced the new target's own,
    // still empty, list, so clones silently lost them.
    t.postFns = this.postFns.slice();
    //TODO: clone pipeline
    return t;
  }
  /**
   * Appends a stage.
   * @throws AssertionError when a "read" stage is added to a non-empty pipeline.
   */
  pushToPipeline(v: TaskTargetBase) {
    if (v.type === "read") {
      assert(this.pipeline.length === 0, "A read can only be the first item in a pipeline");
    }
    this.pipeline.push(v);
  }
  /** Registers a function on this target's postFns list */
  pushPostFn(fn: ((t: TaskTarget)=>Promise<void>)) {
    this.postFns.push(fn);
  }
}
/**
 * A very composable object: a set of targets plus chainable operations that
 * are applied to every target. Forks share one task list so that the root can
 * collect the targets of the whole tree.
 */
export class Task {
  /**The targets every chained operation is applied to*/
  targets: TaskTarget[];
  /**SHARED list of all tasks for this given tree*/
  tasks: Task[];
  constructor() {
    this.tasks = [];
    this.targets = [new TaskTarget(process.cwd())];
  }
  /**Changes the current directory of every target*/
  cd(path: string) {
    for (const t of this.targets) {
      // TODO: opts
      t.cd(path);
    }
    return this;
  }
  /**Globs for all the paths that match under all targets*/
  glob(globPath: string) {
    // For every target, concat glob onto it, glob, and then
    // replace the original set of targets with all the new ones
    const newTargets: TaskTarget[] = [];
    for (const t of this.targets) {
      const matches = t.glob(globPath);
      for (const m of matches) {
        const newT = t.clone();
        newT.path = m;
        newTargets.push(newT);
      }
    }
    this.targets = newTargets;
    return this;
  }
  /**Opens all targets as zip archives*/
  async zip() {
    for (const t of this.targets) {
      const zfs = new ZipFS(t.path);
      await zfs.init();
      t.path = ""; // Each target is now rooted at the base of its respective zip
      t.fsImpl = zfs.getImpl() as any;
    }
    return this;
  }
  /**Returns a copy of ourself*/
  clone() {
    const t = new Task();
    t.targets = this.targets.map(t => t.clone());
    t.tasks = this.tasks; //SHARED object reference
    return t;
  }
  /**Returns a copy of ourself, but adds us to this tree's shared
   * task list as well*/
  fork() {
    const c = this.clone();
    this.tasks.push(c);
    return c;
  }
  /**Appends a command stage to every target*/
  cmd(cmd: string | string[] | ((target: TaskTarget)=>string) | ((target: TaskTarget)=>string[])) {
    for (const t of this.targets) {
      t.pushToPipeline(new TaskTargetCmd(t, cmd));
    }
    return this;
  }
  /**Appends a read stage to every target*/
  read() {
    for (const t of this.targets) {
      t.pushToPipeline(new TaskTargetRead(t));
    }
    return this;
  }
  /**Sets the id (or id-deriving function) of every target*/
  setId(idValue: string | ((t: TaskTarget)=>string)) {
    for (const t of this.targets) {
      t.idValue = idValue;
    }
    return this;
  }
  /**Registers a post-processing function on every target*/
  post(fn: any) {
    for (const t of this.targets) {
      t.pushPostFn(fn);
    }
    // Chainable like every other builder method here
    return this;
  }
  types(
    types: string[]
  ) {
    // TODO:
    return this;
  }
  /**
   * Registers a post function that queries row statistics for the target's
   * table in your.db and stores the summary line on the target as `log`.
   *
   * @param summarization Pairs of [columnName, type]; only the first pair is
   * used. A "numeric" type adds min/max of that column to the row count.
   */
  csvSink(
    summarization?: [string, string][]
  ) {
    // Ingest this csv into the database at the given id
    // this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
    // Add a post processing function for these targets that prints out the summarization
    // stats
    this.post(async (t: TaskTarget)=>{
      // We only do the first one so far for the summarization
      let queryLine: string;
      let formatFn: (r: any)=>string;
      const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
      if (type === "numeric") {
        queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
        formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
      }
      else {
        queryLine = `count(*) as n`;
        formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
      }
      const cmd = "sqlite-utils";
      // The id is bracket-quoted: ids only contain [A-Za-z0-9_], but one that
      // starts with a digit is not a valid bare SQL identifier
      const args = ["query", "your.db", `select ${queryLine} from [${t.id}]`]
      const { stdout } = await execFile(cmd, args);
      const results = JSON.parse(stdout);
      const result = results[0]; // should only be one result in the array for this type of query
      const logLine = formatFn(result);
      (t as any).log = logLine;
    });
    return this;
  }
  /**Collect all the TaskTargets, make sure everything is init'd and exists
   * and output the targets for processing*/
  async getFinalTargets() {
    const targets: TaskTarget[] = [];
    for (const task of this.tasks) {
      for (const t of task.targets) {
        // Make sure fsImpl is ready
        if ("ready" in t.fsImpl && !t.fsImpl.ready && t.fsImpl.init) {
          await t.fsImpl.init();
        }
        if (t.pipeline.length <= 0) {
          continue; // Tasks with empty pipelines are no-ops, remove
        }
        if (!t.exists()) {
          console.warn(`Missing target ${t.path}`);
          continue;
        }
        targets.push(t);
      }
    }
    return targets;
  }
  /**Renders one `id<TAB>shell pipeline` line per final target*/
  async getTaskTSVShell() {
    const targets = await this.getFinalTargets();
    let out: string[] = [];
    for (const t of targets) {
      const shell = t.pipeline
        .map(p => p.toShell())
        .join(" | ")
      out.push(`${t.id}\t${shell}`);
    }
    return out.join("\n");
  }
}

View file

@ -1,195 +1,432 @@
import nodePath from 'node:path';
import fs from 'node:fs';
import { strict as assert } from "node:assert";
import { type SpawnOptions } from "node:child_process";
import { type Stream } from 'node:stream';
import { ChildProcessDuplex } from "./util.ts";
import { finished } from "node:stream/promises";
import { ZipFS } from "./zipFs.ts";
import { globSync } from "glob";
import { $ } from "zx";
export function pipeCollection(...args: Stream[]) {
if (args.length <= 1) {
return args[0];
}
type FSImpl = {
isZip?: boolean;
zipPath?: string;
init?(): Promise<void>;
ready?: boolean;
let out = args[0];
for (const s of args.slice(1)) {
out = out.pipe(s as any);
statSync: typeof fs["statSync"];
existsSync: typeof fs["existsSync"];
// Required by glob
lstatSync: typeof fs["lstatSync"];
// Needs to include withFileTypes DirEnt variant
readdir: typeof fs["readdir"];
readdirSync: typeof fs["readdirSync"];
readlinkSync: typeof fs["readlinkSync"];
realpathSync: typeof fs["realpathSync"];
promises: {
lstat: typeof fs.promises["lstat"];
// Needs to include withFileTypes DirEnt
readdir: typeof fs.promises["readdir"];
readlink: typeof fs.promises["readlink"];
realpath: typeof fs.promises["realpath"];
}
return out;
};
const defaultFSImpl = fs;
function safe(s: string) {
return s.replace(/[^a-zA-Z0-9_]/g, '_');
}
/**A very composable object*/
export class Task {
cwd: string;
/**A serial pipeline of tasks*/
pipeline: any[];
/**
 * Quotes one argument so a POSIX shell passes it through verbatim.
 *
 * Arguments made only of characters with no special meaning to the shell are
 * returned unchanged; anything else is wrapped in single quotes. The earlier
 * version only quoted on space, `$`, `"`, `'` and `!`, which let characters
 * such as `|`, `;`, `&`, `(`, `*`, `<`, `>` and backticks reach the shell
 * unquoted.
 *
 * The empty string is returned unchanged, as before, so it disappears from
 * the command line instead of becoming an empty argument.
 *
 * @param s The argument; must not contain a newline.
 * @returns The argument, quoted if necessary.
 * @throws AssertionError when `s` contains a newline.
 */
function shEscape(s: string) {
  assert(!s.includes("\n"), "shEscape given new line, caller needs to handle these");
  // Allow-list rather than deny-list: anything not known to be harmless gets quoted
  if (/^[A-Za-z0-9_\/.,:=@%+-]*$/.test(s)) {
    return s;
  }
  // Nothing can be escaped inside single quotes, so an embedded single quote
  // is written by closing the string, emitting \' and reopening it
  return `'${s.replace(/'/g, "'\\''")}'`;
}
/**A SHARED object reference between all Task objects of a given tree*/
tasks: any[];
constructor() {
this.pipeline = [];
this.tasks = [];
this.cwd = process.cwd();
interface TaskTargetOp {
type: "read" | "mid";
toShell(target: TaskTarget): string;
clone(): TaskTargetOp;
}
class TaskTargetRead implements TaskTargetOp {
get type(){ return "read" as const; }
toShell(target: TaskTarget) {
if (target.fsImpl.isZip) {
assert(target.fsImpl.zipPath, "Should have a zipPath");
// We need to be able to do this
return `7z x ${shEscape(target.fsImpl.zipPath)} -so ${shEscape(target.path)}`;
}
/**Changes the current directory*/
cd(path: string, opts?: { canFail?: boolean }) {
if (path.startsWith('/')) {
this.cwd = path;
return this;
// TODO : Implement when reading from a zip file
return `cat ${shEscape(target.path)}`;
}
this.cwd = nodePath.join(this.cwd, path);
return this;
}
clone() {
const t = new Task();
t.cwd = this.cwd;
return new TaskTargetRead();
}
}
type ValidCmd = string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[]);
/** Pipeline stage that runs an external command. */
class TaskTargetCmd implements TaskTargetOp {
  get type(){ return "mid" as const; }
  /**What nodejs spawn() and execFile() take
   * [cmd, ...args]: string[]
   */
  cmd: ValidCmd;
  /**
   * Normalizes any accepted command form to an argv array.
   * Functions are called with the target; strings are split on whitespace.
   */
  static parse(target: TaskTarget, v: string | string[] | ((t: TaskTarget)=>string) | ((t: TaskTarget)=>string[])): string[] {
    const resolved = typeof v === "function" ? v(target) : v;
    return typeof resolved === "string" ? resolved.split(/\s+/) : resolved;
  }
  constructor(cmd: ValidCmd) {
    this.cmd = cmd;
  }
  /** Resolves the command for `target` and renders it as one shell command line. */
  toShell(target: TaskTarget) {
    const argv = TaskTargetCmd.parse(target, this.cmd);
    const escaped: string[] = [];
    for (const arg of argv) {
      // shEscape rejects newlines, so they are stripped first
      const singleLine = arg.replace(/\n/g, "");
      escaped.push(shEscape(singleLine));
    }
    return escaped.join(" ");
  }
  /** Returns a new stage holding the same command. */
  clone() {
    return new TaskTargetCmd(this.cmd);
  }
}
type ValidId = string | ((t: TaskTarget)=>string);
export class TaskTarget {
path: string;
fsImpl: FSImpl = defaultFSImpl;
pipeline: TaskTargetOp[];
idValue: ValidId | undefined;
postFns: ((t: TaskTarget)=>Promise<void>)[];
constructor(path: string){
this.path = path;
this.pipeline = [];
this.postFns = [];
}
exists() {
return this.fsImpl.existsSync(this.path);
}
_joinPath(path: string) {
let finalPath = path;
if (!path.startsWith('/')) {
finalPath = nodePath.join(this.path, path)
}
return finalPath;
}
get basename() {
return safe(nodePath.basename(this.path));
}
basenameN(n: number) {
return this.path
.split("/")
.map(s => safe(s))
.slice(-n)
.join("___");
}
get id() {
assert(this.idValue, `TaskTarget for path "${this.path}" must have an id`);
if (typeof this.idValue === "function") {
return safe(this.idValue(this));
}
return safe(this.idValue);
}
/**Changes the current directory of the target*/
cd(path: string): TaskTarget {
this.path = this._joinPath(path);
return this;
}
/**Unzips the file pointed to by the current TaskTarget*/
async unzip(): Promise<TaskTarget> {
const zfs = new ZipFS(this.path);
await zfs.init();
this.path = ""; // target is now rooted at the base of its respective zipfs
this.fsImpl = zfs.getImpl() as any;
return this;
}
/**
 * Get a glob off of the target.
 *
 * Each match is a clone of this target pointed at the matched path, so it
 * keeps the filesystem implementation and the id. Results used to be bare
 * TaskTargets, which lost the id and later failed the "must have an id"
 * assertion.
 *
 * @param globPath Pattern, resolved against the current path unless absolute.
 * @returns One target per match.
 */
glob(globPath: string): TaskTarget[] {
  globPath = this._joinPath(globPath);
  const items = globSync(globPath, {
    cwd: '/DUMMYCWD',
    fs: this.fsImpl
  });
  return items.map(i => {
    const t = this.clone();
    t.path = i;
    return t;
  });
}
/**
 * Clones the TaskTarget.
 *
 * The clone gets its own copies of the post-function list and of every
 * pipeline stage, and shares the filesystem implementation and the id value.
 */
clone(): TaskTarget {
  const t = new TaskTarget(this.path);
  t.fsImpl = this.fsImpl; // holds no state, just needs same impl
  t.idValue = this.idValue;
  // Both lists must come from `this`. Copying from the fresh `t` (as before)
  // always produced empty lists, so every clone dropped its pipeline.
  t.postFns = this.postFns.slice();
  t.pipeline = this.pipeline.map(p => p.clone());
  return t;
}
/**Returns a copy of ourself*/
fork() {
const c = this.clone();
this.tasks.push(c);
c.tasks = this.tasks; // Share object reference
return c;
pushToPipeline(v: TaskTargetOp) {
if (v.type === "read") {
assert(this.pipeline.length === 0, "A read can only be the first item in a pipeline");
}
reason(msg: string) {
// TODO: Add .reason
//this.reason = msg;
this.pipeline.push(v);
}
toShell() {
const shell = this.pipeline
.map(p => p.toShell(this))
.join(" | ")
return shell;
}
pushPostFn(fn: ((t: TaskTarget)=>Promise<void>)) {
this.postFns.push(fn);
}
cmd(cmd: ValidCmd) {
this.pushToPipeline(new TaskTargetCmd(cmd));
return this;
}
skip(msg: string) {
// TODO :Print out reaosn too
this.pipeline.push({
type: 'fn',
value: ()=>console.log(`SKIPPED: ${msg}`)
});
read() {
this.pushToPipeline(new TaskTargetRead());
return this;
}
todo(msg: string) {
// TODO :Print out reaosn too
this.pipeline.push({
type: 'fn',
value: ()=>console.log(`TODO: ${msg}`)
});
setId(idValue: ValidId) {
this.idValue = idValue;
return this;
}
jq(cmd: string | string[]) {
this.pipeline.push({
type: 'cmd',
value: Array.isArray(cmd) ? cmd : [cmd],
cmd: `jq`
});
post(fn: any) {
this.pushPostFn(fn);
}
types(
types: string[]
) {
// TODO:
return this;
}
sqlite_utils(cmd: string | string[]) {
this.pipeline.push({
type: 'cmd',
value: Array.isArray(cmd) ? cmd : [cmd],
cmd: `sqlite-utils`
});
csvSink(
summarization?: [string, string][]
) {
// TODO:
return this;
}
read(path: string) {
let finalPath = path;
if (!path.startsWith('/')) {
finalPath = nodePath.join(this.cwd, path)
}
this.pipeline.push({
type: 'read',
value: finalPath
});
// Ingest this csv into the database at the given id
// this.cmd(t=>["sqlite-utils", "insert", "your.db", t.id, "-", "--csv", "--detect-types"]);
// Add a post processing function for these targets that prints out the summarization
// stats
// this.post(async (t: TaskTarget)=>{
// // We only do the first one so far for the summarization
// let queryLine: string;
// let formatFn: (r: any)=>string;
// const [columnName, type] = summarization?.[0] ?? [undefined, undefined];
// if (type === "numeric") {
// queryLine = `min(${columnName}) as lo, max(${columnName}) as hi, count(*) as n`;
// formatFn = (r: any)=>`${r.n} rows from ${r.lo} to ${r.hi} for ${t.id}`;
// }
// else {
// queryLine = `count(*) as n`;
// formatFn = (r: any)=>`${r.n} rows for ${t.id}`;
// }
return this;
}
write(path: string) {
let finalPath = path;
if (!path.startsWith('/')) {
finalPath = nodePath.join(this.cwd, path)
}
// const cmd = "sqlite-utils";
// const args = ["query", "your.db", `select ${queryLine} from ${t.id}`]
// const { stdout, stderr } = await execFile(cmd, args);
// const results = JSON.parse(stdout);
// const result = results[0]; // should only be one result in the array for this type of query
// const logLine = formatFn(result);
// (t as any).log = logLine;
// });
this.pipeline.push({
type: 'write',
value: finalPath
});
return this;
}
sink() {
return this;
}
doPipeline() {
// Do what's described in .pipeline
let streams: Stream[] = [];
let streamDescription = [];
for (const c of this.pipeline) {
switch(c.type) {
case 'fn':
c.value();
break;
case 'cmd':
streams.push(
new ChildProcessDuplex(c.cmd, c.value)
);
streamDescription.push(`Shell ${c.cmd} ${c.value.join(" ")}`);
break;
case 'read':
streams.push(
fs.createReadStream(c.value, 'utf8')
);
streamDescription.push(`Read ${c.value}`);
break;
case 'write':
streams.push(
fs.createWriteStream(c.value, 'utf8')
);
streamDescription.push(`Write ${c.value}`);
break;
default:
throw new Error(`Unexpected ${c.type}`);
}
}
if (streams.length === 0) {
return undefined;
}
console.log("About to run:\n" + streamDescription.join("\n"));
const finalStream = pipeCollection(...streams);
finalStream.on('error', (err: any)=>{
console.log("IT ERRORED", err)
});
return finalStream;
}
async doTasks() {
let out = [];
console.log(`Working on ${this.tasks.length} tasks`);
for (const t of this.tasks) {
const s = t.doPipeline();
if (!s) {
continue;
}
const p = finished(s);
// await p;
out.push(p);
}
await Promise.all(out);
// const startTime = performance.now();
// const stats = final.getStats();
// const duration = performance.now() - startTime;
// const thisNodesPerSecond = stats.nodes.size / (duration / 1000);
// console2g.log(`Loaded ${solved.cacheKey} nodes=${stats.nodes.size} (${thisNodesPerSecond.toFixed(0)}/s) connections=${stats.connections.size} duration=${duration.toFixed(1)}`);
// return this;
}
}
/** Calls `fn` once for every target, in order. */
export function each(targets: TaskTarget[], fn: (t: TaskTarget)=>void) {
  for (const target of targets) {
    fn(target);
  }
}
/** Returns a new plain array holding the result of `fn` for every target, in order. */
export function map(targets: TaskTarget[], fn: (t: TaskTarget)=>TaskTarget) {
  const mapped = [];
  for (const target of targets) {
    const result = fn(target);
    mapped.push(result);
  }
  return mapped;
}
/** Returns clones of the targets moved to `path`; the inputs are left untouched. */
export function cd(targets: TaskTarget[], path: string): TaskTarget[] {
  return targets.map(target => {
    const copy = target.clone();
    return copy.cd(path);
  });
}
/** Globs under every target and returns all matches as one flat list. */
export function glob(targets: TaskTarget[], globPath: string): TaskTarget[] {
  const matchesPerTarget = targets.map(target => target.glob(globPath));
  return matchesPerTarget.flat();
}
/**
 * Opens every target as a zip archive.
 * Calls `unzip()` on the given targets themselves, with no `clone()` first.
 */
export async function unzip(targets: TaskTarget[]): Promise<TaskTarget[]> {
  const pending = targets.map(target => target.unzip());
  return Promise.all(pending);
}
/** Returns clones of the targets with a read stage appended. */
export function read(targets: TaskTarget[]): TaskTarget[] {
  return targets.map(target => {
    const copy = target.clone();
    return copy.read();
  });
}
/** Returns clones of the targets with the given command stage appended. */
export function cmd(targets: TaskTarget[], cmd: ValidCmd): TaskTarget[] {
  return targets.map(target => {
    const copy = target.clone();
    return copy.cmd(cmd);
  });
}
/** Returns clones of the targets with their id (or id function) set to `id`. */
export function setId(targets: TaskTarget[], id: ValidId): TaskTarget[] {
  return targets.map(target => {
    const copy = target.clone();
    return copy.setId(id);
  });
}
/**
 * Filters the targets down to the ones that can run.
 *
 * A target is dropped when its pipeline is empty (silently) or when its path
 * does not exist (with a warning). Nothing is thrown for a dropped target.
 * Filesystem implementations that report themselves not ready are initialised
 * first.
 *
 * @returns The remaining targets, in their original order.
 */
export async function verify(targets: TaskTarget[]) {
  const runnable: TaskTarget[] = [];
  for (const target of targets) {
    // Make sure fsImpl is ready
    const impl = target.fsImpl;
    if ("ready" in impl && !impl.ready && impl.init) {
      await impl.init();
    }
    // TODO: Probably remove or assert as incorrect
    const isNoOp = target.pipeline.length <= 0;
    if (isNoOp) {
      continue; // Tasks with empty pipelines are no-ops, remove
    }
    if (target.exists()) {
      runnable.push(target);
    }
    else {
      console.warn(`Missing target ${target.path}`);
    }
  }
  return runnable;
}
/**Builds the manifest for parallel: one line per target, holding the target's
 * id and the shell pipeline to run, separated by a tab
 * @todo Enforce doing a verify before we output?
 */
export function getTSVManifest(targets: TaskTarget[]): string {
  const lines: string[] = [];
  for (const target of targets) {
    // Render the pipeline before reading the id, as before
    const pipelineShell = target.toShell();
    const line = `${target.id}\t${pipelineShell}`;
    lines.push(line);
  }
  return lines.join("\n");
}
/**
 * Hands the collection membership of `a` over to `b`: if `a` is tracked by a
 * collection, `a` is removed from it and `b` takes its place. Does nothing
 * when `a` is not tracked.
 */
function collectionSwap(a: TaskTargetPipelineHelper, b: TaskTargetPipelineHelper) {
  const owner = a.__collection;
  if (!owner) {
    return;
  }
  // Detach a ...
  delete a.__collection;
  owner.delete(a);
  // ... and attach b in its place
  b.__collection = owner;
  owner.add(b);
}
/**
 * An array of TaskTargets with chainable pipeline methods.
 *
 * The methods that go through _fn/_afn do not modify this helper; they return
 * a NEW helper wrapping the result of the matching module-level function.
 * If this helper is tracked by a collection (see collect()), that tracking
 * moves to the returned helper.
 */
export class TaskTargetPipelineHelper extends Array<TaskTarget> {
// The set registered through collect(), if any
__collection?: Set<TaskTargetPipelineHelper>;
/**Turns a plain TaskTarget array into a helper IN PLACE by swapping its
 * prototype; an array that already is a helper is returned unchanged*/
static pipeline(t: TaskTarget[]): TaskTargetPipelineHelper {
if (Object.getPrototypeOf(t) === TaskTargetPipelineHelper.prototype) {
return t as any; // Already done
}
Object.setPrototypeOf(t, TaskTargetPipelineHelper.prototype);
return t as any;
}
/**Applies a synchronous transform and wraps its result as a helper*/
_fn(fn: (t: TaskTarget[])=>TaskTarget[]): TaskTargetPipelineHelper {
const p = TaskTargetPipelineHelper.pipeline(this);
const t = fn(p);
const p2 = TaskTargetPipelineHelper.pipeline(t);
collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection
return p2;
}
/**Async variant of _fn*/
async _afn(fn: (t: TaskTarget[])=>Promise<TaskTarget[]>): Promise<TaskTargetPipelineHelper> {
const p = TaskTargetPipelineHelper.pipeline(this);
const t = await fn(p);
const p2 = TaskTargetPipelineHelper.pipeline(t);
collectionSwap(p, p2); // Move collection pointer to the new item, ends always end up in the collection
return p2;
}
// Chainable wrappers around the module-level functions of the same name
cd(path: string): TaskTargetPipelineHelper {
return this._fn(t => cd(t, path));
}
glob(globPath: string): TaskTargetPipelineHelper {
return this._fn(t => glob(t, globPath));
}
async unzip(): Promise<TaskTargetPipelineHelper> {
return this._afn(unzip);
}
read(): TaskTargetPipelineHelper {
return this._fn(read);
}
cmd(_cmd: ValidCmd): TaskTargetPipelineHelper {
return this._fn(t => cmd(t, _cmd));
}
setId(id: ValidId): TaskTargetPipelineHelper {
return this._fn(t => setId(t, id));
}
// Placeholder: accepts anything and returns this helper unchanged
types(...args: any[]) {
// TODO: no-op
return this;
}
// Placeholder: accepts anything and returns this helper unchanged
csvSink(...args: any[]) {
// TODO: no-op
return this;
}
/**
 * Registers `_c` as this helper's collection and returns this helper.
 * The helper itself is not added to the set here; that happens in
 * collectionSwap, for the helper returned by the next _fn/_afn call.
* @todo Nested versions of this don't currently work, but they could if we
* turn __collection into an array of collections
*/
collect(_c: Set<TaskTargetPipelineHelper>) {
this.__collection = _c;
return this;
}
}
/**
 * Verifies the targets and runs their pipelines through GNU parallel, one job
 * per manifest line, writing each job's stdout to `OUTTEST/<id>.csv` and the
 * job log to `out.manifest`.
 *
 * A run in which fewer than 30 jobs failed is tolerated with a warning; any
 * other failure is rethrown.
 *
 * @param targets Targets to run; they are passed through verify() first.
 * @throws Whatever the parallel invocation rejected with, when 30 or more
 * jobs failed or the failure carries no numeric exit code.
 */
export async function parallel(targets: TaskTarget[]) {
  const finalTargets = await verify(targets);
  const manifestTSV = getTSVManifest(finalTargets);
  try {
    await $({ input: manifestTSV })`/usr/bin/parallel \
      --colsep ${'\t'} \
      --jobs 0 \
      --linebuffer \
      --tagstring {1} \
      --eta \
      --joblog out.manifest \
      ${'bash -c {2} > OUTTEST/{1}.csv'} \
      ::::- `; // stdin is in manifestTSV
  }
  catch(err: unknown) {
    // zx rejects with a ProcessOutput, which exposes the exit status as
    // `exitCode`. The previous check read `status`, which is always
    // undefined there, so no failure was ever rethrown.
    // For parallel, an exit status of 1-100 is the number of failed jobs.
    const exitCode = (err as { exitCode?: unknown } | null | undefined)?.exitCode;
    if (typeof exitCode !== "number" || exitCode >= 30) {
      throw err;
    }
    console.warn(`parallel: ${exitCode} job(s) failed, see out.manifest`);
  }
}

View file

@ -1,73 +0,0 @@
import { type ChildProcessWithoutNullStreams, spawn, type SpawnOptions } from "node:child_process";
import { Duplex } from 'node:stream';
/**
 * Duplex stream around a child process: data written to the stream goes to the
 * child's stdin, and the child's stdout becomes the readable side.
 *
 * The child is spawned lazily, on the first write or read.
 *
 * @todo This was written in a hurry and has not been reviewed properly; it
 * needs a closer look. See notes at 2025-06-19T06:28:34
 * */
export class ChildProcessDuplex extends Duplex {
// Arguments for spawn(), kept until the child is actually started
_pArgs: Parameters<typeof spawn>
// The child process; undefined until the first _write/_read
_p: ChildProcessWithoutNullStreams | undefined;
// When true, any data on the child's stderr is emitted as an 'error' event
stderrorIsError = true;
constructor(command: string, args: readonly string[], options?: SpawnOptions) {
super();
// TODO: Typing
this._pArgs = [command, args, options] as any;
}
/**Spawns the child and wires its events and streams to this stream*/
__setupP() {
// TODO: Make sure the args have the right stream setup...
this._p = spawn(...this._pArgs) as ChildProcessWithoutNullStreams;
this._p.on("close", ()=>{
// If you don't do this, then a process that closes the streams near when
// the process ends will somehow close properly, but will cause longer
// running processes to not work. There is a test case for this because
// I still dont quite understand it
this.emit("close");
});
// End of the child's stdout ends the readable side
this._p.stdout.on('end', ()=>{
this.push(null);
});
// Forward stdout; pause the child's stdout when push() reports a full
// buffer (it is resumed in _read)
this._p.stdout.on('data', (chunk) => {
const canPushMore = this.push(chunk);
if (!canPushMore) {
this._p!.stdout.pause();
}
});
// Forward errors
this._p.on('error', (error) => {
this.emit('error', error);
});
if (this.stderrorIsError) {
this._p.stderr.on('data', (data) => {
this.emit('error', new Error(data));
});
}
}
/**Writable side: forwards the chunk to the child's stdin*/
_write(chunk: any, encoding: BufferEncoding, cb: ()=>any) {
if (!this._p) {
this.__setupP();
}
this._p!.stdin.write(chunk, encoding, cb);
}
/**Readable side: the consumer wants more data, so let stdout flow again*/
_read(size: number) {
if (!this._p) {
this.__setupP();
}
this._p!.stdout.resume();
}
/**Called when writing has finished*/
_final(cb: any) {
if (this._p) {
// Close stdin at the end of writing
this._p.stdin.end();
}
cb();
}
}

354
data-export/zipFs.ts Normal file
View file

@ -0,0 +1,354 @@
import { strict as assert } from "node:assert";
import fs from "node:fs";
import path from "node:path";
import { Readable } from "node:stream";
import yauzl from "yauzl";
/**
 * Strips the placeholder working directory from the front of a path.
 *
 * globSync has to be called with _some_ cwd, because a cwd of "" makes it
 * use the process's working directory (which matches nothing inside a zip).
 * The placeholder is removed again here.
 */
function removeDummyCwd(path: string) {
  const prefix = "/DUMMYCWD/";
  return path.startsWith(prefix) ? path.slice(prefix.length) : path;
}
/**
 * Dirent-like class for directory entries: every entry is either a regular
 * file or a directory, and never any of the special types.
 */
class ZipDirent {
  name: string;
  private isDir: boolean;

  constructor(name: string, isDirectory: boolean) {
    this.name = name;
    this.isDir = isDirectory;
  }

  isFile(): boolean { return this.isDir ? false : true; }
  isDirectory(): boolean { return this.isDir; }

  // None of the special entry types applies
  isBlockDevice(): boolean { return false; }
  isCharacterDevice(): boolean { return false; }
  isSymbolicLink(): boolean { return false; }
  isFIFO(): boolean { return false; }
  isSocket(): boolean { return false; }
}
/**
 * Builds an fs.Stats-like object for a zip entry.
 *
 * An entry whose name ends in "/" is reported as a directory, anything else
 * as a regular file. Every timestamp is the entry's last-modified date, and
 * the ownership/device fields are fixed placeholder values.
 */
function _entryToStats(entry: yauzl.Entry) {
  const directory = entry.fileName.endsWith("/");
  const modified = entry.getLastModDate();
  const modifiedMs = modified.getTime();
  const byteSize = entry.uncompressedSize;
  return {
    isFile: () => !directory,
    isDirectory: () => directory,
    isBlockDevice: () => false,
    isCharacterDevice: () => false,
    isSymbolicLink: () => false,
    isFIFO: () => false,
    isSocket: () => false,
    size: byteSize,
    compressedSize: entry.compressedSize,
    mtime: modified,
    mode: directory ? 0o040755 : 0o100644,
    uid: 0,
    gid: 0,
    dev: 0,
    ino: 0,
    nlink: 1,
    rdev: 0,
    blksize: 4096,
    blocks: Math.ceil(byteSize / 512),
    atime: modified,
    ctime: modified,
    birthtime: modified,
    atimeMs: modifiedMs,
    mtimeMs: modifiedMs,
    ctimeMs: modifiedMs,
    birthtimeMs: modifiedMs,
  };
}
/**
 * Read-only, fs-like view over the contents of a ZIP archive.
 *
 * `init()` must complete before any other method is used: it opens the
 * archive and indexes every entry by its file name. Paths passed in may carry
 * the "/DUMMYCWD/" placeholder prefix, which is stripped before lookup.
 * Lookups are by exact entry name, so a directory is only found by
 * `existsSync`/`statSync` when the archive holds an explicit "dir/" entry and
 * the path includes the trailing slash.
 */
export class ZipFS {
  isZip = true;
  zipPath: string;
  /** Every archive entry, keyed by its file name as stored in the ZIP. */
  entries: Map<string, yauzl.Entry>;
  /** The open archive, or `null` until `init()` has opened it. */
  zipFile: yauzl.ZipFile | null;

  /**
   * @param path Location of the ZIP archive on the real filesystem.
   */
  constructor(path: string) {
    this.zipPath = path;
    this.entries = new Map();
    this.zipFile = null;
  }

  /**
   * Opens the archive and reads its complete entry index into `entries`.
   *
   * @throws Rejects when the archive cannot be opened or its index cannot be
   *   read. The rejection value is always an `Error`.
   */
  async init() {
    const zipFile = await new Promise<yauzl.ZipFile>((resolve, reject) => {
      yauzl.open(this.zipPath, {
        lazyEntries: true,
        autoClose: false
      }, (err, zipfile) => {
        if (err || !zipfile) {
          // Never reject with null/undefined: callers expect an Error.
          return reject(err ?? new Error(`Failed to open zip file '${this.zipPath}'`));
        }
        resolve(zipfile);
      });
    });
    this.zipFile = zipFile;
    await new Promise<void>((resolve, reject) => {
      // Attach the listeners first, then start pulling entries one at a time
      // (lazyEntries mode: each readEntry() yields at most one "entry").
      zipFile.on("entry", (entry) => {
        this.entries.set(entry.fileName, entry);
        zipFile.readEntry();
      });
      zipFile.on("end", resolve);
      zipFile.on("error", reject);
      zipFile.readEntry();
    });
  }

  /** Whether the archive has been opened by `init()`. */
  get ready() {
    return !!this.zipFile;
  }

  /**
   * @param path Entry name (optionally prefixed with the dummy cwd).
   * @returns `true` when an entry with exactly this name exists.
   */
  existsSync(path: string): boolean {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    return this.entries.has(path);
  }

  /**
   * Same as `statSync`. Despite the name this is synchronous and takes no
   * callback.
   */
  stat(path: string) {
    return this.statSync(path);
  }

  /**
   * @param path Entry name (optionally prefixed with the dummy cwd).
   * @returns A Stats-like object for the entry.
   * @throws Error with an ENOENT message when no such entry exists.
   */
  statSync(path: string) {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    const entry = this.entries.get(path);
    if (!entry) throw new Error(`ENOENT: no such file or directory, stat '${path}'`);
    return _entryToStats(entry);
  }

  /** Symlinks are not modelled, so this is identical to `statSync`. */
  lstatSync(path: string) {
    path = removeDummyCwd(path);
    return this.statSync(path);
  }

  /** Always throws: the ZIP view is read-only. */
  createWriteStream(path: string): never {
    throw new Error("ZIP filesystem is read-only");
  }

  /**
   * Opens a readable stream over the uncompressed content of an entry.
   *
   * The returned stream is available immediately; the underlying ZIP stream
   * is opened asynchronously and forwarded into it. Backpressure is honoured:
   * the source is paused whenever the consumer's buffer is full and resumed
   * when more data is requested, so a slow consumer does not force the whole
   * entry into memory.
   *
   * @param path Entry name (optionally prefixed with the dummy cwd).
   * @throws Error with an ENOENT message when no such entry exists.
   */
  createReadStream(path: string): Readable {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    const entry = this.entries.get(path);
    if (!entry) throw new Error(`ENOENT: no such file or directory, open '${path}'`);

    let source: Readable | null = null;
    const out = new Readable({
      read() {
        // Consumer wants more: let the source flow again.
        source?.resume();
      },
      destroy(err, done) {
        // Release the underlying ZIP stream when the consumer gives up.
        source?.destroy();
        done(err);
      },
    });
    this.zipFile.openReadStream(entry, (err, stream) => {
      if (err || !stream) {
        out.destroy(err ?? new Error("Failed to open stream"));
        return;
      }
      if (out.destroyed) {
        // The consumer went away before the source was ready.
        stream.destroy();
        return;
      }
      source = stream;
      stream.on("data", (chunk) => {
        // push() returning false means the buffer is full: stop reading
        // until read() is called again.
        if (!out.push(chunk)) stream.pause();
      });
      stream.on("end", () => out.push(null));
      stream.on("error", (e) => out.destroy(e));
    });
    return out;
  }

  /**
   * Collects the direct children of a directory in a single pass over the
   * entry index.
   *
   * @returns Map from child name to whether that child is a directory. A
   *   child is a directory when some entry lies below it or when it has an
   *   explicit "name/" entry.
   */
  private _scanDirectory(dirPath: string): Map<string, boolean> {
    // Normalize the directory path to forward slashes with a trailing slash.
    let normalizedDir = dirPath.replace(/\\/g, "/");
    if (normalizedDir && !normalizedDir.endsWith("/")) {
      normalizedDir += "/";
    }
    const isRoot = normalizedDir === "" || normalizedDir === "/";
    const children = new Map<string, boolean>();
    for (const entryPath of this.entries.keys()) {
      if (entryPath === normalizedDir) continue; // Skip the directory itself
      let relativePath: string;
      if (isRoot) {
        relativePath = entryPath;
      } else if (entryPath.startsWith(normalizedDir)) {
        relativePath = entryPath.substring(normalizedDir.length);
      } else {
        continue;
      }
      const [name, ...deeper] = relativePath.split("/").filter(p => p);
      if (name === undefined) continue;
      const isDirectory = deeper.length > 0 || relativePath.endsWith("/");
      children.set(name, isDirectory || children.get(name) === true);
    }
    return children;
  }

  /** Sorted names of the direct children of `dirPath`. */
  private _listDirectory(dirPath: string): string[] {
    return Array.from(this._scanDirectory(dirPath).keys()).sort();
  }

  /**
   * Lists the direct children of a directory, sorted by name.
   *
   * @param dirPath Directory inside the archive ("" or "/" for the root).
   * @param options With `withFileTypes: true`, `ZipDirent` objects are
   *   returned instead of plain names.
   */
  readdirSync(dirPath: string, options?: { withFileTypes?: false }): string[];
  readdirSync(dirPath: string, options: { withFileTypes: true }): ZipDirent[];
  readdirSync(dirPath: string, options?: { withFileTypes?: boolean }): string[] | ZipDirent[] {
    assert(this.zipFile, 'Must be inited');
    dirPath = removeDummyCwd(dirPath);
    // One scan yields both the names and their directory flags, instead of
    // re-walking every entry once per child.
    const children = this._scanDirectory(dirPath);
    const names = Array.from(children.keys()).sort();
    if (options?.withFileTypes) {
      return names.map(name => new ZipDirent(name, children.get(name) === true));
    }
    return names;
  }

  /** Promise-returning form of `readdirSync`. */
  async readdir(dirPath: string, options?: { withFileTypes?: false }): Promise<string[]>;
  async readdir(dirPath: string, options: { withFileTypes: true }): Promise<ZipDirent[]>;
  async readdir(dirPath: string, options?: { withFileTypes?: boolean }): Promise<string[] | ZipDirent[]> {
    assert(this.zipFile, 'Must be inited');
    dirPath = removeDummyCwd(dirPath);
    return this.readdirSync(dirPath, options as any);
  }

  /** Always throws EINVAL: symlinks are not supported. */
  readlinkSync(path: string): string {
    // ZIP files don't support symlinks
    throw new Error(`EINVAL: invalid argument, readlink '${path}'`);
  }

  /**
   * Returns the normalized (forward-slash) form of `path` when it names an
   * entry, an explicit "path/" directory entry, or a directory that merely
   * has entries beneath it.
   *
   * @throws Error with an ENOENT message otherwise.
   */
  realpathSync(path: string): string {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    // Normalize the path and check if it exists
    const normalized = path.replace(/\\/g, "/");
    if (this.entries.has(normalized) || this.entries.has(normalized + "/")) {
      return normalized;
    }
    // Check if it's a valid directory path (implied by its children)
    const withSlash = normalized.endsWith("/") ? normalized : normalized + "/";
    for (const entryPath of this.entries.keys()) {
      if (entryPath.startsWith(withSlash)) {
        return normalized;
      }
    }
    throw new Error(`ENOENT: no such file or directory, realpath '${path}'`);
  }

  /**
   * Promise-returning wrappers over the synchronous implementations.
   * `readdir` forwards `options` untouched, so at runtime a
   * `withFileTypes: true` request is honoured by `readdirSync` even though
   * the declared type only covers the plain-names form.
   */
  promises = {
    lstat: async (path: string) => {
      return this.lstatSync(path);
    },
    readdir: async (dirPath: string, options?: { withFileTypes?: false }): Promise<string[]> => {
      return this.readdirSync(dirPath, options);
    },
    readlink: async (path: string): Promise<string> => {
      return this.readlinkSync(path);
    },
    realpath: async (path: string): Promise<string> => {
      return this.realpathSync(path);
    },
  };

  /**
   * Callback form of `lstatSync`. The callback is invoked synchronously;
   * lookup failures are reported through it rather than thrown.
   */
  lstat(path: string, callback: (err: Error | null, stats?: any) => void) {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    try {
      const stats = this.lstatSync(path);
      callback(null, stats);
    } catch (err) {
      callback(err as Error);
    }
  }

  /**
   * Callback form of `realpathSync`. The callback is invoked synchronously;
   * lookup failures are reported through it rather than thrown.
   */
  realpath(path: string, callback: (err: Error | null, resolvedPath?: string) => void) {
    assert(this.zipFile, 'Must be inited');
    path = removeDummyCwd(path);
    try {
      const resolved = this.realpathSync(path);
      callback(null, resolved);
    } catch (err) {
      callback(err as Error);
    }
  }

  /**
   * Returns a plain object exposing this filesystem's operations.
   *
   * Because glob uses ...xxx notation to unpack ourselves into a _new_ object
   * we need to make sure that we DONT hand over the class instance itself,
   * otherwise the (prototype) methods would not show up in the output object.
   * `ready` is exposed as a getter so it reflects the state at the time it is
   * read (or spread), not at the time `getImpl()` was called.
   */
  getImpl() {
    const zipFs = this;
    return {
      isZip: this.isZip,
      zipPath: this.zipPath,
      init: this.init.bind(this),
      get ready() {
        return zipFs.ready;
      },
      statSync: this.statSync.bind(this),
      createReadStream: this.createReadStream.bind(this),
      createWriteStream: this.createWriteStream.bind(this),
      existsSync: this.existsSync.bind(this),
      lstatSync: this.lstatSync.bind(this),
      readdir: this.readdir.bind(this),
      readdirSync: this.readdirSync.bind(this),
      readlinkSync: this.readlinkSync.bind(this),
      realpathSync: this.realpathSync.bind(this),
    };
  }

  // The remaining fs operations are deliberately unsupported.
  access() { throw new Error("Not implemented"); }
  appendFile() { throw new Error("Not implemented"); }
  chmod() { throw new Error("Not implemented"); }
  chown() { throw new Error("Not implemented"); }
  copyFile() { throw new Error("Not implemented"); }
  mkdir() { throw new Error("Not implemented"); }
  mkdtemp() { throw new Error("Not implemented"); }
  open() { throw new Error("Not implemented"); }
  readFile() { throw new Error("Not implemented"); }
  rename() { throw new Error("Not implemented"); }
  rm() { throw new Error("Not implemented"); }
  rmdir() { throw new Error("Not implemented"); }
  statfs() { throw new Error("Not implemented"); }
  symlink() { throw new Error("Not implemented"); }
  truncate() { throw new Error("Not implemented"); }
  unlink() { throw new Error("Not implemented"); }
  utimes() { throw new Error("Not implemented"); }
  watch() { throw new Error("Not implemented"); }
  writeFile() { throw new Error("Not implemented"); }
}

View file

@ -1,28 +0,0 @@
# Example Dataset
This is an example dataset to demonstrate the Base Data Manager prototype.
## Overview
This dataset contains sample data for testing purposes.
## Data Source
- **Source**: Example data generator
- **Last Updated**: 2024
- **Format**: CSV
## Fields
- `id`: Unique identifier
- `name`: Name field
- `value`: Numeric value
- `timestamp`: Date/time of entry
## Usage
Import this dataset using the `data-import.ts` script.
## Notes
This is just example data for testing the prototype functionality.

View file

@ -1,12 +0,0 @@
/**
 * Example import entry point: logs a message, waits briefly to mimic real
 * asynchronous work, and reports fixed statistics.
 *
 * @returns Stats object with the number of rows (always 42).
 */
export async function main() {
  console.log('Processing example dataset...');

  // Stand-in for real asynchronous processing.
  const simulatedDelayMs = 100;
  await new Promise(done => setTimeout(done, simulatedDelayMs));

  const stats = { rows: 42 };
  return stats;
}

View file

@ -1,18 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Base Data Manager</title>
</head>
<body>
<div id="root"></div>
<script type="module">
globalThis.process = { env: {} };
</script>
<script type="module">
import main from "./src/main.ts";
main();
</script>
</body>
</html>

67
main.ts Normal file
View file

@ -0,0 +1,67 @@
import fs from 'node:fs/promises';
import nodePath from "node:path";
import { DatabaseSync } from "node:sqlite";
import "./data-export/facebook.ts";
import { google } from "./data-export/google.ts";
import { TaskTargetPipelineHelper } from "./data-export/task.ts";
// Type-level half of the extension: declare that TaskTargetPipelineHelper
// has a `google` member with the same type as the imported function.
declare module "./data-export/task.ts" {
interface TaskTargetPipelineHelper {
google: typeof google;
}
}
// Runtime half of the extension: copy `google` onto the prototype so the
// member declared above actually exists on instances.
Object.assign(TaskTargetPipelineHelper.prototype, {
google
});
/**
 * Loads a set of CSV files into one SQLite database file, one table per CSV.
 *
 * Each CSV is exposed through the SQLite `csv` virtual-table extension and
 * copied into a regular table named after the file (base name without
 * ".csv"). Everything is built in an in-memory database and then written to
 * `sqlitePath` in a single `VACUUM INTO`.
 *
 * @param paths CSV files to load.
 * @param sqlitePath Destination database file. `VACUUM INTO` requires that
 *   this file does not already exist (or is empty).
 * @param csvExtensionPath Location of the compiled `csv` loadable extension.
 * @throws Whatever `node:sqlite` throws when the extension cannot be loaded
 *   or a statement fails; the in-memory database is closed in every case.
 */
function loadIntoSqlite(
  paths: string[],
  sqlitePath: string,
  csvExtensionPath = "/home/cobertos/sqlite-files/csv.so"
) {
  // Paths and table names come from the filesystem and may contain quote
  // characters; double them so they stay inside their SQL literal/identifier.
  const sqlString = (value: string) => `'${value.replace(/'/g, "''")}'`;
  const sqlIdentifier = (value: string) => `"${value.replace(/"/g, '""')}"`;

  // Open an in-memory db for speed
  const db = new DatabaseSync(":memory:", { allowExtension: true });
  try {
    db.loadExtension(csvExtensionPath);
    // Nothing else needs to be loaded; switch extension loading back off.
    db.enableLoadExtension(false);

    for (const path of paths) {
      const table = nodePath.basename(path, ".csv");
      console.log(`Loading ${path} → table ${table}`);
      // NOTE(review): relies on the csv extension un-doubling quotes inside
      // its filename argument — confirm against the csv.c build in use.
      db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename=${sqlString(path)});`);
      db.exec(`CREATE TABLE ${sqlIdentifier(table)} AS SELECT * FROM intermediate;`);
      db.exec(`DROP TABLE IF EXISTS intermediate;`);
    }

    // Dump it all to the path specified
    db.exec(`VACUUM main INTO ${sqlString(sqlitePath)}`);
  } finally {
    // Release the connection even when a statement above throws.
    db.close();
  }
}
/**
 * Script entry point.
 *
 * Currently only binds `t` to the TaskTargetPipelineHelper class; every
 * pipeline invocation and the CSV → SQLite load below are commented out.
 */
async function main() {
const t = TaskTargetPipelineHelper;
// TODO:
// t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json")
// .facebook()
// (await t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
// t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001")
// .google()
// let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip");
// await (zipTask.fsImpl as any).init();
// zipTask.facebook();
// Now take the output and load it all into a single SQLITE file
// const entries = await fs.readdir('OUTTEST', { withFileTypes: true });
// const csvFiles = entries
// .filter(e => e.isFile() && e.name.endsWith(".csv"))
// .map(e => nodePath.join('OUTTEST', e.name));
// await fs.unlink('your.db');
// loadIntoSqlite(csvFiles, 'your.db');
}
// Fire-and-forget: the returned promise is not awaited or caught here.
main();

View file

@ -15,22 +15,17 @@
"license": "ISC",
"packageManager": "pnpm@10.19.0",
"dependencies": {
"@preact/signals": "^2.2.1",
"@types/cors": "^2.8.19",
"@types/express": "^5.0.5",
"cors": "^2.8.5",
"dotenv": "^17.2.3",
"esbuild": "^0.27.0",
"express": "^5.1.0",
"preact": "^10.26.9",
"preact-custom-element": "^4.3.0",
"preact-render-to-string": "^6.6.3"
"@types/duplexify": "^3.6.5",
"@types/yauzl": "^2.10.3",
"duplexify": "^4.1.3",
"fp-ts": "^2.16.11",
"glob": "^13.0.0",
"htmlparser2": "^10.0.0",
"yauzl": "^3.2.0",
"zx": "^8.8.5"
},
"devDependencies": {
"@types/jsdom": "^21.1.7",
"@types/node": "^24.1.0",
"jsdom": "^26.1.0",
"typescript": "^5.9.3",
"vite": "^7.0.6"
"typescript": "^5.9.3"
}
}

238
pnpm-lock.yaml generated
View file

@ -11,24 +11,48 @@ importers:
'@preact/signals':
specifier: ^2.2.1
version: 2.4.0(preact@10.27.2)
'@types/cli-progress':
specifier: ^3.11.6
version: 3.11.6
'@types/cors':
specifier: ^2.8.19
version: 2.8.19
'@types/duplexify':
specifier: ^3.6.5
version: 3.6.5
'@types/express':
specifier: ^5.0.5
version: 5.0.5
'@types/progress-stream':
specifier: ^2.0.5
version: 2.0.5
'@types/yauzl':
specifier: ^2.10.3
version: 2.10.3
cors:
specifier: ^2.8.5
version: 2.8.5
dotenv:
specifier: ^17.2.3
version: 17.2.3
duplexify:
specifier: ^4.1.3
version: 4.1.3
esbuild:
specifier: ^0.27.0
version: 0.27.0
express:
specifier: ^5.1.0
version: 5.1.0
fp-ts:
specifier: ^2.16.11
version: 2.16.11
glob:
specifier: ^13.0.0
version: 13.0.0
htmlparser2:
specifier: ^10.0.0
version: 10.0.0
preact:
specifier: ^10.26.9
version: 10.27.2
@ -38,6 +62,12 @@ importers:
preact-render-to-string:
specifier: ^6.6.3
version: 6.6.3(preact@10.27.2)
yauzl:
specifier: ^3.2.0
version: 3.2.0
zx:
specifier: ^8.8.5
version: 8.8.5
devDependencies:
'@types/jsdom':
specifier: ^21.1.7
@ -400,6 +430,14 @@ packages:
cpu: [x64]
os: [win32]
'@isaacs/balanced-match@4.0.1':
resolution: {integrity: sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==}
engines: {node: 20 || >=22}
'@isaacs/brace-expansion@5.0.0':
resolution: {integrity: sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==}
engines: {node: 20 || >=22}
'@preact/signals-core@1.12.1':
resolution: {integrity: sha512-BwbTXpj+9QutoZLQvbttRg5x3l5468qaV2kufh+51yha1c53ep5dY4kTuZR35+3pAZxpfQerGJiQqg34ZNZ6uA==}
@ -521,12 +559,18 @@ packages:
'@types/body-parser@1.19.6':
resolution: {integrity: sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==}
'@types/cli-progress@3.11.6':
resolution: {integrity: sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA==}
'@types/connect@3.4.38':
resolution: {integrity: sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==}
'@types/cors@2.8.19':
resolution: {integrity: sha512-mFNylyeyqN93lfe/9CSxOGREz8cpzAhH+E93xJ4xWQf62V8sQ/24reV2nyzUWM6H6Xji+GGHpkbLe7pVoUEskg==}
'@types/duplexify@3.6.5':
resolution: {integrity: sha512-fB56ACzlW91UdZ5F3VXplVMDngO8QaX5Y2mjvADtN01TT2TMy4WjF0Lg+tFDvt4uMBeTe4SgaD+qCrA7dL5/tA==}
'@types/estree@1.0.8':
resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==}
@ -548,6 +592,9 @@ packages:
'@types/node@24.10.0':
resolution: {integrity: sha512-qzQZRBqkFsYyaSWXuEHc2WR9c0a0CXwiE5FWUvn7ZM+vdy1uZLfCunD38UzhuB7YN/J11ndbDBcTmOdxJo9Q7A==}
'@types/progress-stream@2.0.5':
resolution: {integrity: sha512-5YNriuEZkHlFHHepLIaxzq3atGeav1qCTGzB74HKWpo66qjfostF+rHc785YYYHeBytve8ZG3ejg42jEIfXNiQ==}
'@types/qs@6.14.0':
resolution: {integrity: sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ==}
@ -566,6 +613,9 @@ packages:
'@types/tough-cookie@4.0.5':
resolution: {integrity: sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==}
'@types/yauzl@2.10.3':
resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==}
accepts@2.0.0:
resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==}
engines: {node: '>= 0.6'}
@ -578,6 +628,9 @@ packages:
resolution: {integrity: sha512-nfDwkulwiZYQIGwxdy0RUmowMhKcFVcYXUU7m4QlKYim1rUtg83xm2yjZ40QjDuc291AJjjeSc9b++AWHSgSHw==}
engines: {node: '>=18'}
buffer-crc32@0.2.13:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
bytes@3.1.2:
resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
engines: {node: '>= 0.8'}
@ -634,6 +687,19 @@ packages:
resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
engines: {node: '>= 0.8'}
dom-serializer@2.0.0:
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
domelementtype@2.3.0:
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
domhandler@5.0.3:
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
engines: {node: '>= 4'}
domutils@3.2.2:
resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==}
dotenv@17.2.3:
resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==}
engines: {node: '>=12'}
@ -642,6 +708,9 @@ packages:
resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
engines: {node: '>= 0.4'}
duplexify@4.1.3:
resolution: {integrity: sha512-M3BmBhwJRZsSx38lZyhE53Csddgzl5R7xGJNk7CVddZD6CcmwMCH8J+7AprIrQKH7TonKxaCjcv27Qmf+sQ+oA==}
ee-first@1.1.1:
resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
@ -649,6 +718,13 @@ packages:
resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==}
engines: {node: '>= 0.8'}
end-of-stream@1.4.5:
resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==}
entities@4.5.0:
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
engines: {node: '>=0.12'}
entities@6.0.1:
resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==}
engines: {node: '>=0.12'}
@ -703,6 +779,9 @@ packages:
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
engines: {node: '>= 0.6'}
fp-ts@2.16.11:
resolution: {integrity: sha512-LaI+KaX2NFkfn1ZGHoKCmcfv7yrZsC3b8NtWsTVQeHkq4F27vI5igUuO53sxqDEa2gNQMHFPmpojDw/1zmUK7w==}
fresh@2.0.0:
resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==}
engines: {node: '>= 0.8'}
@ -723,6 +802,10 @@ packages:
resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==}
engines: {node: '>= 0.4'}
glob@13.0.0:
resolution: {integrity: sha512-tvZgpqk6fz4BaNZ66ZsRaZnbHvP/jG3uKJvAZOwEVUL4RTA5nJeeLYfyN9/VA8NX/V3IBG+hkeuGpKjvELkVhA==}
engines: {node: 20 || >=22}
gopd@1.2.0:
resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
engines: {node: '>= 0.4'}
@ -739,6 +822,9 @@ packages:
resolution: {integrity: sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==}
engines: {node: '>=18'}
htmlparser2@10.0.0:
resolution: {integrity: sha512-TwAZM+zE5Tq3lrEHvOlvwgj1XLWQCtaaibSN11Q+gGBAS7Y1uZSWwXXRe4iF6OXnaq1riyQAPFOBtYc77Mxq0g==}
http-errors@2.0.1:
resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
engines: {node: '>= 0.8'}
@ -784,6 +870,10 @@ packages:
lru-cache@10.4.3:
resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
lru-cache@11.2.2:
resolution: {integrity: sha512-F9ODfyqML2coTIsQpSkRHnLSZMtkU8Q+mSfcaIyKwy58u+8k5nvAYeiNhsyMARvzNcXJ9QfWVrcPsC9e9rAxtg==}
engines: {node: 20 || >=22}
math-intrinsics@1.1.0:
resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
engines: {node: '>= 0.4'}
@ -804,6 +894,14 @@ packages:
resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==}
engines: {node: '>=18'}
minimatch@10.1.1:
resolution: {integrity: sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==}
engines: {node: 20 || >=22}
minipass@7.1.2:
resolution: {integrity: sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==}
engines: {node: '>=16 || 14 >=14.17'}
ms@2.1.3:
resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
@ -841,9 +939,16 @@ packages:
resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
engines: {node: '>= 0.8'}
path-scurry@2.0.1:
resolution: {integrity: sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA==}
engines: {node: 20 || >=22}
path-to-regexp@8.3.0:
resolution: {integrity: sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==}
pend@1.2.0:
resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
picocolors@1.1.1:
resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
@ -888,6 +993,10 @@ packages:
resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==}
engines: {node: '>= 0.10'}
readable-stream@3.6.2:
resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==}
engines: {node: '>= 6'}
rollup@4.52.5:
resolution: {integrity: sha512-3GuObel8h7Kqdjt0gxkEzaifHTqLVW56Y/bjN7PSQtkKr0w3V/QYSdt6QWYtd7A1xUtYQigtdUfgj1RvWVtorw==}
engines: {node: '>=18.0.0', npm: '>=8.0.0'}
@ -900,6 +1009,9 @@ packages:
rrweb-cssom@0.8.0:
resolution: {integrity: sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==}
safe-buffer@5.1.2:
resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==}
safer-buffer@2.1.2:
resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
@ -942,6 +1054,12 @@ packages:
resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
engines: {node: '>= 0.8'}
stream-shift@1.0.3:
resolution: {integrity: sha512-76ORR0DO1o1hlKwTbi/DM3EXWGf3ZJYO8cXX5RJwnul2DEg2oyoZyjLNoQM8WsvZiFKCRfC1O0J7iCvie3RZmQ==}
string_decoder@1.1.1:
resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==}
symbol-tree@3.2.4:
resolution: {integrity: sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==}
@ -984,6 +1102,9 @@ packages:
resolution: {integrity: sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==}
engines: {node: '>= 0.8'}
util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
vary@1.1.2:
resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
engines: {node: '>= 0.8'}
@ -1070,6 +1191,15 @@ packages:
xmlchars@2.2.0:
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
yauzl@3.2.0:
resolution: {integrity: sha512-Ow9nuGZE+qp1u4JIPvg+uCiUr7xGQWdff7JQSk5VGYTAZMDe2q8lxJ10ygv10qmSj031Ty/6FNJpLO4o1Sgc+w==}
engines: {node: '>=12'}
zx@8.8.5:
resolution: {integrity: sha512-SNgDF5L0gfN7FwVOdEFguY3orU5AkfFZm9B5YSHog/UDHv+lvmd82ZAsOenOkQixigwH2+yyH198AwNdKhj+RA==}
engines: {node: '>= 12.17.0'}
hasBin: true
snapshots:
'@asamuzakjp/css-color@3.2.0':
@ -1256,6 +1386,12 @@ snapshots:
'@esbuild/win32-x64@0.27.0':
optional: true
'@isaacs/balanced-match@4.0.1': {}
'@isaacs/brace-expansion@5.0.0':
dependencies:
'@isaacs/balanced-match': 4.0.1
'@preact/signals-core@1.12.1': {}
'@preact/signals@2.4.0(preact@10.27.2)':
@ -1334,6 +1470,10 @@ snapshots:
'@types/connect': 3.4.38
'@types/node': 24.10.0
'@types/cli-progress@3.11.6':
dependencies:
'@types/node': 24.10.0
'@types/connect@3.4.38':
dependencies:
'@types/node': 24.10.0
@ -1342,6 +1482,10 @@ snapshots:
dependencies:
'@types/node': 24.10.0
'@types/duplexify@3.6.5':
dependencies:
'@types/node': 24.10.0
'@types/estree@1.0.8': {}
'@types/express-serve-static-core@5.1.0':
@ -1371,6 +1515,10 @@ snapshots:
dependencies:
undici-types: 7.16.0
'@types/progress-stream@2.0.5':
dependencies:
'@types/node': 24.10.0
'@types/qs@6.14.0': {}
'@types/range-parser@1.2.7': {}
@ -1392,6 +1540,10 @@ snapshots:
'@types/tough-cookie@4.0.5': {}
'@types/yauzl@2.10.3':
dependencies:
'@types/node': 24.10.0
accepts@2.0.0:
dependencies:
mime-types: 3.0.2
@ -1413,6 +1565,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
buffer-crc32@0.2.13: {}
bytes@3.1.2: {}
call-bind-apply-helpers@1.0.2:
@ -1456,6 +1610,24 @@ snapshots:
depd@2.0.0: {}
dom-serializer@2.0.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
entities: 4.5.0
domelementtype@2.3.0: {}
domhandler@5.0.3:
dependencies:
domelementtype: 2.3.0
domutils@3.2.2:
dependencies:
dom-serializer: 2.0.0
domelementtype: 2.3.0
domhandler: 5.0.3
dotenv@17.2.3: {}
dunder-proto@1.0.1:
@ -1464,10 +1636,23 @@ snapshots:
es-errors: 1.3.0
gopd: 1.2.0
duplexify@4.1.3:
dependencies:
end-of-stream: 1.4.5
inherits: 2.0.4
readable-stream: 3.6.2
stream-shift: 1.0.3
ee-first@1.1.1: {}
encodeurl@2.0.0: {}
end-of-stream@1.4.5:
dependencies:
once: 1.4.0
entities@4.5.0: {}
entities@6.0.1: {}
es-define-property@1.0.1: {}
@ -1589,6 +1774,8 @@ snapshots:
forwarded@0.2.0: {}
fp-ts@2.16.11: {}
fresh@2.0.0: {}
fsevents@2.3.3:
@ -1614,6 +1801,12 @@ snapshots:
dunder-proto: 1.0.1
es-object-atoms: 1.1.1
glob@13.0.0:
dependencies:
minimatch: 10.1.1
minipass: 7.1.2
path-scurry: 2.0.1
gopd@1.2.0: {}
has-symbols@1.1.0: {}
@ -1626,6 +1819,13 @@ snapshots:
dependencies:
whatwg-encoding: 3.1.1
htmlparser2@10.0.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.2.2
entities: 6.0.1
http-errors@2.0.1:
dependencies:
depd: 2.0.0
@ -1693,6 +1893,8 @@ snapshots:
lru-cache@10.4.3: {}
lru-cache@11.2.2: {}
math-intrinsics@1.1.0: {}
media-typer@1.1.0: {}
@ -1705,6 +1907,12 @@ snapshots:
dependencies:
mime-db: 1.54.0
minimatch@10.1.1:
dependencies:
'@isaacs/brace-expansion': 5.0.0
minipass@7.1.2: {}
ms@2.1.3: {}
nanoid@3.3.11: {}
@ -1731,8 +1939,15 @@ snapshots:
parseurl@1.3.3: {}
path-scurry@2.0.1:
dependencies:
lru-cache: 11.2.2
minipass: 7.1.2
path-to-regexp@8.3.0: {}
pend@1.2.0: {}
picocolors@1.1.1: {}
picomatch@4.0.3: {}
@ -1773,6 +1988,12 @@ snapshots:
iconv-lite: 0.7.0
unpipe: 1.0.0
readable-stream@3.6.2:
dependencies:
inherits: 2.0.4
string_decoder: 1.1.1
util-deprecate: 1.0.2
rollup@4.52.5:
dependencies:
'@types/estree': 1.0.8
@ -1813,6 +2034,8 @@ snapshots:
rrweb-cssom@0.8.0: {}
safe-buffer@5.1.2: {}
safer-buffer@2.1.2: {}
saxes@6.0.0:
@ -1878,6 +2101,12 @@ snapshots:
statuses@2.0.2: {}
stream-shift@1.0.3: {}
string_decoder@1.1.1:
dependencies:
safe-buffer: 5.1.2
symbol-tree@3.2.4: {}
tinyglobby@0.2.15:
@ -1913,6 +2142,8 @@ snapshots:
unpipe@1.0.0: {}
util-deprecate@1.0.2: {}
vary@1.1.2: {}
vite@7.1.12(@types/node@24.10.0):
@ -1951,3 +2182,10 @@ snapshots:
xml-name-validator@5.0.0: {}
xmlchars@2.2.0: {}
yauzl@3.2.0:
dependencies:
buffer-crc32: 0.2.13
pend: 1.2.0
zx@8.8.5: {}

View file

@ -1,152 +0,0 @@
import { readdir, readFile, writeFile } from 'fs/promises';
import { join, resolve } from 'path';
import { h } from 'preact';
import render from 'preact-render-to-string';
import DatasetList from '../src/DatasetList.tsx';
// Get SEARCH_ROOT_PATH from argv or environment variable.
// The first CLI argument wins; the environment variable is the fallback.
const SEARCH_ROOT_PATH = process.argv[2] || process.env.SEARCH_ROOT_PATH;
// There is no built-in default: fail fast at module load when neither is set.
if (!SEARCH_ROOT_PATH) {
throw new Error('SEARCH_ROOT_PATH must be provided either as first argument or as environment variable');
}
/** Statistics reported by a dataset's import module. */
interface Stats {
rows: number;
}
/** Shape a dataset's `data-import.ts` module is expected to have. */
interface DataImportModule {
main(): Promise<Stats>;
}
/** One scanned dataset folder together with the outcome of its import. */
interface Dataset {
// Folder name of the dataset.
name: string;
// README.md content, or '' when it could not be read.
readme: string;
// Result of the import module's main(), or null when it failed.
stats: Stats | null;
// Failure message when the import could not be run.
error?: string;
}
/**
 * Reads the README.md of a dataset folder.
 *
 * @param folderPath Folder expected to contain a README.md.
 * @returns The file's UTF-8 content, or '' when it cannot be read for any
 *   reason (a missing README is not treated as an error).
 */
async function readReadme(folderPath: string): Promise<string> {
  try {
    return await readFile(join(folderPath, 'README.md'), 'utf-8');
  } catch {
    // Best effort: an unreadable README simply yields no text.
    return '';
  }
}
/**
 * Loads `data-import.ts` from a dataset folder and runs its `main()`.
 *
 * @param folderPath Folder expected to contain a data-import.ts module.
 * @returns `{ stats }` from the module's `main()` on success; otherwise
 *   `{ stats: null, error }` with a message describing what went wrong
 *   (module missing, no `main` export, or `main()` itself failing).
 */
async function importDataStats(folderPath: string): Promise<{ stats: Stats | null; error?: string }> {
  try {
    // Dynamic import needs an absolute location.
    const modulePath = resolve(join(folderPath, 'data-import.ts'));
    const loaded = await import(modulePath) as DataImportModule;

    if (typeof loaded.main === 'function') {
      return { stats: await loaded.main(), error: undefined };
    }
    return { stats: null, error: 'No main() function found in data-import.ts' };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { stats: null, error: message };
  }
}
/**
 * Scans `rootPath` for dataset folders and processes them one after another.
 *
 * Expected layout of the root folder (one sub-folder per dataset):
 * ```
 * datasets/
 * ├── dataset-1/
 * │   ├── README.md
 * │   └── data-import.ts
 * ├── dataset-2/
 * │   ├── README.md
 * │   └── data-import.ts
 * └── dataset-3/
 *     ├── README.md
 *     └── data-import.ts
 * ```
 * Non-directory entries are ignored.
 *
 * @param rootPath Folder whose sub-folders are the datasets.
 * @returns One record per dataset folder. When scanning fails the error is
 *   logged and whatever was collected up to that point is returned.
 */
async function scanDatasets(rootPath: string): Promise<Dataset[]> {
  const found: Dataset[] = [];
  try {
    const entries = await readdir(rootPath, { withFileTypes: true });
    const folders = entries.filter(entry => entry.isDirectory());
    for (const { name } of folders) {
      const folderPath = join(rootPath, name);
      console.log(`Processing dataset: ${name}`);
      // README first, then the import module.
      const readme = await readReadme(folderPath);
      const { stats, error } = await importDataStats(folderPath);
      found.push({ name, readme, stats, error });
    }
  } catch (error) {
    console.error(`Error scanning directory ${rootPath}:`, error);
  }
  return found;
}
/**
 * Wraps rendered markup in a complete HTML document with the page title and
 * base styles.
 *
 * @param htmlContent Markup placed verbatim inside <body>; it is not escaped.
 * @returns The full HTML document as a string.
 */
function generateHTML(htmlContent: string): string {
return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Base Data Manager</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
margin: 0;
padding: 0;
background-color: #ffffff;
}
* {
box-sizing: border-box;
}
</style>
</head>
<body>
${htmlContent}
</body>
</html>`;
}
/**
 * Generates the dashboard: scans the datasets under SEARCH_ROOT_PATH,
 * renders the dataset list to static HTML and writes it to `dashboard.html`
 * in the current working directory.
 */
async function main() {
  console.log(`Scanning datasets in: ${SEARCH_ROOT_PATH}`);
  const datasets = await scanDatasets(SEARCH_ROOT_PATH);
  console.log(`Found ${datasets.length} dataset(s)`);

  // Preact component → markup → complete HTML document.
  const fullHtml = generateHTML(render(h(DatasetList, { datasets })));

  const outputPath = join(process.cwd(), 'dashboard.html');
  await writeFile(outputPath, fullHtml, 'utf-8');
  console.log(`Generated HTML written to: ${outputPath}`);
}

// Any failure is reported on stderr.
main().catch(console.error);

View file

@ -1,20 +0,0 @@
import { facebook } from "../data-export/facebook2.ts";
import { Task } from "../data-export/task.ts";
// Type-level half of the extension: declare that Task has a `facebook`
// member with the same type as the imported function.
declare module "../data-export/task.ts" {
interface Task {
facebook: typeof facebook;
}
}
// Runtime half of the extension: copy `facebook` onto Task.prototype so the
// member declared above actually exists on instances.
Object.assign(Task.prototype, {
facebook
});
/**
 * Creates a Task, calls `facebook` on it with a hard-coded export folder,
 * then awaits `doTasks()`.
 */
async function main() {
const t = new Task();
t.facebook("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json");
await t.doTasks();
}
// Fire-and-forget: the returned promise is not awaited or caught here.
main();

View file

@ -1,64 +0,0 @@
import { strict as assert } from 'node:assert';
import nodePath from 'node:path';
import { fileURLToPath } from "node:url";
import fs from 'node:fs';
import { type IncomingMessage } from 'node:http';
import http from 'node:http';
import { type Socket } from "node:net";
import express from 'express';
import cors from 'cors';
import dotenv from "dotenv";
// Run dotenv's configuration step before anything else executes.
dotenv.config();
// ES modules have no built-in __filename/__dirname; derive them from the
// module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = nodePath.dirname(__filename);
/**
 * Builds the Express application with CORS and JSON body parsing enabled and
 * the `/` and `/info` routes registered.
 *
 * The type parameter is not used by the body; it is retained only so that
 * the signature stays unchanged for existing callers.
 *
 * @returns The configured Express app. It is not listening yet.
 */
function getExpressApp<T>() {
  const app = express();
  app.use(cors());
  app.use(express.json());

  // if (serveUI) {
  //   const compiler = webpack(webpackConfig);
  //   app.use(middleware(compiler));
  //   //app.use(express.static(nodePath.join(__dirname, appFilesDir)));
  // }

  app.get('/', (_req: express.Request, res: express.Response) => {
    res.send('Server!');
  });

  app.get('/info', (_req: express.Request, res: express.Response) => {
    // res.json serializes the payload and labels it `application/json`.
    // The previous hand-set `text/json` is not a registered media type, so
    // strict clients may refuse to parse the body as JSON.
    res.status(200).json({
      test: 'test'
    });
  });

  return app;
}
/**
 * Resolves once `server` is accepting connections.
 *
 * Rejects with the emitted error if the server reports an `'error'` before it
 * starts listening.
 *
 * @param server - An HTTP server on which `listen()` has been called.
 */
async function ready(server: http.Server): Promise<void> {
  // 'listening' is emitted once per listen(); if it has already fired,
  // waiting for it again would never settle.
  if (server.listening) {
    return;
  }

  await new Promise<void>((resolve, reject) => {
    // Each handler removes its sibling so no listener outlives this call.
    // A leftover 'error' listener would silently swallow later server errors.
    const onListening = (): void => {
      server.off('error', onError);
      resolve();
    };
    const onError = (err: Error): void => {
      server.off('listening', onListening);
      reject(err);
    };
    server.once('listening', onListening);
    server.once('error', onError);
  });
}
/**
 * Creates the HTTP server around the Express app, binds it to the loopback
 * address on port 2947 and resolves once it is listening.
 */
export async function makeServer() {
  // Get the servers
  const app = getExpressApp();
  const httpServer = http.createServer(app);

  // Listen to the available servers.
  // net.Server#listen reads the `host` option, not `hostname`. With the wrong
  // key the address is ignored and the server binds to every interface
  // instead of loopback only. The port is passed as a number, matching the
  // option's declared type.
  const addr = '127.0.0.1';
  httpServer.listen({ port: 2947, host: addr });

  // Wait for everything to be ready
  await ready(httpServer);

  // address() is typed as string | AddressInfo | null; narrow instead of casting to any.
  const bound = httpServer.address();
  const location =
    bound !== null && typeof bound === 'object'
      ? `${bound.address}:${bound.port}`
      : String(bound);
  console.log(`Running HTTP server on ${location}...`);
}
// Start the server only when this module is the script Node was launched
// with (argv[1]), not when it is imported by another module.
const launchedDirectly = process.argv[1] === __filename;
if (launchedDirectly) {
  makeServer();
}

View file

@ -1,40 +0,0 @@
import { h } from 'preact';
/** Props accepted by the DatasetItem component. */
interface DatasetItemProps {
/** Text shown in the collapsible summary row. */
name: string;
/** README text shown in the preformatted block; a falsy value shows a placeholder instead. */
readme: string;
/** Row count for the dataset, or null when no stats are available. */
stats: {
rows: number;
} | null;
/** When truthy, shown as an error message in place of the stats. */
error?: string;
}
/**
 * Collapsible entry for one dataset: the name is the summary row, and the
 * expanded body shows a stats section followed by the README text.
 */
export default function DatasetItem({ name, readme, stats, error }: DatasetItemProps) {
  // Precedence: an error wins over stats; with neither, show a placeholder.
  let statsSection;
  if (error) {
    statsSection = <div style="color: red;">Error loading data: {error}</div>;
  } else if (stats) {
    statsSection = (
      <div>
        <strong>Rows:</strong> {stats.rows}
      </div>
    );
  } else {
    statsSection = <div>No stats available</div>;
  }

  // Any falsy README (including the empty string) falls back to the placeholder.
  const readmeText = readme || 'No README.md found';

  return (
    <details style="border: 1px solid #ccc; margin-bottom: 10px; border-radius: 4px;">
      <summary style="padding: 15px; cursor: pointer; background-color: #f5f5f5; font-weight: bold; user-select: none;">
        {name}
      </summary>
      <div style="padding: 15px; border-top: 1px solid #ccc;">
        <div style="margin-bottom: 15px;">
          <h3 style="margin-top: 0;">Stats</h3>
          {statsSection}
        </div>
        <div>
          <h3>README</h3>
          <pre style="background-color: #f9f9f9; padding: 10px; border: 1px solid #ddd; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;">
            {readmeText}
          </pre>
        </div>
      </div>
    </details>
  );
}

View file

@ -1,37 +0,0 @@
import { h } from 'preact';
import DatasetItem from './DatasetItem.tsx';
/** Summary numbers for one dataset. */
interface Stats {
rows: number;
}
/** One dataset entry; each field is passed straight through to DatasetItem. */
interface Dataset {
/** Also used as the list key for the rendered item. */
name: string;
readme: string;
stats: Stats | null;
error?: string;
}
/** Props accepted by the DatasetList component. */
interface DatasetListProps {
datasets: Dataset[];
}
/**
 * Page body of the dashboard: a heading, the dataset count, and one
 * DatasetItem per dataset, keyed by dataset name.
 */
export default function DatasetList({ datasets }: DatasetListProps) {
  const items = datasets.map(({ name, readme, stats, error }) => (
    <DatasetItem key={name} name={name} readme={readme} stats={stats} error={error} />
  ));

  return (
    <div style="max-width: 1200px; margin: 0 auto; padding: 20px;">
      <h1>Base Data Manager</h1>
      <p>Found {datasets.length} dataset(s)</p>
      <div>{items}</div>
    </div>
  );
}

View file

@ -1,8 +0,0 @@
import { h, render } from 'preact';
import { signal } from '@preact/signals';
/** Placeholder root component: a fragment wrapping a single `<div>` with the text "test". */
export default function MainUI() {
  return (
    <>
      <div>test</div>
    </>
  );
}

View file

@ -1,7 +0,0 @@
import { h, render } from 'preact';
import MainUI from "./MainUI.tsx";
/** Mounts the MainUI component into `document.body`. */
export default function main() {
  const component = h(MainUI, {});
  // preact's render() returns nothing, so its result is not stored.
  render(component, document.body);
}

115
test/facebook.ts Normal file
View file

@ -0,0 +1,115 @@
import test from "node:test";
import fs from "node:fs";
import assert from "node:assert";
import { finished } from "node:stream/promises";
import { Readable, Writable } from "node:stream";
import { TaskTargetPipelineHelper } from "../data-export/task.ts";
// Placeholder for the Facebook export integration test. The body is still a
// commented-out sketch, so the test is flagged as `todo`: the runner reports
// it as unfinished instead of as a genuine pass.
// NOTE(review): the title says "2021 export" but the sketch below points at a
// 2025-11-29 archive. Confirm which export this test is meant to cover.
test(
  "facebook: Can load the 2021 export",
  { todo: "pipeline run and assertions are not implemented yet" },
  async () => {
    // TODO:
    // const t = new Task();
    // (await t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
    // const taskText = await t.getTaskTSVShell();
    // await fs.writeFile('test.manifest', taskText);
    // // Run everything with parallel
    // try {
    //   execFileSync('/usr/bin/parallel', ['--colsep', '\t', '--jobs', '0', '--linebuffer', '--tagstring', '{1}', '--eta', '--joblog', 'out.manifest', 'bash -c {2} > OUTTEST/{1}.csv', '::::', 'test.manifest'], {
    //     stdio: 'inherit'
    //   });
    // }
    // catch(err: any) {
    //   // I'm pretty sure status is the amount that failed?
    //   if (err?.status >= 30) {
    //     throw err;
    //   }
    // }
    // // Now take the output and load it all into a single SQLITE file
    // const entries = await fs.readdir('OUTTEST', { withFileTypes: true });
    // const csvFiles = entries
    //   .filter(e => e.isFile() && e.name.endsWith(".csv"))
    //   .map(e => nodePath.join('OUTTEST', e.name));
  },
);
// import fs from 'node:fs/promises';
// import { type SpawnOptions, execFile as _execFile, execFileSync } from "node:child_process";
// import nodePath from "node:path";
// import { DatabaseSync } from "node:sqlite";
// import { promisify } from "node:util";
// import "../data-export/facebook.ts";
// import { google } from "../data-export/google.ts";
// const execFile = promisify(_execFile);
// declare module "../data-export/task.ts" {
// interface Task {
// google: typeof google;
// }
// }
// Object.assign(Task.prototype, {
// google
// });
// function loadIntoSqlite(
// paths: string[],
// sqlitePath: string
// ) {
// // Open an in-memory db for speed
// const db = new DatabaseSync(":memory:", { allowExtension: true });
// db.loadExtension("/home/cobertos/sqlite-files/csv.so")
// db.enableLoadExtension(false);
// for (const path of paths) {
// const table = nodePath.basename(path, ".csv");
// console.log(`Loading ${path} → table ${table}`);
// // const headers = lines[0].split(",");
// // const columnsSql = headers.map(h => `"${h}" TEXT`).join(", ");
// db.exec(`CREATE VIRTUAL TABLE temp.intermediate USING csv(filename='${path}');`);
// db.exec(`CREATE TABLE "${table}" AS SELECT * FROM intermediate;`);
// db.exec(`DROP TABLE IF EXISTS intermediate;`);
// }
// // Dump it all to the path specified
// db.exec(`VACUUM main INTO '${sqlitePath}'`);
// db.close();
// }
// async function main() {
// const t = new Task();
// // t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/formapcast_facebook-DEADNAME-May2021-json")
// // .facebook()
// (await t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-x-2025-11-29-x.zip").zip()).facebook_v2();
// // t.fork().cd("/home/cobertos/Seafile/archive/ExportedServiceData/google/2023-NAMEwork-001")
// // .google()
// // let zipTask = t.fork().zip("/home/cobertos/Seafile/archive/ExportedServiceData/facebook/facebook-DEADNAME-May2021-json.zip");
// // await (zipTask.fsImpl as any).init();
// // zipTask.facebook();
// const taskText = await t.getTaskTSVShell();
// await fs.writeFile('test.manifest', taskText);
// // Run everything with parallel
// try {
// execFileSync('/usr/bin/parallel', ['--colsep', '\t', '--jobs', '0', '--linebuffer', '--tagstring', '{1}', '--eta', '--joblog', 'out.manifest', 'bash -c {2} > OUTTEST/{1}.csv', '::::', 'test.manifest'], {
// stdio: 'inherit'
// });
// }
// catch(err: any) {
// // I'm pretty sure status is the amount that failed?
// if (err?.status >= 30) {
// throw err;
// }
// }
// // Now take the output and load it all into a single SQLITE file
// const entries = await fs.readdir('OUTTEST', { withFileTypes: true });
// const csvFiles = entries
// .filter(e => e.isFile() && e.name.endsWith(".csv"))
// .map(e => nodePath.join('OUTTEST', e.name));
// await fs.unlink('your.db');
// loadIntoSqlite(csvFiles, 'your.db');
// }
// main();

13
test/fixtures/README.md vendored Normal file
View file

@ -0,0 +1,13 @@
### Addition process
* Paste in a new folder
* Manually scan through _folders_ and delete any long lists of files. For example, for Facebook chats we don't need 100 chat thread `.json` files
* Run `util/scrub.ts` inside that folder
* Fix any warnings that come out of the above for manual edits
* Manually blank out any file/folder names with IDs or PII
* Manually scan through each file for any remaining PII
### Corpus
* `facebook-json-2021-05-01` - Facebook JSON export
* `facebook-json-2025-11-29` - Facebook JSON export

View file

@ -0,0 +1,9 @@
{
"facial_data": {
"threshold_green": 69,
"threshold_yellow": 69,
"threshold_red": 69,
"example_count": 69,
"raw_data": "xxx"
}
}

View file

@ -0,0 +1,3 @@
{
"friend_peer_group": "xxx"
}

View file

@ -0,0 +1,42 @@
{
"messenger": {
"secret_conversations": {
"has_sent_message": false,
"has_received_message": true,
"tincan_devices": []
},
"autofill_information": {
"CITY": [
"xxx"
],
"STATE": [
"xxx"
],
"COUNTRY": [
"xxx"
],
"EMAIL": [
"not_a_real_email@example.com",
"not_a_real_email@example.com"
],
"JOB_TITLE": [
"xxx"
],
"COMPANY_NAME": [
"xxx"
],
"GENDER": [
"xxx"
],
"FIRST_NAME": [
"xxx"
],
"LAST_NAME": [
"xxx"
],
"FULL_NAME": [
"xxx"
]
}
}
}

View file

@ -0,0 +1,16 @@
{
"notifications": [
{
"timestamp": 1714465000,
"unread": true,
"href": "url://somewhere",
"text": "xxx"
},
{
"timestamp": 1714465000,
"unread": true,
"href": "url://somewhere",
"text": "xxx"
}
]
}

View file

@ -0,0 +1,37 @@
{
"preferences": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1704500000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
},
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1708835000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1713130000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1,72 @@
{
"viewed_things": [
{
"name": "xxx",
"description": "xxx",
"children": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1728855000,
"data": {
"name": "xxx",
"uri": "url://somewhere",
"watch_time": "xxx"
}
},
{
"timestamp": 1721720000,
"data": {
"name": "xxx",
"uri": "url://somewhere",
"watch_time": "xxx"
}
}
]
},
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1723345000,
"data": {
"uri": "url://somewhere",
"name": "xxx"
}
},
{
"timestamp": 1730165000,
"data": {
"uri": "url://somewhere",
"name": "xxx"
}
}
]
}
]
},
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1714200000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1707070000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1,44 @@
{
"visited_things": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1731145000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1731145000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
},
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1706175000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1704250000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1,19 @@
{
"linked_accounts": [
{
"service_name": "xxx",
"native_app_id": 69,
"username": "xxx",
"email": "not_a_real_email@example.com",
"phone_number": "xxx",
"name": "xxx"
},
{
"service_name": "xxx",
"native_app_id": 1707005000,
"username": "xxx",
"email": "not_a_real_email@example.com",
"name": "xxx"
}
]
}

View file

@ -0,0 +1,6 @@
{
"custom_audiences": [
"xxx",
"xxx"
]
}

View file

@ -0,0 +1,34 @@
{
"off_facebook_activity": [
{
"name": "xxx",
"events": [
{
"id": 69,
"type": "xxx",
"timestamp": 1707655000
},
{
"id": 69,
"type": "xxx",
"timestamp": 1707595000
}
]
},
{
"name": "xxx",
"events": [
{
"id": 69,
"type": "xxx",
"timestamp": 1707565000
},
{
"id": 69,
"type": "xxx",
"timestamp": 1707340000
}
]
}
]
}

View file

@ -0,0 +1,12 @@
{
"installed_apps": [
{
"name": "xxx",
"added_timestamp": 1735460000
},
{
"name": "xxx",
"added_timestamp": 1725280000
}
]
}

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,32 @@
{
"comments": [
{
"timestamp": 1707420000,
"data": [
{
"comment": {
"timestamp": 1707245000,
"comment": "xxx",
"author": "xxx",
"group": "xxx"
}
}
],
"title": "xxx"
},
{
"timestamp": 1705500000,
"data": [
{
"comment": {
"timestamp": 1705320000,
"comment": "xxx",
"author": "xxx",
"group": "xxx"
}
}
],
"title": "xxx"
}
]
}

View file

@ -0,0 +1,12 @@
{
"dating_communities": {
"events": [
"xxx",
"xxx"
],
"groups": [
"xxx",
"xxx"
]
}
}

View file

@ -0,0 +1,13 @@
{
"recipient": "xxx",
"messages": [
{
"timestamp": 1705130000,
"body": "xxx"
},
{
"timestamp": 1705130000,
"body": "xxx"
}
]
}

View file

@ -0,0 +1,7 @@
{
"dating_your_activity": {
"recently_active_date_range": "xxx",
"liked_count": 69,
"passed_count": 69
}
}

View file

@ -0,0 +1,15 @@
{
"dating_preferences": {
"min_age": 69,
"max_age": 69,
"children": "xxx",
"distance": 69,
"gender": [
"xxx",
"xxx"
],
"religious_views": [
"xxx"
]
}
}

View file

@ -0,0 +1,94 @@
{
"dating_profile": {
"additional_locations": [],
"blocked_people": [
"xxx"
],
"children": "xxx",
"college": [
"xxx"
],
"current_city": "xxx",
"company": [
"xxx"
],
"profile_questions": [
{
"question": "xxx",
"answer": "xxx"
},
{
"question": "xxx",
"answer": "xxx"
}
],
"gender": [
"xxx"
],
"grad_school": [],
"height": 69,
"high_school": [
"xxx"
],
"hobbies": [
"xxx",
"xxx"
],
"hometown": "xxx",
"intro": "url://somewhere",
"job_title": [
"xxx"
],
"profile_picture": {
"uri": "photos_and_videos/SomeAlbum_xxxxxxx/200x200png.png",
"creation_timestamp": 1705555000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1705465000
}
]
}
},
"title": "xxx"
},
"religious_views": [
"xxx"
],
"photos": [
{
"uri": "photos_and_videos/SomeAlbum_xxxxxxx/200x200png.png",
"creation_timestamp": 1705470000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1705295000
}
]
}
},
"title": "xxx"
},
{
"uri": "photos_and_videos/SomeAlbum_xxxxxxx/200x200png.png",
"creation_timestamp": 1732045000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1731870000
}
]
}
},
"title": "xxx"
}
],
"account_status": "xxx"
}
}

View file

@ -0,0 +1,14 @@
{
"events_invited": [
{
"name": "xxx",
"start_timestamp": 1706525000,
"end_timestamp": 1706540000
},
{
"name": "xxx",
"start_timestamp": 1734470000,
"end_timestamp": 1734485000
}
]
}

View file

@ -0,0 +1,40 @@
{
"event_responses": {
"events_joined": [
{
"name": "xxx",
"start_timestamp": 1729040000,
"end_timestamp": 1729055000
},
{
"name": "xxx",
"start_timestamp": 1728395000,
"end_timestamp": 1728435000
}
],
"events_declined": [
{
"name": "xxx",
"start_timestamp": 1719505000,
"end_timestamp": 69
},
{
"name": "xxx",
"start_timestamp": 1722705000,
"end_timestamp": 1722730000
}
],
"events_interested": [
{
"name": "xxx",
"start_timestamp": 1706715000,
"end_timestamp": 69
},
{
"name": "xxx",
"start_timestamp": 1704645000,
"end_timestamp": 1704835000
}
]
}
}

View file

@ -0,0 +1,8 @@
{
"instant_games_played": [
{
"name": "xxx",
"added_timestamp": 1730650000
}
]
}

View file

@ -0,0 +1,10 @@
{
"followers": [
{
"name": "xxx"
},
{
"name": "xxx"
}
]
}

View file

@ -0,0 +1,12 @@
{
"following": [
{
"name": "xxx",
"timestamp": 1714550000
},
{
"name": "xxx",
"timestamp": 1714550000
}
]
}

View file

@ -0,0 +1,13 @@
{
"pages_unfollowed": [
{
"timestamp": 1734425000,
"data": [
{
"name": "xxx"
}
],
"title": "xxx"
}
]
}

View file

@ -0,0 +1,12 @@
{
"friends": [
{
"name": "xxx",
"timestamp": 1707830000
},
{
"name": "xxx",
"timestamp": 1730335000
}
]
}

View file

@ -0,0 +1,12 @@
{
"received_requests": [
{
"name": "xxx",
"timestamp": 1707410000
},
{
"name": "xxx",
"timestamp": 1727205000
}
]
}

View file

@ -0,0 +1,12 @@
{
"rejected_requests": [
{
"name": "xxx",
"timestamp": 1727450000
},
{
"name": "xxx",
"timestamp": 1724460000
}
]
}

View file

@ -0,0 +1,12 @@
{
"deleted_friends": [
{
"name": "xxx",
"timestamp": 1705215000
},
{
"name": "xxx",
"timestamp": 1705215000
}
]
}

View file

@ -0,0 +1,12 @@
{
"sent_requests": [
{
"name": "xxx",
"timestamp": 1719120000
},
{
"name": "xxx",
"timestamp": 1716625000
}
]
}

View file

@ -0,0 +1,12 @@
{
"groups_joined": [
{
"timestamp": 1705215000,
"title": "xxx"
},
{
"timestamp": 1705215000,
"title": "xxx"
}
]
}

View file

@ -0,0 +1,34 @@
{
"group_posts": {
"activity_log_data": [
{
"timestamp": 1707420000,
"data": [
{
"comment": {
"timestamp": 1707245000,
"comment": "xxx",
"author": "xxx",
"group": "xxx"
}
}
],
"title": "xxx"
},
{
"timestamp": 1707420000,
"data": [
{
"comment": {
"timestamp": 1707245000,
"comment": "xxx",
"author": "xxx",
"group": "xxx"
}
}
],
"title": "xxx"
}
]
}
}

View file

@ -0,0 +1,16 @@
{
"events_interactions": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1705040000,
"data": {
"value": "xxx"
}
}
]
}
]
}

View file

@ -0,0 +1,24 @@
{
"group_interactions": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"data": {
"name": "xxx",
"value": "xxx",
"uri": "url://somewhere"
}
},
{
"data": {
"name": "xxx",
"value": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1,24 @@
{
"people_interactions": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1705320000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1705040000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,12 @@
{
"page_likes": [
{
"name": "xxx",
"timestamp": 1714550000
},
{
"name": "xxx",
"timestamp": 1714550000
}
]
}

View file

@ -0,0 +1,28 @@
{
"reactions": [
{
"timestamp": 1705215000,
"data": [
{
"reaction": {
"reaction": "xxx",
"actor": "xxx"
}
}
],
"title": "xxx"
},
{
"timestamp": 1705215000,
"data": [
{
"reaction": {
"reaction": "xxx",
"actor": "xxx"
}
}
],
"title": "xxx"
}
]
}

View file

@ -0,0 +1,13 @@
{
"primary_location": {
"city_region_pairs": [
[
"xxx",
"xxx"
]
],
"zipcode": [
"xxx"
]
}
}

View file

@ -0,0 +1,7 @@
{
"primary_public_location": {
"city": "xxx",
"region": "xxx",
"country": "xxx"
}
}

View file

@ -0,0 +1,3 @@
{
"timezone": "some/path"
}

View file

@ -0,0 +1,36 @@
{
"items_selling": [
{
"title": "xxx",
"price": "xxx",
"seller": "xxx",
"created_timestamp": 1734500000,
"updated_timestamp": 1734500000,
"category": "xxx",
"marketplace": "xxx",
"location": {
"coordinate": {
"latitude": 69,
"longitude": 69
}
},
"description": "xxx"
},
{
"title": "xxx",
"price": "xxx",
"seller": "xxx",
"created_timestamp": 1734500000,
"updated_timestamp": 1734500000,
"category": "xxx",
"marketplace": "xxx",
"location": {
"coordinate": {
"latitude": 69,
"longitude": 69
}
},
"description": "xxx"
}
]
}

View file

@ -0,0 +1,30 @@
{
"participants": [
{
"name": "xxx"
},
{
"name": "xxx"
}
],
"messages": [
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
},
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
}
],
"title": "xxx",
"is_still_participant": true,
"thread_type": "xxx",
"thread_path": "some/path"
}

View file

@ -0,0 +1,30 @@
{
"participants": [
{
"name": "xxx"
},
{
"name": "xxx"
}
],
"messages": [
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
},
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
}
],
"title": "xxx",
"is_still_participant": true,
"thread_type": "xxx",
"thread_path": "some/path"
}

View file

@ -0,0 +1,23 @@
{
"participants": [
{
"name": "xxx"
},
{
"name": "xxx"
}
],
"messages": [
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
}
],
"title": "xxx",
"is_still_participant": true,
"thread_type": "xxx",
"thread_path": "some/path"
}

View file

@ -0,0 +1,23 @@
{
"participants": [
{
"name": "xxx"
},
{
"name": "xxx"
}
],
"messages": [
{
"sender_name": "xxx",
"timestamp_ms": 69,
"content": "xxx",
"type": "xxx",
"is_unsent": false
}
],
"title": "xxx",
"is_still_participant": true,
"thread_type": "xxx",
"thread_path": "some/path"
}

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,5 @@
{
"news_your_locations": [
"xxx"
]
}

View file

@ -0,0 +1,12 @@
{
"pokes": {
"data": [
{
"poker": "xxx",
"pokee": "xxx",
"rank": 69,
"timestamp": 1721675000
}
]
}
}

View file

@ -0,0 +1,26 @@
{
"support_correspondence": [
{
"timestamp": 1729235000,
"subject": "xxx",
"messages": [
{
"from": "not_a_real_email@example.com",
"to": "xxx",
"subject": "xxx",
"message": "xxx",
"timestamp": 1729060000,
"attachments": []
},
{
"from": "xxx",
"to": "xxx",
"subject": "xxx",
"message": "url://somewhere",
"timestamp": 1729060000,
"attachments": []
}
]
}
]
}

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,17 @@
{
"payments": {
"preferred_currency": "xxx",
"payments": [
{
"created_timestamp": 1714945000,
"amount": "xxx",
"currency": "xxx",
"sender": "xxx",
"receiver": "xxx",
"type": "xxx",
"status": "xxx",
"payment_method": "xxx"
}
]
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View file

@ -0,0 +1,52 @@
{
"name": "xxx",
"photos": [
{
"uri": "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png",
"creation_timestamp": 1709825000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1708000000
}
]
}
},
"title": "xxx"
},
{
"uri": "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png",
"creation_timestamp": 1719820000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1718825000
}
]
}
},
"title": "xxx"
}
],
"cover_photo": {
"uri": "photos_and_videos/CoverPhotos_yyyyyy/200x200png.png",
"creation_timestamp": 1719910000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"taken_timestamp": 1719005000
}
]
}
},
"title": "xxx"
},
"last_modified_timestamp": 1719910000,
"description": "xxx"
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

View file

@ -0,0 +1,23 @@
{
"videos": [
{
"uri": "photos_and_videos/videos/sample.mp4",
"creation_timestamp": 1723410000,
"media_metadata": {
"video_metadata": {
"exif_data": [
{
"upload_ip": "1.1.1.1",
"upload_timestamp": 69
}
]
}
},
"thumbnail": {
"uri": "photos_and_videos/videos/sample.mp4"
},
"title": "xxx",
"description": "xxx"
}
]
}

View file

@ -0,0 +1,24 @@
{
"pinned_posts": [
{
"name": "xxx",
"description": "xxx",
"entries": [
{
"timestamp": 1709010000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
},
{
"timestamp": 1715830000,
"data": {
"name": "xxx",
"uri": "url://somewhere"
}
}
]
}
]
}

View file

@ -0,0 +1,54 @@
[
{
"timestamp": 1714550000,
"data": [
{
"post": "xxx"
}
],
"title": "xxx"
},
{
"timestamp": 1730355000,
"attachments": [
{
"data": [
{
"media": {
"uri": "photos_and_videos/your_posts/200x200png.png",
"creation_timestamp": 1729825000,
"media_metadata": {
"photo_metadata": {
"exif_data": [
{
"iso": 69,
"focal_length": "some/path",
"upload_ip": "1.1.1.1",
"taken_timestamp": 1717585000,
"modified_timestamp": 1717585000,
"camera_make": "xxx",
"camera_model": "xxx",
"exposure": "some/path",
"f_stop": "some/path",
"orientation": 69,
"original_width": 69,
"original_height": 69
}
]
}
},
"title": "xxx",
"description": "xxx"
}
}
]
}
],
"data": [
{
"post": "xxx"
}
],
"title": "xxx"
}
]

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,153 @@
{
"profile": {
"name": {
"full_name": "xxx",
"first_name": "xxx",
"middle_name": "xxx",
"last_name": "xxx"
},
"emails": {
"emails": [
"not_a_real_email@example.com",
"not_a_real_email@example.com"
],
"previous_emails": [
"not_a_real_email@example.com"
],
"pending_emails": [],
"ad_account_emails": []
},
"birthday": {
"year": 69,
"month": 69,
"day": 69
},
"gender": {
"custom_genders": [
"xxx"
],
"pronoun": "xxx"
},
"previous_names": [],
"other_names": [],
"current_city": {
"name": "xxx",
"timestamp": 69
},
"hometown": {
"name": "xxx",
"timestamp": 69
},
"relationship": {
"status": "xxx",
"timestamp": 1708345000
},
"family_members": [
{
"name": "xxx",
"relation": "xxx",
"timestamp": 1727220000
},
{
"name": "xxx",
"relation": "xxx",
"timestamp": 1715325000
}
],
"education_experiences": [
{
"name": "xxx",
"start_timestamp": 1735475000,
"end_timestamp": 1735475000,
"graduated": false,
"concentrations": [],
"school_type": "xxx",
"timestamp": 1704850000
},
{
"name": "xxx",
"graduated": true,
"concentrations": [],
"school_type": "xxx",
"timestamp": 1709385000
}
],
"work_experiences": [
{
"employer": "xxx",
"title": "xxx",
"description": "xxx",
"start_timestamp": 1735510000,
"end_timestamp": 1728205000,
"timestamp": 1728205000
},
{
"employer": "xxx",
"title": "xxx",
"description": "xxx",
"start_timestamp": 1735465000,
"end_timestamp": 1735445000,
"timestamp": 1728205000
}
],
"languages": [
{
"name": "xxx",
"timestamp": 69
}
],
"interested_in": [
"xxx",
"xxx"
],
"blood_info": {
"blood_donor_status": "xxx"
},
"websites": [
{
"address": "url://somewhere"
}
],
"phone_numbers": [],
"pages": [
{
"name": "xxx",
"pages": [
"xxx",
"xxx"
],
"timestamps": [
69,
69
]
},
{
"name": "xxx",
"pages": [
"xxx",
"xxx"
],
"timestamps": [
69,
69
]
}
],
"groups": [
{
"name": "xxx",
"timestamp": 1705215000
},
{
"name": "xxx",
"timestamp": 1704480000
}
],
"registration_timestamp": 1718825000,
"profile_uri": "url://somewhere",
"intro_bio": {
"name": "xxx",
"timestamp": 1717060000
}
}
}

View file

@ -0,0 +1,73 @@
{
"profile_updates": [
{
"timestamp": 1728205000,
"attachments": [
{
"data": [
{
"life_event": {
"title": "xxx",
"start_date": {
"year": 69,
"month": 69,
"day": 69
},
"end_date": {
"year": 69,
"month": 69,
"day": 69
},
"place": {
"name": "xxx",
"coordinate": {
"latitude": 69,
"longitude": 69
},
"address": "xxx"
}
}
}
]
}
],
"data": [
{
"backdated_timestamp": 1728025000
}
]
},
{
"timestamp": 1728205000,
"attachments": [
{
"data": [
{
"life_event": {
"title": "xxx",
"start_date": {
"year": 69,
"month": 69,
"day": 69
},
"place": {
"name": "xxx",
"coordinate": {
"latitude": 69,
"longitude": 69
},
"address": "xxx"
}
}
}
]
}
],
"data": [
{
"backdated_timestamp": 1735335000
}
]
}
]
}

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1 @@
You have no data in this section

View file

@ -0,0 +1,40 @@
{
"searches": [
{
"timestamp": 1731825000,
"attachments": [
{
"data": [
{
"text": "xxx"
}
]
}
],
"data": [
{
"text": "xxx"
}
],
"title": "xxx"
},
{
"timestamp": 1731825000,
"attachments": [
{
"data": [
{
"text": "xxx"
}
]
}
],
"data": [
{
"text": "xxx"
}
],
"title": "xxx"
}
]
}

View file

@ -0,0 +1,26 @@
{
"account_activity": [
{
"action": "xxx",
"timestamp": 1714550000,
"ip_address": "1.1.1.1",
"user_agent": "some/path",
"datr_cookie": "xxx",
"city": "xxx",
"region": "xxx",
"country": "xxx",
"site_name": "xxx"
},
{
"action": "xxx",
"timestamp": 1714550000,
"ip_address": "1.1.1.1",
"user_agent": "some/path",
"datr_cookie": "xxx",
"city": "xxx",
"region": "xxx",
"country": "xxx",
"site_name": "xxx"
}
]
}

View file

@ -0,0 +1,12 @@
{
"account_status_changes": [
{
"status": "xxx",
"timestamp": 1714550000
},
{
"status": "xxx",
"timestamp": 1707835000
}
]
}

View file

@ -0,0 +1,22 @@
{
"admin_records": [
{
"event": "xxx",
"session": {
"created_timestamp": 1714550000,
"ip_address": "1.1.1.1",
"user_agent": "some/path",
"datr_cookie": "xxx"
}
},
{
"event": "xxx",
"session": {
"created_timestamp": 1707835000,
"ip_address": "1.1.1.1",
"user_agent": "some/path",
"datr_cookie": "xxx"
}
}
]
}

View file

@ -0,0 +1,12 @@
{
"recognized_devices": [
{
"name": "xxx",
"created_timestamp": 1724290000,
"updated_timestamp": 1715440000,
"ip_address": "1.1.1.1",
"user_agent": "some/path",
"datr_cookie": "xxx"
}
]
}

View file

@ -0,0 +1,14 @@
{
"contact_verifications": [
{
"contact": "not_a_real_email@example.com",
"contact_type": 69,
"verification_time": 1729235000
},
{
"contact": "not_a_real_email@example.com",
"contact_type": 69,
"verification_time": 1705875000
}
]
}

View file

@ -0,0 +1,7 @@
{
"datr_stats": {
"xxxxxx": [
1704880000
]
}
}

View file

@ -0,0 +1,19 @@
{
"login_protection_data": [
{
"name": "xxx",
"session": {
"created_timestamp": 1707325000,
"updated_timestamp": 1714550000,
"ip_address": "1.1.1.1"
}
},
{
"name": "xxx",
"session": {
"created_timestamp": 1727405000,
"updated_timestamp": 1714550000
}
}
]
}

Some files were not shown because too many files have changed in this diff Show more