base-data-manager/data-export/facebook.ts

942 lines
32 KiB
TypeScript

import { pipe, branch, each, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
/**
 * Shared notifications parser, used for about_you/notifications.json (v1 layout)
 * and logged_information/notifications/notifications.json (v2 layout).
 *
 * @param prop Name of the top-level JSON property that holds the notification array.
 * @returns A pipeline made of a `jq -r` command op followed by an `assignMeta` op.
 */
function facebook_notifications_generic(prop: string): PipelineOp {
  // jq program: a header row, then one row per notification. The numeric
  // `timestamp` goes through jq's `todateiso8601`; `@csv` renders each array as a CSV line.
  const program = `["timestamp","unread","href","text"],
(
.${prop}[]
| [(.timestamp | todateiso8601), .unread, .href, .text]
)
| @csv`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["isodatetime", "any", "url", "text"],
    perRowDescription: 'Notification at {0}: "{3}"',
    perRowTags: "facebook,initiated_by_third_party",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: the notification array lives under `notifications`. */
function facebook_notifications_v1(): PipelineOp {
  const prop = "notifications";
  return facebook_notifications_generic(prop);
}
/** v2 layout: the notification array lives under `notifications_v2`. */
function facebook_notifications_v2(): PipelineOp {
  const prop = "notifications_v2";
  return facebook_notifications_generic(prop);
}
/**
 * Installed apps.
 *
 * @param prop Name of the top-level JSON property that holds the app array.
 * @returns A pipeline made of a `jq -r` command op (columns: name, added_timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_installed_apps_generic(prop: string) {
  // Header row first, then one CSV row per app; `added_timestamp` is converted
  // with jq's `todateiso8601`.
  const program = `
["name","added_timestamp"],
(
.${prop}[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime"],
    perRowDescription: 'App "{0}" added on {1}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: apps are listed under `installed_apps`. */
function facebook_installed_apps_v1() {
  const prop = "installed_apps";
  return facebook_installed_apps_generic(prop);
}
/** v2 layout: apps are listed under `installed_apps_v2`. */
function facebook_installed_apps_v2() {
  // TODO: v2 entries carry a few more properties that are not extracted yet
  const prop = "installed_apps_v2";
  return facebook_installed_apps_generic(prop);
}
/**
 * Messenger thread parser shared by the v1 and v2 layouts.
 *
 * Yields two branches for every thread JSON file:
 *  1. a conversation-level row (id, title, participants, ...) aggregated under
 *     the "Facebook - Messages Meta" id, and
 *  2. the messages themselves, one CSV row per message.
 *
 * @returns The `branchGen` op wrapping both branches.
 */
function facebook_messages_generic() {
  return branchGen(function*(){
    // This most assuredly does not handle certain things like pictures and such
    // There are messages .type and then they have other things in them?

    // Conversation-level information aggregated into a single place
    // TODO: This will result in MULTIPLE rows for a single thread if there are multiple .jsons for a single
    // chat in one directory. For now this is just a limitation
    yield pipe(
      each(t =>
        // The task id is handed to jq as a variable (`--arg id ...` / `$id`) rather than
        // being spliced into the program text: an id containing `"`, `\` or `\(...)`
        // would otherwise break or alter the jq program.
        t.clone().cmd(["jq", "-r", "--arg", "id", `${t.id}`, `
[$id, .title, .is_still_participant, .thread_type, .thread_path, (.participants | map(.name) | join(", "))]
| @csv
`])
      ),
      assignMeta({
        aggregate: true,
        aggregateColumns: ["id", "title", "is_still_participant", "thread_type", "thread_path", "participants"],
        idValue: "Facebook - Messages Meta",
      })
    );

    // The conversation itself. `timestamp_ms` is in milliseconds, so it is scaled to
    // seconds and rounded before `todateiso8601`. The receiver is not extracted and
    // is emitted as the literal "<other>".
    yield pipe(
      cmd(["jq", "-r", `
["from","to","timestamp","content"],
(
.messages[]
| [.sender_name, "<other>", ((.timestamp_ms / 1000) | round | todateiso8601), .content]
)
| @csv
`]),
      assignMeta({
        metaIdValue: "Facebook - Messages Meta",
        columnMeta: ["sender", "receiver", "isodatetime", "text"],
        perRowDescription: '"{3}" from {0} at {2}',
        perRowTags: "facebook,message",
      })
    );
  });
}
/**
 * Comments.
 *
 * @param prop Name of the top-level JSON property that holds the comment array.
 * @returns A pipeline made of a `jq -r` command op (columns: timestamp, data, title)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_comments_generic(prop: string) {
  // TODO: .data is an array that has items, but usually just one
  // "data": [
  //   {
  //     "comment": {
  //       "timestamp": 1612923641,
  //       "comment": "xxx",
  //       "author": "xxx xxx",
  //       "group": "xxx"
  //     }
  //   }
  // ],
  // TODO: there's also attachments (media)

  // The "data" column is a literal "TODO" placeholder for now. `[]?` means a
  // non-iterable `.<prop>` yields no rows instead of a jq error.
  const program = `
["timestamp","data", "title"],
(
.${prop}[]?
| [(.timestamp | todateiso8601), "TODO", .title]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["isodatetime", "TODO", "text"],
    perRowDescription: 'Comment on "{2}" at {0}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: comments are listed under `comments`. */
function facebook_comments_v1() {
  const prop = "comments";
  return facebook_comments_generic(prop);
}
/** v2 layout: comments are listed under `comments_v2`. */
function facebook_comments_v2() {
  // TODO: No visible difference between v1 and v2 so far? Perhaps it's in the data?
  const prop = "comments_v2";
  return facebook_comments_generic(prop);
}
/**
 * Friend-list style files (friends, sent/received/rejected requests, removed friends).
 *
 * @param prop Name of the top-level JSON property that holds the entries.
 * @returns A pipeline made of a `jq -r` command op (columns: name, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_friends_generic(prop: string) {
  // Header row first, then one CSV row per entry with an ISO 8601 timestamp.
  const program = `
["name", "timestamp"],
(
.${prop}[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime"],
    perRowDescription: '{0} at {1}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/**
 * People interactions.
 *
 * @param prop Name of the top-level JSON property; each of its items has an `entries` array.
 * @returns A pipeline made of a `jq -r` command op (columns: name, uri, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_people_interactions_generic(prop: string) {
  // Flattens every `.entries[]` of every item into one CSV row; name and uri
  // come from the entry's `.data` object.
  const program = `
["name", "uri", "timestamp"],
(
.${prop}[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "url", "isodatetime"],
    perRowDescription: 'Interaction with {0} at {2}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: interactions are listed under `people_interactions`. */
function facebook_people_interactions_v1() {
  const prop = "people_interactions";
  return facebook_people_interactions_generic(prop);
}
/** v2 layout: interactions are listed under `people_interactions_v2`. */
function facebook_people_interactions_v2() {
  const prop = "people_interactions_v2";
  return facebook_people_interactions_generic(prop);
}
/**
 * Marketplace items sold.
 *
 * @param prop Name of the top-level JSON property that holds the item array.
 * @returns A pipeline made of a `jq -r` command op (title, price, seller, created
 *          timestamp, coordinates, description) followed by an `assignMeta` op.
 */
function facebook_marketplace_items_sold_generic(prop: string) {
  // TODO: updated_timestamp may not exist, so it is left out for now
  // Latitude/longitude are read from `.location.coordinate`.
  const program = `
["title", "price", "seller", "created_timestamp", "latitude", "longitude", "description"],
(
.${prop}[]
| [.title, .price, .seller, (.created_timestamp | todateiso8601), .location.coordinate.latitude, .location.coordinate.longitude, .description]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "numeric", "sender", "isodatetime", "lat", "lng", "text"],
    perRowDescription: 'Sold "{0}" for {1} on {3}',
    perRowTags: "facebook,marketplace",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: items are listed under `items_selling`. */
function facebook_marketplace_items_sold_v1() {
  const prop = "items_selling";
  return facebook_marketplace_items_sold_generic(prop);
}
/** v2 layout: items are listed under `items_selling_v2`. */
function facebook_marketplace_items_sold_v2() {
  const prop = "items_selling_v2";
  return facebook_marketplace_items_sold_generic(prop);
}
/**
 * Search history.
 *
 * @param prop Name of the top-level JSON property that holds the search array.
 * @returns A pipeline made of a `jq -r` command op (columns: title, data, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_searches_generic(prop: string) {
  // TODO: data and attachments both only contain one "text" field inside the
  // first object of the array... Same data, do they ever differ?
  // Only the first `.data` item's text is extracted.
  const program = `
["title","data","timestamp"],
(
.${prop}[]
| [.title, .data[0].text, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "text", "isodatetime"],
    perRowDescription: 'Searched for "{1}" at {2}',
    perRowTags: "facebook,initiated_by_me,content_by_me",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: searches are listed under `searches`. */
function facebook_searches_v1() {
  const prop = "searches";
  return facebook_searches_generic(prop);
}
/** v2 layout: searches are listed under `searches_v2`. */
function facebook_searches_v2() {
  const prop = "searches_v2";
  return facebook_searches_generic(prop);
}
/**
 * Account activity (security log).
 *
 * @param prop Name of the top-level JSON property that holds the activity array.
 * @returns A pipeline made of a `jq -r` command op (action, ip, user agent, datr cookie,
 *          city, region, country, site name, timestamp) followed by an `assignMeta` op.
 */
function facebook_account_activity_generic(prop: string) {
  // The "ip" column is read from `.ip_address`; every other column shares its field name.
  const program = `
["action", "ip", "user_agent", "datr_cookie", "city", "region", "country", "site_name","timestamp"],
(
.${prop}[]
| [.action, .ip_address, .user_agent, .datr_cookie, .city, .region, .country, .site_name, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "text", "text", "text", "text", "text", "text", "text", "isodatetime"],
    perRowDescription: '{0} from {4}, {6} on {8}',
    perRowTags: "facebook,security",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: activity is listed under `account_activity`. */
function facebook_account_activity_v1() {
  const prop = "account_activity";
  return facebook_account_activity_generic(prop);
}
/** v2 layout: activity is listed under `account_activity_v2`. */
function facebook_account_activity_v2() {
  const prop = "account_activity_v2";
  return facebook_account_activity_generic(prop);
}
/**
 * Administrative records (security log).
 *
 * @param prop Name of the top-level JSON property that holds the record array.
 * @returns A pipeline made of a `jq -r` command op (event, created timestamp, ip address,
 *          user agent, datr cookie) followed by an `assignMeta` op.
 */
function facebook_admin_records_generic(prop: string) {
  // The created timestamp is nested under `.session`; the other fields are
  // read from the record itself.
  const program = `
["event","created_timestamp","ip_address","user_agent","datr_cookie"],
(
.${prop}[]
| [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime", "text", "text", "text"],
    perRowDescription: '{0} at {1} from {2}',
    perRowTags: "facebook,security",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: records are listed under `admin_records`. */
function facebook_admin_records_v1() {
  const prop = "admin_records";
  return facebook_admin_records_generic(prop);
}
/** v2 layout: records are listed under `admin_records_v2`. */
function facebook_admin_records_v2() {
  const prop = "admin_records_v2";
  return facebook_admin_records_generic(prop);
}
/**
 * Authorized logins / active sessions (security log).
 *
 * @param prop Name of the top-level JSON property that holds the session array.
 * @returns A pipeline made of a `jq -r` command op (name, created/updated timestamps,
 *          ip address, user agent, location, app, session type, datr cookie)
 *          followed by an `assignMeta` op.
 */
function facebook_authorized_logins_generic(prop: string) {
  // .location, .app and .session_type do not seem to be in v1, so they fall back to ""
  // via jq's `//` operator. Only one v1 entry was available to compare against.
  const program = `
["name","created_timestamp","updated_timestamp","ip_address","user_agent","location","app", "session_type", "datr_cookie"],
(
.${prop}[]
| [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address, .user_agent, .location // "", .app // "", .session_type // "", .datr_cookie]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime", "isodatetime", "text", "text", "text", "text", "text", "text"],
    perRowDescription: 'Session "{0}" from {5} on {1}',
    perRowTags: "facebook,security",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: sessions are listed under `recognized_devices`. */
function facebook_authorized_logins_v1() {
  const prop = "recognized_devices";
  return facebook_authorized_logins_generic(prop);
}
/** v2 layout: sessions are listed under `active_sessions_v2`. */
function facebook_authorized_logins_v2() {
  const prop = "active_sessions_v2";
  return facebook_authorized_logins_generic(prop);
}
/**
 * Contact verifications (security log).
 *
 * @param prop Name of the top-level JSON property that holds the verification array.
 * @returns A pipeline made of a `jq -r` command op (columns: timestamp, email, contact_type)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_contact_verification_generic(prop: string) {
  // The "timestamp" column comes from `.verification_time` and the "email"
  // column from `.contact`.
  const program = `
["timestamp", "email", "contact_type"],
(
.${prop}[]
| [(.verification_time | todateiso8601), .contact, .contact_type]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["isodatetime", "text", "text"],
    perRowDescription: '{2} verification of {1} at {0}',
    perRowTags: "facebook,security",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: verifications are listed under `contact_verifications`. */
function facebook_contact_verification_v1() {
  const prop = "contact_verifications";
  return facebook_contact_verification_generic(prop);
}
/** v2 layout: verifications are listed under `contact_verifications_v2`. */
function facebook_contact_verification_v2() {
  const prop = "contact_verifications_v2";
  return facebook_contact_verification_generic(prop);
}
/**
 * Logins and logouts (security log).
 *
 * @param prop Name of the top-level JSON property that holds the access array.
 * @returns A pipeline made of a `jq -r` command op (columns: action, timestamp, site,
 *          ip_address) followed by an `assignMeta` op describing those columns.
 */
function facebook_account_accesses_generic(prop: string) {
  // TODO: there is an updated_timestamp that doesn't always exist
  const program = `
["action", "timestamp", "site", "ip_address"],
(
.${prop}[]
| [.action, (.timestamp | todateiso8601), .site, .ip_address]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime", "text", "text"],
    perRowDescription: '{0} on {2} at {1} from {3}',
    perRowTags: "facebook,security",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: accesses are listed under `account_accesses`. */
function facebook_account_accesses_v1() {
  const prop = "account_accesses";
  return facebook_account_accesses_generic(prop);
}
/** v2 layout: accesses are listed under `account_accesses_v2`. */
function facebook_account_accesses_v2() {
  const prop = "account_accesses_v2";
  return facebook_account_accesses_generic(prop);
}
/**
 * Unfollowed pages.
 *
 * @param prop Name of the top-level JSON property that holds the entry array.
 * @returns A pipeline made of a `jq -r` command op (columns: title, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_pages_unfollowed_generic(prop: string) {
  // TODO: The .data field is not extracted; on the only record available it just looks like the "name"
  const program = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime"],
    perRowDescription: 'Unfollowed "{0}" at {1}',
    perRowTags: "facebook,initiated_by_me",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: entries are listed under `pages_unfollowed`. */
function facebook_pages_unfollowed_v1() {
  const prop = "pages_unfollowed";
  return facebook_pages_unfollowed_generic(prop);
}
/** v2 layout: entries are listed under `pages_unfollowed_v2`. */
function facebook_pages_unfollowed_v2() {
  const prop = "pages_unfollowed_v2";
  return facebook_pages_unfollowed_generic(prop);
}
/**
 * Group membership activity.
 *
 * @param prop Name of the top-level JSON property that holds the entry array.
 * @returns A pipeline made of a `jq -r` command op (columns: title, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_groups_joined_generic(prop: string) {
  // There is also a data property, ONLY IN v2, but it is redundant and not extracted
  const program = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "isodatetime"],
    perRowDescription: 'Joined group "{0}" at {1}',
    perRowTags: "facebook,initiated_by_me",
  });
  return pipe(toCsv, meta);
}
/** v1 layout: entries are listed under `groups_joined`. */
function facebook_groups_joined_v1() {
  const prop = "groups_joined";
  return facebook_groups_joined_generic(prop);
}
/** v2 layout: entries are listed under `groups_joined_v2`. */
function facebook_groups_joined_v2() {
  const prop = "groups_joined_v2";
  return facebook_groups_joined_generic(prop);
}
/**
 * Group posts, v1 layout: entries live under `.group_posts.activity_log_data`.
 *
 * @returns A pipeline made of a `jq -r` command op (columns: title, data, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_group_posts_v1() {
  // TODO: Attachments metadata, maybe another timestamp in the data field too (but it looks like the same everywhere)
  // The "data" column is a literal "TODO" placeholder for now.
  const program = `
["title", "data", "timestamp"],
(
.group_posts.activity_log_data[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "TODO", "isodatetime"],
    perRowDescription: 'Group post "{0}" at {2}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/**
 * Group posts, v2 layout: entries live directly under `.group_posts_v2`.
 *
 * @returns A pipeline made of a `jq -r` command op (columns: title, data, timestamp)
 *          followed by an `assignMeta` op describing those columns.
 */
function facebook_group_posts_v2() {
  // TODO: There is still data and attachments to pull out
  // The "data" column is a literal "TODO" placeholder for now.
  const program = `
["title", "data", "timestamp"],
(
.group_posts_v2[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  const toCsv = cmd(["jq", "-r", program]);
  const meta = assignMeta({
    columnMeta: ["text", "TODO", "isodatetime"],
    perRowDescription: 'Group post "{0}" at {2}',
    perRowTags: "facebook",
  });
  return pipe(toCsv, meta);
}
/**
 * Pipeline for the v2 Facebook export layout (the `*_v2` JSON properties).
 *
 * Every task first gets the id `Facebookv2 - <basename>`; the generator then
 * yields one sub-pipeline per export file that is currently handled.
 *
 * @returns The composed pipeline op.
 */
export function facebook_v2() {
  // Shared shape for the single-file tables: cd to the file, read it, parse it.
  // The parser is passed as a factory so the three ops are still constructed in
  // the order cd -> read -> parser, and only when the branch is yielded.
  const jsonTable = (path: string, makeParser: () => PipelineOp) =>
    pipe(cd(path), read(), makeParser());

  return pipe(
    // Generic ID for everything in here
    assignMeta({ idValue: t=>`Facebookv2 - ${t.basename}` }),
    branchGen(function*() {
      // No corollary to accounts_and_profiles.json
      // No corollary for your_off-facebook_activity.json
      yield jsonTable(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`, facebook_installed_apps_v2);
      yield jsonTable(`your_facebook_activity/comments_and_reactions/comments.json`, facebook_comments_v2);

      yield pipe(
        // Message files are in the FOLDERS inside messages (archived_threads, e2ee_cutover, etc...)
        glob(`your_facebook_activity/messages/*/**/*.json`),
        // 1, 2, etc is not specific enough, include the convo name
        assignMeta({ idValue: t=>`Facebookv2 - Messages ${t.basenameN(2)}` }),
        read(),
        facebook_messages_generic()
      );

      // Time spent: only the "Intervals" label is used; each `.vec[]` item
      // contributes a start (dict[0]) and an end (dict[1]) timestamp.
      yield pipe(
        cd(`your_facebook_activity/other_activity/time_spent_on_facebook.json`),
        read(),
        cmd(["jq", "-r", `
["start","end"],
(
.label_values[]
| select(.label == "Intervals")
| .vec[]
| [
(.dict[0].timestamp_value | todateiso8601),
(.dict[1].timestamp_value | todateiso8601)
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "isodatetime"],
          perRowDescription: 'Active from {0} to {1}',
          perRowTags: "facebook",
        })
      );

      yield jsonTable(`your_facebook_activity/groups/your_group_membership_activity.json`, facebook_groups_joined_v2);
      yield jsonTable(`your_facebook_activity/groups/group_posts_and_comments.json`, facebook_group_posts_v2);
      yield jsonTable(`your_facebook_activity/pages/pages_and_profiles_you've_unfollowed.json`, facebook_pages_unfollowed_v2);

      yield jsonTable(`connections/friends/your_friends.json`, () => facebook_friends_generic("friends_v2"));
      yield jsonTable(`connections/friends/rejected_friend_requests.json`, () => facebook_friends_generic("rejected_requests_v2"));
      yield jsonTable(`connections/friends/received_friend_requests.json`, () => facebook_friends_generic("received_requests_v2"));

      yield jsonTable(`logged_information/activity_messages/people_and_friends.json`, facebook_people_interactions_v2);
      yield jsonTable(`logged_information/search/your_search_history.json`, facebook_searches_v2);
      yield jsonTable(`logged_information/notifications/notifications.json`, facebook_notifications_v2);

      yield jsonTable(`security_and_login_information/account_activity.json`, facebook_account_activity_v2);
      yield jsonTable(`security_and_login_information/record_details.json`, facebook_admin_records_v2);
      yield jsonTable(`security_and_login_information/where_you're_logged_in.json`, facebook_authorized_logins_v2);
      yield jsonTable(`security_and_login_information/email_address_verifications.json`, facebook_contact_verification_v2);
      yield jsonTable(`security_and_login_information/logins_and_logouts.json`, facebook_account_accesses_v2);

      yield jsonTable(`your_facebook_activity/facebook_marketplace/items_sold.json`, facebook_marketplace_items_sold_v2);
    })
  );
}
/**
 * Pipeline for the v1 (older) Facebook export layout.
 *
 * Every task first gets the id `Facebook - <basename>`; the generator then
 * yields one sub-pipeline per export file that is currently handled. Each
 * sub-pipeline reads a JSON file, converts it to CSV with `jq -r`, and attaches
 * column metadata with `assignMeta`. Files that are skipped or still to do are
 * listed in the comments between the branches.
 *
 * @returns The composed pipeline op.
 */
export function facebook(){
  return pipe(
    // Generic ID for everything in here
    assignMeta({ idValue: t=>`Facebook - ${t.basename}` }),
    branchGen(function*() {
      yield pipe(cd(`about_you/notifications.json`), read(), facebook_notifications_v1());
      //TODO: .fork().skip('face_recognition.json').reason("Not a table, no idea how to use")
      //TODO: .fork().skip('friend_peer_group.json').reason("Not a table, very small file")
      //TODO:.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future")
      //TODO: .fork().todo('preferences.json').reason("Too complex for now")
      //TODO:.fork().todo('visited.json').reason("Too complex for now")
      //TODO:.fork().todo('viewed.json').reason("Too complex for now")

      yield pipe(
        cd(`accounts_center/accounts_and_profiles.json`),
        read(),
        cmd(["jq", "-r", `["service_name","native_app_id","username","email", "phone_number", "name"],
(
.linked_accounts[]
| [.service_name, .native_app_id, .username, .email, .phone_number, .name]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "text", "text", "text", "text", "text"],
          perRowDescription: '{0} account "{2}"',
          perRowTags: "facebook",
        })
      );

      // One row per event; the owning entry's name is carried along as $name.
      yield pipe(
        cd(`ads_and_businesses/your_off-facebook_activity.json`),
        read(),
        cmd(["jq", "-r", `
["name","id","type","timestamp"],
(
.off_facebook_activity[]
| .name as $name
| .events[]
| [$name, .id, .type, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "any", "text", "isodatetime"],
          perRowDescription: '{2} event from {0} at {3}',
          perRowTags: "facebook",
        })
      );
      //TODO: .fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json')

      yield pipe(cd(`apps_and_websites/apps_and_websites.json`), read(), facebook_installed_apps_v1());

      // `${facebookRoot}/archive` - no data in my export
      // `${facebookRoot}/campus` - no data in my export

      yield pipe(cd(`comments/comments.json`), read(), facebook_comments_v1());

      // Dating messages: the sender is emitted as the literal "Me" and the
      // file-level `.recipient` is used as the receiver of every message.
      yield pipe(
        glob(`dating/messages/*.json`), // Files are 0.json, 1.json, etc
        assignMeta({ idValue: t=>`Facebook - Dating Messages ${t.basename}` }), // Slightly more specific message
        read(),
        cmd(["jq", "-r", `
["from","to","timestamp","body"],
(
.recipient as $to
| (
.messages[]
| ["Me", $to, (.timestamp | todateiso8601), .body]
)
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender", "receiver", "isodatetime", "text"],
          perRowDescription: '"{3}" from {0} to {1} at {2}',
          perRowTags: "facebook,message,dating,content_by_me",
        })
      );
      //todo: your_dating_activity.json, but it only has a few lines and not super useful
      //todo: the other dating files are also just, small

      // TODO: events
      // rcd(`events`);
      // localCollect('event_invitations.json', json, sspawn('jq', [`
      // .events_invited[] |= (
      // .start_timestamp |= todateiso8601 |
      // .end_timestamp |= todateiso8601
      // )
      // `]));
      // localCollect('your_event_responses.json', json, sspawn('jq', [`
      // .event_responses.events_joined[] |= (
      // .start_timestamp |= todateiso8601 |
      // .end_timestamp |= todateiso8601
      // ) |
      // .event_responses.events_declined[] |= (
      // .start_timestamp |= todateiso8601 |
      // .end_timestamp |= todateiso8601
      // ) |
      // .event_responses.events_interested[] |= (
      // .start_timestamp |= todateiso8601 |
      // .end_timestamp |= todateiso8601
      // )
      // `]));

      yield pipe(
        cd(`facebook_gaming/instant_games.json`),
        read(),
        cmd(["jq", "-r", `
["game", "added_timestamp"],
(
.instant_games_played[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime"],
          perRowDescription: 'Played "{0}" starting {1}',
          perRowTags: "facebook,gaming",
        })
      );

      yield pipe(cd(`following_and_followers/unfollowed_pages.json`), read(), facebook_pages_unfollowed_v1());
      yield pipe(
        cd(`following_and_followers/following.json`),
        read(),
        cmd(["jq", "-r", `
["name", "timestamp"],
(
.following[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["receiver", "isodatetime"],
          perRowDescription: 'Followed "{0}" at {1}',
          perRowTags: "facebook",
        })
      );
      // Followers only have a name; no timestamp is extracted.
      yield pipe(
        cd(`following_and_followers/followers.json`),
        read(),
        cmd(["jq", "-r", `
["name"],
(
.followers[]
| [.name]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender"],
          perRowDescription: '{0} follows you',
          perRowTags: "facebook",
        })
      );

      yield pipe(cd(`friends/sent_friend_requests.json`), read(), facebook_friends_generic("sent_requests"));
      yield pipe(cd(`friends/removed_friends.json`), read(), facebook_friends_generic("deleted_friends"));
      yield pipe(cd(`friends/rejected_friend_requests.json`), read(), facebook_friends_generic("rejected_requests"));
      yield pipe(cd(`friends/received_friend_requests.json`), read(), facebook_friends_generic("received_requests"));
      yield pipe(cd(`friends/friends.json`), read(), facebook_friends_generic("friends"));

      yield pipe(cd(`groups/your_group_membership_activity.json`), read(), facebook_groups_joined_v1());
      yield pipe(cd(`groups/your_posts_and_comments_in_groups.json`), read(), facebook_group_posts_v1());
      // there's also groups.json and events.json but neither has timestamp so they're
      // not really useful right now

      yield pipe(cd(`interactions/people.json`), read(), facebook_people_interactions_v1());

      // `${facebookRoot}/journalist_registration` - no data in my export

      yield pipe(
        cd(`likes_and_reactions/pages.json`),
        read(),
        cmd(["jq", "-r", `
["name", "timestamp"],
(
.page_likes[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime"],
          perRowDescription: 'Liked page "{0}" at {1}',
          perRowTags: "facebook",
        })
      );
      // Reaction entries expose their text as `.title` (matching the "title" header);
      // selecting `.name` here left the first column empty on every row.
      yield pipe(
        cd(`likes_and_reactions/posts_and_comments.json`),
        read(),
        cmd(["jq", "-r", `
["title", "timestamp", "reaction"],
(
.reactions[]
| [.title, (.timestamp | todateiso8601), .data[0].reaction.reaction]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime", "text"],
          perRowDescription: '{2} on "{0}" at {1}',
          perRowTags: "facebook",
        })
      );

      // TODO:
      // rcd(`location`);
      // localCollect('primary_location.json', json);
      // localCollect('primary_public_location.json', json);
      // localCollect('timezone.json', json);

      yield pipe(cd(`marketplace/items_sold.json`), read(), facebook_marketplace_items_sold_v1());

      yield pipe(
        glob(`messages/**/*.json`), // Files are message_1.json, etc
        assignMeta({ idValue: t=>`Facebook - Messages ${t.basenameN(2)}` }), // 1, 2, etc is not specific enough, include the convo name
        read(),
        facebook_messages_generic()
      );

      // `${facebookRoot}/music_recommendations` - no data

      // rcd(`news`);
      // localCollect('your_locations.json', json);

      yield pipe(
        cd(`other_activity/pokes.json`),
        read(),
        cmd(["jq", "-r", `
["from", "to","rank","timestamp"],
(
.pokes.data[]
| [.poker, .pokee, .rank, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender", "receiver", "numeric", "isodatetime"],
          perRowDescription: '{0} poked {1} at {3}',
          perRowTags: "facebook",
        })
      );
      yield pipe(
        cd(`other_activity/support_correspondences.json`),
        read(),
        // TODO: I'm seeing blanks in .from and .to when the replier was Facebook
        // themselves. Perhaps it's broken?
        // TODO: Attachments
        cmd(["jq", "-r", `
["from", "to", "subject", "message", "timestamp"],
(
.support_correspondence[].messages[]
| [.from, .to, .subject, .message, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender", "receiver", "text", "text", "isodatetime"],
          perRowDescription: '"{2}" from {0} to {1} at {4}',
          perRowTags: "facebook",
        })
      );

      // `${facebookRoot}/pages` - no data

      yield pipe(
        cd(`payment_history/payment_history.json`),
        read(),
        cmd(["jq", "-r", `
["from", "to","amount","currency", "type","status","payment_method", "created_timestamp"],
(
.payments.payments[]
| [.sender, .receiver, .amount, .currency, .type, .status, .payment_method, (.created_timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender", "receiver", "numeric", "text", "text", "text", "text", "isodatetime"],
          perRowDescription: '{2} {3} from {0} to {1} on {7}',
          perRowTags: "facebook,payment",
        })
      );

      // TODO: There's also photos_and_videos/your_videos.json
      // TODO: There's a media_metadata in each of the images too to convert as well as external files
      yield pipe(
        glob(`photos_and_videos/album/*.json`),
        // Could use a better name, currently 0.json, 1.json, etc...
        assignMeta({ idValue: t=>`Facebook - Album ${t.basename}` }), // Slightly more specific name; it would be better if we could use the album name
        read(),
        cmd(["jq", "-r", `
["album","uri","creation_timestamp"],
(
.photos[]
| [.title, .uri, (.creation_timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "url", "isodatetime"],
          perRowDescription: 'Photo in "{0}" at {2}',
          perRowTags: "facebook,photo",
        })
      );

      yield pipe(cd(`posts/your_pinned_posts.json`),
        read(),
        cmd(["jq", "-r", `
["name","uri","timestamp"],
(
.pinned_posts[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "url", "isodatetime"],
          perRowDescription: 'Pinned post "{0}" at {2}',
          perRowTags: "facebook",
        })
      );
      // TODO: Glob? I never posted a lot on FB
      yield pipe(
        cd(`posts/your_posts_1.json`),
        read(),
        // TODO: Data is an array with objects. .post, .updated_timestamp, separately??
        // TODO: Also attachments
        cmd(["jq", "-r", `
["title","data","timestamp"],
(
.[]
| [.title, "TODO: data", (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "TODO", "isodatetime"],
          perRowDescription: 'Post "{0}" at {2}',
          perRowTags: "facebook",
        })
      );

      // `${facebookRoot}/privacy_checkup` - no data

      // TODO: Shape is non-tabular, but maybe we should handle it?
      // Looks mostly like dupes from other places
      // './profile_information.json': undefined,
      // The minimum amount of data is just .title and .timestamp
      // TODO: Handle data and attachments
      yield pipe(
        cd(`profile_information/profile_update_history.json`),
        read(),
        cmd(["jq", "-r", `
["title","timestamp"],
(
.profile_updates[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime"],
          perRowDescription: 'Profile update "{0}" at {1}',
          perRowTags: "facebook",
        })
      );

      // `${facebookRoot}/rewards` - no data
      // `${facebookRoot}/saved_items_and_collections` - no data

      yield pipe(cd(`search_history/your_search_history.json`), read(), facebook_searches_v1());

      yield pipe(
        cd(`security_and_login_information/account_status_changes.json`),
        read(),
        cmd(["jq", "-r", `
["status","timestamp"],
(
.account_status_changes[]
| [.status, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime"],
          perRowDescription: 'Account {0} at {1}',
          perRowTags: "facebook,security",
        })
      );
      yield pipe(cd(`security_and_login_information/account_activity.json`), read(), facebook_account_activity_v1());
      yield pipe(cd(`security_and_login_information/administrative_records.json`), read(), facebook_admin_records_v1());
      yield pipe(cd(`security_and_login_information/authorized_logins.json`), read(), facebook_authorized_logins_v1());
      yield pipe(cd(`security_and_login_information/contact_verifications.json`), read(), facebook_contact_verification_v1());
      yield pipe(cd(`security_and_login_information/logins_and_logouts.json`), read(), facebook_account_accesses_v1());
      // TODO: datr_cookie_info, looks like a bunch of timestamps
      // a.fork().cd(`login_protection_data.json`)
      // .read()
      // // TODO: updated_timestamp doesn't always exist
      // .cmd(["jq", "-r", `
      // ["name", "created_timestamp", "updated_timestamp", "ip_address"],
      // (
      // .login_protection_data[]
      // | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address]
      // )
      // | @csv
      // `])
      // TODO: mobile_devices, only a couple entries
      // TODO: used_ip_addresses
      // TODO: where_you've logged in
      // TODO: your_facebook_activity, useless and small

      // `${facebookRoot}/short_videos` - no data in my export
      // `${facebookRoot}/saved_items_and_collections` - no data in my export

      yield pipe(
        cd(`stories/story_reactions.json`),
        read(),
        cmd(["jq", "-r", `
["title", "timestamp"],
(
.stories_feedback[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "isodatetime"],
          perRowDescription: 'Story reaction on "{0}" at {1}',
          perRowTags: "facebook",
        })
      );

      // `${facebookRoot}/trash` - no data in my export
      // `${facebookRoot}/voice_recording_and_transcription` - no data in my export
      // `${facebookRoot}/volunteering` - no data in my export
      // `${facebookRoot}/voting_location_and_reminders` - only small 1-property things
      // `${facebookRoot}/your_places` - no data in my export
      // `${facebookRoot}/your_topics` - no data in my export
    })
  );
}