base-data-manager/data-export/facebook.ts

787 lines
30 KiB
TypeScript

import { TaskTargetPipelineHelper } from "./task.ts";
// Module augmentation: merges the Facebook helper functions defined in this
// file into the TaskTargetPipelineHelper interface so they type-check as
// chainable methods on a pipeline helper. Each member is typed as
// `typeof <function>`, so signatures are taken from the implementations below.
// NOTE(review): the import above uses "./task.ts" while this augmentation
// targets "../data-export/task.ts"; presumably both resolve to the same
// module — confirm.
declare module "../data-export/task.ts" {
interface TaskTargetPipelineHelper {
// Entry points that build a whole pipeline for one export layout.
facebook: typeof facebook;
facebook_v2: typeof facebook_v2;
// Per-file converters: a `_generic` worker plus `_v1` / `_v2` wrappers.
facebook_notifications_generic: typeof facebook_notifications_generic;
facebook_notifications_v1: typeof facebook_notifications_v1;
facebook_notifications_v2: typeof facebook_notifications_v2;
facebook_installed_apps_generic: typeof facebook_installed_apps_generic;
facebook_installed_apps_v1: typeof facebook_installed_apps_v1;
facebook_installed_apps_v2: typeof facebook_installed_apps_v2;
facebook_comments_generic: typeof facebook_comments_generic;
facebook_comments_v1: typeof facebook_comments_v1;
facebook_comments_v2: typeof facebook_comments_v2;
facebook_people_interactions_generic: typeof facebook_people_interactions_generic;
facebook_people_interactions_v1: typeof facebook_people_interactions_v1;
facebook_people_interactions_v2: typeof facebook_people_interactions_v2;
facebook_marketplace_items_sold_generic: typeof facebook_marketplace_items_sold_generic;
facebook_marketplace_items_sold_v1: typeof facebook_marketplace_items_sold_v1;
facebook_marketplace_items_sold_v2: typeof facebook_marketplace_items_sold_v2;
facebook_searches_generic: typeof facebook_searches_generic;
facebook_searches_v1: typeof facebook_searches_v1;
facebook_searches_v2: typeof facebook_searches_v2;
facebook_account_activity_generic: typeof facebook_account_activity_generic;
facebook_account_activity_v1: typeof facebook_account_activity_v1;
facebook_account_activity_v2: typeof facebook_account_activity_v2;
// Messages and friends only have a generic converter (no v1/v2 wrappers).
facebook_messages_generic: typeof facebook_messages_generic;
facebook_friends_generic: typeof facebook_friends_generic;
facebook_admin_records_generic: typeof facebook_admin_records_generic;
facebook_admin_records_v1: typeof facebook_admin_records_v1;
facebook_admin_records_v2: typeof facebook_admin_records_v2;
facebook_authorized_logins_generic: typeof facebook_authorized_logins_generic;
facebook_authorized_logins_v1: typeof facebook_authorized_logins_v1;
facebook_authorized_logins_v2: typeof facebook_authorized_logins_v2;
facebook_contact_verification_generic: typeof facebook_contact_verification_generic;
facebook_contact_verification_v1: typeof facebook_contact_verification_v1;
facebook_contact_verification_v2: typeof facebook_contact_verification_v2;
facebook_pages_unfollowed_generic: typeof facebook_pages_unfollowed_generic;
facebook_pages_unfollowed_v1: typeof facebook_pages_unfollowed_v1;
facebook_pages_unfollowed_v2: typeof facebook_pages_unfollowed_v2;
facebook_account_accesses_generic: typeof facebook_account_accesses_generic;
facebook_account_accesses_v1: typeof facebook_account_accesses_v1;
facebook_account_accesses_v2: typeof facebook_account_accesses_v2;
facebook_groups_joined_generic: typeof facebook_groups_joined_generic;
facebook_groups_joined_v1: typeof facebook_groups_joined_v1;
facebook_groups_joined_v2: typeof facebook_groups_joined_v2;
// Group posts have separate v1 and v2 implementations and no generic worker.
facebook_group_posts_v1: typeof facebook_group_posts_v1;
facebook_group_posts_v2: typeof facebook_group_posts_v2;
}
}
// Runtime half of the augmentation: copies every Facebook helper onto
// TaskTargetPipelineHelper.prototype so it can be called as a chained method
// (e.g. `.read().facebook_comments_v1()`). The functions are declared further
// down in this file; this works because function declarations are hoisted.
// Keep this list in sync with the interface members declared above.
Object.assign(TaskTargetPipelineHelper.prototype, {
facebook,
facebook_v2,
facebook_notifications_generic,
facebook_notifications_v1,
facebook_notifications_v2,
facebook_installed_apps_generic,
facebook_installed_apps_v1,
facebook_installed_apps_v2,
facebook_comments_generic,
facebook_comments_v1,
facebook_comments_v2,
facebook_people_interactions_generic,
facebook_people_interactions_v1,
facebook_people_interactions_v2,
facebook_marketplace_items_sold_generic,
facebook_marketplace_items_sold_v1,
facebook_marketplace_items_sold_v2,
facebook_searches_generic,
facebook_searches_v1,
facebook_searches_v2,
facebook_account_activity_generic,
facebook_account_activity_v1,
facebook_account_activity_v2,
facebook_admin_records_generic,
facebook_admin_records_v1,
facebook_admin_records_v2,
facebook_authorized_logins_generic,
facebook_authorized_logins_v1,
facebook_authorized_logins_v2,
facebook_contact_verification_generic,
facebook_contact_verification_v1,
facebook_contact_verification_v2,
facebook_account_accesses_generic,
facebook_account_accesses_v1,
facebook_account_accesses_v2,
facebook_pages_unfollowed_generic,
facebook_pages_unfollowed_v1,
facebook_pages_unfollowed_v2,
facebook_groups_joined_generic,
facebook_groups_joined_v1,
facebook_groups_joined_v2,
facebook_messages_generic,
facebook_friends_generic,
facebook_group_posts_v1,
facebook_group_posts_v2,
});
/**
 * Converts a Facebook notifications export to CSV with jq.
 *
 * Covers about_you/notifications.json in the old format and
 * logged_information/notifications.json in the new format.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...).types(...)` returns; `types` receives one entry
 *   per output column.
 */
function facebook_notifications_generic(this: TaskTargetPipelineHelper, prop: string) {
  // Header row first, then one row per element; `.timestamp` goes through todateiso8601.
  const filter = `["timestamp","unread","href","text"],
(
.${prop}[]
| [(.timestamp | todateiso8601), .unread, .href, .text]
)
| @csv`;
  const jqStep = this.cmd(["jq", "-r", filter]);
  return jqStep.types(["time", "text", "text", "text"]);
}
/** v1 notifications: delegates to the generic converter with the `notifications` key. */
function facebook_notifications_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "notifications";
  return this.facebook_notifications_generic(topLevelKey);
}
/** v2 notifications: delegates to the generic converter with the `notifications_v2` key. */
function facebook_notifications_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "notifications_v2";
  return this.facebook_notifications_generic(topLevelKey);
}
/**
 * Converts an installed-apps export to CSV with jq.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...).types(...)` returns; `types` receives one entry
 *   per output column (name, added_timestamp).
 */
function facebook_installed_apps_generic(this: TaskTargetPipelineHelper, prop: string) {
  // Header row, then one [name, ISO 8601 added_timestamp] row per element.
  const filter = `
["name","added_timestamp"],
(
.${prop}[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`;
  const jqStep = this.cmd(["jq", "-r", filter]);
  return jqStep.types(["text", "time"]);
}
/** v1 installed apps: delegates to the generic converter with the `installed_apps` key. */
function facebook_installed_apps_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "installed_apps";
  return this.facebook_installed_apps_generic(topLevelKey);
}
/**
 * v2 installed apps: delegates to the generic converter with the
 * `installed_apps_v2` key.
 */
function facebook_installed_apps_v2(this: TaskTargetPipelineHelper) {
  // TODO: v2 records carry a few more properties that are not exported yet
  const topLevelKey = "installed_apps_v2";
  return this.facebook_installed_apps_generic(topLevelKey);
}
/**
 * Converts one messages JSON file to CSV (from, to, timestamp, content) with jq.
 *
 * Known gaps: pictures and other non-text payloads are not handled. Messages
 * also carry a `.type` (with type-specific extras) and an `is_unsent` flag
 * that are ignored here. The "to" column is always the literal "<other>".
 *
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_messages_generic(this: TaskTargetPipelineHelper) {
  // `.timestamp_ms` is divided by 1000 and rounded before todateiso8601.
  const filter = `
["from","to","timestamp","content"],
(
.messages[]
| [.sender_name, "<other>", ((.timestamp_ms / 1000) | round | todateiso8601), .content]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/**
 * Converts a comments export to CSV (timestamp, data, title) with jq.
 *
 * The "data" column is currently the placeholder string "TODO". Each record's
 * `.data` is an array (usually a single item) shaped like:
 *
 *   "data": [
 *     {
 *       "comment": {
 *         "timestamp": 1612923641,
 *         "comment": "xxx",
 *         "author": "xxx xxx",
 *         "group": "xxx"
 *       }
 *     }
 *   ],
 *
 * TODO: export `.data` and the attachments (media).
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...).types(...)` returns; `types` receives one entry
 *   per output column.
 */
function facebook_comments_generic(this: TaskTargetPipelineHelper, prop: string) {
  // `[]?` suppresses the jq error when `.${prop}` cannot be iterated.
  const filter = `
["timestamp","data", "title"],
(
.${prop}[]?
| [(.timestamp | todateiso8601), "TODO", .title]
)
| @csv
`;
  const jqStep = this.cmd(["jq", "-r", filter]);
  return jqStep.types(["time", "text", "text"]);
}
/** v1 comments: delegates to the generic converter with the `comments` key. */
function facebook_comments_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "comments";
  return this.facebook_comments_generic(topLevelKey);
}
/** v2 comments: delegates to the generic converter with the `comments_v2` key. */
function facebook_comments_v2(this: TaskTargetPipelineHelper) {
  // TODO: no visible difference between v1 and v2 so far; perhaps it is in the data?
  const topLevelKey = "comments_v2";
  return this.facebook_comments_generic(topLevelKey);
}
/**
 * Converts a friends-style list to CSV (name, timestamp) with jq.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_friends_generic(this: TaskTargetPipelineHelper, prop: string) {
  // Header row, then one [name, ISO 8601 timestamp] row per element.
  const filter = `
["name", "timestamp"],
(
.${prop}[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/**
 * Converts a people-interactions export to CSV (name, uri, timestamp) with jq.
 *
 * Rows come from the `entries` array nested inside each element of `.${prop}`.
 *
 * @param prop Top-level JSON key holding the array of interaction groups.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_people_interactions_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["name", "uri", "timestamp"],
(
.${prop}[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 people interactions: delegates to the generic converter with the `people_interactions` key. */
function facebook_people_interactions_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "people_interactions";
  return this.facebook_people_interactions_generic(topLevelKey);
}
/** v2 people interactions: delegates to the generic converter with the `people_interactions_v2` key. */
function facebook_people_interactions_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "people_interactions_v2";
  return this.facebook_people_interactions_generic(topLevelKey);
}
/**
 * Converts a Marketplace items export to CSV with jq.
 *
 * Columns: title, price, seller, created_timestamp, latitude, longitude,
 * description. Latitude/longitude are read from `.location.coordinate`.
 *
 * TODO: `updated_timestamp` may not exist, so it is left out for now.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_marketplace_items_sold_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["title", "price", "seller", "created_timestamp", "latitude", "longitude", "description"],
(
.${prop}[]
| [.title, .price, .seller, (.created_timestamp | todateiso8601), .location.coordinate.latitude, .location.coordinate.longitude, .description]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 Marketplace items: delegates to the generic converter with the `items_selling` key. */
function facebook_marketplace_items_sold_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "items_selling";
  return this.facebook_marketplace_items_sold_generic(topLevelKey);
}
/** v2 Marketplace items: delegates to the generic converter with the `items_selling_v2` key. */
function facebook_marketplace_items_sold_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "items_selling_v2";
  return this.facebook_marketplace_items_sold_generic(topLevelKey);
}
/**
 * Converts a search-history export to CSV (title, data, timestamp) with jq.
 *
 * The "data" column is the `text` of the first `.data` entry.
 * TODO: `.data` and `.attachments` both hold a single "text" field inside the
 * first array element, apparently with the same content — do they ever differ?
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_searches_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["title","data","timestamp"],
(
.${prop}[]
| [.title, .data[0].text, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 searches: delegates to the generic converter with the `searches` key. */
function facebook_searches_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "searches";
  return this.facebook_searches_generic(topLevelKey);
}
/** v2 searches: delegates to the generic converter with the `searches_v2` key. */
function facebook_searches_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "searches_v2";
  return this.facebook_searches_generic(topLevelKey);
}
/**
 * Converts an account-activity export to CSV with jq.
 *
 * Columns: action, ip, user_agent, datr_cookie, city, region, country,
 * site_name, timestamp. The "ip" column is read from `.ip_address`.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_account_activity_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["action", "ip", "user_agent", "datr_cookie", "city", "region", "country", "site_name","timestamp"],
(
.${prop}[]
| [.action, .ip_address, .user_agent, .datr_cookie, .city, .region, .country, .site_name, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 account activity: delegates to the generic converter with the `account_activity` key. */
function facebook_account_activity_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "account_activity";
  return this.facebook_account_activity_generic(topLevelKey);
}
/** v2 account activity: delegates to the generic converter with the `account_activity_v2` key. */
function facebook_account_activity_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "account_activity_v2";
  return this.facebook_account_activity_generic(topLevelKey);
}
/**
 * Converts an administrative-records export to CSV with jq.
 *
 * Columns: event, created_timestamp, ip_address, user_agent, datr_cookie.
 * `created_timestamp` is read from `.session.created_timestamp`; the other
 * fields are read from the record itself.
 * NOTE(review): ip_address / user_agent / datr_cookie may actually be nested
 * under `.session` as well — confirm against a real export.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_admin_records_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["event","created_timestamp","ip_address","user_agent","datr_cookie"],
(
.${prop}[]
| [.event, (.session.created_timestamp | todateiso8601), .ip_address, .user_agent, .datr_cookie]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 admin records: delegates to the generic converter with the `admin_records` key. */
function facebook_admin_records_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "admin_records";
  return this.facebook_admin_records_generic(topLevelKey);
}
/** v2 admin records: delegates to the generic converter with the `admin_records_v2` key. */
function facebook_admin_records_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "admin_records_v2";
  return this.facebook_admin_records_generic(topLevelKey);
}
/**
 * Converts an authorized-logins / active-sessions export to CSV with jq.
 *
 * Columns: name, created_timestamp, updated_timestamp, ip_address,
 * user_agent, location, app, session_type, datr_cookie.
 *
 * `.location`, `.app` and `.session_type` do not seem to exist in v1, so they
 * fall back to "" via jq's `//` operator. (Only one v1 entry was available to
 * compare against.)
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_authorized_logins_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["name","created_timestamp","updated_timestamp","ip_address","user_agent","location","app", "session_type", "datr_cookie"],
(
.${prop}[]
| [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address, .user_agent, .location // "", .app // "", .session_type // "", .datr_cookie]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 authorized logins: delegates to the generic converter with the `recognized_devices` key. */
function facebook_authorized_logins_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "recognized_devices";
  return this.facebook_authorized_logins_generic(topLevelKey);
}
/** v2 authorized logins: delegates to the generic converter with the `active_sessions_v2` key. */
function facebook_authorized_logins_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "active_sessions_v2";
  return this.facebook_authorized_logins_generic(topLevelKey);
}
/**
 * Converts a contact-verifications export to CSV with jq.
 *
 * Columns: timestamp (from `.verification_time`), email (from `.contact`),
 * contact_type.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_contact_verification_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["timestamp", "email", "contact_type"],
(
.${prop}[]
| [(.verification_time | todateiso8601), .contact, .contact_type]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 contact verifications: delegates to the generic converter with the `contact_verifications` key. */
function facebook_contact_verification_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "contact_verifications";
  return this.facebook_contact_verification_generic(topLevelKey);
}
/** v2 contact verifications: delegates to the generic converter with the `contact_verifications_v2` key. */
function facebook_contact_verification_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "contact_verifications_v2";
  return this.facebook_contact_verification_generic(topLevelKey);
}
/**
 * Converts a logins-and-logouts export to CSV (action, timestamp, site,
 * ip_address) with jq.
 *
 * TODO: there is also an `updated_timestamp`, but it does not always exist.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_account_accesses_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["action", "timestamp", "site", "ip_address"],
(
.${prop}[]
| [.action, (.timestamp | todateiso8601), .site, .ip_address]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 account accesses: delegates to the generic converter with the `account_accesses` key. */
function facebook_account_accesses_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "account_accesses";
  return this.facebook_account_accesses_generic(topLevelKey);
}
/** v2 account accesses: delegates to the generic converter with the `account_accesses_v2` key. */
function facebook_account_accesses_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "account_accesses_v2";
  return this.facebook_account_accesses_generic(topLevelKey);
}
/**
 * Converts an unfollowed-pages export to CSV (title, timestamp) with jq.
 *
 * TODO: the `.data` field is not exported; on the only record available it
 * just looked like the "name".
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_pages_unfollowed_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 unfollowed pages: delegates to the generic converter with the `pages_unfollowed` key. */
function facebook_pages_unfollowed_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "pages_unfollowed";
  return this.facebook_pages_unfollowed_generic(topLevelKey);
}
/** v2 unfollowed pages: delegates to the generic converter with the `pages_unfollowed_v2` key. */
function facebook_pages_unfollowed_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "pages_unfollowed_v2";
  return this.facebook_pages_unfollowed_generic(topLevelKey);
}
/**
 * Converts a group-membership export to CSV (title, timestamp) with jq.
 *
 * Records also have a `data` property (v2 only), which is redundant and
 * therefore not exported.
 *
 * @param prop Top-level JSON key whose array elements become the CSV rows.
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_groups_joined_generic(this: TaskTargetPipelineHelper, prop: string) {
  const filter = `
["title", "timestamp"],
(
.${prop}[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/** v1 groups joined: delegates to the generic converter with the `groups_joined` key. */
function facebook_groups_joined_v1(this: TaskTargetPipelineHelper) {
  const topLevelKey = "groups_joined";
  return this.facebook_groups_joined_generic(topLevelKey);
}
/** v2 groups joined: delegates to the generic converter with the `groups_joined_v2` key. */
function facebook_groups_joined_v2(this: TaskTargetPipelineHelper) {
  const topLevelKey = "groups_joined_v2";
  return this.facebook_groups_joined_generic(topLevelKey);
}
/**
 * Converts v1 group posts to CSV (title, data, timestamp) with jq.
 *
 * Rows come from `.group_posts.activity_log_data[]`; the "data" column is
 * currently the placeholder string "TODO".
 * TODO: attachments metadata; there may be another timestamp inside `.data`
 * too (it looked identical everywhere).
 *
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_group_posts_v1(this: TaskTargetPipelineHelper) {
  const filter = `
["title", "data", "timestamp"],
(
.group_posts.activity_log_data[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/**
 * Converts v2 group posts to CSV (title, data, timestamp) with jq.
 *
 * Rows come from `.group_posts_v2[]`; the "data" column is currently the
 * placeholder string "TODO".
 * TODO: `.data` and the attachments still need to be pulled out.
 *
 * @returns Whatever `cmd(...)` returns for the jq invocation.
 */
function facebook_group_posts_v2(this: TaskTargetPipelineHelper) {
  const filter = `
["title", "data", "timestamp"],
(
.group_posts_v2[]
| [.title, "TODO", (.timestamp | todateiso8601)]
)
| @csv
`;
  return this.cmd(["jq", "-r", filter]);
}
/**
 * Builds the pipeline for the "v2" Facebook export directory layout.
 *
 * Each source file (or glob of files) is started as its own branch via
 * `collect(...)`, read, and handed to the matching converter. The collected
 * branches are then flattened and passed to `TaskTargetPipelineHelper.pipeline`.
 *
 * @returns The result of `TaskTargetPipelineHelper.pipeline(...)` over all
 *   collected branches.
 */
function facebook_v2(this: TaskTargetPipelineHelper) {
  // Generic ID for everything in here
  const root = this.setId(t=>`Facebookv2 - ${t.basename}`);
  const branches: Set<TaskTargetPipelineHelper> = new Set();
  // Single-file sources all start the same way: new collected branch, cd to the file, read it.
  const open = (path: string) => root.collect(branches).cd(path).read();

  // No counterpart to accounts_and_profiles.json
  // No counterpart to your_off-facebook_activity.json
  open(`apps_and_websites_off_of_facebook/connected_apps_and_websites.json`).facebook_installed_apps_v2();
  open(`your_facebook_activity/comments_and_reactions/comments.json`).facebook_comments_v2();

  // Messages files are in the FOLDERS inside messages (archived_threads, e2ee_cutover, etc...)
  const messageFiles = root.collect(branches).glob(`your_facebook_activity/messages/*/**/*.json`);
  messageFiles
    // 1, 2, etc is not specific enough, include the convo name
    .setId(t=>`Facebookv2 - Messages ${t.basenameN(2)}`)
    .read()
    .facebook_messages_generic();

  // Time spent: one row per entry of the "Intervals" label's `.vec`.
  const timeSpentFilter = `
["start","end"],
(
.label_values[]
| select(.label == "Intervals")
| .vec[]
| [
(.dict[0].timestamp_value | todateiso8601),
(.dict[1].timestamp_value | todateiso8601)
]
)
| @csv
`;
  open(`your_facebook_activity/other_activity/time_spent_on_facebook.json`).cmd(["jq", "-r", timeSpentFilter]);

  open(`your_facebook_activity/groups/your_group_membership_activity.json`).facebook_groups_joined_v2();
  open(`your_facebook_activity/groups/group_posts_and_comments.json`).facebook_group_posts_v2();
  open(`your_facebook_activity/pages/pages_and_profiles_you've_unfollowed.json`).facebook_pages_unfollowed_v2();

  open(`connections/friends/your_friends.json`).facebook_friends_generic("friends_v2");
  open(`connections/friends/rejected_friend_requests.json`).facebook_friends_generic("rejected_requests_v2");
  open(`connections/friends/received_friend_requests.json`).facebook_friends_generic("received_requests_v2");

  open(`logged_information/activity_messages/people_and_friends.json`).facebook_people_interactions_v2();
  open(`logged_information/search/your_search_history.json`).facebook_searches_v2();
  open(`logged_information/notifications/notifications.json`).facebook_notifications_v2();

  open(`security_and_login_information/account_activity.json`).facebook_account_activity_v2();
  open(`security_and_login_information/record_details.json`).facebook_admin_records_v2();
  open(`security_and_login_information/where_you're_logged_in.json`).facebook_authorized_logins_v2();
  open(`security_and_login_information/email_address_verifications.json`).facebook_contact_verification_v2();
  open(`security_and_login_information/logins_and_logouts.json`).facebook_account_accesses_v2();

  open(`your_facebook_activity/facebook_marketplace/items_sold.json`).facebook_marketplace_items_sold_v2();

  const flattened = Array.from(branches).flat();
  return TaskTargetPipelineHelper.pipeline(flattened);
}
/**
 * Builds the pipeline for the older ("v1") Facebook export directory layout.
 *
 * Each source file (or glob of files) is started as its own branch via
 * `p.collect(col)`, read, and converted to CSV either by one of the shared
 * `facebook_*` converters or by an inline jq filter followed by `csvSink`.
 * The collected branches are then flattened and passed to
 * `TaskTargetPipelineHelper.pipeline`.
 *
 * @returns The result of `TaskTargetPipelineHelper.pipeline(...)` over all
 *   collected branches.
 */
function facebook(this: TaskTargetPipelineHelper){
  const p = this.setId(t=>`Facebook - ${t.basename}`); // Generic ID for everything in here
  const col: Set<TaskTargetPipelineHelper> = new Set();

  p.collect(col).cd(`about_you/notifications.json`).read().facebook_notifications_v1();
  //TODO: .fork().skip('face_recognition.json').reason("Not a table, no idea how to use")
  //TODO: .fork().skip('friend_peer_group.json').reason("Not a table, very small file")
  //TODO:.fork().skip('messenger.json').reason("Not a table, but might have some juicy stuff for future")
  //TODO: .fork().todo('preferences.json').reason("Too complex for now")
  //TODO:.fork().todo('visited.json').reason("Too complex for now")
  //TODO:.fork().todo('viewed.json').reason("Too complex for now")

  p.collect(col).cd(`accounts_center/accounts_and_profiles.json`).read()
    .cmd(["jq", "-r", `["service_name","native_app_id","username","email", "phone_number", "name"],
(
.linked_accounts[]
| [.service_name, .native_app_id, .username, .email, .phone_number, .name]
)
| @csv`])
    .csvSink();

  p.collect(col).cd(`ads_and_businesses/your_off-facebook_activity.json`).read()
    .cmd(["jq", "-r", `
["name","id","type","timestamp"],
(
.off_facebook_activity[]
| .name as $name
| .events[]
| [$name, .id, .type, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);
  //TODO: .fork().todo('advertisers_who_uploaded_a_contact_list_with_your_information.json')

  p.collect(col).cd(`apps_and_websites/apps_and_websites.json`).read().facebook_installed_apps_v1();
  // `${facebookRoot}/archive` - no data in my export
  // `${facebookRoot}/campus` - no data in my export

  p.collect(col).cd(`comments/comments.json`).read().facebook_comments_v1();

  p.collect(col).glob(`dating/messages/*.json`) // Files are 0.json, 1.json, etc
    .setId(t=>`Facebook - Dating Messages ${t.basename}`) // Slightly more specific message
    .read()
    // `$to` must be bound BEFORE the header row is emitted. In jq,
    // `header, .recipient as $to | rows | @csv` parses as
    // `header, (.recipient as $to | rows | @csv)`, which would print the header
    // as a raw JSON array instead of a CSV line.
    .cmd(["jq", "-r", `
.recipient as $to
| (
["from","to","timestamp","body"],
(
.messages[]
| ["Me", $to, (.timestamp | todateiso8601), .body]
)
)
| @csv
`])
    .csvSink();//[["timestamp", "numeric"]])
  //todo: your_dating_activity.json, but it only has a few lines and not super useful
  //todo: the other dating files are also just, small

  // TODO: events
  // rcd(`events`);
  // localCollect('event_invitations.json', json, sspawn('jq', [`
  // .events_invited[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // )
  // `]));
  // localCollect('your_event_responses.json', json, sspawn('jq', [`
  // .event_responses.events_joined[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // ) |
  // .event_responses.events_declined[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // ) |
  // .event_responses.events_interested[] |= (
  // .start_timestamp |= todateiso8601 |
  // .end_timestamp |= todateiso8601
  // )
  // `]));

  p.collect(col).cd(`facebook_gaming/instant_games.json`)
    .read()
    .cmd(["jq", "-r", `
["game", "added_timestamp"],
(
.instant_games_played[]
| [.name, (.added_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["added_timestamp", "numeric"]]);

  p.collect(col).cd(`following_and_followers/unfollowed_pages.json`).read().facebook_pages_unfollowed_v1();

  p.collect(col).cd(`following_and_followers/following.json`)
    .read()
    .cmd(["jq", "-r", `
["name", "timestamp"],
(
.following[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  p.collect(col).cd(`following_and_followers/followers.json`)
    .read()
    .cmd(["jq", "-r", `
["name"],
(
.followers[]
| [.name]
)
| @csv
`])
    .csvSink();

  p.collect(col).cd(`friends/sent_friend_requests.json`).read().facebook_friends_generic("sent_requests");
  p.collect(col).cd(`friends/removed_friends.json`).read().facebook_friends_generic("deleted_friends");
  p.collect(col).cd(`friends/rejected_friend_requests.json`).read().facebook_friends_generic("rejected_requests");
  p.collect(col).cd(`friends/received_friend_requests.json`).read().facebook_friends_generic("received_requests");
  p.collect(col).cd(`friends/friends.json`).read().facebook_friends_generic("friends");

  p.collect(col).cd(`groups/your_group_membership_activity.json`).read().facebook_groups_joined_v1();
  p.collect(col).cd(`groups/your_posts_and_comments_in_groups.json`).read().facebook_group_posts_v1();
  // there's also groups.json and events.json but neither has timestamp so they're
  // not really useful right now

  p.collect(col).cd(`interactions/people.json`).read().facebook_people_interactions_v1();
  // `${facebookRoot}/journalist_registration` - no data in my export

  p.collect(col).cd(`likes_and_reactions/pages.json`)
    .read()
    .cmd(["jq", "-r", `
["name", "timestamp"],
(
.page_likes[]
| [.name, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  p.collect(col).cd(`likes_and_reactions/posts_and_comments.json`)
    .read()
    // The first column is headed "title", so it reads `.title` (it previously
    // read `.name`, unlike every other "title" column in this file).
    .cmd(["jq", "-r", `
["title", "timestamp", "reaction"],
(
.reactions[]
| [.title, (.timestamp | todateiso8601), .data[0].reaction.reaction]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // TODO:
  // rcd(`location`);
  // localCollect('primary_location.json', json);
  // localCollect('primary_public_location.json', json);
  // localCollect('timezone.json', json);

  p.collect(col).cd(`marketplace/items_sold.json`).read().facebook_marketplace_items_sold_v1();

  p.collect(col).glob(`messages/**/*.json`) // Files are message_1.json, etc
    .setId(t=>`Facebook - Messages ${t.basenameN(2)}`) // 1, 2, etc is not specific enough, include the convo name
    .read()
    .facebook_messages_generic();

  // `${facebookRoot}/music_recommendations` - no data
  // rcd(`news`);
  // localCollect('your_locations.json', json);

  p.collect(col).cd(`other_activity/pokes.json`)
    .read()
    .cmd(["jq", "-r", `
["from", "to","rank","timestamp"],
(
.pokes.data[]
| [.poker, .pokee, .rank, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  p.collect(col).cd(`other_activity/support_correspondences.json`)
    .read()
    // TODO: I'm seeing blanks in .from and .to when the replier was Facebook
    // themselves. Perhaps it's broken?
    // TODO: Attachments
    .cmd(["jq", "-r", `
["from", "to", "subject", "message", "timestamp"],
(
.support_correspondence[].messages[]
| [.from, .to, .subject, .message, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/pages` - no data

  p.collect(col).cd(`payment_history/payment_history.json`)
    .read()
    .cmd(["jq", "-r", `
["from", "to","amount","currency", "type","status","payment_method", "created_timestamp"],
(
.payments.payments[]
| [.sender, .receiver, .amount, .currency, .type, .status, .payment_method, (.created_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["created_timestamp", "numeric"]]);

  // TODO: There's also photos_and_videos/your_videos.json
  // TODO: There's a media_metadata in each of the images too to convert as well as external files
  p.collect(col).glob(`photos_and_videos/album/*.json`)
    // Could use a better name, currently 0.json, 1.json, etc...
    .setId(t=>`Facebook - Album ${t.basename}`) // Slightly more specific name; it would be better if we could use the album name
    .read()
    .cmd(["jq", "-r", `
["album","uri","creation_timestamp"],
(
.photos[]
| [.title, .uri, (.creation_timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["creation_timestamp", "numeric"]]);

  p.collect(col).cd(`posts/your_pinned_posts.json`)
    .read()
    .cmd(["jq", "-r", `
["name","uri","timestamp"],
(
.pinned_posts[].entries[]
| [.data.name, .data.uri, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // TODO: Glob? I never posted a lot on FB
  p.collect(col).cd(`posts/your_posts_1.json`)
    .read()
    // TODO: Data is an array with objects. .post, .updated_timestamp, separately??
    // TODO: Also attachments
    .cmd(["jq", "-r", `
["title","data","timestamp"],
(
.[]
| [.title, "TODO: data", (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/privacy_checkup` - no data
  // TODO: Shape is non-tabular, but maybe we should handle it?
  // Looks mostly like dupes from other places
  // './profile_information.json': undefined,

  // The minimum amount of data is just .title and .timestamp
  // TODO: Handle data and attachments
  p.collect(col).cd(`profile_information/profile_update_history.json`)
    .read()
    .cmd(["jq", "-r", `
["title","timestamp"],
(
.profile_updates[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/rewards` - no data
  // `${facebookRoot}/saved_items_and_collections` - no data

  p.collect(col).cd(`search_history/your_search_history.json`).read().facebook_searches_v1();

  p.collect(col).cd(`security_and_login_information/account_status_changes.json`)
    .read()
    .cmd(["jq", "-r", `
["status","timestamp"],
(
.account_status_changes[]
| [.status, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  p.collect(col).cd(`security_and_login_information/account_activity.json`).read().facebook_account_activity_v1();
  p.collect(col).cd(`security_and_login_information/administrative_records.json`).read().facebook_admin_records_v1();
  p.collect(col).cd(`security_and_login_information/authorized_logins.json`).read().facebook_authorized_logins_v1();
  p.collect(col).cd(`security_and_login_information/contact_verifications.json`).read().facebook_contact_verification_v1();
  p.collect(col).cd(`security_and_login_information/logins_and_logouts.json`).read().facebook_account_accesses_v1();
  // TODO: datr_cookie_info, looks like a bunch of timestamps
  // a.fork().cd(`login_protection_data.json`)
  // .read()
  // // TODO: updated_timestamp doesn't always exist
  // .cmd(["jq", "-r", `
  // ["name", "created_timestamp", "updated_timestamp", "ip_address"],
  // (
  // .login_protection_data[]
  // | [.name, (.created_timestamp | todateiso8601), (.updated_timestamp | todateiso8601), .ip_address]
  // )
  // | @csv
  // `])
  // TODO: mobile_devices, only a couple entries
  // TODO: used_ip_addresses
  // TODO: where_you've logged in
  // TODO: your_facebook_activity, useless and small
  // `${facebookRoot}/short_videos` - no data in my export
  // `${facebookRoot}/saved_items_and_collections` - no data in my export

  p.collect(col).cd(`stories/story_reactions.json`)
    .read()
    .cmd(["jq", "-r", `
["title", "timestamp"],
(
.stories_feedback[]
| [.title, (.timestamp | todateiso8601)]
)
| @csv
`])
    .csvSink([["timestamp", "numeric"]]);

  // `${facebookRoot}/trash` - no data in my export
  // `${facebookRoot}/voice_recording_and_transcription` - no data in my export
  // `${facebookRoot}/volunteering` - no data in my export
  // `${facebookRoot}/voting_location_and_reminders` - only small 1-property things
  // `${facebookRoot}/your_places` - no data in my export
  // `${facebookRoot}/your_topics` - no data in my export

  const final = Array.from(col).flat();
  return TaskTargetPipelineHelper.pipeline(final);
}