// Source: base-data-manager/data-export/instagram.ts (TypeScript; listed as 542 lines, 19 KiB in the repository viewer)

import { pipe, each, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
// ── helpers ───────────────────────────────────────────────────────────────────
/**
 * Builds the jq-to-CSV stage used for both the login and the logout activity
 * files, whose entries share the same `string_map_data` layout.
 *
 * @param prop Top-level key of the JSON document that holds the entry array;
 *   it is spliced verbatim into the jq filter as `.<prop>[]`.
 * @returns A pipeline op that runs jq and then attaches column metadata.
 */
function instagram_session_activity(prop: string): PipelineOp {
  // The filter emits one CSV header row followed by one row per entry, with
  // Time.timestamp rendered through jq's todateiso8601. The leading and
  // trailing empty items reproduce the newlines that wrap the program text.
  const jqProgram = [
    "",
    '["timestamp","ip_address","port","language_code","user_agent","cookie_name"],',
    "(",
    `.${prop}[]`,
    "| [",
    "(.string_map_data.Time.timestamp | todateiso8601),",
    '.string_map_data."IP Address".value,',
    ".string_map_data.Port.value,",
    '.string_map_data."Language Code".value,',
    '.string_map_data."User Agent".value,',
    '.string_map_data."Cookie Name".value',
    "]",
    ")",
    "| @csv",
    "",
  ].join("\n");

  const toCsv = cmd(["jq", "-r", jqProgram]);
  const withMeta = assignMeta({
    columnMeta: ["isodatetime", "text", "text", "text", "text", "text"],
    perRowTags: "instagram,security",
  });
  return pipe(toCsv, withMeta);
}
/**
 * Builds the jq-to-CSV stage for relationship lists, i.e. documents shaped like
 * `prop[].{title, string_list_data[0].{value, href, timestamp}}`.
 *
 * The username column falls back from `string_list_data[0].value` to `title`
 * and finally to an empty string; a missing href also becomes an empty string.
 *
 * @param prop Top-level key of the JSON document that holds the list; it is
 *   spliced verbatim into the jq filter as `.<prop>[]`.
 * @returns A pipeline op that runs jq and then attaches column metadata.
 */
function instagram_relationship(prop: string): PipelineOp {
  // Header row first, then one row per list entry. The leading and trailing
  // empty items reproduce the newlines that wrap the program text.
  const jqProgram = [
    "",
    '["username","href","timestamp"],',
    "(",
    `.${prop}[]`,
    "| [",
    '(.string_list_data[0].value // .title // ""),',
    '(.string_list_data[0].href // ""),',
    "(.string_list_data[0].timestamp | todateiso8601)",
    "]",
    ")",
    "| @csv",
    "",
  ].join("\n");

  const toCsv = cmd(["jq", "-r", jqProgram]);
  const withMeta = assignMeta({
    columnMeta: ["text", "url", "isodatetime"],
    perRowTags: "instagram",
  });
  return pipe(toCsv, withMeta);
}
// ── main export ────────────────────────────────────────────────────────────────
/**
 * Builds the pipeline that turns the JSON files of an Instagram data export
 * into CSV tables.
 *
 * The pipeline first assigns an id of the form `Instagram - <basename>` and
 * then fans out through `branchGen`: the generator yields one sub-pipeline per
 * export file. Each sub-pipeline selects its input with `cd` (or `glob` for the
 * message inbox), calls `read()`, runs a jq filter that prints a CSV header row
 * followed by one row per record, and finally attaches column metadata with
 * `assignMeta`.
 *
 * Epoch-second timestamps are rendered with jq's `todateiso8601`; message
 * timestamps are stored in milliseconds and are divided by 1000 first.
 *
 * NOTE(review): the exact semantics of `cd`, `read`, `glob`, `each`,
 * `branchGen` and `assignMeta` live in `./task.ts` and are not visible here;
 * the description above follows how they are used in this function.
 *
 * @returns The composed pipeline operation for a whole Instagram export.
 */
export function instagram(): PipelineOp {
  return pipe(
    assignMeta({ idValue: t => `Instagram - ${t.basename}` }),
    branchGen(function*() {
      // ── Security / Login ─────────────────────────────────────────────────
      yield pipe(
        cd("security_and_login_information/login_and_profile_creation/login_activity.json"),
        read(),
        instagram_session_activity("account_history_login_history")
      );
      yield pipe(
        cd("security_and_login_information/login_and_profile_creation/logout_activity.json"),
        read(),
        instagram_session_activity("account_history_logout_history")
      );
      // Sign-up details: one row per registration record.
      yield pipe(
        cd("security_and_login_information/login_and_profile_creation/signup_details.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","username","ip_address","email","phone_number","device"],
(
.account_history_registration_info[]
| [
(.string_map_data.Time.timestamp | todateiso8601),
.string_map_data.Username.value,
.string_map_data."IP Address".value,
.string_map_data.Email.value,
.string_map_data."Phone Number".value,
.string_map_data.Device.value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "text", "text", "text", "text"],
          perRowTags: "instagram,security",
        })
      );
      // ── Connections / Followers ───────────────────────────────────────────
      // The followers file is a top-level array (no wrapping key), so it
      // cannot go through instagram_relationship, which indexes by property.
      // NOTE(review): only followers_1.json is read; confirm whether exports
      // can contain further numbered follower files.
      // NOTE(review): the username column is typed "sender" here but "text" in
      // instagram_relationship — confirm this difference is intended.
      yield pipe(
        cd("connections/followers_and_following/followers_1.json"),
        read(),
        cmd(["jq", "-r", `
["username","href","timestamp"],
(
.[]
| [
.string_list_data[0].value,
.string_list_data[0].href,
(.string_list_data[0].timestamp | todateiso8601)
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["sender", "url", "isodatetime"],
          perRowTags: "instagram",
        })
      );
      yield pipe(
        cd("connections/followers_and_following/following.json"),
        read(),
        instagram_relationship("relationships_following")
      );
      yield pipe(
        cd("connections/followers_and_following/blocked_profiles.json"),
        read(),
        instagram_relationship("relationships_blocked_users")
      );
      yield pipe(
        cd("connections/followers_and_following/recently_unfollowed_profiles.json"),
        read(),
        instagram_relationship("relationships_unfollowed_users")
      );
      yield pipe(
        cd("connections/followers_and_following/pending_follow_requests.json"),
        read(),
        instagram_relationship("relationships_follow_requests_sent")
      );
      yield pipe(
        cd("connections/followers_and_following/removed_suggestions.json"),
        read(),
        instagram_relationship("relationships_dismissed_suggested_users")
      );
      // ── Messages ─────────────────────────────────────────────────────────
      // Every JSON file under the inbox gets its own id and is then processed
      // by two nested branches: thread metadata and the individual messages.
      yield pipe(
        glob("your_instagram_activity/messages/inbox/**/*.json"),
        assignMeta({ idValue: t => `Instagram - Messages ${t.basenameN(2)}` }),
        read(),
        branchGen(function*() {
          // Thread metadata aggregated across all conversations.
          // The task id is handed to jq with --arg and referenced as $id
          // instead of being spliced into the filter text: ids are derived
          // from export file names, and a quote, backslash or "\(" in a name
          // would otherwise corrupt the jq program.
          yield pipe(
            each(t =>
              t.clone().cmd(["jq", "-r", "--arg", "id", `${t.id}`, `
[$id, .title, .is_still_participant, .thread_path, (.participants | map(.name) | join(", "))]
| @csv
`])
            ),
            assignMeta({
              aggregate: true,
              aggregateColumns: ["id", "title", "is_still_participant", "thread_path", "participants"],
              idValue: "Instagram - Messages Meta",
            })
          );
          // Individual messages; timestamp_ms is converted to whole seconds
          // before formatting, and a missing content becomes an empty string.
          yield pipe(
            cmd(["jq", "-r", `
["from","timestamp","content"],
(
.messages[]
| [.sender_name, ((.timestamp_ms / 1000) | round | todateiso8601), .content // ""]
)
| @csv
`]),
            assignMeta({
              metaIdValue: "Instagram - Messages Meta",
              columnMeta: ["sender", "isodatetime", "text"],
              perRowDescription: '"{2}" from {0} at {1}',
              perRowTags: "instagram,message",
            })
          );
        })
      );
      // ── Likes ─────────────────────────────────────────────────────────────
      // Liked posts: top-level array; the URL comes from label_values[0].
      yield pipe(
        cd("your_instagram_activity/likes/liked_posts.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","url"],
(
.[]
| [(.timestamp | todateiso8601), (.label_values[0].href // "")]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "url"],
          perRowDescription: "Liked post {1} at {0}",
          perRowTags: "instagram,initiated_by_me",
        })
      );
      yield pipe(
        cd("your_instagram_activity/likes/liked_comments.json"),
        read(),
        cmd(["jq", "-r", `
["title","href","timestamp"],
(
.likes_comment_likes[]
| [.title, .string_list_data[0].href, (.string_list_data[0].timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "url", "isodatetime"],
          perRowDescription: "Liked comment by {0} at {2}",
          perRowTags: "instagram,initiated_by_me",
        })
      );
      // ── Comments ──────────────────────────────────────────────────────────
      // NOTE(review): only post_comments_1.json is read; confirm whether
      // exports can contain further numbered comment files.
      yield pipe(
        cd("your_instagram_activity/comments/post_comments_1.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","media_owner","comment"],
(
.[]
| [
(.string_map_data.Time.timestamp | todateiso8601),
.string_map_data."Media Owner".value,
.string_map_data.Comment.value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "text"],
          perRowDescription: "Comment on {1}'s post at {0}",
          perRowTags: "instagram,initiated_by_me,content_by_me",
        })
      );
      // ── Story interactions ────────────────────────────────────────────────
      yield pipe(
        cd("your_instagram_activity/story_interactions/polls.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","title","answer"],
(
.story_activities_polls[]
| [(.string_list_data[0].timestamp | todateiso8601), .title, .string_list_data[0].value]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "text"],
          perRowDescription: 'Poll "{1}" answered "{2}" at {0}',
          perRowTags: "instagram,initiated_by_me",
        })
      );
      yield pipe(
        cd("your_instagram_activity/story_interactions/emoji_sliders.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","title","value"],
(
.story_activities_emoji_sliders[]
| [(.string_list_data[0].timestamp | todateiso8601), .title, .string_list_data[0].value]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "numeric"],
          perRowDescription: 'Emoji slider "{1}" = {2} at {0}',
          perRowTags: "instagram,initiated_by_me",
        })
      );
      // ── Saved / Shopping ──────────────────────────────────────────────────
      yield pipe(
        cd("your_instagram_activity/saved/saved_posts.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","href","title"],
(
.saved_saved_media[]
| [(.string_map_data."Saved on".timestamp | todateiso8601), .string_map_data."Saved on".href, .title]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "url", "text"],
          perRowDescription: 'Saved "{2}" at {0}',
          perRowTags: "instagram,initiated_by_me",
        })
      );
      // Recently viewed products carry no timestamp column.
      yield pipe(
        cd("your_instagram_activity/shopping/recently_viewed_items.json"),
        read(),
        cmd(["jq", "-r", `
["product_name","merchant_name"],
(
.checkout_saved_recently_viewed_products[]
| [.string_map_data."Product Name".value, .string_map_data."Merchant Name".value]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "text"],
          perRowTags: "instagram",
        })
      );
      // ── Threads viewed ────────────────────────────────────────────────────
      yield pipe(
        cd("your_instagram_activity/threads/threads_viewed.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","author","url"],
(
.text_post_app_text_post_app_posts_seen[]
| [
(.string_map_data.Time.timestamp | todateiso8601),
.string_map_data.Author.value,
.string_map_data.URL.href
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "url"],
          perRowDescription: "Viewed thread by {1} at {0}",
          perRowTags: "instagram",
        })
      );
      // ── Off-Meta activity ─────────────────────────────────────────────────
      // Each app entry holds a nested events array; the app name is bound to
      // $name so that every event row can repeat it.
      yield pipe(
        cd("apps_and_websites_off_of_instagram/apps_and_websites/your_activity_off_meta_technologies.json"),
        read(),
        cmd(["jq", "-r", `
["name","id","type","timestamp"],
(
.apps_and_websites_off_meta_activity[]
| .name as $name
| .events[]
| [$name, (.id | tostring), .type, (.timestamp | todateiso8601)]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "any", "text", "isodatetime"],
          perRowDescription: "{2} event from {0} at {3}",
          perRowTags: "instagram",
        })
      );
      // ── Ads ───────────────────────────────────────────────────────────────
      // Viewed posts / watched videos: top-level arrays; the URL comes from
      // label_values[1] (liked posts above use index 0).
      yield pipe(
        cd("ads_information/ads_and_topics/posts_viewed.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","url"],
(
.[]
| [(.timestamp | todateiso8601), (.label_values[1].href // "")]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "url"],
          perRowDescription: "Viewed post {1} at {0}",
          perRowTags: "instagram",
        })
      );
      yield pipe(
        cd("ads_information/ads_and_topics/videos_watched.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","url"],
(
.[]
| [(.timestamp | todateiso8601), (.label_values[1].href // "")]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "url"],
          perRowDescription: "Watched video {1} at {0}",
          perRowTags: "instagram",
        })
      );
      // NOTE(review): href and username are read from string_list_data[0] but
      // the timestamp from string_list_data[1] — confirm against a real export.
      yield pipe(
        cd("ads_information/ads_and_topics/posts_you're_not_interested_in.json"),
        read(),
        cmd(["jq", "-r", `
["href","username","timestamp"],
(
.impressions_history_posts_not_interested[]
| [
.string_list_data[0].href,
.string_list_data[0].value,
(.string_list_data[1].timestamp | todateiso8601)
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["url", "text", "isodatetime"],
          perRowDescription: "Not interested in {1} at {2}",
          perRowTags: "instagram",
        })
      );
      yield pipe(
        cd("ads_information/instagram_ads_and_businesses/advertisers_using_your_activity_or_information.json"),
        read(),
        cmd(["jq", "-r", `
["advertiser_name","has_data_file","has_remarketing","has_in_person_store_visit"],
(
.ig_custom_audiences_all_types[]
| [.advertiser_name, .has_data_file_custom_audience, .has_remarketing_custom_audience, .has_in_person_store_visit]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "any", "any", "any"],
          perRowTags: "instagram,ads",
        })
      );
      // Categories are nested under label_values[0].vec; one row per value.
      yield pipe(
        cd("ads_information/instagram_ads_and_businesses/other_categories_used_to_reach_you.json"),
        read(),
        cmd(["jq", "-r", `
["category"],
(.label_values[0].vec[] | [.value])
| @csv
`]),
        assignMeta({
          columnMeta: ["text"],
          perRowTags: "instagram,ads",
        })
      );
      // ── Personal information ───────────────────────────────────────────────
      yield pipe(
        cd("personal_information/personal_information/profile_changes.json"),
        read(),
        cmd(["jq", "-r", `
["timestamp","changed","previous_value","new_value"],
(
.profile_profile_change[]
| [
(.string_map_data."Change Date".timestamp | todateiso8601),
.string_map_data.Changed.value,
.string_map_data."Previous Value".value,
.string_map_data."New Value".value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text", "text", "text"],
          perRowDescription: 'Changed {1} to "{3}" at {0}',
          perRowTags: "instagram",
        })
      );
      yield pipe(
        cd("personal_information/device_information/devices.json"),
        read(),
        cmd(["jq", "-r", `
["last_login","user_agent"],
(
.devices_devices[]
| [
(.string_map_data."Last Login".timestamp | todateiso8601),
.string_map_data."User Agent".value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["isodatetime", "text"],
          perRowTags: "instagram,security",
        })
      );
      // ── Preferences ───────────────────────────────────────────────────────
      yield pipe(
        cd("preferences/your_topics/recommended_topics.json"),
        read(),
        cmd(["jq", "-r", `
["topic"],
(.topics_your_topics[] | [.string_map_data.Name.value])
| @csv
`]),
        assignMeta({
          columnMeta: ["text"],
          perRowTags: "instagram",
        })
      );
      yield pipe(
        cd("preferences/settings/notification_preferences.json"),
        read(),
        cmd(["jq", "-r", `
["channel","type","value"],
(
.settings_notification_preferences[]
| [
.string_map_data.Channel.value,
.string_map_data.Type.value,
.string_map_data.Value.value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "text", "text"],
          perRowTags: "instagram",
        })
      );
      // ── Monetization ──────────────────────────────────────────────────────
      yield pipe(
        cd("your_instagram_activity/monetization/eligibility.json"),
        read(),
        cmd(["jq", "-r", `
["product_name","decision","reason"],
(
.monetization_eligibility[]
| [
.string_map_data."Product Name".value,
.string_map_data.Decision.value,
.string_map_data.Reason.value
]
)
| @csv
`]),
        assignMeta({
          columnMeta: ["text", "text", "text"],
          perRowTags: "instagram",
        })
      );
    })
  );
}