base-data-manager/data-export/discord.ts

201 lines
6.2 KiB
TypeScript

import { pipe, each, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";
/**
 * Returns the name of the directory that directly contains the task's file,
 * i.e. the second-to-last `/`-separated segment of `t.path`.
 * For `messages/{channelId}/messages.csv` this is `{channelId}`.
 */
function discordChannelId(t: { path: string }): string {
  const segments = t.path.split('/');
  // The last segment is the file name; the one before it is the directory.
  return segments[segments.length - 2];
}
/**
 * Linked third-party accounts (Steam, Twitch, etc.) from account/user.json.
 *
 * Runs jq over the JSON already loaded into the task and emits CSV: a header
 * row followed by one row per entry of `.connections`. The result is tagged
 * as the "Discord - Connections" table.
 */
function discord_connections(): PipelineOp {
  // `[]?` yields no rows (instead of failing) when `.connections` is absent.
  const connectionsToCsv = `
["type", "name", "id", "verified", "visibility"],
(
.connections[]?
| [.type, .name, .id, .verified, .visibility]
)
| @csv
`;
  const extract = cmd(["jq", "-r", connectionsToCsv]);
  const annotate = assignMeta({
    idValue: "Discord - Connections",
    columnMeta: ["text", "text", "any", "any", "any"],
    perRowDescription: '{0} account "{1}"',
    perRowTags: "discord",
  });
  return pipe(extract, annotate);
}
/**
 * Friends, blocked users, and other relationships from account/user.json.
 *
 * Runs jq over the JSON already loaded into the task and emits CSV: a header
 * row followed by one row per entry of `.relationships`, taking the username
 * and discriminator from the nested `.user` object. The result is tagged as
 * the "Discord - Relationships" table.
 */
function discord_relationships(): PipelineOp {
  // `[]?` yields no rows (instead of failing) when `.relationships` is absent.
  const relationshipsToCsv = `
["username", "discriminator", "type"],
(
.relationships[]?
| [.user.username, .user.discriminator, .type]
)
| @csv
`;
  const extract = cmd(["jq", "-r", relationshipsToCsv]);
  const annotate = assignMeta({
    idValue: "Discord - Relationships",
    columnMeta: ["text", "any", "any"],
    perRowDescription: '{0}#{1} (relationship type {2})',
    perRowTags: "discord",
  });
  return pipe(extract, annotate);
}
/**
 * Purchase history from account/user.json.
 *
 * Runs jq over the JSON already loaded into the task and emits CSV: a header
 * row followed by one row per entry of `.payments`. The result is tagged as
 * the "Discord - Payments" table.
 */
function discord_payments(): PipelineOp {
  // `[]?` yields no rows (instead of failing) when `.payments` is absent.
  const paymentsToCsv = `
["created_at", "description", "amount", "currency", "status"],
(
.payments[]?
| [.created_at, .description, .amount, .currency, .status]
)
| @csv
`;
  const extract = cmd(["jq", "-r", paymentsToCsv]);
  const annotate = assignMeta({
    idValue: "Discord - Payments",
    columnMeta: ["isodatetime", "text", "numeric", "text", "any"],
    perRowDescription: '{1}: {2} {3} on {0}',
    perRowTags: "discord,payment",
  });
  return pipe(extract, annotate);
}
/**
 * Application/game play-time statistics from account/user.json.
 *
 * Runs jq over the JSON already loaded into the task and emits CSV: a header
 * row followed by one row per entry of
 * `.user_activity_application_statistics`. The result is tagged as the
 * "Discord - Activity Stats" table.
 */
function discord_activity_stats(): PipelineOp {
  // `[]?` yields no rows (instead of failing) when the statistics key is absent.
  const activityStatsToCsv = `
["application_id", "last_played_at", "total_duration"],
(
.user_activity_application_statistics[]?
| [.application_id, .last_played_at, .total_duration]
)
| @csv
`;
  const extract = cmd(["jq", "-r", activityStatsToCsv]);
  const annotate = assignMeta({
    idValue: "Discord - Activity Stats",
    columnMeta: ["any", "isodatetime", "numeric"],
    perRowDescription: 'App {0}: {2}s played, last at {1}',
    perRowTags: "discord",
  });
  return pipe(extract, annotate);
}
/**
 * Activity event logs from activity/{subdir}/events-*.json (NDJSON format).
 * Each matched file is identified as "Discord - Activity {subdir}", where
 * {subdir} is the directory containing it (analytics, modeling, reporting, tns).
 * Fields chosen for what the user did: event type, when, where (channel/guild),
 * which message, which game, and human-readable channel/guild names when available.
 */
function discord_activity_events(): PipelineOp {
  // One header row, then one row per NDJSON record read via `inputs`.
  // Optional fields fall back to "" through `//`.
  const eventsToCsv = `
["event_type", "timestamp", "channel_id", "guild_id", "message_id", "game_name", "channel_name", "guild_name"],
(
inputs
| [
.event_type,
.timestamp,
(.channel_id // ""),
(.guild_id // ""),
(.message_id // ""),
(.game_name // ""),
(.channel_name // ""),
(.guild_name // "")
]
)
| @csv
`;
  return pipe(
    glob(`activity/*/events-*.json`),
    assignMeta({
      idValue: t => {
        // Second-to-last path segment is the activity subdirectory name.
        const segments = t.path.split('/');
        return `Discord - Activity ${segments[segments.length - 2]}`;
      },
    }),
    read(),
    // NDJSON: -n plus `inputs` makes jq consume every line in a single run,
    // so the header is emitted once rather than once per record.
    cmd(["jq", "-rn", eventsToCsv]),
    assignMeta({
      columnMeta: ["text", "isodatetime", "any", "any", "any", "text", "text", "text"],
      perRowDescription: '{0} at {1}',
      perRowTags: "discord,activity",
    })
  );
}
/**
 * Notes the user wrote on other users, keyed by user ID, from account/user.json.
 *
 * Runs jq over the JSON already loaded into the task and emits CSV: a header
 * row followed by one (key, value) row per entry of the `.notes` object. The
 * result is tagged as the "Discord - Notes" table.
 */
function discord_notes(): PipelineOp {
  // `.notes // {}` substitutes an empty object when notes are missing, so
  // `to_entries[]` simply produces no rows.
  const notesToCsv = `
["user_id", "note"],
(
.notes // {}
| to_entries[]
| [.key, .value]
)
| @csv
`;
  const extract = cmd(["jq", "-r", notesToCsv]);
  const annotate = assignMeta({
    idValue: "Discord - Notes",
    columnMeta: ["any", "text"],
    perRowDescription: 'Note on {0}: "{1}"',
    perRowTags: "discord",
  });
  return pipe(extract, annotate);
}
/**
 * Messages from messages/{channelId}/messages.csv and channel metadata from
 * messages/{channelId}/channel.json.
 * NOTE: The export only contains the exporting user's own messages.
 *
 * Yields two branches:
 *  1. one CSV row per channel.json, aggregated under "Discord - Messages Meta";
 *  2. one table per messages.csv, identified as "Discord - Messages {channelId}".
 */
function discord_messages(): PipelineOp {
  return branchGen(function* () {
    // Channel-level metadata aggregated into a single table
    yield pipe(
      glob(`messages/*/channel.json`),
      assignMeta({ idValue: t => `Discord - Channel ${discordChannelId(t)}` }),
      read(),
      // The task id is derived from a directory name inside the export, so it
      // is passed to jq as data via --arg rather than spliced into the program
      // text; a quote or backslash in the name can then neither break the
      // filter nor inject jq code.
      // NOTE(review): the id written here is "Discord - Channel {channelId}",
      // while the message tables below are "Discord - Messages {channelId}" —
      // confirm whether consumers of the meta table expect these to match.
      each(t => t.clone().cmd(["jq", "-r", "--arg", "id", String(t.id), `
[$id, .type, (.name // ""), (.guild.id // ""), (.guild.name // ""), ((.recipients // []) | join(","))]
| @csv
`])),
      assignMeta({
        aggregate: true,
        aggregateColumns: ["id", "type", "name", "guild_id", "guild_name", "recipients"],
        idValue: "Discord - Messages Meta",
      })
    );
    // The messages themselves — one table per channel
    yield pipe(
      glob(`messages/*/messages.csv`),
      assignMeta({ idValue: t => `Discord - Messages ${discordChannelId(t)}` }),
      read(),
      // Replace the first line (the header row) with fixed lowercase column names
      cmd(["sed", "-e", "1s/.*/id,timestamp,content,attachment/"]),
      assignMeta({
        metaIdValue: "Discord - Messages Meta",
        columnMeta: ["any", "isodatetime", "text", "url"],
        perRowDescription: '"{2}" at {1}',
        perRowTags: "discord,message,content_by_me",
      })
    );
  });
}
/**
 * Top-level pipeline for a Discord data export.
 *
 * Sets a default id of "Discord - {basename}" and then branches into, in
 * order: the message tables, the five tables extracted from
 * account/user.json, and the activity event logs.
 */
export function discord(): PipelineOp {
  return pipe(
    assignMeta({ idValue: t => `Discord - ${t.basename}` }),
    branchGen(function* () {
      yield discord_messages();

      // Every one of these tables is extracted from the same file, so each
      // branch navigates to it and reads it before running its own extraction.
      const userJsonTables = [
        discord_connections,
        discord_relationships,
        discord_payments,
        discord_activity_stats,
        discord_notes,
      ];
      for (const buildTable of userJsonTables) {
        yield pipe(cd(`account/user.json`), read(), buildTable());
      }

      yield discord_activity_events();
    })
  );
}