import { pipe, each, cmd, assignMeta, cd, glob, read, branchGen, type PipelineOp } from "./task.ts";

/** Path, relative to the export root, of the account-level JSON document. */
const DISCORD_USER_JSON = "account/user.json";

/**
 * Returns the name of the directory that directly contains the file at `path`
 * (the second-to-last `/`-separated segment).
 *
 * @param path - A `/`-separated path such as `messages/123/messages.csv`.
 * @returns The parent directory name, or `""` when the path has no parent segment.
 */
function discordParentDir(path: string): string {
  const segments = path.split("/");
  return segments[segments.length - 2] ?? "";
}

/** Extracts the channel ID directory name from paths like messages/{channelId}/messages.csv */
function discordChannelId(t: { path: string }): string {
  return discordParentDir(t.path);
}

/** Linked third-party accounts (Steam, Twitch, etc.) from account/user.json */
function discord_connections(): PipelineOp {
  return pipe(
    // Emits one CSV header row, then one row per entry of `.connections` (if any).
    cmd(["jq", "-r", `
      ["type", "name", "id", "verified", "visibility"],
      (
        .connections[]?
        | [.type, .name, .id, .verified, .visibility]
      )
      | @csv
    `]),
    assignMeta({
      idValue: "Discord - Connections",
      columnMeta: ["text", "text", "any", "any", "any"],
      perRowDescription: '{0} account "{1}"',
      perRowTags: "discord",
    })
  );
}

/** Friends, blocked users, and other relationships from account/user.json */
function discord_relationships(): PipelineOp {
  return pipe(
    // Emits one CSV header row, then one row per entry of `.relationships` (if any).
    cmd(["jq", "-r", `
      ["username", "discriminator", "type"],
      (
        .relationships[]?
        | [.user.username, .user.discriminator, .type]
      )
      | @csv
    `]),
    assignMeta({
      idValue: "Discord - Relationships",
      columnMeta: ["text", "any", "any"],
      perRowDescription: '{0}#{1} (relationship type {2})',
      perRowTags: "discord",
    })
  );
}

/** Purchase history from account/user.json */
function discord_payments(): PipelineOp {
  return pipe(
    // Emits one CSV header row, then one row per entry of `.payments` (if any).
    cmd(["jq", "-r", `
      ["created_at", "description", "amount", "currency", "status"],
      (
        .payments[]?
        | [.created_at, .description, .amount, .currency, .status]
      )
      | @csv
    `]),
    assignMeta({
      idValue: "Discord - Payments",
      columnMeta: ["isodatetime", "text", "numeric", "text", "any"],
      perRowDescription: '{1}: {2} {3} on {0}',
      perRowTags: "discord,payment",
    })
  );
}

/** Application/game play-time statistics from account/user.json */
function discord_activity_stats(): PipelineOp {
  return pipe(
    // Emits one CSV header row, then one row per entry of
    // `.user_activity_application_statistics` (if any).
    cmd(["jq", "-r", `
      ["application_id", "last_played_at", "total_duration"],
      (
        .user_activity_application_statistics[]?
        | [.application_id, .last_played_at, .total_duration]
      )
      | @csv
    `]),
    assignMeta({
      idValue: "Discord - Activity Stats",
      columnMeta: ["any", "isodatetime", "numeric"],
      perRowDescription: 'App {0}: {2}s played, last at {1}',
      perRowTags: "discord",
    })
  );
}

/**
 * Activity event logs from activity/{subdir}/events-*.json (NDJSON format).
 * Each subdirectory (analytics, modeling, reporting, tns) becomes its own table.
 * Fields chosen for what the user did: event type, when, where (channel/guild),
 * which message, which game, and human-readable channel/guild names when available.
 */
function discord_activity_events(): PipelineOp {
  return pipe(
    glob(`activity/*/events-*.json`),
    // Name each table after the subdirectory that holds the events file.
    assignMeta({ idValue: t => `Discord - Activity ${discordParentDir(t.path)}` }),
    read(),
    // NDJSON: use -n + inputs so jq processes all lines, emitting one header then N rows
    cmd(["jq", "-rn", `
      ["event_type", "timestamp", "channel_id", "guild_id", "message_id", "game_name", "channel_name", "guild_name"],
      (
        inputs
        | [
            .event_type,
            .timestamp,
            (.channel_id // ""),
            (.guild_id // ""),
            (.message_id // ""),
            (.game_name // ""),
            (.channel_name // ""),
            (.guild_name // "")
          ]
      )
      | @csv
    `]),
    assignMeta({
      columnMeta: ["text", "isodatetime", "any", "any", "any", "text", "text", "text"],
      perRowDescription: '{0} at {1}',
      perRowTags: "discord,activity",
    })
  );
}

/** Notes the user wrote on other users, keyed by user ID, from account/user.json */
function discord_notes(): PipelineOp {
  return pipe(
    // `.notes // {}` makes a missing/null notes object yield only the header row.
    cmd(["jq", "-r", `
      ["user_id", "note"],
      (
        .notes // {}
        | to_entries[]
        | [.key, .value]
      )
      | @csv
    `]),
    assignMeta({
      idValue: "Discord - Notes",
      columnMeta: ["any", "text"],
      perRowDescription: 'Note on {0}: "{1}"',
      perRowTags: "discord",
    })
  );
}

/**
 * Messages from messages/{channelId}/messages.csv and channel metadata from
 * messages/{channelId}/channel.json.
 * NOTE: The export only contains the exporting user's own messages.
 */
function discord_messages(): PipelineOp {
  return branchGen(function* () {
    // Channel-level metadata aggregated into a single table
    yield pipe(
      glob(`messages/*/channel.json`),
      assignMeta({ idValue: t => `Discord - Channel ${discordChannelId(t)}` }),
      read(),
      // The task id is handed to jq through --arg rather than spliced into the
      // program text, so an id containing quotes or backslashes cannot break or
      // alter the filter.
      // NOTE(review): the "id" column therefore holds "Discord - Channel <channelId>",
      // not the bare channel ID nor the "Discord - Messages <channelId>" table id —
      // confirm this is the key downstream consumers expect.
      each(t => t.clone().cmd(["jq", "-r", "--arg", "id", `${t.id}`, `
        [$id, .type, (.name // ""), (.guild.id // ""), (.guild.name // ""), ((.recipients // []) | join(","))]
        | @csv
      `])),
      assignMeta({
        aggregate: true,
        aggregateColumns: ["id", "type", "name", "guild_id", "guild_name", "recipients"],
        idValue: "Discord - Messages Meta",
      })
    );

    // The messages themselves — one table per channel
    yield pipe(
      glob(`messages/*/messages.csv`),
      assignMeta({ idValue: t => `Discord - Messages ${discordChannelId(t)}` }),
      read(),
      // Normalize the header row to lowercase names
      cmd(["sed", "-e", "1s/.*/id,timestamp,content,attachment/"]),
      assignMeta({
        metaIdValue: "Discord - Messages Meta",
        columnMeta: ["any", "isodatetime", "text", "url"],
        perRowDescription: '"{2}" at {1}',
        perRowTags: "discord,message,content_by_me",
      })
    );
  });
}

/**
 * Top-level pipeline for a Discord data export.
 *
 * Assigns a default id of `Discord - {basename}` and then branches into the
 * per-channel message tables, one table for each section extracted from
 * account/user.json, and the activity event logs.
 *
 * @returns A pipeline operation combining all Discord sub-pipelines.
 */
export function discord(): PipelineOp {
  // Each user.json-derived table gets its own cd + read, as separate branches.
  const fromUserJson = (extract: PipelineOp): PipelineOp =>
    pipe(cd(DISCORD_USER_JSON), read(), extract);

  return pipe(
    assignMeta({ idValue: t => `Discord - ${t.basename}` }),
    branchGen(function* () {
      yield discord_messages();
      yield fromUserJson(discord_connections());
      yield fromUserJson(discord_relationships());
      yield fromUserJson(discord_payments());
      yield fromUserJson(discord_activity_stats());
      yield fromUserJson(discord_notes());
      yield discord_activity_events();
    })
  );
}