import { pipe, each, cmd, assignMeta, glob, read, branchGen, type PipelineOp } from "./task.ts";

/**
 * Extracts the channel ID from the filename, e.g.
 * "GuildName - Text Channels - ChannelName [0000000000000000].json" → "0000000000000000"
 *
 * When the path does not end in "[...].json", the last "/"-separated segment
 * of the path is returned instead.
 */
function chatExporterChannelId(t: { path: string }): string {
  const match = t.path.match(/\[([^\]]+)\]\.json$/);
  // split() always yields at least one element, so the final fallback is
  // only there to satisfy the type checker without a non-null assertion.
  return match?.[1] ?? t.path.split('/').pop() ?? t.path;
}

/**
 * Channel metadata aggregate + messages table, one pair per exported JSON file.
 * Unlike the native Discord export, DiscordChatExporter captures ALL authors' messages.
 *
 * Yields two pipelines over `*.json`:
 *  1. one CSV row of channel-level fields per file, marked as an aggregate
 *     with id "DiscordCE - Messages Meta";
 *  2. one CSV messages table per file, pointing at that aggregate through
 *     `metaIdValue`.
 */
function discord_chat_exporter_messages(): PipelineOp {
  return branchGen(function* () {
    // Channel-level metadata aggregated into a single table
    yield pipe(
      glob(`*.json`),
      assignMeta({ idValue: t => `DiscordCE - Channel ${chatExporterChannelId(t)}` }),
      read(),
      // The id is handed to jq with --arg and referenced as $id rather than
      // being spliced into the filter text: ids derived from filenames may
      // contain quotes or backslashes, which would otherwise corrupt the
      // jq program.
      // NOTE(review): the "id" column holds the "DiscordCE - Channel <id>"
      // value, not the "DiscordCE - Messages <id>" table id — confirm this is
      // what consumers of the meta table expect.
      each(t => t.clone().cmd(["jq", "-r", "--arg", "id", `${t.id}`, `
        [$id, .guild.name, .channel.name, .channel.type, (.channel.category // ""), (.channel.topic // ""), .messageCount]
        | @csv
      `])),
      assignMeta({
        aggregate: true,
        aggregateColumns: ["id", "guild_name", "channel_name", "channel_type", "channel_category", "channel_topic", "message_count"],
        idValue: "DiscordCE - Messages Meta",
      })
    );

    // The messages — one table per exported file
    yield pipe(
      glob(`*.json`),
      assignMeta({ idValue: t => `DiscordCE - Messages ${chatExporterChannelId(t)}` }),
      read(),
      // Header row first, then one row per message; only the first
      // attachment's URL is kept, and absent values become "".
      cmd(["jq", "-r", `
        ["id", "timestamp", "author", "discriminator", "content", "attachment"],
        (
          .messages[]
          | [
            .id,
            .timestamp,
            .author.name,
            (.author.discriminator // ""),
            .content,
            (.attachments[0].url // "")
          ]
        )
        | @csv
      `]),
      assignMeta({
        metaIdValue: "DiscordCE - Messages Meta",
        columnMeta: ["any", "isodatetime", "sender", "any", "text", "url"],
        // Placeholders are column positions in the header row above:
        // {4} = content, {2} = author, {1} = timestamp.
        perRowDescription: '"{4}" from {2} at {1}',
        perRowTags: "discord,message",
      })
    );
  });
}

/**
 * Entry point for DiscordChatExporter exports: assigns the id
 * `DiscordCE - <basename>` to the incoming task, then runs the
 * channel-metadata and messages pipelines.
 */
export function discord_chat_exporter(): PipelineOp {
  return pipe(
    assignMeta({ idValue: t => `DiscordCE - ${t.basename}` }),
    discord_chat_exporter_messages()
  );
}