base-data-manager/data-export/discord-chat-exporter.ts

69 lines
2.3 KiB
TypeScript

import { pipe, each, cmd, assignMeta, glob, read, branchGen, type PipelineOp } from "./task.ts";
/**
 * Derives a channel identifier from an exported file's path.
 *
 * When the path ends in "[<something>].json", the text between the brackets is
 * returned, e.g.
 * "GuildName - Text Channels - ChannelName [0000000000000000].json" → "0000000000000000".
 * Otherwise the last "/"-separated segment of the path is returned unchanged
 * (including any extension).
 *
 * @param t - Any object exposing the file's `path`.
 * @returns The bracketed identifier, or the final path segment as a fallback.
 */
function chatExporterChannelId(t: { path: string }): string {
  const bracketed = /\[([^\]]+)\]\.json$/.exec(t.path);
  if (bracketed !== null && bracketed[1] !== undefined) {
    return bracketed[1];
  }
  // No bracketed suffix: keep everything after the last slash. lastIndexOf
  // yields -1 when there is no slash, which makes the slice start at 0.
  const lastSlash = t.path.lastIndexOf('/');
  return t.path.slice(lastSlash + 1);
}
/**
 * Channel metadata aggregate + messages table, one pair per exported JSON file.
 * Unlike the native Discord export, DiscordChatExporter captures ALL authors' messages.
 *
 * Two pipelines are yielded, each starting from the same `*.json` glob:
 *  1. a per-file jq call that emits one CSV row of guild/channel metadata,
 *     tagged for aggregation under the id "DiscordCE - Messages Meta";
 *  2. a per-file jq call that emits a header row followed by one CSV row per
 *     entry of `.messages`, linked back to the metadata via `metaIdValue`.
 *
 * @returns A branching pipeline op producing both outputs.
 */
function discord_chat_exporter_messages(): PipelineOp {
  return branchGen(function* () {
    // Channel-level metadata aggregated into a single table
    yield pipe(
      glob(`*.json`),
      assignMeta({ idValue: t => `DiscordCE - Channel ${chatExporterChannelId(t)}` }),
      read(),
      // The task id is derived from the file name, so it is handed to jq as a
      // variable (--arg) instead of being spliced into the program text. A file
      // name containing `"` or `\` would otherwise produce an invalid or altered
      // jq program.
      each(t => t.clone().cmd(["jq", "-r", "--arg", "id", `${t.id}`, `
        [$id, .guild.name, .channel.name, .channel.type, (.channel.category // ""), (.channel.topic // ""), .messageCount]
        | @csv
      `])),
      assignMeta({
        aggregate: true,
        // Column names, in the same order as the array built by the jq program above.
        aggregateColumns: ["id", "guild_name", "channel_name", "channel_type", "channel_category", "channel_topic", "message_count"],
        idValue: "DiscordCE - Messages Meta",
      })
    );

    // The messages — one table per exported file
    yield pipe(
      glob(`*.json`),
      assignMeta({ idValue: t => `DiscordCE - Messages ${chatExporterChannelId(t)}` }),
      read(),
      // First output row is the header; `// ""` substitutes an empty string
      // when the discriminator or the first attachment's url is null/missing.
      cmd(["jq", "-r", `
        ["id", "timestamp", "author", "discriminator", "content", "attachment"],
        (
          .messages[]
          | [
            .id,
            .timestamp,
            .author.name,
            (.author.discriminator // ""),
            .content,
            (.attachments[0].url // "")
          ]
        )
        | @csv
      `]),
      assignMeta({
        metaIdValue: "DiscordCE - Messages Meta",
        // One entry per CSV column, in header order.
        columnMeta: ["any", "isodatetime", "sender", "any", "text", "url"],
        // NOTE(review): placeholders look like zero-based column indexes
        // ({4}=content, {2}=author, {1}=timestamp) — confirm against task.ts.
        perRowDescription: '"{4}" from {2} at {1}',
        perRowTags: "discord,message",
      })
    );
  });
}
/**
 * Public entry point for the DiscordChatExporter pipeline.
 *
 * Assigns each task the id "DiscordCE - <basename>" and then runs the
 * channel-metadata and messages pipelines.
 *
 * @returns The composed pipeline op.
 */
export function discord_chat_exporter(): PipelineOp {
  const tagWithBasename = assignMeta({ idValue: t => `DiscordCE - ${t.basename}` });
  const messageTables = discord_chat_exporter_messages();
  return pipe(tagWithBasename, messageTables);
}