Improved database schema

- Some queries are faster due to better index ordering
- Database is smaller thanks to splitting out a separate message_channel table
  and adding WITHOUT ROWID where helpful
This commit is contained in:
Cadence Ember 2023-08-28 17:32:55 +12:00
parent 21156446ee
commit fcbb045cbb
11 changed files with 114 additions and 93 deletions

View file

@ -19,7 +19,7 @@ async function deleteMessage(data) {
for (const eventID of eventsToRedact) {
// Unfortunately, we can't specify a sender to do the redaction unless we find out that info via the audit logs
await api.redactEvent(roomID, eventID)
db.prepare("DELETE from event_message WHERE event_id = ?").run(eventID)
db.prepare("DELETE FROM event_message WHERE event_id = ?").run(eventID)
}
}

View file

@ -32,7 +32,7 @@ async function editMessage(message, guild) {
// Not redacting as the last action because the last action is likely to be shown in the room preview in clients, and we don't want it to look like somebody actually deleted a message.
for (const eventID of eventsToRedact) {
await api.redactEvent(roomID, eventID, senderMxid)
db.prepare("DELETE from event_message WHERE event_id = ?").run(eventID)
db.prepare("DELETE FROM event_message WHERE event_id = ?").run(eventID)
// TODO: If I just redacted part = 0, I should update one of the other events to make it the new part = 0, right?
}
@ -44,7 +44,7 @@ async function editMessage(message, guild) {
delete contentWithoutType.$type
const eventID = await api.sendEvent(roomID, eventType, contentWithoutType, senderMxid)
db.prepare("INSERT INTO event_message (event_id, event_type, event_subtype, message_id, channel_id, part, source) VALUES (?, ?, ?, ?, ?, 1, 1)").run(eventID, eventType, content.msgtype || null, message.id, message.channel_id) // part 1 = supporting; source 1 = discord
// Record the newly sent event against the Discord message.
// The column list names six columns, so exactly six values must be supplied:
// four bound parameters followed by the literals part = 1 and source = 1.
// (channel_id is no longer a column here, so its placeholder is dropped as well.)
db.prepare("INSERT INTO event_message (event_id, event_type, event_subtype, message_id, part, source) VALUES (?, ?, ?, ?, 1, 1)").run(eventID, eventType, content.msgtype || null, message.id) // part 1 = supporting; source 1 = discord
}
}

View file

@ -32,6 +32,9 @@ async function sendMessage(message, guild) {
const events = await messageToEvent.messageToEvent(message, guild, {}, {api})
const eventIDs = []
let eventPart = 0 // 0 is primary, 1 is supporting
if (events.length) {
db.prepare("REPLACE INTO message_channel (message_id, channel_id) VALUES (?, ?)").run(message.id, message.channel_id)
}
for (const event of events) {
const eventType = event.$type
/** @type {Pick<typeof event, Exclude<keyof event, "$type">> & { $type?: string }} */
@ -40,7 +43,7 @@ async function sendMessage(message, guild) {
const useTimestamp = message["backfill"] ? new Date(message.timestamp).getTime() : undefined
const eventID = await api.sendEvent(roomID, eventType, eventWithoutType, senderMxid, useTimestamp)
db.prepare("INSERT INTO event_message (event_id, event_type, event_subtype, message_id, channel_id, part, source) VALUES (?, ?, ?, ?, ?, ?, 1)").run(eventID, eventType, event.msgtype || null, message.id, message.channel_id, eventPart) // source 1 = discord
// Record the newly sent event against the Discord message.
// The column list names six columns, so exactly six values must be supplied:
// five bound parameters (the last being eventPart) followed by the literal source = 1.
// (channel_id is no longer a column here, so its placeholder is dropped as well.)
db.prepare("INSERT INTO event_message (event_id, event_type, event_subtype, message_id, part, source) VALUES (?, ?, ?, ?, ?, 1)").run(eventID, eventType, event.msgtype || null, message.id, eventPart) // source 1 = discord
eventPart = 1 // TODO: use more intelligent algorithm to determine whether primary or supporting
eventIDs.push(eventID)

View file

@ -79,9 +79,10 @@ async function messageToEvent(message, guild, options = {}, di) {
const ref = message.message_reference
assert(ref)
assert(ref.message_id)
const row = db.prepare("SELECT room_id, event_id FROM event_message INNER JOIN channel_room USING (channel_id) WHERE channel_id = ? AND message_id = ?").get(ref.channel_id, ref.message_id)
if (!row) return []
const event = await di.api.getEvent(row.room_id, row.event_id)
const eventID = db.prepare("SELECT event_id FROM event_message WHERE message_id = ?").pluck().get(ref.message_id)
const roomID = db.prepare("SELECT room_id FROM channel_room WHERE channel_id = ?").pluck().get(ref.channel_id)
if (!eventID || !roomID) return []
const event = await di.api.getEvent(roomID, eventID)
return [{
...event.content,
$type: event.type
@ -118,7 +119,7 @@ async function messageToEvent(message, guild, options = {}, di) {
// Mentions scenarios 1 and 2, part A. i.e. translate relevant message.mentions to m.mentions
// (Still need to do scenarios 1 and 2 part B, and scenario 3.)
if (message.type === DiscordTypes.MessageType.Reply && message.message_reference?.message_id) {
const row = db.prepare("SELECT event_id, room_id, source FROM event_message INNER JOIN channel_room USING (channel_id) WHERE message_id = ? AND part = 0").get(message.message_reference.message_id)
const row = db.prepare("SELECT event_id, room_id, source FROM event_message INNER JOIN message_channel USING (message_id) INNER JOIN channel_room USING (channel_id) WHERE message_id = ? AND part = 0").get(message.message_reference.message_id)
if (row) {
repliedToEventId = row.event_id
repliedToEventRoomId = row.room_id
@ -144,9 +145,10 @@ async function messageToEvent(message, guild, options = {}, di) {
if (message.content) {
let content = message.content
content = content.replace(/https:\/\/(?:ptb\.|canary\.|www\.)?discord(?:app)?\.com\/channels\/([0-9]+)\/([0-9]+)\/([0-9]+)/, (whole, guildID, channelID, messageID) => {
const row = db.prepare("SELECT room_id, event_id FROM event_message INNER JOIN channel_room USING (channel_id) WHERE channel_id = ? AND message_id = ? AND part = 0").get(channelID, messageID)
if (row) {
return `https://matrix.to/#/${row.room_id}/${row.event_id}`
const eventID = db.prepare("SELECT event_id FROM event_message WHERE message_id = ?").pluck().get(messageID)
const roomID = db.prepare("SELECT room_id FROM channel_room WHERE channel_id = ?").pluck().get(channelID)
if (eventID && roomID) {
return `https://matrix.to/#/${roomID}/${eventID}`
} else {
return `${whole} [event not found]`
}

View file

@ -81,18 +81,18 @@ module.exports = {
async checkMissedMessages(client, guild) {
if (guild.unavailable) return
const bridgedChannels = db.prepare("SELECT channel_id FROM channel_room").pluck().all()
const prepared = db.prepare("SELECT message_id FROM event_message WHERE channel_id = ? AND message_id = ?").pluck()
const prepared = db.prepare("SELECT 1 FROM event_message WHERE message_id = ?").pluck()
for (const channel of guild.channels.concat(guild.threads)) {
if (!bridgedChannels.includes(channel.id)) continue
if (!channel.last_message_id) continue
const latestWasBridged = prepared.get(channel.id, channel.last_message_id)
const latestWasBridged = prepared.get(channel.last_message_id)
if (latestWasBridged) continue
/** More recent messages come first. */
console.log(`[check missed messages] in ${channel.id} (${guild.name} / ${channel.name}) because its last message ${channel.last_message_id} is not in the database`)
const messages = await client.snow.channel.getChannelMessages(channel.id, {limit: 50})
let latestBridgedMessageIndex = messages.findIndex(m => {
return prepared.get(channel.id, m.id)
return prepared.get(m.id)
})
console.log(`[check missed messages] got ${messages.length} messages; last message that IS bridged is at position ${latestBridgedMessageIndex} in the channel`)
if (latestBridgedMessageIndex === -1) latestBridgedMessageIndex = 1 // rather than crawling the ENTIRE channel history, let's just bridge the most recent 1 message to make it up to date.