diff --git a/d2m/discord-packets.js b/d2m/discord-packets.js index 4d6e9fe..b8f8eec 100644 --- a/d2m/discord-packets.js +++ b/d2m/discord-packets.js @@ -68,6 +68,24 @@ const utils = { guild.stickers = message.d.stickers } + } else if (message.t === "GUILD_ROLE_CREATE" || message.t === "GUILD_ROLE_UPDATE" || message.t === "GUILD_ROLE_DELETE") { + const guild = client.guilds.get(message.d.guild_id) + /** ID of the cached role to remove (UPDATE or DELETE). Read from role_id when the packet has that key, otherwise from role.id. */ + const targetID = "role_id" in message.d ? message.d.role_id : message.d.role.id + /** Roles to add (CREATE or UPDATE). Stays empty when the packet has no role key, so nothing is added back. */ + const newRoles = [] + if ("role" in message.d) newRoles.push(message.d.role) + if (guild) { + const targetIndex = guild.roles.findIndex(r => r.id === targetID) + if (targetIndex !== -1) { + // Role already exists. Remove it and put the new role, if any, at the same index. + guild.roles.splice(targetIndex, 1, ...newRoles) + } else { + // Role doesn't already exist. Append the new role, if any. + guild.roles.push(...newRoles) + } + } + } else if (message.t === "THREAD_CREATE") { client.channels.set(message.d.id, message.d) diff --git a/docs/notes.md b/docs/notes.md index c908476..4ed8dc5 100644 --- a/docs/notes.md +++ b/docs/notes.md @@ -125,13 +125,31 @@ Can use custom transaction ID (?) to send the original timestamps to Matrix. See 3. Build replacement event with fallbacks. 4. Send to matrix. -## Reaction added +## Reaction added/removed/emoji removed/all removed -1. Add reaction on matrix. +m->d reactions will have to be sent as the bridge bot since webhooks cannot add reactions. This also means Discord users can't tell who reacted. We will have to tolerate this. -## Reaction removed +Database storage requirements for each kind of event: -1. Remove reaction on matrix. Just redact the event. +**Added** + +N/A + +**Removed d->m** + +Need to know the event ID of the reaction event so we can redact it. We can look it up with `/v1/rooms/!x/relations/$x/m.annotation`. (If the message was edited, use its original event ID in the query.) 
This gets all event details for all reactions from the homeserver. + +If it is a custom emoji, we will need to use the existing `emoji` table to resolve the emoji ID to the key. + +Then we can pick the one to redact based on the `key` and `sender` and redact it. + +This also works for _remove emoji_ and _remove all_. + +**Removed m->d** + +Need to know the discord ID of the message that was reacted to. If we know the event ID of what was reacted to, we can look up the Discord ID in the usual database. Unfortunately, after a reaction has been redacted, it's already too late to look up which event it was redacted from. + +So we do need a database table. It will only hold reactions that were sent by Matrix users and were successfully bridged. It will associate the reaction event ID with the Discord message ID it was reacted on (skipping the middleman). ## Member data changed diff --git a/m2d/converters/utils.js b/m2d/converters/utils.js index 9689252..4f74c6b 100644 --- a/m2d/converters/utils.js +++ b/m2d/converters/utils.js @@ -2,6 +2,12 @@ const reg = require("../../matrix/read-registration") const userRegex = reg.namespaces.users.map(u => new RegExp(u.regex)) +const assert = require("assert").strict +/** @type {import("xxhash-wasm").XXHashAPI} */ // @ts-ignore +let hasher = null // stays null until the xxhash-wasm promise below resolves +// @ts-ignore +require("xxhash-wasm")().then(h => hasher = h) + /** * Determine whether an event is the bridged representation of a discord message. * Such messages shouldn't be bridged again. @@ -29,5 +35,25 @@ function getPublicUrlForMxc(mxc) { else return null } +/** + * Event IDs are really big and have more entropy than we need. + * If we want to store the event ID in the database, we can store a more compact version by hashing it with this. + * A 64-bit non-cryptographic hash is used because a hash of only 32 bits would see birthday collisions unreasonably frequently: https://en.wikipedia.org/wiki/Birthday_attack#Mathematics + * xxhash (h64) outputs an unsigned 64-bit integer as a BigInt. 
+ * The returned BigInt is that value shifted down by 2^63 into the signed 64-bit range, with no bit loss, so that it can be stored in an SQLite integer field as-is: https://www.sqlite.org/fileformat2.html#record_format + * This should give very efficient storage with sufficient entropy. + * @param {string} eventID event ID to hash; a leading "$" is dropped first when the ID is at least 13 characters long + */ +function getEventIDHash(eventID) { + assert(hasher, "xxhash is not ready yet") + if (eventID[0] === "$" && eventID.length >= 13) { + eventID = eventID.slice(1) // drop the "$" prefix to increase entropy per character and potentially help xxhash + } + const unsignedHash = hasher.h64(eventID) + const signedHash = unsignedHash - 0x8000000000000000n // subtract 2^63 to shift the unsigned hash down into the signed 64-bit range + return signedHash +} + module.exports.eventSenderIsFromDiscord = eventSenderIsFromDiscord module.exports.getPublicUrlForMxc = getPublicUrlForMxc +module.exports.getEventIDHash = getEventIDHash diff --git a/package-lock.json b/package-lock.json index e561c24..6071d1e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,8 @@ "prettier-bytes": "^1.0.4", "snowtransfer": "^0.8.0", "try-to-catch": "^3.0.1", - "turndown": "^7.1.2" + "turndown": "^7.1.2", + "xxhash-wasm": "^1.0.2" }, "devDependencies": { "@types/node": "^18.16.0", @@ -3339,6 +3340,11 @@ "integrity": "sha512-23LJhkIw940uTcDFyJZmNyO0z8lEINOTGCr4vR5YCG3urkdXwduRIhivBm9wKaVynLHYvxoHHYbKsDiafCLp6w==", "dev": true }, + "node_modules/xxhash-wasm": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/xxhash-wasm/-/xxhash-wasm-1.0.2.tgz", + "integrity": "sha512-ibF0Or+FivM9lNrg+HGJfVX8WJqgo+kCLDc4vx6xMeTce7Aj+DLttKbxxRR/gNLSAelRc1omAPlJ77N/Jem07A==" + }, "node_modules/y18n": { "version": "5.0.8", "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz", diff --git a/package.json b/package.json index a8796b3..1b92aaa 100644 --- a/package.json +++ b/package.json @@ -30,7 +30,8 @@ "prettier-bytes": "^1.0.4", "snowtransfer": "^0.8.0", "try-to-catch": "^3.0.1", - "turndown": "^7.1.2" + "turndown": "^7.1.2", + "xxhash-wasm": "^1.0.2" }, "devDependencies": { 
"@types/node": "^18.16.0", diff --git a/readme.md b/readme.md index 6d623ae..aac392b 100644 --- a/readme.md +++ b/readme.md @@ -5,6 +5,7 @@ Modern Matrix-to-Discord appservice bridge. ## Why a new bridge? * Modern: Supports new Discord features like replies, threads and stickers, and new Matrix features like edits, spaces and space membership. +* Efficient: Special attention has been given to memory usage, database indexes, disk footprint, runtime algorithms, and queries to the homeserver. * Reliable: Any errors on either side are notified on Matrix and can be retried. * Tested: A test suite and code coverage make sure all the core logic works. * Simple development: No build step (it's JavaScript, not TypeScript), minimal/lightweight dependencies, and abstraction only where necessary so that less background knowledge is required. No need to learn about Intents or library functions. @@ -39,6 +40,14 @@ Most features you'd expect in both directions, plus a little extra spice: * This bridge is not designed for puppetting. * Some aspects of this bridge are customised for my homeserver. I'm working over time to make it more general. Please please reach out to @cadence:cadence.moe if you would like to run this, and I'll work with you to get it running! +## Efficiency details + +Using WeatherStack as a thin layer between the bridge application and the Discord API lets us control exactly what data is cached. Only necessary information is cached. For example, member data, user data, message content, and past edits are never stored in memory. This keeps the memory usage low and also prevents it ballooning in size over the bridge's runtime. + +The bridge uses a small SQLite database to store relationships like which Discord messages correspond to which Matrix messages. This is so the bridge knows what to edit when some message is edited on Discord. Using `without rowid` on the database tables stores the index and the data in the same B-tree. 
Since Matrix and Discord's internal IDs are quite long, this vastly reduces storage space because those IDs do not have to be stored twice separately. On my personal instance of OOYE, every 100,000 messages sent require only 17.7 MB of storage space in the SQLite database. + +Only necessary data and columns are queried from the database. We only contact the homeserver API if the database doesn't contain what we need. + # Development information ## You will need