adding some more notes

This commit is contained in:
Cadence Ember 2023-09-22 17:47:36 +12:00
parent a4c2d62331
commit 5dbcc74617
6 changed files with 84 additions and 6 deletions

View file

@ -2,6 +2,12 @@
const reg = require("../../matrix/read-registration")
// One compiled RegExp per entry in the registration's `namespaces.users` list.
const userRegex = reg.namespaces.users.map(u => new RegExp(u.regex))
const assert = require("assert").strict
// xxhash-wasm initialises asynchronously: `hasher` stays null until the promise below resolves.
// getEventIDHash asserts that it has been set before using it.
/** @type {import("xxhash-wasm").XXHashAPI} */ // @ts-ignore
let hasher = null
// Kick off loading at module load time and store the API object once it is ready.
// @ts-ignore
require("xxhash-wasm")().then(h => hasher = h)
/**
* Determine whether an event is the bridged representation of a discord message.
* Such messages shouldn't be bridged again.
@ -29,5 +35,25 @@ function getPublicUrlForMxc(mxc) {
else return null
}
/**
 * Produce a compact numeric stand-in for an event ID, suitable for storing in the database.
 *
 * Full event IDs are long and carry more entropy than is needed for lookups, so they are
 * hashed with 64-bit xxhash (non-cryptographic). A 32-bit hash is deliberately avoided because
 * birthday collisions would show up unreasonably often:
 * https://en.wikipedia.org/wiki/Birthday_attack#Mathematics
 *
 * xxhash yields an unsigned 64-bit integer. It is shifted down by 2^63 so that it lands in the
 * signed 64-bit range without losing any bits, which lets it be written to an SQLite integer
 * column as-is: https://www.sqlite.org/fileformat2.html#record_format
 *
 * @param {string} eventID
 * @returns {bigint} the hash, offset into the signed 64-bit range
 */
function getEventIDHash(eventID) {
	assert(hasher, "xxhash is not ready yet")
	// 2^63: subtracting this maps [0, 2^64) onto [-2^63, 2^63) one-to-one.
	const SIGNED_OFFSET = 0x8000000000000000n
	// The leading "$" sigil is the same on every event ID, so drop it before hashing
	// to raise the entropy per character that xxhash sees.
	const hashInput = (eventID[0] === "$" && eventID.length >= 13) ? eventID.slice(1) : eventID
	return hasher.h64(hashInput) - SIGNED_OFFSET
}
// Public API of this module.
module.exports.eventSenderIsFromDiscord = eventSenderIsFromDiscord
module.exports.getPublicUrlForMxc = getPublicUrlForMxc
module.exports.getEventIDHash = getEventIDHash