m->d: Fix message ID guess on plaintext events

This commit is contained in:
Cadence Ember 2023-12-02 17:13:10 +13:00
parent 4dcdd0287e
commit c8742f9512
3 changed files with 121 additions and 36 deletions

View file

@ -41,6 +41,7 @@ function encodeEmoji(input, shortcode) {
"%F0%9F%90%88", // 🐈
"%E2%9D%93", // ❓
"%F0%9F%8F%86", // 🏆️
"%F0%9F%93%9A", // 📚️
]
discordPreferredEncoding =

View file

@ -259,6 +259,62 @@ async function uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles)
return content
}
/**
 * Rewrites matrix.to links to rooms and events found in `input` so that they refer to the matching Discord channel or message.
 *
 * Two contexts are recognised, decided by what directly follows the link:
 * - html: the link is followed by `">`, i.e. it is a quoted attribute value. The attribute is kept and
 *   `data-channel-id` (plus `data-guild-id` and `data-message-id` for event links) attributes are appended after it.
 * - plain: the link is followed by a comma, whitespace, `)` or the end of the input. A room link becomes `<#channelID>`
 *   and an event link becomes a `https://discord.com/channels/guildID/channelID/messageID` URL. The terminator is preserved.
 *
 * Links are left untouched when the room has no `channel_room` row, when the channel's guild is not known,
 * when the original event cannot be fetched, or when the link is directly followed by "In reply to".
 *
 * NOTE(review): a rejection from `di.api.getEvent` is not caught here and propagates to the caller.
 *
 * @param {string} input text or HTML to scan for matrix.to links
 * @param {{api: import("../../matrix/api")}} di simple-as-nails dependency injection for the matrix API
 * @returns {Promise<string>} the input with every resolvable link rewritten
 */
async function handleRoomOrMessageLinks(input, di) {
	// The dot in "matrix.to" is escaped so that only the real host matches.
	// IDs and the query string stop at any whitespace (not only a space), so a bare link that ends its line is still recognised
	// and the following line is never swallowed into the room or event ID.
	const linkPattern = /("?https:\/\/matrix\.to\/#\/(![^"/,\s?)]+)(?:\/(\$[^"/\s?)]+))?(?:\?[^",:!?\s)]*)?)(">|[,\s)]|$)/g

	// Match indexes refer to the original string. `offset` tracks how much earlier replacements have grown or shrunk `input`.
	let offset = 0
	for (const match of [...input.matchAll(linkPattern)]) {
		assert(typeof match.index === "number")
		const [wholeMatch, attributeValue, roomID, eventID, endMarker] = match
		const isHTML = endMarker === '">'

		/** @param {string} channelID */
		const makeRoomLink = channelID => isHTML
			? `${attributeValue}" data-channel-id="${channelID}">`
			: `<#${channelID}>${endMarker}`

		/**
		 * @param {string} guildID
		 * @param {string} channelID
		 * @param {string} messageID
		 */
		const makeMessageLink = (guildID, channelID, messageID) => isHTML
			? `${attributeValue}" data-channel-id="${channelID}" data-guild-id="${guildID}" data-message-id="${messageID}">`
			: `https://discord.com/channels/${guildID}/${channelID}/${messageID}${endMarker}`

		const start = match.index + offset
		const end = start + wholeMatch.length

		// Don't process links that are part of the reply fallback, they'll be removed entirely by turndown
		if (input.slice(end).startsWith("In reply to")) continue

		const channelID = select("channel_room", "channel_id", {room_id: roomID}).pluck().get()
		if (!channelID) continue

		let result
		if (!eventID) {
			// 1: It's a room link, so <#link> to the channel
			result = makeRoomLink(channelID)
		} else {
			// Linking to a particular event with a discord.com/channels/guildID/channelID/messageID link
			// Need to know the guildID and messageID
			const guildID = discord.channels.get(channelID)?.["guild_id"]
			if (!guildID) continue
			const messageID = select("event_message", "message_id", {event_id: eventID}).pluck().get()
			if (messageID) {
				// 2: Linking to a known event
				result = makeMessageLink(guildID, channelID, messageID)
			} else {
				// 3: Linking to an unknown event that OOYE didn't originally bridge - we can guess messageID from the timestamp
				const originalEvent = await di.api.getEvent(roomID, eventID)
				if (!originalEvent) continue
				const guessedMessageID = dUtils.timestampToSnowflakeInexact(originalEvent.origin_server_ts)
				result = makeMessageLink(guildID, channelID, guessedMessageID)
			}
		}

		input = input.slice(0, start) + result + input.slice(end)
		offset += result.length - wholeMatch.length
	}
	return input
}
/**
* @param {Ty.Event.Outer_M_Room_Message | Ty.Event.Outer_M_Room_Message_File | Ty.Event.Outer_M_Sticker | Ty.Event.Outer_M_Room_Message_Encrypted_File} event
* @param {import("discord-api-types/v10").APIGuild} guild
@ -409,41 +465,7 @@ async function eventToMessage(event, guild, di) {
})
// Handling mentions of rooms and room-messages
let offset = 0
for (const match of [...input.matchAll(/("https:\/\/matrix.to\/#\/(![^"/?]+)(?:\/(\$[^"/?]+))?(?:\?[^"]*)?")>/g)]) {
assert(typeof match.index === "number")
const [_, attributeValue, roomID, eventID] = match
let result
// Don't process links that are part of the reply fallback, they'll be removed entirely by turndown
if (input.slice(match.index + match[0].length + offset).startsWith("In reply to")) continue
const channelID = select("channel_room", "channel_id", {room_id: roomID}).pluck().get()
if (!channelID) continue
if (!eventID) {
// 1: It's a room link, so <#link> to the channel
result = `${attributeValue} data-channel-id="${channelID}">`
} else {
// Linking to a particular event with a discord.com/channels/guildID/channelID/messageID link
// Need to know the guildID and messageID
const guildID = discord.channels.get(channelID)?.["guild_id"]
if (!guildID) continue
const messageID = select("event_message", "message_id", {event_id: eventID}).pluck().get()
if (messageID) {
// 2: Linking to a known event
result = `${attributeValue} data-channel-id="${channelID}" data-guild-id="${guildID}" data-message-id="${messageID}">`
} else {
// 3: Linking to an unknown event that OOYE didn't originally bridge - we can guess messageID from the timestamp
const originalEvent = await di.api.getEvent(roomID, eventID)
if (!originalEvent) continue
const guessedMessageID = dUtils.timestampToSnowflakeInexact(originalEvent.origin_server_ts)
result = `${attributeValue} data-channel-id="${channelID}" data-guild-id="${guildID}" data-message-id="${guessedMessageID}">`
}
}
input = input.slice(0, match.index + offset) + result + input.slice(match.index + match[0].length + offset)
offset += result.length - match[0].length
}
input = await handleRoomOrMessageLinks(input, di)
// Stripping colons after mentions
input = input.replace(/( data-user-id.*?<\/a>):?/g, "$1")
@ -504,6 +526,8 @@ async function eventToMessage(event, guild, di) {
content = `* ${displayName} ${content}`
}
content = await handleRoomOrMessageLinks(content, di)
// Markdown needs to be escaped, though take care not to escape the middle of links
// @ts-ignore bad type from turndown
content = turndownService.escape(content)

View file

@ -1725,7 +1725,67 @@ test("event2message: mentioning bridged rooms works", async t => {
)
})
test("event2message: mentioning known bridged events works", async t => {
test("event2message: mentioning known bridged events works (plaintext body)", async t => {
	// Plaintext-only message whose body contains a matrix.to event link directly followed by a comma
	const matrixEvent = {
		content: {
			msgtype: "m.text",
			body: "it was uploaded earlier in https://matrix.to/#/!CzvdIdUQXgUjDVKxeU:cadence.moe/$zXSlyI78DQqQwwfPUSzZ1b-nXzbUrCDljJgnGDdoI10?via=cadence.moe, take a look!"
		},
		event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
		origin_server_ts: 1688301929913,
		room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
		sender: "@cadence:cadence.moe",
		type: "m.room.message",
		unsigned: {
			age: 405299
		}
	}
	const converted = await eventToMessage(matrixEvent)

	// The link is expected to become a discord.com/channels URL, with the trailing ", take a look!" kept intact
	const outgoingMessage = {
		username: "cadence [they]",
		content: "it was uploaded earlier in https://discord.com/channels/497159726455455754/497161350934560778/1141619794500649020, take a look!",
		avatar_url: undefined
	}
	t.deepEqual(converted, {
		ensureJoined: [],
		messagesToDelete: [],
		messagesToEdit: [],
		messagesToSend: [outgoingMessage]
	})
})
test("event2message: mentioning known bridged events works (partially formatted body)", async t => {
	// HTML-formatted message whose formatted_body contains the matrix.to event link as bare text rather than inside an <a> tag.
	// The plain body is deliberately "wrong body" so the expected output can only come from formatted_body.
	const matrixEvent = {
		content: {
			msgtype: "m.text",
			body: "wrong body",
			format: "org.matrix.custom.html",
			formatted_body: `it was uploaded earlier in https://matrix.to/#/!CzvdIdUQXgUjDVKxeU:cadence.moe/$zXSlyI78DQqQwwfPUSzZ1b-nXzbUrCDljJgnGDdoI10?via=cadence.moe`
		},
		event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
		origin_server_ts: 1688301929913,
		room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
		sender: "@cadence:cadence.moe",
		type: "m.room.message",
		unsigned: {
			age: 405299
		}
	}
	const converted = await eventToMessage(matrixEvent)

	// The link ends the input here, and is expected to become a discord.com/channels URL
	const outgoingMessage = {
		username: "cadence [they]",
		content: "it was uploaded earlier in https://discord.com/channels/497159726455455754/497161350934560778/1141619794500649020",
		avatar_url: undefined
	}
	t.deepEqual(converted, {
		ensureJoined: [],
		messagesToDelete: [],
		messagesToEdit: [],
		messagesToSend: [outgoingMessage]
	})
})
test("event2message: mentioning known bridged events works (formatted body)", async t => {
t.deepEqual(
await eventToMessage({
content: {