From 369e0862e52709d4f8ab9618ba66b84e1713c555 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 14 Feb 2024 11:04:54 +1300 Subject: [PATCH 1/3] m->d: Fix reply previews saying undefined --- m2d/converters/event-to-message.js | 45 +++++++++--- m2d/converters/event-to-message.test.js | 94 +++++++++++++++++++++++++ test/ooye-test-data.sql | 3 +- 3 files changed, 130 insertions(+), 12 deletions(-) diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js index ce5cfcd..8b601a4 100644 --- a/m2d/converters/event-to-message.js +++ b/m2d/converters/event-to-message.js @@ -522,17 +522,40 @@ async function eventToMessage(event, guild, di) { } else if (repliedToEvent.unsigned?.redacted_because) { contentPreview = " (in reply to a deleted message)" } else { - const repliedToContent = repliedToEvent.content.formatted_body || repliedToEvent.content.body - const contentPreviewChunks = chunk( - entities.decodeHTML5Strict( // Remove entities like & " - repliedToContent.replace(/.*<\/mx-reply>/s, "") // Remove everything before replies, so just use the actual message body - .replace(/^\s*
.*?<\/blockquote>(.....)/s, "$1") // If the message starts with a blockquote, don't count it and use the message body afterwards - .replace(/(?:\n|
)+/g, " ") // Should all be on one line - .replace(/]*data-mx-spoiler\b[^>]*>.*?<\/span>/g, "[spoiler]") // Good enough method of removing spoiler content. (I don't want to break out the HTML parser unless I have to.) - .replace(/<[^>]+>/g, "") // Completely strip all HTML tags and formatting. - ), 50) - contentPreview = ":\n> " + contentPreviewChunks[0] - if (contentPreviewChunks.length > 1) contentPreview = contentPreview.replace(/[,.']$/, "") + "..." + // Generate a reply preview for a standard message + /** @type {string} */ + let repliedToContent = repliedToEvent.content.formatted_body || repliedToEvent.content.body + repliedToContent = repliedToContent.replace(/.*<\/mx-reply>/s, "") // Remove everything before replies, so just use the actual message body + repliedToContent = repliedToContent.replace(/^\s*
.*?<\/blockquote>(.....)/s, "$1") // If the message starts with a blockquote, don't count it and use the message body afterwards + repliedToContent = repliedToContent.replace(/(?:\n|
)+/g, " ") // Should all be on one line + repliedToContent = repliedToContent.replace(/]*data-mx-spoiler\b[^>]*>.*?<\/span>/g, "[spoiler]") // Good enough method of removing spoiler content. (I don't want to break out the HTML parser unless I have to.) + repliedToContent = repliedToContent.replace(/]*)>/g, (_, att) => { // Convert Matrix emoji images into Discord emoji markdown + if (!att.includes("data-mx-emoticon")) return "" + // Try to get the equivalent Discord emoji, if there is a src and if we know about it + const mxcUrlMatch = att.match(/\bsrc="(mxc:\/\/[^"]+)"/) + if (mxcUrlMatch) { + const row = select("emoji", ["emoji_id", "name", "animated"], {mxc_url: mxcUrlMatch[1]}).get() + if (row) { + const animatedChar = row.animated ? "a" : "" + return `<${animatedChar}:${row.name}:${row.emoji_id}>` + } + } + // Emoji is unknown or inaccessible, try substituting the title text instead + const titleTextMatch = att.match(/\btitle=":?([^:"]+)/) + if (titleTextMatch) return `:${titleTextMatch[1]}:` + // Otherwise we can't use the emoji. + return "" + }) + repliedToContent = repliedToContent.replace(/<[^:>][^>]*>/g, "") // Completely strip all HTML tags and formatting. + repliedToContent = entities.decodeHTML5Strict(repliedToContent) // Remove entities like & " + const contentPreviewChunks = chunk(repliedToContent, 50) + if (contentPreviewChunks.length) { + contentPreview = ":\n> " + contentPreviewChunks[0] + if (contentPreviewChunks.length > 1) contentPreview = contentPreview.replace(/[,.']$/, "") + "..." + } else { + console.log("Unable to generate reply preview for this replied-to event because we stripped all of it:", repliedToEvent) + contentPreview = "" + } } replyLine = `> ${replyLine}${contentPreview}\n` })() diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index fb21098..2c42c3e 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -1292,6 +1292,100 @@ test("event2message: entities are not escaped in main message or reply preview", ) }) +test("event2message: reply preview converts emoji formatting when replying to a known custom emoji", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> :hippo:\n\nreply", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
\":hippo:\"
reply", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs" + } + } + }, + event_id: "$bCMLaLiMfoRajaGTgzaxAci-g8hJfkspVJIKwYktnvc", + room_id: "!TqlyQmifxGUggEmdBN:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!TqlyQmifxGUggEmdBN:cadence.moe", "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: ":hippo:", + format: "org.matrix.custom.html", + formatted_body: "\":hippo:\"" + } + }) + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" + + "\n> <:hippo:230201364309868544>" + + "\nreply", + avatar_url: undefined + }] + } + ) +}) + +test("event2message: reply preview uses emoji title text when replying to an unknown custom emoji", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> :hippo:\n\nreply", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
\":hippo:\"
reply", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs" + } + } + }, + event_id: "$bCMLaLiMfoRajaGTgzaxAci-g8hJfkspVJIKwYktnvc", + room_id: "!TqlyQmifxGUggEmdBN:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!TqlyQmifxGUggEmdBN:cadence.moe", "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: ":hippo:", + format: "org.matrix.custom.html", + formatted_body: "\":hippo:\"" + } + }) + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" + + "\n> :hippo:" + + "\nreply", + avatar_url: undefined + }] + } + ) +}) + test("event2message: editing a rich reply to a sim user", async t => { const eventsFetched = [] t.deepEqual( diff --git a/test/ooye-test-data.sql b/test/ooye-test-data.sql index 023d5bb..b41d609 100644 --- a/test/ooye-test-data.sql +++ b/test/ooye-test-data.sql @@ -117,7 +117,8 @@ INSERT INTO member_cache (room_id, mxid, displayname, avatar_url) VALUES ('!CzvdIdUQXgUjDVKxeU:cadence.moe', '@cadence:cadence.moe', 'cadence [they]', 'mxc://cadence.moe/azCAhThKTojXSZJRoWwZmhvU'), ('!cBxtVRxDlZvSVhJXVK:cadence.moe', '@Milan:tchncs.de', 'Milan', NULL), ('!TqlyQmifxGUggEmdBN:cadence.moe', '@ampflower:matrix.org', 'Ampflower 🌺', 'mxc://cadence.moe/PRfhXYBTOalvgQYtmCLeUXko'), -('!TqlyQmifxGUggEmdBN:cadence.moe', '@aflower:syndicated.gay', 'Rose', 'mxc://syndicated.gay/ZkBUPXCiXTjdJvONpLJmcbKP'); +('!TqlyQmifxGUggEmdBN:cadence.moe', '@aflower:syndicated.gay', 'Rose', 'mxc://syndicated.gay/ZkBUPXCiXTjdJvONpLJmcbKP'), +('!TqlyQmifxGUggEmdBN:cadence.moe', '@cadence:cadence.moe', 'cadence [they]', NULL); INSERT INTO lottie (sticker_id, mxc_url) VALUES ('860171525772279849', 'mxc://cadence.moe/ZtvvVbwMIdUZeovWVyGVFCeR'); From 67939860b231068f033800a8a58ae5393392a493 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 14 Feb 2024 11:32:07 +1300 Subject: [PATCH 2/3] Consolidate convertEmoji function --- m2d/converters/event-to-message.js | 86 ++++++++++++++----------- m2d/converters/event-to-message.test.js | 51 ++++++++++++++- 2 files changed, 97 insertions(+), 40 deletions(-) diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js index 8b601a4..6ddbd26 100644 --- a/m2d/converters/event-to-message.js +++ b/m2d/converters/event-to-message.js @@ -162,32 +162,8 @@ turndownService.addRule("emoji", { replacement: function (content, node) { const mxcUrl = node.getAttribute("src") - // Get the known emoji from the database. (We may not be able to actually use this if it was from another server.) - const row = select("emoji", ["emoji_id", "name", "animated"], {mxc_url: mxcUrl}).get() - // Also guess a suitable emoji based on the ID (if available) or name - let guess = null const guessedName = node.getAttribute("title").replace(/^:|:$/g, "") - for (const guild of discord.guilds.values()) { - /** @type {{name: string, id: string, animated: number}[]} */ - // @ts-ignore - const emojis = guild.emojis - const match = emojis.find(e => e.id === row?.emoji_id) || emojis.find(e => e.name === guessedName) || emojis.find(e => e.name?.toLowerCase() === guessedName.toLowerCase()) - if (match) { - guess = match - break - } - } - if (guess) { - // We know an emoji, and we can use it - const animatedChar = guess.animated ? "a" : "" - return `<${animatedChar}:${guess.name}:${guess.id}>` - } else if (endOfMessageEmojis.includes(mxcUrl)) { - // We can't locate or use a suitable emoji. After control returns, it will rewind over this, delete this section, and upload the emojis as a sprite sheet. - return `<::>` - } else { - // We prefer not to upload this as a sprite sheet because the emoji is not at the end of the message, it is in the middle. - return `[${node.getAttribute("title")}](${mxUtils.getPublicUrlForMxc(mxcUrl)})` - } + return convertEmoji(mxcUrl, guessedName, true, true) } }) @@ -216,6 +192,52 @@ turndownService.addRule("fencedCodeBlock", { } }) +/** + * @param {string | null} mxcUrl + * @param {string | null} nameForGuess without colons + * @param {boolean} allowSpriteSheetIndicator + * @param {boolean} allowLink + * @returns {string} discord markdown that represents the custom emoji in some form + */ +function convertEmoji(mxcUrl, nameForGuess, allowSpriteSheetIndicator, allowLink) { + // Get the known emoji from the database. + let row + if (mxcUrl) row = select("emoji", ["emoji_id", "name", "animated"], {mxc_url: mxcUrl}).get() + if (!row && nameForGuess) { + // We don't know the emoji, but we could guess a suitable emoji based on the name + const nameForGuessLower = nameForGuess.toLowerCase() + for (const guild of discord.guilds.values()) { + /** @type {{name: string, id: string, animated: number}[]} */ + // @ts-ignore + const emojis = guild.emojis + const found = emojis.find(e => e.name?.toLowerCase() === nameForGuessLower) + if (found) { + row = { + animated: found.animated, + emoji_id: found.id, + name: found.name + } + break + } + } + } + if (row) { + // We know an emoji, and we can use it + const animatedChar = row.animated ? "a" : "" + return `<${animatedChar}:${row.name}:${row.emoji_id}>` + } else if (allowSpriteSheetIndicator && mxcUrl && endOfMessageEmojis.includes(mxcUrl)) { + // We can't locate or use a suitable emoji. After control returns, it will rewind over this, delete this section, and upload the emojis as a sprite sheet. + return `<::>` + } else if (allowLink && mxcUrl && nameForGuess) { + // We prefer not to upload this as a sprite sheet because the emoji is not at the end of the message, it is in the middle. + return `[:${nameForGuess}:](${mxUtils.getPublicUrlForMxc(mxcUrl)})` + } else if (nameForGuess) { + return `:${nameForGuess}:` + } else { + return "" + } +} + /** * @param {string} roomID * @param {string} mxid @@ -530,21 +552,9 @@ async function eventToMessage(event, guild, di) { repliedToContent = repliedToContent.replace(/(?:\n|
)+/g, " ") // Should all be on one line repliedToContent = repliedToContent.replace(/]*data-mx-spoiler\b[^>]*>.*?<\/span>/g, "[spoiler]") // Good enough method of removing spoiler content. (I don't want to break out the HTML parser unless I have to.) repliedToContent = repliedToContent.replace(/]*)>/g, (_, att) => { // Convert Matrix emoji images into Discord emoji markdown - if (!att.includes("data-mx-emoticon")) return "" - // Try to get the equivalent Discord emoji, if there is a src and if we know about it const mxcUrlMatch = att.match(/\bsrc="(mxc:\/\/[^"]+)"/) - if (mxcUrlMatch) { - const row = select("emoji", ["emoji_id", "name", "animated"], {mxc_url: mxcUrlMatch[1]}).get() - if (row) { - const animatedChar = row.animated ? "a" : "" - return `<${animatedChar}:${row.name}:${row.emoji_id}>` - } - } - // Emoji is unknown or inaccessible, try substituting the title text instead const titleTextMatch = att.match(/\btitle=":?([^:"]+)/) - if (titleTextMatch) return `:${titleTextMatch[1]}:` - // Otherwise we can't use the emoji. - return "" + return convertEmoji(mxcUrlMatch?.[1], titleTextMatch?.[1], false, false) }) repliedToContent = repliedToContent.replace(/<[^:>][^>]*>/g, "") // Completely strip all HTML tags and formatting. repliedToContent = entities.decodeHTML5Strict(repliedToContent) // Remove entities like & " diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index 2c42c3e..7cc0fdd 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -1339,7 +1339,7 @@ test("event2message: reply preview converts emoji formatting when replying to a ) }) -test("event2message: reply preview uses emoji title text when replying to an unknown custom emoji", async t => { +test("event2message: reply preview can guess custom emoji based on the name if it is unknown", async t => { t.deepEqual( await eventToMessage({ type: "m.room.message", @@ -1378,7 +1378,54 @@ test("event2message: reply preview uses emoji title text when replying to an unk messagesToSend: [{ username: "cadence [they]", content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" - + "\n> :hippo:" + + "\n> <:hippo:230201364309868544>" + + "\nreply", + avatar_url: undefined + }] + } + ) +}) + +test("event2message: reply preview uses emoji title text when replying to an unknown custom emoji", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> :svkftngur_gkdne:\n\nreply", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
\":svkftngur_gkdne:\"
reply", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs" + } + } + }, + event_id: "$bCMLaLiMfoRajaGTgzaxAci-g8hJfkspVJIKwYktnvc", + room_id: "!TqlyQmifxGUggEmdBN:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!TqlyQmifxGUggEmdBN:cadence.moe", "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: ":svkftngur_gkdne:", + format: "org.matrix.custom.html", + formatted_body: "\":svkftngur_gkdne:\"" + } + }) + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" + + "\n> :svkftngur_gkdne:" + "\nreply", avatar_url: undefined }] From 310bf2282cc757bedace903fd4705f5f12fce48d Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Wed, 14 Feb 2024 11:39:50 +1300 Subject: [PATCH 3/3] Code coverage for silly reply previews --- m2d/converters/event-to-message.test.js | 93 +++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index 7cc0fdd..2b354e0 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -1433,6 +1433,99 @@ test("event2message: reply preview uses emoji title text when replying to an unk ) }) +test("event2message: reply preview ignores garbage image", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> I am having AAAA a nice day\n\nreply", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
I am having a nice day
reply", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs" + } + } + }, + event_id: "$bCMLaLiMfoRajaGTgzaxAci-g8hJfkspVJIKwYktnvc", + room_id: "!TqlyQmifxGUggEmdBN:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!TqlyQmifxGUggEmdBN:cadence.moe", "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "I am having AAAA a nice day", + format: "org.matrix.custom.html", + formatted_body: "I am having a nice day" + } + }) + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" + + "\n> I am having a nice day" + + "\nreply", + avatar_url: undefined + }] + } + ) +}) + +test("event2message: reply to empty message doesn't show an extra line or anything", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> \n\nreply", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
reply", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs" + } + } + }, + event_id: "$bCMLaLiMfoRajaGTgzaxAci-g8hJfkspVJIKwYktnvc", + room_id: "!TqlyQmifxGUggEmdBN:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!TqlyQmifxGUggEmdBN:cadence.moe", "$zmO-dtPO6FubBkDxJZ5YmutPIsG1RgV5JJku-9LeGWs", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "", + format: "org.matrix.custom.html", + formatted_body: "" + } + }) + } + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**" + + "\nreply", + avatar_url: undefined + }] + } + ) +}) + test("event2message: editing a rich reply to a sim user", async t => { const eventsFetched = [] t.deepEqual(