From 999276e4073ca0d941f4e7f936ececf94aa907da Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Fri, 13 Oct 2023 23:23:15 +1300 Subject: [PATCH] m->d: Fix HTML entities showing in reply preview --- m2d/converters/event-to-message.js | 13 ++++--- m2d/converters/event-to-message.test.js | 49 +++++++++++++++++++++++++ package-lock.json | 12 ++++++ package.json | 1 + readme.md | 1 + 5 files changed, 71 insertions(+), 5 deletions(-) diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js index ff31607..e7bbda5 100644 --- a/m2d/converters/event-to-message.js +++ b/m2d/converters/event-to-message.js @@ -5,6 +5,7 @@ const DiscordTypes = require("discord-api-types/v10") const chunk = require("chunk-text") const TurndownService = require("turndown") const assert = require("assert").strict +const entities = require("entities") const passthrough = require("../../passthrough") const {sync, db, discord, select, from} = passthrough @@ -349,11 +350,13 @@ async function eventToMessage(event, guild, di) { } else { const repliedToContent = repliedToEvent.content.formatted_body || repliedToEvent.content.body const contentPreviewChunks = chunk( - repliedToContent.replace(/.*<\/mx-reply>/, "") // Remove everything before replies, so just use the actual message body - .replace(/<blockquote>.*?<\/blockquote>/, "") // If the message starts with a blockquote, don't count it and use the message body afterwards - .replace(/(?:\n|<br>)+/g, " ") // Should all be on one line - .replace(/<span [^>]*data-mx-spoiler\b[^>]*>.*?<\/span>/g, "[spoiler]") // Good enough method of removing spoiler content. (I don't want to break out the HTML parser unless I have to.) - .replace(/<[^>]+>/g, ""), 50) // Completely strip all other formatting. + entities.decodeHTML5Strict( // Remove entities like &amp; &quot; + repliedToContent.replace(/.*<\/mx-reply>/, "") // Remove everything before replies, so just use the actual message body + .replace(/<blockquote>.*?<\/blockquote>/, "") // If the message starts with a blockquote, don't count it and use the message body afterwards + .replace(/(?:\n|<br>)+/g, " ") // Should all be on one line + .replace(/<span [^>]*data-mx-spoiler\b[^>]*>.*?<\/span>/g, "[spoiler]") // Good enough method of removing spoiler content. (I don't want to break out the HTML parser unless I have to.) + .replace(/<[^>]+>/g, "") // Completely strip all HTML tags and formatting. + ), 50) contentPreview = ":\n> " contentPreview += contentPreviewChunks.length > 1 ? contentPreviewChunks[0] + "..." : contentPreviewChunks[0] } diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index 41d63b2..1a1c3f0 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -813,6 +813,55 @@ test("event2message: should include a reply preview when message ends with a blo ) }) +test("event2message: entities are not escaped in main message or reply preview", async t => { + // Intended result: Testing? in italics, followed by the sequence "':.`[]&things + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "> <@cadence:cadence.moe> _Testing?_ \"':.`[]&things\n\n_Testing?_ \"':.`[]&things", + format: "org.matrix.custom.html", + formatted_body: "
In reply to @cadence:cadence.moe
Testing? \"':.`[]&things
<em>Testing?</em> &quot;':.`[]&amp;things", + "m.relates_to": { + "m.in_reply_to": { + event_id: "$yIWjZPi6Xk56fBxJwqV4ANs_hYLjnWI2cNKbZ2zwk60" + } + } + }, + event_id: "$2I7odT9okTdpwDcqOjkJb_A3utdO4V8Cp3LK6-Rvwcs", + room_id: "!fGgIymcYWOqjbSRUdV:cadence.moe" + }, data.guild.general, { + api: { + getEvent: mockGetEvent(t, "!fGgIymcYWOqjbSRUdV:cadence.moe", "$yIWjZPi6Xk56fBxJwqV4ANs_hYLjnWI2cNKbZ2zwk60", { + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + "msgtype": "m.text", + "body": "_Testing?_ \"':.`[]&things", + "format": "org.matrix.custom.html", + "formatted_body": "<em>Testing?</em> &quot;':.`[]&amp;things" + }, + event_id: "$yIWjZPi6Xk56fBxJwqV4ANs_hYLjnWI2cNKbZ2zwk60", + room_id: "!fGgIymcYWOqjbSRUdV:cadence.moe" + }) + } + }), + { + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "> <:L1:1144820033948762203><:L2:1144820084079087647>Ⓜ️**cadence [they]**:" + + "\n> Testing? \"':.`[]&things" + + "\n_Testing?_ \"':.\\`\\[\\]&things", + avatar_url: "https://matrix.cadence.moe/_matrix/media/r0/download/cadence.moe/azCAhThKTojXSZJRoWwZmhvU" + }] + } + ) +}) + test("event2message: editing a rich reply to a sim user", async t => { const eventsFetched = [] t.deepEqual( diff --git a/package-lock.json b/package-lock.json index d1ed09e..3847ee1 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,6 +14,7 @@ "chunk-text": "^2.0.1", "cloudstorm": "^0.8.0", "discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#abc56d544072a1dc5624adfea455b0e902adf7b3", + "entities": "^4.5.0", "giframe": "github:cloudrac3r/giframe#v0.4.1", "heatsync": "^2.4.1", "js-yaml": "^4.1.0", @@ -1096,6 +1097,17 @@ "once": "^1.4.0" } }, + "node_modules/entities": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz", + "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", + "engines": { + "node": ">=0.12" + }, + 
"funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, "node_modules/es-get-iterator": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/es-get-iterator/-/es-get-iterator-1.1.3.tgz", diff --git a/package.json b/package.json index e8eec83..6a9deea 100644 --- a/package.json +++ b/package.json @@ -20,6 +20,7 @@ "chunk-text": "^2.0.1", "cloudstorm": "^0.8.0", "discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#abc56d544072a1dc5624adfea455b0e902adf7b3", + "entities": "^4.5.0", "giframe": "github:cloudrac3r/giframe#v0.4.1", "heatsync": "^2.4.1", "js-yaml": "^4.1.0", diff --git a/readme.md b/readme.md index 77239f7..5c97745 100644 --- a/readme.md +++ b/readme.md @@ -164,6 +164,7 @@ Follow these steps: * (1) discord-markdown: This is my fork! I make sure it does what I want. * (0) giframe: This is my fork! It should do what I want. * (1) heatsync: Module hot-reloader that I trust. +* (0) entities: Looks fine. No dependencies. * (1) js-yaml: It seems to do what I want, and it's already pulled in by matrix-appservice. * (70) matrix-appservice: I wish it didn't pull in express :( * (0) minimist: It's already pulled in by better-sqlite3->prebuild-install