From 5185ae45ab7ec7364b3160d65c26cc7dc2102173 Mon Sep 17 00:00:00 2001
From: Cadence Ember
Date: Mon, 4 Sep 2023 01:37:33 +1200
Subject: [PATCH] m->d fix a few markdown escaping problems

---
 m2d/converters/event-to-message.js      | 43 +++++++++++++--
 m2d/converters/event-to-message.test.js | 72 ++++++++++++++++++++++---
 2 files changed, 105 insertions(+), 10 deletions(-)

diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js
index 1eb1be2..1e66c7a 100644
--- a/m2d/converters/event-to-message.js
+++ b/m2d/converters/event-to-message.js
@@ -22,13 +22,48 @@ const BLOCK_ELEMENTS = [
 	"TFOOT", "TH", "THEAD", "TR", "UL"
 ]
 
+// Replacements applied, in order, to each space-separated word handed over by turndownService.escape.
+// The backslash rule must run first so that backslashes inserted by the later rules are not escaped again.
+// A word never contains a space but may contain newlines, so the line-anchored rules use the m flag and
+// the list/heading markers are matched as a whole word ("$") instead of expecting a trailing space.
+/** @type {[RegExp, string][]} */
+const markdownEscapes = [
+	[/\\/g, '\\\\'],
+	[/[*_`[\]]/g, '\\$&'],
+	[/^-/gm, '\\-'],
+	[/^>/gm, '\\>'],
+	[/^(=+)/gm, '\\$1'],
+	[/^~~~/gm, '\\~~~'],
+	[/^\+$/gm, '\\+'],
+	[/^(#{1,6})$/gm, '\\$1'],
+	[/^(\d+)\.$/gm, '$1\\.']
+]
+
 const turndownService = new TurndownService({
 	hr: "----",
 	headingStyle: "atx",
 	preformattedCode: true,
-	codeBlockStyle: "fenced"
+	codeBlockStyle: "fenced",
 })
 
+/**
+ * Escape markdown syntax in a piece of text so that it is displayed literally.
+ * The text is split on single spaces and each word is run through markdownEscapes on its own.
+ * A word beginning with http:// or https:// is treated as a bare link and returned untouched,
+ * because inserting backslashes into the middle of a URL would break it.
+ * Assigned over turndownService.escape, and also called directly by eventToMessage.
+ * @param {string} string text to escape
+ * @returns {string} the same text with every non-link word escaped
+ */
+// @ts-ignore bad type from turndown
+turndownService.escape = function (string) {
+	return string.split(" ").map(word => {
+		// Links are passed through as-is; every other word gets the replacements applied in order
+		if (/^https?:\/\//.test(word)) return word
+		return markdownEscapes.reduce((escaped, [pattern, replacement]) => escaped.replace(pattern, replacement), word)
+	}).join(" ")
+}
+
 turndownService.remove("mx-reply")
 
 turndownService.addRule("strikethrough", {
@@ -67,7 +102,6 @@ turndownService.addRule("spoiler", {
 turndownService.addRule("inlineLink", {
 	filter: function (node, options) {
 		return (
-			options.linkStyle === "inlined" &&
 			node.nodeName === "A" &&
 			node.getAttribute("href")
 		)
@@ -275,8 +309,9 @@
async function eventToMessage(event, guild, di) { content = `* ${displayName} ${content}` } - // Markdown needs to be escaped - content = content.replace(/([*_~`#])/g, `\\$1`) + // Markdown needs to be escaped, though take care not to escape the middle of links + // @ts-ignore bad type from turndown + content = turndownService.escape(content) } } else if (event.type === "m.room.message" && (event.content.msgtype === "m.file" || event.content.msgtype === "m.video" || event.content.msgtype === "m.audio" || event.content.msgtype === "m.image")) { content = "" diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js index e9061b7..e7f28d4 100644 --- a/m2d/converters/event-to-message.test.js +++ b/m2d/converters/event-to-message.test.js @@ -64,11 +64,11 @@ test("event2message: body is used when there is no formatted_body", async t => { ) }) -test("event2message: any markdown in body is escaped", async t => { +test("event2message: any markdown in body is escaped, except strikethrough", async t => { t.deepEqual( await eventToMessage({ content: { - body: "testing **special** ~~things~~ which _should_ *not* `trigger` @any ", + body: "testing **special** ~~things~~ which _should_ *not* `trigger` @any , except strikethrough", msgtype: "m.text" }, event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", @@ -85,7 +85,67 @@ test("event2message: any markdown in body is escaped", async t => { messagesToEdit: [], messagesToSend: [{ username: "cadence [they]", - content: "testing \\*\\*special\\*\\* \\~\\~things\\~\\~ which \\_should\\_ \\*not\\* \\`trigger\\` @any ", + content: "testing \\*\\*special\\*\\* ~~things~~ which \\_should\\_ \\*not\\* \\`trigger\\` @any , except strikethrough", + avatar_url: undefined + }] + } + ) +}) + +test("event2message: links in formatted body are not broken", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + 
body: "kyuugryphon I wonder what the midjourney text description of this photo is https://upload.wikimedia.org/wikipedia/commons/f/f3/After_gay_pride%2C_rainbow_flags_flying_along_Beach_Street_%2814853144744%29.jpg", + format: "org.matrix.custom.html", + formatted_body: "kyuugryphon I wonder what the midjourney text description of this photo is https://upload.wikimedia.org/wikipedia/commons/f/f3/After_gay_pride%2C_rainbow_flags_flying_along_Beach_Street_%2814853144744%29.jpg" + }, + origin_server_ts: 1693739630700, + unsigned: { + age: 39, + transaction_id: "m1693739630587.160" + }, + event_id: "$zANQGOdnHKZj48lrajojsejH86KNYST26imgb2Sw1Jg", + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe" + }), + { + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "<@111604486476181504> I wonder what the midjourney text description of this photo is https://upload.wikimedia.org/wikipedia/commons/f/f3/After_gay_pride%2C_rainbow_flags_flying_along_Beach_Street_%2814853144744%29.jpg", + avatar_url: undefined + }] + } + ) +}) + +test("event2message: links in plaintext body are not broken", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "I wonder what the midjourney text description of this photo is https://upload.wikimedia.org/wikipedia/commons/f/f3/After_gay_pride%2C_rainbow_flags_flying_along_Beach_Street_%2814853144744%29.jpg", + }, + origin_server_ts: 1693739630700, + unsigned: { + age: 39, + transaction_id: "m1693739630587.160" + }, + event_id: "$zANQGOdnHKZj48lrajojsejH86KNYST26imgb2Sw1Jg", + room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe" + }), + { + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "I wonder what the midjourney text description of this photo is 
https://upload.wikimedia.org/wikipedia/commons/f/f3/After_gay_pride%2C_rainbow_flags_flying_along_Beach_Street_%2814853144744%29.jpg", avatar_url: undefined }] } @@ -99,7 +159,7 @@ test("event2message: basic html is converted to markdown", async t => { msgtype: "m.text", body: "wrong body", format: "org.matrix.custom.html", - formatted_body: "this is a test of formatting" + formatted_body: "this is a test of formatting" }, event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU", origin_server_ts: 1688301929913, @@ -115,7 +175,7 @@ test("event2message: basic html is converted to markdown", async t => { messagesToEdit: [], messagesToSend: [{ username: "cadence [they]", - content: "this **is** a _**test** __of___ ~~formatting~~", + content: "this **is** a _**test** __of___ ~~_formatting_~~", avatar_url: undefined }] } @@ -449,7 +509,7 @@ test("event2message: m.emote plaintext works", async t => { messagesToEdit: [], messagesToSend: [{ username: "cadence [they]", - content: "\\* cadence [they] tests an m.emote message", + content: "\\* cadence \\[they\\] tests an m.emote message", avatar_url: undefined }] }