From 77b7772062a53e272c98c9ae822a8a7fe81e40e8 Mon Sep 17 00:00:00 2001 From: Cadence Ember Date: Mon, 19 Feb 2024 17:05:56 +1300 Subject: [PATCH] Upload code block in case of incompatible backtick --- m2d/converters/event-to-message.js | 49 ++++++++++--- m2d/converters/event-to-message.test.js | 92 +++++++++++++++++++++---- 2 files changed, 119 insertions(+), 22 deletions(-) diff --git a/m2d/converters/event-to-message.js b/m2d/converters/event-to-message.js index 7c86cfa..8710531 100644 --- a/m2d/converters/event-to-message.js +++ b/m2d/converters/event-to-message.js @@ -181,7 +181,7 @@ turndownService.addRule("fencedCodeBlock", { const className = node.firstChild.getAttribute("class") || "" const language = (className.match(/language-(\S+)/) || [null, ""])[1] const code = node.firstChild - const visibleCode = code.childNodes.map(c => c.nodeName === "BR" ? "\n" : c.textContent).join("").replace(/\n*$/g, "") + const visibleCode = getCodeContent(code) var fence = "```" @@ -193,6 +193,11 @@ turndownService.addRule("fencedCodeBlock", { } }) +/** @param {{ childNodes: Node[]; }} preCode the <code> directly inside the <pre>
 */
+function getCodeContent(preCode) { // returns the code's text: each direct <br> child counts as a newline, trailing newlines are dropped
+	return Array.from(preCode.childNodes, child => child.nodeName === "BR" ? "\n" : child.textContent).join("").replace(/\n*$/g, "") // Array.from also accepts a NodeList, which has no .map
+}
+
 /**
  * @param {string | null} mxcUrl
  * @param {string | null} nameForGuess without colons
@@ -308,7 +313,7 @@ async function uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles)
 	const buffer = await emojiSheet.compositeMatrixEmojis(endOfMessageEmojis)
 	// Attach it
 	const name = "emojis.png"
-	attachments.push({id: "0", name})
+	attachments.push({id: String(attachments.length), name})
 	pendingFiles.push({name, buffer})
 	return content
 }
@@ -393,6 +398,16 @@ async function checkWrittenMentions(content, guild, di) {
 	}
 }
 
+/**
+ * @param {Element} node where the search starts; the node itself is tested before its ancestors
+ * @param {string[]} tagNames allcaps tag names
+ * @returns {any | undefined} the closest node (self or ancestor) whose tag name is listed, or undefined
+ */
+function nodeIsChildOf(node, tagNames) {
+	// @ts-ignore
+	for (let current = node; current; current = current.parentNode) if (tagNames.includes(current.tagName)) return current
+}
+
 const attachmentEmojis = new Map([
 	["m.image", "🖼️"],
 	["m.video", "🎞️"],
@@ -651,19 +666,43 @@ async function eventToMessage(event, guild, di) {
 			const root = doc.getElementById("turndown-root");
+			/**
+			 * Walks `node` and its following siblings depth-first, rewriting written mentions in text nodes
+			 * and swapping code blocks that contain ``` for an uploaded file plus a short placeholder.
+			 */
 			async function forEachNode(node) {
 				for (; node; node = node.nextSibling) {
-					if (node.nodeType === 3 && node.nodeValue.includes("@")) {
+					// Check written mentions
+					if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) {
 						const result = await checkWrittenMentions(node.nodeValue, guild, di)
 						if (result) {
 							node.nodeValue = result.content
 							ensureJoined.push(result.ensureJoined)
 						}
 					}
-					if (node.nodeType === 1 && ["CODE", "PRE", "A"].includes(node.tagName)) {
-						// don't recurse into code or links
-					} else {
-						// do recurse into everything else
-						await forEachNode(node.firstChild)
+					// Check for incompatible backticks in code blocks (once per <pre>, not once per text node inside it)
+					if (node.nodeType === 1 && node.tagName === "PRE" && node.firstChild?.nodeName === "CODE") {
+						const content = getCodeContent(node.firstChild)
+						if (content.includes("```")) {
+							const ext = (node.firstChild.className.match(/language-(\S+)/) || [null, "txt"])[1]
+							const filename = `inline_code.${ext}`
+							// Build the replacement <code> node
+							const replacementCode = doc.createElement("code")
+							replacementCode.textContent = `[${filename}]`
+							// Build its containing <span> node
+							const replacement = doc.createElement("span")
+							replacement.appendChild(doc.createTextNode(" "))
+							replacement.appendChild(replacementCode)
+							replacement.appendChild(doc.createTextNode(" "))
+							// Replace the code block with the <span>
+							node.replaceWith(replacement)
+							// Upload the code as an attachment
+							attachments.push({id: String(attachments.length), filename})
+							pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")})
+							// The detached <pre> has no nextSibling, so resume the walk from the replacement
+							node = replacement
+							continue
+						}
 					}
+					await forEachNode(node.firstChild)
 				}
 			}
 			await forEachNode(root)
diff --git a/m2d/converters/event-to-message.test.js b/m2d/converters/event-to-message.test.js
index 33cbdd8..d8926b6 100644
--- a/m2d/converters/event-to-message.test.js
+++ b/m2d/converters/event-to-message.test.js
@@ -517,21 +517,16 @@ test("event2message: code blocks work", async t => {
 test("event2message: code block contents are formatted correctly and not escaped", async t => {
 	t.deepEqual(
 		await eventToMessage({
-			"type": "m.room.message",
-			"sender": "@cadence:cadence.moe",
-			"content": {
-				"msgtype": "m.text",
-				"body": "wrong body",
-				"format": "org.matrix.custom.html",
-				"formatted_body": "
input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,\n_input_ = input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,\n
\n

input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,

\n" + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: "
input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,\n_input_ = input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,\n
\n

input = input.replace(/(<\\/?([^ >]+)[^>]*>)?\\n(<\\/?([^ >]+)[^>]*>)?/g,

\n" }, - "origin_server_ts": 1693031482275, - "unsigned": { - "age": 99, - "transaction_id": "m1693031482146.511" - }, - "event_id": "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s", - "room_id": "!BpMdOUkWWhFxmTrENV:cadence.moe" + event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s", + room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe" }), { ensureJoined: [], @@ -546,6 +541,75 @@ test("event2message: code block contents are formatted correctly and not escaped ) }) +test("event2message: code blocks are uploaded as attachments instead if they contain incompatible backticks", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: 'So if you run code like this
System.out.println("```");
it should print a markdown formatted code block' + }, + event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s", + room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe" + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "So if you run code like this `[inline_code.java]` it should print a markdown formatted code block", + attachments: [{id: "0", filename: "inline_code.java"}], + pendingFiles: [{name: "inline_code.java", buffer: Buffer.from('System.out.println("```");')}], + avatar_url: undefined + }] + } + ) +}) + +test("event2message: characters are encoded properly in code blocks", async t => { + t.deepEqual( + await eventToMessage({ + type: "m.room.message", + sender: "@cadence:cadence.moe", + content: { + msgtype: "m.text", + body: "wrong body", + format: "org.matrix.custom.html", + formatted_body: '
fn extract_diff(chat_response: &str) -> Result<String> {'
+					+ '\n    let fragments = regex!(r#"^```diff.*?^(.*?)^```.*?($|\\z)"#sm)'
+					+ '\n        .captures_iter(chat_response)'
+					+ '\n        .map(|c| c.get(1).unwrap().as_str())'
+					+ '\n        .collect::<String>();'
+			  		+ '\n
' + }, + event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s", + room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe" + }), + { + ensureJoined: [], + messagesToDelete: [], + messagesToEdit: [], + messagesToSend: [{ + username: "cadence [they]", + content: "`[inline_code.rs]`", + attachments: [{id: "0", filename: "inline_code.rs"}], + pendingFiles: [{name: "inline_code.rs", buffer: Buffer.from( + 'fn extract_diff(chat_response: &str) -> Result {' + + '\n let fragments = regex!(r#"^```diff.*?^(.*?)^```.*?($|\\z)"#sm)' + + '\n .captures_iter(chat_response)' + + '\n .map(|c| c.get(1).unwrap().as_str())' + + '\n .collect::();' + )}], + avatar_url: undefined + }] + } + ) +}) + test("event2message: quotes have an appropriate amount of whitespace", async t => { t.deepEqual( await eventToMessage({