Upload code block as an attachment if it contains incompatible backticks

This commit is contained in:
Cadence Ember 2024-02-19 17:05:56 +13:00
parent 16f9c81097
commit 77b7772062
2 changed files with 119 additions and 22 deletions

View file

@ -181,7 +181,7 @@ turndownService.addRule("fencedCodeBlock", {
const className = node.firstChild.getAttribute("class") || ""
const language = (className.match(/language-(\S+)/) || [null, ""])[1]
const code = node.firstChild
const visibleCode = code.childNodes.map(c => c.nodeName === "BR" ? "\n" : c.textContent).join("").replace(/\n*$/g, "")
const visibleCode = getCodeContent(code)
var fence = "```"
@ -193,6 +193,11 @@ turndownService.addRule("fencedCodeBlock", {
}
})
/**
 * Collect the text of a code element's children into one string.
 * A child whose nodeName is "BR" contributes a newline, every other child contributes
 * its textContent, and all newlines at the very end of the result are removed.
 * @param {{ childNodes: Node[]; }} preCode the <code> directly inside the <pre>
 * @returns {string} the joined text without any trailing newlines
 */
function getCodeContent(preCode) {
	const joined = preCode.childNodes
		.map(child => (child.nodeName === "BR" ? "\n" : child.textContent))
		.join("")
	// Step back from the end past every trailing newline, then cut there
	let end = joined.length
	while (end > 0 && joined[end - 1] === "\n") end--
	return joined.slice(0, end)
}
/**
* @param {string | null} mxcUrl
* @param {string | null} nameForGuess without colons
@ -308,7 +313,7 @@ async function uploadEndOfMessageSpriteSheet(content, attachments, pendingFiles)
const buffer = await emojiSheet.compositeMatrixEmojis(endOfMessageEmojis)
// Attach it
const name = "emojis.png"
attachments.push({id: "0", name})
attachments.push({id: String(attachments.length), name})
pendingFiles.push({name, buffer})
return content
}
@ -393,6 +398,16 @@ async function checkWrittenMentions(content, guild, di) {
}
}
/**
 * Walk the parentNode chain, starting with the given node itself, and return the
 * first node whose tagName is one of the requested tag names.
 * @param {Element} node
 * @param {string[]} tagNames allcaps tag names
 * @returns {any | undefined} the node you were checking for, or undefined
 */
function nodeIsChildOf(node, tagNames) {
	/** @type {any} */
	let current = node
	while (current) {
		if (tagNames.includes(current.tagName)) {
			return current
		}
		current = current.parentNode
	}
	// Ran off the top of the tree without finding a matching tag
	return undefined
}
const attachmentEmojis = new Map([
["m.image", "🖼️"],
["m.video", "🎞️"],
@ -651,19 +666,37 @@ async function eventToMessage(event, guild, di) {
const root = doc.getElementById("turndown-root");
async function forEachNode(node) {
for (; node; node = node.nextSibling) {
if (node.nodeType === 3 && node.nodeValue.includes("@")) {
// Check written mentions
if (node.nodeType === 3 && node.nodeValue.includes("@") && !nodeIsChildOf(node, ["A", "CODE", "PRE"])) {
const result = await checkWrittenMentions(node.nodeValue, guild, di)
if (result) {
node.nodeValue = result.content
ensureJoined.push(result.ensureJoined)
}
}
if (node.nodeType === 1 && ["CODE", "PRE", "A"].includes(node.tagName)) {
// don't recurse into code or links
} else {
// do recurse into everything else
await forEachNode(node.firstChild)
// Check for incompatible backticks in code blocks
let preNode
if (node.nodeType === 3 && node.nodeValue.includes("```") && (preNode = nodeIsChildOf(node, ["PRE"]))) {
if (preNode.firstChild?.nodeName === "CODE") {
const ext = (preNode.firstChild.className.match(/language-(\S+)/) || [null, "txt"])[1]
const filename = `inline_code.${ext}`
// Build the replacement <code> node
const replacementCode = doc.createElement("code")
replacementCode.textContent = `[${filename}]`
// Build its containing <span> node
const replacement = doc.createElement("span")
replacement.appendChild(doc.createTextNode(" "))
replacement.appendChild(replacementCode)
replacement.appendChild(doc.createTextNode(" "))
// Replace the code block with the <span>
preNode.replaceWith(replacement)
// Upload the code as an attachment
const content = getCodeContent(preNode.firstChild)
attachments.push({id: String(attachments.length), filename})
pendingFiles.push({name: filename, buffer: Buffer.from(content, "utf8")})
}
}
await forEachNode(node.firstChild)
}
}
await forEachNode(root)

View file

@ -517,21 +517,16 @@ test("event2message: code blocks work", async t => {
test("event2message: code block contents are formatted correctly and not escaped", async t => {
t.deepEqual(
await eventToMessage({
"type": "m.room.message",
"sender": "@cadence:cadence.moe",
"content": {
"msgtype": "m.text",
"body": "wrong body",
"format": "org.matrix.custom.html",
"formatted_body": "<pre><code>input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,\n_input_ = input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,\n</code></pre>\n<p><code>input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,</code></p>\n"
type: "m.room.message",
sender: "@cadence:cadence.moe",
content: {
msgtype: "m.text",
body: "wrong body",
format: "org.matrix.custom.html",
formatted_body: "<pre><code>input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,\n_input_ = input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,\n</code></pre>\n<p><code>input = input.replace(/(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?\\n(&lt;\\/?([^ &gt;]+)[^&gt;]*&gt;)?/g,</code></p>\n"
},
"origin_server_ts": 1693031482275,
"unsigned": {
"age": 99,
"transaction_id": "m1693031482146.511"
},
"event_id": "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s",
"room_id": "!BpMdOUkWWhFxmTrENV:cadence.moe"
event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s",
room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe"
}),
{
ensureJoined: [],
@ -546,6 +541,75 @@ test("event2message: code block contents are formatted correctly and not escaped
)
})
// A <pre><code> block whose text contains ``` is expected to be replaced in the message
// content by an inline `[inline_code.<ext>]` reference (ext taken from the language-* class)
// and to have its code supplied as a pending file attachment instead.
test("event2message: code blocks are uploaded as attachments instead if they contain incompatible backticks", async t => {
	const event = {
		type: "m.room.message",
		sender: "@cadence:cadence.moe",
		content: {
			msgtype: "m.text",
			body: "wrong body",
			format: "org.matrix.custom.html",
			formatted_body: 'So if you run code like this<pre><code class="language-java">System.out.println("```");</code></pre>it should print a markdown formatted code block'
		},
		event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s",
		room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe"
	}

	// The single message that should come out of the conversion
	const expectedMessage = {
		username: "cadence [they]",
		content: "So if you run code like this `[inline_code.java]` it should print a markdown formatted code block",
		attachments: [{id: "0", filename: "inline_code.java"}],
		pendingFiles: [{name: "inline_code.java", buffer: Buffer.from('System.out.println("```");')}],
		avatar_url: undefined
	}

	const actual = await eventToMessage(event)

	t.deepEqual(actual, {
		ensureJoined: [],
		messagesToDelete: [],
		messagesToEdit: [],
		messagesToSend: [expectedMessage]
	})
})
// The code block below contains ``` so it is expected to be uploaded as a file.
// The expected buffer holds the code with its HTML entities (&amp; &gt; &lt;) decoded
// and without the trailing newline that precedes </code>.
test("event2message: characters are encoded properly in code blocks", async t => {
	const formattedBody = '<pre><code class="rs language-rs">fn extract_diff(chat_response: &amp;str) -&gt; Result&lt;String&gt; {'
		+ '\n let fragments = regex!(r#"^```diff.*?^(.*?)^```.*?($|\\z)"#sm)'
		+ '\n .captures_iter(chat_response)'
		+ '\n .map(|c| c.get(1).unwrap().as_str())'
		+ '\n .collect::&lt;String&gt;();'
		+ '\n</code></pre>'

	const expectedFileContents = Buffer.from(
		'fn extract_diff(chat_response: &str) -> Result<String> {'
		+ '\n let fragments = regex!(r#"^```diff.*?^(.*?)^```.*?($|\\z)"#sm)'
		+ '\n .captures_iter(chat_response)'
		+ '\n .map(|c| c.get(1).unwrap().as_str())'
		+ '\n .collect::<String>();'
	)

	const event = {
		type: "m.room.message",
		sender: "@cadence:cadence.moe",
		content: {
			msgtype: "m.text",
			body: "wrong body",
			format: "org.matrix.custom.html",
			formatted_body: formattedBody
		},
		event_id: "$pGkWQuGVmrPNByrFELxhzI6MCBgJecr5I2J3z88Gc2s",
		room_id: "!BpMdOUkWWhFxmTrENV:cadence.moe"
	}

	const actual = await eventToMessage(event)

	t.deepEqual(actual, {
		ensureJoined: [],
		messagesToDelete: [],
		messagesToEdit: [],
		messagesToSend: [{
			username: "cadence [they]",
			content: "`[inline_code.rs]`",
			attachments: [{id: "0", filename: "inline_code.rs"}],
			pendingFiles: [{name: "inline_code.rs", buffer: expectedFileContents}],
			avatar_url: undefined
		}]
	})
})
test("event2message: quotes have an appropriate amount of whitespace", async t => {
t.deepEqual(
await eventToMessage({