out-of-your-element/m2d/converters/event-to-message.js

142 lines
4.8 KiB
JavaScript
Raw Normal View History

2023-07-02 13:06:05 +00:00
// @ts-check
2023-07-04 05:19:17 +00:00
const Ty = require("../../types")
2023-07-02 13:06:05 +00:00
const DiscordTypes = require("discord-api-types/v10")
2023-08-25 13:43:17 +00:00
const chunk = require("chunk-text")
const TurndownService = require("turndown")
2023-07-02 13:06:05 +00:00
const passthrough = require("../../passthrough")
const { sync, db, discord } = passthrough
/** @type {import("../../matrix/file")} */
const file = sync.require("../../matrix/file")
2023-08-25 13:43:17 +00:00
/**
 * Upper-case HTML tag names that are treated as block-level.
 * eventToMessage consults this list to decide whether a literal "\n" sitting
 * next to a tag should be turned into a <br> or left alone.
 */
const BLOCK_ELEMENTS = [
	"ADDRESS", "ARTICLE", "ASIDE", "AUDIO",
	"BLOCKQUOTE", "BODY",
	"CANVAS", "CENTER",
	"DD", "DETAILS", "DIR", "DIV", "DL", "DT",
	"FIELDSET", "FIGCAPTION", "FIGURE", "FOOTER", "FORM", "FRAMESET",
	"H1", "H2", "H3", "H4", "H5", "H6", "HEADER", "HGROUP", "HR", "HTML",
	"ISINDEX",
	"LI",
	"MAIN", "MENU",
	"NAV", "NOFRAMES", "NOSCRIPT",
	"OL", "OUTPUT",
	"P", "PRE",
	"SECTION", "SUMMARY",
	"TABLE", "TBODY", "TD", "TFOOT", "TH", "THEAD", "TR",
	"UL"
]
// Shared HTML -> markdown converter used by eventToMessage.
// Options (see the turndown README for their exact semantics):
// - hr: the literal text emitted for <hr>
// - headingStyle "atx": headings are written with leading # characters
// - preformattedCode: keep whitespace inside inline code as written
// - codeBlockStyle "fenced": code blocks use ``` fences rather than indentation
const turndownService = new TurndownService({
	hr: "----",
	headingStyle: "atx",
	preformattedCode: true,
	codeBlockStyle: "fenced"
})
// Struck-through HTML (<del>, <s>, <strike>) is written with ~~ delimiters on both sides.
turndownService.addRule("strikethrough", {
	filter: ["del", "s", "strike"],
	replacement: (content) => `~~${content}~~`
})
2023-08-26 07:07:19 +00:00
// Custom blockquote rule: strip the blank lines surrounding the quoted content,
// then prefix every line of it with "> ".
turndownService.addRule("blockquote", {
	filter: "blockquote",
	replacement(content) {
		const trimmed = content.replace(/^\n+|\n+$/g, "")
		return trimmed.replace(/^/gm, "> ")
	}
})
// Custom fenced code block rule for <pre><code>…</code></pre>.
// The code text is rebuilt from the <code> element's child nodes so that <br>
// children become real newlines; trailing newlines are dropped before fencing.
turndownService.addRule("fencedCodeBlock", {
	// Only applies when fenced style is configured and the <pre>'s first child is a <code>.
	filter(node, options) {
		return (
			options.codeBlockStyle === "fenced" &&
			node.nodeName === "PRE" &&
			node.firstChild &&
			node.firstChild.nodeName === "CODE"
		)
	},
	replacement(content, node, options) {
		const code = node.firstChild
		const className = code.getAttribute("class") || ""
		// A class such as "language-js" supplies the info string after the opening fence.
		const language = (className.match(/language-(\S+)/) || [null, ""])[1]
		// childNodes is a NodeList, which is only array-like in a standard DOM,
		// so go through Array.from instead of calling .map on it directly.
		const visibleCode = Array.from(code.childNodes, c => c.nodeName === "BR" ? "\n" : c.textContent)
			.join("")
			.replace(/\n*$/g, "")
		const fence = "```"
		return (
			fence + language + "\n" +
			visibleCode +
			"\n" + fence
		)
	}
})
2023-07-02 13:06:05 +00:00
/**
 * Convert a Matrix room message event into a list of webhook message bodies.
 *
 * If the event carries an "org.matrix.custom.html" formatted body, that HTML is
 * converted to markdown with turndown. Otherwise the plaintext body is used,
 * with markdown control characters backslash-escaped. The resulting text is
 * split into chunks of at most 2000 characters, one message object per chunk.
 *
 * @param {Ty.Event.Outer<Ty.Event.M_Room_Message>} event
 * @returns {(DiscordTypes.RESTPostAPIWebhookWithTokenJSONBody & {files?: {name: string, file: Buffer}[]})[]}
 */
function eventToMessage(event) {
	/** @type {(DiscordTypes.RESTPostAPIWebhookWithTokenJSONBody & {files?: {name: string, file: Buffer}[]})[]} */
	let messages = []

	// Default to the full user ID; prefer the localpart (between "@" and the first ":") when it parses.
	let displayName = event.sender
	let avatarURL = undefined
	const match = event.sender.match(/^@(.*?):/)
	if (match) {
		displayName = match[1]
		// TODO: get the media repo domain and the avatar url from the matrix member event
	}

	// Convert content depending on what the message is
	let content = event.content.body // ultimate fallback
	if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) {
		let input = event.content.formatted_body
		if (event.content.msgtype === "m.emote") {
			input = `* ${displayName} ${input}`
		}

		// Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me.
		// If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works:
		// input = input.replace(/ /g, "&nbsp;")
		// There is also a corresponding test to uncomment, named "event2message: whitespace is retained"

		// Element adds a bunch of <br> before </blockquote> but doesn't render them. I can't figure out how this works, so let's just delete those.
		input = input.replace(/(?:\n|<br ?\/?>\s*)*<\/blockquote>/g, "</blockquote>")

		// The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason.
		// But I should not count it if it's between block elements.
		input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => {
			// No tag on either side of the newline: it is a plain line break inside text.
			if (typeof beforeTag !== "string" && typeof afterTag !== "string") {
				return "<br>"
			}
			beforeContext = beforeContext || ""
			beforeTag = beforeTag || ""
			afterContext = afterContext || ""
			afterTag = afterTag || ""
			// Only insert a <br> when neither neighbouring tag is a block element.
			if (!BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) {
				return beforeContext + "<br>" + afterContext
			} else {
				return whole
			}
		})

		// @ts-ignore
		content = turndownService.turndown(input)

		// It's optimised for commonmark, we need to replace the space-space-newline with just newline
		content = content.replace(/ \n/g, "\n")
	} else {
		// Looks like we're using the plaintext body!
		// Emotes get the same "* name" prefix that the HTML branch adds, so they
		// still read as actions. It is added before escaping so the leading "*"
		// is escaped along with the rest of the text.
		if (event.content.msgtype === "m.emote") {
			content = `* ${displayName} ${content}`
		}
		// Markdown needs to be escaped. The backslash itself is included so that
		// a backslash already present in the body cannot cancel the escape that
		// is inserted in front of the following character.
		content = content.replace(/([\\*_~`#])/g, `\\$1`)
	}

	// Split into 2000 character chunks
	const chunks = chunk(content, 2000)
	messages = messages.concat(chunks.map(content => ({
		content,
		username: displayName,
		avatar_url: avatarURL
	})))

	return messages
}
// Export the converter on this module under the name eventToMessage.
module.exports.eventToMessage = eventToMessage