add turndown for m->d formatting
This commit is contained in:
parent
27b8c547e3
commit
8c4e16e255
5 changed files with 297 additions and 16 deletions
|
@ -2,19 +2,41 @@
|
|||
|
||||
const Ty = require("../../types")
|
||||
const DiscordTypes = require("discord-api-types/v10")
|
||||
const markdown = require("discord-markdown")
|
||||
const chunk = require("chunk-text")
|
||||
const TurndownService = require("turndown")
|
||||
|
||||
const passthrough = require("../../passthrough")
|
||||
const { sync, db, discord } = passthrough
|
||||
/** @type {import("../../matrix/file")} */
|
||||
const file = sync.require("../../matrix/file")
|
||||
|
||||
// https://github.com/mixmark-io/turndown/blob/97e4535ca76bb2e70d9caa2aa4d4686956b06d44/src/utilities.js#L26C28-L33C2
|
||||
// Upper-case tag names of the HTML elements this module treats as block-level.
// eventToMessage compares the tags on either side of a literal "\n" in formatted_body
// against this list (after toUpperCase()): the newline is rewritten to <br> only when
// neither neighbouring tag is listed here.
const BLOCK_ELEMENTS = [
|
||||
"ADDRESS", "ARTICLE", "ASIDE", "AUDIO", "BLOCKQUOTE", "BODY", "CANVAS",
|
||||
"CENTER", "DD", "DETAILS", "DIR", "DIV", "DL", "DT", "FIELDSET", "FIGCAPTION", "FIGURE",
|
||||
"FOOTER", "FORM", "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEADER",
|
||||
"HGROUP", "HR", "HTML", "ISINDEX", "LI", "MAIN", "MENU", "NAV", "NOFRAMES",
|
||||
"NOSCRIPT", "OL", "OUTPUT", "P", "PRE", "SECTION", "SUMMARY", "TABLE", "TBODY", "TD",
|
||||
"TFOOT", "TH", "THEAD", "TR", "UL"
|
||||
]
|
||||
|
||||
// Module-level Turndown converter. eventToMessage passes the Matrix formatted_body HTML
// through turndownService.turndown() to obtain Markdown.
// The `hr` option is set to "----" (presumably the text emitted for <hr>; confirm against the Turndown README).
const turndownService = new TurndownService({
|
||||
hr: "----"
|
||||
})
|
||||
|
||||
// Register a "strikethrough" rule: <del>, <s> and <strike> elements are replaced by
// the rule's `content` argument wrapped in "~~" on both sides.
turndownService.addRule("strikethrough", {
|
||||
filter: ["del", "s", "strike"],
|
||||
replacement: function (content) {
|
||||
return "~~" + content + "~~"
|
||||
}
|
||||
})
|
||||
|
||||
/**
|
||||
* @param {Ty.Event.Outer<Ty.Event.M_Room_Message>} event
|
||||
*/
|
||||
function eventToMessage(event) {
|
||||
/** @type {(DiscordTypes.RESTPostAPIWebhookWithTokenJSONBody & {files?: {name: string, file: Buffer}[]})[]} */
|
||||
const messages = []
|
||||
let messages = []
|
||||
|
||||
let displayName = event.sender
|
||||
let avatarURL = undefined
|
||||
|
@ -24,20 +46,51 @@ function eventToMessage(event) {
|
|||
// TODO: get the media repo domain and the avatar url from the matrix member event
|
||||
}
|
||||
|
||||
if (event.content.msgtype === "m.text") {
|
||||
messages.push({
|
||||
content: event.content.body,
|
||||
username: displayName,
|
||||
avatar_url: avatarURL
|
||||
})
|
||||
} else if (event.content.msgtype === "m.emote") {
|
||||
messages.push({
|
||||
content: `\* _${displayName} ${event.content.body}_`,
|
||||
username: displayName,
|
||||
avatar_url: avatarURL
|
||||
// Convert content depending on what the message is
|
||||
let content = event.content.body // ultimate fallback
|
||||
if (event.content.format === "org.matrix.custom.html" && event.content.formatted_body) {
|
||||
let input = event.content.formatted_body
|
||||
if (event.content.msgtype === "m.emote") {
|
||||
input = `* ${displayName} ${input}`
|
||||
}
|
||||
|
||||
// Note: Element's renderers on Web and Android currently collapse whitespace, like the browser does. Turndown also collapses whitespace which is good for me.
|
||||
// If later I'm using a client that doesn't collapse whitespace and I want turndown to follow suit, uncomment the following line of code, and it Just Works:
|
||||
// input = input.replace(/ /g, "&nbsp;")
|
||||
// There is also a corresponding test to uncomment, named "event2message: whitespace is retained"
|
||||
|
||||
// The matrix spec hasn't decided whether \n counts as a newline or not, but I'm going to count it, because if it's in the data it's there for a reason.
|
||||
// But I should not count it if it's between block elements.
|
||||
input = input.replace(/(<\/?([^ >]+)[^>]*>)?\n(<\/?([^ >]+)[^>]*>)?/g, (whole, beforeContext, beforeTag, afterContext, afterTag) => {
|
||||
if (typeof beforeTag !== "string" && typeof afterTag !== "string") {
|
||||
return "<br>"
|
||||
}
|
||||
beforeContext = beforeContext || ""
|
||||
beforeTag = beforeTag || ""
|
||||
afterContext = afterContext || ""
|
||||
afterTag = afterTag || ""
|
||||
if (!BLOCK_ELEMENTS.includes(beforeTag.toUpperCase()) && !BLOCK_ELEMENTS.includes(afterTag.toUpperCase())) {
|
||||
return beforeContext + "<br>" + afterContext
|
||||
} else {
|
||||
return whole
|
||||
}
|
||||
})
|
||||
|
||||
// @ts-ignore
|
||||
content = turndownService.turndown(input)
|
||||
|
||||
// It's optimised for commonmark, we need to replace the space-space-newline with just newline
|
||||
content = content.replace(/ \n/g, "\n")
|
||||
}
|
||||
|
||||
// Split into 2000 character chunks
|
||||
const chunks = chunk(content, 2000)
|
||||
messages = messages.concat(chunks.map(content => ({
|
||||
content,
|
||||
username: displayName,
|
||||
avatar_url: avatarURL
|
||||
})))
|
||||
|
||||
return messages
|
||||
}
|
||||
|
||||
|
|
|
@ -4,6 +4,12 @@ const {test} = require("supertape")
|
|||
const {eventToMessage} = require("./event-to-message")
|
||||
const data = require("../../test/data")
|
||||
|
||||
/**
 * Test helper: asserts that the first element of `a` and the first element of `b`
 * have the same `content`, by comparing their JSON.stringify() output with t.equal.
 * Only index 0 and only the `content` property are compared; other properties
 * (username, avatar_url) and any further elements are not checked.
 * NOTE(review): presumably JSON.stringify is used so that whitespace differences
 * show up as visible escapes in the failure output - confirm with the author.
 * @param t test assertion object; only `t.equal` is used here
 * @param a array whose first element has a `content` property
 * @param b array whose first element has a `content` property
 */
function sameFirstContentAndWhitespace(t, a, b) {
|
||||
const a2 = JSON.stringify(a[0].content)
|
||||
const b2 = JSON.stringify(b[0].content)
|
||||
t.equal(a2, b2)
|
||||
}
|
||||
|
||||
test("event2message: janky test", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
|
@ -28,6 +34,165 @@ test("event2message: janky test", t => {
|
|||
)
|
||||
})
|
||||
|
||||
// When formatted_body is present with format "org.matrix.custom.html", the output content
// is derived from the HTML (<strong> -> **, <em> -> _, <del> -> ~~), not from the plain
// `body`, which is deliberately set to "wrong body" here.
test("event2message: basic html is converted to markdown", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.text",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "this <strong>is</strong> a <strong><em>test</em></strong> of <del>formatting</del>"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "this **is** a **_test_** of ~~formatting~~",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
||||
// Literal asterisks and backslashes that appear as plain text in the HTML are expected to be
// backslash-escaped in the output, while real <strong><em> markup still becomes **_..._**.
test("event2message: markdown syntax is escaped", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.text",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "this **is** an <strong><em>extreme</em></strong> \\*test\\* of"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "this \\*\\*is\\*\\* an **_extreme_** \\\\\\*test\\\\\\* of",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
||||
// Inside a paragraph, each <br> and each literal "\n" is expected to yield one newline
// (so a doubled one yields a blank line); a </p> boundary is expected to yield a blank line.
test("event2message: html lines are bridged correctly", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.text",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "<p>paragraph one<br>line <em>two</em><br>line three<br><br>paragraph two\nline <em>two</em>\nline three\n\nparagraph three</p><p>paragraph four\nline two<br>line three\nline four</p>paragraph five"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "paragraph one\nline _two_\nline three\n\nparagraph two\nline _two_\nline three\n\nparagraph three\n\nparagraph four\nline two\nline three\nline four\n\nparagraph five",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
||||
/*test("event2message: whitespace is retained", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.text",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "line one: test test<br>line two: <strong>test</strong> <strong>test</strong><br>line three: <strong>test test</strong><br>line four: test<strong> </strong>test<br> line five"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "line one: test test\nline two: **test** **test**\nline three: **test test**\nline four: test test\n line five",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})*/
|
||||
|
||||
// Uses sameFirstContentAndWhitespace, so only the `content` of the first returned message
// is compared against the first expected entry.
// NOTE(review): the whitespace runs inside formatted_body appear to have been collapsed in
// this rendering of the diff; check the literal against the repository before relying on it.
test("event2message: whitespace is collapsed", t => {
|
||||
sameFirstContentAndWhitespace(
|
||||
t,
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.text",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "line one: test test<br>line two: <strong>test</strong> <strong>test</strong><br>line three: <strong>test test</strong><br>line four: test<strong> </strong>test<br> line five"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "line one: test test\nline two: **test** **test**\nline three: **test test**\nline four: test test\nline five",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
||||
// A nested <ul> whose tags are separated by literal "\n" is expected to become a Markdown
// bullet list. Only the first message's `content` is compared (sameFirstContentAndWhitespace).
// NOTE(review): the indentation before the nested bullets in the expected string may have
// been collapsed in this rendering of the diff; verify against the repository.
test("event2message: lists are bridged correctly", t => {
|
||||
sameFirstContentAndWhitespace(
|
||||
t,
|
||||
eventToMessage({
|
||||
"type": "m.room.message",
|
||||
"sender": "@cadence:cadence.moe",
|
||||
"content": {
|
||||
"msgtype": "m.text",
|
||||
"body": "* line one\n* line two\n* line three\n * nested one\n * nested two\n* line four",
|
||||
"format": "org.matrix.custom.html",
|
||||
"formatted_body": "<ul>\n<li>line one</li>\n<li>line two</li>\n<li>line three\n<ul>\n<li>nested one</li>\n<li>nested two</li>\n</ul>\n</li>\n<li>line four</li>\n</ul>\n"
|
||||
},
|
||||
"origin_server_ts": 1692967314062,
|
||||
"unsigned": {
|
||||
"age": 112,
|
||||
"transaction_id": "m1692967313951.441"
|
||||
},
|
||||
"event_id": "$l-xQPY5vNJo3SNxU9d8aOWNVD1glMslMyrp4M_JEF70",
|
||||
"room_id": "!BpMdOUkWWhFxmTrENV:cadence.moe"
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "* line one\n* line two\n* line three\n * nested one\n * nested two\n* line four",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
||||
test("event2message: long messages are split", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
|
@ -55,3 +220,29 @@ test("event2message: long messages are split", t => {
|
|||
}]
|
||||
)
|
||||
})
|
||||
|
||||
// For an m.emote with HTML, eventToMessage prepends "* <displayName> " to the HTML before
// conversion, so the expected content starts with an escaped asterisk followed by the
// display name; literal asterisks and backslashes in the text are escaped as for m.text.
test("event2message: m.emote markdown syntax is escaped", t => {
|
||||
t.deepEqual(
|
||||
eventToMessage({
|
||||
content: {
|
||||
msgtype: "m.emote",
|
||||
body: "wrong body",
|
||||
format: "org.matrix.custom.html",
|
||||
formatted_body: "shows you **her** <strong><em>extreme</em></strong> \\*test\\* of"
|
||||
},
|
||||
event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
|
||||
origin_server_ts: 1688301929913,
|
||||
room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
|
||||
sender: "@cadence:cadence.moe",
|
||||
type: "m.room.message",
|
||||
unsigned: {
|
||||
age: 405299
|
||||
}
|
||||
}),
|
||||
[{
|
||||
username: "cadence",
|
||||
content: "\\* cadence shows you \\*\\*her\\*\\* **_extreme_** \\\\\\*test\\\\\\* of",
|
||||
avatar_url: undefined
|
||||
}]
|
||||
)
|
||||
})
|
||||
|
|
37
package-lock.json
generated
37
package-lock.json
generated
|
@ -10,6 +10,7 @@
|
|||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^8.3.0",
|
||||
"chunk-text": "^2.0.1",
|
||||
"cloudstorm": "^0.8.0",
|
||||
"discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#440130ef343c8183a81c7c09809731484aa3a182",
|
||||
"heatsync": "^2.4.1",
|
||||
|
@ -20,7 +21,8 @@
|
|||
"node-fetch": "^2.6.7",
|
||||
"prettier-bytes": "^1.0.4",
|
||||
"snowtransfer": "^0.8.0",
|
||||
"try-to-catch": "^3.0.1"
|
||||
"try-to-catch": "^3.0.1",
|
||||
"turndown": "^7.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.0",
|
||||
|
@ -732,6 +734,18 @@
|
|||
"resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz",
|
||||
"integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg=="
|
||||
},
|
||||
"node_modules/chunk-text": {
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/chunk-text/-/chunk-text-2.0.1.tgz",
|
||||
"integrity": "sha512-ER6TSpe2DT4wjOVOKJ3FFAYv7wE77HA/Ztz88Peiv3lq/2oVMsItYJJsVVI0xNZM8cdImOOTNqlw+LQz7gYdJg==",
|
||||
"dependencies": {
|
||||
"runes": "^0.4.3"
|
||||
},
|
||||
"bin": {
|
||||
"chunk": "bin/server.js",
|
||||
"chunk-text": "bin/server.js"
|
||||
}
|
||||
},
|
||||
"node_modules/ci-info": {
|
||||
"version": "3.8.0",
|
||||
"resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz",
|
||||
|
@ -1057,6 +1071,11 @@
|
|||
"simple-markdown": "^0.7.2"
|
||||
}
|
||||
},
|
||||
"node_modules/domino": {
|
||||
"version": "2.1.6",
|
||||
"resolved": "https://registry.npmjs.org/domino/-/domino-2.1.6.tgz",
|
||||
"integrity": "sha512-3VdM/SXBZX2omc9JF9nOPCtDaYQ67BGp5CoLpIQlO2KCAPETs8TcDHacF26jXadGbvUteZzRTeos2fhID5+ucQ=="
|
||||
},
|
||||
"node_modules/ee-first": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
|
@ -2646,6 +2665,14 @@
|
|||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/runes": {
|
||||
"version": "0.4.3",
|
||||
"resolved": "https://registry.npmjs.org/runes/-/runes-0.4.3.tgz",
|
||||
"integrity": "sha512-K6p9y4ZyL9wPzA+PMDloNQPfoDGTiFYDvdlXznyGKgD10BJpcAosvATKrExRKOrNLgD8E7Um7WGW0lxsnOuNLg==",
|
||||
"engines": {
|
||||
"node": ">=4.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/safe-buffer": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
|
||||
|
@ -3216,6 +3243,14 @@
|
|||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/turndown": {
|
||||
"version": "7.1.2",
|
||||
"resolved": "https://registry.npmjs.org/turndown/-/turndown-7.1.2.tgz",
|
||||
"integrity": "sha512-ntI9R7fcUKjqBP6QU8rBK2Ehyt8LAzt3UBT9JR9tgo6GtuKvyUzpayWmeMKJw1DPdXzktvtIT8m2mVXz+bL/Qg==",
|
||||
"dependencies": {
|
||||
"domino": "^2.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/type-is": {
|
||||
"version": "1.6.18",
|
||||
"resolved": "https://registry.npmjs.org/type-is/-/type-is-1.6.18.tgz",
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"better-sqlite3": "^8.3.0",
|
||||
"chunk-text": "^2.0.1",
|
||||
"cloudstorm": "^0.8.0",
|
||||
"discord-markdown": "git+https://git.sr.ht/~cadence/nodejs-discord-markdown#440130ef343c8183a81c7c09809731484aa3a182",
|
||||
"heatsync": "^2.4.1",
|
||||
|
@ -26,7 +27,8 @@
|
|||
"node-fetch": "^2.6.7",
|
||||
"prettier-bytes": "^1.0.4",
|
||||
"snowtransfer": "^0.8.0",
|
||||
"try-to-catch": "^3.0.1"
|
||||
"try-to-catch": "^3.0.1",
|
||||
"turndown": "^7.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^18.16.0",
|
||||
|
|
2
types.d.ts
vendored
2
types.d.ts
vendored
|
@ -67,7 +67,7 @@ export namespace Event {
|
|||
}
|
||||
|
||||
export type M_Room_Message = {
|
||||
msgtype: "m.text"
|
||||
msgtype: "m.text" | "m.emote"
|
||||
body: string
|
||||
format?: "org.matrix.custom.html"
|
||||
formatted_body?: string
|
||||
|
|
Loading…
Reference in a new issue