Compare commits

...

2 commits

8 changed files with 506 additions and 25 deletions

View file

@ -78,18 +78,18 @@ test("edit2changes: bot response", async t => {
newContent: {
$type: "m.room.message",
msgtype: "m.text",
body: "* :ae_botrac4r: @cadence asked ``­``, I respond: Stop drinking paint. (No)\n\nHit :bn_re: to reroll.",
body: "* :ae_botrac4r: [@cadence](https://matrix.to/#/@cadence:cadence.moe) asked ``­``, I respond: Stop drinking paint. (No)\n\nHit :bn_re: to reroll.",
format: "org.matrix.custom.html",
formatted_body: '* <img data-mx-emoticon height="32" src="mxc://cadence.moe/skqfuItqxNmBYekzmVKyoLzs" title=":ae_botrac4r:" alt=":ae_botrac4r:"> @cadence asked <code>­</code>, I respond: Stop drinking paint. (No)<br><br>Hit <img data-mx-emoticon height="32" src="mxc://cadence.moe/OIpqpfxTnHKokcsYqDusxkBT" title=":bn_re:" alt=":bn_re:"> to reroll.',
formatted_body: '* <img data-mx-emoticon height="32" src="mxc://cadence.moe/skqfuItqxNmBYekzmVKyoLzs" title=":ae_botrac4r:" alt=":ae_botrac4r:"> <a href="https://matrix.to/#/@cadence:cadence.moe">@cadence</a> asked <code>­</code>, I respond: Stop drinking paint. (No)<br><br>Hit <img data-mx-emoticon height="32" src="mxc://cadence.moe/OIpqpfxTnHKokcsYqDusxkBT" title=":bn_re:" alt=":bn_re:"> to reroll.',
"m.mentions": {
// Client-Server API spec 11.37.7: Copy Discord's behaviour by not re-notifying anyone that an *edit occurred*
},
// *** Replaced With: ***
"m.new_content": {
msgtype: "m.text",
body: ":ae_botrac4r: @cadence asked ``­``, I respond: Stop drinking paint. (No)\n\nHit :bn_re: to reroll.",
body: ":ae_botrac4r: [@cadence](https://matrix.to/#/@cadence:cadence.moe) asked ``­``, I respond: Stop drinking paint. (No)\n\nHit :bn_re: to reroll.",
format: "org.matrix.custom.html",
formatted_body: '<img data-mx-emoticon height="32" src="mxc://cadence.moe/skqfuItqxNmBYekzmVKyoLzs" title=":ae_botrac4r:" alt=":ae_botrac4r:"> @cadence asked <code>­</code>, I respond: Stop drinking paint. (No)<br><br>Hit <img data-mx-emoticon height="32" src="mxc://cadence.moe/OIpqpfxTnHKokcsYqDusxkBT" title=":bn_re:" alt=":bn_re:"> to reroll.',
formatted_body: '<img data-mx-emoticon height="32" src="mxc://cadence.moe/skqfuItqxNmBYekzmVKyoLzs" title=":ae_botrac4r:" alt=":ae_botrac4r:"> <a href="https://matrix.to/#/@cadence:cadence.moe">@cadence</a> asked <code>­</code>, I respond: Stop drinking paint. (No)<br><br>Hit <img data-mx-emoticon height="32" src="mxc://cadence.moe/OIpqpfxTnHKokcsYqDusxkBT" title=":bn_re:" alt=":bn_re:"> to reroll.',
"m.mentions": {
// Client-Server API spec 11.37.7: This should contain the mentions for the final version of the event
"user_ids": ["@cadence:cadence.moe"]

View file

@ -0,0 +1,157 @@
// @ts-check
const assert = require("assert")
const {reg} = require("../../matrix/read-registration")
const userRegex = reg.namespaces.users.map(u => new RegExp(u.regex))
/**
* @typedef {{text: string, index: number, end: number}} Token
*/
/** @typedef {{mxids: {localpart: string, mxid: string, displayname?: string}[], names: {displaynameTokens: Token[], mxid: string}[]}} ProcessedJoined */
// Displaynames this long (or longer) earn no length bonus on a perfect localpart match.
const lengthBonusLengthCap = 50
// Largest possible length bonus, earned when the user has no displayname (length 0).
const lengthBonusValue = 0.5

/**
 * Score a localpart by how many characters in a row, counted from the start of `input`,
 * appear as one contiguous run somewhere inside `localpart`.
 *
 * - Base score: the length of that run (0 = no match).
 * - If the run sits at the very start of the localpart, the score is doubled.
 * - If the run covers the whole localpart, the score is `2 * length + 1` plus a small
 *   tiebreaker in the range 0..0.5 that is larger the shorter `displayname` is, so that
 *   several users with the same perfect match can still be ordered.
 *
 * Only characters that really exist in both strings are compared: a short `input` that
 * matches the tail of the localpart (e.g. "ton" against "huckleton") scores 3, it is not
 * treated as a match of the whole localpart.
 *
 * @param {string} localpart lowercase localpart to search in
 * @param {string} input written text; only its leading characters are considered
 * @param {string} [displayname] only for the super tiebreaker
 * @returns {{score: number, matchedInputTokens: Token[]}} the score, and a single synthetic token covering the matched prefix of `input`
 */
function scoreLocalpart(localpart, input, displayname) {
	// Every position in the localpart starts out as a possible place for the run to begin.
	/** @type {number[]} */
	let matchingLocations = Array.from({length: localpart.length}, (_, i) => i)
	let score = 0
	// Never read past the end of either string; otherwise `undefined === undefined` would count as a matching character.
	while (score < input.length && score < localpart.length) {
		const survivors = matchingLocations.filter(i => i + score < localpart.length && localpart[i + score] === input[score])
		if (!survivors.length) break
		matchingLocations = survivors
		score++
	}
	// Locations are kept in ascending order, so a run at the start of the localpart is always first.
	const atStart = score > 0 && matchingLocations[0] === 0
	/** @type {Token} */
	const fakeToken = {text: input.slice(0, score), index: 0, end: score}
	const displaynameLength = displayname?.length ?? 0
	if (score > 0 && score === localpart.length) {
		// Perfect match of the whole localpart.
		score = score * 2 + 1 + Math.max(((lengthBonusLengthCap - displaynameLength) / lengthBonusLengthCap) * lengthBonusValue, 0)
	} else if (atStart) {
		score = score * 2
	}
	return {score, matchedInputTokens: [fakeToken]}
}
// Number of displayname token positions over which the positional weight falls linearly from 1 towards 0.
const decayDistance = 10
// Lower bound for the positional weight: a matched token is never worth less than this share of its length.
const decayValue = 0.33

/**
 * Score a tokenised displayname against the tokens of the written text.
 *
 * Input tokens are consumed in order from the first one. Each must be found in the
 * displayname after the previously matched displayname token (gaps are allowed);
 * scanning stops at the first input token that cannot be found. Every matched token
 * contributes its text length, weighted down the later in the displayname it was found.
 * The total is doubled when the first matched token is the first input token (or the
 * first input token starting with a word character), and 1 is added when the number of
 * matched tokens equals the number of displayname tokens.
 *
 * @param {Token[]} displaynameTokens
 * @param {Token[]} inputTokens
 * @returns {{score: number, matchedInputTokens: Token[]}}
 */
function scoreName(displaynameTokens, inputTokens) {
	/** @type {Token[]} */
	const matchedInputTokens = []
	let score = 0
	let searchFrom = 0

	for (const inputToken of inputTokens) {
		// Find this input token at or after the slot following the previous match.
		let foundAt = -1
		for (let i = searchFrom; i < displaynameTokens.length; i++) {
			if (displaynameTokens[i].text === inputToken.text) {
				foundAt = i
				break
			}
		}
		if (foundAt === -1) break // nothing is scored after a token doesn't match

		// Weight decays 100% -> 33% the further into the displayname the token is found.
		const positionWeight = Math.max(((decayDistance-foundAt)*(1+decayValue))/(decayDistance*(1+decayValue)), decayValue)
		score += inputToken.text.length * positionWeight
		matchedInputTokens.push(inputToken)
		searchFrom = foundAt + 1
	}

	const firstTextualInputToken = inputTokens.find(t => t.text.match(/^\w/))
	const firstMatched = matchedInputTokens[0]
	const matchedFromStart = firstMatched === inputTokens[0] || firstMatched === firstTextualInputToken
	if (matchedFromStart) score *= 2

	const sameTokenCount = matchedInputTokens.length === displaynameTokens.length
	if (sameTokenCount) score += 1

	return {score, matchedInputTokens}
}
/**
 * Split a name into lowercase tokens, remembering where each token sits in the original string.
 * The string is cut at underscores, whitespace and word boundaries. Pieces that are empty or
 * whitespace-only are dropped; an underscore separator is kept as a token of its own.
 * @param {string} name
 * @returns {Token[]} tokens in order of appearance; `index`/`end` are offsets into `name`
 */
function tokenise(name) {
	/** @type {Token[]} */
	const tokens = []
	let offset = 0
	// The capture group makes split() return the separators too, so the running offset stays aligned with `name`.
	name.split(/(_|\s|\b)/g).forEach(piece => {
		const end = offset + piece.length
		if (piece.trim() !== "") {
			tokens.push({text: piece.toLowerCase(), index: offset, end})
		}
		offset = end
	})
	return tokens
}
/**
 * Prepare a room's member list for mention matching.
 * Members whose mxid matches any of the registration's user namespace regexes (`userRegex`) are
 * left out. For the rest, the result holds the lowercased localpart of every member, and the
 * tokenised displayname of every member that has a non-empty displayname.
 * Throws (via `assert`) if a remaining mxid does not contain an `@`.
 * @param {{mxid: string, displayname?: string}[]} joined
 * @returns {ProcessedJoined}
 */
function processJoined(joined) {
	const eligible = joined.filter(member => !userRegex.some(rx => member.mxid.match(rx)))

	// One entry per eligible member, matched later against the mxid localpart.
	const mxids = eligible.map(({mxid, displayname}) => {
		const localpartMatch = mxid.match(/@([^:]*)/)
		assert(localpartMatch)
		return {
			localpart: localpartMatch[1].toLowerCase(),
			mxid,
			displayname
		}
	})

	// Only members with a displayname can be matched by name.
	const names = eligible
		.filter(member => member.displayname)
		.map(({mxid, displayname}) => ({
			displaynameTokens: tokenise(displayname),
			mxid
		}))

	return {mxids, names}
}
/**
 * Find the room member that a written @mention most plausibly refers to, and wrap the
 * matching part of the message in a markdown link to that member's matrix.to URL.
 *
 * Every member is scored by localpart (`scoreLocalpart`) and, where available, by displayname
 * (`scoreName`). The first highest-scoring candidate wins, provided its score is above 4.
 *
 * @param {ProcessedJoined} pjr
 * @param {string} maximumWrittenSection lowercase please; the text following the mention prefix
 * @param {number} baseOffset offset in `content` at which the mention prefix starts
 * @param {string} prefix the mention prefix as written; only its length is used, to shift the end of the link
 * @param {string} content
 * @returns {{mxid: string, newContent: string} | undefined} undefined when there are no candidates or none scores above the threshold
 */
function findMention(pjr, maximumWrittenSection, baseOffset, prefix, content) {
	if (!pjr.mxids.length && !pjr.names.length) return
	const maximumWrittenSectionTokens = tokenise(maximumWrittenSection)

	/** @type {{mxid: string, scored: {score: number, matchedInputTokens: Token[]}}[]} */
	const candidates = [
		...pjr.mxids.map(entry => ({...entry, scored: scoreLocalpart(entry.localpart, maximumWrittenSection, entry.displayname)})),
		...pjr.names.map(entry => ({...entry, scored: scoreName(entry.displaynameTokens, maximumWrittenSectionTokens)}))
	]

	// Keep the earliest candidate among those sharing the highest score.
	const best = candidates.reduce((leader, candidate) => candidate.scored.score > leader.scored.score ? candidate : leader)

	// Requires in smallest case perfect match of 2 characters, or in largest case a partial middle match of 5+ characters in a row
	if (!(best.scored.score > 4)) return

	// Highlight the relevant part of the message
	const matched = best.scored.matchedInputTokens
	const start = baseOffset + matched[0].index
	const end = baseOffset + prefix.length + matched[matched.length - 1].end
	const before = content.slice(0, start)
	const linkText = content.slice(start, end)
	const after = content.slice(end)
	const newContent = before + "[" + linkText + "](https://matrix.to/#/" + best.mxid + ")" + after

	return {mxid: best.mxid, newContent}
}
module.exports.scoreLocalpart = scoreLocalpart
module.exports.scoreName = scoreName
module.exports.tokenise = tokenise
module.exports.processJoined = processJoined
module.exports.findMention = findMention

View file

@ -0,0 +1,118 @@
// @ts-check
const {test} = require("supertape")
const {scoreLocalpart, scoreName, tokenise} = require("./find-mentions")
test("score localpart: score against cadence", t => {
	// Candidate localpart -> score expected when the written text is "cadence".
	const expectedByLocalpart = new Map([
		["cadence", 15.5], // whole localpart matched
		["cadence_test", 14], // 7 characters matched at the start, doubled
		["roblkyogre", 0], // no character run in common
		["cat", 4], // "ca" at the start, doubled
		["arcade_cabinet", 4] // "cade" in the middle, not doubled
	])
	const actual = [...expectedByLocalpart.keys()].map(localpart => scoreLocalpart(localpart, "cadence").score)
	t.deepEqual(actual, [...expectedByLocalpart.values()])
})
test("score mxid: tiebreak multiple perfect matches on name length", t => {
	// Same localpart, same input: only the displayname length can separate the two.
	const displaynames = ["Emma [it/its] ⚡️", "Emma [it/its]"]
	const [longerNameScore, shorterNameScore] = displaynames.map(displayname => scoreLocalpart("emma", "emma", displayname).score)
	t.ok(longerNameScore < shorterNameScore, `comparison: ${[longerNameScore, shorterNameScore].join(" < ")}`)
})
test("score name: score against cadence", t => {
	// Displayname paired with the score expected for the written text "cadence".
	const cases = [
		["bgt lover", 0],
		["Ash 🦑 (xey/it)", 0],
		["Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆", 14],
		["underscore_idiot #sunshine", 0],
		["INX | Evil Lillith (she/her)", 0],
		["INX | Lillith (she/her)", 0],
		["🌟luna🌟", 0],
		["#1 Ritsuko Kinnie", 0]
	]
	const actual = cases.map(([displayname]) => scoreName(tokenise(displayname), tokenise("cadence")).score)
	const expected = cases.map(([, score]) => score)
	t.deepEqual(actual, expected)
})
test("score name: nothing scored after a token doesn't match", t => {
	// "hope" appears in one displayname, but the leading "I" matches nowhere, so scanning stops before it.
	const displaynames = [
		"bgt lover",
		"Ash 🦑 (xey/it)",
		"Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆",
		"underscore_idiot #sunshine",
		"INX | Evil Lillith (she/her)",
		"INX | Lillith (she/her)",
		"🌟luna🌟",
		"#1 Ritsuko Kinnie"
	]
	const actual = displaynames.map(displayname => scoreName(tokenise(displayname), tokenise("I hope so")).score)
	t.deepEqual(actual, [0, 0, 0, 0, 0, 0, 0, 0])
})
test("score name: prefers earlier match", t => {
	// "lillith" is found one token earlier in the first displayname than in the second.
	const earlier = scoreName(tokenise("INX | Lillith (she/her)"), tokenise("lillith")).score
	const later = scoreName(tokenise("INX | Evil Lillith (she/her)"), tokenise("lillith")).score
	t.ok(earlier > later, `comparison: ${[earlier, later].join(" > ")}`)
})
test("score name: matches lots of tokens", t => {
	// The written text repeats the words of the displayname in order, without the punctuation.
	const displayname = "Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆"
	const written = "cadence maid of creation eye of clarity empress of hope"
	const {score} = scoreName(tokenise(displayname), tokenise(written))
	t.deepEqual(Math.round(score), 50)
})
test("score name: prefers variation when you specify it", t => {
	// Writing "test" as well should favour the displayname that contains it.
	const variation = scoreName(tokenise("Cadence (test account)"), tokenise("cadence test")).score
	const original = scoreName(tokenise("Cadence"), tokenise("cadence test")).score
	t.ok(variation > original, `comparison: ${[variation, original].join(" > ")}`)
})
test("score name: prefers original when not specified", t => {
	// With only "cadence" written, the displayname that is matched completely should win.
	const variation = scoreName(tokenise("Cadence (test account)"), tokenise("cadence")).score
	const original = scoreName(tokenise("Cadence"), tokenise("cadence")).score
	t.ok(variation < original, `comparison: ${[variation, original].join(" < ")}`)
})
test("score name: finds match location", t => {
	// The offsets on the matched input tokens must point back into the original message.
	const message = "evil lillith is an inspiration"
	const {matchedInputTokens} = scoreName(tokenise("INX | Evil Lillith (she/her)"), tokenise(message))
	const firstToken = matchedInputTokens[0]
	const lastToken = matchedInputTokens[matchedInputTokens.length - 1]
	t.equal(message.slice(firstToken.index, lastToken.end), "evil lillith")
})

View file

@ -18,10 +18,10 @@ const lottie = sync.require("../actions/lottie")
const mxUtils = sync.require("../../matrix/utils")
/** @type {import("../../discord/utils")} */
const dUtils = sync.require("../../discord/utils")
/** @type {import("./find-mentions")} */
const findMentions = sync.require("./find-mentions")
const {reg} = require("../../matrix/read-registration")
const userRegex = reg.namespaces.users.map(u => new RegExp(u.regex))
/**
* @param {DiscordTypes.APIMessage} message
* @param {DiscordTypes.APIGuild} guild
@ -684,23 +684,28 @@ async function messageToEvent(message, guild, options = {}, di) {
// Then text content
if (message.content) {
// Mentions scenario 3: scan the message content for written @mentions of matrix users. Allows for up to one space between @ and mention.
const matches = [...message.content.matchAll(/@ ?([a-z0-9._]+)\b/gi)]
if (options.scanTextForMentions !== false && matches.length && matches.some(m => m[1].match(/[a-z]/i) && m[1] !== "everyone" && m[1] !== "here")) {
const writtenMentionsText = matches.map(m => m[1].toLowerCase())
const roomID = select("channel_room", "room_id", {channel_id: message.channel_id}).pluck().get()
assert(roomID)
const {joined} = await di.api.getJoinedMembers(roomID)
for (const [mxid, member] of Object.entries(joined)) {
if (!userRegex.some(rx => mxid.match(rx))) {
const localpart = mxid.match(/@([^:]*)/)
assert(localpart)
const displayName = member.display_name || localpart[1]
if (writtenMentionsText.includes(localpart[1].toLowerCase()) || writtenMentionsText.includes(displayName.toLowerCase())) addMention(mxid)
let content = message.content
if (options.scanTextForMentions !== false) {
const matches = [...content.matchAll(/(@ ?)([a-z0-9_.][^@\n]+)/gi)]
for (let i = matches.length; i--;) {
const m = matches[i]
const prefix = m[1]
const maximumWrittenSection = m[2].toLowerCase()
if (maximumWrittenSection.match(/^!?&?[0-9]+>/) || maximumWrittenSection.match(/^everyone\b/) || maximumWrittenSection.match(/^here\b/)) continue
var roomID = roomID ?? select("channel_room", "room_id", {channel_id: message.channel_id}).pluck().get()
assert(roomID)
var pjr = pjr ?? findMentions.processJoined(Object.entries((await di.api.getJoinedMembers(roomID)).joined).map(([mxid, ev]) => ({mxid, displayname: ev.display_name})))
const found = findMentions.findMention(pjr, maximumWrittenSection, m.index, prefix, content)
if (found) {
addMention(found.mxid)
content = found.newContent
}
}
}
const {body, html} = await transformContent(message.content)
const {body, html} = await transformContent(content)
await addTextEvent(body, html, msgtype)
}

View file

@ -789,11 +789,13 @@ test("message2event: simple written @mention for matrix user", async t => {
]
},
msgtype: "m.text",
body: "@ash do you need anything from the store btw as I'm heading there after gym"
body: "[@ash](https://matrix.to/#/@she_who_brings_destruction:cadence.moe) do you need anything from the store btw as I'm heading there after gym",
format: "org.matrix.custom.html",
formatted_body: `<a href="https://matrix.to/#/@she_who_brings_destruction:cadence.moe">@ash</a> do you need anything from the store btw as I'm heading there after gym`
}])
})
test("message2event: advanced written @mentions for matrix users", async t => {
test("message2event: many written @mentions for matrix users", async t => {
let called = 0
const events = await messageToEvent(data.message.advanced_written_at_mention_for_matrix, data.guild.general, {}, {
api: {
@ -831,16 +833,171 @@ test("message2event: advanced written @mentions for matrix users", async t => {
$type: "m.room.message",
"m.mentions": {
user_ids: [
"@cadence:cadence.moe",
"@huckleton:cadence.moe"
"@huckleton:cadence.moe",
"@cadence:cadence.moe"
]
},
msgtype: "m.text",
body: "@Cadence, tell me about @Phil, the creator of the Chin Trick, who has become ever more powerful under the mentorship of @botrac4r and @huck"
body: "[@Cadence](https://matrix.to/#/@cadence:cadence.moe), tell me about @Phil, the creator of the Chin Trick, who has become ever more powerful under the mentorship of @botrac4r and [@huck](https://matrix.to/#/@huckleton:cadence.moe)",
format: "org.matrix.custom.html",
formatted_body: `<a href="https://matrix.to/#/@cadence:cadence.moe">@Cadence</a>, tell me about @Phil, the creator of the Chin Trick, who has become ever more powerful under the mentorship of @botrac4r and <a href="https://matrix.to/#/@huckleton:cadence.moe">@huck</a>`
}])
t.equal(called, 1, "should only look up the member list once")
})
// The written "@cadence" does not resemble the localpart "secret", but it is the first
// token of that member's displayname, so the mention should resolve to @secret:cadence.moe.
test("message2event: written @mentions may match part of the name", async t => {
	let called = 0
	const events = await messageToEvent({
		...data.message.advanced_written_at_mention_for_matrix,
		content: "I wonder if @cadence saw this?"
	}, data.guild.general, {}, {
		api: {
			// Stub member list; resolves on a later tick and counts how often it is requested.
			async getJoinedMembers(roomID) {
				called++
				t.equal(roomID, "!kLRqKKUQXcibIMtOpl:cadence.moe")
				return new Promise(resolve => {
					setTimeout(() => {
						resolve({
							joined: {
								"@secret:cadence.moe": {
									display_name: "cadence [they]",
									avatar_url: "whatever"
								},
								"@huckleton:cadence.moe": {
									display_name: "huck",
									avatar_url: "whatever"
								},
								"@_ooye_botrac4r:cadence.moe": {
									display_name: "botrac4r",
									avatar_url: "whatever"
								},
								"@_ooye_bot:cadence.moe": {
									display_name: "Out Of Your Element",
									avatar_url: "whatever"
								}
							}
						})
					})
				})
			}
		}
	})
	t.deepEqual(events, [{
		$type: "m.room.message",
		"m.mentions": {
			user_ids: [
				"@secret:cadence.moe",
			]
		},
		msgtype: "m.text",
		body: "I wonder if [@cadence](https://matrix.to/#/@secret:cadence.moe) saw this?",
		format: "org.matrix.custom.html",
		formatted_body: `I wonder if <a href="https://matrix.to/#/@secret:cadence.moe">@cadence</a> saw this?`
	}])
	// The counter was tracked but never checked; assert the member list was fetched exactly once.
	t.equal(called, 1, "should only look up the member list once")
})
// The written "@huck" matches no displayname here (that member's displayname is "wa"), but it
// is the start of the localpart "huckleton", so the mention should resolve to @huckleton:cadence.moe.
test("message2event: written @mentions may match part of the mxid", async t => {
	let called = 0
	const events = await messageToEvent({
		...data.message.advanced_written_at_mention_for_matrix,
		content: "I wonder if @huck saw this?"
	}, data.guild.general, {}, {
		api: {
			// Stub member list; resolves on a later tick and counts how often it is requested.
			async getJoinedMembers(roomID) {
				called++
				t.equal(roomID, "!kLRqKKUQXcibIMtOpl:cadence.moe")
				return new Promise(resolve => {
					setTimeout(() => {
						resolve({
							joined: {
								"@cadence:cadence.moe": {
									display_name: "cadence [they]",
									avatar_url: "whatever"
								},
								"@huckleton:cadence.moe": {
									display_name: "wa",
									avatar_url: "whatever"
								},
								"@_ooye_botrac4r:cadence.moe": {
									display_name: "botrac4r",
									avatar_url: "whatever"
								},
								"@_ooye_bot:cadence.moe": {
									display_name: "Out Of Your Element",
									avatar_url: "whatever"
								}
							}
						})
					})
				})
			}
		}
	})
	t.deepEqual(events, [{
		$type: "m.room.message",
		"m.mentions": {
			user_ids: [
				"@huckleton:cadence.moe",
			]
		},
		msgtype: "m.text",
		body: "I wonder if [@huck](https://matrix.to/#/@huckleton:cadence.moe) saw this?",
		format: "org.matrix.custom.html",
		formatted_body: `I wonder if <a href="https://matrix.to/#/@huckleton:cadence.moe">@huck</a> saw this?`
	}])
	// The counter was tracked but never checked; assert the member list was fetched exactly once.
	t.equal(called, 1, "should only look up the member list once")
})
// The whole message is one member's long, punctuated displayname; the expected link wraps the
// entire content and points at that member.
test("message2event: entire message may match elaborate display name", async t => {
	let called = 0
	const events = await messageToEvent({
		...data.message.advanced_written_at_mention_for_matrix,
		content: "@Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆"
	}, data.guild.general, {}, {
		api: {
			// Stub member list; resolves on a later tick and counts how often it is requested.
			async getJoinedMembers(roomID) {
				called++
				t.equal(roomID, "!kLRqKKUQXcibIMtOpl:cadence.moe")
				return new Promise(resolve => {
					setTimeout(() => {
						resolve({
							joined: {
								"@wa:cadence.moe": {
									display_name: "Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆",
									avatar_url: "whatever"
								},
								"@huckleton:cadence.moe": {
									display_name: "huck",
									avatar_url: "whatever"
								},
								"@_ooye_botrac4r:cadence.moe": {
									display_name: "botrac4r",
									avatar_url: "whatever"
								},
								"@_ooye_bot:cadence.moe": {
									display_name: "Out Of Your Element",
									avatar_url: "whatever"
								}
							}
						})
					})
				})
			}
		}
	})
	t.deepEqual(events, [{
		$type: "m.room.message",
		"m.mentions": {
			user_ids: [
				"@wa:cadence.moe",
			]
		},
		msgtype: "m.text",
		body: "[@Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆](https://matrix.to/#/@wa:cadence.moe)",
		format: "org.matrix.custom.html",
		formatted_body: `<a href="https://matrix.to/#/@wa:cadence.moe">@Cadence, Maid of Creation, Eye of Clarity, Empress of Hope ☆</a>`
	}])
	// The counter was tracked but never checked; assert the member list was fetched exactly once.
	t.equal(called, 1, "should only look up the member list once")
})
test("message2event: spoilers are removed from plaintext body", async t => {
const events = await messageToEvent({
content: "||**beatrice**||"

View file

@ -138,7 +138,9 @@ turndownService.addRule("inlineLink", {
if (node.getAttribute("data-message-id")) return `https://discord.com/channels/${node.getAttribute("data-guild-id")}/${node.getAttribute("data-channel-id")}/${node.getAttribute("data-message-id")}`
if (node.getAttribute("data-channel-id")) return `<#${node.getAttribute("data-channel-id")}>`
const href = node.getAttribute("href")
const suppressedHref = node.hasAttribute("data-suppress") ? "<" + href + ">" : href
let shouldSuppress = node.hasAttribute("data-suppress")
if (href.match(/^https?:\/\/matrix.to\//)) shouldSuppress = false // avoid double-escaping
const suppressedHref = shouldSuppress ? "<" + href + ">" : href
content = content.replace(/ @.*/, "")
if (href === content) return suppressedHref
if (decodeURIComponent(href).startsWith("https://matrix.to/#/@") && content[0] !== "@") content = "@" + content

View file

@ -3314,6 +3314,47 @@ test("event2message: mentioning matrix users works", async t => {
)
})
// The guild's only role grants SendMessages and nothing else. The matrix.to URL in the expected
// output is wrapped in a single pair of angle brackets.
test("event2message: matrix mentions are not double-escaped when embed links permission is denied", async t => {
	const event = {
		content: {
			msgtype: "m.text",
			body: "wrong body",
			format: "org.matrix.custom.html",
			formatted_body: `I'm just <a href="https://matrix.to/#/@rnl:cadence.moe">▲</a> testing mentions`
		},
		event_id: "$g07oYSZFWBkxohNEfywldwgcWj1hbhDzQ1sBAKvqOOU",
		origin_server_ts: 1688301929913,
		room_id: "!kLRqKKUQXcibIMtOpl:cadence.moe",
		sender: "@cadence:cadence.moe",
		type: "m.room.message",
		unsigned: {
			age: 405299
		}
	}
	const guild = {
		id: "123",
		roles: [{
			id: "123",
			name: "@everyone",
			permissions: DiscordTypes.PermissionFlagsBits.SendMessages
		}]
	}
	const actual = await eventToMessage(event, guild)
	const expectedMessage = {
		username: "cadence [they]",
		content: "I'm just [@▲](<https://matrix.to/#/@rnl:cadence.moe>) testing mentions",
		avatar_url: undefined,
		allowed_mentions: {
			parse: ["users", "roles"]
		}
	}
	t.deepEqual(actual, {
		ensureJoined: [],
		messagesToDelete: [],
		messagesToEdit: [],
		messagesToSend: [expectedMessage]
	})
})
test("event2message: multiple mentions are both escaped", async t => {
t.deepEqual(
await eventToMessage({

View file

@ -158,6 +158,7 @@ file._actuallyUploadDiscordFileToMxc = function(url, res) { throw new Error(`Not
require("../src/d2m/actions/register-user.test")
require("../src/d2m/converters/edit-to-changes.test")
require("../src/d2m/converters/emoji-to-key.test")
require("../src/d2m/converters/find-mentions.test")
require("../src/d2m/converters/lottie.test")
require("../src/d2m/converters/message-to-event.test")
require("../src/d2m/converters/message-to-event.test.components")