New unicode emoji processor
This commit is contained in:
parent
14574b4e2c
commit
f42eb6495f
7 changed files with 4015 additions and 46 deletions
77
scripts/emoji-surrogates-statistics.js
Normal file
77
scripts/emoji-surrogates-statistics.js
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
// @ts-check
|
||||||
|
|
||||||
|
const fs = require("fs")
|
||||||
|
const {join} = require("path")
|
||||||
|
// Every line of emojis.txt, percent-encoded with encodeURIComponent, so the lengths and offsets reported below are measured in encoded characters.
const s = fs.readFileSync(join(__dirname, "..", "src", "m2d", "converters", "emojis.txt"), "utf8").split("\n").map(x => encodeURIComponent(x))
|
||||||
|
// Percent-encoded UTF-8 bytes of U+FE0F; the report charts where this sequence first occurs inside each encoded emoji.
const searchPattern = "%EF%B8%8F"
|
||||||
|
|
||||||
|
/**
 * Groups items into a Map keyed by whatever `fn` returns for each item.
 * Keys appear in first-seen order and each group keeps the original item order.
 * Adapted from es.map.group-by.js in core-js.
 * @template K,V
 * @param {V[]} items values to group
 * @param {(item: V) => K} fn computes the grouping key of one item
 * @returns {Map<K, V[]>} every distinct key mapped to the items that produced it
 */
function groupBy(items, fn) {
	/** @type {Map<K, V[]>} */
	const map = new Map()
	for (const value of items) {
		const key = fn(value)
		// Single lookup: reuse the bucket if the key was seen before, otherwise start one.
		const bucket = map.get(key)
		if (bucket) bucket.push(value)
		else map.set(key, [value])
	}
	return map
}
|
||||||
|
|
||||||
|
/**
 * Renders each number as a horizontal bar of block characters, one string per item.
 * Bars are scaled so that the largest item fills width-1 columns; every string is padded to `width`.
 * @param {number[]} items non-negative values to draw
 * @param {number} width total width in characters of each returned string
 * @returns {string[]} one bar per item, in the same order as `items`
 */
function xhistogram(items, width) {
	// Partial-cell glyphs, indexed by how much of the final cell is filled (0 = blank).
	const chars = " ▏▎▍▌▋▊▉"
	const max = items.reduce((a, c) => c > a ? c : a, 0)
	return items.map(v => {
		// When every item is 0 there is nothing to scale against; draw empty bars instead of dividing by zero.
		const p = max > 0 ? v / max * (width-1) : 0
		return (
			"█".repeat(Math.floor(p)) /* whole part */
			+ chars[Math.ceil((p % 1) * (chars.length-1))] /* decimal part */
		).padEnd(width)
	})
}
|
||||||
|
|
||||||
|
/**
 * Renders a one-line histogram: one character per integer x in `xrange`, whose height
 * reflects how many times x occurs in `items` relative to the most frequent x in range.
 * @param {number[]} items observed values; values outside `xrange` are not drawn
 * @param {[number, number]} xrange inclusive [first, last] x value to draw
 * @param {boolean} [printHeader] when true, first prints a ruler line to stdout labelling the x axis
 * @returns {string} the histogram row, exactly one character per x value in `xrange`
 */
function yhistogram(items, xrange, printHeader = false) {
	const chars = "░▁_▂▃▄▅▆▇█"
	const ones = "₀₁₂₃₄₅₆₇₈₉"
	const tens = "0123456789"
	/** value (x) -> frequency (y) */
	const frequency = new Map()
	for (const x of items) frequency.set(x, (frequency.get(x) ?? 0) + 1)

	const xy = []
	let max = 0
	let header = ""
	for (let i = xrange[0]; i <= xrange[1]; i++) {
		if (printHeader) {
			// Ruler: "-" for negative x, the tens digit at multiples of ten, a subscript ones digit elsewhere.
			if (i < 0) header += "-"
			else if (i % 10 === 0) header += tens[Math.floor(i / 10) % 10] // wraps past 99 instead of indexing out of range
			else header += ones[i % 10]
		}
		const y = frequency.get(i) ?? 0
		if (y > max) max = y
		xy.push(y)
	}
	if (printHeader) console.log(header)

	// With nothing in range (max === 0) every column is the empty glyph rather than a division by zero.
	return xy.map(y => chars[max > 0 ? Math.ceil(y / max * (chars.length-1)) : 0]).join("")
}
|
||||||
|
|
||||||
|
// Bucket the percent-encoded emojis by their encoded length, longest bucket first.
const grouped = groupBy(s, encoded => encoded.length)
const sortedGroups = Array.from(grouped).sort(([lengthA], [lengthB]) => lengthB - lengthA)

// One horizontal bar per bucket showing how many emojis it holds.
const lengthHistogram = xhistogram(sortedGroups.map(([, members]) => members.length), 10)

// Width of the left-hand column; fixed by the first (longest) row so later, shorter rows line up.
let length = 0

sortedGroups.forEach(([encodedLength, members], row) => {
	const isFirstRow = row === 0
	// Offset of the first searchPattern occurrence in each emoji of this bucket (-1 when absent).
	const positions = members.map(encoded => encoded.indexOf(searchPattern))
	const positionBars = yhistogram(positions, [-1, encodedLength - searchPattern.length], isFirstRow)
	if (isFirstRow) length = positionBars.length + 1
	// Alternate the padding character so the eye can follow a row across the gap.
	const filler = row % 2 === 0 ? "⸱" : " "
	const lengthLabel = encodedLength.toString().padEnd(3)
	console.log(`${positionBars.padEnd(length, filler)}length ${lengthLabel} ${lengthHistogram[row]} ${members.length}`)
})
|
|
@ -53,7 +53,7 @@ async function removeReaction(data, reactions) {
|
||||||
*/
|
*/
|
||||||
async function removeEmojiReaction(data, reactions) {
|
async function removeEmojiReaction(data, reactions) {
|
||||||
const key = await emojiToKey.emojiToKey(data.emoji)
|
const key = await emojiToKey.emojiToKey(data.emoji)
|
||||||
const discordPreferredEncoding = emoji.encodeEmoji(key, undefined)
|
const discordPreferredEncoding = await emoji.encodeEmoji(key, undefined)
|
||||||
db.prepare("DELETE FROM reaction WHERE message_id = ? AND encoded_emoji = ?").run(data.message_id, discordPreferredEncoding)
|
db.prepare("DELETE FROM reaction WHERE message_id = ? AND encoded_emoji = ?").run(data.message_id, discordPreferredEncoding)
|
||||||
|
|
||||||
return converter.removeEmojiReaction(data, reactions, key)
|
return converter.removeEmojiReaction(data, reactions, key)
|
||||||
|
|
|
@ -20,7 +20,7 @@ async function addReaction(event) {
|
||||||
if (!messageID) return // Nothing can be done if the parent message was never bridged.
|
if (!messageID) return // Nothing can be done if the parent message was never bridged.
|
||||||
|
|
||||||
const key = event.content["m.relates_to"].key
|
const key = event.content["m.relates_to"].key
|
||||||
const discordPreferredEncoding = emoji.encodeEmoji(key, event.content.shortcode)
|
const discordPreferredEncoding = await emoji.encodeEmoji(key, event.content.shortcode)
|
||||||
if (!discordPreferredEncoding) return
|
if (!discordPreferredEncoding) return
|
||||||
|
|
||||||
await discord.snow.channel.createReaction(channelID, messageID, discordPreferredEncoding) // acting as the discord bot itself
|
await discord.snow.channel.createReaction(channelID, messageID, discordPreferredEncoding) // acting as the discord bot itself
|
||||||
|
|
|
@ -1,58 +1,98 @@
|
||||||
// @ts-check
|
// @ts-check
|
||||||
|
|
||||||
const assert = require("assert").strict
|
const fsp = require("fs").promises
|
||||||
const Ty = require("../../types")
|
const {join} = require("path")
|
||||||
|
const emojisp = fsp.readFile(join(__dirname, "emojis.txt"), "utf8").then(content => content.split("\n"))
|
||||||
|
|
||||||
const passthrough = require("../../passthrough")
|
const passthrough = require("../../passthrough")
|
||||||
const {sync, select} = passthrough
|
const {select} = passthrough
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {string} input
|
* @param {string} input
|
||||||
* @param {string | null | undefined} shortcode
|
* @param {string | null | undefined} shortcode
|
||||||
* @returns {string?}
|
* @returns {string?}
|
||||||
*/
|
*/
|
||||||
function encodeEmoji(input, shortcode) {
|
function encodeCustomEmoji(input, shortcode) {
|
||||||
let discordPreferredEncoding
|
// Custom emoji
|
||||||
if (input.startsWith("mxc://")) {
|
let row = select("emoji", ["emoji_id", "name"], {mxc_url: input}).get()
|
||||||
// Custom emoji
|
if (!row && shortcode) {
|
||||||
let row = select("emoji", ["emoji_id", "name"], {mxc_url: input}).get()
|
// Use the name to try to find a known emoji with the same name.
|
||||||
if (!row && shortcode) {
|
const name = shortcode.replace(/^:|:$/g, "")
|
||||||
// Use the name to try to find a known emoji with the same name.
|
row = select("emoji", ["emoji_id", "name"], {name: name}).get()
|
||||||
const name = shortcode.replace(/^:|:$/g, "")
|
}
|
||||||
row = select("emoji", ["emoji_id", "name"], {name: name}).get()
|
if (!row) {
|
||||||
}
|
// We don't have this emoji and there's no realistic way to just-in-time upload a new emoji somewhere. Sucks!
|
||||||
if (!row) {
|
return null
|
||||||
// We don't have this emoji and there's no realistic way to just-in-time upload a new emoji somewhere.
|
}
|
||||||
// Sucks!
|
return encodeURIComponent(`${row.name}:${row.emoji_id}`)
|
||||||
return null
|
}
|
||||||
}
|
|
||||||
// Cool, we got an exact or a candidate emoji.
|
/**
|
||||||
discordPreferredEncoding = encodeURIComponent(`${row.name}:${row.emoji_id}`)
|
* @param {string} input
|
||||||
} else {
|
* @returns {Promise<string?>} URL encoded!
|
||||||
// Default emoji
|
*/
|
||||||
// https://github.com/discord/discord-api-docs/issues/2723#issuecomment-807022205 ????????????
|
async function encodeDefaultEmoji(input) {
|
||||||
const encoded = encodeURIComponent(input)
|
// Default emoji
|
||||||
const encodedTrimmed = encoded.replace(/%EF%B8%8F/g, "")
|
|
||||||
|
// Shortcut: If there are ASCII letters then it's not an emoji, it's a freeform Matrix text reaction.
|
||||||
const forceTrimmedList = [
|
// (Regional indicator letters are not ASCII. ASCII digits might be part of an emoji.)
|
||||||
"%F0%9F%91%8D", // 👍
|
if (input.match(/[A-Za-z]/)) return null
|
||||||
"%F0%9F%91%8E", // 👎️
|
|
||||||
"%E2%AD%90", // ⭐
|
// Check against the dataset
|
||||||
"%F0%9F%90%88", // 🐈
|
const emojis = await emojisp
|
||||||
"%E2%9D%93", // ❓
|
const encoded = encodeURIComponent(input)
|
||||||
"%F0%9F%8F%86", // 🏆️
|
|
||||||
"%F0%9F%93%9A", // 📚️
|
// Best case scenario: they reacted with an exact replica of a valid emoji.
|
||||||
"%F0%9F%90%9F", // 🐟️
|
if (emojis.includes(input)) return encoded
|
||||||
]
|
|
||||||
|
// Maybe it has some extraneous \ufe0f or \ufe0e (at the end or in the middle), and it'll be valid if they're removed.
|
||||||
discordPreferredEncoding =
|
const trimmed = input.replace(/\ufe0e|\ufe0f/g, "")
|
||||||
( forceTrimmedList.includes(encodedTrimmed) ? encodedTrimmed
|
const trimmedEncoded = encodeURIComponent(trimmed)
|
||||||
: encodedTrimmed !== encoded && [...input].length === 2 ? encoded
|
if (trimmed !== input) {
|
||||||
: encodedTrimmed)
|
if (emojis.includes(trimmed)) return trimmedEncoded
|
||||||
|
}
|
||||||
console.log("add reaction from matrix:", input, encoded, encodedTrimmed, "chosen:", discordPreferredEncoding)
|
|
||||||
|
// Okay, well, maybe it was already missing one and it actually needs an extra \ufe0f, and it'll be valid if that's added.
|
||||||
|
else {
|
||||||
|
const appended = input + "\ufe0f"
|
||||||
|
const appendedEncoded = encodeURIComponent(appended)
|
||||||
|
if (emojis.includes(appended)) return appendedEncoded
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hmm, so adding or removing that from the end didn't help, but maybe there needs to be one in the middle? We can try some heuristics.
|
||||||
|
// These heuristics come from executing scripts/emoji-surrogates-statistics.js.
|
||||||
|
if (trimmedEncoded.length <= 21 && trimmed.match(/^[*#0-9]/)) { // ->19: Keycap digit? 0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ *️⃣ #️⃣
|
||||||
|
const keycap = trimmed[0] + "\ufe0f" + trimmed.slice(1)
|
||||||
|
if (emojis.includes(keycap)) return encodeURIComponent(keycap)
|
||||||
|
} else if (trimmedEncoded.length === 27 && trimmed[0] === "⛹") { // ->45: ⛹️♀️ ⛹️♂️
|
||||||
|
const balling = trimmed[0] + "\ufe0f" + trimmed.slice(1) + "\ufe0f"
|
||||||
|
if (emojis.includes(balling)) return encodeURIComponent(balling)
|
||||||
|
} else if (trimmedEncoded.length === 30) { // ->39: ⛓️💥 ❤️🩹 ❤️🔥 or ->48: 🏳️⚧️ 🏌️♀️ 🕵️♀️ 🏋️♀️ and gender variants
|
||||||
|
const thriving = trimmed[0] + "\ufe0f" + trimmed.slice(1)
|
||||||
|
if (emojis.includes(thriving)) return encodeURIComponent(thriving)
|
||||||
|
const powerful = trimmed.slice(0, 2) + "\ufe0f" + trimmed.slice(2) + "\ufe0f"
|
||||||
|
if (emojis.includes(powerful)) return encodeURIComponent(powerful)
|
||||||
|
} else if (trimmedEncoded.length === 51 && trimmed[3] === "❤") { // ->60: 👩❤️👨 👩❤️👩 👨❤️👨
|
||||||
|
const yellowRomance = trimmed.slice(0, 3) + "❤\ufe0f" + trimmed.slice(4)
|
||||||
|
if (emojis.includes(yellowRomance)) return encodeURIComponent(yellowRomance)
|
||||||
|
}
|
||||||
|
|
||||||
|
// there are a few more longer ones but I got bored
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {string} input
|
||||||
|
* @param {string | null | undefined} shortcode
|
||||||
|
* @returns {Promise<string?>}
|
||||||
|
*/
|
||||||
|
async function encodeEmoji(input, shortcode) {
|
||||||
|
if (input.startsWith("mxc://")) {
|
||||||
|
return encodeCustomEmoji(input, shortcode)
|
||||||
|
} else {
|
||||||
|
return encodeDefaultEmoji(input)
|
||||||
}
|
}
|
||||||
return discordPreferredEncoding
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports.encodeEmoji = encodeEmoji
|
module.exports.encodeEmoji = encodeEmoji
|
||||||
|
|
52
src/m2d/converters/emoji.test.js
Normal file
52
src/m2d/converters/emoji.test.js
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
// @ts-check
|
||||||
|
|
||||||
|
const {test} = require("supertape")
|
||||||
|
const {encodeEmoji} = require("./emoji")
|
||||||
|
|
||||||
|
/**
 * Fixtures for encodeEmoji: [test name, reaction key as received, expected URL-encoded result or null].
 *
 * Keys whose meaning depends on invisible code points are spelled with escapes so that the
 * presence or absence of U+FE0F (variation selector), U+200D (zero width joiner) and
 * U+20E3 (combining keycap) is visible in the source and cannot be altered silently by an
 * editor or copy/paste. The glyph in each trailing comment is only a reading aid.
 */
const cases = [
	["emoji: valid", "🦄", "%F0%9F%A6%84"],
	["emoji: freeform text", "ha", null],
	["emoji: suspicious unicode", "Ⓐ", null],
	["emoji: needs u+fe0f added", "\u263a", "%E2%98%BA%EF%B8%8F"], // ☺ without U+FE0F
	["emoji: needs u+fe0f removed", "\u2b50\ufe0f", "%E2%AD%90"], // ⭐ with a trailing U+FE0F
	["emoji: number key needs u+fe0f in the middle", "3\u20e3", "3%EF%B8%8F%E2%83%A3"], // 3 + keycap
	["emoji: hash key needs u+fe0f in the middle", "#\u20e3", "%23%EF%B8%8F%E2%83%A3"], // # + keycap
	["emoji: broken chains needs u+fe0f in the middle", "\u26d3\u200d\u{1f4a5}", "%E2%9B%93%EF%B8%8F%E2%80%8D%F0%9F%92%A5"], // ⛓ ZWJ 💥
	["emoji: balling needs u+fe0f in the middle", "\u26f9\u200d\u2640", "%E2%9B%B9%EF%B8%8F%E2%80%8D%E2%99%80%EF%B8%8F"], // ⛹ ZWJ ♀
	["emoji: trans flag needs u+fe0f in the middle", "\u{1f3f3}\u200d\u26a7", "%F0%9F%8F%B3%EF%B8%8F%E2%80%8D%E2%9A%A7%EF%B8%8F"], // 🏳 ZWJ ⚧
	["emoji: spy needs u+fe0f in the middle", "\u{1f575}\u200d\u2640", "%F0%9F%95%B5%EF%B8%8F%E2%80%8D%E2%99%80%EF%B8%8F"], // 🕵 ZWJ ♀
	["emoji: couple needs u+fe0f in the middle", "\u{1f469}\u200d\u2764\u200d\u{1f469}", "%F0%9F%91%A9%E2%80%8D%E2%9D%A4%EF%B8%8F%E2%80%8D%F0%9F%91%A9"], // 👩 ZWJ ❤ ZWJ 👩
]

// Register one test per fixture, in table order, each with a single equality assertion.
for (const [name, input, expected] of cases) {
	test(name, async t => {
		t.equal(await encodeEmoji(input, null), expected)
	})
}
|
3799
src/m2d/converters/emojis.txt
Normal file
3799
src/m2d/converters/emojis.txt
Normal file
File diff suppressed because it is too large
Load diff
|
@ -141,6 +141,7 @@ file._actuallyUploadDiscordFileToMxc = function(url, res) { throw new Error(`Not
|
||||||
require("../src/d2m/converters/user-to-mxid.test")
|
require("../src/d2m/converters/user-to-mxid.test")
|
||||||
require("../src/m2d/converters/diff-pins.test")
|
require("../src/m2d/converters/diff-pins.test")
|
||||||
require("../src/m2d/converters/event-to-message.test")
|
require("../src/m2d/converters/event-to-message.test")
|
||||||
|
require("../src/m2d/converters/emoji.test")
|
||||||
require("../src/m2d/converters/utils.test")
|
require("../src/m2d/converters/utils.test")
|
||||||
require("../src/m2d/converters/emoji-sheet.test")
|
require("../src/m2d/converters/emoji-sheet.test")
|
||||||
require("../src/discord/interactions/invite.test")
|
require("../src/discord/interactions/invite.test")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue