fedimbed: parse page for icon, site name and color (if we don't have one)

This commit is contained in:
Cynthia Foxwell 2025-04-05 11:23:17 -06:00
parent 2ba24d01aa
commit b1d5fda3a2
Signed by: Cynosphere
SSH key fingerprint: SHA256:H3SM8ufP/uxqLwKSH7xY89TDnbR9uOHzjLoBr0tlajk

View file

@ -2,6 +2,7 @@ const {Message} = require("@projectdysnomia/dysnomia");
const fs = require("node:fs");
const httpSignature = require("@peertube/http-signature");
const {XMLParser} = require("fast-xml-parser");
const events = require("#lib/events.js");
const logger = require("#lib/logger.js");
@ -12,7 +13,8 @@ const {MessageFlags, ApplicationCommandOptionTypes, Permissions} = require("#uti
const {getUploadLimit} = require("#util/misc.js");
const {htmlToMarkdown} = require("#util/html.js");
const FRIENDLY_USERAGENT = "HiddenPhox/fedimbed (https://gitdab.com/Cynosphere/HiddenPhox)";
const FRIENDLY_USERAGENT =
"Mozilla/5.0 (compatible; HiddenPhox/fedimbed; +https://gitdab.com/Cynosphere/HiddenPhox) Discordbot/2.0";
const URLS_REGEX = /(?:\s|^|\]\()(\|\|\s*)?(https?:\/\/[^\s<]+[^<.,:;"'\]|)\s])(\s*\)?\|\||\s*[\S]*?\))?/g;
const SPOILER_REGEX = /(?:\s|^)\|\|([\s\S]+?)\|\|/;
@ -77,8 +79,23 @@ const BSKY_DOMAINS = [
"fixvx.com",
"stupidpenisx.com",
"girlcockx.com",
// these two are base64 encoded just because i dont want those words in my codebase
Buffer.from("aGl0bGVyeC5jb20=", "base64").toString(),
Buffer.from("bmlnZ2VyeC5jb20=", "base64").toString(),
];*/
// Options for the parser that reads a fetched post page. The page is HTML
// rather than strict XML, so void tags are declared as unpaired and the
// contents of body/style/script are left unparsed.
const PAGE_PARSER_OPTIONS = {
  // attributes are kept and exposed with a "$" prefix (e.g. `tag.$rel`)
  ignoreAttributes: false,
  attributeNamePrefix: "$",
  preserveOrder: false, // only want the head tag
  stopNodes: ["*.body", "*.style", "*.script"],
  unpairedTags: ["hr", "br", "link", "meta"],
  // decode XML and HTML entities in values
  processEntities: true,
  htmlEntities: true,
  alwaysCreateTextNode: true,
  trimValues: false,
};
const pageParser = new XMLParser(PAGE_PARSER_OPTIONS);
// Lookup cache keyed by domain, pre-seeded with the one known domain that
// cannot be detected automatically.
const domainCache = new Map([
  ["cohost.org", "cohost"], // no nodeinfo
]);
@ -854,7 +871,7 @@ async function processUrl(msg, url, spoiler = false, command = false) {
sensitive = postData.sensitive;
if (postData.in_reply_to_id) {
// this url is a dummy and will failed if gone to normally
// this url is a dummy and will fail if gone to normally
const replyData = await fetchPost(
`https://${urlObj.origin}/@fedimbed_reply_fake_user_sorry/${postData.in_reply_to_id}`,
platform,
@ -1203,6 +1220,58 @@ async function processUrl(msg, url, spoiler = false, command = false) {
const user = author.name ? `${author.name} (${author.handle})` : author.handle;
// Best-effort scrape of the linked page's <head> for a footer icon, a site
// name and a theme colour. Every failure in here is non-fatal: the embed is
// simply built without those extras.

// The parser yields a bare object for a single child tag, an array for several
// and nothing when the tag is absent; always hand back an array.
const toTagList = (value) => (value == null ? [] : [].concat(value));

// Largest pixel area declared by a `sizes` attribute ("32x32", "16x16 32x32").
// Returns 0 when no explicit WxH token is present (e.g. "any").
const largestIconArea = (sizes) => {
  let best = 0;
  for (const token of String(sizes).trim().split(/\s+/)) {
    const dims = /^(\d+)x(\d+)$/i.exec(token);
    if (dims) {
      // explicit radix; `.map(parseInt)` would pass the array index as radix
      best = Math.max(best, Number.parseInt(dims[1], 10) * Number.parseInt(dims[2], 10));
    }
  }
  return best;
};

// "#rrggbb" or "#rgb" -> integer colour. Anything else (named colours,
// rgb(), ...) -> undefined so the caller keeps whatever colour it had.
const parseThemeColor = (value) => {
  const hex = /^#([0-9a-f]{3}|[0-9a-f]{6})$/i.exec(String(value).trim())?.[1];
  if (!hex) return undefined;
  const full = hex.length === 3 ? [...hex].map((c) => c + c).join("") : hex;
  return Number.parseInt(full, 16);
};

let headTag;
let lastIconSize = 0;
let icon;

try {
  const res = await fetch(url, {
    headers: {
      "User-Agent": FRIENDLY_USERAGENT,
    },
  });
  const page = await res.text();
  if (page) {
    headTag = pageParser.parse(page)?.html?.head;
  }
} catch {
  // noop: network and parse failures just mean no icon/site name/colour
}

if (headTag) {
  for (const tag of toTagList(headTag.link)) {
    // rel is a space-separated token list ("shortcut icon", "icon", ...)
    const rels = String(tag.$rel ?? "")
      .toLowerCase()
      .split(/\s+/);
    if (!tag.$href || !(rels.includes("icon") || rels.includes("apple-touch-icon"))) {
      continue;
    }

    const size = tag.$sizes ? largestIconArea(tag.$sizes) : 0;
    if (size > lastIconSize) {
      lastIconSize = size;
      icon = tag.$href;
    } else if (size === 0 && lastIconSize === 0) {
      // unsized icons are only a fallback; never displace a sized one
      icon = tag.$href;
    }
  }

  for (const tag of toTagList(headTag.meta)) {
    const content = tag.$content;
    if (typeof content !== "string" || content === "") {
      continue;
    }

    if (tag.$property === "og:site_name") {
      if (content !== platformName) {
        platformName = platformName === "<no nodeinfo>" ? content : `${content} (${platformName})`;
      }
    } else if (!color && tag.$name === "theme-color") {
      const parsedColor = parseThemeColor(content);
      if (parsedColor !== undefined) {
        color = parsedColor;
      }
    }
  }
}

if (icon) {
  try {
    // resolves root-relative, path-relative and protocol-relative hrefs alike
    const resolved = new URL(icon, url);
    // only http(s) is kept; data: and other schemes are dropped
    icon = /^https?:$/.test(resolved.protocol) ? resolved.href : undefined;
  } catch {
    icon = undefined;
  }
}
const baseEmbed = {
color,
url,
@ -1222,6 +1291,7 @@ async function processUrl(msg, url, spoiler = false, command = false) {
}
: null,
footer: {
icon_url: icon,
text: platformName,
},
thumbnail: {