From b1d5fda3a2c2d07c6b4061408f816c13488c6d3a Mon Sep 17 00:00:00 2001 From: Cynthia Foxwell Date: Sat, 5 Apr 2025 11:23:17 -0600 Subject: [PATCH] fedimbed: parse page for icon, site name and color (if we dont have one) --- src/modules/fedimbed.js | 85 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/src/modules/fedimbed.js b/src/modules/fedimbed.js index 041c02c..c27389f 100644 --- a/src/modules/fedimbed.js +++ b/src/modules/fedimbed.js @@ -2,6 +2,7 @@ const {Message} = require("@projectdysnomia/dysnomia"); const fs = require("node:fs"); const httpSignature = require("@peertube/http-signature"); +const {XMLParser} = require("fast-xml-parser"); const events = require("#lib/events.js"); const logger = require("#lib/logger.js"); @@ -12,7 +13,8 @@ const {MessageFlags, ApplicationCommandOptionTypes, Permissions} = require("#uti const {getUploadLimit} = require("#util/misc.js"); const {htmlToMarkdown} = require("#util/html.js"); -const FRIENDLY_USERAGENT = "HiddenPhox/fedimbed (https://gitdab.com/Cynosphere/HiddenPhox)"; +const FRIENDLY_USERAGENT = + "Mozilla/5.0 (compatible; HiddenPhox/fedimbed; +https://gitdab.com/Cynosphere/HiddenPhox) Discordbot/2.0"; const URLS_REGEX = /(?:\s|^|\]\()(\|\|\s*)?(https?:\/\/[^\s<]+[^<.,:;"'\]|)\s])(\s*\)?\|\||\s*[\S]*?\))?/g; const SPOILER_REGEX = /(?:\s|^)\|\|([\s\S]+?)\|\|/; @@ -77,8 +79,23 @@ const BSKY_DOMAINS = [ "fixvx.com", "stupidpenisx.com", "girlcockx.com", + // these two are base64 encoded just because i dont want those words in my codebase + Buffer.from("aGl0bGVyeC5jb20=", "base64").toString(), + Buffer.from("bmlnZ2VyeC5jb20=", "base64").toString(), ];*/ +const pageParser = new XMLParser({ + ignoreAttributes: false, + preserveOrder: false, // only want the head tag + unpairedTags: ["hr", "br", "link", "meta"], + stopNodes: ["*.body", "*.style", "*.script"], + processEntities: true, + htmlEntities: true, + attributeNamePrefix: "$", + alwaysCreateTextNode: true, +
trimValues: false, +}); + const domainCache = new Map(); domainCache.set("cohost.org", "cohost"); // no nodeinfo @@ -854,7 +871,7 @@ async function processUrl(msg, url, spoiler = false, command = false) { sensitive = postData.sensitive; if (postData.in_reply_to_id) { - // this url is a dummy and will failed if gone to normally + // this url is a dummy and will fail if gone to normally const replyData = await fetchPost( `https://${urlObj.origin}/@fedimbed_reply_fake_user_sorry/${postData.in_reply_to_id}`, platform, @@ -1203,6 +1220,69 @@ async function processUrl(msg, url, spoiler = false, command = false) { const user = author.name ? `${author.name} (${author.handle})` : author.handle; + let headTag, + lastIconSize = 0, + icon; + try { + const page = await fetch(url, { + headers: { + "User-Agent": FRIENDLY_USERAGENT, + }, + }) + .then((res) => res.text()) + .catch(() => {}); + + if (page) { + headTag = pageParser.parse(page)?.html?.head; + } + } catch { + // noop + } + + if (headTag) { + // fast-xml-parser only returns an array for repeated tags; normalise none/one/many to an array + for (const tag of [headTag.link ?? []].flat()) { + if (tag.$rel == "icon" || tag.$rel == "apple-touch-icon") { + if (tag.$sizes) { + // wrap parseInt so the index that map passes is not taken as the radix + const [w, h] = tag.$sizes.split("x").map((n) => parseInt(n, 10)); + const size = w * h; + if (size > lastIconSize) { + lastIconSize = size; + icon = tag.$href; + } + } else { + icon = tag.$href; + } + } + } + + for (const tag of [headTag.meta ?? []].flat()) { + if (tag.$property == "og:site_name" && tag.$content != platformName) { + if (platformName == "") { + platformName = tag.$content; + } else { + platformName = `${tag.$content} (${platformName})`; + } + } else if (!color && tag.$name == "theme-color") { + // theme-color is free-form CSS; only a #rrggbb value is converted to an integer color + const hex = /^#([0-9a-f]{6})$/i.exec((tag.$content ?? "").trim()); + if (hex) { + color = parseInt(hex[1], 16); + } + } + } + } + + if (icon) { + try { + // resolve root-relative, path-relative and protocol-relative hrefs against the post url + icon = new URL(icon, url).href; + } catch { + icon = undefined; + } + } + const baseEmbed = { color, url, @@ -1222,6 +1302,7 @@ async function processUrl(msg, url, spoiler = false, command = false) { } : null, footer: { + icon_url: icon, text: platformName, }, thumbnail: {