fedimbed: attempt to fix anchor tags after the fact, since the replace call doesn't seem to be the true issue here

This commit is contained in:
Cynthia Foxwell 2024-01-15 14:34:10 -07:00
parent 7dcd23d93d
commit 6c97c3f547
1 changed files with 30 additions and 26 deletions

View File

@ -118,6 +118,34 @@ async function signedFetch(url, options) {
return await fetch(url, options);
}
/**
 * Converts an HTML fragment into Markdown-style text using a fixed sequence
 * of regular-expression substitutions.
 *
 * The order of the substitutions matters: anchors, images, line breaks,
 * blockquotes, paragraphs, lists and inline formatting are rewritten first,
 * then every remaining tag is stripped, and only then are HTML entities
 * decoded via `parseHtmlEntities` (defined elsewhere in this file).
 *
 * @param {string} str - HTML source text.
 * @returns {string} The converted text.
 */
function htmlToMarkdown(str) {
  // FIXME: stop being lazy and use an html parser

  // <a href="URL">TEXT</a> -> [TEXT](URL), or just URL when the text is the URL.
  str = str.replace(/<a .*?href="([^"]+?)".*?>(.+?)<\/a>/gi, (_, url, text) =>
    url === text ? url : `[${text}](${url})`
  );

  // Self-closing <img src="URL" alt|title="LABEL" /> -> [LABEL](URL).
  str = str.replace(
    /<img .*?src="([^"]+?)".*?(alt|title)="([^"]+?)".*?\/>/gi,
    "[$3]($1)"
  );

  // Any <br>, <br/>, </br> variant becomes a newline.
  str = str.replace(/<\/?\s*br\s*\/?>/gi, "\n");

  // Prefix every line inside a blockquote with "> ".
  str = str.replace(
    /<blockquote.*?>((.|\n)*?)<\/blockquote>/gi,
    (_, quote) => "> " + quote.split("\n").join("\n> ")
  );

  // Paragraph boundaries and list structure.
  str = str.replace(/<\/p><p>/gi, "\n\n");
  str = str.replace(/<ol>/gi, "\n");
  str = str.replace(/<li>/gi, "- ");
  str = str.replace(/<\/li>/gi, "\n");

  // Inline formatting tags map onto their Markdown delimiters.
  str = str.replace(/<\/?code>/gi, "`");
  str = str.replace(/<\/?em>/gi, "*");
  str = str.replace(/<\/?u>/gi, "__");
  str = str.replace(/<\/?s>/gi, "~~");

  // Drop whatever tags are left, then decode entities.
  str = str.replace(/(<([^>]+)>)/gi, "");
  str = parseHtmlEntities(str);

  // After-the-fact cleanup: a Markdown link whose label is itself a URL
  // (e.g. a shortened display form of the href) is collapsed to the bare
  // target URL.
  str = str.replace(/\[https?:\/\/.+?\]\((https?:\/\/.+?)\)/gi, "$1");

  // Callers assign the result back, so the converted string must be returned.
  return str;
}
async function processUrl(msg, url, spoiler = false) {
let invalidUrl = false;
let urlObj;
@ -630,37 +658,13 @@ async function processUrl(msg, url, spoiler = false) {
content = content ?? "";
cw = cw ?? "";
// FIXME: stop being lazy and use an html parser
content = content.replace(
/<a .*?href="([^"]+?)".*?>(.+?)<\/a>/gi,
(_, url, text) => (url == text ? url : `[${text}](${url})`)
);
content = content.replace(
/<img .*?src="([^"]+?)".*?(alt|title)="([^"]+?)".*?\/>/gi,
"[$3]($1)"
);
content = content.replace(/<\/?\s*br\s*\/?>/gi, "\n");
content = content.replace(
/<blockquote.*?>((.|\n)*?)<\/blockquote>/gi,
(_, quote) => "> " + quote.split("\n").join("\n> ")
);
content = content.replace(/<\/p><p>/gi, "\n\n");
content = content.replace(/<ol>/gi, "\n");
content = content.replace(/<li>/gi, "- ");
content = content.replace(/<\/li>/gi, "\n");
content = content.replace(/<\/?code>/gi, "`");
content = content.replace(/<\/?em>/gi, "*");
content = content.replace(/<\/?u>/gi, "__");
content = content.replace(/<\/?s>/gi, "~~");
content = content.replace(/(<([^>]+)>)/gi, "");
content = parseHtmlEntities(content);
content = htmlToMarkdown(content);
for (const emote of emotes) {
content = content.replaceAll(emote.name, `[${emote.name}](${emote.url})`);
}
cw = cw.replace(/(<([^>]+)>)/gi, "");
cw = parseHtmlEntities(cw);
cw = htmlToMarkdown(cw);
let desc = "";
let MAX_LENGTH = 3999;