fedimbed: attempt to fix anchor tags after the fact since the replace call doesn't seem to be the true issue here

2024-01-15 14:34:10 -07:00 · 2024-01-15 14:34:10 -07:00 · 6c97c3f547
commit 6c97c3f547
parent 7dcd23d93d
1 changed files with 30 additions and 26 deletions
--- a/src/modules/fedimbed.js
+++ b/src/modules/fedimbed.js
@ -118,6 +118,34 @@ async function signedFetch(url, options) {
  return await fetch(url, options);
 }
 /**
  * Converts a fragment of HTML into a rough Markdown equivalent using a
  * fixed sequence of regex replacements.
  *
  * Order matters: anchors and images are rewritten first, then line breaks,
  * blockquotes, paragraphs, lists and inline formatting; every remaining tag
  * is stripped, HTML entities are decoded via `parseHtmlEntities`, and
  * finally links whose label is itself a URL are collapsed to the bare
  * target URL.
  *
  * @param {string} str - HTML source text.
  * @returns {string} The converted Markdown text.
  */
 function htmlToMarkdown(str) {
  // FIXME: stop being lazy and use an html parser
  let markdown = str;

  // <a href="url">text</a> -> [text](url), or just the url when both match.
  markdown = markdown.replace(
    /<a .*?href="([^"]+?)".*?>(.+?)<\/a>/gi,
    (_, url, text) => (url === text ? url : `[${text}](${url})`)
  );
  // <img src="url" alt|title="label" /> -> [label](url)
  markdown = markdown.replace(
    /<img .*?src="([^"]+?)".*?(alt|title)="([^"]+?)".*?\/>/gi,
    "[$3]($1)"
  );
  markdown = markdown.replace(/<\/?\s*br\s*\/?>/gi, "\n");
  // Prefix every line of a blockquote with "> ". This runs after the <br>
  // conversion above so those breaks are already real newlines.
  markdown = markdown.replace(
    /<blockquote.*?>((.|\n)*?)<\/blockquote>/gi,
    (_, quote) => `> ${quote.split("\n").join("\n> ")}`
  );
  markdown = markdown.replace(/<\/p><p>/gi, "\n\n");
  markdown = markdown.replace(/<ol>/gi, "\n");
  markdown = markdown.replace(/<li>/gi, "- ");
  markdown = markdown.replace(/<\/li>/gi, "\n");
  markdown = markdown.replace(/<\/?code>/gi, "`");
  markdown = markdown.replace(/<\/?em>/gi, "*");
  markdown = markdown.replace(/<\/?u>/gi, "__");
  markdown = markdown.replace(/<\/?s>/gi, "~~");
  // Drop whatever tags are still left.
  markdown = markdown.replace(/(<([^>]+)>)/gi, "");
  markdown = parseHtmlEntities(markdown);

  // Anchors whose label was a URL wrapped in extra markup only become
  // "[https://...](https://...)" once the inner tags are stripped above, so
  // collapse those to the bare target URL here, after the fact.
  markdown = markdown.replace(
    /\[https?:\/\/.+?\]\((https?:\/\/.+?)\)/gi,
    "$1"
  );

  // The result must be returned: callers assign it back to their variable.
  return markdown;
 }
 async function processUrl(msg, url, spoiler = false) {
  let invalidUrl = false;
  let urlObj;
@ -630,37 +658,13 @@ async function processUrl(msg, url, spoiler = false) {
  content = content ?? "";
  cw = cw ?? "";
-  // FIXME: stop being lazy and use an html parser
+  content = htmlToMarkdown(content);
  content = content.replace(
    /<a .*?href="([^"]+?)".*?>(.+?)<\/a>/gi,
    (_, url, text) => (url == text ? url : `[${text}](${url})`)
  );
  content = content.replace(
    /<img .*?src="([^"]+?)".*?(alt|title)="([^"]+?)".*?\/>/gi,
    "[$3]($1)"
  );
  content = content.replace(/<\/?\s*br\s*\/?>/gi, "\n");
  content = content.replace(
    /<blockquote.*?>((.|\n)*?)<\/blockquote>/gi,
    (_, quote) => "> " + quote.split("\n").join("\n> ")
  );
  content = content.replace(/<\/p><p>/gi, "\n\n");
  content = content.replace(/<ol>/gi, "\n");
  content = content.replace(/<li>/gi, "- ");
  content = content.replace(/<\/li>/gi, "\n");
  content = content.replace(/<\/?code>/gi, "`");
  content = content.replace(/<\/?em>/gi, "*");
  content = content.replace(/<\/?u>/gi, "__");
  content = content.replace(/<\/?s>/gi, "~~");
  content = content.replace(/(<([^>]+)>)/gi, "");
  content = parseHtmlEntities(content);
  for (const emote of emotes) {
    content = content.replaceAll(emote.name, `[${emote.name}](${emote.url})`);
  }
-  cw = cw.replace(/(<([^>]+)>)/gi, "");
+  cw = htmlToMarkdown(cw);
  cw = parseHtmlEntities(cw);
  let desc = "";
  let MAX_LENGTH = 3999;