HTML to Markdown fixes
This commit is contained in:
		
							parent
							
								
									2388f0cc47
								
							
						
					
					
						commit
						1e5c56c0d6
					
				
					 1 changed file with 16 additions and 16 deletions
				
			
		| 
						 | 
				
			
			@ -32,14 +32,14 @@ function parseHtmlEntities(str) {
 | 
			
		|||
 | 
			
		||||
function htmlToMarkdown(str, images = true, embed = true) {
 | 
			
		||||
  str = str.replaceAll("\\", "\\\\");
 | 
			
		||||
  str = str.replace(/<style(\s+[^>]+)?>(.|\n)*?<\/style>/gi, "");
 | 
			
		||||
  str = str.replace(/<a (\s+[^>]+)?href="([^"]+?)"(\s+[^>]+)?>(.+?)<\/a>/gi, (_, __, url, ___, text) => {
 | 
			
		||||
  str = str.replace(/<style(\s*[^>]+)?>(.|\n)*?<\/style>/gi, "");
 | 
			
		||||
  str = str.replace(/<a (\s*[^>]+)?href="([^"]+?)"(\s*[^>]+)?>(.+?)<\/a>/gi, (_, __, url, ___, text) => {
 | 
			
		||||
    url = url.replace(/^\/\//, "https://").replace("\\#", "#");
 | 
			
		||||
    return url == text ? url : `[${text}](${embed ? "" : "<"}${url}${embed ? "" : ">"})`;
 | 
			
		||||
  });
 | 
			
		||||
  if (images)
 | 
			
		||||
    str = str.replace(
 | 
			
		||||
      /<img (\s+[^>]+)?src="([^"]+?)"(\s+[^>]+)?(alt|title)="([^"]+?)"(\s+[^>]+)?\/>/gi,
 | 
			
		||||
      /<img (\s*[^>]+)?src="([^"]+?)"(\s*[^>]+)?(alt|title)="([^"]+?)"(\s*[^>]+)?\/>/gi,
 | 
			
		||||
      `[$5](${embed ? "" : "<"}$2${embed ? "" : ">"})`
 | 
			
		||||
    );
 | 
			
		||||
  str = str.replace(/<\/?\s*br\s*\/?>/gi, "\n");
 | 
			
		||||
| 
						 | 
				
			
			@ -49,7 +49,7 @@ function htmlToMarkdown(str, images = true, embed = true) {
 | 
			
		|||
  );
 | 
			
		||||
  str = str.replace(/<\/?p>/gi, "\n");
 | 
			
		||||
  str = str.replace(/<dd>((.|\n)*?)<\/dd>/gi, (_, inner) => "\u3000\u3000" + inner.split("\n").join("\n\u3000\u3000"));
 | 
			
		||||
  str = str.replace(/<ol(\s+[^>]+)?>((.|\n)*?)<\/ol>/gi, (_, __, inner) => {
 | 
			
		||||
  str = str.replace(/<ol(\s*[^>]+)?>((.|\n)*?)<\/ol>/gi, (_, __, inner) => {
 | 
			
		||||
    let index = 0;
 | 
			
		||||
    return inner
 | 
			
		||||
      .replace(/<li>/gi, () => {
 | 
			
		||||
| 
						 | 
				
			
			@ -59,7 +59,7 @@ function htmlToMarkdown(str, images = true, embed = true) {
 | 
			
		|||
      .replace(/<\/li>/gi, "\n")
 | 
			
		||||
      .replaceAll("\n\n", "\n");
 | 
			
		||||
  });
 | 
			
		||||
  str = str.replace(/<ul(\s+[^>]+)?>((.|\n)*?)<\/ul>/gi, (_, __, inner) => {
 | 
			
		||||
  str = str.replace(/<ul(\s*[^>]+)?>((.|\n)*?)<\/ul>/gi, (_, __, inner) => {
 | 
			
		||||
    let index = 0;
 | 
			
		||||
    return inner
 | 
			
		||||
      .replace(/<li>/gi, () => {
 | 
			
		||||
| 
						 | 
				
			
			@ -69,17 +69,17 @@ function htmlToMarkdown(str, images = true, embed = true) {
 | 
			
		|||
      .replace(/<\/li>/gi, "\n")
 | 
			
		||||
      .replaceAll("\n\n", "\n");
 | 
			
		||||
  });
 | 
			
		||||
  str = str.replace(/<\/?code(\s+[^>]+)?>/gi, "`");
 | 
			
		||||
  str = str.replace(/<\/?em(\s+[^>]+)?>/gi, "_");
 | 
			
		||||
  str = str.replace(/<\/?i(\s+[^>]+)?>/gi, "_");
 | 
			
		||||
  str = str.replace(/<\/?b(\s+[^>]+)?>/gi, "**");
 | 
			
		||||
  str = str.replace(/<\/?u(\s+[^>]+)?>/gi, "__");
 | 
			
		||||
  str = str.replace(/<\/?s(\s+[^>]+)?>/gi, "~~");
 | 
			
		||||
  str = str.replace(/<h1(\s+[^>]+)?>/gi, "# ");
 | 
			
		||||
  str = str.replace(/<h2(\s+[^>]+)?>/gi, "## ");
 | 
			
		||||
  str = str.replace(/<h3(\s+[^>]+)?>/gi, "### ");
 | 
			
		||||
  str = str.replace(/<\/?h4(\s+[^>]+)?>/gi, "**");
 | 
			
		||||
  str = str.replace(/<(math|noscript)(\s+[^>]+)?>((.|\n)*?)<\/(math|noscript)>/gi, "");
 | 
			
		||||
  str = str.replace(/<\/?code(\s*[^>]+)?>/gi, "`");
 | 
			
		||||
  str = str.replace(/<\/?em(\s*[^>]+)?>/gi, "_");
 | 
			
		||||
  str = str.replace(/<\/?i(\s*[^>]+)?>/gi, "_");
 | 
			
		||||
  str = str.replace(/<\/?b(\s*[^>]+)?>/gi, "**");
 | 
			
		||||
  str = str.replace(/<\/?u(\s*[^>]+)?>/gi, "__");
 | 
			
		||||
  str = str.replace(/<\/?s(\s*[^>]+)?>/gi, "~~");
 | 
			
		||||
  str = str.replace(/<h1(\s*[^>]+)?>/gi, "# ");
 | 
			
		||||
  str = str.replace(/<h2(\s*[^>]+)?>/gi, "## ");
 | 
			
		||||
  str = str.replace(/<h3(\s*[^>]+)?>/gi, "### ");
 | 
			
		||||
  str = str.replace(/<\/?h4(\s*[^>]+)?>/gi, "**");
 | 
			
		||||
  str = str.replace(/<(math|noscript)(\s*[^>]+)?>((.|\n)*?)<\/(math|noscript)>/gi, "");
 | 
			
		||||
  str = str.replace(/<[^>]+?>/gi, "");
 | 
			
		||||
  str = parseHtmlEntities(str);
 | 
			
		||||
  // whyyyyyyyyyyyy
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue