html to markdown fixes
This commit is contained in:
parent
2388f0cc47
commit
1e5c56c0d6
1 changed file with 16 additions and 16 deletions
|
@ -32,14 +32,14 @@ function parseHtmlEntities(str) {
|
|||
|
||||
function htmlToMarkdown(str, images = true, embed = true) {
|
||||
str = str.replaceAll("\\", "\\\\");
|
||||
str = str.replace(/<style(\s+[^>]+)?>(.|\n)*?<\/style>/gi, "");
|
||||
str = str.replace(/<a (\s+[^>]+)?href="([^"]+?)"(\s+[^>]+)?>(.+?)<\/a>/gi, (_, __, url, ___, text) => {
|
||||
str = str.replace(/<style(\s*[^>]+)?>(.|\n)*?<\/style>/gi, "");
|
||||
str = str.replace(/<a (\s*[^>]+)?href="([^"]+?)"(\s*[^>]+)?>(.+?)<\/a>/gi, (_, __, url, ___, text) => {
|
||||
url = url.replace(/^\/\//, "https://").replace("\\#", "#");
|
||||
return url == text ? url : `[${text}](${embed ? "" : "<"}${url}${embed ? "" : ">"})`;
|
||||
});
|
||||
if (images)
|
||||
str = str.replace(
|
||||
/<img (\s+[^>]+)?src="([^"]+?)"(\s+[^>]+)?(alt|title)="([^"]+?)"(\s+[^>]+)?\/>/gi,
|
||||
/<img (\s*[^>]+)?src="([^"]+?)"(\s*[^>]+)?(alt|title)="([^"]+?)"(\s*[^>]+)?\/>/gi,
|
||||
`[$5](${embed ? "" : "<"}$2${embed ? "" : ">"})`
|
||||
);
|
||||
str = str.replace(/<\/?\s*br\s*\/?>/gi, "\n");
|
||||
|
@ -49,7 +49,7 @@ function htmlToMarkdown(str, images = true, embed = true) {
|
|||
);
|
||||
str = str.replace(/<\/?p>/gi, "\n");
|
||||
str = str.replace(/<dd>((.|\n)*?)<\/dd>/gi, (_, inner) => "\u3000\u3000" + inner.split("\n").join("\n\u3000\u3000"));
|
||||
str = str.replace(/<ol(\s+[^>]+)?>((.|\n)*?)<\/ol>/gi, (_, __, inner) => {
|
||||
str = str.replace(/<ol(\s*[^>]+)?>((.|\n)*?)<\/ol>/gi, (_, __, inner) => {
|
||||
let index = 0;
|
||||
return inner
|
||||
.replace(/<li>/gi, () => {
|
||||
|
@ -59,7 +59,7 @@ function htmlToMarkdown(str, images = true, embed = true) {
|
|||
.replace(/<\/li>/gi, "\n")
|
||||
.replaceAll("\n\n", "\n");
|
||||
});
|
||||
str = str.replace(/<ul(\s+[^>]+)?>((.|\n)*?)<\/ul>/gi, (_, __, inner) => {
|
||||
str = str.replace(/<ul(\s*[^>]+)?>((.|\n)*?)<\/ul>/gi, (_, __, inner) => {
|
||||
let index = 0;
|
||||
return inner
|
||||
.replace(/<li>/gi, () => {
|
||||
|
@ -69,17 +69,17 @@ function htmlToMarkdown(str, images = true, embed = true) {
|
|||
.replace(/<\/li>/gi, "\n")
|
||||
.replaceAll("\n\n", "\n");
|
||||
});
|
||||
str = str.replace(/<\/?code(\s+[^>]+)?>/gi, "`");
|
||||
str = str.replace(/<\/?em(\s+[^>]+)?>/gi, "_");
|
||||
str = str.replace(/<\/?i(\s+[^>]+)?>/gi, "_");
|
||||
str = str.replace(/<\/?b(\s+[^>]+)?>/gi, "**");
|
||||
str = str.replace(/<\/?u(\s+[^>]+)?>/gi, "__");
|
||||
str = str.replace(/<\/?s(\s+[^>]+)?>/gi, "~~");
|
||||
str = str.replace(/<h1(\s+[^>]+)?>/gi, "# ");
|
||||
str = str.replace(/<h2(\s+[^>]+)?>/gi, "## ");
|
||||
str = str.replace(/<h3(\s+[^>]+)?>/gi, "### ");
|
||||
str = str.replace(/<\/?h4(\s+[^>]+)?>/gi, "**");
|
||||
str = str.replace(/<(math|noscript)(\s+[^>]+)?>((.|\n)*?)<\/(math|noscript)>/gi, "");
|
||||
str = str.replace(/<\/?code(\s*[^>]+)?>/gi, "`");
|
||||
str = str.replace(/<\/?em(\s*[^>]+)?>/gi, "_");
|
||||
str = str.replace(/<\/?i(\s*[^>]+)?>/gi, "_");
|
||||
str = str.replace(/<\/?b(\s*[^>]+)?>/gi, "**");
|
||||
str = str.replace(/<\/?u(\s*[^>]+)?>/gi, "__");
|
||||
str = str.replace(/<\/?s(\s*[^>]+)?>/gi, "~~");
|
||||
str = str.replace(/<h1(\s*[^>]+)?>/gi, "# ");
|
||||
str = str.replace(/<h2(\s*[^>]+)?>/gi, "## ");
|
||||
str = str.replace(/<h3(\s*[^>]+)?>/gi, "### ");
|
||||
str = str.replace(/<\/?h4(\s*[^>]+)?>/gi, "**");
|
||||
str = str.replace(/<(math|noscript)(\s*[^>]+)?>((.|\n)*?)<\/(math|noscript)>/gi, "");
|
||||
str = str.replace(/<[^>]+?>/gi, "");
|
||||
str = parseHtmlEntities(str);
|
||||
// whyyyyyyyyyyyy
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue