parent
							
								
									b2fb92cf0f
								
							
						
					
					
						commit
						4ecd036d0a
					
				
					 2 changed files with 55 additions and 19 deletions
				
			
		|  | @ -1,8 +1,10 @@ | ||||||
| import { parseFragment, DefaultTreeDocumentFragment } from 'parse5'; | import * as parse5 from 'parse5'; | ||||||
| import { urlRegexFull } from './prelude'; | import treeAdapter = require('parse5/lib/tree-adapters/default'); | ||||||
|  | import { URL } from 'url'; | ||||||
|  | import { urlRegex, urlRegexFull } from './prelude'; | ||||||
| 
 | 
 | ||||||
| export function fromHtml(html: string, hashtagNames?: string[]): string { | export function fromHtml(html: string, hashtagNames?: string[]): string { | ||||||
| 	const dom = parseFragment(html) as DefaultTreeDocumentFragment; | 	const dom = parse5.parseFragment(html); | ||||||
| 
 | 
 | ||||||
| 	let text = ''; | 	let text = ''; | ||||||
| 
 | 
 | ||||||
|  | @ -12,30 +14,35 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { | ||||||
| 
 | 
 | ||||||
| 	return text.trim(); | 	return text.trim(); | ||||||
| 
 | 
 | ||||||
| 	function getText(node: any): string { | 	function getText(node: parse5.Node): string { | ||||||
| 		if (node.nodeName === '#text') return node.value; | 		if (treeAdapter.isTextNode(node)) return node.value; | ||||||
|  | 		if (!treeAdapter.isElementNode(node)) return ''; | ||||||
| 
 | 
 | ||||||
| 		if (node.childNodes) { | 		if (node.childNodes) { | ||||||
| 			return node.childNodes.map((n: any) => getText(n)).join(''); | 			return node.childNodes.map(n => getText(n)).join(''); | ||||||
| 		} | 		} | ||||||
| 
 | 
 | ||||||
| 		return ''; | 		return ''; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	function analyze(node: any) { | 	function analyze(node: parse5.Node) { | ||||||
| 		switch (node.nodeName) { | 		if (treeAdapter.isTextNode(node)) { | ||||||
| 			case '#text': | 			text += node.value; | ||||||
| 				text += node.value; | 			return; | ||||||
| 				break; | 		} | ||||||
| 
 | 
 | ||||||
|  | 		// Skip comment or document type node
 | ||||||
|  | 		if (!treeAdapter.isElementNode(node)) return; | ||||||
|  | 
 | ||||||
|  | 		switch (node.nodeName) { | ||||||
| 			case 'br': | 			case 'br': | ||||||
| 				text += '\n'; | 				text += '\n'; | ||||||
| 				break; | 				break; | ||||||
| 
 | 
 | ||||||
| 			case 'a': | 			case 'a': | ||||||
| 				const txt = getText(node); | 				const txt = getText(node); | ||||||
| 				const rel = node.attrs.find((x: any) => x.name === 'rel'); | 				const rel = node.attrs.find(x => x.name === 'rel'); | ||||||
| 				const href = node.attrs.find((x: any) => x.name === 'href'); | 				const href = node.attrs.find(x => x.name === 'href'); | ||||||
| 
 | 
 | ||||||
| 				// ハッシュタグ
 | 				// ハッシュタグ
 | ||||||
| 				if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) { | 				if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) { | ||||||
|  | @ -44,7 +51,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { | ||||||
| 				} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { | 				} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) { | ||||||
| 					const part = txt.split('@'); | 					const part = txt.split('@'); | ||||||
| 
 | 
 | ||||||
| 					if (part.length === 2) { | 					if (part.length === 2 && href) { | ||||||
| 						//#region ホスト名部分が省略されているので復元する
 | 						//#region ホスト名部分が省略されているので復元する
 | ||||||
| 						const acct = `${txt}@${(new URL(href.value)).hostname}`; | 						const acct = `${txt}@${(new URL(href.value)).hostname}`; | ||||||
| 						text += acct; | 						text += acct; | ||||||
|  | @ -54,11 +61,28 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { | ||||||
| 					} | 					} | ||||||
| 				// その他
 | 				// その他
 | ||||||
| 				} else { | 				} else { | ||||||
| 					text += !href ? txt | 					const generateLink = () => { | ||||||
| 						: txt === href.value | 						if (!href && !txt) { | ||||||
| 							? txt.match(urlRegexFull) ? txt | 							return ''; | ||||||
| 							: `<${txt}>` | 						} | ||||||
| 						: `[${txt}](${href.value})`; | 						if (!href) { | ||||||
|  | 							return txt; | ||||||
|  | 						} | ||||||
|  | 						if (!txt || txt === href.value) {	// #6383: Missing text node
 | ||||||
|  | 							if (href.value.match(urlRegexFull)) { | ||||||
|  | 								return href.value; | ||||||
|  | 							} else { | ||||||
|  | 								return `<${href.value}>`; | ||||||
|  | 							} | ||||||
|  | 						} | ||||||
|  | 						if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) { | ||||||
|  | 							return `[${txt}](<${href.value}>)`;	// #6846
 | ||||||
|  | 						} else { | ||||||
|  | 							return `[${txt}](${href.value})`; | ||||||
|  | 						} | ||||||
|  | 					}; | ||||||
|  | 
 | ||||||
|  | 					text += generateLink(); | ||||||
| 				} | 				} | ||||||
| 				break; | 				break; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
							
								
								
									
										12
									
								
								test/mfm.ts
									
										
									
									
									
								
							
							
						
						
									
										12
									
								
								test/mfm.ts
									
										
									
									
									
								
							|  | @ -1167,6 +1167,10 @@ describe('fromHtml', () => { | ||||||
| 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">c</a> d</p>'), 'a [c](https://example.com/b) d'); | 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">c</a> d</p>'), 'a [c](https://example.com/b) d'); | ||||||
| 	}); | 	}); | ||||||
| 
 | 
 | ||||||
|  | 	it('link with different text, but not encoded', () => { | ||||||
|  | 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/ä">c</a> d</p>'), 'a [c](<https://example.com/ä>) d'); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
| 	it('link with same text', () => { | 	it('link with same text', () => { | ||||||
| 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">https://example.com/b</a> d</p>'), 'a https://example.com/b d'); | 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b">https://example.com/b</a> d</p>'), 'a https://example.com/b d'); | ||||||
| 	}); | 	}); | ||||||
|  | @ -1183,6 +1187,14 @@ describe('fromHtml', () => { | ||||||
| 		assert.deepStrictEqual(fromHtml('<p>a <a>c</a> d</p>'), 'a c d'); | 		assert.deepStrictEqual(fromHtml('<p>a <a>c</a> d</p>'), 'a c d'); | ||||||
| 	}); | 	}); | ||||||
| 
 | 
 | ||||||
|  | 	it('link without text', () => { | ||||||
|  | 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/b"></a> d</p>'), 'a https://example.com/b d'); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
|  | 	it('link without both', () => { | ||||||
|  | 		assert.deepStrictEqual(fromHtml('<p>a <a></a> d</p>'), 'a  d'); | ||||||
|  | 	}); | ||||||
|  | 
 | ||||||
| 	it('mention', () => { | 	it('mention', () => { | ||||||
| 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/@user" class="u-url mention">@user</a> d</p>'), 'a @user@example.com d'); | 		assert.deepStrictEqual(fromHtml('<p>a <a href="https://example.com/@user" class="u-url mention">@user</a> d</p>'), 'a @user@example.com d'); | ||||||
| 	}); | 	}); | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue