New unicode emoji processor
This commit is contained in:
		
							parent
							
								
									14574b4e2c
								
							
						
					
					
						commit
						f42eb6495f
					
				
					 7 changed files with 4015 additions and 46 deletions
				
			
		
							
								
								
									
										77
									
								
								scripts/emoji-surrogates-statistics.js
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								scripts/emoji-surrogates-statistics.js
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,77 @@
 | 
			
		|||
// @ts-check
 | 
			
		||||
 | 
			
		||||
const fs = require("fs")
 | 
			
		||||
const {join} = require("path")
 | 
			
		||||
const s = fs.readFileSync(join(__dirname, "..", "src", "m2d", "converters", "emojis.txt"), "utf8").split("\n").map(x => encodeURIComponent(x))
 | 
			
		||||
const searchPattern = "%EF%B8%8F"
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * adapted from es.map.group-by.js in core-js
 | 
			
		||||
 * @template K,V
 | 
			
		||||
 * @param {V[]} items
 | 
			
		||||
 * @param {(item: V) => K} fn
 | 
			
		||||
 * @returns {Map<K, V[]>}
 | 
			
		||||
 */
 | 
			
		||||
function groupBy(items, fn) {
 | 
			
		||||
	var map = new Map();
 | 
			
		||||
	for (const value of items) {
 | 
			
		||||
		var key = fn(value);
 | 
			
		||||
		if (!map.has(key)) map.set(key, [value]);
 | 
			
		||||
		else map.get(key).push(value);
 | 
			
		||||
	}
 | 
			
		||||
	return map;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @param {number[]} items
 | 
			
		||||
 * @param {number} width
 | 
			
		||||
 */
 | 
			
		||||
function xhistogram(items, width) {
 | 
			
		||||
	const chars = " ▏▎▍▌▋▊▉"
 | 
			
		||||
	const max = items.reduce((a, c) => c > a ? c : a, 0)
 | 
			
		||||
	return items.map(v => {
 | 
			
		||||
		const p = v / max * (width-1)
 | 
			
		||||
		return (
 | 
			
		||||
			Array(Math.floor(p)).fill("█").join("") /* whole part */
 | 
			
		||||
			+ chars[Math.ceil((p % 1) * (chars.length-1))] /* decimal part */
 | 
			
		||||
		).padEnd(width)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @param {number[]} items
 | 
			
		||||
 * @param {[number, number]} xrange
 | 
			
		||||
 */
 | 
			
		||||
function yhistogram(items, xrange, printHeader = false) {
 | 
			
		||||
	const chars = "░▁_▂▃▄▅▆▇█"
 | 
			
		||||
	const ones = "₀₁₂₃₄₅₆₇₈₉"
 | 
			
		||||
	const tens = "0123456789"
 | 
			
		||||
	const xy = []
 | 
			
		||||
	let max = 0
 | 
			
		||||
	/** value (x) -> frequency (y) */
 | 
			
		||||
	const grouped = groupBy(items, x => x)
 | 
			
		||||
	for (let i = xrange[0]; i <= xrange[1]; i++) {
 | 
			
		||||
		if (printHeader) {
 | 
			
		||||
			if (i === -1) process.stdout.write("-")
 | 
			
		||||
			else if (i.toString().at(-1) === "0") process.stdout.write(tens[i/10])
 | 
			
		||||
			else process.stdout.write(ones[i%10])
 | 
			
		||||
		}
 | 
			
		||||
		const y = grouped.get(i)?.length ?? 0
 | 
			
		||||
		if (y > max) max = y
 | 
			
		||||
		xy.push(y)
 | 
			
		||||
	}
 | 
			
		||||
	if (printHeader) console.log()
 | 
			
		||||
	return xy.map(y => chars[Math.ceil(y / max * (chars.length-1))]).join("")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const grouped = groupBy(s, x => x.length)
 | 
			
		||||
const sortedGroups = [...grouped.entries()].sort((a, b) => b[0] - a[0])
 | 
			
		||||
let length = 0
 | 
			
		||||
const lengthHistogram = xhistogram(sortedGroups.map(v => v[1].length), 10)
 | 
			
		||||
for (let i = 0; i < sortedGroups.length; i++) {
 | 
			
		||||
	const [k, v] = sortedGroups[i]
 | 
			
		||||
	const l = lengthHistogram[i]
 | 
			
		||||
	const h = yhistogram(v.map(x => x.indexOf(searchPattern)), [-1, k - searchPattern.length], i === 0)
 | 
			
		||||
	if (i === 0) length = h.length + 1
 | 
			
		||||
	console.log(`${h.padEnd(length, i % 2 === 0 ? "⸱" : " ")}length ${k.toString().padEnd(3)} ${l} ${v.length}`)
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue