Improve MFM bracket matching
Co-authored-by: syuilo <syuilotan@yahoo.co.jp>
This commit is contained in:
parent
be0cb88b6c
commit
580191fb17
3 changed files with 115 additions and 28 deletions
|
@ -1,7 +1,7 @@
|
||||||
import * as P from 'parsimmon';
|
import * as P from 'parsimmon';
|
||||||
import parseAcct from '../misc/acct/parse';
|
import parseAcct from '../misc/acct/parse';
|
||||||
import { toUnicode } from 'punycode';
|
import { toUnicode } from 'punycode';
|
||||||
import { takeWhile } from '../prelude/array';
|
import { takeWhile, cumulativeSum } from '../prelude/array';
|
||||||
import { Tree } from '../prelude/tree';
|
import { Tree } from '../prelude/tree';
|
||||||
import * as T from '../prelude/tree';
|
import * as T from '../prelude/tree';
|
||||||
|
|
||||||
|
@ -42,30 +42,18 @@ export function createTree(type: string, children: MfmForest, props: any): MfmTr
|
||||||
return T.createTree({ type, props }, children);
|
return T.createTree({ type, props }, children);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Truncates `s` so that the result contains no unbalanced brackets.
 *
 * Brackets are tracked by nesting depth only: `(` and `「` each open a level,
 * `)` and `」` each close one. The kind of bracket is not compared, so
 * `foo(「)」` counts as balanced.
 *
 * - If a closing bracket appears while nothing is open, that bracket and
 *   everything after it are dropped.
 * - Otherwise the longest prefix that ends at depth zero is kept, which drops
 *   any tail that starts with an opener that is never closed.
 *
 * @param s Text to trim.
 * @returns The balanced prefix of `s`; may be the empty string.
 */
export function removeOrphanedBrackets(s: string): string {
	const openBrackets = ['(', '「'];
	const closeBrackets = [')', '」'];

	let depth = 0;
	// Length of the longest prefix seen so far that ends at depth zero.
	// Stays 0 when the very first character opens a bracket that never closes.
	let balancedLength = 0;

	for (let i = 0; i < s.length; i++) {
		const c = s.charAt(i);
		if (openBrackets.includes(c)) depth++;
		else if (closeBrackets.includes(c)) depth--;

		// First orphaned closing bracket: cut right before it.
		if (depth < 0) return s.slice(0, i);
		if (depth === 0) balancedLength = i + 1;
	}

	return s.slice(0, balancedLength);
}
|
||||||
|
|
||||||
const newline = P((input, i) => {
|
const newline = P((input, i) => {
|
||||||
|
@ -220,7 +208,7 @@ const mfm = P.createLanguage({
|
||||||
const match = text.match(/^#([^\s\.,!\?#]+)/i);
|
const match = text.match(/^#([^\s\.,!\?#]+)/i);
|
||||||
if (!match) return P.makeFailure(i, 'not a hashtag');
|
if (!match) return P.makeFailure(i, 'not a hashtag');
|
||||||
let hashtag = match[1];
|
let hashtag = match[1];
|
||||||
hashtag = hashtag.substr(0, getTrailingPosition(hashtag));
|
hashtag = removeOrphanedBrackets(hashtag);
|
||||||
if (hashtag.match(/^[0-9]+$/)) return P.makeFailure(i, 'not a hashtag');
|
if (hashtag.match(/^[0-9]+$/)) return P.makeFailure(i, 'not a hashtag');
|
||||||
if (input[i - 1] != null && input[i - 1].match(/[a-z0-9]/i)) return P.makeFailure(i, 'not a hashtag');
|
if (input[i - 1] != null && input[i - 1].match(/[a-z0-9]/i)) return P.makeFailure(i, 'not a hashtag');
|
||||||
if (hashtag.length > 50) return P.makeFailure(i, 'not a hashtag');
|
if (hashtag.length > 50) return P.makeFailure(i, 'not a hashtag');
|
||||||
|
@ -390,7 +378,7 @@ const mfm = P.createLanguage({
|
||||||
const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/);
|
const match = text.match(/^https?:\/\/[\w\/:%#@\$&\?!\(\)\[\]~\.,=\+\-]+/);
|
||||||
if (!match) return P.makeFailure(i, 'not a url');
|
if (!match) return P.makeFailure(i, 'not a url');
|
||||||
let url = match[0];
|
let url = match[0];
|
||||||
url = url.substr(0, getTrailingPosition(url));
|
url = removeOrphanedBrackets(url);
|
||||||
if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.'));
|
if (url.endsWith('.')) url = url.substr(0, url.lastIndexOf('.'));
|
||||||
if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(','));
|
if (url.endsWith(',')) url = url.substr(0, url.lastIndexOf(','));
|
||||||
return P.makeSuccess(i + url.length, url);
|
return P.makeSuccess(i + url.length, url);
|
||||||
|
|
|
@ -109,3 +109,9 @@ export function takeWhile<T>(f: Predicate<T>, xs: T[]): T[] {
|
||||||
}
|
}
|
||||||
return ys;
|
return ys;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Computes the running totals of a list of numbers.
 *
 * Element `i` of the result is the sum of `xs[0]` through `xs[i]`.
 * The input is never modified; an empty input yields an empty array.
 *
 * @param xs Numbers to accumulate.
 * @returns A new array of the same length holding the prefix sums.
 */
export function cumulativeSum(xs: ReadonlyArray<number>): number[] {
	// Work on a copy so the caller's array is left untouched.
	const ys = Array.from(xs);
	for (let i = 1; i < ys.length; i++) ys[i] += ys[i - 1];
	return ys;
}
|
||||||
|
|
95
test/mfm.ts
95
test/mfm.ts
|
@ -6,7 +6,7 @@ import * as assert from 'assert';
|
||||||
|
|
||||||
import analyze from '../src/mfm/parse';
|
import analyze from '../src/mfm/parse';
|
||||||
import toHtml from '../src/mfm/html';
|
import toHtml from '../src/mfm/html';
|
||||||
import { createTree as tree, createLeaf as leaf, MfmTree } from '../src/mfm/parser';
|
import { createTree as tree, createLeaf as leaf, MfmTree, removeOrphanedBrackets } from '../src/mfm/parser';
|
||||||
|
|
||||||
function text(text: string): MfmTree {
|
function text(text: string): MfmTree {
|
||||||
return leaf('text', { text });
|
return leaf('text', { text });
|
||||||
|
@ -49,6 +49,99 @@ describe('createTree', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('removeOrphanedBrackets', () => {
	// Each row is [test title, input, expected output]; one test is registered
	// per row, in the order listed.
	const cases: [string, string, string][] = [
		['single (contained)', '(foo)', '(foo)'],
		['single (head)', '(foo)bar', '(foo)bar'],
		['single (tail)', 'foo(bar)', 'foo(bar)'],
		['a', '(foo', ''],
		['b', ')foo', ''],
		['nested', 'foo(「(bar)」)', 'foo(「(bar)」)'],
		['no brackets', 'foo', 'foo'],
		['with foreign bracket (single)', 'foo(bar))', 'foo(bar)'],
		['with foreign bracket (open)', 'foo(bar', 'foo'],
		['with foreign bracket (close)', 'foo)bar', 'foo'],
		['with foreign bracket (close and open)', 'foo)(bar', 'foo'],
		['various bracket type', 'foo「(bar)」(', 'foo「(bar)」'],
		['intersected', 'foo(「)」', 'foo(「)」'],
	];

	for (const [title, input, expected] of cases) {
		it(title, () => {
			assert.deepStrictEqual(removeOrphanedBrackets(input), expected);
		});
	}
});
|
||||||
|
|
||||||
describe('MFM', () => {
|
describe('MFM', () => {
|
||||||
it('can be analyzed', () => {
|
it('can be analyzed', () => {
|
||||||
const tokens = analyze('@himawari @hima_sub@namori.net お腹ペコい :cat: #yryr');
|
const tokens = analyze('@himawari @hima_sub@namori.net お腹ペコい :cat: #yryr');
|
||||||
|
|
Loading…
Reference in a new issue