// Unit tests for the inline markdown tokenizer and the HTML-to-markdown serializer.
import { ribbit, getWindow } from './setup';
|
|
import { InlineTokenizer, type InlineToken } from '../src/ts/tokenizer';
|
|
import { MarkdownSerializer, type SerializerTagDef } from '../src/ts/serializer';
|
|
|
|
// Set up DOM globals before any tests run — the MarkdownSerializer tests
// below call document.createElement, so the window/document shims from
// './setup' must be installed at module-evaluation time.
getWindow();
const boldDef = {
|
|
delimiter: '**',
|
|
htmlTag: 'strong',
|
|
recursive: true,
|
|
precedence: 40,
|
|
};
|
|
const italicDef = {
|
|
delimiter: '*',
|
|
htmlTag: 'em',
|
|
recursive: true,
|
|
precedence: 50,
|
|
};
|
|
const strikeDef = {
|
|
delimiter: '~~',
|
|
htmlTag: 'del',
|
|
recursive: true,
|
|
precedence: 45,
|
|
};
|
|
const codeDef = {
|
|
delimiter: '`',
|
|
htmlTag: 'code',
|
|
recursive: false,
|
|
precedence: 10,
|
|
};
|
|
|
|
const tokenizer = new InlineTokenizer([boldDef, italicDef, strikeDef, codeDef]);
|
|
|
|
function roles(tokens: InlineToken[]): string[] {
|
|
return tokens.map(token => token.role);
|
|
}
|
|
|
|
function values(tokens: InlineToken[]): string[] {
|
|
return tokens.map(token => token.value);
|
|
}
|
|
|
|
describe('InlineTokenizer', () => {
|
|
describe('plain text', () => {
|
|
it('produces a single text token', () => {
|
|
const tokens = tokenizer.tokenize('hello world');
|
|
expect(roles(tokens)).toEqual(['text']);
|
|
expect(values(tokens)).toEqual(['hello world']);
|
|
});
|
|
});
|
|
|
|
describe('bold', () => {
|
|
it('tokenizes **bold**', () => {
|
|
const tokens = tokenizer.tokenize('**bold**');
|
|
expect(roles(tokens)).toEqual(['open', 'text', 'close']);
|
|
expect(tokens[0].delimiter).toBe('**');
|
|
expect(tokens[1].value).toBe('bold');
|
|
});
|
|
|
|
it('tokenizes text **bold** text', () => {
|
|
const tokens = tokenizer.tokenize('hello **bold** end');
|
|
expect(roles(tokens)).toEqual(['text', 'open', 'text', 'close', 'text']);
|
|
});
|
|
});
|
|
|
|
describe('italic', () => {
|
|
it('tokenizes *italic*', () => {
|
|
const tokens = tokenizer.tokenize('*italic*');
|
|
expect(roles(tokens)).toEqual(['open', 'text', 'close']);
|
|
expect(tokens[0].delimiter).toBe('*');
|
|
});
|
|
});
|
|
|
|
describe('strikethrough', () => {
|
|
it('tokenizes ~~struck~~', () => {
|
|
const tokens = tokenizer.tokenize('~~struck~~');
|
|
expect(roles(tokens)).toEqual(['open', 'text', 'close']);
|
|
expect(tokens[0].delimiter).toBe('~~');
|
|
});
|
|
});
|
|
|
|
describe('code spans', () => {
|
|
it('tokenizes `code`', () => {
|
|
const tokens = tokenizer.tokenize('`code`');
|
|
expect(roles(tokens)).toEqual(['code']);
|
|
expect(tokens[0].content).toBe('code');
|
|
});
|
|
|
|
it('does not parse delimiters inside code', () => {
|
|
const tokens = tokenizer.tokenize('`**not bold**`');
|
|
expect(roles(tokens)).toEqual(['code']);
|
|
expect(tokens[0].content).toBe('**not bold**');
|
|
});
|
|
});
|
|
|
|
describe('backslash escapes', () => {
|
|
it('\\* becomes literal *', () => {
|
|
const tokens = tokenizer.tokenize('\\*hello');
|
|
expect(roles(tokens)).toEqual(['text']);
|
|
expect(tokens[0].value).toBe('*hello');
|
|
});
|
|
|
|
it('\\\\ becomes literal \\', () => {
|
|
const tokens = tokenizer.tokenize('\\\\');
|
|
expect(roles(tokens)).toEqual(['text']);
|
|
expect(tokens[0].value).toBe('\\');
|
|
});
|
|
|
|
it('\\n at end of line is a hard break', () => {
|
|
const tokens = tokenizer.tokenize('hello\\\nworld');
|
|
expect(roles(tokens)).toEqual(['text', 'break', 'text']);
|
|
});
|
|
});
|
|
|
|
describe('hard line breaks', () => {
|
|
it('two trailing spaces before newline', () => {
|
|
const tokens = tokenizer.tokenize('hello \nworld');
|
|
expect(roles(tokens)).toEqual(['text', 'break', 'text']);
|
|
});
|
|
|
|
it('single space does not break', () => {
|
|
const tokens = tokenizer.tokenize('hello \nworld');
|
|
const breakTokens = tokens.filter(token => token.role === 'break');
|
|
expect(breakTokens.length).toBe(0);
|
|
});
|
|
});
|
|
|
|
describe('entity resolution', () => {
|
|
it('& becomes &', () => {
|
|
const tokens = tokenizer.tokenize('a & b');
|
|
expect(tokens[0].value).toBe('a & b');
|
|
});
|
|
|
|
it('{ becomes {', () => {
|
|
const tokens = tokenizer.tokenize('{');
|
|
expect(tokens[0].value).toBe('{');
|
|
});
|
|
|
|
it('{ becomes {', () => {
|
|
const tokens = tokenizer.tokenize('{');
|
|
expect(tokens[0].value).toBe('{');
|
|
});
|
|
});
|
|
|
|
describe('links', () => {
|
|
it('tokenizes [text](url)', () => {
|
|
const tokens = tokenizer.tokenize('[click](http://x)');
|
|
expect(roles(tokens)).toEqual(['link']);
|
|
expect(tokens[0].href).toBe('http://x');
|
|
expect(tokens[0].value).toBe('click');
|
|
});
|
|
|
|
it('tokenizes [text](url "title")', () => {
|
|
const tokens = tokenizer.tokenize('[click](http://x "My Title")');
|
|
expect(tokens[0].title).toBe('My Title');
|
|
});
|
|
|
|
it('disallows [ in link text', () => {
|
|
const tokens = tokenizer.tokenize('[outer [inner](b)](a)');
|
|
// Should not match as a single link
|
|
const linkTokens = tokens.filter(token => token.role === 'link');
|
|
expect(linkTokens.length).toBeLessThanOrEqual(1);
|
|
});
|
|
});
|
|
|
|
describe('autolinks', () => {
|
|
it('tokenizes <url>', () => {
|
|
const tokens = tokenizer.tokenize('<https://example.com>');
|
|
expect(roles(tokens)).toEqual(['autolink']);
|
|
expect(tokens[0].href).toBe('https://example.com');
|
|
});
|
|
|
|
it('tokenizes bare URL', () => {
|
|
const tokens = tokenizer.tokenize('visit https://example.com today');
|
|
expect(tokens.some(token => token.role === 'autolink')).toBe(true);
|
|
});
|
|
});
|
|
|
|
describe('HTML passthrough', () => {
|
|
it('tokenizes HTML tags', () => {
|
|
const tokens = tokenizer.tokenize('a <span>b</span> c');
|
|
const htmlTokens = tokens.filter(token => token.role === 'html');
|
|
expect(htmlTokens.length).toBe(2);
|
|
expect(htmlTokens[0].value).toBe('<span>');
|
|
expect(htmlTokens[1].value).toBe('</span>');
|
|
});
|
|
});
|
|
|
|
describe('flanking rules', () => {
|
|
it('mid-word * is not a delimiter', () => {
|
|
const tokens = tokenizer.tokenize('2*3*4');
|
|
expect(roles(tokens)).toEqual(['text']);
|
|
});
|
|
|
|
it('* at word boundary is a delimiter', () => {
|
|
const tokens = tokenizer.tokenize('*hello*');
|
|
expect(roles(tokens)).toEqual(['open', 'text', 'close']);
|
|
});
|
|
});
|
|
|
|
describe('nested delimiters', () => {
|
|
it('bold inside italic', () => {
|
|
const tokens = tokenizer.tokenize('*hello **world***');
|
|
const openTokens = tokens.filter(token => token.role === 'open');
|
|
expect(openTokens.length).toBe(2);
|
|
});
|
|
});
|
|
});
|
|
|
|
describe('MarkdownSerializer', () => {
|
|
const tagMap = new Map<string, SerializerTagDef>([
|
|
['STRONG', { delimiter: '**' }],
|
|
['B', { delimiter: '**' }],
|
|
['EM', { delimiter: '*' }],
|
|
['I', { delimiter: '*' }],
|
|
['DEL', { delimiter: '~~' }],
|
|
['CODE', {
|
|
serialize: (element) => '`' + (element.textContent || '') + '`',
|
|
}],
|
|
['A', {
|
|
serialize: (element, children) => {
|
|
const href = element.getAttribute('href') || '';
|
|
const title = element.getAttribute('title');
|
|
const titlePart = title ? ` "${title}"` : '';
|
|
return '[' + children() + '](' + href + titlePart + ')';
|
|
},
|
|
}],
|
|
['BR', {
|
|
serialize: () => ' \n',
|
|
}],
|
|
]);
|
|
const delimiterChars = new Set(['*', '`', '~']);
|
|
const serializer = new MarkdownSerializer(tagMap, delimiterChars);
|
|
|
|
it('serializes plain text', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = 'hello world';
|
|
expect(serializer.serialize(div)).toBe('hello world');
|
|
});
|
|
|
|
it('serializes bold', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<strong>bold</strong>';
|
|
expect(serializer.serialize(div)).toBe('**bold**');
|
|
});
|
|
|
|
it('serializes italic', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<em>italic</em>';
|
|
expect(serializer.serialize(div)).toBe('*italic*');
|
|
});
|
|
|
|
it('escapes * in text nodes', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = 'hello * world';
|
|
expect(serializer.serialize(div)).toBe('hello \\* world');
|
|
});
|
|
|
|
it('escapes _ in text nodes', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = 'hello_world';
|
|
expect(serializer.serialize(div)).toBe('hello\\_world');
|
|
});
|
|
|
|
it('escapes \\ in text nodes', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = 'back\\slash';
|
|
expect(serializer.serialize(div)).toBe('back\\\\slash');
|
|
});
|
|
|
|
it('escapes < before letters', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = 'a <b> c';
|
|
expect(serializer.serialize(div)).toBe('a \\<b> c');
|
|
});
|
|
|
|
it('does not escape < before non-letters', () => {
|
|
const div = document.createElement('div');
|
|
div.textContent = '1 < 2';
|
|
expect(serializer.serialize(div)).toBe('1 < 2');
|
|
});
|
|
|
|
it('does not escape * inside delimiters', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<strong>bold</strong>';
|
|
const result = serializer.serialize(div);
|
|
// The ** are delimiter tokens, not escaped
|
|
expect(result).toBe('**bold**');
|
|
expect(result).not.toContain('\\*');
|
|
});
|
|
|
|
it('escapes * in text adjacent to delimiters', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<strong>bold</strong> * text';
|
|
const result = serializer.serialize(div);
|
|
expect(result).toContain('\\*');
|
|
});
|
|
|
|
it('serializes link', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<a href="http://x">click</a>';
|
|
expect(serializer.serialize(div)).toBe('[click](http://x)');
|
|
});
|
|
|
|
it('serializes link with title', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<a href="http://x" title="T">click</a>';
|
|
expect(serializer.serialize(div)).toBe('[click](http://x "T")');
|
|
});
|
|
|
|
it('serializes code', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = '<code>x</code>';
|
|
expect(serializer.serialize(div)).toBe('`x`');
|
|
});
|
|
|
|
it('serializes hard break', () => {
|
|
const div = document.createElement('div');
|
|
div.innerHTML = 'hello<br>world';
|
|
expect(serializer.serialize(div)).toBe('hello \nworld');
|
|
});
|
|
});
|