mirror of
https://github.com/mastodon/mastodon.git
synced 2025-10-05 16:42:47 +00:00
Add basic HTML parser for converting strings to React components (#36071)
This commit is contained in:
parent
2314583606
commit
8a0d0025ff
|
@ -0,0 +1,69 @@
|
||||||
|
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > copies attributes to props 1`] = `
|
||||||
|
[
|
||||||
|
<a
|
||||||
|
href="https://example.com"
|
||||||
|
rel="nofollow"
|
||||||
|
target="_blank"
|
||||||
|
>
|
||||||
|
link
|
||||||
|
</a>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > handles nested elements 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
lorem
|
||||||
|
<strong>
|
||||||
|
ipsum
|
||||||
|
</strong>
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > ignores empty text nodes 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
<span>
|
||||||
|
lorem ipsum
|
||||||
|
</span>
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > respects allowedTags option 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
lorem
|
||||||
|
<em>
|
||||||
|
dolor
|
||||||
|
</em>
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > respects maxDepth option 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
<span />
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > returns converted nodes from string 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
lorem ipsum
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
||||||
|
|
||||||
|
exports[`html > htmlStringToComponents > uses default parsing if onElement returns undefined 1`] = `
|
||||||
|
[
|
||||||
|
<p>
|
||||||
|
lorem ipsum
|
||||||
|
</p>,
|
||||||
|
]
|
||||||
|
`;
|
|
@ -1,3 +1,5 @@
|
||||||
|
import React from 'react';
|
||||||
|
|
||||||
import * as html from '../html';
|
import * as html from '../html';
|
||||||
|
|
||||||
describe('html', () => {
|
describe('html', () => {
|
||||||
|
@ -9,4 +11,104 @@ describe('html', () => {
|
||||||
expect(output).toEqual('lorem\n\nipsum\n<br>');
|
expect(output).toEqual('lorem\n\nipsum\n<br>');
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Specs for html.htmlStringToComponents, which parses an HTML string and
// returns an array of React nodes. Structural results are pinned with
// snapshots; callback behaviour is pinned with mock assertions.
describe('htmlStringToComponents', () => {
  it('returns converted nodes from string', () => {
    const input = '<p>lorem ipsum</p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('handles nested elements', () => {
    const input = '<p>lorem <strong>ipsum</strong></p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  // Whitespace-only text nodes (here: the spaces around <span>) are dropped.
  it('ignores empty text nodes', () => {
    const input = '<p> <span>lorem ipsum</span> </p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('copies attributes to props', () => {
    const input =
      '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  // The root fragment counts as depth 0, so <p> is depth 1 and <span> is
  // depth 2; everything inside <span> exceeds maxDepth and is omitted.
  it('respects maxDepth option', () => {
    const input = '<p><span>lorem <strong>ipsum</strong></span></p>';
    const output = html.htmlStringToComponents(input, { maxDepth: 2 });
    expect(output).toMatchSnapshot();
  });

  it('calls onText callback', () => {
    const input = '<p>lorem ipsum</p>';
    const onText = vi.fn((text: string) => text);
    html.htmlStringToComponents(input, { onText });
    expect(onText).toHaveBeenCalledExactlyOnceWith('lorem ipsum');
  });

  // The `children` array handed to onElement is empty at call time and is
  // populated later in the traversal; the assertion sees the filled array
  // because the mock holds a reference to that same array.
  it('calls onElement callback', () => {
    const input = '<p>lorem ipsum</p>';
    const onElement = vi.fn(
      (element: HTMLElement, children: React.ReactNode[]) =>
        React.createElement(element.tagName.toLowerCase(), {}, ...children),
    );
    html.htmlStringToComponents(input, { onElement });
    expect(onElement).toHaveBeenCalledExactlyOnceWith(
      expect.objectContaining({ tagName: 'P' }),
      expect.arrayContaining(['lorem ipsum']),
    );
  });

  // Returning undefined from onElement means "no opinion": the default
  // element conversion is used instead.
  it('uses default parsing if onElement returns undefined', () => {
    const input = '<p>lorem ipsum</p>';
    const onElement = vi.fn(() => undefined);
    const output = html.htmlStringToComponents(input, { onElement });
    expect(onElement).toHaveBeenCalledExactlyOnceWith(
      expect.objectContaining({ tagName: 'P' }),
      expect.arrayContaining(['lorem ipsum']),
    );
    expect(output).toMatchSnapshot();
  });

  // onAttribute receives (name, value, lower-cased tag name) once per
  // attribute on the element.
  it('calls onAttribute callback', () => {
    const input =
      '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';
    const onAttribute = vi.fn(
      (name: string, value: string) =>
        [name, value] satisfies [string, string],
    );
    html.htmlStringToComponents(input, { onAttribute });
    expect(onAttribute).toHaveBeenCalledTimes(3);
    expect(onAttribute).toHaveBeenCalledWith(
      'href',
      'https://example.com',
      'a',
    );
    expect(onAttribute).toHaveBeenCalledWith('target', '_blank', 'a');
    expect(onAttribute).toHaveBeenCalledWith('rel', 'nofollow', 'a');
  });

  // <strong> is not in the allowed set, so it and its contents are skipped.
  it('respects allowedTags option', () => {
    const input = '<p>lorem <strong>ipsum</strong> <em>dolor</em></p>';
    const output = html.htmlStringToComponents(input, {
      allowedTags: new Set(['p', 'em']),
    });
    expect(output).toMatchSnapshot();
  });

  it('ensure performance is acceptable with large input', () => {
    const input = '<p>' + '<span>lorem</span>'.repeat(1_000) + '</p>';
    const start = performance.now();
    html.htmlStringToComponents(input);
    const duration = performance.now() - start;
    // Arbitrary threshold of 200ms for this test.
    // Normally it's much less (<50ms), but the GH Action environment can be slow.
    expect(duration).toBeLessThan(200);
  });
});
|
||||||
});
|
});
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
import React from 'react';
|
||||||
|
|
||||||
// NB: This function can still return unsafe HTML
|
// NB: This function can still return unsafe HTML
|
||||||
export const unescapeHTML = (html: string) => {
|
export const unescapeHTML = (html: string) => {
|
||||||
const wrapper = document.createElement('div');
|
const wrapper = document.createElement('div');
|
||||||
|
@ -7,3 +9,177 @@ export const unescapeHTML = (html: string) => {
|
||||||
.replace(/<[^>]*>/g, '');
|
.replace(/<[^>]*>/g, '');
|
||||||
return wrapper.textContent;
|
return wrapper.textContent;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * One pending unit of work in the traversal performed by
 * `htmlStringToComponents`.
 */
interface QueueItem {
  /** DOM node still to be converted. */
  node: Node;
  /** Array that the converted result of `node` is appended to. */
  parent: React.ReactNode[];
  /** Nesting level of `node`; the root fragment is enqueued with depth 0. */
  depth: number;
}
|
||||||
|
|
||||||
|
/** Optional settings accepted by `htmlStringToComponents`. */
interface Options {
  /**
   * Deepest nesting level that is still converted; nodes whose depth is
   * greater than this value are skipped. Defaults to 10.
   */
  maxDepth?: number;
  /**
   * Called for every text node that has non-whitespace content. Its return
   * value is inserted in place of the raw text.
   */
  onText?: (text: string) => React.ReactNode;
  /**
   * Called for every allowed element. `children` is the array that will
   * receive the element's converted children; it is still empty when the
   * callback runs and is filled afterwards. Return `undefined` to fall back
   * to the default conversion; any other value (including `null`) is used
   * as the element.
   */
  onElement?: (
    element: HTMLElement,
    children: React.ReactNode[],
  ) => React.ReactNode;
  /**
   * Called for every attribute during the default element conversion.
   * Return a `[propName, propValue]` pair to set a prop, or `null` to drop
   * the attribute. `tagName` is lower-cased.
   */
  onAttribute?: (
    name: string,
    value: string,
    tagName: string,
  ) => [string, unknown] | null;
  /**
   * Lower-cased tag names that may be converted. Elements with any other
   * tag are skipped together with their children. Only membership is
   * checked, so a read-only set is sufficient.
   */
  allowedTags?: ReadonlySet<string>;
}
|
||||||
|
/**
 * Lower-cased tag names converted by `htmlStringToComponents` when the
 * caller does not supply `allowedTags`.
 *
 * NOTE(review): `dl` and `dt` are allowed but `dd` is not — confirm whether
 * that omission is intentional.
 */
const DEFAULT_ALLOWED_TAGS: ReadonlySet<string> = new Set([
  'a',
  'abbr',
  'b',
  'blockquote',
  'br',
  'cite',
  'code',
  'del',
  'dfn',
  'dl',
  'dt',
  'em',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'hr',
  'i',
  'li',
  'ol',
  'p',
  'pre',
  'small',
  'span',
  'strong',
  'sub',
  'sup',
  'time',
  'u',
  'ul',
]);
|
||||||
|
|
||||||
|
/**
 * Parses an HTML string and converts it into an array of React nodes.
 *
 * The string is parsed through a `<template>` element and the resulting DOM
 * is walked breadth-first. Text nodes containing only whitespace are dropped,
 * elements whose tag is not in `allowedTags` are skipped along with their
 * children, and nodes deeper than `maxDepth` are ignored.
 *
 * By default attributes are copied to props verbatim under their HTML names.
 * NOTE(review): React expects e.g. `className` rather than `class` and an
 * object for `style`; callers that may receive such attributes should remap
 * or drop them via `onAttribute`.
 *
 * @param htmlString - HTML markup to convert.
 * @param options - Optional limits and callbacks; see `Options`.
 * @returns The converted top-level nodes, in document order.
 */
export function htmlStringToComponents(
  htmlString: string,
  options: Options = {},
): React.ReactNode[] {
  const wrapper = document.createElement('template');
  wrapper.innerHTML = htmlString;

  const rootChildren: React.ReactNode[] = [];
  const queue: QueueItem[] = [
    { node: wrapper.content, parent: rootChildren, depth: 0 },
  ];

  const {
    maxDepth = 10,
    allowedTags = DEFAULT_ALLOWED_TAGS,
    onAttribute,
    onElement,
    onText,
  } = options;

  // Breadth-first traversal. Items are appended to `queue` while it is being
  // iterated; array iteration re-checks the length on every step, so newly
  // pushed items are visited in order. This avoids calling `queue.shift()`
  // for every node, which has to re-index the remaining items each time.
  for (const { node, parent, depth } of queue) {
    // If maxDepth is exceeded, skip processing this node.
    if (depth > maxDepth) {
      continue;
    }

    switch (node.nodeType) {
      // Just process children for fragments.
      case Node.DOCUMENT_FRAGMENT_NODE: {
        for (const child of node.childNodes) {
          queue.push({ node: child, parent, depth: depth + 1 });
        }
        break;
      }

      // Text can be added directly if it has any non-whitespace content.
      case Node.TEXT_NODE: {
        const text = node.textContent;
        if (text && text.trim() !== '') {
          parent.push(onText ? onText(text) : text);
        }
        break;
      }

      // Process elements with attributes and then their children.
      case Node.ELEMENT_NODE: {
        if (!(node instanceof HTMLElement)) {
          console.warn('Expected HTMLElement, got', node);
          continue;
        }

        // If the tag is not allowed, skip it and its children.
        const tagName = node.tagName.toLowerCase();
        if (!allowedTags.has(tagName)) {
          continue;
        }

        // `children` starts empty and is filled in later iterations, once the
        // child nodes queued below are processed. The element created here
        // keeps a reference to this same array.
        const children: React.ReactNode[] = [];

        // If onElement is provided, use it to create the element. Only
        // `undefined` triggers the default conversion, so that the callback
        // can return `null` to render nothing for this element.
        let element: React.ReactNode = onElement?.(node, children);

        // If the element wasn't created, use the default conversion.
        if (element === undefined) {
          const props: Record<string, unknown> = {};
          for (const attr of node.attributes) {
            if (!onAttribute) {
              props[attr.name] = attr.value;
              continue;
            }
            // A falsy result (null) drops the attribute.
            const result = onAttribute(attr.name, attr.value, tagName);
            if (result) {
              const [name, value] = result;
              props[name] = value;
            }
          }
          // The array is passed as a single child (not spread) on purpose:
          // spreading would copy its current, still empty, contents.
          element = React.createElement(tagName, props, children);
        }

        // Push the element to the parent.
        parent.push(element);

        // Iterate over the node children with the newly created component.
        for (const child of node.childNodes) {
          queue.push({ node: child, parent: children, depth: depth + 1 });
        }
        break;
      }
    }
  }

  return rootChildren;
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user