mirror of
https://github.com/mastodon/mastodon.git
synced 2025-10-05 00:22:42 +00:00
Add basic HTML parser for converting strings to React components (#36071)
This commit is contained in:
parent
2314583606
commit
8a0d0025ff
|
@ -0,0 +1,69 @@
|
|||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`html > htmlStringToComponents > copies attributes to props 1`] = `
|
||||
[
|
||||
<a
|
||||
href="https://example.com"
|
||||
rel="nofollow"
|
||||
target="_blank"
|
||||
>
|
||||
link
|
||||
</a>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > handles nested elements 1`] = `
|
||||
[
|
||||
<p>
|
||||
lorem
|
||||
<strong>
|
||||
ipsum
|
||||
</strong>
|
||||
</p>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > ignores empty text nodes 1`] = `
|
||||
[
|
||||
<p>
|
||||
<span>
|
||||
lorem ipsum
|
||||
</span>
|
||||
</p>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > respects allowedTags option 1`] = `
|
||||
[
|
||||
<p>
|
||||
lorem
|
||||
<em>
|
||||
dolor
|
||||
</em>
|
||||
</p>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > respects maxDepth option 1`] = `
|
||||
[
|
||||
<p>
|
||||
<span />
|
||||
</p>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > returns converted nodes from string 1`] = `
|
||||
[
|
||||
<p>
|
||||
lorem ipsum
|
||||
</p>,
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`html > htmlStringToComponents > uses default parsing if onElement returns undefined 1`] = `
|
||||
[
|
||||
<p>
|
||||
lorem ipsum
|
||||
</p>,
|
||||
]
|
||||
`;
|
|
@ -1,3 +1,5 @@
|
|||
import React from 'react';
|
||||
|
||||
import * as html from '../html';
|
||||
|
||||
describe('html', () => {
|
||||
|
@ -9,4 +11,104 @@ describe('html', () => {
|
|||
expect(output).toEqual('lorem\n\nipsum\n<br>');
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for htmlStringToComponents. The test names double as snapshot keys,
// so they must stay in sync with the stored snapshot file.
describe('htmlStringToComponents', () => {
  it('returns converted nodes from string', () => {
    const input = '<p>lorem ipsum</p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('handles nested elements', () => {
    const input = '<p>lorem <strong>ipsum</strong></p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('ignores empty text nodes', () => {
    const input = '<p> <span>lorem ipsum</span> </p>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('copies attributes to props', () => {
    const input =
      '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';
    const output = html.htmlStringToComponents(input);
    expect(output).toMatchSnapshot();
  });

  it('respects maxDepth option', () => {
    // The root fragment is depth 0, <p> is 1 and <span> is 2; everything
    // inside the <span> exceeds the limit and is dropped.
    const input = '<p><span>lorem <strong>ipsum</strong></span></p>';
    const output = html.htmlStringToComponents(input, { maxDepth: 2 });
    expect(output).toMatchSnapshot();
  });

  it('calls onText callback', () => {
    const input = '<p>lorem ipsum</p>';
    const onText = vi.fn((text: string) => text);
    html.htmlStringToComponents(input, { onText });
    expect(onText).toHaveBeenCalledExactlyOnceWith('lorem ipsum');
  });

  it('calls onElement callback', () => {
    const input = '<p>lorem ipsum</p>';
    // `children` is still empty while the callback runs and is only filled in
    // afterwards, so the array itself is handed to React. Spreading it here
    // would build an element without any children.
    const onElement = vi.fn(
      (element: HTMLElement, children: React.ReactNode[]) =>
        React.createElement(element.tagName.toLowerCase(), {}, children),
    );
    const output = html.htmlStringToComponents(input, { onElement });
    // The recorded argument is the same array instance, so by the time this
    // assertion runs it already holds the converted text node.
    expect(onElement).toHaveBeenCalledExactlyOnceWith(
      expect.objectContaining({ tagName: 'P' }),
      expect.arrayContaining(['lorem ipsum']),
    );
    // The custom element must keep the text that was converted after the
    // callback returned.
    expect(output).toEqual([
      expect.objectContaining({
        type: 'p',
        props: { children: ['lorem ipsum'] },
      }),
    ]);
  });

  it('uses default parsing if onElement returns undefined', () => {
    const input = '<p>lorem ipsum</p>';
    const onElement = vi.fn(() => undefined);
    const output = html.htmlStringToComponents(input, { onElement });
    expect(onElement).toHaveBeenCalledExactlyOnceWith(
      expect.objectContaining({ tagName: 'P' }),
      expect.arrayContaining(['lorem ipsum']),
    );
    expect(output).toMatchSnapshot();
  });

  it('calls onAttribute callback', () => {
    const input =
      '<a href="https://example.com" target="_blank" rel="nofollow">link</a>';
    const onAttribute = vi.fn(
      (name: string, value: string) =>
        [name, value] satisfies [string, string],
    );
    html.htmlStringToComponents(input, { onAttribute });
    // One call per attribute, each receiving the lower-cased tag name.
    expect(onAttribute).toHaveBeenCalledTimes(3);
    expect(onAttribute).toHaveBeenCalledWith(
      'href',
      'https://example.com',
      'a',
    );
    expect(onAttribute).toHaveBeenCalledWith('target', '_blank', 'a');
    expect(onAttribute).toHaveBeenCalledWith('rel', 'nofollow', 'a');
  });

  it('respects allowedTags option', () => {
    // <strong> is not in the set, so it is removed together with its text.
    const input = '<p>lorem <strong>ipsum</strong> <em>dolor</em></p>';
    const output = html.htmlStringToComponents(input, {
      allowedTags: new Set(['p', 'em']),
    });
    expect(output).toMatchSnapshot();
  });

  it('ensure performance is acceptable with large input', () => {
    const input = '<p>' + '<span>lorem</span>'.repeat(1_000) + '</p>';
    const start = performance.now();
    html.htmlStringToComponents(input);
    const duration = performance.now() - start;
    // Arbitrary threshold of 200ms for this test.
    // Normally it's much less (<50ms), but the GH Action environment can be slow.
    expect(duration).toBeLessThan(200);
  });
});
|
||||
});
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
import React from 'react';
|
||||
|
||||
// NB: This function can still return unsafe HTML
|
||||
export const unescapeHTML = (html: string) => {
|
||||
const wrapper = document.createElement('div');
|
||||
|
@ -7,3 +9,177 @@ export const unescapeHTML = (html: string) => {
|
|||
.replace(/<[^>]*>/g, '');
|
||||
return wrapper.textContent;
|
||||
};
|
||||
|
||||
/**
 * One pending unit of work for the breadth-first walk performed by
 * `htmlStringToComponents`.
 */
interface QueueItem {
  /** DOM node that still has to be converted. */
  node: Node;
  /** Array that receives the converted result of `node`. */
  parent: React.ReactNode[];
  /** Nesting level of `node`; the root document fragment is at depth 0. */
  depth: number;
}
|
||||
|
||||
/** Optional settings accepted by `htmlStringToComponents`. */
interface Options {
  /**
   * Nodes whose depth is greater than this value are skipped. The root
   * fragment counts as depth 0, so top-level nodes are at depth 1.
   * Defaults to 10.
   */
  maxDepth?: number;
  /**
   * Called with the content of every text node that is not whitespace-only.
   * The returned node is used in place of the plain string.
   */
  onText?: (text: string) => React.ReactNode;
  /**
   * Called for every allowed element before its children are converted.
   * `children` is empty at call time and is filled in afterwards, so it has
   * to be used by reference. Returning `undefined` falls back to the default
   * conversion; any other value (including `null`) is used as-is.
   */
  onElement?: (
    element: HTMLElement,
    children: React.ReactNode[],
  ) => React.ReactNode;
  /**
   * Called for every attribute during the default element conversion, with
   * the attribute name, its value and the lower-cased tag name. Return a
   * `[propName, propValue]` pair to set a prop, or `null` to drop the
   * attribute.
   */
  onAttribute?: (
    name: string,
    value: string,
    tagName: string,
  ) => [string, unknown] | null;
  /**
   * Lower-cased tag names that may be converted. Elements that are not in
   * the set are skipped together with their children. Defaults to
   * `DEFAULT_ALLOWED_TAGS`.
   */
  allowedTags?: Set<string>;
}
|
||||
/**
 * Lower-cased tag names converted by `htmlStringToComponents` when the caller
 * does not pass its own `allowedTags`. Any element that is not listed here is
 * dropped together with its children.
 */
const DEFAULT_ALLOWED_TAGS: ReadonlySet<string> = new Set([
  'a',
  'abbr',
  'b',
  'blockquote',
  'br',
  'cite',
  'code',
  // `dd` completes the description-list trio: with only `dl` and `dt`
  // allowed, every description (and its content) would be discarded.
  'dd',
  'del',
  'dfn',
  'dl',
  'dt',
  'em',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'hr',
  'i',
  'li',
  'ol',
  'p',
  'pre',
  'small',
  'span',
  'strong',
  'sub',
  'sup',
  'time',
  'u',
  'ul',
]);
|
||||
|
||||
/**
 * Converts an HTML string into an array of React nodes.
 *
 * The string is parsed through a detached `<template>` element and the
 * resulting DOM is walked breadth-first:
 *
 * - text nodes are kept unless they contain only whitespace;
 * - elements are converted when their lower-cased tag name is in
 *   `allowedTags`; otherwise the element and everything inside it is dropped;
 * - element nodes that are not `HTMLElement` instances (for example SVG) are
 *   reported with `console.warn` and dropped;
 * - every other node type (comments, …) is ignored;
 * - nodes nested deeper than `maxDepth` are ignored.
 *
 * NOTE(review): without `onAttribute`, attributes are copied to props under
 * their HTML names and with their string values (`class`, `style`, …).
 * Provide `onAttribute` if they need translating for React.
 *
 * @param htmlString - Markup to convert.
 * @param options - Optional limits and conversion callbacks.
 * @returns The converted top-level nodes, in document order.
 */
export function htmlStringToComponents(
  htmlString: string,
  options: Options = {},
): React.ReactNode[] {
  const {
    maxDepth = 10,
    allowedTags = DEFAULT_ALLOWED_TAGS,
    onAttribute,
    onElement,
    onText,
  } = options;

  const wrapper = document.createElement('template');
  wrapper.innerHTML = htmlString;

  const rootChildren: React.ReactNode[] = [];
  const queue: QueueItem[] = [
    { node: wrapper.content, parent: rootChildren, depth: 0 },
  ];

  // The queue is walked once from front to back instead of being shifted:
  // array iteration re-reads the length on every step, so items appended
  // below are still visited, and no re-indexing happens per dequeue.
  for (const { node, parent, depth } of queue) {
    // If maxDepth is exceeded, skip processing this node.
    if (depth > maxDepth) {
      continue;
    }

    switch (node.nodeType) {
      // Fragments produce no output of their own; only their children do.
      case Node.DOCUMENT_FRAGMENT_NODE: {
        for (const child of node.childNodes) {
          queue.push({ node: child, parent, depth: depth + 1 });
        }
        break;
      }

      // Text is added directly if it has any non-whitespace content.
      case Node.TEXT_NODE: {
        const text = node.textContent;
        if (text && text.trim() !== '') {
          parent.push(onText ? onText(text) : text);
        }
        break;
      }

      // Elements are converted first, their children afterwards.
      case Node.ELEMENT_NODE: {
        if (!(node instanceof HTMLElement)) {
          console.warn('Expected HTMLElement, got', node);
          continue;
        }

        // If the tag is not allowed, skip it and its children.
        const tagName = node.tagName.toLowerCase();
        if (!allowedTags.has(tagName)) {
          continue;
        }

        // This array is shared by reference with the created element and is
        // filled in later, when the queued child nodes are processed.
        const children: React.ReactNode[] = [];

        // Give the caller the first chance to build the element. Only
        // `undefined` triggers the fallback, so `null` can be returned to
        // render nothing.
        let element: React.ReactNode = onElement?.(node, children);

        if (element === undefined) {
          const props: Record<string, unknown> = {};
          for (const attr of node.attributes) {
            if (!onAttribute) {
              props[attr.name] = attr.value;
              continue;
            }

            // A falsy result drops the attribute.
            const result = onAttribute(attr.name, attr.value, tagName);
            if (result) {
              const [name, value] = result;
              props[name] = value;
            }
          }

          element = React.createElement(tagName, props, children);
        }

        parent.push(element);

        // Queue the child nodes so they end up in this element's children.
        for (const child of node.childNodes) {
          queue.push({ node: child, parent: children, depth: depth + 1 });
        }
        break;
      }
    }
  }

  return rootChildren;
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user