diff --git a/app/javascript/mastodon/utils/__tests__/__snapshots__/html-test.ts.snap b/app/javascript/mastodon/utils/__tests__/__snapshots__/html-test.ts.snap new file mode 100644 index 0000000000..a579efa406 --- /dev/null +++ b/app/javascript/mastodon/utils/__tests__/__snapshots__/html-test.ts.snap @@ -0,0 +1,69 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`html > htmlStringToComponents > copies attributes to props 1`] = ` +[ + + link + , +] +`; + +exports[`html > htmlStringToComponents > handles nested elements 1`] = ` +[ +

+ lorem + + ipsum + +

, +] +`; + +exports[`html > htmlStringToComponents > ignores empty text nodes 1`] = ` +[ +

+ + lorem ipsum + +

, +] +`; + +exports[`html > htmlStringToComponents > respects allowedTags option 1`] = ` +[ +

+ lorem + + dolor + +

, +] +`; + +exports[`html > htmlStringToComponents > respects maxDepth option 1`] = ` +[ +

+ +

, +] +`; + +exports[`html > htmlStringToComponents > returns converted nodes from string 1`] = ` +[ +

+ lorem ipsum +

, +] +`; + +exports[`html > htmlStringToComponents > uses default parsing if onElement returns undefined 1`] = ` +[ +

+ lorem ipsum +

, +] +`; diff --git a/app/javascript/mastodon/utils/__tests__/html-test.ts b/app/javascript/mastodon/utils/__tests__/html-test.ts index 99bfdcb801..6c08cc7cbf 100644 --- a/app/javascript/mastodon/utils/__tests__/html-test.ts +++ b/app/javascript/mastodon/utils/__tests__/html-test.ts @@ -1,3 +1,5 @@ +import React from 'react'; + import * as html from '../html'; describe('html', () => { @@ -9,4 +11,104 @@ describe('html', () => { expect(output).toEqual('lorem\n\nipsum\n
'); }); }); + + describe('htmlStringToComponents', () => { + it('returns converted nodes from string', () => { + const input = '

lorem ipsum

'; + const output = html.htmlStringToComponents(input); + expect(output).toMatchSnapshot(); + }); + + it('handles nested elements', () => { + const input = '

lorem ipsum

'; + const output = html.htmlStringToComponents(input); + expect(output).toMatchSnapshot(); + }); + + it('ignores empty text nodes', () => { + const input = '

lorem ipsum

'; + const output = html.htmlStringToComponents(input); + expect(output).toMatchSnapshot(); + }); + + it('copies attributes to props', () => { + const input = + 'link'; + const output = html.htmlStringToComponents(input); + expect(output).toMatchSnapshot(); + }); + + it('respects maxDepth option', () => { + const input = '

lorem ipsum

'; + const output = html.htmlStringToComponents(input, { maxDepth: 2 }); + expect(output).toMatchSnapshot(); + }); + + it('calls onText callback', () => { + const input = '

lorem ipsum

'; + const onText = vi.fn((text: string) => text); + html.htmlStringToComponents(input, { onText }); + expect(onText).toHaveBeenCalledExactlyOnceWith('lorem ipsum'); + }); + + it('calls onElement callback', () => { + const input = '

lorem ipsum

'; + const onElement = vi.fn( + (element: HTMLElement, children: React.ReactNode[]) => + React.createElement(element.tagName.toLowerCase(), {}, ...children), + ); + html.htmlStringToComponents(input, { onElement }); + expect(onElement).toHaveBeenCalledExactlyOnceWith( + expect.objectContaining({ tagName: 'P' }), + expect.arrayContaining(['lorem ipsum']), + ); + }); + + it('uses default parsing if onElement returns undefined', () => { + const input = '

lorem ipsum

'; + const onElement = vi.fn(() => undefined); + const output = html.htmlStringToComponents(input, { onElement }); + expect(onElement).toHaveBeenCalledExactlyOnceWith( + expect.objectContaining({ tagName: 'P' }), + expect.arrayContaining(['lorem ipsum']), + ); + expect(output).toMatchSnapshot(); + }); + + it('calls onAttribute callback', () => { + const input = + 'link'; + const onAttribute = vi.fn( + (name: string, value: string) => + [name, value] satisfies [string, string], + ); + html.htmlStringToComponents(input, { onAttribute }); + expect(onAttribute).toHaveBeenCalledTimes(3); + expect(onAttribute).toHaveBeenCalledWith( + 'href', + 'https://example.com', + 'a', + ); + expect(onAttribute).toHaveBeenCalledWith('target', '_blank', 'a'); + expect(onAttribute).toHaveBeenCalledWith('rel', 'nofollow', 'a'); + }); + + it('respects allowedTags option', () => { + const input = '

lorem ipsum dolor

'; + const output = html.htmlStringToComponents(input, { + allowedTags: new Set(['p', 'em']), + }); + expect(output).toMatchSnapshot(); + }); + + it('ensure performance is acceptable with large input', () => { + const input = '

' + 'lorem'.repeat(1_000) + '

'; + const start = performance.now(); + html.htmlStringToComponents(input); + const duration = performance.now() - start; + // Arbitrary threshold of 200ms for this test. + // Normally it's much less (<50ms), but the GH Action environment can be slow. + expect(duration).toBeLessThan(200); + }); + }); });
diff --git a/app/javascript/mastodon/utils/html.ts b/app/javascript/mastodon/utils/html.ts
index 0145a04551..1686322300 100644
--- a/app/javascript/mastodon/utils/html.ts
+++ b/app/javascript/mastodon/utils/html.ts
@@ -1,3 +1,5 @@
+import React from 'react';
+
 // NB: This function can still return unsafe HTML
 export const unescapeHTML = (html: string) => {
   const wrapper = document.createElement('div');
@@ -7,3 +9,208 @@ export const unescapeHTML = (html: string) => {
     .replace(/<[^>]*>/g, '');
   return wrapper.textContent;
 };
+
+/** A DOM node waiting to be converted, plus the array receiving its result. */
+interface QueueItem {
+  /** DOM node to convert. */
+  node: Node;
+  /** Array the converted node is pushed onto. */
+  parent: React.ReactNode[];
+  /** Nesting level; the parsed fragment itself is at depth 0. */
+  depth: number;
+}
+
+/** Optional limits and conversion hooks for `htmlStringToComponents`. */
+interface Options {
+  /** Deepest nesting level that is still converted. Defaults to 10. */
+  maxDepth?: number;
+  /** Converts the content of a non-blank text node. */
+  onText?: (text: string) => React.ReactNode;
+  /**
+   * Custom element conversion. `children` is empty when the callback runs and
+   * is filled in afterwards. Returning `undefined` falls back to the default
+   * conversion; any other value, including `null`, is used as-is.
+   */
+  onElement?: (
+    element: HTMLElement,
+    children: React.ReactNode[],
+  ) => React.ReactNode;
+  /**
+   * Maps an attribute to a `[propName, propValue]` pair for the default
+   * conversion. A falsy result drops the attribute.
+   */
+  onAttribute?: (
+    name: string,
+    value: string,
+    tagName: string,
+  ) => [string, unknown] | null;
+  /** Lower-case tag names that are converted; other elements are dropped. */
+  allowedTags?: Set<string>;
+}
+
+/** Tags converted when the `allowedTags` option is not given. */
+const DEFAULT_ALLOWED_TAGS: ReadonlySet<string> = new Set([
+  'a',
+  'abbr',
+  'b',
+  'blockquote',
+  'br',
+  'cite',
+  'code',
+  'del',
+  'dfn',
+  'dl',
+  'dt',
+  'em',
+  'h1',
+  'h2',
+  'h3',
+  'h4',
+  'h5',
+  'h6',
+  'hr',
+  'i',
+  'li',
+  'ol',
+  'p',
+  'pre',
+  'small',
+  'span',
+  'strong',
+  'sub',
+  'sup',
+  'time',
+  'u',
+  'ul',
+]);
+
+/**
+ * Parses an HTML string and converts it into an array of React nodes.
+ *
+ * The markup is parsed inside a template element and the resulting tree is
+ * walked breadth-first. Elements whose tag is not in `allowedTags` and nodes
+ * deeper than `maxDepth` are dropped together with their descendants;
+ * whitespace-only text nodes are dropped as well.
+ *
+ * @param htmlString - Markup to convert.
+ * @param options - Optional limits and conversion hooks.
+ * @returns The converted top-level nodes, in document order.
+ */
+export function htmlStringToComponents(
+  htmlString: string,
+  options: Options = {},
+): React.ReactNode[] {
+  const wrapper = document.createElement('template');
+  wrapper.innerHTML = htmlString;
+
+  const rootChildren: React.ReactNode[] = [];
+  const queue: QueueItem[] = [
+    { node: wrapper.content, parent: rootChildren, depth: 0 },
+  ];
+
+  const {
+    maxDepth = 10,
+    allowedTags = DEFAULT_ALLOWED_TAGS,
+    onAttribute,
+    onElement,
+    onText,
+  } = options;
+
+  // Walk the queue with a cursor rather than Array.prototype.shift(), which
+  // has to re-index every remaining entry on each call.
+  for (let head = 0; head < queue.length; head += 1) {
+    const item = queue[head];
+    if (!item) {
+      break;
+    }
+
+    const { node, parent, depth } = item;
+    // If maxDepth is exceeded, skip processing this node.
+    if (depth > maxDepth) {
+      continue;
+    }
+
+    switch (node.nodeType) {
+      // Just process children for fragments.
+      case Node.DOCUMENT_FRAGMENT_NODE: {
+        for (const child of node.childNodes) {
+          queue.push({ node: child, parent, depth: depth + 1 });
+        }
+        break;
+      }
+
+      // Text can be added directly if it has any non-whitespace content.
+      case Node.TEXT_NODE: {
+        const text = node.textContent;
+        if (text && text.trim() !== '') {
+          if (onText) {
+            parent.push(onText(text));
+          } else {
+            parent.push(text);
+          }
+        }
+        break;
+      }
+
+      // Process elements with attributes and then their children.
+      case Node.ELEMENT_NODE: {
+        if (!(node instanceof HTMLElement)) {
+          console.warn('Expected HTMLElement, got', node);
+          continue;
+        }
+
+        // If the tag is not allowed, skip it and its children.
+        const tagName = node.tagName.toLowerCase();
+        if (!allowedTags.has(tagName)) {
+          continue;
+        }
+
+        // Create the element and add it to the parent.
+        const children: React.ReactNode[] = [];
+        let element: React.ReactNode = undefined;
+
+        // If onElement is provided, use it to create the element.
+        if (onElement) {
+          const component = onElement(node, children);
+          // Check for undefined to allow returning null.
+          if (component !== undefined) {
+            element = component;
+          }
+        }
+
+        // If the element wasn't created, use the default conversion.
+        if (element === undefined) {
+          const props: Record<string, unknown> = {};
+          for (const attr of node.attributes) {
+            if (onAttribute) {
+              const result = onAttribute(attr.name, attr.value, tagName);
+              if (result) {
+                const [name, value] = result;
+                props[name] = value;
+              }
+            } else {
+              props[attr.name] = attr.value;
+            }
+          }
+          // Only pass `children` when the node has child nodes: React DOM
+          // refuses to render void elements such as <br> or <hr> when they
+          // receive a `children` prop, even an empty array.
+          element = node.hasChildNodes()
+            ? React.createElement(tagName, props, children)
+            : React.createElement(tagName, props);
+        }
+
+        // Push the element to the parent.
+        parent.push(element);
+
+        // Iterate over the node children with the newly created component.
+        for (const child of node.childNodes) {
+          queue.push({ node: child, parent: children, depth: depth + 1 });
+        }
+        break;
+      }
+    }
+  }
+
+  return rootChildren;
+}