dfed04166e
- [`linkedom`](https://github.com/WebReflection/linkedom) is being used in https://github.com/matrix-org/matrix-public-archive to server-side render (SSR) Hydrogen (`hydrogen-view-sdk`) - This is being fixed by using an explicit HTML wrapper boilerplate with `DOMParser` to get a matching result in the browser and `linkedom`. Currently `parseHTML` is only used for HTML content bodies in events. Events with replies have content bodies that look like `<mx-reply>Hello</mx-reply> What's up` so they're parsed as HTML to strip out the `<mx-reply>` part. Before | After --- | --- ![](https://user-images.githubusercontent.com/558581/153692011-2f0e7114-fcb4-481f-b217-49f461b1740a.png) | ![](https://user-images.githubusercontent.com/558581/153692016-52582fdb-abd9-439d-9dce-3f04da6959db.png) Before: ```js // Browser (Chrome, Firefox) new DOMParser().parseFromString(`<div>foo</div>`, "text/html").body.outerHTML; // '<body><div>foo</div></body>' // `linkedom` ❌ new DOMParser().parseFromString(`<div>foo</div>`, "text/html").body.outerHTML; // '<body></body>' ``` After (consistent matching output): ```js // Browser (Chrome, Firefox) new DOMParser().parseFromString(`<!DOCTYPE html><html><body><div>foo</div></body></html>`, "text/html").body.outerHTML; // '<body><div>foo</div></body>' // `linkedom` new DOMParser().parseFromString(`<!DOCTYPE html><html><body><div>foo</div></body></html>`, "text/html").body.outerHTML; // '<body><div>foo</div></body>' ``` `linkedom`'s goal is to be close to the current DOM standard, but [not too close](https://github.com/WebReflection/linkedom#faq). It is focused on the streamlined cases for server-side rendering (SSR). Here is some context around getting `DOMParser` to interpret things better. The conclusion was to only support the explicit standard cases with a `<html><body></body></html>` specified instead of adding the magic HTML document creation and massaging that the browser does. 
- https://github.com/WebReflection/linkedom/issues/106 - https://github.com/WebReflection/linkedom/pull/108 --- Part of https://github.com/vector-im/hydrogen-web/pull/653 to support server-side rendering Hydrogen for the [`matrix-public-archive`](https://github.com/matrix-org/matrix-public-archive) project.
69 lines
1.9 KiB
JavaScript
69 lines
1.9 KiB
JavaScript
/*
|
|
Copyright 2021 The Matrix.org Foundation C.I.C.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
import DOMPurify from "dompurify"
|
|
|
|
/**
 * Thin accessor layer over a parsed DOM subtree.
 *
 * Wraps the node handed to the constructor and exposes its tree through
 * plain method calls, so callers walk the result without touching DOM
 * properties directly.
 */
class HTMLParseResult {
    /**
     * @param {Node} bodyNode - node whose children form the top level of the
     *     parsed content.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /**
     * Top-level nodes of the parsed content.
     * @returns {Node[]} a fresh array copied from the wrapped node's `childNodes`.
     */
    get rootNodes() {
        return Array.from(this._bodyNode.childNodes);
    }

    /**
     * @param {Node} node
     * @returns {Node[]} a fresh array copied from `node.childNodes`.
     */
    getChildNodes(node) {
        return Array.from(node.childNodes);
    }

    /**
     * @param {Element} node
     * @returns {string[]} a fresh array built from `node.getAttributeNames()`.
     */
    getAttributeNames(node) {
        return Array.from(node.getAttributeNames());
    }

    /**
     * @param {Element} node
     * @param {string} attr - attribute name to look up.
     * @returns {*} whatever `node.getAttribute(attr)` returns.
     */
    getAttributeValue(node, attr) {
        return node.getAttribute(attr);
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals the global `Node.TEXT_NODE`.
     */
    isTextNode(node) {
        // Reads the constant from the global `Node`, which must exist in the host environment.
        return node.nodeType === Node.TEXT_NODE;
    }

    /**
     * @param {Node} node
     * @returns {string} the node's `textContent`.
     */
    getNodeText(node) {
        return node.textContent;
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` equals the global `Node.ELEMENT_NODE`.
     */
    isElementNode(node) {
        // Reads the constant from the global `Node`, which must exist in the host environment.
        return node.nodeType === Node.ELEMENT_NODE;
    }

    /**
     * @param {Element} node
     * @returns {string} the node's `tagName`, exactly as the DOM reports it.
     */
    getNodeElementName(node) {
        return node.tagName;
    }
}
|
|
|
|
/**
 * Options passed to `DOMPurify.sanitize` when cleaning HTML.
 */
const sanitizeConfig = {
    // URI allow-list: the listed schemes (including `mxc`), anything that does
    // not start with a letter, or scheme-like text that is not followed by ":".
    // The hyphens inside the character classes are escaped so that `.-:` is
    // NOT read as a range (which would wrongly cover "/", digits and ":" while
    // leaving "-" out, letting hyphenated schemes such as "x-foo:" through and
    // rejecting relative URLs such as "foo/bar").
    ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i,
    // Drop <mx-reply> elements entirely...
    FORBID_TAGS: ['mx-reply'],
    // ...including their content, rather than keeping the inner nodes.
    KEEP_CONTENT: false,
};
|
|
|
|
/**
 * Sanitizes an HTML string with DOMPurify (using `sanitizeConfig`) and parses
 * the sanitized markup into a DOM tree.
 *
 * @param {string} html - HTML to sanitize and parse.
 * @returns {HTMLParseResult} wrapper around the `<body>` of the parsed document.
 */
export function parseHTML(html) {
    // TODO: if DOMPurify uses DOMParser, can't we just get the built tree from
    // it instead of re-parsing?
    const sanitized = DOMPurify.sanitize(html, sanitizeConfig);
    // Wrap the fragment in an explicit document skeleton so that DOMParser
    // implementations which do not synthesize <html>/<body> around a bare
    // fragment still yield a populated `body`.
    const wrappedMarkup = `<!DOCTYPE html><html><body>${sanitized}</body></html>`;
    const bodyNode = new DOMParser().parseFromString(wrappedMarkup, "text/html").body;
    return new HTMLParseResult(bodyNode);
}
|