This repository has been archived on 2022-08-19. You can view files and clone it, but cannot push or open issues or pull requests.
hydrogen-web/src/platform/web/parsehtml.js

52 lines
1.2 KiB
JavaScript

import DOMPurify from "../../../../../lib/dompurify/index.js"
/**
 * Read-only accessor layer over a parsed DOM subtree.
 *
 * Every method simply forwards to a property or method of the node it is
 * given, so callers can walk the tree through this object instead of
 * touching the DOM API themselves.
 */
class HTMLParseResult {
    /**
     * @param {Node} bodyNode - node whose children are exposed as the roots.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /**
     * Children of the wrapped body node, copied into a fresh array.
     * @returns {Node[]}
     */
    get rootNodes() {
        const { childNodes } = this._bodyNode;
        return Array.from(childNodes);
    }

    /**
     * @param {Node} parent
     * @returns {Node[]} a new array holding `parent.childNodes`.
     */
    getChildNodes(parent) {
        const { childNodes } = parent;
        return Array.from(childNodes);
    }

    /**
     * @param {Element} element
     * @returns {string[]} a new array built from `element.getAttributeNames()`.
     */
    getAttributeNames(element) {
        const names = element.getAttributeNames();
        return Array.from(names);
    }

    /**
     * @param {Element} element
     * @param {string} attributeName
     * @returns {*} whatever `element.getAttribute(attributeName)` yields.
     */
    getAttributeValue(element, attributeName) {
        const value = element.getAttribute(attributeName);
        return value;
    }

    /**
     * @param {Node} candidate
     * @returns {boolean} true when the node type equals `Node.TEXT_NODE`.
     */
    isTextNode(candidate) {
        const { nodeType } = candidate;
        return nodeType === Node.TEXT_NODE;
    }

    /**
     * @param {Node} textNode
     * @returns {*} the node's `nodeValue`.
     */
    getNodeText(textNode) {
        const { nodeValue } = textNode;
        return nodeValue;
    }

    /**
     * @param {Node} candidate
     * @returns {boolean} true when the node type equals `Node.ELEMENT_NODE`.
     */
    isElementNode(candidate) {
        const { nodeType } = candidate;
        return nodeType === Node.ELEMENT_NODE;
    }

    /**
     * @param {Element} element
     * @returns {*} the element's `tagName`, returned unmodified.
     */
    getNodeElementName(element) {
        const { tagName } = element;
        return tagName;
    }
}
// Pattern a URI must match to be accepted. Reading the expression itself, it
// accepts:
//   - an explicit scheme from the list (http, https, ftp, ftps, mailto, tel,
//     callto, cid, xmpp, xxx, mxc) followed by ":",
//   - anything whose first character is not a letter (e.g. "/path", "#frag"),
//   - a run of letters/"+"/"."/"-" that is NOT followed by ":" (i.e. no scheme).
// NOTE(review): "mxc" is presumably here for Matrix content URIs — confirm.
const allowedUriPattern = /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i;

// Options object handed to the sanitizer alongside the HTML to clean.
const sanitizeConfig = {
    ALLOWED_URI_REGEXP: allowedUriPattern,
};
/**
 * Sanitizes an HTML string and wraps the resulting tree in an
 * HTMLParseResult.
 *
 * The input is first run through DOMPurify with `sanitizeConfig`; the
 * sanitized string is then parsed as "text/html" with DOMParser, and the
 * `body` of the parsed document becomes the root of the result.
 *
 * @param {string} html - markup to sanitize and parse.
 * @returns {HTMLParseResult} accessor object over the parsed body.
 */
export function parseHTML(html) {
    // TODO(review): the markup is parsed again here after sanitizing. If
    // DOMPurify already builds a tree via DOMParser internally, it may be
    // possible to take that tree directly and skip this second parse.
    const cleanMarkup = DOMPurify.sanitize(html, sanitizeConfig);
    const parser = new DOMParser();
    const parsedDocument = parser.parseFromString(cleanMarkup, "text/html");
    return new HTMLParseResult(parsedDocument.body);
}