Switch to DOMPurify in platform layer for sanitization.

This commit is contained in:
Danila Fedorin 2021-07-12 14:46:42 -07:00
parent 012ef2b215
commit 4022c6121b
6 changed files with 31 additions and 95 deletions

View File

@ -59,8 +59,8 @@
"another-json": "^0.2.0",
"base64-arraybuffer": "^0.2.0",
"bs58": "^4.0.1",
"dompurify": "^2.3.0",
"es6-promise": "https://github.com/bwindels/es6-promise.git#bwindels/expose-flush",
"sanitize-html": "^2.4.0",
"text-encoding": "^0.7.0"
}
}

View File

@ -83,11 +83,11 @@ async function populateLib() {
require.resolve('node-html-parser/dist/index.js'),
path.join(libDir, "node-html-parser/index.js")
);
// transpile sanitize-html to esm
await fs.mkdir(path.join(libDir, "sanitize-html/"));
await commonjsToESM(
require.resolve('sanitize-html/index.js'),
path.join(libDir, "sanitize-html/index.js")
// Symlink dompurify
await fs.mkdir(path.join(libDir, "dompurify/"));
await fs.symlink(
require.resolve('dompurify/dist/purify.es.js'),
path.join(libDir, "dompurify/index.js")
);
// transpile another-json to esm
await fs.mkdir(path.join(libDir, "another-json/"));

View File

@ -1,5 +1,4 @@
import { MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart, RulePart, TextPart, LinkPart, ImagePart } from "./MessageBody.js"
import sanitizeHtml from "../../../../../lib/sanitize-html/index.js"
/* At the time of writing (Jul 1 2021), Matrix Spec recommends
* allowing the following HTML tags:
@ -146,27 +145,8 @@ class Deserializer {
}
}
const sanitizeConfig = {
allowedTags: [
"font", "del", "h1", "h2", "h3", "h4", "h5", "h6",
"blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
"b", "i", "u", "strong", "em", "strike", "code", "hr",
"br", "div", "table", "thead", "tbody", "tr", "th", "td",
"caption", "pre", "span", "img"
],
allowedAttributes: {
"font": ["data-mx-bg-color", "data-mx-color"],
"span": ["data-mx-bg-color", "data-mx-color"],
"a": ["name", "target", "href"],
"img": ["width", "height", "alt", "title", "src"],
"ol": ["start"],
"code": ["class"]
},
allowedSchemes: [ "http", "https", "ftp", "mailto", "tel", "mxc" ]
};
export function parseHTMLBody(platform, mediaRepository, html) {
const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig));
const parseResult = platform.parseHTML(html);
const deserializer = new Deserializer(parseResult, mediaRepository);
const parts = deserializer.parseNodes(parseResult.rootNodes);
return new MessageBody(html, parts);

View File

@ -36,7 +36,7 @@ import {BlobHandle} from "./dom/BlobHandle.js";
import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js";
import {downloadInIframe} from "./dom/download.js";
import {Disposables} from "../../utils/Disposables.js";
import {HTMLParseResult} from "./HTMLParseResult.js";
import {parseHTML} from "./parsehtml.js";
function addScript(src) {
return new Promise(function (resolve, reject) {
@ -238,8 +238,7 @@ export class Platform {
}
parseHTML(html) {
const bodyNode = new DOMParser().parseFromString(html, "text/html").body;
return new HTMLParseResult(bodyNode);
return parseHTML(html);
}
async loadImage(blob) {

View File

@ -1,4 +1,6 @@
export class HTMLParseResult {
import DOMPurify from "../../../../../lib/dompurify/index.js"
class HTMLParseResult {
constructor(bodyNode) {
this._bodyNode = bodyNode;
}
@ -35,3 +37,15 @@ export class HTMLParseResult {
return node.tagName;
}
}
const sanitizeConfig = {
ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i,
}
export function parseHTML(html) {
// If DOMPurify uses DOMParser, can't we just get the built tree from it
// instead of re-parsing?
const sanitized = DOMPurify.sanitize(html, sanitizeConfig);
const bodyNode = new DOMParser().parseFromString(sanitized, "text/html").body;
return new HTMLParseResult(bodyNode);
}

View File

@ -1389,13 +1389,18 @@ domhandler@^2.3.0:
dependencies:
domelementtype "1"
domhandler@^4.0.0, domhandler@^4.2.0:
domhandler@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059"
integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA==
dependencies:
domelementtype "^2.2.0"
dompurify@^2.3.0:
version "2.3.0"
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.0.tgz#07bb39515e491588e5756b1d3e8375b5964814e2"
integrity sha512-VV5C6Kr53YVHGOBKO/F86OYX6/iLTw2yVSI721gKetxpHCK/V5TaLEf9ODjRgl1KLSWRMY6cUhAbv/c+IUnwQw==
domutils@1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf"
@ -1412,7 +1417,7 @@ domutils@^1.5.1:
dom-serializer "0"
domelementtype "1"
domutils@^2.5.2, domutils@^2.6.0:
domutils@^2.6.0:
version "2.7.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.7.0.tgz#8ebaf0c41ebafcf55b0b72ec31c56323712c5442"
integrity sha512-8eaHa17IwJUPAiB+SoTYBo5mCdeMgdcAoXJ59m6DT1vw+5iLS3gNoqYaRowaBKtGVrOF1Jz4yDTgYKLK2kvfJg==
@ -1487,11 +1492,6 @@ escape-string-regexp@^1.0.3, escape-string-regexp@^1.0.5:
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
escape-string-regexp@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
eslint-scope@^5.1.1:
version "5.1.1"
resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@ -1801,16 +1801,6 @@ htmlparser2@^3.9.1:
inherits "^2.0.1"
readable-stream "^3.1.1"
htmlparser2@^6.0.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7"
integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.0.0"
domutils "^2.5.2"
entities "^2.0.0"
http-errors@~1.7.2:
version "1.7.3"
resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06"
@ -1890,11 +1880,6 @@ is-module@^1.0.0:
resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591"
integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=
is-plain-object@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344"
integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==
is-reference@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/is-reference/-/is-reference-1.2.1.tgz#8b2dac0b371f4bc994fdeaba9eb542d03002d0b7"
@ -1973,11 +1958,6 @@ json5@^2.1.2:
dependencies:
minimist "^1.2.5"
klona@^2.0.3:
version "2.0.4"
resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0"
integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==
leven@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
@ -2114,11 +2094,6 @@ nanoid@^3.1.12:
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.12.tgz#6f7736c62e8d39421601e4a0c77623a97ea69654"
integrity sha512-1qstj9z5+x491jfiC4Nelk+f8XBad7LN20PmyWINJEMRSf3wcAjAWysw1qaA8z6NSKe2sjq1hRSDpBH5paCb6A==
nanoid@^3.1.23:
version "3.1.23"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.23.tgz#f744086ce7c2bc47ee0a8472574d5c78e4183a81"
integrity sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==
natural-compare@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
@ -2209,11 +2184,6 @@ parent-module@^1.0.0:
dependencies:
callsites "^3.0.0"
parse-srcset@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1"
integrity sha1-8r0iH2zJcKk42IVWq8WJyqqiveE=
parse5@^3.0.1:
version "3.0.3"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-3.0.3.tgz#042f792ffdd36851551cf4e9e066b3874ab45b5c"
@ -2331,15 +2301,6 @@ postcss@^7.0.2, postcss@^7.0.26:
source-map "^0.6.1"
supports-color "^6.1.0"
postcss@^8.0.2:
version "8.3.5"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.3.5.tgz#982216b113412bc20a86289e91eb994952a5b709"
integrity sha512-NxTuJocUhYGsMiMFHDUkmjSKT3EdH4/WbGF6GCi1NDGk+vbcUTun4fpbOqaPtD8IIsztA2ilZm2DhYCuyN58gA==
dependencies:
colorette "^1.2.2"
nanoid "^3.1.23"
source-map-js "^0.6.2"
postcss@^8.1.1:
version "8.1.1"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.1.1.tgz#c3a287dd10e4f6c84cb3791052b96a5d859c9389"
@ -2517,19 +2478,6 @@ safe-buffer@~5.2.0:
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519"
integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==
sanitize-html@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.4.0.tgz#8da7524332eb210d968971621b068b53f17ab5a3"
integrity sha512-Y1OgkUiTPMqwZNRLPERSEi39iOebn2XJLbeiGOBhaJD/yLqtLGu6GE5w7evx177LeGgSE+4p4e107LMiydOf6A==
dependencies:
deepmerge "^4.2.2"
escape-string-regexp "^4.0.0"
htmlparser2 "^6.0.0"
is-plain-object "^5.0.0"
klona "^2.0.3"
parse-srcset "^1.0.2"
postcss "^8.0.2"
semver@7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e"
@ -2612,11 +2560,6 @@ slice-ansi@^4.0.0:
astral-regex "^2.0.0"
is-fullwidth-code-point "^3.0.0"
source-map-js@^0.6.2:
version "0.6.2"
resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e"
integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==
source-map@^0.5.0:
version "0.5.7"
resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"