Add HTML sanitization to domain layer.

This commit is contained in:
Danila Fedorin 2021-07-09 13:06:24 -07:00
parent 695bdec9fe
commit 1435c2c9a2
4 changed files with 93 additions and 5 deletions

View file

@ -60,6 +60,7 @@
"base64-arraybuffer": "^0.2.0",
"bs58": "^4.0.1",
"es6-promise": "https://github.com/bwindels/es6-promise.git#bwindels/expose-flush",
"sanitize-html": "^2.4.0",
"text-encoding": "^0.7.0"
}
}

View file

@ -77,12 +77,18 @@ async function populateLib() {
for (const file of ["olm.js", "olm.wasm", "olm_legacy.js"]) {
await fs.symlink(path.join(olmSrcDir, file), path.join(olmDstDir, file));
}
// transpile another-json to esm
// transpile node-html-parser to esm
await fs.mkdir(path.join(libDir, "node-html-parser/"));
await commonjsToESM(
require.resolve('node-html-parser/dist/index.js'),
path.join(libDir, "node-html-parser/index.js")
);
// transpile sanitize-html to esm
await fs.mkdir(path.join(libDir, "sanitize-html/"));
await commonjsToESM(
require.resolve('sanitize-html/index.js'),
path.join(libDir, "sanitize-html/index.js")
);
// transpile another-json to esm
await fs.mkdir(path.join(libDir, "another-json/"));
await commonjsToESM(

View file

@ -1,5 +1,5 @@
import { MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart, RulePart, TextPart, LinkPart, ImagePart } from "./MessageBody.js"
import sanitizeHtml from "../../../../../lib/sanitize-html/index.js"
/* At the time of writing (Jul 1 2021), Matrix Spec recommends
* allowing the following HTML tags:
@ -159,8 +159,27 @@ function parseNodes(options, nodes) {
return parsed;
}
const sanitizeConfig = {
allowedTags: [
"font", "del", "h1", "h2", "h3", "h4", "h5", "h6",
"blockquote", "p", "a", "ul", "ol", "sup", "sub", "li",
"b", "i", "u", "strong", "em", "strike", "code", "hr",
"br", "div", "table", "thead", "tbody", "tr", "th", "td",
"caption", "pre", "span", "img"
],
allowedAttributes: {
"font": ["data-mx-bg-color", "data-mx-color"],
"span": ["data-mx-bg-color", "data-mx-color"],
"a": ["name", "target", "href"],
"img": ["width", "height", "alt", "title", "src"],
"ol": ["start"],
"code": ["class"]
},
allowedSchemes: [ "http", "https", "ftp", "mailto", "tel", "mxc" ]
};
export function parseHTMLBody({ mediaRepository, platform }, html) {
const parseResult = platform.parseHTML(html);
const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig));
const options = { result: parseResult, mediaRepository };
const parts = parseNodes(options, parseResult.rootNodes);
return new MessageBody(html, parts);

View file

@ -1389,7 +1389,7 @@ domhandler@^2.3.0:
dependencies:
domelementtype "1"
domhandler@^4.2.0:
domhandler@^4.0.0, domhandler@^4.2.0:
version "4.2.0"
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059"
integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA==
@ -1412,7 +1412,7 @@ domutils@^1.5.1:
dom-serializer "0"
domelementtype "1"
domutils@^2.6.0:
domutils@^2.5.2, domutils@^2.6.0:
version "2.7.0"
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.7.0.tgz#8ebaf0c41ebafcf55b0b72ec31c56323712c5442"
integrity sha512-8eaHa17IwJUPAiB+SoTYBo5mCdeMgdcAoXJ59m6DT1vw+5iLS3gNoqYaRowaBKtGVrOF1Jz4yDTgYKLK2kvfJg==
@ -1487,6 +1487,11 @@ escape-string-regexp@^1.0.3, escape-string-regexp@^1.0.5:
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4"
integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=
escape-string-regexp@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34"
integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA==
eslint-scope@^5.1.1:
version "5.1.1"
resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c"
@ -1796,6 +1801,16 @@ htmlparser2@^3.9.1:
inherits "^2.0.1"
readable-stream "^3.1.1"
htmlparser2@^6.0.0:
version "6.1.0"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7"
integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A==
dependencies:
domelementtype "^2.0.1"
domhandler "^4.0.0"
domutils "^2.5.2"
entities "^2.0.0"
http-errors@~1.7.2:
version "1.7.3"
resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06"
@ -1875,6 +1890,11 @@ is-module@^1.0.0:
resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591"
integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE=
is-plain-object@^5.0.0:
version "5.0.0"
resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344"
integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==
is-reference@^1.2.1:
version "1.2.1"
resolved "https://registry.yarnpkg.com/is-reference/-/is-reference-1.2.1.tgz#8b2dac0b371f4bc994fdeaba9eb542d03002d0b7"
@ -1953,6 +1973,11 @@ json5@^2.1.2:
dependencies:
minimist "^1.2.5"
klona@^2.0.3:
version "2.0.4"
resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0"
integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA==
leven@^3.1.0:
version "3.1.0"
resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2"
@ -2089,6 +2114,11 @@ nanoid@^3.1.12:
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.12.tgz#6f7736c62e8d39421601e4a0c77623a97ea69654"
integrity sha512-1qstj9z5+x491jfiC4Nelk+f8XBad7LN20PmyWINJEMRSf3wcAjAWysw1qaA8z6NSKe2sjq1hRSDpBH5paCb6A==
nanoid@^3.1.23:
version "3.1.23"
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.23.tgz#f744086ce7c2bc47ee0a8472574d5c78e4183a81"
integrity sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw==
natural-compare@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7"
@ -2179,6 +2209,11 @@ parent-module@^1.0.0:
dependencies:
callsites "^3.0.0"
parse-srcset@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1"
integrity sha1-8r0iH2zJcKk42IVWq8WJyqqiveE=
parse5@^3.0.1:
version "3.0.3"
resolved "https://registry.yarnpkg.com/parse5/-/parse5-3.0.3.tgz#042f792ffdd36851551cf4e9e066b3874ab45b5c"
@ -2296,6 +2331,15 @@ postcss@^7.0.2, postcss@^7.0.26:
source-map "^0.6.1"
supports-color "^6.1.0"
postcss@^8.0.2:
version "8.3.5"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.3.5.tgz#982216b113412bc20a86289e91eb994952a5b709"
integrity sha512-NxTuJocUhYGsMiMFHDUkmjSKT3EdH4/WbGF6GCi1NDGk+vbcUTun4fpbOqaPtD8IIsztA2ilZm2DhYCuyN58gA==
dependencies:
colorette "^1.2.2"
nanoid "^3.1.23"
source-map-js "^0.6.2"
postcss@^8.1.1:
version "8.1.1"
resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.1.1.tgz#c3a287dd10e4f6c84cb3791052b96a5d859c9389"
@ -2473,6 +2517,19 @@ safe-buffer@~5.2.0:
resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519"
integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg==
sanitize-html@^2.4.0:
version "2.4.0"
resolved "https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.4.0.tgz#8da7524332eb210d968971621b068b53f17ab5a3"
integrity sha512-Y1OgkUiTPMqwZNRLPERSEi39iOebn2XJLbeiGOBhaJD/yLqtLGu6GE5w7evx177LeGgSE+4p4e107LMiydOf6A==
dependencies:
deepmerge "^4.2.2"
escape-string-regexp "^4.0.0"
htmlparser2 "^6.0.0"
is-plain-object "^5.0.0"
klona "^2.0.3"
parse-srcset "^1.0.2"
postcss "^8.0.2"
semver@7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e"
@ -2555,6 +2612,11 @@ slice-ansi@^4.0.0:
astral-regex "^2.0.0"
is-fullwidth-code-point "^3.0.0"
source-map-js@^0.6.2:
version "0.6.2"
resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e"
integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug==
source-map@^0.5.0:
version "0.5.7"
resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"