From 1435c2c9a25f9c2ff12e435a49a00e364b3d649b Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Fri, 9 Jul 2021 13:06:24 -0700 Subject: [PATCH] Add HTML sanitization to domain layer. --- package.json | 1 + scripts/post-install.js | 8 ++- .../session/room/timeline/deserialize.js | 23 ++++++- yarn.lock | 66 ++++++++++++++++++- 4 files changed, 93 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 3e9f1a54..640e1e08 100644 --- a/package.json +++ b/package.json @@ -60,6 +60,7 @@ "base64-arraybuffer": "^0.2.0", "bs58": "^4.0.1", "es6-promise": "https://github.com/bwindels/es6-promise.git#bwindels/expose-flush", + "sanitize-html": "^2.4.0", "text-encoding": "^0.7.0" } } diff --git a/scripts/post-install.js b/scripts/post-install.js index 4d51cd2e..679ae5d6 100644 --- a/scripts/post-install.js +++ b/scripts/post-install.js @@ -77,12 +77,18 @@ async function populateLib() { for (const file of ["olm.js", "olm.wasm", "olm_legacy.js"]) { await fs.symlink(path.join(olmSrcDir, file), path.join(olmDstDir, file)); } - // transpile another-json to esm + // transpile node-html-parser to esm await fs.mkdir(path.join(libDir, "node-html-parser/")); await commonjsToESM( require.resolve('node-html-parser/dist/index.js'), path.join(libDir, "node-html-parser/index.js") ); + // transpile sanitize-html to esm + await fs.mkdir(path.join(libDir, "sanitize-html/")); + await commonjsToESM( + require.resolve('sanitize-html/index.js'), + path.join(libDir, "sanitize-html/index.js") + ); // transpile another-json to esm await fs.mkdir(path.join(libDir, "another-json/")); await commonjsToESM( diff --git a/src/domain/session/room/timeline/deserialize.js b/src/domain/session/room/timeline/deserialize.js index c9d0c84d..069d7669 100644 --- a/src/domain/session/room/timeline/deserialize.js +++ b/src/domain/session/room/timeline/deserialize.js @@ -1,5 +1,5 @@ import { MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart, RulePart, TextPart, LinkPart, ImagePart } from "./MessageBody.js" - +import sanitizeHtml from "../../../../../lib/sanitize-html/index.js" /* At the time of writing (Jul 1 2021), Matrix Spec recommends * allowing the following HTML tags: @@ -159,8 +159,27 @@ function parseNodes(options, nodes) { return parsed; } +const sanitizeConfig = { + allowedTags: [ + "font", "del", "h1", "h2", "h3", "h4", "h5", "h6", + "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li", + "b", "i", "u", "strong", "em", "strike", "code", "hr", + "br", "div", "table", "thead", "tbody", "tr", "th", "td", + "caption", "pre", "span", "img" + ], + allowedAttributes: { + "font": ["data-mx-bg-color", "data-mx-color"], + "span": ["data-mx-bg-color", "data-mx-color"], + "a": ["name", "target", "href"], + "img": ["width", "height", "alt", "title", "src"], + "ol": ["start"], + "code": ["class"] + }, + allowedSchemes: [ "http", "https", "ftp", "mailto", "tel", "mxc" ] +}; + export function parseHTMLBody({ mediaRepository, platform }, html) { - const parseResult = platform.parseHTML(html); + const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig)); const options = { result: parseResult, mediaRepository }; const parts = parseNodes(options, parseResult.rootNodes); return new MessageBody(html, parts); diff --git a/yarn.lock b/yarn.lock index 211749b0..826ee584 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1389,7 +1389,7 @@ domhandler@^2.3.0: dependencies: domelementtype "1" -domhandler@^4.2.0: +domhandler@^4.0.0, domhandler@^4.2.0: version "4.2.0" resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059" integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA== @@ -1412,7 +1412,7 @@ domutils@^1.5.1: dom-serializer "0" domelementtype "1" -domutils@^2.6.0: +domutils@^2.5.2, domutils@^2.6.0: version "2.7.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.7.0.tgz#8ebaf0c41ebafcf55b0b72ec31c56323712c5442" integrity sha512-8eaHa17IwJUPAiB+SoTYBo5mCdeMgdcAoXJ59m6DT1vw+5iLS3gNoqYaRowaBKtGVrOF1Jz4yDTgYKLK2kvfJg== @@ -1487,6 +1487,11 @@ escape-string-regexp@^1.0.3, escape-string-regexp@^1.0.5: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ= +escape-string-regexp@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" + integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== + eslint-scope@^5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" @@ -1796,6 +1801,16 @@ htmlparser2@^3.9.1: inherits "^2.0.1" readable-stream "^3.1.1" +htmlparser2@^6.0.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7" + integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A== + dependencies: + domelementtype "^2.0.1" + domhandler "^4.0.0" + domutils "^2.5.2" + entities "^2.0.0" + http-errors@~1.7.2: version "1.7.3" resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06" @@ -1875,6 +1890,11 @@ is-module@^1.0.0: resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591" integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE= +is-plain-object@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344" + integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q== + is-reference@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/is-reference/-/is-reference-1.2.1.tgz#8b2dac0b371f4bc994fdeaba9eb542d03002d0b7" @@ -1953,6 +1973,11 @@ json5@^2.1.2: dependencies: minimist "^1.2.5" +klona@^2.0.3: + version "2.0.4" + resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0" + integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA== + leven@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2" @@ -2089,6 +2114,11 @@ nanoid@^3.1.12: resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.12.tgz#6f7736c62e8d39421601e4a0c77623a97ea69654" integrity sha512-1qstj9z5+x491jfiC4Nelk+f8XBad7LN20PmyWINJEMRSf3wcAjAWysw1qaA8z6NSKe2sjq1hRSDpBH5paCb6A== +nanoid@^3.1.23: + version "3.1.23" + resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.23.tgz#f744086ce7c2bc47ee0a8472574d5c78e4183a81" + integrity sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw== + natural-compare@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" @@ -2179,6 +2209,11 @@ parent-module@^1.0.0: dependencies: callsites "^3.0.0" +parse-srcset@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1" + integrity sha1-8r0iH2zJcKk42IVWq8WJyqqiveE= + parse5@^3.0.1: version "3.0.3" resolved "https://registry.yarnpkg.com/parse5/-/parse5-3.0.3.tgz#042f792ffdd36851551cf4e9e066b3874ab45b5c" @@ -2296,6 +2331,15 @@ postcss@^7.0.2, postcss@^7.0.26: source-map "^0.6.1" supports-color "^6.1.0" +postcss@^8.0.2: + version "8.3.5" + resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.3.5.tgz#982216b113412bc20a86289e91eb994952a5b709" + integrity sha512-NxTuJocUhYGsMiMFHDUkmjSKT3EdH4/WbGF6GCi1NDGk+vbcUTun4fpbOqaPtD8IIsztA2ilZm2DhYCuyN58gA== + dependencies: + colorette "^1.2.2" + nanoid "^3.1.23" + source-map-js "^0.6.2" + postcss@^8.1.1: version "8.1.1" resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.1.1.tgz#c3a287dd10e4f6c84cb3791052b96a5d859c9389" @@ -2473,6 +2517,19 @@ safe-buffer@~5.2.0: resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519" integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg== +sanitize-html@^2.4.0: + version "2.4.0" + resolved "https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.4.0.tgz#8da7524332eb210d968971621b068b53f17ab5a3" + integrity sha512-Y1OgkUiTPMqwZNRLPERSEi39iOebn2XJLbeiGOBhaJD/yLqtLGu6GE5w7evx177LeGgSE+4p4e107LMiydOf6A== + dependencies: + deepmerge "^4.2.2" + escape-string-regexp "^4.0.0" + htmlparser2 "^6.0.0" + is-plain-object "^5.0.0" + klona "^2.0.3" + parse-srcset "^1.0.2" + postcss "^8.0.2" + semver@7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e" @@ -2555,6 +2612,11 @@ slice-ansi@^4.0.0: astral-regex "^2.0.0" is-fullwidth-code-point "^3.0.0" +source-map-js@^0.6.2: + version "0.6.2" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e" + integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug== + source-map@^0.5.0: version "0.5.7" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"