From 4022c6121b6548eec63da2937b6069f6e2a72fb4 Mon Sep 17 00:00:00 2001 From: Danila Fedorin Date: Mon, 12 Jul 2021 14:46:42 -0700 Subject: [PATCH] Switch to DOMPurify in platform layer for sanitization. --- package.json | 2 +- scripts/post-install.js | 10 +-- .../session/room/timeline/deserialize.js | 22 +----- src/platform/web/Platform.js | 5 +- .../web/{HTMLParseResult.js => parsehtml.js} | 16 ++++- yarn.lock | 71 ++----------------- 6 files changed, 31 insertions(+), 95 deletions(-) rename src/platform/web/{HTMLParseResult.js => parsehtml.js} (55%) diff --git a/package.json b/package.json index 640e1e08..5acbbe77 100644 --- a/package.json +++ b/package.json @@ -59,8 +59,8 @@ "another-json": "^0.2.0", "base64-arraybuffer": "^0.2.0", "bs58": "^4.0.1", + "dompurify": "^2.3.0", "es6-promise": "https://github.com/bwindels/es6-promise.git#bwindels/expose-flush", - "sanitize-html": "^2.4.0", "text-encoding": "^0.7.0" } } diff --git a/scripts/post-install.js b/scripts/post-install.js index 679ae5d6..fe8743b6 100644 --- a/scripts/post-install.js +++ b/scripts/post-install.js @@ -83,11 +83,11 @@ async function populateLib() { require.resolve('node-html-parser/dist/index.js'), path.join(libDir, "node-html-parser/index.js") ); - // transpile sanitize-html to esm - await fs.mkdir(path.join(libDir, "sanitize-html/")); - await commonjsToESM( - require.resolve('sanitize-html/index.js'), - path.join(libDir, "sanitize-html/index.js") + // Symlink dompurify + await fs.mkdir(path.join(libDir, "dompurify/")); + await fs.symlink( + require.resolve('dompurify/dist/purify.es.js'), + path.join(libDir, "dompurify/index.js") ); // transpile another-json to esm await fs.mkdir(path.join(libDir, "another-json/")); diff --git a/src/domain/session/room/timeline/deserialize.js b/src/domain/session/room/timeline/deserialize.js index 480117eb..bbe72d14 100644 --- a/src/domain/session/room/timeline/deserialize.js +++ b/src/domain/session/room/timeline/deserialize.js @@ -1,5 +1,4 @@ import { 
MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart, RulePart, TextPart, LinkPart, ImagePart } from "./MessageBody.js" -import sanitizeHtml from "../../../../../lib/sanitize-html/index.js" /* At the time of writing (Jul 1 2021), Matrix Spec recommends * allowing the following HTML tags: @@ -146,27 +145,8 @@ class Deserializer { } } -const sanitizeConfig = { - allowedTags: [ - "font", "del", "h1", "h2", "h3", "h4", "h5", "h6", - "blockquote", "p", "a", "ul", "ol", "sup", "sub", "li", - "b", "i", "u", "strong", "em", "strike", "code", "hr", - "br", "div", "table", "thead", "tbody", "tr", "th", "td", - "caption", "pre", "span", "img" - ], - allowedAttributes: { - "font": ["data-mx-bg-color", "data-mx-color"], - "span": ["data-mx-bg-color", "data-mx-color"], - "a": ["name", "target", "href"], - "img": ["width", "height", "alt", "title", "src"], - "ol": ["start"], - "code": ["class"] - }, - allowedSchemes: [ "http", "https", "ftp", "mailto", "tel", "mxc" ] -}; - export function parseHTMLBody(platform, mediaRepository, html) { - const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig)); + const parseResult = platform.parseHTML(html); const deserializer = new Deserializer(parseResult, mediaRepository); const parts = deserializer.parseNodes(parseResult.rootNodes); return new MessageBody(html, parts); diff --git a/src/platform/web/Platform.js b/src/platform/web/Platform.js index cd31eec6..224d67b6 100644 --- a/src/platform/web/Platform.js +++ b/src/platform/web/Platform.js @@ -36,7 +36,7 @@ import {BlobHandle} from "./dom/BlobHandle.js"; import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js"; import {downloadInIframe} from "./dom/download.js"; import {Disposables} from "../../utils/Disposables.js"; -import {HTMLParseResult} from "./HTMLParseResult.js"; +import {parseHTML} from "./parsehtml.js"; function addScript(src) { return new Promise(function (resolve, reject) { @@ -238,8 +238,7 @@ export class 
Platform { } parseHTML(html) { - const bodyNode = new DOMParser().parseFromString(html, "text/html").body; - return new HTMLParseResult(bodyNode); + return parseHTML(html); } async loadImage(blob) { diff --git a/src/platform/web/HTMLParseResult.js b/src/platform/web/parsehtml.js similarity index 55% rename from src/platform/web/HTMLParseResult.js rename to src/platform/web/parsehtml.js index 72a08ab7..0efd7d4f 100644 --- a/src/platform/web/HTMLParseResult.js +++ b/src/platform/web/parsehtml.js @@ -1,4 +1,6 @@ -export class HTMLParseResult { +import DOMPurify from "../../../lib/dompurify/index.js" + +class HTMLParseResult { constructor(bodyNode) { this._bodyNode = bodyNode; } @@ -35,3 +37,15 @@ export class HTMLParseResult { return node.tagName; } } + +const sanitizeConfig = { + ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|xxx|mxc):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i, +} + +export function parseHTML(html) { + // If DOMPurify uses DOMParser, can't we just get the built tree from it + // instead of re-parsing? 
+ const sanitized = DOMPurify.sanitize(html, sanitizeConfig); + const bodyNode = new DOMParser().parseFromString(sanitized, "text/html").body; + return new HTMLParseResult(bodyNode); +} diff --git a/yarn.lock b/yarn.lock index 826ee584..843a19bc 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1389,13 +1389,18 @@ domhandler@^2.3.0: dependencies: domelementtype "1" -domhandler@^4.0.0, domhandler@^4.2.0: +domhandler@^4.2.0: version "4.2.0" resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-4.2.0.tgz#f9768a5f034be60a89a27c2e4d0f74eba0d8b059" integrity sha512-zk7sgt970kzPks2Bf+dwT/PLzghLnsivb9CcxkvR8Mzr66Olr0Ofd8neSbglHJHaHa2MadfoSdNlKYAaafmWfA== dependencies: domelementtype "^2.2.0" +dompurify@^2.3.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.0.tgz#07bb39515e491588e5756b1d3e8375b5964814e2" + integrity sha512-VV5C6Kr53YVHGOBKO/F86OYX6/iLTw2yVSI721gKetxpHCK/V5TaLEf9ODjRgl1KLSWRMY6cUhAbv/c+IUnwQw== + domutils@1.5.1: version "1.5.1" resolved "https://registry.yarnpkg.com/domutils/-/domutils-1.5.1.tgz#dcd8488a26f563d61079e48c9f7b7e32373682cf" @@ -1412,7 +1417,7 @@ domutils@^1.5.1: dom-serializer "0" domelementtype "1" -domutils@^2.5.2, domutils@^2.6.0: +domutils@^2.6.0: version "2.7.0" resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.7.0.tgz#8ebaf0c41ebafcf55b0b72ec31c56323712c5442" integrity sha512-8eaHa17IwJUPAiB+SoTYBo5mCdeMgdcAoXJ59m6DT1vw+5iLS3gNoqYaRowaBKtGVrOF1Jz4yDTgYKLK2kvfJg== @@ -1487,11 +1492,6 @@ escape-string-regexp@^1.0.3, escape-string-regexp@^1.0.5: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz#1b61c0562190a8dff6ae3bb2cf0200ca130b86d4" integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ= -escape-string-regexp@^4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" - integrity 
sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== - eslint-scope@^5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" @@ -1801,16 +1801,6 @@ htmlparser2@^3.9.1: inherits "^2.0.1" readable-stream "^3.1.1" -htmlparser2@^6.0.0: - version "6.1.0" - resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-6.1.0.tgz#c4d762b6c3371a05dbe65e94ae43a9f845fb8fb7" - integrity sha512-gyyPk6rgonLFEDGoeRgQNaEUvdJ4ktTmmUh/h2t7s+M8oPpIPxgNACWa+6ESR57kXstwqPiCut0V8NRpcwgU7A== - dependencies: - domelementtype "^2.0.1" - domhandler "^4.0.0" - domutils "^2.5.2" - entities "^2.0.0" - http-errors@~1.7.2: version "1.7.3" resolved "https://registry.yarnpkg.com/http-errors/-/http-errors-1.7.3.tgz#6c619e4f9c60308c38519498c14fbb10aacebb06" @@ -1890,11 +1880,6 @@ is-module@^1.0.0: resolved "https://registry.yarnpkg.com/is-module/-/is-module-1.0.0.tgz#3258fb69f78c14d5b815d664336b4cffb6441591" integrity sha1-Mlj7afeMFNW4FdZkM2tM/7ZEFZE= -is-plain-object@^5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344" - integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q== - is-reference@^1.2.1: version "1.2.1" resolved "https://registry.yarnpkg.com/is-reference/-/is-reference-1.2.1.tgz#8b2dac0b371f4bc994fdeaba9eb542d03002d0b7" @@ -1973,11 +1958,6 @@ json5@^2.1.2: dependencies: minimist "^1.2.5" -klona@^2.0.3: - version "2.0.4" - resolved "https://registry.yarnpkg.com/klona/-/klona-2.0.4.tgz#7bb1e3affb0cb8624547ef7e8f6708ea2e39dfc0" - integrity sha512-ZRbnvdg/NxqzC7L9Uyqzf4psi1OM4Cuc+sJAkQPjO6XkQIJTNbfK2Rsmbw8fx1p2mkZdp2FZYo2+LwXYY/uwIA== - leven@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/leven/-/leven-3.1.0.tgz#77891de834064cccba82ae7842bb6b14a13ed7f2" @@ -2114,11 +2094,6 @@ nanoid@^3.1.12: 
resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.12.tgz#6f7736c62e8d39421601e4a0c77623a97ea69654" integrity sha512-1qstj9z5+x491jfiC4Nelk+f8XBad7LN20PmyWINJEMRSf3wcAjAWysw1qaA8z6NSKe2sjq1hRSDpBH5paCb6A== -nanoid@^3.1.23: - version "3.1.23" - resolved "https://registry.yarnpkg.com/nanoid/-/nanoid-3.1.23.tgz#f744086ce7c2bc47ee0a8472574d5c78e4183a81" - integrity sha512-FiB0kzdP0FFVGDKlRLEQ1BgDzU87dy5NnzjeW9YZNt+/c3+q82EQDUwniSAUxp/F0gFNI1ZhKU1FqYsMuqZVnw== - natural-compare@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" @@ -2209,11 +2184,6 @@ parent-module@^1.0.0: dependencies: callsites "^3.0.0" -parse-srcset@^1.0.2: - version "1.0.2" - resolved "https://registry.yarnpkg.com/parse-srcset/-/parse-srcset-1.0.2.tgz#f2bd221f6cc970a938d88556abc589caaaa2bde1" - integrity sha1-8r0iH2zJcKk42IVWq8WJyqqiveE= - parse5@^3.0.1: version "3.0.3" resolved "https://registry.yarnpkg.com/parse5/-/parse5-3.0.3.tgz#042f792ffdd36851551cf4e9e066b3874ab45b5c" @@ -2331,15 +2301,6 @@ postcss@^7.0.2, postcss@^7.0.26: source-map "^0.6.1" supports-color "^6.1.0" -postcss@^8.0.2: - version "8.3.5" - resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.3.5.tgz#982216b113412bc20a86289e91eb994952a5b709" - integrity sha512-NxTuJocUhYGsMiMFHDUkmjSKT3EdH4/WbGF6GCi1NDGk+vbcUTun4fpbOqaPtD8IIsztA2ilZm2DhYCuyN58gA== - dependencies: - colorette "^1.2.2" - nanoid "^3.1.23" - source-map-js "^0.6.2" - postcss@^8.1.1: version "8.1.1" resolved "https://registry.yarnpkg.com/postcss/-/postcss-8.1.1.tgz#c3a287dd10e4f6c84cb3791052b96a5d859c9389" @@ -2517,19 +2478,6 @@ safe-buffer@~5.2.0: resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.0.tgz#b74daec49b1148f88c64b68d49b1e815c1f2f519" integrity sha512-fZEwUGbVl7kouZs1jCdMLdt95hdIv0ZeHg6L7qPeciMZhZ+/gdesW4wgTARkrFWEpspjEATAzUGPG8N2jJiwbg== -sanitize-html@^2.4.0: - version "2.4.0" - resolved 
"https://registry.yarnpkg.com/sanitize-html/-/sanitize-html-2.4.0.tgz#8da7524332eb210d968971621b068b53f17ab5a3" - integrity sha512-Y1OgkUiTPMqwZNRLPERSEi39iOebn2XJLbeiGOBhaJD/yLqtLGu6GE5w7evx177LeGgSE+4p4e107LMiydOf6A== - dependencies: - deepmerge "^4.2.2" - escape-string-regexp "^4.0.0" - htmlparser2 "^6.0.0" - is-plain-object "^5.0.0" - klona "^2.0.3" - parse-srcset "^1.0.2" - postcss "^8.0.2" - semver@7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/semver/-/semver-7.0.0.tgz#5f3ca35761e47e05b206c6daff2cf814f0316b8e" @@ -2612,11 +2560,6 @@ slice-ansi@^4.0.0: astral-regex "^2.0.0" is-fullwidth-code-point "^3.0.0" -source-map-js@^0.6.2: - version "0.6.2" - resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-0.6.2.tgz#0bb5de631b41cfbda6cfba8bd05a80efdfd2385e" - integrity sha512-/3GptzWzu0+0MBQFrDKzw/DvvMTUORvgY6k6jd/VS6iCR4RDTKWH6v6WPwQoUO8667uQEf9Oe38DxAYWY5F/Ug== - source-map@^0.5.0: version "0.5.7" resolved "https://registry.yarnpkg.com/source-map/-/source-map-0.5.7.tgz#8a039d2d1021d22d1ea14c80d8ea468ba2ef3fcc"