Rewrite deserialize as a class to avoid passing options around.

This commit is contained in:
Danila Fedorin 2021-07-12 12:19:43 -07:00
parent abd62373e5
commit 9304ca330e

View file

@@ -13,29 +13,21 @@ import sanitizeHtml from "../../../../../lib/sanitize-html/index.js"
*/ */
const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ] const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ]
/** class Deserializer {
* Return a builder function for a particular tag. constructor(result, mediaRepository) {
*/ this.result = result;
function basicWrapper(tag) { this.mediaRepository = mediaRepository;
return (_, node, children) => new FormatPart(tag, children);
} }
/** parseLink(node, children) {
* Return a builder function for a particular header level.
*/
function headerWrapper(level) {
return (_, node, children) => new HeaderBlock(level, children);
}
function parseLink(options, node, children) {
// TODO Not equivalent to `node.href`! // TODO Not equivalent to `node.href`!
// Add another HTMLParseResult method? // Add another HTMLParseResult method?
let href = options.result.getAttributeValue(node, "href"); let href = this.result.getAttributeValue(node, "href");
return new LinkPart(href, children); return new LinkPart(href, children);
} }
function parseList(options, node) { parseList(node) {
const { result } = options; const result = this.result;
let start = null; let start = null;
if (result.getNodeElementName(node) === "OL") { if (result.getNodeElementName(node) === "OL") {
// Will return 1 for, say, '1A', which may not be intended? // Will return 1 for, say, '1A', which may not be intended?
@@ -46,14 +38,14 @@ function parseList(options, node) {
if (result.getNodeElementName(child) !== "LI") { if (result.getNodeElementName(child) !== "LI") {
continue; continue;
} }
const item = parseNodes(options, result.getChildNodes(child)); const item = this.parseNodes(result.getChildNodes(child));
nodes.push(item); nodes.push(item);
} }
return new ListBlock(start, nodes); return new ListBlock(start, nodes);
} }
function parseCodeBlock(options, node) { parseCodeBlock(node) {
const { result } = options; const result = this.result;
let codeNode; let codeNode;
for (const child of result.getChildNodes(node)) { for (const child of result.getChildNodes(node)) {
codeNode = child; codeNode = child;
@@ -73,10 +65,10 @@ function parseCodeBlock(options, node) {
return new CodeBlock(language, codeNode.textContent); return new CodeBlock(language, codeNode.textContent);
} }
function parseImage(options, node) { parseImage(node) {
const { result, mediaRepository } = options; const result = this.result;
const src = result.getAttributeValue(node, "src") || ""; const src = result.getAttributeValue(node, "src") || "";
const url = mediaRepository.mxcUrl(src); const url = this.mediaRepository.mxcUrl(src);
// We just ignore non-mxc `src` attributes. // We just ignore non-mxc `src` attributes.
if (!url) { if (!url) {
return null; return null;
@@ -88,59 +80,63 @@ function parseImage(options, node) {
return new ImagePart(url, { width, height, alt, title }); return new ImagePart(url, { width, height, alt, title });
} }
function buildNodeMap() { parseElement(node) {
let map = { const result = this.result;
A: { descend: true, parsefn: parseLink }, const tag = result.getNodeElementName(node);
UL: { descend: false, parsefn: parseList }, switch (tag) {
OL: { descend: false, parsefn: parseList }, case "H1":
PRE: { descend: false, parsefn: parseCodeBlock }, case "H2":
BR: { descend: false, parsefn: () => new NewLinePart() }, case "H3":
HR: { descend: false, parsefn: () => new RulePart() }, case "H4":
IMG: { descend: false, parsefn: parseImage } case "H5":
case "H6": {
const children = this.parseChildNodes(node);
return new HeaderBlock(parseInt(tag[1]), children)
} }
for (const tag of basicNodes) { case "A": {
map[tag] = { descend: true, parsefn: basicWrapper(tag) } const children = this.parseChildNodes(node);
return this.parseLink(node, children);
}
case "UL":
case "OL":
return this.parseList(node);
case "PRE":
return this.parseCodeBlock(node);
case "BR":
return new NewLinePart();
case "HR":
return new RulePart();
case "IMG":
return this.parseImage(node);
default: {
if (!basicNodes.includes(tag)) {
return null;
}
const children = this.parseChildNodes(node);
return new FormatPart(tag, children);
} }
for (let level = 1; level <= 6; level++) {
const tag = "h" + level;
map[tag] = { descend: true, parsefn: headerWrapper(level) }
} }
return map;
} }
/** parseNode(node) {
* Handlers for various nodes. const result = this.result;
*
* Each handler has two properties: `descend` and `parsefn`.
* If `descend` is true, the node's children should be
* parsed just like any other node, and fed as a second argument
* to `parsefn`. If not, the node's children are either to be ignored
* (as in <pre>) or processed specially (as in <ul>).
*
* The `parsefn` combines a node's data and its children into
* an internal representation node.
*/
const nodes = buildNodeMap();
function parseNode(options, node) {
const { result } = options;
if (result.isTextNode(node)) { if (result.isTextNode(node)) {
return new TextPart(result.getNodeText(node)); return new TextPart(result.getNodeText(node));
} else if (result.isElementNode(node)) { } else if (result.isElementNode(node)) {
const f = nodes[result.getNodeElementName(node)]; return this.parseElement(node);
if (!f) {
return null;
}
const children = f.descend ? parseNodes(options, node.childNodes) : null;
return f.parsefn(options, node, children);
} }
return null; return null;
} }
function parseNodes(options, nodes) { parseChildNodes(node) {
const childNodes = this.result.getChildNodes(node);
return this.parseNodes(childNodes);
}
parseNodes(nodes) {
const parsed = []; const parsed = [];
for (const htmlNode of nodes) { for (const htmlNode of nodes) {
let node = parseNode(options, htmlNode); let node = this.parseNode(htmlNode);
// Just ignore invalid / unknown tags. // Just ignore invalid / unknown tags.
if (node) { if (node) {
parsed.push(node); parsed.push(node);
@@ -148,6 +144,7 @@ function parseNodes(options, nodes) {
} }
return parsed; return parsed;
} }
}
const sanitizeConfig = { const sanitizeConfig = {
allowedTags: [ allowedTags: [
@@ -170,8 +167,8 @@ const sanitizeConfig = {
export function parseHTMLBody({ mediaRepository, platform }, html) { export function parseHTMLBody({ mediaRepository, platform }, html) {
const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig)); const parseResult = platform.parseHTML(sanitizeHtml(html, sanitizeConfig));
const options = { result: parseResult, mediaRepository }; const deserializer = new Deserializer(parseResult, mediaRepository);
const parts = parseNodes(options, parseResult.rootNodes); const parts = deserializer.parseNodes(parseResult.rootNodes);
return new MessageBody(html, parts); return new MessageBody(html, parts);
} }