Move HTML parsing into platform.

This commit is contained in:
Danila Fedorin 2021-07-02 15:05:50 -07:00
parent 8f44cc21db
commit 005580f385
3 changed files with 79 additions and 29 deletions

View file

@ -0,0 +1,37 @@
/**
 * Accessor layer over a tree of DOM-style nodes.
 *
 * The instance keeps the node handed to the constructor and exposes its
 * children as the roots of the parse result. Every other method simply
 * reads a property (or calls a method) on the node it is given, so callers
 * can walk the tree without touching node objects directly.
 */
export class HTMLParseResult {
    /**
     * @param bodyNode node whose `childNodes` become the result's root nodes.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /** The `childNodes` collection of the wrapped node. */
    get rootNodes() {
        const {childNodes} = this._bodyNode;
        return childNodes;
    }

    /** @returns the `childNodes` collection of `node`. */
    getChildNodes(node) {
        const {childNodes} = node;
        return childNodes;
    }

    /** @returns whatever `node.getAttributeNames()` yields. */
    getAttributeNames(node) {
        const names = node.getAttributeNames();
        return names;
    }

    /**
     * @param node node to query.
     * @param attr attribute name forwarded to `node.getAttribute`.
     * @returns the result of `node.getAttribute(attr)`.
     */
    getAttributeValue(node, attr) {
        const value = node.getAttribute(attr);
        return value;
    }

    /** @returns true when `node.nodeType` equals `Node.TEXT_NODE`. */
    isTextNode(node) {
        const {nodeType} = node;
        return nodeType === Node.TEXT_NODE;
    }

    /** @returns the `nodeValue` of `node`. */
    getNodeText(node) {
        const {nodeValue} = node;
        return nodeValue;
    }

    /** @returns true when `node.nodeType` equals `Node.ELEMENT_NODE`. */
    isElementNode(node) {
        const {nodeType} = node;
        return nodeType === Node.ELEMENT_NODE;
    }

    /** @returns the `tagName` of `node`. */
    getNodeElementName(node) {
        const {tagName} = node;
        return tagName;
    }
}

View file

@ -36,6 +36,7 @@ import {BlobHandle} from "./dom/BlobHandle.js";
import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js"; import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js";
import {downloadInIframe} from "./dom/download.js"; import {downloadInIframe} from "./dom/download.js";
import {Disposables} from "../../utils/Disposables.js"; import {Disposables} from "../../utils/Disposables.js";
import {HTMLParseResult} from "./HTMLParseResult.js";
function addScript(src) { function addScript(src) {
return new Promise(function (resolve, reject) { return new Promise(function (resolve, reject) {
@ -236,6 +237,11 @@ export class Platform {
return promise; return promise;
} }
parseHTML(html) {
const bodyNode = new DOMParser().parseFromString(html, "text/html").body;
return new HTMLParseResult(bodyNode);
}
async loadImage(blob) { async loadImage(blob) {
return ImageHandle.fromBlob(blob); return ImageHandle.fromBlob(blob);
} }

View file

@ -17,41 +17,49 @@ const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ]
* Return a builder function for a particular tag. * Return a builder function for a particular tag.
*/ */
function basicWrapper(tag) { function basicWrapper(tag) {
return (_, children) => new FormatPart(tag, children); return (result, node, children) => new FormatPart(tag, children);
} }
/** /**
* Return a builder function for a particular header level. * Return a builder function for a particular header level.
*/ */
function headerWrapper(level) { function headerWrapper(level) {
return (_, children) => new HeaderBlock(level, children); return (result, node, children) => new HeaderBlock(level, children);
} }
function parseLink(node, children) { function parseLink(result, node, children) {
return new LinkPart(node.href, children); // TODO Not equivalent to `node.href`!
// Add another HTMLParseResult method?
let href = result.getAttributeValue(node, "href");
return new LinkPart(href, children);
} }
function parseList(node) { function parseList(result, node) {
const start = node.getAttribute("start") || 1; // TODO Attribute's a string.
const start = result.getAttributeValue(node, "start") || 1;
const nodes = []; const nodes = [];
const len = node.childNodes.length; for (const child of result.getChildNodes(node)) {
for (let i = 0; i < len; i += 1) { if (result.getNodeElementName(child) !== "LI") {
const child = node.childNodes[i];
if (child.tagName !== "LI") {
continue; continue;
} }
nodes.push(parseNodes(child.childNodes)); const item = parseNodes(result, result.getChildNodes(child));
nodes.push(item);
} }
return new ListBlock(start, nodes); return new ListBlock(start, nodes);
} }
function parseCodeBlock(node) { function parseCodeBlock(result, node) {
let codeNode; let codeNode;
if (!((codeNode = node.firstChild) && codeNode.nodeName === "CODE")) { for (const child of result.getChildNodes(node)) {
codeNode = child;
break;
}
if (!(codeNode && result.getNodeElementName(codeNode) === "CODE")) {
return null; return null;
} }
let language = ""; let language = "";
for (const clname of codeNode.classList) { const cl = result.getAttributeValue(codeNode, "class") || ""
for (const clname of cl.split(" ")) {
if (clname.startsWith("language-") && !clname.startsWith("language-_")) { if (clname.startsWith("language-") && !clname.startsWith("language-_")) {
language = clname.substring(9) // "language-".length language = clname.substring(9) // "language-".length
break; break;
@ -60,7 +68,7 @@ function parseCodeBlock(node) {
return new CodeBlock(language, codeNode.textContent); return new CodeBlock(language, codeNode.textContent);
} }
function parseImage(node) { function parseImage(result, node) {
return null; return null;
} }
@ -98,25 +106,24 @@ function buildNodeMap() {
*/ */
const nodes = buildNodeMap(); const nodes = buildNodeMap();
function parseNode(node) { function parseNode(result, node) {
if (node.nodeType === Node.TEXT_NODE) { if (result.isTextNode(node)) {
return new TextPart(node.nodeValue); return new TextPart(result.getNodeText(node));
} else if (node.nodeType === Node.ELEMENT_NODE) { } else if (result.isElementNode(node)) {
const f = nodes[node.nodeName]; const f = nodes[result.getNodeElementName(node)];
if (!f) { if (!f) {
return null; return null;
} }
let result = f.parsefn(node, f.descend ? parseNodes(node.childNodes) : null); const children = f.descend ? parseNodes(result, node.childNodes) : null;
return result; return f.parsefn(result, node, children);
} }
return null; return null;
} }
function parseNodes(nodes) { function parseNodes(result, nodes) {
const len = nodes.length;
const parsed = []; const parsed = [];
for (let i = 0; i < len; i ++) { for (const htmlNode of nodes) {
let node = parseNode(nodes[i]); let node = parseNode(result, htmlNode);
// Just ignore invalid / unknown tags. // Just ignore invalid / unknown tags.
if (node) { if (node) {
parsed.push(node); parsed.push(node);
@ -125,7 +132,7 @@ function parseNodes(nodes) {
return parsed; return parsed;
} }
export function parse(html) { export function parse(platform, html) {
const rootNode = new DOMParser().parseFromString(html, "text/html").body; const parseResult = platform.parseHTML(html);
return parseNodes(rootNode.childNodes); return parseNodes(parseResult, parseResult.rootNodes);
} }