Move HTML parsing into platform.
This commit is contained in:
parent
8f44cc21db
commit
005580f385
3 changed files with 79 additions and 29 deletions
37
src/platform/web/HTMLParseResult.js
Normal file
37
src/platform/web/HTMLParseResult.js
Normal file
|
@ -0,0 +1,37 @@
|
|||
export class HTMLParseResult {
|
||||
constructor(bodyNode) {
|
||||
this._bodyNode = bodyNode;
|
||||
}
|
||||
|
||||
get rootNodes() {
|
||||
return this._bodyNode.childNodes;
|
||||
}
|
||||
|
||||
getChildNodes(node) {
|
||||
return node.childNodes;
|
||||
}
|
||||
|
||||
getAttributeNames(node) {
|
||||
return node.getAttributeNames();
|
||||
}
|
||||
|
||||
getAttributeValue(node, attr) {
|
||||
return node.getAttribute(attr);
|
||||
}
|
||||
|
||||
isTextNode(node) {
|
||||
return node.nodeType === Node.TEXT_NODE;
|
||||
}
|
||||
|
||||
getNodeText(node) {
|
||||
return node.nodeValue;
|
||||
}
|
||||
|
||||
isElementNode(node) {
|
||||
return node.nodeType === Node.ELEMENT_NODE;
|
||||
}
|
||||
|
||||
getNodeElementName(node) {
|
||||
return node.tagName;
|
||||
}
|
||||
}
|
|
@ -36,6 +36,7 @@ import {BlobHandle} from "./dom/BlobHandle.js";
|
|||
import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js";
|
||||
import {downloadInIframe} from "./dom/download.js";
|
||||
import {Disposables} from "../../utils/Disposables.js";
|
||||
import {HTMLParseResult} from "./HTMLParseResult.js";
|
||||
|
||||
function addScript(src) {
|
||||
return new Promise(function (resolve, reject) {
|
||||
|
@ -236,6 +237,11 @@ export class Platform {
|
|||
return promise;
|
||||
}
|
||||
|
||||
parseHTML(html) {
|
||||
const bodyNode = new DOMParser().parseFromString(html, "text/html").body;
|
||||
return new HTMLParseResult(bodyNode);
|
||||
}
|
||||
|
||||
async loadImage(blob) {
|
||||
return ImageHandle.fromBlob(blob);
|
||||
}
|
||||
|
|
|
@ -17,41 +17,49 @@ const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ]
|
|||
* Return a builder function for a particular tag.
|
||||
*/
|
||||
function basicWrapper(tag) {
|
||||
return (_, children) => new FormatPart(tag, children);
|
||||
return (result, node, children) => new FormatPart(tag, children);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a builder function for a particular header level.
|
||||
*/
|
||||
function headerWrapper(level) {
|
||||
return (_, children) => new HeaderBlock(level, children);
|
||||
return (result, node, children) => new HeaderBlock(level, children);
|
||||
}
|
||||
|
||||
function parseLink(node, children) {
|
||||
return new LinkPart(node.href, children);
|
||||
function parseLink(result, node, children) {
|
||||
// TODO Not equivalent to `node.href`!
|
||||
// Add another HTMLParseResult method?
|
||||
let href = result.getAttributeValue(node, "href");
|
||||
return new LinkPart(href, children);
|
||||
}
|
||||
|
||||
function parseList(node) {
|
||||
const start = node.getAttribute("start") || 1;
|
||||
function parseList(result, node) {
|
||||
// TODO Attribute's a string.
|
||||
const start = result.getAttributeValue(node, "start") || 1;
|
||||
const nodes = [];
|
||||
const len = node.childNodes.length;
|
||||
for (let i = 0; i < len; i += 1) {
|
||||
const child = node.childNodes[i];
|
||||
if (child.tagName !== "LI") {
|
||||
for (const child of result.getChildNodes(node)) {
|
||||
if (result.getNodeElementName(child) !== "LI") {
|
||||
continue;
|
||||
}
|
||||
nodes.push(parseNodes(child.childNodes));
|
||||
const item = parseNodes(result, result.getChildNodes(child));
|
||||
nodes.push(item);
|
||||
}
|
||||
return new ListBlock(start, nodes);
|
||||
}
|
||||
|
||||
function parseCodeBlock(node) {
|
||||
function parseCodeBlock(result, node) {
|
||||
let codeNode;
|
||||
if (!((codeNode = node.firstChild) && codeNode.nodeName === "CODE")) {
|
||||
for (const child of result.getChildNodes(node)) {
|
||||
codeNode = child;
|
||||
break;
|
||||
}
|
||||
if (!(codeNode && result.getNodeElementName(codeNode) === "CODE")) {
|
||||
return null;
|
||||
}
|
||||
let language = "";
|
||||
for (const clname of codeNode.classList) {
|
||||
const cl = result.getAttributeValue(codeNode, "class") || ""
|
||||
for (const clname of cl.split(" ")) {
|
||||
if (clname.startsWith("language-") && !clname.startsWith("language-_")) {
|
||||
language = clname.substring(9) // "language-".length
|
||||
break;
|
||||
|
@ -60,7 +68,7 @@ function parseCodeBlock(node) {
|
|||
return new CodeBlock(language, codeNode.textContent);
|
||||
}
|
||||
|
||||
function parseImage(node) {
|
||||
function parseImage(result, node) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
@ -98,25 +106,24 @@ function buildNodeMap() {
|
|||
*/
|
||||
const nodes = buildNodeMap();
|
||||
|
||||
function parseNode(node) {
|
||||
if (node.nodeType === Node.TEXT_NODE) {
|
||||
return new TextPart(node.nodeValue);
|
||||
} else if (node.nodeType === Node.ELEMENT_NODE) {
|
||||
const f = nodes[node.nodeName];
|
||||
function parseNode(result, node) {
|
||||
if (result.isTextNode(node)) {
|
||||
return new TextPart(result.getNodeText(node));
|
||||
} else if (result.isElementNode(node)) {
|
||||
const f = nodes[result.getNodeElementName(node)];
|
||||
if (!f) {
|
||||
return null;
|
||||
}
|
||||
let result = f.parsefn(node, f.descend ? parseNodes(node.childNodes) : null);
|
||||
return result;
|
||||
const children = f.descend ? parseNodes(result, node.childNodes) : null;
|
||||
return f.parsefn(result, node, children);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
function parseNodes(nodes) {
|
||||
const len = nodes.length;
|
||||
function parseNodes(result, nodes) {
|
||||
const parsed = [];
|
||||
for (let i = 0; i < len; i ++) {
|
||||
let node = parseNode(nodes[i]);
|
||||
for (const htmlNode of nodes) {
|
||||
let node = parseNode(result, htmlNode);
|
||||
// Just ignore invalid / unknown tags.
|
||||
if (node) {
|
||||
parsed.push(node);
|
||||
|
@ -125,7 +132,7 @@ function parseNodes(nodes) {
|
|||
return parsed;
|
||||
}
|
||||
|
||||
export function parse(html) {
|
||||
const rootNode = new DOMParser().parseFromString(html, "text/html").body;
|
||||
return parseNodes(rootNode.childNodes);
|
||||
export function parse(platform, html) {
|
||||
const parseResult = platform.parseHTML(html);
|
||||
return parseNodes(parseResult, parseResult.rootNodes);
|
||||
}
|
||||
|
|
Reference in a new issue