Move HTML parsing into platform.
This commit is contained in:
parent
8f44cc21db
commit
005580f385
3 changed files with 79 additions and 29 deletions
37
src/platform/web/HTMLParseResult.js
Normal file
37
src/platform/web/HTMLParseResult.js
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
/**
 * Platform-specific view over a parsed HTML body.
 *
 * Callers walk the parsed tree exclusively through these accessors, so they
 * never have to touch DOM properties on the nodes themselves.
 */
export class HTMLParseResult {
    /**
     * @param {Node} bodyNode the node whose children are the parsed top-level nodes.
     */
    constructor(bodyNode) {
        this._bodyNode = bodyNode;
    }

    /**
     * Top-level nodes of the parsed HTML.
     * Returned as an array snapshot so callers can use `for...of` and array
     * methods without depending on the node list itself being iterable.
     * @returns {Node[]}
     */
    get rootNodes() {
        return Array.from(this._bodyNode.childNodes);
    }

    /**
     * @param {Node} node
     * @returns {Node[]} array snapshot of the direct children of `node`.
     */
    getChildNodes(node) {
        return Array.from(node.childNodes);
    }

    /**
     * @param {Element} node
     * @returns {string[]} whatever `node.getAttributeNames()` reports.
     */
    getAttributeNames(node) {
        return node.getAttributeNames();
    }

    /**
     * @param {Element} node
     * @param {string} attr attribute name.
     * @returns {?string} the raw result of `node.getAttribute(attr)`.
     */
    getAttributeValue(node, attr) {
        return node.getAttribute(attr);
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` is `Node.TEXT_NODE`.
     */
    isTextNode(node) {
        return node.nodeType === Node.TEXT_NODE;
    }

    /**
     * @param {Node} node
     * @returns {?string} the node's `nodeValue`.
     */
    getNodeText(node) {
        return node.nodeValue;
    }

    /**
     * @param {Node} node
     * @returns {boolean} true when `node.nodeType` is `Node.ELEMENT_NODE`.
     */
    isElementNode(node) {
        return node.nodeType === Node.ELEMENT_NODE;
    }

    /**
     * @param {Node} node
     * @returns {string|undefined} the node's `tagName` (undefined when the node has none).
     */
    getNodeElementName(node) {
        return node.tagName;
    }
}
|
|
@ -36,6 +36,7 @@ import {BlobHandle} from "./dom/BlobHandle.js";
|
||||||
import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js";
|
import {hasReadPixelPermission, ImageHandle, VideoHandle} from "./dom/ImageHandle.js";
|
||||||
import {downloadInIframe} from "./dom/download.js";
|
import {downloadInIframe} from "./dom/download.js";
|
||||||
import {Disposables} from "../../utils/Disposables.js";
|
import {Disposables} from "../../utils/Disposables.js";
|
||||||
|
import {HTMLParseResult} from "./HTMLParseResult.js";
|
||||||
|
|
||||||
function addScript(src) {
|
function addScript(src) {
|
||||||
return new Promise(function (resolve, reject) {
|
return new Promise(function (resolve, reject) {
|
||||||
|
@ -236,6 +237,11 @@ export class Platform {
|
||||||
return promise;
|
return promise;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parseHTML(html) {
|
||||||
|
const bodyNode = new DOMParser().parseFromString(html, "text/html").body;
|
||||||
|
return new HTMLParseResult(bodyNode);
|
||||||
|
}
|
||||||
|
|
||||||
async loadImage(blob) {
|
async loadImage(blob) {
|
||||||
return ImageHandle.fromBlob(blob);
|
return ImageHandle.fromBlob(blob);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,41 +17,49 @@ const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ]
|
||||||
* Return a builder function for a particular tag.
|
* Return a builder function for a particular tag.
|
||||||
*/
|
*/
|
||||||
function basicWrapper(tag) {
    // Builders share the (result, node, children) signature; a plain format
    // part only needs the already-parsed children.
    return (result, node, children) => new FormatPart(tag, children);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return a builder function for a particular header level.
|
* Return a builder function for a particular header level.
|
||||||
*/
|
*/
|
||||||
function headerWrapper(level) {
    // Builders share the (result, node, children) signature; a header only
    // needs its level and the already-parsed children.
    return (result, node, children) => new HeaderBlock(level, children);
}
|
||||||
|
|
||||||
/**
 * Build a LinkPart for an anchor element.
 * @param {HTMLParseResult} result accessor used to read the node.
 * @param {*} node the anchor node.
 * @param {Array} children already-parsed child parts.
 * @returns {LinkPart}
 */
function parseLink(result, node, children) {
    // TODO Not equivalent to `node.href`!
    // Add another HTMLParseResult method?
    // (This reads the raw attribute value; it may be null when no href is set.)
    const target = result.getAttributeValue(node, "href");
    return new LinkPart(target, children);
}
|
||||||
|
|
||||||
/**
 * Build a ListBlock from a list element.
 * @param {HTMLParseResult} result accessor used to read the node tree.
 * @param {*} node the list node.
 * @returns {ListBlock} block holding the start number and one parsed
 *   child array per LI item.
 */
function parseList(result, node) {
    // Attribute values are strings (or null when absent); convert to a number
    // and fall back to 1 when the attribute is missing or not numeric.
    const parsedStart = Number.parseInt(result.getAttributeValue(node, "start"), 10);
    const start = Number.isNaN(parsedStart) ? 1 : parsedStart;
    const items = [];
    for (const child of result.getChildNodes(node)) {
        // Anything that is not a list item (e.g. whitespace text) is ignored.
        if (result.getNodeElementName(child) !== "LI") {
            continue;
        }
        items.push(parseNodes(result, result.getChildNodes(child)));
    }
    return new ListBlock(start, items);
}
|
||||||
|
|
||||||
function parseCodeBlock(node) {
|
function parseCodeBlock(result, node) {
|
||||||
let codeNode;
|
let codeNode;
|
||||||
if (!((codeNode = node.firstChild) && codeNode.nodeName === "CODE")) {
|
for (const child of result.getChildNodes(node)) {
|
||||||
|
codeNode = child;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!(codeNode && result.getNodeElementName(codeNode) === "CODE")) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
let language = "";
|
let language = "";
|
||||||
for (const clname of codeNode.classList) {
|
const cl = result.getAttributeValue(codeNode, "class") || ""
|
||||||
|
for (const clname of cl.split(" ")) {
|
||||||
if (clname.startsWith("language-") && !clname.startsWith("language-_")) {
|
if (clname.startsWith("language-") && !clname.startsWith("language-_")) {
|
||||||
language = clname.substring(9) // "language-".length
|
language = clname.substring(9) // "language-".length
|
||||||
break;
|
break;
|
||||||
|
@ -60,7 +68,7 @@ function parseCodeBlock(node) {
|
||||||
return new CodeBlock(language, codeNode.textContent);
|
return new CodeBlock(language, codeNode.textContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Placeholder builder for image elements: always yields null, which
 * parseNode passes straight back to its caller.
 * @param {HTMLParseResult} result unused.
 * @param {*} node unused.
 * @returns {null}
 */
function parseImage(result, node) {
    return null;
}
|
||||||
|
|
||||||
|
@ -98,25 +106,24 @@ function buildNodeMap() {
|
||||||
*/
|
*/
|
||||||
const nodes = buildNodeMap();
|
const nodes = buildNodeMap();
|
||||||
|
|
||||||
/**
 * Convert a single parsed HTML node into a message part.
 * @param {HTMLParseResult} result accessor used for every read on the node.
 * @param {*} node node handed out by `result`.
 * @returns {*} a TextPart for text nodes, whatever the registered builder
 *   returns for known elements, or null for anything else.
 */
function parseNode(result, node) {
    if (result.isTextNode(node)) {
        return new TextPart(result.getNodeText(node));
    }
    if (!result.isElementNode(node)) {
        return null;
    }
    const handler = nodes[result.getNodeElementName(node)];
    if (!handler) {
        return null;
    }
    // Children are fetched through `result` rather than `node.childNodes`, so
    // the parser never depends on DOM properties of the node itself.
    const children = handler.descend
        ? parseNodes(result, result.getChildNodes(node))
        : null;
    return handler.parsefn(result, node, children);
}
|
||||||
|
|
||||||
function parseNodes(nodes) {
|
function parseNodes(result, nodes) {
|
||||||
const len = nodes.length;
|
|
||||||
const parsed = [];
|
const parsed = [];
|
||||||
for (let i = 0; i < len; i ++) {
|
for (const htmlNode of nodes) {
|
||||||
let node = parseNode(nodes[i]);
|
let node = parseNode(result, htmlNode);
|
||||||
// Just ignore invalid / unknown tags.
|
// Just ignore invalid / unknown tags.
|
||||||
if (node) {
|
if (node) {
|
||||||
parsed.push(node);
|
parsed.push(node);
|
||||||
|
@ -125,7 +132,7 @@ function parseNodes(nodes) {
|
||||||
return parsed;
|
return parsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Parse an HTML string into message parts.
 * @param {{parseHTML: function(string): HTMLParseResult}} platform provides the HTML parser.
 * @param {string} html markup to parse.
 * @returns {Array} the parts produced by parseNodes for the top-level nodes.
 */
export function parse(platform, html) {
    const parseResult = platform.parseHTML(html);
    return parseNodes(parseResult, parseResult.rootNodes);
}
|
||||||
|
|
Reference in a new issue