forked from mystiq/hydrogen-web
Do some additional validation, blocking block nodes inside inline nodes.
This commit is contained in:
parent
4b92903ddd
commit
5e39eb8f6c
1 changed file with 119 additions and 30 deletions
|
@ -10,7 +10,8 @@ import { MessageBody, HeaderBlock, ListBlock, CodeBlock, FormatPart, NewLinePart
|
||||||
* Nodes that don't have any properties to them other than their tag.
|
* Nodes that don't have any properties to them other than their tag.
|
||||||
* While <a> has `href`, and <img> has `src`, these have... themselves.
|
* While <a> has `href`, and <img> has `src`, these have... themselves.
|
||||||
*/
|
*/
|
||||||
const basicNodes = ["EM", "STRONG", "CODE", "DEL", "P", "DIV", "SPAN" ]
|
const basicInline = ["EM", "STRONG", "CODE", "DEL", "SPAN" ];
|
||||||
|
const basicBlock = ["DIV"];
|
||||||
|
|
||||||
class Deserializer {
|
class Deserializer {
|
||||||
constructor(result, mediaRepository) {
|
constructor(result, mediaRepository) {
|
||||||
|
@ -21,7 +22,7 @@ class Deserializer {
|
||||||
parseLink(node, children) {
|
parseLink(node, children) {
|
||||||
// TODO Not equivalent to `node.href`!
|
// TODO Not equivalent to `node.href`!
|
||||||
// Add another HTMLParseResult method?
|
// Add another HTMLParseResult method?
|
||||||
let href = this.result.getAttributeValue(node, "href");
|
const href = this.result.getAttributeValue(node, "href");
|
||||||
return new LinkPart(href, children);
|
return new LinkPart(href, children);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +38,7 @@ class Deserializer {
|
||||||
if (result.getNodeElementName(child) !== "LI") {
|
if (result.getNodeElementName(child) !== "LI") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
const item = this.parseNodes(result.getChildNodes(child));
|
const item = this.parseAnyNodes(result.getChildNodes(child));
|
||||||
nodes.push(item);
|
nodes.push(item);
|
||||||
}
|
}
|
||||||
return new ListBlock(start, nodes);
|
return new ListBlock(start, nodes);
|
||||||
|
@ -79,9 +80,59 @@ class Deserializer {
|
||||||
return new ImagePart(url, width, height, alt, title);
|
return new ImagePart(url, width, height, alt, title);
|
||||||
}
|
}
|
||||||
|
|
||||||
parseElement(node) {
|
/** Once a node is known to be an element,
|
||||||
|
* attempt to interpret it as an inline element.
|
||||||
|
*
|
||||||
|
* @returns the inline message part, or null if the element
|
||||||
|
* is not inline or not allowed.
|
||||||
|
*/
|
||||||
|
parseInlineElement(node) {
|
||||||
const result = this.result;
|
const result = this.result;
|
||||||
const tag = result.getNodeElementName(node);
|
const tag = result.getNodeElementName(node);
|
||||||
|
const children = result.getChildNodes(node);
|
||||||
|
switch (tag) {
|
||||||
|
case "A": {
|
||||||
|
const inlines = this.parseInlineNodes(children);
|
||||||
|
return this.parseLink(node, inlines);
|
||||||
|
}
|
||||||
|
case "BR":
|
||||||
|
return new NewLinePart();
|
||||||
|
default: {
|
||||||
|
if (!basicInline.includes(tag)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
const inlines = this.parseInlineNodes(children);
|
||||||
|
return new FormatPart(tag, inlines);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Attempt to interpret a node as inline.
|
||||||
|
*
|
||||||
|
* @returns the inline message part, or null if the
|
||||||
|
* element is not inline or not allowed.
|
||||||
|
*/
|
||||||
|
parseInlineNode(node) {
|
||||||
|
const result = this.result;
|
||||||
|
if (result.isTextNode(node)) {
|
||||||
|
// TODO Linkify
|
||||||
|
return new TextPart(result.getNodeText(node));
|
||||||
|
} else if (result.isElementNode(node)) {
|
||||||
|
return this.parseInlineElement(node);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Once a node is known to be an element, attempt
|
||||||
|
* to interpret it as a block element.
|
||||||
|
*
|
||||||
|
* @returns the block message part, or null if the
|
||||||
|
* element is not a block or not allowed.
|
||||||
|
*/
|
||||||
|
parseBlockElement(node) {
|
||||||
|
const result = this.result;
|
||||||
|
const tag = result.getNodeElementName(node);
|
||||||
|
const children = result.getChildNodes(node);
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case "H1":
|
case "H1":
|
||||||
case "H2":
|
case "H2":
|
||||||
|
@ -89,66 +140,87 @@ class Deserializer {
|
||||||
case "H4":
|
case "H4":
|
||||||
case "H5":
|
case "H5":
|
||||||
case "H6": {
|
case "H6": {
|
||||||
const children = this.parseChildNodes(node);
|
const inlines = this.parseInlineNodes(children);
|
||||||
return new HeaderBlock(parseInt(tag[1]), children)
|
return new HeaderBlock(parseInt(tag[1]), inlines)
|
||||||
}
|
|
||||||
case "A": {
|
|
||||||
const children = this.parseChildNodes(node);
|
|
||||||
return this.parseLink(node, children);
|
|
||||||
}
|
}
|
||||||
case "UL":
|
case "UL":
|
||||||
case "OL":
|
case "OL":
|
||||||
return this.parseList(node);
|
return this.parseList(node);
|
||||||
case "PRE":
|
case "PRE":
|
||||||
return this.parseCodeBlock(node);
|
return this.parseCodeBlock(node);
|
||||||
case "BR":
|
|
||||||
return new NewLinePart();
|
|
||||||
case "HR":
|
case "HR":
|
||||||
return new RulePart();
|
return new RulePart();
|
||||||
case "IMG":
|
case "IMG":
|
||||||
return this.parseImage(node);
|
return this.parseImage(node);
|
||||||
|
case "P": {
|
||||||
|
const inlines = this.parseInlineNodes(children);
|
||||||
|
return new FormatPart(tag, inlines);
|
||||||
|
}
|
||||||
default: {
|
default: {
|
||||||
if (!basicNodes.includes(tag)) {
|
if (!basicBlock.includes(tag)) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
const children = this.parseChildNodes(node);
|
const blocks = this.parseAnyNodes(children);
|
||||||
return new FormatPart(tag, children);
|
return new FormatPart(tag, blocks);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parseNode(node) {
|
/** Attempt to parse a node as a block.
|
||||||
|
*
|
||||||
|
* @returns the block message part, or null if the node
|
||||||
|
* is not a block element.
|
||||||
|
*/
|
||||||
|
parseBlockNode(node) {
|
||||||
const result = this.result;
|
const result = this.result;
|
||||||
if (result.isTextNode(node)) {
|
if (result.isElementNode(node)) {
|
||||||
return new TextPart(result.getNodeText(node));
|
return this.parseBlockElement(node);
|
||||||
} else if (result.isElementNode(node)) {
|
|
||||||
return this.parseElement(node);
|
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
parseChildNodes(node) {
|
_parseInlineNodes(nodes, into) {
|
||||||
const childNodes = this.result.getChildNodes(node);
|
for (const htmlNode of nodes) {
|
||||||
return this.parseNodes(childNodes);
|
const node = this.parseInlineNode(htmlNode);
|
||||||
|
if (node) {
|
||||||
|
into.push(node);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Node is either block or unrecognized. In
|
||||||
|
// both cases, just move on to its children.
|
||||||
|
this._parseInlineNodes(this.result.getChildNodes(htmlNode), into);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
parseNodes(nodes) {
|
parseInlineNodes(nodes) {
|
||||||
const parsed = [];
|
const into = [];
|
||||||
|
this._parseInlineNodes(nodes, into);
|
||||||
|
return into;
|
||||||
|
}
|
||||||
|
|
||||||
|
_parseAnyNodes(nodes, into) {
|
||||||
for (const htmlNode of nodes) {
|
for (const htmlNode of nodes) {
|
||||||
let node = this.parseNode(htmlNode);
|
const node = this.parseInlineNode(htmlNode) || this.parseBlockNode(htmlNode);
|
||||||
// Just ignore invalid / unknown tags.
|
|
||||||
if (node) {
|
if (node) {
|
||||||
parsed.push(node);
|
into.push(node);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Node is unrecognized. Just move on to its children.
|
||||||
|
this._parseAnyNodes(this.result.getChildNodes(htmlNode), into);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return parsed;
|
|
||||||
|
parseAnyNodes(nodes) {
|
||||||
|
const into = [];
|
||||||
|
this._parseAnyNodes(nodes, into);
|
||||||
|
return into;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function parseHTMLBody(platform, mediaRepository, html) {
|
export function parseHTMLBody(platform, mediaRepository, html) {
|
||||||
const parseResult = platform.parseHTML(html);
|
const parseResult = platform.parseHTML(html);
|
||||||
const deserializer = new Deserializer(parseResult, mediaRepository);
|
const deserializer = new Deserializer(parseResult, mediaRepository);
|
||||||
const parts = deserializer.parseNodes(parseResult.rootNodes);
|
const parts = deserializer.parseAnyNodes(parseResult.rootNodes);
|
||||||
return new MessageBody(html, parts);
|
return new MessageBody(html, parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,6 +328,23 @@ export function tests() {
|
||||||
];
|
];
|
||||||
test(assert, input, output);
|
test(assert, input, output);
|
||||||
},
|
},
|
||||||
|
"Block elements ignored inside inline elements": assert => {
|
||||||
|
const input = '<span><p><code>Hello</code></p></span>';
|
||||||
|
const output = [
|
||||||
|
new FormatPart("span", [new FormatPart("code", [new TextPart("Hello")])])
|
||||||
|
];
|
||||||
|
test(assert, input, output);
|
||||||
|
},
|
||||||
|
"Unknown tags are ignored, but their children are kept": assert => {
|
||||||
|
const input = '<span><dfn><code>Hello</code></dfn><footer><em>World</em></footer></span>';
|
||||||
|
const output = [
|
||||||
|
new FormatPart("span", [
|
||||||
|
new FormatPart("code", [new TextPart("Hello")]),
|
||||||
|
new FormatPart("em", [new TextPart("World")])
|
||||||
|
])
|
||||||
|
];
|
||||||
|
test(assert, input, output);
|
||||||
|
},
|
||||||
/* Doesn't work: HTML library doesn't handle <pre><code> properly.
|
/* Doesn't work: HTML library doesn't handle <pre><code> properly.
|
||||||
"Text with code block": assert => {
|
"Text with code block": assert => {
|
||||||
const code = 'main :: IO ()\nmain = putStrLn "Hello"'
|
const code = 'main :: IO ()\nmain = putStrLn "Hello"'
|
||||||
|
|
Loading…
Reference in a new issue