geno/wp-includes/js/dist/block-serialization-default-parser.js

485 lines
15 KiB
JavaScript
Raw Normal View History

2024-02-01 17:24:18 +05:30
/******/ (function() { // webpackBootstrap
/******/ "use strict";
/******/ // The require scope
/******/ var __webpack_require__ = {};
/******/
/************************************************************************/
/******/ /* webpack/runtime/define property getters */
/******/ !function() {
/******/ // define getter functions for harmony exports
/******/ __webpack_require__.d = function(exports, definition) {
/******/ for(var key in definition) {
/******/ if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {
/******/ Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });
/******/ }
/******/ }
/******/ };
/******/ }();
/******/
/******/ /* webpack/runtime/hasOwnProperty shorthand */
/******/ !function() {
/******/ __webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }
/******/ }();
/******/
/******/ /* webpack/runtime/make namespace object */
/******/ !function() {
/******/ // define __esModule on exports
/******/ __webpack_require__.r = function(exports) {
/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) {
/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
/******/ }
/******/ Object.defineProperty(exports, '__esModule', { value: true });
/******/ };
/******/ }();
/******/
/************************************************************************/
var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
/* harmony export */ __webpack_require__.d(__webpack_exports__, {
/* harmony export */ parse: function() { return /* binding */ parse; }
/* harmony export */ });
/**
* @type {string}
*/
let document;
/**
* @type {number}
*/
let offset;
/**
* @type {ParsedBlock[]}
*/
let output;
/**
* @type {ParsedFrame[]}
*/
let stack;
/**
* @typedef {Object|null} Attributes
*/
/**
* @typedef {Object} ParsedBlock
* @property {string|null} blockName Block name.
* @property {Attributes} attrs Block attributes.
* @property {ParsedBlock[]} innerBlocks Inner blocks.
* @property {string} innerHTML Inner HTML.
* @property {Array<string|null>} innerContent Inner content.
*/
/**
* @typedef {Object} ParsedFrame
* @property {ParsedBlock} block Block.
* @property {number} tokenStart Token start.
* @property {number} tokenLength Token length.
* @property {number} prevOffset Previous offset.
* @property {number|null} leadingHtmlStart Leading HTML start.
*/
/**
* @typedef {'no-more-tokens'|'void-block'|'block-opener'|'block-closer'} TokenType
*/
/**
* @typedef {[TokenType, string, Attributes, number, number]} Token
*/
/**
* Matches block comment delimiters
*
* While most of this pattern is straightforward the attribute parsing
* incorporates a tricks to make sure we don't choke on specific input
*
* - since JavaScript has no possessive quantifier or atomic grouping
* we are emulating it with a trick
*
* we want a possessive quantifier or atomic group to prevent backtracking
* on the `}`s should we fail to match the remainder of the pattern
*
* we can emulate this with a positive lookahead and back reference
* (a++)*c === ((?=(a+))\1)*c
*
* let's examine an example:
* - /(a+)*c/.test('aaaaaaaaaaaaad') fails after over 49,000 steps
* - /(a++)*c/.test('aaaaaaaaaaaaad') fails after 85 steps
* - /(?>a+)*c/.test('aaaaaaaaaaaaad') fails after 126 steps
*
* this is because the possessive `++` and the atomic group `(?>)`
* tell the engine that all those `a`s belong together as a single group
* and so it won't split it up when stepping backwards to try and match
*
* if we use /((?=(a+))\1)*c/ then we get the same behavior as the atomic group
* or possessive and prevent the backtracking because the `a+` is matched but
* not captured. thus, we find the long string of `a`s and remember it, then
* reference it as a whole unit inside our pattern
*
* @see http://instanceof.me/post/52245507631/regex-emulate-atomic-grouping-with-lookahead
* @see http://blog.stevenlevithan.com/archives/mimic-atomic-groups
* @see https://javascript.info/regexp-infinite-backtracking-problem
*
* once browsers reliably support atomic grouping or possessive
* quantifiers natively we should remove this trick and simplify
*
* @type {RegExp}
*
* @since 3.8.0
* @since 4.6.1 added optimization to prevent backtracking on attribute parsing
*/
const tokenizer = /<!--\s+(\/)?wp:([a-z][a-z0-9_-]*\/)?([a-z][a-z0-9_-]*)\s+({(?:(?=([^}]+|}+(?=})|(?!}\s+\/?-->)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g;
/**
* Constructs a block object.
*
* @param {string|null} blockName
* @param {Attributes} attrs
* @param {ParsedBlock[]} innerBlocks
* @param {string} innerHTML
* @param {string[]} innerContent
* @return {ParsedBlock} The block object.
*/
function Block(blockName, attrs, innerBlocks, innerHTML, innerContent) {
return {
blockName,
attrs,
innerBlocks,
innerHTML,
innerContent
};
}
/**
* Constructs a freeform block object.
*
* @param {string} innerHTML
* @return {ParsedBlock} The freeform block object.
*/
function Freeform(innerHTML) {
return Block(null, {}, [], innerHTML, [innerHTML]);
}
/**
* Constructs a frame object.
*
* @param {ParsedBlock} block
* @param {number} tokenStart
* @param {number} tokenLength
* @param {number} prevOffset
* @param {number|null} leadingHtmlStart
* @return {ParsedFrame} The frame object.
*/
function Frame(block, tokenStart, tokenLength, prevOffset, leadingHtmlStart) {
return {
block,
tokenStart,
tokenLength,
prevOffset: prevOffset || tokenStart + tokenLength,
leadingHtmlStart
};
}
/**
* Parser function, that converts input HTML into a block based structure.
*
* @param {string} doc The HTML document to parse.
*
* @example
* Input post:
* ```html
* <!-- wp:columns {"columns":3} -->
* <div class="wp-block-columns has-3-columns"><!-- wp:column -->
* <div class="wp-block-column"><!-- wp:paragraph -->
* <p>Left</p>
* <!-- /wp:paragraph --></div>
* <!-- /wp:column -->
*
* <!-- wp:column -->
* <div class="wp-block-column"><!-- wp:paragraph -->
* <p><strong>Middle</strong></p>
* <!-- /wp:paragraph --></div>
* <!-- /wp:column -->
*
* <!-- wp:column -->
* <div class="wp-block-column"></div>
* <!-- /wp:column --></div>
* <!-- /wp:columns -->
* ```
*
* Parsing code:
* ```js
* import { parse } from '@wordpress/block-serialization-default-parser';
*
* parse( post ) === [
* {
* blockName: "core/columns",
* attrs: {
* columns: 3
* },
* innerBlocks: [
* {
* blockName: "core/column",
* attrs: null,
* innerBlocks: [
* {
* blockName: "core/paragraph",
* attrs: null,
* innerBlocks: [],
* innerHTML: "\n<p>Left</p>\n"
* }
* ],
* innerHTML: '\n<div class="wp-block-column"></div>\n'
* },
* {
* blockName: "core/column",
* attrs: null,
* innerBlocks: [
* {
* blockName: "core/paragraph",
* attrs: null,
* innerBlocks: [],
* innerHTML: "\n<p><strong>Middle</strong></p>\n"
* }
* ],
* innerHTML: '\n<div class="wp-block-column"></div>\n'
* },
* {
* blockName: "core/column",
* attrs: null,
* innerBlocks: [],
* innerHTML: '\n<div class="wp-block-column"></div>\n'
* }
* ],
* innerHTML: '\n<div class="wp-block-columns has-3-columns">\n\n\n\n</div>\n'
* }
* ];
* ```
* @return {ParsedBlock[]} A block-based representation of the input HTML.
*/
const parse = doc => {
document = doc;
offset = 0;
output = [];
stack = [];
tokenizer.lastIndex = 0;
do {
// twiddle our thumbs
} while (proceed());
return output;
};
/**
* Parses the next token in the input document.
*
* @return {boolean} Returns true when there is more tokens to parse.
*/
function proceed() {
const stackDepth = stack.length;
const next = nextToken();
const [tokenType, blockName, attrs, startOffset, tokenLength] = next;
// We may have some HTML soup before the next block.
const leadingHtmlStart = startOffset > offset ? offset : null;
switch (tokenType) {
case 'no-more-tokens':
// If not in a block then flush output.
if (0 === stackDepth) {
addFreeform();
return false;
}
// Otherwise we have a problem
// This is an error
// we have options
// - treat it all as freeform text
// - assume an implicit closer (easiest when not nesting)
// For the easy case we'll assume an implicit closer.
if (1 === stackDepth) {
addBlockFromStack();
return false;
}
// For the nested case where it's more difficult we'll
// have to assume that multiple closers are missing
// and so we'll collapse the whole stack piecewise.
while (0 < stack.length) {
addBlockFromStack();
}
return false;
case 'void-block':
// easy case is if we stumbled upon a void block
// in the top-level of the document.
if (0 === stackDepth) {
if (null !== leadingHtmlStart) {
output.push(Freeform(document.substr(leadingHtmlStart, startOffset - leadingHtmlStart)));
}
output.push(Block(blockName, attrs, [], '', []));
offset = startOffset + tokenLength;
return true;
}
// Otherwise we found an inner block.
addInnerBlock(Block(blockName, attrs, [], '', []), startOffset, tokenLength);
offset = startOffset + tokenLength;
return true;
case 'block-opener':
// Track all newly-opened blocks on the stack.
stack.push(Frame(Block(blockName, attrs, [], '', []), startOffset, tokenLength, startOffset + tokenLength, leadingHtmlStart));
offset = startOffset + tokenLength;
return true;
case 'block-closer':
// If we're missing an opener we're in trouble
// This is an error.
if (0 === stackDepth) {
// We have options
// - assume an implicit opener
// - assume _this_ is the opener
// - give up and close out the document.
addFreeform();
return false;
}
// If we're not nesting then this is easy - close the block.
if (1 === stackDepth) {
addBlockFromStack(startOffset);
offset = startOffset + tokenLength;
return true;
}
// Otherwise we're nested and we have to close out the current
// block and add it as a innerBlock to the parent.
const stackTop = /** @type {ParsedFrame} */stack.pop();
const html = document.substr(stackTop.prevOffset, startOffset - stackTop.prevOffset);
stackTop.block.innerHTML += html;
stackTop.block.innerContent.push(html);
stackTop.prevOffset = startOffset + tokenLength;
addInnerBlock(stackTop.block, stackTop.tokenStart, stackTop.tokenLength, startOffset + tokenLength);
offset = startOffset + tokenLength;
return true;
default:
// This is an error.
addFreeform();
return false;
}
}
/**
* Parse JSON if valid, otherwise return null
*
* Note that JSON coming from the block comment
* delimiters is constrained to be an object
* and cannot be things like `true` or `null`
*
* @param {string} input JSON input string to parse
* @return {Object|null} parsed JSON if valid
*/
function parseJSON(input) {
try {
return JSON.parse(input);
} catch (e) {
return null;
}
}
/**
* Finds the next token in the document.
*
* @return {Token} The next matched token.
*/
function nextToken() {
// Aye the magic
// we're using a single RegExp to tokenize the block comment delimiters
// we're also using a trick here because the only difference between a
// block opener and a block closer is the leading `/` before `wp:` (and
// a closer has no attributes). we can trap them both and process the
// match back in JavaScript to see which one it was.
const matches = tokenizer.exec(document);
// We have no more tokens.
if (null === matches) {
return ['no-more-tokens', '', null, 0, 0];
}
const startedAt = matches.index;
const [match, closerMatch, namespaceMatch, nameMatch, attrsMatch /* Internal/unused. */,, voidMatch] = matches;
const length = match.length;
const isCloser = !!closerMatch;
const isVoid = !!voidMatch;
const namespace = namespaceMatch || 'core/';
const name = namespace + nameMatch;
const hasAttrs = !!attrsMatch;
const attrs = hasAttrs ? parseJSON(attrsMatch) : {};
// This state isn't allowed
// This is an error.
if (isCloser && (isVoid || hasAttrs)) {
// We can ignore them since they don't hurt anything
// we may warn against this at some point or reject it.
}
if (isVoid) {
return ['void-block', name, attrs, startedAt, length];
}
if (isCloser) {
return ['block-closer', name, null, startedAt, length];
}
return ['block-opener', name, attrs, startedAt, length];
}
/**
* Adds a freeform block to the output.
*
* @param {number} [rawLength]
*/
function addFreeform(rawLength) {
const length = rawLength ? rawLength : document.length - offset;
if (0 === length) {
return;
}
output.push(Freeform(document.substr(offset, length)));
}
/**
* Adds inner block to the parent block.
*
* @param {ParsedBlock} block
* @param {number} tokenStart
* @param {number} tokenLength
* @param {number} [lastOffset]
*/
function addInnerBlock(block, tokenStart, tokenLength, lastOffset) {
const parent = stack[stack.length - 1];
parent.block.innerBlocks.push(block);
const html = document.substr(parent.prevOffset, tokenStart - parent.prevOffset);
if (html) {
parent.block.innerHTML += html;
parent.block.innerContent.push(html);
}
parent.block.innerContent.push(null);
parent.prevOffset = lastOffset ? lastOffset : tokenStart + tokenLength;
}
/**
* Adds block from the stack to the output.
*
* @param {number} [endOffset]
*/
function addBlockFromStack(endOffset) {
const {
block,
leadingHtmlStart,
prevOffset,
tokenStart
} = /** @type {ParsedFrame} */stack.pop();
const html = endOffset ? document.substr(prevOffset, endOffset - prevOffset) : document.substr(prevOffset);
if (html) {
block.innerHTML += html;
block.innerContent.push(html);
}
if (null !== leadingHtmlStart) {
output.push(Freeform(document.substr(leadingHtmlStart, tokenStart - leadingHtmlStart)));
}
output.push(block);
}
(window.wp = window.wp || {}).blockSerializationDefaultParser = __webpack_exports__;
/******/ })()
;