2023-06-20 00:43:36 +05:30
|
|
|
import { sanitize } from '~/lib/dompurify';
|
|
|
|
|
2023-07-09 08:55:56 +05:30
|
|
|
/**
 * Builds one item object per table body row, keyed `index0..indexN` for the
 * index cells and `column0..columnM` for the data cells.
 *
 * @param {string[][]} itemIndexes - for each body row, the text of its index (<th>) cells
 * @param {string[][]} itemColumns - for each body row, the text of its data (<td>) cells
 * @returns {Object[]} one object per row; an empty array when there are no rows
 */
function parseItems(itemIndexes, itemColumns) {
  // Fetching items: if the dataframe has a single index column, the table is simple
  // 0: tr > th(index0 value) td(column0 value) td(column1 value)
  // 1: tr > th(index0 value) td(column0 value) td(column1 value)
  //
  // But if the dataframe has multiple index columns, it uses rowspan, and the rows below
  // won't have a value for that index.
  // 0: tr > th(index0 value, rowspan=2) th(index1 value) td(column0 value) td(column1 value)
  // 1: tr > th(index1 value) td(column0 value) td(column1 value)
  //
  // So, when parsing a row whose count of <th> elements is less than indexCount, we fill
  // the missing leading values from the previous row.
  if (itemIndexes.length === 0) return [];

  // The first row is never covered by a rowspan from above, so it defines the index width.
  const indexCount = itemIndexes[0].length;

  const filledIndexes = [];
  itemIndexes.forEach((row) => {
    const missingCount = indexCount - row.length;
    if (missingCount <= 0) {
      filledIndexes.push(row);
      return;
    }

    // Fill from the previous *filled* row: a rowspan may cover more than two rows, in which
    // case the raw previous row is itself missing the outer index values. Using a positive
    // end offset also avoids `slice(0, -0)`, which would yield an empty array.
    const previousRow = filledIndexes[filledIndexes.length - 1];
    filledIndexes.push([...previousRow.slice(0, missingCount), ...row]);
  });

  return filledIndexes.map((indexes, row) => ({
    ...Object.fromEntries(indexes.map((value, counter) => [`index${counter}`, value])),
    ...Object.fromEntries(itemColumns[row].map((value, counter) => [`column${counter}`, value])),
  }));
}
|
|
|
|
|
|
|
|
/**
 * Turns a list of header labels into table field definitions.
 *
 * @param {string[]} labels - header labels, in display order
 * @param {boolean} [isIndex=true] - whether the labels belong to index columns
 *   (keys `indexN`, bold class) or to data columns (keys `columnN`, no class)
 * @returns {Object[]} one sortable field definition per label
 */
function labelsToFields(labels, isIndex = true) {
  return labels.map((label, position) => {
    if (isIndex) {
      return {
        key: `index${position}`,
        label,
        sortable: true,
        class: 'gl-font-weight-bold',
      };
    }

    return {
      key: `column${position}`,
      label,
      sortable: true,
      class: '',
    };
  });
}
|
|
|
|
|
|
|
|
/**
 * Builds the table field definitions (index fields first, then column fields)
 * from the header rows.
 *
 * @param {string[][]} columnAndIndexLabels - text of the <th> cells of each header row
 * @param {number} indexCount - number of index columns
 * @param {number} columnCount - number of data columns
 * @returns {Object[]} field definitions for the index columns followed by the data columns
 */
function parseFields(columnAndIndexLabels, indexCount, columnCount) {
  // Fetching the labels: if the dataframe has a single index column, it will be in the format:
  // thead
  //   tr
  //     th(index0 label) th(column0 label) th(column1 label)
  //
  // If there are multiple index columns, the header will actually have two rows:
  // thead
  //   tr
  //     th() th() th(column0 label) th(column1 label)
  //   tr
  //     th(index0 label) th(index1 label) th() th()
  const firstRow = columnAndIndexLabels[0];
  const lastRow = columnAndIndexLabels[columnAndIndexLabels.length - 1];

  // Column labels are the trailing cells of the first header row. A positive start offset is
  // used instead of `slice(-columnCount)` because `slice(-0)` returns the whole row, which
  // would turn the index labels into column fields when there are no data columns.
  const columnLabels = firstRow.slice(Math.max(0, firstRow.length - columnCount));
  // Index labels are the leading cells of the last header row.
  const indexLabels = lastRow.slice(0, indexCount);

  const indexFields = labelsToFields(indexLabels, true);
  const columnFields = labelsToFields(columnLabels, false);

  return [...indexFields, ...columnFields];
}
|
|
|
|
|
2023-06-20 00:43:36 +05:30
|
|
|
/**
 * Converts a dataframe in the output of a Jupyter Notebook cell to a json object
 *
 * @param {string} input - the dataframe, as an HTML string
 * @param {DOMParser} [domParser] - the html parser; a new DOMParser is created when omitted
 * @returns {Object} The converted JSON object, with a `fields` property describing the
 *   columns and an `items` property containing the rows. Both are empty arrays when the
 *   input has no table header rows or no table body rows.
 */
export function convertHtmlTableToJson(input, domParser) {
  const parser = domParser || new DOMParser();
  // The input is sanitized before it is handed to the parser.
  const htmlDoc = parser.parseFromString(sanitize(input), 'text/html');

  if (!htmlDoc) return { fields: [], items: [] };

  const columnAndIndexLabels = [...htmlDoc.querySelectorAll('table > thead tr')].map((row) =>
    [...row.querySelectorAll('th')].map((item) => item.innerText),
  );

  if (columnAndIndexLabels.length === 0) return { fields: [], items: [] };

  const tableRows = [...htmlDoc.querySelectorAll('table > tbody > tr')];

  // Without body rows there is no first row to read the index/column counts from;
  // bail out instead of throwing on `itemIndexes[0]`.
  if (tableRows.length === 0) return { fields: [], items: [] };

  // Data cells are the <td> elements of each body row, index cells are its <th> elements.
  const itemColumns = tableRows.map((row) =>
    [...row.querySelectorAll('td')].map((item) => item.innerText),
  );
  const itemIndexes = tableRows.map((row) =>
    [...row.querySelectorAll('th')].map((item) => item.innerText),
  );

  // The first body row determines how many index and data columns the table has.
  const fields = parseFields(columnAndIndexLabels, itemIndexes[0].length, itemColumns[0].length);
  const items = parseItems(itemIndexes, itemColumns);

  return { fields, items };
}
|
|
|
|
|
|
|
|
/**
 * Tells whether a notebook cell output looks like a rendered dataframe, by searching
 * the first 20 lines of its `text/html` data for the word `dataframe`.
 *
 * @param {Object} output - the cell output
 * @returns {boolean} true when one of the first 20 html lines contains `dataframe`
 */
export function isDataframe(output) {
  // Some outputs carry no `data` bundle at all; treat them as "not a dataframe".
  const htmlData = output && output.data ? output.data['text/html'] : undefined;
  if (!htmlData) return false;

  // The html may be stored either as a list of lines or as one multi-line string;
  // normalise to lines so that the 20-line window means the same thing in both cases.
  const lines = Array.isArray(htmlData) ? htmlData : String(htmlData).split('\n');

  return lines.slice(0, 20).some((line) => line.includes('dataframe'));
}
|