注意:以下翻译的准确性尚未经过验证。这是使用 AIP ↗ 从原始英文文本进行的机器翻译。
如何在Typescript中解析和处理Excel文件?
此代码使用ExcelJS库在Typescript中解析和处理Excel文件。它读取一个Excel文件,处理工作簿,验证表头,并将数据反透视以创建一个新的数据Object。
Copied!1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
import { OntologyEditFunction, Timestamp } from "@foundry/functions-api"; import { Uuid } from "@foundry/functions-utils"; import { Objects, Upload, SampleData } from "@foundry/ontology-api"; import { Workbook } from "exceljs"; interface DataRow { target: string; unit: string; property: string; value: number; uploaded_at: Timestamp; upload_key: string; } export class ParseExcel { @OntologyEditFunction() public async addData(upload: Upload): Promise<void> { try { const arrayBuffer = await this.getArrayBufferFromAttachment(upload); // 从 arrayBuffer 创建一个工作簿 const workbook = new Workbook(); await workbook.xlsx.load(arrayBuffer); const meltedData = this.processWorkbook(workbook, upload.uploadedAt!, upload.uploadKey!); const version = this.generateVersion(upload.title!, upload.uploadedAt!); meltedData.forEach(row => { this.createNewData(row, version); }); upload.status = 'Success' // 更新上传版本 upload.version = version; } catch (error: any) { upload.status = 'Failed'; upload.errorMessage = error.message; } } /** * 验证头部,确保其包含所有必要的标题 */ private validateHeader(header: string[]): string | null { const target = 'target'; const yIntercept = 'Y_intercept'; if (header[1] !== target || header[2] !== yIntercept) { // console.log(header) return '标题必须以 target 和 Y_intercept 作为前两列。'; } const regex = /^[a-zA-Z0-9_]+$/; for (const columnName of header) { if (!regex.test(columnName)) { return `标题包含无效字符。仅允许使用英文字母和下划线。无效列名: ${columnName}`; } } return null; } private createNewData(row: DataRow, version: string) { const newData = Objects.create()._sampleData(Uuid.random()); newData.title = `${row.target} ${row.unit} ${row.property} 数据`; newData.value = row.value; newData.target = row.target; newData.unit = row.unit; newData.version = version; newData.property_ = row.property; newData.uploadKey = row.upload_key; newData.propertyKey = this.generatePropertyKey(row.unit, row.property); return newData; } private generatePropertyKey(unit: string, property: string){ return `Sample_Property_Key_${unit}_${property}` } private generateVersion(unit: string, uploaded_at: Timestamp): string { // 将时间戳格式化为 YYYYMMDD_HHMMSS const formattedDate = uploaded_at.toISOString().slice(0, 10).replace(/docs/-/g, ""); const formattedTime = uploaded_at.toISOString().slice(11, 19).replace(/docs/:/g, ""); // 使用单位和格式化的时间戳创建版本字符串 const version = `${unit}_${formattedDate}_${formattedTime}`; return version; } /** * 从给定的 Upload 对象中的附件读取 ArrayBuffer。 * @param upload 包含附件的 Upload 对象。 */ private async getArrayBufferFromAttachment(upload: Upload): Promise<ArrayBuffer> { const attachmentBlob = await upload.attachment!.readAsync(); return attachmentBlob.arrayBuffer(); } /** * 处理工作簿并返回 DataFrame 格式的二维数组。 * @param workbook 要处理的 Workbook 对象。 */ private processWorkbook(workbook: Workbook, uploadedAt: Timestamp, uploadKey: string): DataRow[] { const allMeltedData: DataRow[] = []; workbook.eachSheet((sheet) => { const sheetUnit = sheet.name; // 处理头部 let headerProcessed = false; let sheetHeader: string[] = []; const sheetData: any[][] = []; // 处理每个表格的行 sheet.eachRow((row, rowIndex) => { if (row.values) { const rowValues = row.values as (string | number)[]; // 如果 rowIndex 是 1,处理为头部 if (rowIndex === 1) { sheetHeader = rowValues.filter(value => value !== null && typeof value === 'string') as string[]; sheetHeader.unshift('unit'); const validationResult = this.validateHeader(sheetHeader); if (validationResult) { throw new Error(validationResult); } headerProcessed = true; } else { const newRowValues = this.processRow(rowValues, sheetHeader); newRowValues.unshift(sheetUnit); sheetData.push(newRowValues); } } }); // 如果在行中没有处理头部,则手动添加 if (!headerProcessed) { sheetHeader.unshift('unit'); const validationResult = this.validateHeader(sheetHeader); if (validationResult) { throw new Error(validationResult); } } const meltedData = this.unpivotData(sheetData, sheetUnit, uploadedAt, uploadKey, sheetHeader); allMeltedData.push(...meltedData); }); return allMeltedData; } /** * 处理工作簿中的一行并返回新的行数据。 * @param rowValues 要处理的行值。 * @param header 头部数组。 */ private processRow(rowValues: (string | number)[], header: (string | number)[]): any[] { const newRowValues = Array(header.length).fill(0); // 使用 0 填充 rowValues.forEach((value, index) => { if (index > 0 && header[index - 1] !== undefined) { newRowValues[index - 1] = value; } }); return newRowValues.map((value, index) => { if (header[index] !== 'target' && header[index] !== 'unit') { return typeof value === 'string' ? parseFloat(value) : value; } return value; }); } /** * 旋转数据并返回 MeltedDataRow 对象数组。 * @param data 要旋转的 DataFrame 格式的二维数组。 */ private unpivotData(data: any[][], unit: string, uploadedAt: Timestamp, uploadKey: string, header: string[]): DataRow[] { const idVars = ["unit", "target"]; const meltedData: DataRow[] = []; data.forEach(row => { const idValues = row.slice(0, idVars.length); row.slice(idVars.length).forEach((value, index) => { if (value !== 0) { const newRow: DataRow = { target: idValues[1], unit: unit, property: header[idVars.length + index] as string, // 使用头部获取属性名 value: value, uploaded_at: uploadedAt, upload_key: uploadKey, }; meltedData.push(newRow); } }); }); return meltedData; } }
excel
, typescript
, Object上的函数
, 文件上传
, dataframe