provides more robust .csv handling with various delimiters

This commit is contained in:
mikesealey 2025-01-29 16:39:21 +00:00
parent d02eea5770
commit 631448871e
2 changed files with 42 additions and 31 deletions

View File

@ -58,7 +58,7 @@ export const parseFile = e => {
resolveRows(rows) resolveRows(rows)
}) })
.catch(() => { .catch(() => {
reject("cannot parse csv.") reject("cannot parse csv")
}) })
} }
}) })

View File

@ -7,6 +7,7 @@ export async function jsonFromCsvString(csvString: string) {
let numOfHeaders: number | undefined = undefined let numOfHeaders: number | undefined = undefined
let headerMismatch = false let headerMismatch = false
try {
const castedWithEmptyValues = await csv({ const castedWithEmptyValues = await csv({
ignoreEmpty: true, ignoreEmpty: true,
delimiter: possibleDelimeters[i], delimiter: possibleDelimeters[i],
@ -21,20 +22,25 @@ export async function jsonFromCsvString(csvString: string) {
delimiter: possibleDelimeters[i], delimiter: possibleDelimeters[i],
}).fromString(csvString) }).fromString(csvString)
for (const [i, r] of result.entries()) { for (const [i, r] of result.entries()) {
// The purpose of this is to find rows that have been split
// into the wrong number of columns - any valid .csv file will have
// the same number of columns in each row.
// If the number of columns in a row differs from
// the number of headers, this isn't the right delimiter
const columns = Object.keys(r) const columns = Object.keys(r)
if (numOfHeaders == null) { if (numOfHeaders == null) {
numOfHeaders = columns.length numOfHeaders = columns.length
} }
if (numOfHeaders !== columns.length) { if (numOfHeaders === 1 || numOfHeaders !== columns.length) {
headerMismatch = true headerMismatch = true
break break
} }
for (const [key] of Object.entries(r).filter( for (const [key] of Object.entries(r).filter(
([, value]) => value === "" ([, value]) => value === ""
)) { )) {
// if (castedWithEmptyValues[i][key] === undefined) { if (castedWithEmptyValues[i][key] === undefined) {
// r[key] = null r[key] = null
// } }
} }
} }
if (headerMismatch) { if (headerMismatch) {
@ -42,6 +48,11 @@ export async function jsonFromCsvString(csvString: string) {
} else { } else {
return result return result
} }
} catch (err) {
// Splitting on the wrong delimiter sometimes throws a CSV parsing error
// (e.g. unterminated strings), which tells us we've picked the wrong delimiter
continue
}
} }
throw new Error("Unable to determine delimiter") throw new Error("Unable to determine delimiter")
} }