provides more robust .csv handling with various delimiters

This commit is contained in:
mikesealey 2025-01-29 16:39:21 +00:00
parent d02eea5770
commit 631448871e
2 changed files with 42 additions and 31 deletions

View File

@ -58,7 +58,7 @@ export const parseFile = e => {
resolveRows(rows)
})
.catch(() => {
reject("cannot parse csv.")
reject("cannot parse csv")
})
}
})

View File

@ -7,40 +7,51 @@ export async function jsonFromCsvString(csvString: string) {
let numOfHeaders: number | undefined = undefined
let headerMismatch = false
const castedWithEmptyValues = await csv({
ignoreEmpty: true,
delimiter: possibleDelimeters[i],
}).fromString(csvString)
try {
const castedWithEmptyValues = await csv({
ignoreEmpty: true,
delimiter: possibleDelimeters[i],
}).fromString(csvString)
// By default the csvtojson library casts empty values as empty strings. This
// is causing issues on conversion. ignoreEmpty will remove the key completely
// if empty, so creating this empty object will ensure we return the values
// with the keys but empty values
const result = await csv({
ignoreEmpty: false,
delimiter: possibleDelimeters[i],
}).fromString(csvString)
for (const [i, r] of result.entries()) {
const columns = Object.keys(r)
if (numOfHeaders == null) {
numOfHeaders = columns.length
// By default the csvtojson library casts empty values as empty strings. This
// is causing issues on conversion. ignoreEmpty will remove the key completely
// if empty, so creating this empty object will ensure we return the values
// with the keys but empty values
const result = await csv({
ignoreEmpty: false,
delimiter: possibleDelimeters[i],
}).fromString(csvString)
for (const [i, r] of result.entries()) {
// The purpose of this is to find rows that have been split
// into the wrong number of columns - any valid .CSV file will have
// the same number of columns in each row.
// If the number of columns in a row is different from
// the number of headers, this isn't the right delimiter
const columns = Object.keys(r)
if (numOfHeaders == null) {
numOfHeaders = columns.length
}
if (numOfHeaders === 1 || numOfHeaders !== columns.length) {
headerMismatch = true
break
}
for (const [key] of Object.entries(r).filter(
([, value]) => value === ""
)) {
if (castedWithEmptyValues[i][key] === undefined) {
r[key] = null
}
}
}
if (numOfHeaders !== columns.length) {
headerMismatch = true
break
if (headerMismatch) {
continue
} else {
return result
}
for (const [key] of Object.entries(r).filter(
([, value]) => value === ""
)) {
// if (castedWithEmptyValues[i][key] === undefined) {
// r[key] = null
// }
}
}
if (headerMismatch) {
} catch (err) {
// Splitting on the wrong delimiter sometimes throws a CSV parsing error
// (e.g. unterminated strings), which tells us we've picked the wrong delimiter
continue
} else {
return result
}
}
throw new Error("Unable to determine delimiter")