provides more robust .csv handling with various delimiters

This commit is contained in:
mikesealey 2025-01-29 16:39:21 +00:00
parent d02eea5770
commit 631448871e
2 changed files with 42 additions and 31 deletions

View File

@ -58,7 +58,7 @@ export const parseFile = e => {
resolveRows(rows) resolveRows(rows)
}) })
.catch(() => { .catch(() => {
reject("cannot parse csv.") reject("cannot parse csv")
}) })
} }
}) })

View File

@ -7,40 +7,51 @@ export async function jsonFromCsvString(csvString: string) {
let numOfHeaders: number | undefined = undefined let numOfHeaders: number | undefined = undefined
let headerMismatch = false let headerMismatch = false
const castedWithEmptyValues = await csv({ try {
ignoreEmpty: true, const castedWithEmptyValues = await csv({
delimiter: possibleDelimeters[i], ignoreEmpty: true,
}).fromString(csvString) delimiter: possibleDelimeters[i],
}).fromString(csvString)
// By default the csvtojson library casts empty values as empty strings. This // By default the csvtojson library casts empty values as empty strings. This
// is causing issues on conversion. ignoreEmpty will remove the key completely // is causing issues on conversion. ignoreEmpty will remove the key completely
// if empty, so creating this empty object will ensure we return the values // if empty, so creating this empty object will ensure we return the values
// with the keys but empty values // with the keys but empty values
const result = await csv({ const result = await csv({
ignoreEmpty: false, ignoreEmpty: false,
delimiter: possibleDelimeters[i], delimiter: possibleDelimeters[i],
}).fromString(csvString) }).fromString(csvString)
for (const [i, r] of result.entries()) { for (const [i, r] of result.entries()) {
const columns = Object.keys(r) // The purpose of this is to find rows that have been split
if (numOfHeaders == null) { // into the wrong number of columns - Any valid .CSV file will have
numOfHeaders = columns.length // the same number of columns in each row
// If the number of columns in each row is different to
// the number of headers, this isn't the right delimiter
const columns = Object.keys(r)
if (numOfHeaders == null) {
numOfHeaders = columns.length
}
if (numOfHeaders === 1 || numOfHeaders !== columns.length) {
headerMismatch = true
break
}
for (const [key] of Object.entries(r).filter(
([, value]) => value === ""
)) {
if (castedWithEmptyValues[i][key] === undefined) {
r[key] = null
}
}
} }
if (numOfHeaders !== columns.length) { if (headerMismatch) {
headerMismatch = true continue
break } else {
return result
} }
for (const [key] of Object.entries(r).filter( } catch (err) {
([, value]) => value === "" // Splitting on the wrong delimiter sometimes throws a CSV parsing error
)) { // (e.g. unterminated strings), which tells us we've picked the wrong delimiter
// if (castedWithEmptyValues[i][key] === undefined) {
// r[key] = null
// }
}
}
if (headerMismatch) {
continue continue
} else {
return result
} }
} }
throw new Error("Unable to determine delimiter") throw new Error("Unable to determine delimiter")