// File listing metadata: 276 lines, 6.9 KiB, JavaScript
const { Transform } = require('stream')

// Module-level constants shared by the parser.
const [cr] = Buffer.from('\r')
|
|
const [nl] = Buffer.from('\n')
|
|
const defaults = {
|
|
escape: '"',
|
|
headers: null,
|
|
mapHeaders: ({ header }) => header,
|
|
mapValues: ({ value }) => value,
|
|
newline: '\n',
|
|
quote: '"',
|
|
raw: false,
|
|
separator: ',',
|
|
skipComments: false,
|
|
skipLines: null,
|
|
maxRowBytes: Number.MAX_SAFE_INTEGER,
|
|
strict: false
|
|
}
|
|
|
|
class CsvParser extends Transform {
|
|
constructor (opts = {}) {
|
|
super({ objectMode: true, highWaterMark: 16 })
|
|
|
|
if (Array.isArray(opts)) opts = { headers: opts }
|
|
|
|
const options = Object.assign({}, defaults, opts)
|
|
|
|
options.customNewline = options.newline !== defaults.newline
|
|
|
|
for (const key of ['newline', 'quote', 'separator']) {
|
|
if (typeof options[key] !== 'undefined') {
|
|
([options[key]] = Buffer.from(options[key]))
|
|
}
|
|
}
|
|
|
|
// if escape is not defined on the passed options, use the end value of quote
|
|
options.escape = (opts || {}).escape ? Buffer.from(options.escape)[0] : options.quote
|
|
|
|
this.state = {
|
|
empty: options.raw ? Buffer.alloc(0) : '',
|
|
escaped: false,
|
|
first: true,
|
|
lineNumber: 0,
|
|
previousEnd: 0,
|
|
rowLength: 0,
|
|
quoted: false
|
|
}
|
|
|
|
this._prev = null
|
|
|
|
if (options.headers === false) {
|
|
// enforce, as the column length check will fail if headers:false
|
|
options.strict = false
|
|
}
|
|
|
|
if (options.headers || options.headers === false) {
|
|
this.state.first = false
|
|
}
|
|
|
|
this.options = options
|
|
this.headers = options.headers
|
|
}
|
|
|
|
parseCell (buffer, start, end) {
|
|
const { escape, quote } = this.options
|
|
// remove quotes from quoted cells
|
|
if (buffer[start] === quote && buffer[end - 1] === quote) {
|
|
start++
|
|
end--
|
|
}
|
|
|
|
let y = start
|
|
|
|
for (let i = start; i < end; i++) {
|
|
// check for escape characters and skip them
|
|
if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
|
|
i++
|
|
}
|
|
|
|
if (y !== i) {
|
|
buffer[y] = buffer[i]
|
|
}
|
|
y++
|
|
}
|
|
|
|
return this.parseValue(buffer, start, y)
|
|
}
|
|
|
|
parseLine (buffer, start, end) {
|
|
const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options
|
|
|
|
end-- // trim newline
|
|
if (!customNewline && buffer.length && buffer[end - 1] === cr) {
|
|
end--
|
|
}
|
|
|
|
const comma = separator
|
|
const cells = []
|
|
let isQuoted = false
|
|
let offset = start
|
|
|
|
if (skipComments) {
|
|
const char = typeof skipComments === 'string' ? skipComments : '#'
|
|
if (buffer[start] === Buffer.from(char)[0]) {
|
|
return
|
|
}
|
|
}
|
|
|
|
const mapValue = (value) => {
|
|
if (this.state.first) {
|
|
return value
|
|
}
|
|
|
|
const index = cells.length
|
|
const header = this.headers[index]
|
|
|
|
return mapValues({ header, index, value })
|
|
}
|
|
|
|
for (let i = start; i < end; i++) {
|
|
const isStartingQuote = !isQuoted && buffer[i] === quote
|
|
const isEndingQuote = isQuoted && buffer[i] === quote && i + 1 <= end && buffer[i + 1] === comma
|
|
const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote
|
|
|
|
if (isStartingQuote || isEndingQuote) {
|
|
isQuoted = !isQuoted
|
|
continue
|
|
} else if (isEscape) {
|
|
i++
|
|
continue
|
|
}
|
|
|
|
if (buffer[i] === comma && !isQuoted) {
|
|
let value = this.parseCell(buffer, offset, i)
|
|
value = mapValue(value)
|
|
cells.push(value)
|
|
offset = i + 1
|
|
}
|
|
}
|
|
|
|
if (offset < end) {
|
|
let value = this.parseCell(buffer, offset, end)
|
|
value = mapValue(value)
|
|
cells.push(value)
|
|
}
|
|
|
|
if (buffer[end - 1] === comma) {
|
|
cells.push(mapValue(this.state.empty))
|
|
}
|
|
|
|
const skip = skipLines && skipLines > this.state.lineNumber
|
|
this.state.lineNumber++
|
|
|
|
if (this.state.first && !skip) {
|
|
this.state.first = false
|
|
this.headers = cells.map((header, index) => mapHeaders({ header, index }))
|
|
|
|
this.emit('headers', this.headers)
|
|
return
|
|
}
|
|
|
|
if (!skip && this.options.strict && cells.length !== this.headers.length) {
|
|
const e = new RangeError('Row length does not match headers')
|
|
this.emit('error', e)
|
|
} else {
|
|
if (!skip) this.writeRow(cells)
|
|
}
|
|
}
|
|
|
|
parseValue (buffer, start, end) {
|
|
if (this.options.raw) {
|
|
return buffer.slice(start, end)
|
|
}
|
|
|
|
return buffer.toString('utf-8', start, end)
|
|
}
|
|
|
|
writeRow (cells) {
|
|
const headers = (this.headers === false) ? cells.map((value, index) => index) : this.headers
|
|
|
|
const row = cells.reduce((o, cell, index) => {
|
|
const header = headers[index]
|
|
if (header === null) return o // skip columns
|
|
if (header !== undefined) {
|
|
o[header] = cell
|
|
} else {
|
|
o[`_${index}`] = cell
|
|
}
|
|
return o
|
|
}, {})
|
|
|
|
this.push(row)
|
|
}
|
|
|
|
_flush (cb) {
|
|
if (this.state.escaped || !this._prev) return cb()
|
|
this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1) // plus since online -1s
|
|
cb()
|
|
}
|
|
|
|
_transform (data, enc, cb) {
|
|
if (typeof data === 'string') {
|
|
data = Buffer.from(data)
|
|
}
|
|
|
|
const { escape, quote } = this.options
|
|
let start = 0
|
|
let buffer = data
|
|
|
|
if (this._prev) {
|
|
start = this._prev.length
|
|
buffer = Buffer.concat([this._prev, data])
|
|
this._prev = null
|
|
}
|
|
|
|
const bufferLength = buffer.length
|
|
|
|
for (let i = start; i < bufferLength; i++) {
|
|
const chr = buffer[i]
|
|
const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null
|
|
|
|
this.state.rowLength++
|
|
if (this.state.rowLength > this.options.maxRowBytes) {
|
|
return cb(new Error('Row exceeds the maximum size'))
|
|
}
|
|
|
|
if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
|
|
this.state.escaped = true
|
|
continue
|
|
} else if (chr === quote) {
|
|
if (this.state.escaped) {
|
|
this.state.escaped = false
|
|
// non-escaped quote (quoting the cell)
|
|
} else {
|
|
this.state.quoted = !this.state.quoted
|
|
}
|
|
continue
|
|
}
|
|
|
|
if (!this.state.quoted) {
|
|
if (this.state.first && !this.options.customNewline) {
|
|
if (chr === nl) {
|
|
this.options.newline = nl
|
|
} else if (chr === cr) {
|
|
if (nextChr !== nl) {
|
|
this.options.newline = cr
|
|
}
|
|
}
|
|
}
|
|
|
|
if (chr === this.options.newline) {
|
|
this.parseLine(buffer, this.state.previousEnd, i + 1)
|
|
this.state.previousEnd = i + 1
|
|
this.state.rowLength = 0
|
|
}
|
|
}
|
|
}
|
|
|
|
if (this.state.previousEnd === bufferLength) {
|
|
this.state.previousEnd = 0
|
|
return cb()
|
|
}
|
|
|
|
if (bufferLength - this.state.previousEnd < data.length) {
|
|
this._prev = data
|
|
this.state.previousEnd -= (bufferLength - data.length)
|
|
return cb()
|
|
}
|
|
|
|
this._prev = buffer
|
|
cb()
|
|
}
|
|
}
|
|
|
|
module.exports = (opts) => new CsvParser(opts)
|