// Listing metadata: 276 lines, 6.9 KiB, JavaScript

const { Transform } = require('stream')
// Byte values of carriage return and line feed, used when comparing against
// individual bytes of the incoming buffers.
const cr = '\r'.charCodeAt(0)
const nl = '\n'.charCodeAt(0)
// Baseline parser options. The CsvParser constructor copies these and layers
// the caller's options on top, so this object itself is never modified.
const defaults = {
  // character that escapes a quote inside a quoted cell
  escape: '"',
  // null: take headers from the first parsed line; array: use as headers;
  // false: no header line, rows are keyed by column index
  headers: null,
  // called once per header cell; its return value becomes the column key
  mapHeaders: (info) => info.header,
  // called once per data cell; its return value becomes the cell value
  mapValues: (info) => info.value,
  // line terminator (only its first byte is used)
  newline: '\n',
  // character that wraps quoted cells (only its first byte is used)
  quote: '"',
  // when true, cells are returned as Buffer slices instead of utf-8 strings
  raw: false,
  // cell delimiter (only its first byte is used)
  separator: ',',
  // true: skip lines starting with '#'; string: skip lines starting with it
  skipComments: false,
  // number of leading lines to discard before header/row handling
  skipLines: null,
  // maximum number of bytes a single row may span before an error is raised
  maxRowBytes: Number.MAX_SAFE_INTEGER,
  // when true, a row whose cell count differs from the headers is an error
  strict: false
}
/**
 * Streaming CSV parser implemented as an object-mode Transform stream.
 *
 * Incoming chunks (Buffers or strings) are scanned byte by byte for line
 * terminators that are not inside a quoted cell. Each complete line is split
 * into cells and pushed downstream as one plain object per row, keyed by
 * header. A 'headers' event is emitted once when the header line is parsed.
 */
class CsvParser extends Transform {
  /**
   * @param {object|Array} [opts] - Parser options merged over `defaults`.
   *   An array is shorthand for `{ headers: array }`.
   */
  constructor (opts = {}) {
    super({ objectMode: true, highWaterMark: 16 })

    if (Array.isArray(opts)) opts = { headers: opts }

    const options = Object.assign({}, defaults, opts)

    // remember whether the caller overrode the newline before it is turned
    // into a byte value below; auto-detection and '\r' trimming depend on it
    options.customNewline = options.newline !== defaults.newline

    // the parser compares single bytes, so keep only the first byte of each
    for (const key of ['newline', 'quote', 'separator']) {
      if (typeof options[key] !== 'undefined') {
        ([options[key]] = Buffer.from(options[key]))
      }
    }

    // if escape is not defined on the passed options, use the end value of quote
    options.escape = (opts || {}).escape ? Buffer.from(options.escape)[0] : options.quote

    this.state = {
      empty: options.raw ? Buffer.alloc(0) : '', // value used for a trailing empty cell
      escaped: false, // previous byte was an escape preceding a quote
      first: true, // the header line has not been consumed yet
      lineNumber: 0, // number of non-comment lines parsed so far
      previousEnd: 0, // offset just past the last parsed line
      rowLength: 0, // bytes seen since the last line terminator
      quoted: false // scanner is currently inside a quoted cell
    }

    // unparsed tail carried over to the next chunk
    this._prev = null

    if (options.headers === false) {
      // enforce, as the column length check will fail if headers:false
      options.strict = false
    }

    // headers supplied (or disabled) up front: the first line is already data
    if (options.headers || options.headers === false) {
      this.state.first = false
    }

    this.options = options
    this.headers = options.headers
  }

  /**
   * Extracts one cell from `buffer[start, end)`: strips a surrounding pair of
   * quotes and collapses escape+quote sequences to a single quote.
   * The buffer is compacted in place while unescaping.
   *
   * @param {Buffer} buffer
   * @param {number} start - inclusive start offset of the cell
   * @param {number} end - exclusive end offset of the cell
   * @returns {string|Buffer} the cell value (see parseValue)
   */
  parseCell (buffer, start, end) {
    const { escape, quote } = this.options
    // remove quotes from quoted cells
    if (buffer[start] === quote && buffer[end - 1] === quote) {
      start++
      end--
    }

    let y = start // write cursor for the compacted cell

    for (let i = start; i < end; i++) {
      // check for escape characters and skip them
      if (buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote) {
        i++
      }

      if (y !== i) {
        buffer[y] = buffer[i]
      }
      y++
    }

    return this.parseValue(buffer, start, y)
  }

  /**
   * Parses one line and either records it as the header line, skips it, or
   * emits it as a row.
   *
   * @param {Buffer} buffer
   * @param {number} start - offset of the first byte of the line
   * @param {number} end - offset one past the line terminator
   */
  parseLine (buffer, start, end) {
    const { customNewline, escape, mapHeaders, mapValues, quote, separator, skipComments, skipLines } = this.options

    end-- // trim newline
    // Trim the '\r' of a "\r\n" pair. Only look at bytes that belong to this
    // line: on an empty line buffer[end - 1] is the previous line's terminator.
    if (!customNewline && end > start && buffer[end - 1] === cr) {
      end--
    }

    const comma = separator
    const cells = []
    let isQuoted = false
    let offset = start

    if (skipComments) {
      const char = typeof skipComments === 'string' ? skipComments : '#'
      if (buffer[start] === Buffer.from(char)[0]) {
        return
      }
    }

    // header cells are kept as-is; data cells go through the mapValues hook
    const mapValue = (value) => {
      if (this.state.first) {
        return value
      }

      const index = cells.length
      const header = this.headers[index]

      return mapValues({ header, index, value })
    }

    for (let i = start; i < end; i++) {
      const isStartingQuote = !isQuoted && buffer[i] === quote
      const isEndingQuote = isQuoted && buffer[i] === quote && i + 1 <= end && buffer[i + 1] === comma
      const isEscape = isQuoted && buffer[i] === escape && i + 1 < end && buffer[i + 1] === quote

      if (isStartingQuote || isEndingQuote) {
        isQuoted = !isQuoted
        continue
      } else if (isEscape) {
        i++
        continue
      }

      if (buffer[i] === comma && !isQuoted) {
        let value = this.parseCell(buffer, offset, i)
        value = mapValue(value)
        cells.push(value)
        offset = i + 1
      }
    }

    // last cell of the line (no separator follows it)
    if (offset < end) {
      let value = this.parseCell(buffer, offset, end)
      value = mapValue(value)
      cells.push(value)
    }

    // A line ending in a separator has one more, empty, cell. Guarded so an
    // empty line never inspects the tail of the previous line.
    if (end > start && buffer[end - 1] === comma) {
      cells.push(mapValue(this.state.empty))
    }

    const skip = skipLines && skipLines > this.state.lineNumber
    this.state.lineNumber++

    if (this.state.first && !skip) {
      this.state.first = false
      this.headers = cells.map((header, index) => mapHeaders({ header, index }))

      this.emit('headers', this.headers)
      return
    }

    if (!skip && this.options.strict && cells.length !== this.headers.length) {
      const e = new RangeError('Row length does not match headers')
      this.emit('error', e)
    } else {
      if (!skip) this.writeRow(cells)
    }
  }

  /**
   * Converts `buffer[start, end)` to the configured output type.
   *
   * @param {Buffer} buffer
   * @param {number} start
   * @param {number} end
   * @returns {string|Buffer} a Buffer slice when `options.raw` is set,
   *   otherwise the utf-8 decoded string
   */
  parseValue (buffer, start, end) {
    if (this.options.raw) {
      return buffer.slice(start, end)
    }

    return buffer.toString('utf-8', start, end)
  }

  /**
   * Builds a row object from parsed cells and pushes it downstream.
   * A `null` header drops its column; a missing header yields the key
   * `_<index>`; with `headers === false` the column index is the key.
   *
   * @param {Array<string|Buffer>} cells
   */
  writeRow (cells) {
    const headers = (this.headers === false) ? cells.map((value, index) => index) : this.headers

    const row = cells.reduce((o, cell, index) => {
      const header = headers[index]
      if (header === null) return o // skip columns

      const key = header !== undefined ? header : `_${index}`
      if (String(key) === '__proto__') {
        // Plain assignment would hit the inherited __proto__ setter: a string
        // value would be dropped and an object value (raw mode Buffer) would
        // replace the row's prototype. Define an ordinary own property instead.
        Object.defineProperty(o, '__proto__', {
          value: cell,
          enumerable: true,
          writable: true,
          configurable: true
        })
      } else {
        o[key] = cell
      }
      return o
    }, {})

    this.push(row)
  }

  /**
   * Parses the final line when the input does not end with a line terminator.
   *
   * @param {Function} cb - stream callback, invoked once flushing is done
   */
  _flush (cb) {
    if (this.state.escaped || !this._prev) return cb()
    // parseLine drops one byte for the line terminator, so pass length + 1
    this.parseLine(this._prev, this.state.previousEnd, this._prev.length + 1)
    cb()
  }

  /**
   * Scans a chunk for line terminators outside quoted cells and parses every
   * complete line. Any incomplete trailing line is kept in `_prev` and
   * prepended to the next chunk.
   *
   * @param {Buffer|string} data - incoming chunk
   * @param {string} enc - encoding supplied by the stream machinery (unused)
   * @param {Function} cb - stream callback; receives an Error when a row
   *   exceeds `options.maxRowBytes`
   */
  _transform (data, enc, cb) {
    if (typeof data === 'string') {
      data = Buffer.from(data)
    }

    const { escape, quote } = this.options
    let start = 0
    let buffer = data

    if (this._prev) {
      // resume scanning where the previous chunk ended
      start = this._prev.length
      buffer = Buffer.concat([this._prev, data])
      this._prev = null
    }

    const bufferLength = buffer.length

    for (let i = start; i < bufferLength; i++) {
      const chr = buffer[i]
      const nextChr = i + 1 < bufferLength ? buffer[i + 1] : null

      this.state.rowLength++
      if (this.state.rowLength > this.options.maxRowBytes) {
        return cb(new Error('Row exceeds the maximum size'))
      }

      if (!this.state.escaped && chr === escape && nextChr === quote && i !== start) {
        this.state.escaped = true
        continue
      } else if (chr === quote) {
        if (this.state.escaped) {
          this.state.escaped = false
          // non-escaped quote (quoting the cell)
        } else {
          this.state.quoted = !this.state.quoted
        }
        continue
      }

      if (!this.state.quoted) {
        // until the header line is consumed, detect the line terminator:
        // '\n' (also covers "\r\n") or a lone '\r'
        if (this.state.first && !this.options.customNewline) {
          if (chr === nl) {
            this.options.newline = nl
          } else if (chr === cr) {
            if (nextChr !== nl) {
              this.options.newline = cr
            }
          }
        }

        if (chr === this.options.newline) {
          this.parseLine(buffer, this.state.previousEnd, i + 1)
          this.state.previousEnd = i + 1
          this.state.rowLength = 0
        }
      }
    }

    // everything was consumed: nothing to carry over
    if (this.state.previousEnd === bufferLength) {
      this.state.previousEnd = 0
      return cb()
    }

    // the unparsed tail lies within the new chunk: keep only that chunk and
    // rebase previousEnd onto it
    if (bufferLength - this.state.previousEnd < data.length) {
      this._prev = data
      this.state.previousEnd -= (bufferLength - data.length)
      return cb()
    }

    // otherwise keep the whole (concatenated) buffer
    this._prev = buffer
    cb()
  }
}
module.exports = (opts) => new CsvParser(opts)