1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52 package csv
53
54 import (
55 "bufio"
56 "bytes"
57 "errors"
58 "fmt"
59 "io"
60 "unicode"
61 "unicode/utf8"
62 )
63
64
65
66 type ParseError struct {
67 StartLine int
68 Line int
69 Column int
70 Err error
71 }
72
73 func (e *ParseError) Error() string {
74 if e.Err == ErrFieldCount {
75 return fmt.Sprintf("record on line %d: %v", e.Line, e.Err)
76 }
77 if e.StartLine != e.Line {
78 return fmt.Sprintf("record on line %d; parse error on line %d, column %d: %v", e.StartLine, e.Line, e.Column, e.Err)
79 }
80 return fmt.Sprintf("parse error on line %d, column %d: %v", e.Line, e.Column, e.Err)
81 }
82
83
// ErrTrailingComma is not referenced by the reader code visible in this file.
// NOTE(review): presumably retained so existing callers keep compiling;
// confirm before removing.
var ErrTrailingComma = errors.New("extra delimiter at end of line")

// ErrBareQuote is reported (wrapped in a ParseError) when a quote character
// appears inside an unquoted field and LazyQuotes is off.
var ErrBareQuote = errors.New("bare \" in non-quoted-field")

// ErrQuote is reported (wrapped in a ParseError) when a quoted field contains
// a stray quote or is never terminated, and LazyQuotes is off.
var ErrQuote = errors.New("extraneous or missing \" in quoted-field")

// ErrFieldCount is reported (wrapped in a ParseError) when a record does not
// have the number of fields required by FieldsPerRecord.
var ErrFieldCount = errors.New("wrong number of fields")

// errInvalidDelim is returned by readRecord when Comma or Comment cannot be
// used as a delimiter, or when the two are equal.
var errInvalidDelim = errors.New("csv: invalid field or comment delimiter")
92
// validDelim reports whether r may be used as a field or comment delimiter.
//
// NUL, the quote character, carriage return, line feed, invalid runes and the
// Unicode replacement character are rejected. The quote is excluded because
// the parser hard-codes '"' as the quoting character, so a quote delimiter
// would make quoted fields ambiguous.
func validDelim(r rune) bool {
	return r != 0 && r != '"' && r != '\r' && r != '\n' && utf8.ValidRune(r) && r != utf8.RuneError
}
96
97
98
99
100
101
102
103
104
105
// A Reader reads records from CSV-encoded input.
//
// NewReader returns a Reader whose Comma is ','. The exported fields may be
// changed to customize parsing; readRecord validates Comma and Comment on
// every call.
type Reader struct {
	// Comma is the field delimiter.
	// It is set to comma (',') by NewReader.
	// readRecord fails with errInvalidDelim when validDelim rejects Comma
	// or when Comma equals Comment.
	Comma rune

	// Comment, if not 0, is the comment character. A line whose first rune
	// is Comment is skipped. The check is made on the raw line, before any
	// leading-space trimming.
	// readRecord fails with errInvalidDelim when a non-zero Comment is
	// rejected by validDelim.
	Comment rune

	// FieldsPerRecord is the number of expected fields per record.
	// If positive, readRecord reports ErrFieldCount for a record with a
	// different number of fields. If 0, readRecord sets it to the number
	// of fields in the record it has just parsed, so that later records
	// are checked against that count. If negative, no check is made.
	FieldsPerRecord int

	// If LazyQuotes is true, a quote may appear in an unquoted field and a
	// non-doubled quote may appear in a quoted field without causing a
	// ParseError.
	LazyQuotes bool

	// If TrimLeadingSpace is true, leading white space (as defined by
	// unicode.IsSpace) is removed before each field is parsed.
	TrimLeadingSpace bool

	// ReuseRecord controls whether Read hands the slice it returned last
	// time back to readRecord, so that its backing array can be reused.
	// When false, every Read passes nil and receives a fresh slice.
	ReuseRecord bool

	// TrailingComma is not read by any code visible in this file.
	// NOTE(review): presumably kept only for backward compatibility — confirm.
	TrailingComma bool

	// r is the buffered source that readLine reads from.
	r *bufio.Reader

	// numLine is the number of the line most recently read; readLine
	// increments it on every call.
	numLine int

	// rawBuffer is a line buffer used only by readLine, to assemble a line
	// that does not fit in the bufio.Reader's internal buffer.
	rawBuffer []byte

	// recordBuffer holds the unescaped fields of the current record, one
	// after another. Field boundaries are given by fieldIndexes.
	recordBuffer []byte

	// fieldIndexes holds, for each field of the current record, the offset
	// in recordBuffer at which that field ends.
	fieldIndexes []int

	// lastRecord caches the record returned by the previous Read; it is
	// only written and read when ReuseRecord is true.
	lastRecord []string
}
166
167
168 func NewReader(r io.Reader) *Reader {
169 return &Reader{
170 Comma: ',',
171 r: bufio.NewReader(r),
172 }
173 }
174
175
176
177
178
179
180
181
182
183 func (r *Reader) Read() (record []string, err error) {
184 if r.ReuseRecord {
185 record, err = r.readRecord(r.lastRecord)
186 r.lastRecord = record
187 } else {
188 record, err = r.readRecord(nil)
189 }
190 return record, err
191 }
192
193
194
195
196
197
198 func (r *Reader) ReadAll() (records [][]string, err error) {
199 for {
200 record, err := r.readRecord(nil)
201 if err == io.EOF {
202 return records, nil
203 }
204 if err != nil {
205 return nil, err
206 }
207 records = append(records, record)
208 }
209 }
210
211
212
213
214
215 func (r *Reader) readLine() ([]byte, error) {
216 line, err := r.r.ReadSlice('\n')
217 if err == bufio.ErrBufferFull {
218 r.rawBuffer = append(r.rawBuffer[:0], line...)
219 for err == bufio.ErrBufferFull {
220 line, err = r.r.ReadSlice('\n')
221 r.rawBuffer = append(r.rawBuffer, line...)
222 }
223 line = r.rawBuffer
224 }
225 if len(line) > 0 && err == io.EOF {
226 err = nil
227
228 if line[len(line)-1] == '\r' {
229 line = line[:len(line)-1]
230 }
231 }
232 r.numLine++
233
234 if n := len(line); n >= 2 && line[n-2] == '\r' && line[n-1] == '\n' {
235 line[n-2] = '\n'
236 line = line[:n-1]
237 }
238 return line, err
239 }
240
241
// lengthNL reports the number of bytes taken by a trailing '\n' in b:
// 1 when b ends with a newline, 0 otherwise (including when b is empty).
func lengthNL(b []byte) int {
	if n := len(b); n == 0 || b[n-1] != '\n' {
		return 0
	}
	return 1
}
248
249
// nextRune returns the first rune encoded in b, as decoded by
// utf8.DecodeRune. For empty or invalidly encoded input that is
// utf8.RuneError.
func nextRune(b []byte) rune {
	first, _ := utf8.DecodeRune(b) // the encoded width is not needed here
	return first
}
254
// readRecord reads and parses a single record from r.
//
// Comment lines (when r.Comment is set) and empty lines before the record are
// skipped. The unescaped field contents are accumulated in r.recordBuffer and
// the end offset of each field in r.fieldIndexes; the buffer is then converted
// to one string and sliced into dst. dst is reused when its capacity is large
// enough, otherwise a new slice is allocated.
//
// It returns errInvalidDelim when the delimiters are unusable, and a nil
// record with io.EOF when the input is exhausted before a record starts.
// Quoting and field-count problems are reported as *ParseError; in those
// cases, and when a read error occurs part-way through a record, the fields
// collected so far are returned together with the error.
func (r *Reader) readRecord(dst []string) ([]string, error) {
	// The delimiters are exported fields and may have changed since the
	// previous call, so they are validated every time.
	if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
		return nil, errInvalidDelim
	}

	// Read the first line of the record, skipping comment and empty lines.
	// fullLine keeps the whole line for column computation while line is
	// consumed field by field.
	var line, fullLine []byte
	var errRead error
	for errRead == nil {
		line, errRead = r.readLine()
		if r.Comment != 0 && nextRune(line) == r.Comment {
			line = nil
			continue // Skip comment lines.
		}
		if errRead == nil && len(line) == lengthNL(line) {
			line = nil
			continue // Skip lines that contain nothing but the newline.
		}
		fullLine = line
		break
	}
	if errRead == io.EOF {
		return nil, errRead
	}

	// Parse each field in the record.
	var err error
	const quoteLen = len(`"`)
	commaLen := utf8.RuneLen(r.Comma)
	recLine := r.numLine // Line on which the record starts.
	r.recordBuffer = r.recordBuffer[:0]
	r.fieldIndexes = r.fieldIndexes[:0]
parseField:
	for {
		if r.TrimLeadingSpace {
			line = bytes.TrimLeftFunc(line, unicode.IsSpace)
		}
		if len(line) == 0 || line[0] != '"' {
			// Non-quoted field: it runs to the next delimiter, or to the
			// end of the line (without the newline) if there is none.
			i := bytes.IndexRune(line, r.Comma)
			field := line
			if i >= 0 {
				field = field[:i]
			} else {
				field = field[:len(field)-lengthNL(field)]
			}
			// A quote inside a non-quoted field is an error unless
			// LazyQuotes is set.
			if !r.LazyQuotes {
				if j := bytes.IndexByte(field, '"'); j >= 0 {
					// Column is the number of runes preceding the quote.
					col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
					err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
					break parseField
				}
			}
			r.recordBuffer = append(r.recordBuffer, field...)
			r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
			if i >= 0 {
				// Step over the delimiter and parse the next field.
				line = line[i+commaLen:]
				continue parseField
			}
			break parseField
		} else {
			// Quoted field: drop the opening quote, then consume up to
			// each following quote and decide what that quote means.
			line = line[quoteLen:]
			for {
				i := bytes.IndexByte(line, '"')
				if i >= 0 {
					// Found a quote: copy what precedes it and inspect
					// what follows it.
					r.recordBuffer = append(r.recordBuffer, line[:i]...)
					line = line[i+quoteLen:]
					switch rn := nextRune(line); {
					case rn == '"':
						// `""` sequence: an escaped quote.
						r.recordBuffer = append(r.recordBuffer, '"')
						line = line[quoteLen:]
					case rn == r.Comma:
						// `",` sequence: end of this field.
						line = line[commaLen:]
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						continue parseField
					case lengthNL(line) == len(line):
						// Quote followed by end of line (or nothing at
						// all): end of the record.
						r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
						break parseField
					case r.LazyQuotes:
						// Lone quote tolerated: keep it as field data.
						r.recordBuffer = append(r.recordBuffer, '"')
					default:
						// Lone quote followed by other data: invalid.
						// Column is the number of runes preceding it.
						col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
						err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
						break parseField
					}
				} else if len(line) > 0 {
					// No closing quote on this line: the field continues
					// on the next line. Keep the data read so far.
					r.recordBuffer = append(r.recordBuffer, line...)
					if errRead != nil {
						break parseField
					}
					line, errRead = r.readLine()
					// EOF is not reported here; an empty line on the next
					// iteration leads to the branch below.
					if errRead == io.EOF {
						errRead = nil
					}
					fullLine = line
				} else {
					// Input ended (EOF or read error) inside a quoted
					// field.
					if !r.LazyQuotes && errRead == nil {
						col := utf8.RuneCount(fullLine)
						err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
						break parseField
					}
					r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
					break parseField
				}
			}
		}
	}
	// A parse error takes precedence over a read error.
	if err == nil {
		err = errRead
	}

	// Convert the buffer to a string once and slice every field out of it,
	// so the whole record costs a single string allocation.
	str := string(r.recordBuffer)
	dst = dst[:0]
	if cap(dst) < len(r.fieldIndexes) {
		dst = make([]string, len(r.fieldIndexes))
	}
	dst = dst[:len(r.fieldIndexes)]
	var preIdx int
	for i, idx := range r.fieldIndexes {
		dst[i] = str[preIdx:idx]
		preIdx = idx
	}

	// Check the field count, or record it for later records when
	// FieldsPerRecord is still 0. An earlier error is never overwritten.
	if r.FieldsPerRecord > 0 {
		if len(dst) != r.FieldsPerRecord && err == nil {
			err = &ParseError{StartLine: recLine, Line: recLine, Err: ErrFieldCount}
		}
	} else if r.FieldsPerRecord == 0 {
		r.FieldsPerRecord = len(dst)
	}
	return dst, err
}
400
View as plain text