1
2
3
4
5 package tar
6
7 import (
8 "bytes"
9 "io"
10 "io/ioutil"
11 "strconv"
12 "strings"
13 "time"
14 )
15
16
17
18
// Reader reads the entries of a tar archive one after another from an
// underlying io.Reader. Next moves to the following entry and Read returns
// the data of the current one.
type Reader struct {
	r    io.Reader  // underlying archive stream
	pad  int64      // padding bytes still to be skipped after the current entry's data
	curr fileReader // reader for the data of the current entry
	blk  block      // scratch block; headers are read into it and padding is skipped through it

	// err is a persistent error. Next, Read and writeTo return it
	// immediately once it has been set: Next records every error it
	// returns, Read records every error except io.EOF, and writeTo
	// records every non-nil error.
	err error
}
30
// fileReader is the internal interface through which the data of the
// current archive entry is read. Reader.curr has this type, and both
// *regFileReader and *sparseFileReader are stored in it.
type fileReader interface {
	io.Reader
	fileState // NOTE(review): declared outside this view; LogicalRemaining/PhysicalRemaining are called through it

	// WriteTo copies the remaining entry data to the given writer.
	WriteTo(io.Writer) (int64, error)
}
37
38
39 func NewReader(r io.Reader) *Reader {
40 return &Reader{r: r, curr: ®FileReader{r, 0}}
41 }
42
43
44
45
46
47
48 func (tr *Reader) Next() (*Header, error) {
49 if tr.err != nil {
50 return nil, tr.err
51 }
52 hdr, err := tr.next()
53 tr.err = err
54 return hdr, err
55 }
56
// next reads headers until it reaches one that describes an ordinary entry
// (or a PAX global header) and returns it. PAX extended headers and GNU
// long-name/long-link headers encountered on the way are consumed here and
// applied to the entry that follows them.
func (tr *Reader) next() (*Header, error) {
	var paxHdrs map[string]string
	var gnuLongName, gnuLongLink string

	// Start with every format allowed; each header processed below narrows
	// the set through mayOnlyBe.
	format := FormatUSTAR | FormatPAX | FormatGNU
loop:
	for {
		// Skip the unread remainder of the previous entry, then its padding.
		if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil {
			return nil, err
		}
		if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil {
			return nil, err
		}
		tr.pad = 0

		hdr, rawHdr, err := tr.readHeader()
		if err != nil {
			return nil, err
		}
		// Install a reader for this header's payload so that meta headers
		// can be read through tr itself below.
		if err := tr.handleRegularFile(hdr); err != nil {
			return nil, err
		}
		format.mayOnlyBe(hdr.Format)

		// Dispatch on the entry type: meta headers loop again, anything
		// else is returned to the caller.
		switch hdr.Typeflag {
		case TypeXHeader, TypeXGlobalHeader:
			format.mayOnlyBe(FormatPAX)
			paxHdrs, err = parsePAX(tr)
			if err != nil {
				return nil, err
			}
			if hdr.Typeflag == TypeXGlobalHeader {
				// NOTE(review): the error returned by mergePAX is not
				// checked here, unlike in the default case below.
				mergePAX(hdr, paxHdrs)
				return &Header{
					Name:       hdr.Name,
					Typeflag:   hdr.Typeflag,
					Xattrs:     hdr.Xattrs,
					PAXRecords: hdr.PAXRecords,
					Format:     format,
				}, nil
			}
			continue loop // the records apply to the header read next
		case TypeGNULongName, TypeGNULongLink:
			format.mayOnlyBe(FormatGNU)
			realname, err := ioutil.ReadAll(tr)
			if err != nil {
				return nil, err
			}

			var p parser
			switch hdr.Typeflag {
			case TypeGNULongName:
				gnuLongName = p.parseString(realname)
			case TypeGNULongLink:
				gnuLongLink = p.parseString(realname)
			}
			continue loop // the long name/link applies to the header read next
		default:
			// Apply the collected PAX records first; a GNU long name or
			// link, when present, then overrides Name/Linkname.
			if err := mergePAX(hdr, paxHdrs); err != nil {
				return nil, err
			}
			if gnuLongName != "" {
				hdr.Name = gnuLongName
			}
			if gnuLongLink != "" {
				hdr.Linkname = gnuLongLink
			}
			// A TypeRegA entry whose name ends in "/" is reported as a directory.
			if hdr.Typeflag == TypeRegA && strings.HasSuffix(hdr.Name, "/") {
				hdr.Typeflag = TypeDir
			}

			// Run again: the merge above may have changed hdr.Size or
			// hdr.Typeflag, which determine the payload reader and padding.
			if err := tr.handleRegularFile(hdr); err != nil {
				return nil, err
			}

			// Wrap the payload reader in a sparse reader when the entry
			// carries a sparse map; this may update hdr.Name and hdr.Size.
			if err := tr.handleSparseFile(hdr, rawHdr); err != nil {
				return nil, err
			}

			// When both USTAR and PAX are still possible, report USTAR.
			if format.has(FormatUSTAR) && format.has(FormatPAX) {
				format.mayOnlyBe(FormatUSTAR)
			}
			hdr.Format = format
			return hdr, nil
		}
	}
}
159
160
161
162
163 func (tr *Reader) handleRegularFile(hdr *Header) error {
164 nb := hdr.Size
165 if isHeaderOnlyType(hdr.Typeflag) {
166 nb = 0
167 }
168 if nb < 0 {
169 return ErrHeader
170 }
171
172 tr.pad = blockPadding(nb)
173 tr.curr = ®FileReader{r: tr.r, nb: nb}
174 return nil
175 }
176
177
178
179 func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error {
180 var spd sparseDatas
181 var err error
182 if hdr.Typeflag == TypeGNUSparse {
183 spd, err = tr.readOldGNUSparseMap(hdr, rawHdr)
184 } else {
185 spd, err = tr.readGNUSparsePAXHeaders(hdr)
186 }
187
188
189
190 if err == nil && spd != nil {
191 if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) {
192 return ErrHeader
193 }
194 sph := invertSparseEntries(spd, hdr.Size)
195 tr.curr = &sparseFileReader{tr.curr, sph, 0}
196 }
197 return err
198 }
199
200
201
202
203
204 func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) {
205
206 var is1x0 bool
207 major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor]
208 switch {
209 case major == "0" && (minor == "0" || minor == "1"):
210 is1x0 = false
211 case major == "1" && minor == "0":
212 is1x0 = true
213 case major != "" || minor != "":
214 return nil, nil
215 case hdr.PAXRecords[paxGNUSparseMap] != "":
216 is1x0 = false
217 default:
218 return nil, nil
219 }
220 hdr.Format.mayOnlyBe(FormatPAX)
221
222
223 if name := hdr.PAXRecords[paxGNUSparseName]; name != "" {
224 hdr.Name = name
225 }
226 size := hdr.PAXRecords[paxGNUSparseSize]
227 if size == "" {
228 size = hdr.PAXRecords[paxGNUSparseRealSize]
229 }
230 if size != "" {
231 n, err := strconv.ParseInt(size, 10, 64)
232 if err != nil {
233 return nil, ErrHeader
234 }
235 hdr.Size = n
236 }
237
238
239 if is1x0 {
240 return readGNUSparseMap1x0(tr.curr)
241 }
242 return readGNUSparseMap0x1(hdr.PAXRecords)
243 }
244
245
246 func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) {
247 for k, v := range paxHdrs {
248 if v == "" {
249 continue
250 }
251 var id64 int64
252 switch k {
253 case paxPath:
254 hdr.Name = v
255 case paxLinkpath:
256 hdr.Linkname = v
257 case paxUname:
258 hdr.Uname = v
259 case paxGname:
260 hdr.Gname = v
261 case paxUid:
262 id64, err = strconv.ParseInt(v, 10, 64)
263 hdr.Uid = int(id64)
264 case paxGid:
265 id64, err = strconv.ParseInt(v, 10, 64)
266 hdr.Gid = int(id64)
267 case paxAtime:
268 hdr.AccessTime, err = parsePAXTime(v)
269 case paxMtime:
270 hdr.ModTime, err = parsePAXTime(v)
271 case paxCtime:
272 hdr.ChangeTime, err = parsePAXTime(v)
273 case paxSize:
274 hdr.Size, err = strconv.ParseInt(v, 10, 64)
275 default:
276 if strings.HasPrefix(k, paxSchilyXattr) {
277 if hdr.Xattrs == nil {
278 hdr.Xattrs = make(map[string]string)
279 }
280 hdr.Xattrs[k[len(paxSchilyXattr):]] = v
281 }
282 }
283 if err != nil {
284 return ErrHeader
285 }
286 }
287 hdr.PAXRecords = paxHdrs
288 return nil
289 }
290
291
292
293 func parsePAX(r io.Reader) (map[string]string, error) {
294 buf, err := ioutil.ReadAll(r)
295 if err != nil {
296 return nil, err
297 }
298 sbuf := string(buf)
299
300
301
302
303 var sparseMap []string
304
305 paxHdrs := make(map[string]string)
306 for len(sbuf) > 0 {
307 key, value, residual, err := parsePAXRecord(sbuf)
308 if err != nil {
309 return nil, ErrHeader
310 }
311 sbuf = residual
312
313 switch key {
314 case paxGNUSparseOffset, paxGNUSparseNumBytes:
315
316 if (len(sparseMap)%2 == 0 && key != paxGNUSparseOffset) ||
317 (len(sparseMap)%2 == 1 && key != paxGNUSparseNumBytes) ||
318 strings.Contains(value, ",") {
319 return nil, ErrHeader
320 }
321 sparseMap = append(sparseMap, value)
322 default:
323 paxHdrs[key] = value
324 }
325 }
326 if len(sparseMap) > 0 {
327 paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",")
328 }
329 return paxHdrs, nil
330 }
331
332
333
334
335
336
337
338
339
// readHeader reads the next header block into tr.blk and parses it.
//
// It returns the parsed Header and a pointer to the raw block (tr.blk).
// Two consecutive all-zero blocks are reported as io.EOF; a zero block
// followed by a non-zero block, or a block whose format cannot be
// determined, is reported as ErrHeader. Read errors are returned unchanged.
// Field parse errors accumulated in the parser p are returned alongside the
// (possibly partially populated) header.
func (tr *Reader) readHeader() (*Header, *block, error) {
	// Read one block; if it is all zeros, the following block decides
	// between end of archive and a corrupt header.
	if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
		return nil, nil, err
	}
	if bytes.Equal(tr.blk[:], zeroBlock[:]) {
		if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {
			return nil, nil, err
		}
		if bytes.Equal(tr.blk[:], zeroBlock[:]) {
			return nil, nil, io.EOF
		}
		return nil, nil, ErrHeader
	}

	// Determine the header format from the block itself.
	format := tr.blk.GetFormat()
	if format == FormatUnknown {
		return nil, nil, ErrHeader
	}

	var p parser
	hdr := new(Header)

	// Fields read through the V7 view of the block; parsed for every format.
	v7 := tr.blk.V7()
	hdr.Typeflag = v7.TypeFlag()[0]
	hdr.Name = p.parseString(v7.Name())
	hdr.Linkname = p.parseString(v7.LinkName())
	hdr.Size = p.parseNumeric(v7.Size())
	hdr.Mode = p.parseNumeric(v7.Mode())
	hdr.Uid = int(p.parseNumeric(v7.UID()))
	hdr.Gid = int(p.parseNumeric(v7.GID()))
	hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0)

	// Additional fields for every format newer than V7.
	if format > formatV7 {
		ustar := tr.blk.USTAR()
		hdr.Uname = p.parseString(ustar.UserName())
		hdr.Gname = p.parseString(ustar.GroupName())
		hdr.Devmajor = p.parseNumeric(ustar.DevMajor())
		hdr.Devminor = p.parseNumeric(ustar.DevMinor())

		var prefix string
		switch {
		case format.has(FormatUSTAR | FormatPAX):
			hdr.Format = format
			ustar := tr.blk.USTAR()
			prefix = p.parseString(ustar.Prefix())

			// Demote the format to unknown when the block holds any byte
			// >= 0x80, or when one of the numeric fields does not end in
			// a NUL byte.
			notASCII := func(r rune) bool { return r >= 0x80 }
			if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 {
				hdr.Format = FormatUnknown
			}
			nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 }
			if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) &&
				nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) {
				hdr.Format = FormatUnknown
			}
		case format.has(formatSTAR):
			star := tr.blk.STAR()
			prefix = p.parseString(star.Prefix())
			hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0)
			hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0)
		case format.has(FormatGNU):
			hdr.Format = format
			// A separate parser is used so that a failure in the GNU time
			// fields does not end up in p.err, which is returned to the caller.
			var p2 parser
			gnu := tr.blk.GNU()
			if b := gnu.AccessTime(); b[0] != 0 {
				hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0)
			}
			if b := gnu.ChangeTime(); b[0] != 0 {
				hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0)
			}

			// If either time field failed to parse, drop both times, mark
			// the format unknown, and use the USTAR prefix field instead,
			// but only when that field is pure ASCII.
			// NOTE(review): this looks like a compatibility fallback for
			// archives whose GNU atime/ctime area really holds a
			// USTAR-style prefix; confirm against the project history.
			if p2.err != nil {
				hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{}
				ustar := tr.blk.USTAR()
				if s := p.parseString(ustar.Prefix()); isASCII(s) {
					prefix = s
				}
				hdr.Format = FormatUnknown
			}
		}
		if len(prefix) > 0 {
			hdr.Name = prefix + "/" + hdr.Name
		}
	}
	return hdr, &tr.blk, p.err
}
453
454
455
456
457
458
459
460
461
462 func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) {
463
464
465
466 if blk.GetFormat() != FormatGNU {
467 return nil, ErrHeader
468 }
469 hdr.Format.mayOnlyBe(FormatGNU)
470
471 var p parser
472 hdr.Size = p.parseNumeric(blk.GNU().RealSize())
473 if p.err != nil {
474 return nil, p.err
475 }
476 s := blk.GNU().Sparse()
477 spd := make(sparseDatas, 0, s.MaxEntries())
478 for {
479 for i := 0; i < s.MaxEntries(); i++ {
480
481 if s.Entry(i).Offset()[0] == 0x00 {
482 break
483 }
484 offset := p.parseNumeric(s.Entry(i).Offset())
485 length := p.parseNumeric(s.Entry(i).Length())
486 if p.err != nil {
487 return nil, p.err
488 }
489 spd = append(spd, sparseEntry{Offset: offset, Length: length})
490 }
491
492 if s.IsExtended()[0] > 0 {
493
494 if _, err := mustReadFull(tr.r, blk[:]); err != nil {
495 return nil, err
496 }
497 s = blk.Sparse()
498 continue
499 }
500 return spd, nil
501 }
502 }
503
504
505
506
507
508
509
510
511
512
513
514 func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) {
515 var (
516 cntNewline int64
517 buf bytes.Buffer
518 blk block
519 )
520
521
522
523 feedTokens := func(n int64) error {
524 for cntNewline < n {
525 if _, err := mustReadFull(r, blk[:]); err != nil {
526 return err
527 }
528 buf.Write(blk[:])
529 for _, c := range blk {
530 if c == '\n' {
531 cntNewline++
532 }
533 }
534 }
535 return nil
536 }
537
538
539
540 nextToken := func() string {
541 cntNewline--
542 tok, _ := buf.ReadString('\n')
543 return strings.TrimRight(tok, "\n")
544 }
545
546
547
548 if err := feedTokens(1); err != nil {
549 return nil, err
550 }
551 numEntries, err := strconv.ParseInt(nextToken(), 10, 0)
552 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
553 return nil, ErrHeader
554 }
555
556
557
558
559 if err := feedTokens(2 * numEntries); err != nil {
560 return nil, err
561 }
562 spd := make(sparseDatas, 0, numEntries)
563 for i := int64(0); i < numEntries; i++ {
564 offset, err1 := strconv.ParseInt(nextToken(), 10, 64)
565 length, err2 := strconv.ParseInt(nextToken(), 10, 64)
566 if err1 != nil || err2 != nil {
567 return nil, ErrHeader
568 }
569 spd = append(spd, sparseEntry{Offset: offset, Length: length})
570 }
571 return spd, nil
572 }
573
574
575
576 func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) {
577
578
579 numEntriesStr := paxHdrs[paxGNUSparseNumBlocks]
580 numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0)
581 if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) {
582 return nil, ErrHeader
583 }
584
585
586 sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",")
587 if len(sparseMap) == 1 && sparseMap[0] == "" {
588 sparseMap = sparseMap[:0]
589 }
590 if int64(len(sparseMap)) != 2*numEntries {
591 return nil, ErrHeader
592 }
593
594
595
596 spd := make(sparseDatas, 0, numEntries)
597 for len(sparseMap) >= 2 {
598 offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64)
599 length, err2 := strconv.ParseInt(sparseMap[1], 10, 64)
600 if err1 != nil || err2 != nil {
601 return nil, ErrHeader
602 }
603 spd = append(spd, sparseEntry{Offset: offset, Length: length})
604 sparseMap = sparseMap[2:]
605 }
606 return spd, nil
607 }
608
609
610
611
612
613
614
615
616
617
618
619 func (tr *Reader) Read(b []byte) (int, error) {
620 if tr.err != nil {
621 return 0, tr.err
622 }
623 n, err := tr.curr.Read(b)
624 if err != nil && err != io.EOF {
625 tr.err = err
626 }
627 return n, err
628 }
629
630
631
632
633
634
635
636
637
638
639
640 func (tr *Reader) writeTo(w io.Writer) (int64, error) {
641 if tr.err != nil {
642 return 0, tr.err
643 }
644 n, err := tr.curr.WriteTo(w)
645 if err != nil {
646 tr.err = err
647 }
648 return n, err
649 }
650
651
// regFileReader reads at most nb further bytes from r; it serves the data
// of an entry that is stored contiguously.
type regFileReader struct {
	r  io.Reader // underlying reader
	nb int64     // bytes that remain to be read
}

// Read reads up to len(b) bytes, never more than the bytes that remain.
// It returns io.EOF once the last remaining byte has been delivered (or
// when none remained), and io.ErrUnexpectedEOF when the underlying reader
// hits EOF while bytes are still owed.
func (fr *regFileReader) Read(b []byte) (n int, err error) {
	if left := fr.nb; int64(len(b)) > left {
		b = b[:left]
	}
	if len(b) != 0 {
		n, err = fr.r.Read(b)
		fr.nb -= int64(n)
	}

	if err == io.EOF && fr.nb > 0 {
		return n, io.ErrUnexpectedEOF
	}
	if err == nil && fr.nb == 0 {
		return n, io.EOF
	}
	return n, err
}

// WriteTo copies the remaining bytes to w. The receiver is wrapped so that
// only its Read method is visible to io.Copy, which keeps io.Copy from
// calling WriteTo again.
func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) {
	readOnly := struct{ io.Reader }{fr}
	return io.Copy(w, readOnly)
}

// LogicalRemaining reports how many bytes are still to be read.
func (fr regFileReader) LogicalRemaining() int64 {
	return fr.nb
}

// PhysicalRemaining reports how many bytes are still to be read; for a
// regular file it is the same count as LogicalRemaining.
func (fr regFileReader) PhysicalRemaining() int64 {
	return fr.nb
}
686
687
// sparseFileReader presents a sparse entry as one continuous stream: bytes
// outside the holes are read from fr, and bytes inside a hole are produced
// as zeros.
type sparseFileReader struct {
	fr  fileReader  // underlying reader that holds the stored data
	sp  sparseHoles // holes still ahead; consumed holes are dropped from the front, the last one is kept
	pos int64       // current position within the logical file
}
693
694 func (sr *sparseFileReader) Read(b []byte) (n int, err error) {
695 finished := int64(len(b)) >= sr.LogicalRemaining()
696 if finished {
697 b = b[:sr.LogicalRemaining()]
698 }
699
700 b0 := b
701 endPos := sr.pos + int64(len(b))
702 for endPos > sr.pos && err == nil {
703 var nf int
704 holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
705 if sr.pos < holeStart {
706 bf := b[:min(int64(len(b)), holeStart-sr.pos)]
707 nf, err = tryReadFull(sr.fr, bf)
708 } else {
709 bf := b[:min(int64(len(b)), holeEnd-sr.pos)]
710 nf, err = tryReadFull(zeroReader{}, bf)
711 }
712 b = b[nf:]
713 sr.pos += int64(nf)
714 if sr.pos >= holeEnd && len(sr.sp) > 1 {
715 sr.sp = sr.sp[1:]
716 }
717 }
718
719 n = len(b0) - len(b)
720 switch {
721 case err == io.EOF:
722 return n, errMissData
723 case err != nil:
724 return n, err
725 case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
726 return n, errUnrefData
727 case finished:
728 return n, io.EOF
729 default:
730 return n, nil
731 }
732 }
733
// WriteTo writes the remaining logical contents of the sparse file to w.
//
// When w is an io.WriteSeeker whose Seek actually works, holes are skipped
// with Seek instead of being written as zeros; otherwise the data is copied
// through Read. The returned count is the distance advanced in the logical
// file. Errors follow the same mapping as Read: io.EOF from the underlying
// reader becomes errMissData, and leftover stored bytes at the logical end
// produce errUnrefData.
func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) {
	ws, ok := w.(io.WriteSeeker)
	if ok {
		// Probe with a no-op seek; a writer whose Seek fails is treated
		// as not seekable.
		if _, err := ws.Seek(0, io.SeekCurrent); err != nil {
			ok = false
		}
	}
	if !ok {
		// Hide WriteTo from io.Copy so that it falls back to Read.
		return io.Copy(w, struct{ io.Reader }{sr})
	}

	var writeLastByte bool
	pos0 := sr.pos
	for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil {
		var nf int64 // bytes advanced in this step
		holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset()
		if sr.pos < holeStart { // in a data fragment: copy up to the hole
			nf = holeStart - sr.pos
			nf, err = io.CopyN(ws, sr.fr, nf)
		} else { // in a hole: seek over it
			nf = holeEnd - sr.pos
			if sr.PhysicalRemaining() == 0 {
				// No stored data is left, so this hole is handled last:
				// seek one byte short and write that byte below.
				writeLastByte = true
				nf--
			}
			_, err = ws.Seek(nf, io.SeekCurrent)
		}
		sr.pos += nf
		if sr.pos >= holeEnd && len(sr.sp) > 1 {
			sr.sp = sr.sp[1:] // done with this hole; the last one is always kept
		}
	}

	// Write the final zero byte explicitly, after having seeked to just
	// before it.
	// NOTE(review): presumably this is what makes the destination grow to
	// its full length, since seeking alone may not; confirm.
	if writeLastByte && err == nil {
		_, err = ws.Write([]byte{0})
		sr.pos++
	}

	n = sr.pos - pos0
	switch {
	case err == io.EOF:
		return n, errMissData // fewer stored bytes than the map promises
	case err != nil:
		return n, err
	case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0:
		return n, errUnrefData // stored bytes that no fragment refers to
	default:
		return n, nil
	}
}
786
787 func (sr sparseFileReader) LogicalRemaining() int64 {
788 return sr.sp[len(sr.sp)-1].endOffset() - sr.pos
789 }
790 func (sr sparseFileReader) PhysicalRemaining() int64 {
791 return sr.fr.PhysicalRemaining()
792 }
793
// zeroReader is an io.Reader that yields an endless stream of zero bytes.
type zeroReader struct{}

// Read overwrites every byte of b with zero and reports the full length of
// b; it never returns an error.
func (zeroReader) Read(b []byte) (int, error) {
	n := len(b)
	for i := 0; i < n; i++ {
		b[i] = 0
	}
	return n, nil
}
802
803
804
805 func mustReadFull(r io.Reader, b []byte) (int, error) {
806 n, err := tryReadFull(r, b)
807 if err == io.EOF {
808 err = io.ErrUnexpectedEOF
809 }
810 return n, err
811 }
812
813
814
// tryReadFull reads from r until b is full or r returns an error.
//
// An io.EOF that arrives once b is completely filled is suppressed, so a
// full read always reports a nil error. When fewer than len(b) bytes are
// read, the error from r, io.EOF included, is returned unchanged.
func tryReadFull(r io.Reader, b []byte) (n int, err error) {
	for n < len(b) && err == nil {
		got, rerr := r.Read(b[n:])
		n, err = n+got, rerr
	}
	if err == io.EOF && n == len(b) {
		err = nil
	}
	return n, err
}
826
827
// discard skips n bytes in r and reports io.ErrUnexpectedEOF if the stream
// ends before n bytes have been skipped.
//
// When r is an io.Seeker and n > 1, it first checks with a no-op seek that
// seeking really works, then seeks forward n-1 bytes; the remaining byte(s)
// are always consumed with a real read. If the probing seek fails, the
// whole distance is read and discarded instead. If the forward seek fails,
// its error is returned.
func discard(r io.Reader, n int64) error {
	var viaSeek int64 // distance covered by seeking
	if seeker, canSeek := r.(io.Seeker); canSeek && n > 1 {
		// Probe first: a failing no-op seek means Seek cannot be used.
		if start, err := seeker.Seek(0, io.SeekCurrent); err == nil && start >= 0 {
			// Stop one byte short so that the read below still runs.
			end, err := seeker.Seek(n-1, io.SeekCurrent)
			if err != nil || end < 0 {
				return err
			}
			viaSeek = end - start
		}
	}

	viaCopy, err := io.CopyN(ioutil.Discard, r, n-viaSeek)
	if err == io.EOF && viaSeek+viaCopy < n {
		return io.ErrUnexpectedEOF
	}
	return err
}
856
View as plain text