1
2
3
4
5
6
7
8 package flate
9
10 import (
11 "bufio"
12 "io"
13 mathbits "math/bits"
14 "strconv"
15 "sync"
16 )
17
18 const (
19 maxCodeLen = 16
20
21
22
23 maxNumLit = 286
24 maxNumDist = 30
25 numCodes = 19
26 )
27
28
29 var fixedOnce sync.Once
30 var fixedHuffmanDecoder huffmanDecoder
31
32
33 type CorruptInputError int64
34
35 func (e CorruptInputError) Error() string {
36 return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
37 }
38
39
40 type InternalError string
41
42 func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
43
44
45
46
47 type ReadError struct {
48 Offset int64
49 Err error
50 }
51
52 func (e *ReadError) Error() string {
53 return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
54 }
55
56
57
58
59 type WriteError struct {
60 Offset int64
61 Err error
62 }
63
64 func (e *WriteError) Error() string {
65 return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
66 }
67
68
69
70
71 type Resetter interface {
72
73
74 Reset(r io.Reader, dict []byte) error
75 }
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97 const (
98 huffmanChunkBits = 9
99 huffmanNumChunks = 1 << huffmanChunkBits
100 huffmanCountMask = 15
101 huffmanValueShift = 4
102 )
103
104 type huffmanDecoder struct {
105 min int
106 chunks [huffmanNumChunks]uint32
107 links [][]uint32
108 linkMask uint32
109 }
110
111
112
113
114
115
116 func (h *huffmanDecoder) init(bits []int) bool {
117
118
119
120 const sanity = false
121
122 if h.min != 0 {
123 *h = huffmanDecoder{}
124 }
125
126
127
128 var count [maxCodeLen]int
129 var min, max int
130 for _, n := range bits {
131 if n == 0 {
132 continue
133 }
134 if min == 0 || n < min {
135 min = n
136 }
137 if n > max {
138 max = n
139 }
140 count[n]++
141 }
142
143
144
145
146
147
148
149
150 if max == 0 {
151 return true
152 }
153
154 code := 0
155 var nextcode [maxCodeLen]int
156 for i := min; i <= max; i++ {
157 code <<= 1
158 nextcode[i] = code
159 code += count[i]
160 }
161
162
163
164
165
166
167 if code != 1<<uint(max) && !(code == 1 && max == 1) {
168 return false
169 }
170
171 h.min = min
172 if max > huffmanChunkBits {
173 numLinks := 1 << (uint(max) - huffmanChunkBits)
174 h.linkMask = uint32(numLinks - 1)
175
176
177 link := nextcode[huffmanChunkBits+1] >> 1
178 h.links = make([][]uint32, huffmanNumChunks-link)
179 for j := uint(link); j < huffmanNumChunks; j++ {
180 reverse := int(mathbits.Reverse16(uint16(j)))
181 reverse >>= uint(16 - huffmanChunkBits)
182 off := j - uint(link)
183 if sanity && h.chunks[reverse] != 0 {
184 panic("impossible: overwriting existing chunk")
185 }
186 h.chunks[reverse] = uint32(off<<huffmanValueShift | (huffmanChunkBits + 1))
187 h.links[off] = make([]uint32, numLinks)
188 }
189 }
190
191 for i, n := range bits {
192 if n == 0 {
193 continue
194 }
195 code := nextcode[n]
196 nextcode[n]++
197 chunk := uint32(i<<huffmanValueShift | n)
198 reverse := int(mathbits.Reverse16(uint16(code)))
199 reverse >>= uint(16 - n)
200 if n <= huffmanChunkBits {
201 for off := reverse; off < len(h.chunks); off += 1 << uint(n) {
202
203
204
205
206
207 if sanity && h.chunks[off] != 0 {
208 panic("impossible: overwriting existing chunk")
209 }
210 h.chunks[off] = chunk
211 }
212 } else {
213 j := reverse & (huffmanNumChunks - 1)
214 if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 {
215
216
217 panic("impossible: not an indirect chunk")
218 }
219 value := h.chunks[j] >> huffmanValueShift
220 linktab := h.links[value]
221 reverse >>= huffmanChunkBits
222 for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) {
223 if sanity && linktab[off] != 0 {
224 panic("impossible: overwriting existing chunk")
225 }
226 linktab[off] = chunk
227 }
228 }
229 }
230
231 if sanity {
232
233
234
235 for i, chunk := range h.chunks {
236 if chunk == 0 {
237
238
239
240 if code == 1 && i%2 == 1 {
241 continue
242 }
243 panic("impossible: missing chunk")
244 }
245 }
246 for _, linktab := range h.links {
247 for _, chunk := range linktab {
248 if chunk == 0 {
249 panic("impossible: missing chunk")
250 }
251 }
252 }
253 }
254
255 return true
256 }
257
258
259
260
261 type Reader interface {
262 io.Reader
263 io.ByteReader
264 }
265
266
267 type decompressor struct {
268
269 r Reader
270 roffset int64
271
272
273 b uint32
274 nb uint
275
276
277 h1, h2 huffmanDecoder
278
279
280 bits *[maxNumLit + maxNumDist]int
281 codebits *[numCodes]int
282
283
284 dict dictDecoder
285
286
287 buf [4]byte
288
289
290
291 step func(*decompressor)
292 stepState int
293 final bool
294 err error
295 toRead []byte
296 hl, hd *huffmanDecoder
297 copyLen int
298 copyDist int
299 }
300
301 func (f *decompressor) nextBlock() {
302 for f.nb < 1+2 {
303 if f.err = f.moreBits(); f.err != nil {
304 return
305 }
306 }
307 f.final = f.b&1 == 1
308 f.b >>= 1
309 typ := f.b & 3
310 f.b >>= 2
311 f.nb -= 1 + 2
312 switch typ {
313 case 0:
314 f.dataBlock()
315 case 1:
316
317 f.hl = &fixedHuffmanDecoder
318 f.hd = nil
319 f.huffmanBlock()
320 case 2:
321
322 if f.err = f.readHuffman(); f.err != nil {
323 break
324 }
325 f.hl = &f.h1
326 f.hd = &f.h2
327 f.huffmanBlock()
328 default:
329
330 f.err = CorruptInputError(f.roffset)
331 }
332 }
333
334 func (f *decompressor) Read(b []byte) (int, error) {
335 for {
336 if len(f.toRead) > 0 {
337 n := copy(b, f.toRead)
338 f.toRead = f.toRead[n:]
339 if len(f.toRead) == 0 {
340 return n, f.err
341 }
342 return n, nil
343 }
344 if f.err != nil {
345 return 0, f.err
346 }
347 f.step(f)
348 if f.err != nil && len(f.toRead) == 0 {
349 f.toRead = f.dict.readFlush()
350 }
351 }
352 }
353
354 func (f *decompressor) Close() error {
355 if f.err == io.EOF {
356 return nil
357 }
358 return f.err
359 }
360
361
362
363
364 var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
365
366 func (f *decompressor) readHuffman() error {
367
368 for f.nb < 5+5+4 {
369 if err := f.moreBits(); err != nil {
370 return err
371 }
372 }
373 nlit := int(f.b&0x1F) + 257
374 if nlit > maxNumLit {
375 return CorruptInputError(f.roffset)
376 }
377 f.b >>= 5
378 ndist := int(f.b&0x1F) + 1
379 if ndist > maxNumDist {
380 return CorruptInputError(f.roffset)
381 }
382 f.b >>= 5
383 nclen := int(f.b&0xF) + 4
384
385 f.b >>= 4
386 f.nb -= 5 + 5 + 4
387
388
389 for i := 0; i < nclen; i++ {
390 for f.nb < 3 {
391 if err := f.moreBits(); err != nil {
392 return err
393 }
394 }
395 f.codebits[codeOrder[i]] = int(f.b & 0x7)
396 f.b >>= 3
397 f.nb -= 3
398 }
399 for i := nclen; i < len(codeOrder); i++ {
400 f.codebits[codeOrder[i]] = 0
401 }
402 if !f.h1.init(f.codebits[0:]) {
403 return CorruptInputError(f.roffset)
404 }
405
406
407
408 for i, n := 0, nlit+ndist; i < n; {
409 x, err := f.huffSym(&f.h1)
410 if err != nil {
411 return err
412 }
413 if x < 16 {
414
415 f.bits[i] = x
416 i++
417 continue
418 }
419
420 var rep int
421 var nb uint
422 var b int
423 switch x {
424 default:
425 return InternalError("unexpected length code")
426 case 16:
427 rep = 3
428 nb = 2
429 if i == 0 {
430 return CorruptInputError(f.roffset)
431 }
432 b = f.bits[i-1]
433 case 17:
434 rep = 3
435 nb = 3
436 b = 0
437 case 18:
438 rep = 11
439 nb = 7
440 b = 0
441 }
442 for f.nb < nb {
443 if err := f.moreBits(); err != nil {
444 return err
445 }
446 }
447 rep += int(f.b & uint32(1<<nb-1))
448 f.b >>= nb
449 f.nb -= nb
450 if i+rep > n {
451 return CorruptInputError(f.roffset)
452 }
453 for j := 0; j < rep; j++ {
454 f.bits[i] = b
455 i++
456 }
457 }
458
459 if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
460 return CorruptInputError(f.roffset)
461 }
462
463
464
465
466
467 if f.h1.min < f.bits[endBlockMarker] {
468 f.h1.min = f.bits[endBlockMarker]
469 }
470
471 return nil
472 }
473
474
475
476
477
478 func (f *decompressor) huffmanBlock() {
479 const (
480 stateInit = iota
481 stateDict
482 )
483
484 switch f.stepState {
485 case stateInit:
486 goto readLiteral
487 case stateDict:
488 goto copyHistory
489 }
490
491 readLiteral:
492
493 {
494 v, err := f.huffSym(f.hl)
495 if err != nil {
496 f.err = err
497 return
498 }
499 var n uint
500 var length int
501 switch {
502 case v < 256:
503 f.dict.writeByte(byte(v))
504 if f.dict.availWrite() == 0 {
505 f.toRead = f.dict.readFlush()
506 f.step = (*decompressor).huffmanBlock
507 f.stepState = stateInit
508 return
509 }
510 goto readLiteral
511 case v == 256:
512 f.finishBlock()
513 return
514
515 case v < 265:
516 length = v - (257 - 3)
517 n = 0
518 case v < 269:
519 length = v*2 - (265*2 - 11)
520 n = 1
521 case v < 273:
522 length = v*4 - (269*4 - 19)
523 n = 2
524 case v < 277:
525 length = v*8 - (273*8 - 35)
526 n = 3
527 case v < 281:
528 length = v*16 - (277*16 - 67)
529 n = 4
530 case v < 285:
531 length = v*32 - (281*32 - 131)
532 n = 5
533 case v < maxNumLit:
534 length = 258
535 n = 0
536 default:
537 f.err = CorruptInputError(f.roffset)
538 return
539 }
540 if n > 0 {
541 for f.nb < n {
542 if err = f.moreBits(); err != nil {
543 f.err = err
544 return
545 }
546 }
547 length += int(f.b & uint32(1<<n-1))
548 f.b >>= n
549 f.nb -= n
550 }
551
552 var dist int
553 if f.hd == nil {
554 for f.nb < 5 {
555 if err = f.moreBits(); err != nil {
556 f.err = err
557 return
558 }
559 }
560 dist = int(mathbits.Reverse8(uint8(f.b & 0x1F << 3)))
561 f.b >>= 5
562 f.nb -= 5
563 } else {
564 if dist, err = f.huffSym(f.hd); err != nil {
565 f.err = err
566 return
567 }
568 }
569
570 switch {
571 case dist < 4:
572 dist++
573 case dist < maxNumDist:
574 nb := uint(dist-2) >> 1
575
576 extra := (dist & 1) << nb
577 for f.nb < nb {
578 if err = f.moreBits(); err != nil {
579 f.err = err
580 return
581 }
582 }
583 extra |= int(f.b & uint32(1<<nb-1))
584 f.b >>= nb
585 f.nb -= nb
586 dist = 1<<(nb+1) + 1 + extra
587 default:
588 f.err = CorruptInputError(f.roffset)
589 return
590 }
591
592
593 if dist > f.dict.histSize() {
594 f.err = CorruptInputError(f.roffset)
595 return
596 }
597
598 f.copyLen, f.copyDist = length, dist
599 goto copyHistory
600 }
601
602 copyHistory:
603
604 {
605 cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
606 if cnt == 0 {
607 cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
608 }
609 f.copyLen -= cnt
610
611 if f.dict.availWrite() == 0 || f.copyLen > 0 {
612 f.toRead = f.dict.readFlush()
613 f.step = (*decompressor).huffmanBlock
614 f.stepState = stateDict
615 return
616 }
617 goto readLiteral
618 }
619 }
620
621
622 func (f *decompressor) dataBlock() {
623
624
625 f.nb = 0
626 f.b = 0
627
628
629 nr, err := io.ReadFull(f.r, f.buf[0:4])
630 f.roffset += int64(nr)
631 if err != nil {
632 if err == io.EOF {
633 err = io.ErrUnexpectedEOF
634 }
635 f.err = err
636 return
637 }
638 n := int(f.buf[0]) | int(f.buf[1])<<8
639 nn := int(f.buf[2]) | int(f.buf[3])<<8
640 if uint16(nn) != uint16(^n) {
641 f.err = CorruptInputError(f.roffset)
642 return
643 }
644
645 if n == 0 {
646 f.toRead = f.dict.readFlush()
647 f.finishBlock()
648 return
649 }
650
651 f.copyLen = n
652 f.copyData()
653 }
654
655
656
657 func (f *decompressor) copyData() {
658 buf := f.dict.writeSlice()
659 if len(buf) > f.copyLen {
660 buf = buf[:f.copyLen]
661 }
662
663 cnt, err := io.ReadFull(f.r, buf)
664 f.roffset += int64(cnt)
665 f.copyLen -= cnt
666 f.dict.writeMark(cnt)
667 if err != nil {
668 if err == io.EOF {
669 err = io.ErrUnexpectedEOF
670 }
671 f.err = err
672 return
673 }
674
675 if f.dict.availWrite() == 0 || f.copyLen > 0 {
676 f.toRead = f.dict.readFlush()
677 f.step = (*decompressor).copyData
678 return
679 }
680 f.finishBlock()
681 }
682
683 func (f *decompressor) finishBlock() {
684 if f.final {
685 if f.dict.availRead() > 0 {
686 f.toRead = f.dict.readFlush()
687 }
688 f.err = io.EOF
689 }
690 f.step = (*decompressor).nextBlock
691 }
692
693 func (f *decompressor) moreBits() error {
694 c, err := f.r.ReadByte()
695 if err != nil {
696 if err == io.EOF {
697 err = io.ErrUnexpectedEOF
698 }
699 return err
700 }
701 f.roffset++
702 f.b |= uint32(c) << f.nb
703 f.nb += 8
704 return nil
705 }
706
707
708 func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
709
710
711
712
713 n := uint(h.min)
714 for {
715 for f.nb < n {
716 if err := f.moreBits(); err != nil {
717 return 0, err
718 }
719 }
720 chunk := h.chunks[f.b&(huffmanNumChunks-1)]
721 n = uint(chunk & huffmanCountMask)
722 if n > huffmanChunkBits {
723 chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
724 n = uint(chunk & huffmanCountMask)
725 }
726 if n <= f.nb {
727 if n == 0 {
728 f.err = CorruptInputError(f.roffset)
729 return 0, f.err
730 }
731 f.b >>= n
732 f.nb -= n
733 return int(chunk >> huffmanValueShift), nil
734 }
735 }
736 }
737
738 func makeReader(r io.Reader) Reader {
739 if rr, ok := r.(Reader); ok {
740 return rr
741 }
742 return bufio.NewReader(r)
743 }
744
745 func fixedHuffmanDecoderInit() {
746 fixedOnce.Do(func() {
747
748 var bits [288]int
749 for i := 0; i < 144; i++ {
750 bits[i] = 8
751 }
752 for i := 144; i < 256; i++ {
753 bits[i] = 9
754 }
755 for i := 256; i < 280; i++ {
756 bits[i] = 7
757 }
758 for i := 280; i < 288; i++ {
759 bits[i] = 8
760 }
761 fixedHuffmanDecoder.init(bits[:])
762 })
763 }
764
765 func (f *decompressor) Reset(r io.Reader, dict []byte) error {
766 *f = decompressor{
767 r: makeReader(r),
768 bits: f.bits,
769 codebits: f.codebits,
770 dict: f.dict,
771 step: (*decompressor).nextBlock,
772 }
773 f.dict.init(maxMatchOffset, dict)
774 return nil
775 }
776
777
778
779
780
781
782
783
784
785 func NewReader(r io.Reader) io.ReadCloser {
786 fixedHuffmanDecoderInit()
787
788 var f decompressor
789 f.r = makeReader(r)
790 f.bits = new([maxNumLit + maxNumDist]int)
791 f.codebits = new([numCodes]int)
792 f.step = (*decompressor).nextBlock
793 f.dict.init(maxMatchOffset, nil)
794 return &f
795 }
796
797
798
799
800
801
802
803
804 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
805 fixedHuffmanDecoderInit()
806
807 var f decompressor
808 f.r = makeReader(r)
809 f.bits = new([maxNumLit + maxNumDist]int)
810 f.codebits = new([numCodes]int)
811 f.step = (*decompressor).nextBlock
812 f.dict.init(maxMatchOffset, dict)
813 return &f
814 }
815
View as plain text