...
Run Format

Source file src/archive/zip/writer.go

Documentation: archive/zip

     1  // Copyright 2011 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package zip
     6  
     7  import (
     8  	"bufio"
     9  	"encoding/binary"
    10  	"errors"
    11  	"hash"
    12  	"hash/crc32"
    13  	"io"
    14  	"unicode/utf8"
    15  )
    16  
// Sentinel errors returned by writeHeader when a FileHeader field is too
// long for the 16-bit length slot of the local file header.
var (
	// errLongName reports that len(FileHeader.Name) exceeds 1<<16 - 1.
	errLongName  = errors.New("zip: FileHeader.Name too long")
	// errLongExtra reports that len(FileHeader.Extra) exceeds 1<<16 - 1.
	errLongExtra = errors.New("zip: FileHeader.Extra too long")
)
    21  
// Writer implements a zip file writer.
//
// Entries are added with Create or CreateHeader, and the archive is
// finished by Close, which writes the central directory and end records.
type Writer struct {
	// cw is the destination stream. Its running byte count is used as the
	// current offset when recording entries and the central directory.
	cw          *countWriter
	// dir holds one header per entry added by CreateHeader; Close walks it
	// to emit the central directory.
	dir         []*header
	// last is the writer for the most recently created entry. It is closed
	// by the next CreateHeader call or by Close.
	last        *fileWriter
	// closed is set by Close; a second Close returns an error.
	closed      bool
	// compressors holds per-Writer compressors registered through
	// RegisterCompressor, keyed by method ID. It is allocated lazily.
	compressors map[uint16]Compressor
	// comment is the end-of-central-directory comment set by SetComment.
	comment     string

	// testHookCloseSizeOffset if non-nil is called with the size
	// and offset of the central directory at Close.
	testHookCloseSizeOffset func(size, offset uint64)
}
    35  
// header pairs a FileHeader with the position at which its entry starts
// in the output stream.
type header struct {
	*FileHeader
	// offset is the stream byte count captured by CreateHeader just before
	// the local file header is written; Close stores it in the entry's
	// central directory record.
	offset uint64
}
    40  
    41  // NewWriter returns a new Writer writing a zip file to w.
    42  func NewWriter(w io.Writer) *Writer {
    43  	return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}}
    44  }
    45  
    46  // SetOffset sets the offset of the beginning of the zip data within the
    47  // underlying writer. It should be used when the zip data is appended to an
    48  // existing file, such as a binary executable.
    49  // It must be called before any data is written.
    50  func (w *Writer) SetOffset(n int64) {
    51  	if w.cw.count != 0 {
    52  		panic("zip: SetOffset called after data was written")
    53  	}
    54  	w.cw.count = n
    55  }
    56  
    57  // Flush flushes any buffered data to the underlying writer.
    58  // Calling Flush is not normally necessary; calling Close is sufficient.
    59  func (w *Writer) Flush() error {
    60  	return w.cw.w.(*bufio.Writer).Flush()
    61  }
    62  
    63  // SetComment sets the end-of-central-directory comment field.
    64  // It can only be called before Close.
    65  func (w *Writer) SetComment(comment string) error {
    66  	if len(comment) > uint16max {
    67  		return errors.New("zip: Writer.Comment too long")
    68  	}
    69  	w.comment = comment
    70  	return nil
    71  }
    72  
// Close finishes writing the zip file by writing the central directory.
// It does not (and cannot) close the underlying writer.
//
// Close first closes the entry that is still open, if any. It then writes
// one central directory record per entry added through CreateHeader, an
// optional zip64 end record and locator, and the regular end record
// followed by the archive comment. Finally it flushes the buffered
// writer. Calling Close a second time returns an error.
func (w *Writer) Close() error {
	// Finish the entry still being written so that its sizes and CRC are
	// final before they are copied into the central directory.
	if w.last != nil && !w.last.closed {
		if err := w.last.close(); err != nil {
			return err
		}
		w.last = nil
	}
	if w.closed {
		return errors.New("zip: writer closed twice")
	}
	w.closed = true

	// write central directory
	start := w.cw.count
	for _, h := range w.dir {
		var buf [directoryHeaderLen]byte
		b := writeBuf(buf[:])
		b.uint32(uint32(directoryHeaderSignature))
		b.uint16(h.CreatorVersion)
		b.uint16(h.ReaderVersion)
		b.uint16(h.Flags)
		b.uint16(h.Method)
		b.uint16(h.ModifiedTime)
		b.uint16(h.ModifiedDate)
		b.uint32(h.CRC32)
		if h.isZip64() || h.offset >= uint32max {
			// the file needs a zip64 header. store maxint in both
			// 32 bit size fields (and offset later) to signal that the
			// zip64 extra header should be used.
			b.uint32(uint32max) // compressed size
			b.uint32(uint32max) // uncompressed size

			// append a zip64 extra block to Extra
			// (this mutates the FileHeader held in w.dir)
			var buf [28]byte // 2x uint16 + 3x uint64
			eb := writeBuf(buf[:])
			eb.uint16(zip64ExtraID)
			eb.uint16(24) // size = 3x uint64
			eb.uint64(h.UncompressedSize64)
			eb.uint64(h.CompressedSize64)
			eb.uint64(h.offset)
			h.Extra = append(h.Extra, buf[:]...)
		} else {
			b.uint32(h.CompressedSize)
			b.uint32(h.UncompressedSize)
		}

		// NOTE(review): the Extra and Comment lengths are narrowed to
		// uint16 here without a range check in this function; Extra may
		// also have just grown by the 28-byte zip64 block above.
		b.uint16(uint16(len(h.Name)))
		b.uint16(uint16(len(h.Extra)))
		b.uint16(uint16(len(h.Comment)))
		b = b[4:] // skip disk number start and internal file attr (2x uint16)
		b.uint32(h.ExternalAttrs)
		if h.offset > uint32max {
			b.uint32(uint32max)
		} else {
			b.uint32(uint32(h.offset))
		}
		// The fixed-size record is followed by the variable-length name,
		// extra and comment fields, in that order.
		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Name); err != nil {
			return err
		}
		if _, err := w.cw.Write(h.Extra); err != nil {
			return err
		}
		if _, err := io.WriteString(w.cw, h.Comment); err != nil {
			return err
		}
	}
	end := w.cw.count

	records := uint64(len(w.dir))
	size := uint64(end - start)
	offset := uint64(start)

	if f := w.testHookCloseSizeOffset; f != nil {
		f(size, offset)
	}

	// When any of the three values does not fit the regular end record,
	// emit the zip64 end record and its locator first.
	if records >= uint16max || size >= uint32max || offset >= uint32max {
		var buf [directory64EndLen + directory64LocLen]byte
		b := writeBuf(buf[:])

		// zip64 end of central directory record
		b.uint32(directory64EndSignature)
		b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64)
		b.uint16(zipVersion45)           // version made by
		b.uint16(zipVersion45)           // version needed to extract
		b.uint32(0)                      // number of this disk
		b.uint32(0)                      // number of the disk with the start of the central directory
		b.uint64(records)                // total number of entries in the central directory on this disk
		b.uint64(records)                // total number of entries in the central directory
		b.uint64(size)                   // size of the central directory
		b.uint64(offset)                 // offset of start of central directory with respect to the starting disk number

		// zip64 end of central directory locator
		b.uint32(directory64LocSignature)
		b.uint32(0)           // number of the disk with the start of the zip64 end of central directory
		b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record
		b.uint32(1)           // total number of disks

		if _, err := w.cw.Write(buf[:]); err != nil {
			return err
		}

		// store max values in the regular end record to signal
		// that the zip64 values should be used instead
		records = uint16max
		size = uint32max
		offset = uint32max
	}

	// write end record
	var buf [directoryEndLen]byte
	b := writeBuf(buf[:])
	b.uint32(uint32(directoryEndSignature))
	b = b[4:]                        // skip over disk number and first disk number (2x uint16)
	b.uint16(uint16(records))        // number of entries this disk
	b.uint16(uint16(records))        // number of entries total
	b.uint32(uint32(size))           // size of directory
	b.uint32(uint32(offset))         // start of directory
	b.uint16(uint16(len(w.comment))) // byte size of EOCD comment
	if _, err := w.cw.Write(buf[:]); err != nil {
		return err
	}
	if _, err := io.WriteString(w.cw, w.comment); err != nil {
		return err
	}

	// Everything so far went into the bufio.Writer; push it out.
	return w.cw.w.(*bufio.Writer).Flush()
}
   206  
   207  // Create adds a file to the zip file using the provided name.
   208  // It returns a Writer to which the file contents should be written.
   209  // The file contents will be compressed using the Deflate method.
   210  // The name must be a relative path: it must not start with a drive
   211  // letter (e.g. C:) or leading slash, and only forward slashes are
   212  // allowed.
   213  // The file's contents must be written to the io.Writer before the next
   214  // call to Create, CreateHeader, or Close.
   215  func (w *Writer) Create(name string) (io.Writer, error) {
   216  	header := &FileHeader{
   217  		Name:   name,
   218  		Method: Deflate,
   219  	}
   220  	return w.CreateHeader(header)
   221  }
   222  
   223  // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string
   224  // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
   225  // or any other common encoding).
   226  func detectUTF8(s string) (valid, require bool) {
   227  	for i := 0; i < len(s); {
   228  		r, size := utf8.DecodeRuneInString(s[i:])
   229  		i += size
   230  		// Officially, ZIP uses CP-437, but many readers use the system's
   231  		// local character encoding. Most encoding are compatible with a large
   232  		// subset of CP-437, which itself is ASCII-like.
   233  		//
   234  		// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
   235  		// characters with localized currency and overline characters.
   236  		if r < 0x20 || r > 0x7d || r == 0x5c {
   237  			if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
   238  				return false, false
   239  			}
   240  			require = true
   241  		}
   242  	}
   243  	return true, require
   244  }
   245  
   246  // CreateHeader adds a file to the zip archive using the provided FileHeader
   247  // for the file metadata. Writer takes ownership of fh and may mutate
   248  // its fields. The caller must not modify fh after calling CreateHeader.
   249  //
   250  // This returns a Writer to which the file contents should be written.
   251  // The file's contents must be written to the io.Writer before the next
   252  // call to Create, CreateHeader, or Close.
   253  func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
   254  	if w.last != nil && !w.last.closed {
   255  		if err := w.last.close(); err != nil {
   256  			return nil, err
   257  		}
   258  	}
   259  	if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh {
   260  		// See https://golang.org/issue/11144 confusion.
   261  		return nil, errors.New("archive/zip: invalid duplicate FileHeader")
   262  	}
   263  
   264  	fh.Flags |= 0x8 // we will write a data descriptor
   265  
   266  	// The ZIP format has a sad state of affairs regarding character encoding.
   267  	// Officially, the name and comment fields are supposed to be encoded
   268  	// in CP-437 (which is mostly compatible with ASCII), unless the UTF-8
   269  	// flag bit is set. However, there are several problems:
   270  	//
   271  	//	* Many ZIP readers still do not support UTF-8.
   272  	//	* If the UTF-8 flag is cleared, several readers simply interpret the
   273  	//	name and comment fields as whatever the local system encoding is.
   274  	//
   275  	// In order to avoid breaking readers without UTF-8 support,
   276  	// we avoid setting the UTF-8 flag if the strings are CP-437 compatible.
   277  	// However, if the strings require multibyte UTF-8 encoding and is a
   278  	// valid UTF-8 string, then we set the UTF-8 bit.
   279  	//
   280  	// For the case, where the user explicitly wants to specify the encoding
   281  	// as UTF-8, they will need to set the flag bit themselves.
   282  	utf8Valid1, utf8Require1 := detectUTF8(fh.Name)
   283  	utf8Valid2, utf8Require2 := detectUTF8(fh.Comment)
   284  	switch {
   285  	case fh.NonUTF8:
   286  		fh.Flags &^= 0x800
   287  	case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2):
   288  		fh.Flags |= 0x800
   289  	}
   290  
   291  	fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
   292  	fh.ReaderVersion = zipVersion20
   293  
   294  	// If Modified is set, this takes precedence over MS-DOS timestamp fields.
   295  	if !fh.Modified.IsZero() {
   296  		// Contrary to the FileHeader.SetModTime method, we intentionally
   297  		// do not convert to UTC, because we assume the user intends to encode
   298  		// the date using the specified timezone. A user may want this control
   299  		// because many legacy ZIP readers interpret the timestamp according
   300  		// to the local timezone.
   301  		//
   302  		// The timezone is only non-UTC if a user directly sets the Modified
   303  		// field directly themselves. All other approaches sets UTC.
   304  		fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified)
   305  
   306  		// Use "extended timestamp" format since this is what Info-ZIP uses.
   307  		// Nearly every major ZIP implementation uses a different format,
   308  		// but at least most seem to be able to understand the other formats.
   309  		//
   310  		// This format happens to be identical for both local and central header
   311  		// if modification time is the only timestamp being encoded.
   312  		var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32)
   313  		mt := uint32(fh.Modified.Unix())
   314  		eb := writeBuf(mbuf[:])
   315  		eb.uint16(extTimeExtraID)
   316  		eb.uint16(5)  // Size: SizeOf(uint8) + SizeOf(uint32)
   317  		eb.uint8(1)   // Flags: ModTime
   318  		eb.uint32(mt) // ModTime
   319  		fh.Extra = append(fh.Extra, mbuf[:]...)
   320  	}
   321  
   322  	fw := &fileWriter{
   323  		zipw:      w.cw,
   324  		compCount: &countWriter{w: w.cw},
   325  		crc32:     crc32.NewIEEE(),
   326  	}
   327  	comp := w.compressor(fh.Method)
   328  	if comp == nil {
   329  		return nil, ErrAlgorithm
   330  	}
   331  	var err error
   332  	fw.comp, err = comp(fw.compCount)
   333  	if err != nil {
   334  		return nil, err
   335  	}
   336  	fw.rawCount = &countWriter{w: fw.comp}
   337  
   338  	h := &header{
   339  		FileHeader: fh,
   340  		offset:     uint64(w.cw.count),
   341  	}
   342  	w.dir = append(w.dir, h)
   343  	fw.header = h
   344  
   345  	if err := writeHeader(w.cw, fh); err != nil {
   346  		return nil, err
   347  	}
   348  
   349  	w.last = fw
   350  	return fw, nil
   351  }
   352  
   353  func writeHeader(w io.Writer, h *FileHeader) error {
   354  	const maxUint16 = 1<<16 - 1
   355  	if len(h.Name) > maxUint16 {
   356  		return errLongName
   357  	}
   358  	if len(h.Extra) > maxUint16 {
   359  		return errLongExtra
   360  	}
   361  
   362  	var buf [fileHeaderLen]byte
   363  	b := writeBuf(buf[:])
   364  	b.uint32(uint32(fileHeaderSignature))
   365  	b.uint16(h.ReaderVersion)
   366  	b.uint16(h.Flags)
   367  	b.uint16(h.Method)
   368  	b.uint16(h.ModifiedTime)
   369  	b.uint16(h.ModifiedDate)
   370  	b.uint32(0) // since we are writing a data descriptor crc32,
   371  	b.uint32(0) // compressed size,
   372  	b.uint32(0) // and uncompressed size should be zero
   373  	b.uint16(uint16(len(h.Name)))
   374  	b.uint16(uint16(len(h.Extra)))
   375  	if _, err := w.Write(buf[:]); err != nil {
   376  		return err
   377  	}
   378  	if _, err := io.WriteString(w, h.Name); err != nil {
   379  		return err
   380  	}
   381  	_, err := w.Write(h.Extra)
   382  	return err
   383  }
   384  
   385  // RegisterCompressor registers or overrides a custom compressor for a specific
   386  // method ID. If a compressor for a given method is not found, Writer will
   387  // default to looking up the compressor at the package level.
   388  func (w *Writer) RegisterCompressor(method uint16, comp Compressor) {
   389  	if w.compressors == nil {
   390  		w.compressors = make(map[uint16]Compressor)
   391  	}
   392  	w.compressors[method] = comp
   393  }
   394  
   395  func (w *Writer) compressor(method uint16) Compressor {
   396  	comp := w.compressors[method]
   397  	if comp == nil {
   398  		comp = compressor(method)
   399  	}
   400  	return comp
   401  }
   402  
// fileWriter is the io.Writer handed out by CreateHeader for one entry.
// Data written to it is checksummed and passed through
// rawCount -> comp -> compCount to the archive stream.
type fileWriter struct {
	*header
	// zipw is the archive stream; close writes the data descriptor to it.
	zipw      io.Writer
	// rawCount wraps comp and counts the uncompressed bytes written.
	rawCount  *countWriter
	// comp is the compressor for this entry, writing into compCount.
	comp      io.WriteCloser
	// compCount wraps the archive stream and counts compressed bytes.
	compCount *countWriter
	// crc32 accumulates the checksum of the uncompressed data.
	crc32     hash.Hash32
	// closed is set by close; later Write calls return an error.
	closed    bool
}
   412  
   413  func (w *fileWriter) Write(p []byte) (int, error) {
   414  	if w.closed {
   415  		return 0, errors.New("zip: write to closed file")
   416  	}
   417  	w.crc32.Write(p)
   418  	return w.rawCount.Write(p)
   419  }
   420  
// close finishes the entry: it closes the compressor, stores the final
// CRC-32 and sizes in the entry's FileHeader, and writes the data
// descriptor to the archive stream. A second call returns an error.
func (w *fileWriter) close() error {
	if w.closed {
		return errors.New("zip: file closed twice")
	}
	w.closed = true
	// Closing the compressor must come first so that compCount sees every
	// compressed byte before the sizes are read below.
	if err := w.comp.Close(); err != nil {
		return err
	}

	// update FileHeader
	fh := w.header.FileHeader
	fh.CRC32 = w.crc32.Sum32()
	fh.CompressedSize64 = uint64(w.compCount.count)
	fh.UncompressedSize64 = uint64(w.rawCount.count)

	// The 32-bit size fields either mirror the 64-bit ones or are pinned
	// to uint32max when the entry needs zip64.
	if fh.isZip64() {
		fh.CompressedSize = uint32max
		fh.UncompressedSize = uint32max
		fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions
	} else {
		fh.CompressedSize = uint32(fh.CompressedSize64)
		fh.UncompressedSize = uint32(fh.UncompressedSize64)
	}

	// Write data descriptor. This is more complicated than one would
	// think, see e.g. comments in zipfile.c:putextended() and
	// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588.
	// The approach here is to write 8 byte sizes if needed without
	// adding a zip64 extra in the local header (too late anyway).
	var buf []byte
	if fh.isZip64() {
		buf = make([]byte, dataDescriptor64Len)
	} else {
		buf = make([]byte, dataDescriptorLen)
	}
	b := writeBuf(buf)
	b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X
	b.uint32(fh.CRC32)
	if fh.isZip64() {
		b.uint64(fh.CompressedSize64)
		b.uint64(fh.UncompressedSize64)
	} else {
		b.uint32(fh.CompressedSize)
		b.uint32(fh.UncompressedSize)
	}
	// The descriptor goes to zipw, not through the compressor.
	_, err := w.zipw.Write(buf)
	return err
}
   469  
   470  type countWriter struct {
   471  	w     io.Writer
   472  	count int64
   473  }
   474  
   475  func (w *countWriter) Write(p []byte) (int, error) {
   476  	n, err := w.w.Write(p)
   477  	w.count += int64(n)
   478  	return n, err
   479  }
   480  
   481  type nopCloser struct {
   482  	io.Writer
   483  }
   484  
   485  func (w nopCloser) Close() error {
   486  	return nil
   487  }
   488  
   489  type writeBuf []byte
   490  
   491  func (b *writeBuf) uint8(v uint8) {
   492  	(*b)[0] = v
   493  	*b = (*b)[1:]
   494  }
   495  
   496  func (b *writeBuf) uint16(v uint16) {
   497  	binary.LittleEndian.PutUint16(*b, v)
   498  	*b = (*b)[2:]
   499  }
   500  
   501  func (b *writeBuf) uint32(v uint32) {
   502  	binary.LittleEndian.PutUint32(*b, v)
   503  	*b = (*b)[4:]
   504  }
   505  
   506  func (b *writeBuf) uint64(v uint64) {
   507  	binary.LittleEndian.PutUint64(*b, v)
   508  	*b = (*b)[8:]
   509  }
   510  

View as plain text