...
Run Format

Source file src/encoding/json/stream.go

Documentation: encoding/json

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package json
     6  
     7  import (
     8  	"bytes"
     9  	"errors"
    10  	"io"
    11  )
    12  
// A Decoder reads and decodes JSON values from an input stream.
type Decoder struct {
	r       io.Reader   // underlying input; read only by refill
	buf     []byte      // buffered input; buf[scanp:] has not been consumed yet
	d       decodeState // decoding state re-initialized for each value by Decode
	scanp   int         // start of unread data in buf
	scanned int64       // amount of data already scanned (bytes refill has dropped from the front of buf)
	scan    scanner     // scanner readValue uses to find the end of the next value
	err     error       // sticky read/syntax error; once set, Decode keeps returning it

	tokenState int   // current token-streaming state (one of the token* constants)
	tokenStack []int // saved tokenState of each enclosing array/object opened via Token
}
    26  
    27  // NewDecoder returns a new decoder that reads from r.
    28  //
    29  // The decoder introduces its own buffering and may
    30  // read data from r beyond the JSON values requested.
    31  func NewDecoder(r io.Reader) *Decoder {
    32  	return &Decoder{r: r}
    33  }
    34  
    35  // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
    36  // Number instead of as a float64.
    37  func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
    38  
    39  // DisallowUnknownFields causes the Decoder to return an error when the destination
    40  // is a struct and the input contains object keys which do not match any
    41  // non-ignored, exported fields in the destination.
    42  func (dec *Decoder) DisallowUnknownFields() { dec.d.disallowUnknownFields = true }
    43  
    44  // Decode reads the next JSON-encoded value from its
    45  // input and stores it in the value pointed to by v.
    46  //
    47  // See the documentation for Unmarshal for details about
    48  // the conversion of JSON into a Go value.
    49  func (dec *Decoder) Decode(v interface{}) error {
    50  	if dec.err != nil {
    51  		return dec.err
    52  	}
    53  
    54  	if err := dec.tokenPrepareForDecode(); err != nil {
    55  		return err
    56  	}
    57  
    58  	if !dec.tokenValueAllowed() {
    59  		return &SyntaxError{msg: "not at beginning of value", Offset: dec.offset()}
    60  	}
    61  
    62  	// Read whole value into buffer.
    63  	n, err := dec.readValue()
    64  	if err != nil {
    65  		return err
    66  	}
    67  	dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
    68  	dec.scanp += n
    69  
    70  	// Don't save err from unmarshal into dec.err:
    71  	// the connection is still usable since we read a complete JSON
    72  	// object from it before the error happened.
    73  	err = dec.d.unmarshal(v)
    74  
    75  	// fixup token streaming state
    76  	dec.tokenValueEnd()
    77  
    78  	return err
    79  }
    80  
    81  // Buffered returns a reader of the data remaining in the Decoder's
    82  // buffer. The reader is valid until the next call to Decode.
    83  func (dec *Decoder) Buffered() io.Reader {
    84  	return bytes.NewReader(dec.buf[dec.scanp:])
    85  }
    86  
    87  // readValue reads a JSON value into dec.buf.
    88  // It returns the length of the encoding.
    89  func (dec *Decoder) readValue() (int, error) {
    90  	dec.scan.reset()
    91  
    92  	scanp := dec.scanp
    93  	var err error
    94  Input:
    95  	for {
    96  		// Look in the buffer for a new value.
    97  		for i, c := range dec.buf[scanp:] {
    98  			dec.scan.bytes++
    99  			v := dec.scan.step(&dec.scan, c)
   100  			if v == scanEnd {
   101  				scanp += i
   102  				break Input
   103  			}
   104  			// scanEnd is delayed one byte.
   105  			// We might block trying to get that byte from src,
   106  			// so instead invent a space byte.
   107  			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
   108  				scanp += i + 1
   109  				break Input
   110  			}
   111  			if v == scanError {
   112  				dec.err = dec.scan.err
   113  				return 0, dec.scan.err
   114  			}
   115  		}
   116  		scanp = len(dec.buf)
   117  
   118  		// Did the last read have an error?
   119  		// Delayed until now to allow buffer scan.
   120  		if err != nil {
   121  			if err == io.EOF {
   122  				if dec.scan.step(&dec.scan, ' ') == scanEnd {
   123  					break Input
   124  				}
   125  				if nonSpace(dec.buf) {
   126  					err = io.ErrUnexpectedEOF
   127  				}
   128  			}
   129  			dec.err = err
   130  			return 0, err
   131  		}
   132  
   133  		n := scanp - dec.scanp
   134  		err = dec.refill()
   135  		scanp = dec.scanp + n
   136  	}
   137  	return scanp - dec.scanp, nil
   138  }
   139  
   140  func (dec *Decoder) refill() error {
   141  	// Make room to read more into the buffer.
   142  	// First slide down data already consumed.
   143  	if dec.scanp > 0 {
   144  		dec.scanned += int64(dec.scanp)
   145  		n := copy(dec.buf, dec.buf[dec.scanp:])
   146  		dec.buf = dec.buf[:n]
   147  		dec.scanp = 0
   148  	}
   149  
   150  	// Grow buffer if not large enough.
   151  	const minRead = 512
   152  	if cap(dec.buf)-len(dec.buf) < minRead {
   153  		newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
   154  		copy(newBuf, dec.buf)
   155  		dec.buf = newBuf
   156  	}
   157  
   158  	// Read. Delay error for next iteration (after scan).
   159  	n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
   160  	dec.buf = dec.buf[0 : len(dec.buf)+n]
   161  
   162  	return err
   163  }
   164  
   165  func nonSpace(b []byte) bool {
   166  	for _, c := range b {
   167  		if !isSpace(c) {
   168  			return true
   169  		}
   170  	}
   171  	return false
   172  }
   173  
// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination of every encoded value
	err        error     // sticky write error; once set, Encode keeps returning it
	escapeHTML bool      // passed to marshal as encOpts.escapeHTML; see SetEscapeHTML

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated on first use by Encode
	indentPrefix string        // prefix argument passed to Indent; see SetIndent
	indentValue  string        // indent argument passed to Indent; see SetIndent
}
   184  
   185  // NewEncoder returns a new encoder that writes to w.
   186  func NewEncoder(w io.Writer) *Encoder {
   187  	return &Encoder{w: w, escapeHTML: true}
   188  }
   189  
   190  // Encode writes the JSON encoding of v to the stream,
   191  // followed by a newline character.
   192  //
   193  // See the documentation for Marshal for details about the
   194  // conversion of Go values to JSON.
   195  func (enc *Encoder) Encode(v interface{}) error {
   196  	if enc.err != nil {
   197  		return enc.err
   198  	}
   199  	e := newEncodeState()
   200  	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
   201  	if err != nil {
   202  		return err
   203  	}
   204  
   205  	// Terminate each value with a newline.
   206  	// This makes the output look a little nicer
   207  	// when debugging, and some kind of space
   208  	// is required if the encoded value was a number,
   209  	// so that the reader knows there aren't more
   210  	// digits coming.
   211  	e.WriteByte('\n')
   212  
   213  	b := e.Bytes()
   214  	if enc.indentPrefix != "" || enc.indentValue != "" {
   215  		if enc.indentBuf == nil {
   216  			enc.indentBuf = new(bytes.Buffer)
   217  		}
   218  		enc.indentBuf.Reset()
   219  		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
   220  		if err != nil {
   221  			return err
   222  		}
   223  		b = enc.indentBuf.Bytes()
   224  	}
   225  	if _, err = enc.w.Write(b); err != nil {
   226  		enc.err = err
   227  	}
   228  	encodeStatePool.Put(e)
   229  	return err
   230  }
   231  
   232  // SetIndent instructs the encoder to format each subsequent encoded
   233  // value as if indented by the package-level function Indent(dst, src, prefix, indent).
   234  // Calling SetIndent("", "") disables indentation.
   235  func (enc *Encoder) SetIndent(prefix, indent string) {
   236  	enc.indentPrefix = prefix
   237  	enc.indentValue = indent
   238  }
   239  
   240  // SetEscapeHTML specifies whether problematic HTML characters
   241  // should be escaped inside JSON quoted strings.
   242  // The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
   243  // to avoid certain safety problems that can arise when embedding JSON in HTML.
   244  //
   245  // In non-HTML settings where the escaping interferes with the readability
   246  // of the output, SetEscapeHTML(false) disables this behavior.
   247  func (enc *Encoder) SetEscapeHTML(on bool) {
   248  	enc.escapeHTML = on
   249  }
   250  
   251  // RawMessage is a raw encoded JSON value.
   252  // It implements Marshaler and Unmarshaler and can
   253  // be used to delay JSON decoding or precompute a JSON encoding.
   254  type RawMessage []byte
   255  
   256  // MarshalJSON returns m as the JSON encoding of m.
   257  func (m RawMessage) MarshalJSON() ([]byte, error) {
   258  	if m == nil {
   259  		return []byte("null"), nil
   260  	}
   261  	return m, nil
   262  }
   263  
   264  // UnmarshalJSON sets *m to a copy of data.
   265  func (m *RawMessage) UnmarshalJSON(data []byte) error {
   266  	if m == nil {
   267  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
   268  	}
   269  	*m = append((*m)[0:0], data...)
   270  	return nil
   271  }
   272  
// Compile-time assertions that *RawMessage satisfies both
// the Marshaler and the Unmarshaler interface.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
   275  
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when the Decoder's UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
//
type Token interface{}
   286  
// Token-streaming states stored in Decoder.tokenState. Each state records
// what was consumed last and therefore what may legally come next.
const (
	tokenTopValue    = iota // outside any array/object (the zero value); a value may follow
	tokenArrayStart         // just after '['; a value or ']' may follow
	tokenArrayValue         // just after ',' in an array; a value must follow
	tokenArrayComma         // just after an array element; ',' or ']' may follow
	tokenObjectStart        // just after '{'; a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object; a key string must follow
	tokenObjectColon        // just after an object key; ':' must follow
	tokenObjectValue        // just after ':'; a value must follow
	tokenObjectComma        // just after an object value; ',' or '}' may follow
)
   298  
   299  // advance tokenstate from a separator state to a value state
   300  func (dec *Decoder) tokenPrepareForDecode() error {
   301  	// Note: Not calling peek before switch, to avoid
   302  	// putting peek into the standard Decode path.
   303  	// peek is only called when using the Token API.
   304  	switch dec.tokenState {
   305  	case tokenArrayComma:
   306  		c, err := dec.peek()
   307  		if err != nil {
   308  			return err
   309  		}
   310  		if c != ',' {
   311  			return &SyntaxError{"expected comma after array element", dec.offset()}
   312  		}
   313  		dec.scanp++
   314  		dec.tokenState = tokenArrayValue
   315  	case tokenObjectColon:
   316  		c, err := dec.peek()
   317  		if err != nil {
   318  			return err
   319  		}
   320  		if c != ':' {
   321  			return &SyntaxError{"expected colon after object key", dec.offset()}
   322  		}
   323  		dec.scanp++
   324  		dec.tokenState = tokenObjectValue
   325  	}
   326  	return nil
   327  }
   328  
   329  func (dec *Decoder) tokenValueAllowed() bool {
   330  	switch dec.tokenState {
   331  	case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
   332  		return true
   333  	}
   334  	return false
   335  }
   336  
   337  func (dec *Decoder) tokenValueEnd() {
   338  	switch dec.tokenState {
   339  	case tokenArrayStart, tokenArrayValue:
   340  		dec.tokenState = tokenArrayComma
   341  	case tokenObjectValue:
   342  		dec.tokenState = tokenObjectComma
   343  	}
   344  }
   345  
   346  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
   347  type Delim rune
   348  
   349  func (d Delim) String() string {
   350  	return string(d)
   351  }
   352  
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the token that follows them is returned.
	for {
		// peek skips white space and leaves dec.scanp on the returned byte.
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Remember the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Legal only right after '[' or after a complete element.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the closed array counts as one
			// finished value inside it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Remember the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Legal only right after '{' or after a complete key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state; the closed object counts as one
			// finished value inside it.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and go around again for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Consume the comma and go around again for the next
			// array element or object key.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// The string is an object key. Decode only accepts a
				// value in a value-allowed state, so switch to
				// tokenTopValue for the call and restore the real
				// state afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Otherwise the string is an ordinary value.
			fallthrough

		default:
			// Any other byte must begin a basic value; Decode parses it
			// and updates the token state through tokenValueEnd.
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				return nil, err
			}
			return x, nil
		}
	}
}
   457  
   458  func (dec *Decoder) tokenError(c byte) (Token, error) {
   459  	var context string
   460  	switch dec.tokenState {
   461  	case tokenTopValue:
   462  		context = " looking for beginning of value"
   463  	case tokenArrayStart, tokenArrayValue, tokenObjectValue:
   464  		context = " looking for beginning of value"
   465  	case tokenArrayComma:
   466  		context = " after array element"
   467  	case tokenObjectKey:
   468  		context = " looking for beginning of object key string"
   469  	case tokenObjectColon:
   470  		context = " after object key"
   471  	case tokenObjectComma:
   472  		context = " after object key:value pair"
   473  	}
   474  	return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, dec.offset()}
   475  }
   476  
   477  // More reports whether there is another element in the
   478  // current array or object being parsed.
   479  func (dec *Decoder) More() bool {
   480  	c, err := dec.peek()
   481  	return err == nil && c != ']' && c != '}'
   482  }
   483  
   484  func (dec *Decoder) peek() (byte, error) {
   485  	var err error
   486  	for {
   487  		for i := dec.scanp; i < len(dec.buf); i++ {
   488  			c := dec.buf[i]
   489  			if isSpace(c) {
   490  				continue
   491  			}
   492  			dec.scanp = i
   493  			return c, nil
   494  		}
   495  		// buffer has been scanned, now report any error
   496  		if err != nil {
   497  			return 0, err
   498  		}
   499  		err = dec.refill()
   500  	}
   501  }
   502  
   503  func (dec *Decoder) offset() int64 {
   504  	return dec.scanned + int64(dec.scanp)
   505  }
   506  

View as plain text