// Copyright 2010 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.package json// JSON value parser state machine.// Just about at the limit of what is reasonable to write by hand.// Some parts are a bit tedious, but overall it nicely factors out the// otherwise common code from the multiple scanning functions// in this package (Compact, Indent, checkValid, etc).//// This file starts with two simple examples using the scanner// before diving into the scanner itself.import ()// Valid reports whether data is a valid JSON encoding.func ( []byte) bool { := newScanner()deferfreeScanner()returncheckValid(, ) == nil}// checkValid verifies that data is valid JSON-encoded data.// scan is passed in for use by checkValid to avoid an allocation.// checkValid returns nil or a SyntaxError.func checkValid( []byte, *scanner) error { .reset()for , := range { .bytes++if .step(, ) == scanError {return .err } }if .eof() == scanError {return .err }returnnil}// A SyntaxError is a description of a JSON syntax error.// [Unmarshal] will return a SyntaxError if the JSON can't be parsed.typeSyntaxErrorstruct { msg string// description of error Offset int64// error occurred after reading Offset bytes}func ( *SyntaxError) () string { return .msg }// A scanner is a JSON scanning state machine.// Callers call scan.reset and then pass bytes in one at a time// by calling scan.step(&scan, c) for each byte.// The return value, referred to as an opcode, tells the// caller about significant parsing events like beginning// and ending literals, objects, and arrays, so that the// caller can follow along if it wishes.// The return value scanEnd indicates that a single top-level// JSON value has been completed, *before* the byte that// just got passed in. 
(The indication must be delayed in order// to recognize the end of numbers: is 123 a whole value or// the beginning of 12345e+6?).type scanner struct {// The step is a func to be called to execute the next transition. // Also tried using an integer constant and a single func // with a switch, but using the func directly was 10% faster // on a 64-bit Mac Mini, and it's nicer to read. step func(*scanner, byte) int// Reached end of top-level value. endTop bool// Stack of what we're in the middle of - array values, object keys, object values. parseState []int// Error that happened, if any. err error// total bytes consumed, updated by decoder.Decode (and deliberately // not set to zero by scan.reset) bytes int64}var scannerPool = sync.Pool{New: func() any {return &scanner{} },}func newScanner() *scanner { := scannerPool.Get().(*scanner)// scan.reset by design doesn't set bytes to zero .bytes = 0 .reset()return}func freeScanner( *scanner) {// Avoid hanging on to too much memory in extreme cases.iflen(.parseState) > 1024 { .parseState = nil }scannerPool.Put()}// These values are returned by the state transition functions// assigned to scanner.state and the method scanner.eof.// They give details about the current state of the scan that// callers might be interested to know about.// It is okay to ignore the return value of any particular// call to scanner.state: if one call returns scanError,// every subsequent call will return scanError too.const (// Continue. 
scanContinue = iota// uninteresting byte scanBeginLiteral // end implied by next result != scanContinue scanBeginObject // begin object scanObjectKey // just finished object key (string) scanObjectValue // just finished non-last object value scanEndObject // end object (implies scanObjectValue if possible) scanBeginArray // begin array scanArrayValue // just finished array value scanEndArray // end array (implies scanArrayValue if possible) scanSkipSpace // space byte; can skip; known to be last "continue" result// Stop. scanEnd // top-level value ended *before* this byte; known to be first "stop" result scanError // hit an error, scanner.err.)// These values are stored in the parseState stack.// They give the current state of a composite value// being scanned. If the parser is inside a nested value// the parseState describes the nested state, outermost at entry 0.const ( parseObjectKey = iota// parsing object key (before colon) parseObjectValue // parsing object value (after colon) parseArrayValue // parsing array value)// This limits the max nesting depth to prevent stack overflow.// This is permitted by https://tools.ietf.org/html/rfc7159#section-9const maxNestingDepth = 10000// reset prepares the scanner for use.// It must be called before calling s.step.func ( *scanner) () { .step = stateBeginValue .parseState = .parseState[0:0] .err = nil .endTop = false}// eof tells the scanner that the end of input has been reached.// It returns a scan status just as s.step does.func ( *scanner) () int {if .err != nil {returnscanError }if .endTop {returnscanEnd } .step(, ' ')if .endTop {returnscanEnd }if .err == nil { .err = &SyntaxError{"unexpected end of JSON input", .bytes} }returnscanError}// pushParseState pushes a new parse state p onto the parse stack.// an error state is returned if maxNestingDepth was exceeded, otherwise successState is returned.func ( *scanner) ( byte, int, int) int { .parseState = append(.parseState, )iflen(.parseState) <= maxNestingDepth {return 
}return .error(, "exceeded max depth")}// popParseState pops a parse state (already obtained) off the stack// and updates s.step accordingly.func ( *scanner) () { := len(.parseState) - 1 .parseState = .parseState[0:]if == 0 { .step = stateEndTop .endTop = true } else { .step = stateEndValue }}func isSpace( byte) bool {return <= ' ' && ( == ' ' || == '\t' || == '\r' || == '\n')}// stateBeginValueOrEmpty is the state after reading `[`.func stateBeginValueOrEmpty( *scanner, byte) int {ifisSpace() {returnscanSkipSpace }if == ']' {returnstateEndValue(, ) }returnstateBeginValue(, )}// stateBeginValue is the state at the beginning of the input.func stateBeginValue( *scanner, byte) int {ifisSpace() {returnscanSkipSpace }switch {case'{': .step = stateBeginStringOrEmptyreturn .pushParseState(, parseObjectKey, scanBeginObject)case'[': .step = stateBeginValueOrEmptyreturn .pushParseState(, parseArrayValue, scanBeginArray)case'"': .step = stateInStringreturnscanBeginLiteralcase'-': .step = stateNegreturnscanBeginLiteralcase'0': // beginning of 0.123 .step = state0returnscanBeginLiteralcase't': // beginning of true .step = stateTreturnscanBeginLiteralcase'f': // beginning of false .step = stateFreturnscanBeginLiteralcase'n': // beginning of null .step = stateNreturnscanBeginLiteral }if'1' <= && <= '9' { // beginning of 1234.5 .step = state1returnscanBeginLiteral }return .error(, "looking for beginning of value")}// stateBeginStringOrEmpty is the state after reading `{`.func stateBeginStringOrEmpty( *scanner, byte) int {ifisSpace() {returnscanSkipSpace }if == '}' { := len(.parseState) .parseState[-1] = parseObjectValuereturnstateEndValue(, ) }returnstateBeginString(, )}// stateBeginString is the state after reading `{"key": value,`.func stateBeginString( *scanner, byte) int {ifisSpace() {returnscanSkipSpace }if == '"' { .step = stateInStringreturnscanBeginLiteral }return .error(, "looking for beginning of object key string")}// stateEndValue is the state after completing a 
value,// such as after reading `{}` or `true` or `["x"`.func stateEndValue( *scanner, byte) int { := len(.parseState)if == 0 {// Completed top-level before the current byte. .step = stateEndTop .endTop = truereturnstateEndTop(, ) }ifisSpace() { .step = returnscanSkipSpace } := .parseState[-1]switch {caseparseObjectKey:if == ':' { .parseState[-1] = parseObjectValue .step = stateBeginValuereturnscanObjectKey }return .error(, "after object key")caseparseObjectValue:if == ',' { .parseState[-1] = parseObjectKey .step = stateBeginStringreturnscanObjectValue }if == '}' { .popParseState()returnscanEndObject }return .error(, "after object key:value pair")caseparseArrayValue:if == ',' { .step = stateBeginValuereturnscanArrayValue }if == ']' { .popParseState()returnscanEndArray }return .error(, "after array element") }return .error(, "")}// stateEndTop is the state after finishing the top-level value,// such as after reading `{}` or `[1,2,3]`.// Only space characters should be seen now.func stateEndTop( *scanner, byte) int {if !isSpace() {// Complain about non-space byte on next call. 
.error(, "after top-level value") }returnscanEnd}// stateInString is the state after reading `"`.func stateInString( *scanner, byte) int {if == '"' { .step = stateEndValuereturnscanContinue }if == '\\' { .step = stateInStringEscreturnscanContinue }if < 0x20 {return .error(, "in string literal") }returnscanContinue}// stateInStringEsc is the state after reading `"\` during a quoted string.func stateInStringEsc( *scanner, byte) int {switch {case'b', 'f', 'n', 'r', 't', '\\', '/', '"': .step = stateInStringreturnscanContinuecase'u': .step = stateInStringEscUreturnscanContinue }return .error(, "in string escape code")}// stateInStringEscU is the state after reading `"\u` during a quoted string.func stateInStringEscU( *scanner, byte) int {if'0' <= && <= '9' || 'a' <= && <= 'f' || 'A' <= && <= 'F' { .step = stateInStringEscU1returnscanContinue }// numbersreturn .error(, "in \\u hexadecimal character escape")}// stateInStringEscU1 is the state after reading `"\u1` during a quoted string.func stateInStringEscU1( *scanner, byte) int {if'0' <= && <= '9' || 'a' <= && <= 'f' || 'A' <= && <= 'F' { .step = stateInStringEscU12returnscanContinue }// numbersreturn .error(, "in \\u hexadecimal character escape")}// stateInStringEscU12 is the state after reading `"\u12` during a quoted string.func stateInStringEscU12( *scanner, byte) int {if'0' <= && <= '9' || 'a' <= && <= 'f' || 'A' <= && <= 'F' { .step = stateInStringEscU123returnscanContinue }// numbersreturn .error(, "in \\u hexadecimal character escape")}// stateInStringEscU123 is the state after reading `"\u123` during a quoted string.func stateInStringEscU123( *scanner, byte) int {if'0' <= && <= '9' || 'a' <= && <= 'f' || 'A' <= && <= 'F' { .step = stateInStringreturnscanContinue }// numbersreturn .error(, "in \\u hexadecimal character escape")}// stateNeg is the state after reading `-` during a number.func stateNeg( *scanner, byte) int {if == '0' { .step = state0returnscanContinue }if'1' <= && <= '9' { .step = 
state1returnscanContinue }return .error(, "in numeric literal")}// state1 is the state after reading a non-zero integer during a number,// such as after reading `1` or `100` but not `0`.func state1( *scanner, byte) int {if'0' <= && <= '9' { .step = returnscanContinue }returnstate0(, )}// state0 is the state after reading `0` during a number.func state0( *scanner, byte) int {if == '.' { .step = stateDotreturnscanContinue }if == 'e' || == 'E' { .step = stateEreturnscanContinue }returnstateEndValue(, )}// stateDot is the state after reading the integer and decimal point in a number,// such as after reading `1.`.func stateDot( *scanner, byte) int {if'0' <= && <= '9' { .step = stateDot0returnscanContinue }return .error(, "after decimal point in numeric literal")}// stateDot0 is the state after reading the integer, decimal point, and subsequent// digits of a number, such as after reading `3.14`.func stateDot0( *scanner, byte) int {if'0' <= && <= '9' {returnscanContinue }if == 'e' || == 'E' { .step = stateEreturnscanContinue }returnstateEndValue(, )}// stateE is the state after reading the mantissa and e in a number,// such as after reading `314e` or `0.314e`.func stateE( *scanner, byte) int {if == '+' || == '-' { .step = stateESignreturnscanContinue }returnstateESign(, )}// stateESign is the state after reading the mantissa, e, and sign in a number,// such as after reading `314e-` or `0.314e+`.func stateESign( *scanner, byte) int {if'0' <= && <= '9' { .step = stateE0returnscanContinue }return .error(, "in exponent of numeric literal")}// stateE0 is the state after reading the mantissa, e, optional sign,// and at least one digit of the exponent in a number,// such as after reading `314e-2` or `0.314e+1` or `3.14e0`.func stateE0( *scanner, byte) int {if'0' <= && <= '9' {returnscanContinue }returnstateEndValue(, )}// stateT is the state after reading `t`.func stateT( *scanner, byte) int {if == 'r' { .step = stateTrreturnscanContinue }return .error(, "in literal true 
(expecting 'r')")}// stateTr is the state after reading `tr`.func stateTr( *scanner, byte) int {if == 'u' { .step = stateTrureturnscanContinue }return .error(, "in literal true (expecting 'u')")}// stateTru is the state after reading `tru`.func stateTru( *scanner, byte) int {if == 'e' { .step = stateEndValuereturnscanContinue }return .error(, "in literal true (expecting 'e')")}// stateF is the state after reading `f`.func stateF( *scanner, byte) int {if == 'a' { .step = stateFareturnscanContinue }return .error(, "in literal false (expecting 'a')")}// stateFa is the state after reading `fa`.func stateFa( *scanner, byte) int {if == 'l' { .step = stateFalreturnscanContinue }return .error(, "in literal false (expecting 'l')")}// stateFal is the state after reading `fal`.func stateFal( *scanner, byte) int {if == 's' { .step = stateFalsreturnscanContinue }return .error(, "in literal false (expecting 's')")}// stateFals is the state after reading `fals`.func stateFals( *scanner, byte) int {if == 'e' { .step = stateEndValuereturnscanContinue }return .error(, "in literal false (expecting 'e')")}// stateN is the state after reading `n`.func stateN( *scanner, byte) int {if == 'u' { .step = stateNureturnscanContinue }return .error(, "in literal null (expecting 'u')")}// stateNu is the state after reading `nu`.func stateNu( *scanner, byte) int {if == 'l' { .step = stateNulreturnscanContinue }return .error(, "in literal null (expecting 'l')")}// stateNul is the state after reading `nul`.func stateNul( *scanner, byte) int {if == 'l' { .step = stateEndValuereturnscanContinue }return .error(, "in literal null (expecting 'l')")}// stateError is the state after reaching a syntax error,// such as after reading `[1}` or `5.1.2`.func stateError( *scanner, byte) int {returnscanError}// error records an error and switches to the error state.func ( *scanner) ( byte, string) int { .step = stateError .err = &SyntaxError{"invalid character " + quoteChar() + " " + , .bytes}returnscanError}// 
quoteChar formats c as a quoted character literal.func quoteChar( byte) string {// special cases - different from quoted stringsif == '\'' {return`'\''` }if == '"' {return`'"'` }// use quoted string with different quotation marks := strconv.Quote(string())return"'" + [1:len()-1] + "'"}
// The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.