// Copyright 2023 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:build goexperiment.jsonv2package jsonwireimport ()typeValueFlagsuintconst ( _ ValueFlags = (1 << iota) / 2// powers of two starting with zero stringNonVerbatim // string cannot be naively treated as valid UTF-8 stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.// TODO: Track whether a number is a non-integer?)func ( *ValueFlags) ( ValueFlags) { * |= }func ( ValueFlags) () bool { return &stringNonVerbatim == 0 }func ( ValueFlags) () bool { return &stringNonCanonical == 0 }// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.func ( []byte) ( int) {// NOTE: The arguments and logic are kept simple to keep this inlinable.forlen() > && ([] == ' ' || [] == '\t' || [] == '\r' || [] == '\n') { ++ }return}// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.// It returns 0 if it is invalid, in which case consumeLiteral should be used.func ( []byte) int {// NOTE: The arguments and logic are kept simple to keep this inlinable.const = "null"iflen() >= len() && string([:len()]) == {returnlen() }return0}// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.// It returns 0 if it is invalid, in which case consumeLiteral should be used.func ( []byte) int {// NOTE: The arguments and logic are kept simple to keep this inlinable.const = "false"iflen() >= len() && string([:len()]) == {returnlen() }return0}// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.// It returns 0 if it is invalid, in which case consumeLiteral should be used.func ( []byte) int {// NOTE: The arguments and logic are kept simple to keep this inlinable.const = "true"iflen() >= len() && string([:len()]) == {returnlen() }return0}// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.// If the input appears truncated, it returns io.ErrUnexpectedEOF.func ( []byte, string) ( int, error) {for := 0; < len() && < len(); ++ {if [] != [] {return , NewInvalidCharacterError([:], "in literal "++" (expecting "+strconv.QuoteRune(rune([]))+")") } }iflen() < len() {returnlen(), io.ErrUnexpectedEOF }returnlen(), nil}// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7// but is limited to the grammar for an ASCII string without escape sequences.// It returns 0 if it is invalid or more complicated than a simple string,// in which case consumeString should be called.//// It rejects '<', '>', and '&' for compatibility reasons since these were// always escaped in the v1 implementation. Thus, if this function reports// non-zero then we know that the string would be encoded the same way// under both v1 or v2 escape semantics.func ( []byte) ( int) {// NOTE: The arguments and logic are kept simple to keep this inlinable.iflen() > 0 && [0] == '"' { ++forlen() > && [] < utf8.RuneSelf && escapeASCII[[]] == 0 { ++ }ifuint(len()) > uint() && [] == '"' { ++return } }return0}// ConsumeString consumes the next JSON string per RFC 7159, section 7.// If validateUTF8 is false, then this allows the presence of invalid UTF-8// characters within the string itself.// It reports the number of bytes consumed and whether an error was encountered.// If the input appears truncated, it returns io.ErrUnexpectedEOF.func ( *ValueFlags, []byte, bool) ( int, error) {returnConsumeStringResumable(, , 0, )}// ConsumeStringResumable is identical to consumeString but supports resuming// from a previous call that returned io.ErrUnexpectedEOF.func ( *ValueFlags, []byte, int, bool) ( int, error) {// Consume the leading double quote.switch {case > 0: = // already handled the leading quotecaseuint(len()) == 0:return , io.ErrUnexpectedEOFcase [0] == '"': ++default:return , NewInvalidCharacterError([:], `at start of string (expecting '"')`) }// Consume every character in the string.foruint(len()) > uint() {// Optimize for long sequences of unescaped characters. := func( byte) bool {return < utf8.RuneSelf && ' ' <= && != '\\' && != '"' }foruint(len()) > uint() && ([]) { ++ }ifuint(len()) <= uint() {return , io.ErrUnexpectedEOF }// Check for terminating double quote.if [] == '"' { ++return , nil }switch , := utf8.DecodeRune([:]); {// Handle UTF-8 encoded byte sequence. // Due to specialized handling of ASCII above, we know that // all normal sequences at this point must be 2 bytes or larger.case > 1: += // Handle escape sequence.case == '\\': .Join(stringNonVerbatim) = ifuint(len()) < uint(+2) {return , io.ErrUnexpectedEOF }switch := [+1]; {case'/':// Forward slash is the only character with 3 representations. // Per RFC 8785, section 3.2.2.2., this must not be escaped. .Join(stringNonCanonical) += 2case'"', '\\', 'b', 'f', 'n', 'r', 't': += 2case'u':ifuint(len()) < uint(+6) {ifhasEscapedUTF16Prefix([:], false) {return , io.ErrUnexpectedEOF } .Join(stringNonCanonical)return , NewInvalidEscapeSequenceError([:]) } , := parseHexUint16([+2 : +6])if ! { .Join(stringNonCanonical)return , NewInvalidEscapeSequenceError([ : +6]) }// Only certain control characters can use the \uFFFF notation // for canonical formatting (per RFC 8785, section 3.2.2.2.).switch {// \uFFFF notation not permitted for these characters.case'\b', '\f', '\n', '\r', '\t': .Join(stringNonCanonical)default:// \uFFFF notation only permitted for control characters.if >= ' ' { .Join(stringNonCanonical) } else {// \uFFFF notation must be lower case.for , := range [+2 : +6] {if'A' <= && <= 'F' { .Join(stringNonCanonical) } } } } += 6 := rune()if && utf16.IsSurrogate() {ifuint(len()) < uint(+6) {ifhasEscapedUTF16Prefix([:], true) {return , io.ErrUnexpectedEOF } .Join(stringNonCanonical)return - 6, NewInvalidEscapeSequenceError([-6:]) } elseif , := parseHexUint16([+2 : +6]); [] != '\\' || [+1] != 'u' || ! { .Join(stringNonCanonical)return - 6, NewInvalidEscapeSequenceError([-6 : +6]) } elseif = utf16.DecodeRune(rune(), rune()); == utf8.RuneError { .Join(stringNonCanonical)return - 6, NewInvalidEscapeSequenceError([-6 : +6]) } else { += 6 } }default: .Join(stringNonCanonical)return , NewInvalidEscapeSequenceError([ : +2]) }// Handle invalid UTF-8.case == utf8.RuneError:if !utf8.FullRune([:]) {return , io.ErrUnexpectedEOF } .Join(stringNonVerbatim | stringNonCanonical)if {return , ErrInvalidUTF8 } ++// Handle invalid control characters.case < ' ': .Join(stringNonVerbatim | stringNonCanonical)return , NewInvalidCharacterError([:], "in string (expecting non-control character)")default:panic("BUG: unhandled character " + QuoteRune([:])) } }return , io.ErrUnexpectedEOF}// AppendUnquote appends the unescaped form of a JSON string in src to dst.// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,// but the error will be specified as having encountered such an error.// The input must be an entire JSON string with no surrounding whitespace.func [ ~[]byte | ~string]( []byte, ) ( []byte, error) { = slices.Grow(, len())// Consume the leading double quote.var , intswitch {caseuint(len()) == 0:return , io.ErrUnexpectedEOFcase [0] == '"': , = 1, 1default:return , NewInvalidCharacterError(, `at start of string (expecting '"')`) }// Consume every character in the string.foruint(len()) > uint() {// Optimize for long sequences of unescaped characters. := func( byte) bool {return < utf8.RuneSelf && ' ' <= && != '\\' && != '"' }foruint(len()) > uint() && ([]) { ++ }ifuint(len()) <= uint() { = append(, [:]...)return , io.ErrUnexpectedEOF }// Check for terminating double quote.if [] == '"' { = append(, [:]...) ++if < len() { = NewInvalidCharacterError([:], "after string value") }return , }switch , := utf8.DecodeRuneInString(string(truncateMaxUTF8([:]))); {// Handle UTF-8 encoded byte sequence. // Due to specialized handling of ASCII above, we know that // all normal sequences at this point must be 2 bytes or larger.case > 1: += // Handle escape sequence.case == '\\': = append(, [:]...)// Handle escape sequence.ifuint(len()) < uint(+2) {return , io.ErrUnexpectedEOF }switch := [+1]; {case'"', '\\', '/': = append(, ) += 2case'b': = append(, '\b') += 2case'f': = append(, '\f') += 2case'n': = append(, '\n') += 2case'r': = append(, '\r') += 2case't': = append(, '\t') += 2case'u':ifuint(len()) < uint(+6) {ifhasEscapedUTF16Prefix([:], false) {return , io.ErrUnexpectedEOF }return , NewInvalidEscapeSequenceError([:]) } , := parseHexUint16([+2 : +6])if ! {return , NewInvalidEscapeSequenceError([ : +6]) } += 6// Check whether this is a surrogate half. := rune()ifutf16.IsSurrogate() { = utf8.RuneError// assume failure unless the following succeedsifuint(len()) < uint(+6) {ifhasEscapedUTF16Prefix([:], true) {returnutf8.AppendRune(, ), io.ErrUnexpectedEOF } = NewInvalidEscapeSequenceError([-6:]) } elseif , := parseHexUint16([+2 : +6]); [] != '\\' || [+1] != 'u' || ! { = NewInvalidEscapeSequenceError([-6 : +6]) } elseif = utf16.DecodeRune(rune(), rune()); == utf8.RuneError { = NewInvalidEscapeSequenceError([-6 : +6]) } else { += 6 } } = utf8.AppendRune(, )default:return , NewInvalidEscapeSequenceError([ : +2]) } = // Handle invalid UTF-8.case == utf8.RuneError: = append(, [:]...)if !utf8.FullRuneInString(string(truncateMaxUTF8([:]))) {return , io.ErrUnexpectedEOF }// NOTE: An unescaped string may be longer than the escaped string // because invalid UTF-8 bytes are being replaced. = append(, "\uFFFD"...) += = = ErrInvalidUTF8// Handle invalid control characters.case < ' ': = append(, [:]...)return , NewInvalidCharacterError([:], "in string (expecting non-control character)")default:panic("BUG: unhandled character " + QuoteRune([:])) } } = append(, [:]...)return , io.ErrUnexpectedEOF}// hasEscapedUTF16Prefix reports whether b is possibly// the truncated prefix of a \uFFFF escape sequence.func hasEscapedUTF16Prefix[ ~[]byte | ~string]( , bool) bool {for := rangelen() {switch := []; {case == 0 && != '\\':returnfalsecase == 1 && != 'u':returnfalsecase == 2 && && != 'd' && != 'D':returnfalse// not within ['\uDC00':'\uDFFF']case == 3 && && !('c' <= && <= 'f') && !('C' <= && <= 'F'):returnfalse// not within ['\uDC00':'\uDFFF']case >= 2 && < 6 && !('0' <= && <= '9') && !('a' <= && <= 'f') && !('A' <= && <= 'F'):returnfalse } }returntrue}// UnquoteMayCopy returns the unescaped form of b.// If there are no escaped characters, the output is simply a subslice of// the input with the surrounding quotes removed.// Otherwise, a new buffer is allocated for the output.// It assumes the input is valid.func ( []byte, bool) []byte {// NOTE: The arguments and logic are kept simple to keep this inlinable.if {return [len(`"`) : len()-len(`"`)] } , _ = AppendUnquote(nil, )return}// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6// but is limited to the grammar for a positive integer.// It returns 0 if it is invalid or more complicated than a simple integer,// in which case consumeNumber should be called.func ( []byte) ( int) {// NOTE: The arguments and logic are kept simple to keep this inlinable.iflen() > 0 {if [0] == '0' { ++ } elseif'1' <= [0] && [0] <= '9' { ++forlen() > && ('0' <= [] && [] <= '9') { ++ } } else {return0 }ifuint(len()) <= uint() || ([] != '.' && [] != 'e' && [] != 'E') {return } }return0}typeConsumeNumberStateuintconst ( consumeNumberInit ConsumeNumberState = iota beforeIntegerDigits withinIntegerDigits beforeFractionalDigits withinFractionalDigits beforeExponentDigits withinExponentDigits)// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.// It reports the number of bytes consumed and whether an error was encountered.// If the input appears truncated, it returns io.ErrUnexpectedEOF.//// Note that JSON numbers are not self-terminating.// If the entire input is consumed, then the caller needs to consider whether// there may be subsequent unread data that may still be part of this number.func ( []byte) ( int, error) { , _, = ConsumeNumberResumable(, 0, consumeNumberInit)return , }// ConsumeNumberResumable is identical to consumeNumber but supports resuming// from a previous call that returned io.ErrUnexpectedEOF.func ( []byte, int, ConsumeNumberState) ( int, ConsumeNumberState, error) {// Jump to the right state when resuming from a partial consumption. = if > consumeNumberInit {switch {casewithinIntegerDigits, withinFractionalDigits, withinExponentDigits:// Consume leading digits.foruint(len()) > uint() && ('0' <= [] && [] <= '9') { ++ }ifuint(len()) <= uint() {return , , nil// still within the same state } ++ // switches "withinX" to "beforeY" where Y is the state after X }switch {casebeforeIntegerDigits:gotocasebeforeFractionalDigits:gotocasebeforeExponentDigits:gotodefault:return , , nil } }// Consume required integer component (with optional minus sign).: = ifuint(len()) > 0 && [0] == '-' { ++ }switch {caseuint(len()) <= uint():return , beforeIntegerDigits, io.ErrUnexpectedEOFcase [] == '0': ++ = beforeFractionalDigitscase'1' <= [] && [] <= '9': ++foruint(len()) > uint() && ('0' <= [] && [] <= '9') { ++ } = withinIntegerDigitsdefault:return , , NewInvalidCharacterError([:], "in number (expecting digit)") }// Consume optional fractional component.:ifuint(len()) > uint() && [] == '.' { = ++switch {caseuint(len()) <= uint():return , beforeFractionalDigits, io.ErrUnexpectedEOFcase'0' <= [] && [] <= '9': ++default:return , , NewInvalidCharacterError([:], "in number (expecting digit)") }foruint(len()) > uint() && ('0' <= [] && [] <= '9') { ++ } = withinFractionalDigits }// Consume optional exponent component.:ifuint(len()) > uint() && ([] == 'e' || [] == 'E') { = ++ifuint(len()) > uint() && ([] == '-' || [] == '+') { ++ }switch {caseuint(len()) <= uint():return , beforeExponentDigits, io.ErrUnexpectedEOFcase'0' <= [] && [] <= '9': ++default:return , , NewInvalidCharacterError([:], "in number (expecting digit)") }foruint(len()) > uint() && ('0' <= [] && [] <= '9') { ++ } = withinExponentDigits }return , , nil}// parseHexUint16 is similar to strconv.ParseUint,// but operates directly on []byte and is optimized for base-16.// See https://go.dev/issue/42429.func parseHexUint16[ ~[]byte | ~string]( ) ( uint16, bool) {iflen() != 4 {return0, false }for := range4 { := []switch {case'0' <= && <= '9': = - '0'case'a' <= && <= 'f': = 10 + - 'a'case'A' <= && <= 'F': = 10 + - 'A'default:return0, false } = *16 + uint16() }return , true}// ParseUint parses b as a decimal unsigned integer according to// a strict subset of the JSON number grammar, returning the value if valid.// It returns (0, false) if there is a syntax error and// returns (math.MaxUint64, false) if there is an overflow.func ( []byte) ( uint64, bool) {const = 20// len(fmt.Sprint(uint64(math.MaxUint64)))varintfor ; len() > && ('0' <= [] && [] <= '9'); ++ { = 10* + uint64([]-'0') }switch {case == 0 || len() != || ([0] == '0' && string() != "0"):return0, falsecase >= && ([0] != '1' || < 1e19 || > ):returnmath.MaxUint64, false }return , true}// ParseFloat parses a floating point number according to the Go float grammar.// Note that the JSON number grammar is a strict subset.//// If the number overflows the finite representation of a float,// then we return MaxFloat since any finite value will always be infinitely// more accurate at representing another finite value than an infinite value.func ( []byte, int) ( float64, bool) { , := strconv.ParseFloat(string(), )ifmath.IsInf(, 0) {switch {case == 32 && math.IsInf(, +1): = +math.MaxFloat32case == 64 && math.IsInf(, +1): = +math.MaxFloat64case == 32 && math.IsInf(, -1): = -math.MaxFloat32case == 64 && math.IsInf(, -1): = -math.MaxFloat64 } }return , == nil}
The pages are generated with Goldsv0.7.7-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.