// Copyright 2020 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.//go:build goexperiment.jsonv2package jsontextimport ()// ErrDuplicateName indicates that a JSON token could not be// encoded or decoded because it results in a duplicate JSON object name.// This error is directly wrapped within a [SyntacticError] when produced.//// The name of a duplicate JSON object member can be extracted as://// err := ...// var serr jsontext.SyntacticError// if errors.As(err, &serr) && serr.Err == jsontext.ErrDuplicateName {// ptr := serr.JSONPointer // JSON pointer to duplicate name// name := ptr.LastToken() // duplicate name itself// ...// }//// This error is only returned if [AllowDuplicateNames] is false.varErrDuplicateName = errors.New("duplicate object member name")// ErrNonStringName indicates that a JSON token could not be// encoded or decoded because it is not a string,// as required for JSON object names according to RFC 8259, section 4.// This error is directly wrapped within a [SyntacticError] when produced.varErrNonStringName = errors.New("object member name must be a string")var ( errMissingValue = errors.New("missing value after object name") errMismatchDelim = errors.New("mismatching structural token for object or array") errMaxDepth = errors.New("exceeded max depth") errInvalidNamespace = errors.New("object namespace is in an invalid state"))// Per RFC 8259, section 9, implementations may enforce a maximum depth.// Such a limit is necessary to prevent stack overflows.const maxNestingDepth = 10000type state struct {// Tokens validates whether the next token kind is valid. Tokens stateMachine// Names is a stack of object names. Names objectNameStack// Namespaces is a stack of object namespaces. // For performance reasons, Encoder or Decoder may not update this // if Marshal or Unmarshal is able to track names in a more efficient way. // See makeMapArshaler and makeStructArshaler. // Not used if AllowDuplicateNames is true. Namespaces objectNamespaceStack}// needObjectValue reports whether the next token should be an object value.// This method is used by [wrapSyntacticError].func ( *state) () bool {return .Tokens.Last.needObjectValue()}func ( *state) () { .Tokens.reset() .Names.reset() .Namespaces.reset()}// Pointer is a JSON Pointer (RFC 6901) that references a particular JSON value// relative to the root of the top-level JSON value.//// A Pointer is a slash-separated list of tokens, where each token is// either a JSON object name or an index to a JSON array element// encoded as a base-10 integer value.// It is impossible to distinguish between an array index and an object name// (that happens to be an base-10 encoded integer) without also knowing// the structure of the top-level JSON value that the pointer refers to.//// There is exactly one representation of a pointer to a particular value,// so comparability of Pointer values is equivalent to checking whether// they both point to the exact same value.typePointerstring// IsValid reports whether p is a valid JSON Pointer according to RFC 6901.// Note that the concatenation of two valid pointers produces a valid pointer.func ( Pointer) () bool {for , := range {switch {case == '~' && (+1 == len() || ([+1] != '0' && [+1] != '1')):returnfalse// invalid escapecase == '\ufffd' && !strings.HasPrefix(string([:]), "\ufffd"):returnfalse// invalid UTF-8 } }returnlen() == 0 || [0] == '/'}// Contains reports whether the JSON value that p points to// is equal to or contains the JSON value that pc points to.func ( Pointer) ( Pointer) bool {// Invariant: len(p) <= len(pc) if p.Contains(pc) , := strings.CutPrefix(string(), string())return && ( == "" || [0] == '/')}// Parent strips off the last token and returns the remaining pointer.// The parent of an empty p is an empty string.func ( Pointer) () Pointer {return [:max(strings.LastIndexByte(string(), '/'), 0)]}// LastToken returns the last token in the pointer.// The last token of an empty p is an empty string.func ( Pointer) () string { := [max(strings.LastIndexByte(string(), '/'), 0):]returnunescapePointerToken(strings.TrimPrefix(string(), "/"))}// AppendToken appends a token to the end of p and returns the full pointer.func ( Pointer) ( string) Pointer {returnPointer(appendEscapePointerName([]byte(+"/"), ))}// TODO: Add Pointer.AppendTokens,// but should this take in a ...string or an iter.Seq[string]?// Tokens returns an iterator over the reference tokens in the JSON pointer,// starting from the first token until the last token (unless stopped early).func ( Pointer) () iter.Seq[string] {returnfunc( func(string) bool) {forlen() > 0 { = Pointer(strings.TrimPrefix(string(), "/")) := min(uint(strings.IndexByte(string(), '/')), uint(len()))if !(unescapePointerToken(string()[:])) {return } = [:] } }}func unescapePointerToken( string) string {ifstrings.Contains(, "~") {// Per RFC 6901, section 3, unescape '~' and '/' characters. = strings.ReplaceAll(, "~1", "/") = strings.ReplaceAll(, "~0", "~") }return}// appendStackPointer appends a JSON Pointer (RFC 6901) to the current value.//// - If where is -1, then it points to the previously processed token.//// - If where is 0, then it points to the parent JSON object or array,// or an object member if in-between an object member key and value.// This is useful when the position is ambiguous whether// we are interested in the previous or next token, or// when we are uncertain whether the next token// continues or terminates the current object or array.//// - If where is +1, then it points to the next expected value,// assuming that it continues the current JSON object or array.// As a special case, if the next token is a JSON object name,// then it points to the parent JSON object.//// Invariant: Must call s.names.copyQuotedBuffer beforehand.func ( state) ( []byte, int) []byte {varintfor := 1; < .Tokens.Depth(); ++ { := .Tokens.index() := -1// by default point to previous array elementif := == .Tokens.Depth()-1; {switch {case < 0 && .Length() == 0 || == 0 && !.needObjectValue() || > 0 && .NeedObjectName():returncase > 0 && .isArray(): = 0// point to next array element } }switch {case .isObject(): = appendEscapePointerName(append(, '/'), .Names.getUnquoted()) ++case .isArray(): = strconv.AppendUint(append(, '/'), uint64(.Length()+int64()), 10) } }return}func appendEscapePointerName[ ~[]byte | ~string]( []byte, ) []byte {for , := rangestring() {// Per RFC 6901, section 3, escape '~' and '/' characters.switch {case'~': = append(, "~0"...)case'/': = append(, "~1"...)default: = utf8.AppendRune(, ) } }return}// stateMachine is a push-down automaton that validates whether// a sequence of tokens is valid or not according to the JSON grammar.// It is useful for both encoding and decoding.//// It is a stack where each entry represents a nested JSON object or array.// The stack has a minimum depth of 1 where the first level is a// virtual JSON array to handle a stream of top-level JSON values.// The top-level virtual JSON array is special in that it doesn't require commas// between each JSON value.//// For performance, most methods are carefully written to be inlinable.// The zero value is a valid state machine ready for use.type stateMachine struct { Stack []stateEntry Last stateEntry}// reset resets the state machine.// The machine always starts with a minimum depth of 1.func ( *stateMachine) () { .Stack = .Stack[:0]ifcap(.Stack) > 1<<10 { .Stack = nil } .Last = stateTypeArray}// Depth is the current nested depth of JSON objects and arrays.// It is one-indexed (i.e., top-level values have a depth of 1).func ( stateMachine) () int {returnlen(.Stack) + 1}// index returns a reference to the ith entry.// It is only valid until the next push method call.func ( *stateMachine) ( int) *stateEntry {if == len(.Stack) {return &.Last }return &.Stack[]}// DepthLength reports the current nested depth and// the length of the last JSON object or array.func ( stateMachine) () (int, int64) {return .Depth(), .Last.Length()}// appendLiteral appends a JSON literal as the next token in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case .Last.NeedObjectName():returnErrNonStringNamecase !.Last.isValidNamespace():returnerrInvalidNamespacedefault: .Last.Increment()returnnil }}// appendString appends a JSON string as the next token in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case !.Last.isValidNamespace():returnerrInvalidNamespacedefault: .Last.Increment()returnnil }}// appendNumber appends a JSON number as the next token in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {return .appendLiteral()}// pushObject appends a JSON begin object token as next in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case .Last.NeedObjectName():returnErrNonStringNamecase !.Last.isValidNamespace():returnerrInvalidNamespacecaselen(.Stack) == maxNestingDepth:returnerrMaxDepthdefault: .Last.Increment() .Stack = append(.Stack, .Last) .Last = stateTypeObjectreturnnil }}// popObject appends a JSON end object token as next in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case !.Last.isObject():returnerrMismatchDelimcase .Last.needObjectValue():returnerrMissingValuecase !.Last.isValidNamespace():returnerrInvalidNamespacedefault: .Last = .Stack[len(.Stack)-1] .Stack = .Stack[:len(.Stack)-1]returnnil }}// pushArray appends a JSON begin array token as next in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case .Last.NeedObjectName():returnErrNonStringNamecase !.Last.isValidNamespace():returnerrInvalidNamespacecaselen(.Stack) == maxNestingDepth:returnerrMaxDepthdefault: .Last.Increment() .Stack = append(.Stack, .Last) .Last = stateTypeArrayreturnnil }}// popArray appends a JSON end array token as next in the sequence.// If an error is returned, the state is not mutated.func ( *stateMachine) () error {switch {case !.Last.isArray() || len(.Stack) == 0: // forbid popping top-level virtual JSON arrayreturnerrMismatchDelimcase !.Last.isValidNamespace():returnerrInvalidNamespacedefault: .Last = .Stack[len(.Stack)-1] .Stack = .Stack[:len(.Stack)-1]returnnil }}// NeedIndent reports whether indent whitespace should be injected.// A zero value means that no whitespace should be injected.// A positive value means '\n', indentPrefix, and (n-1) copies of indentBody// should be appended to the output immediately before the next token.func ( stateMachine) ( Kind) ( int) { := == '}' || == ']'switch {case .Depth() == 1:return0// top-level values are never indentedcase .Last.Length() == 0 && :return0// an empty object or array is never indentedcase .Last.Length() == 0 || .Last.needImplicitComma():return .Depth()case :return .Depth() - 1default:return0 }}// MayAppendDelim appends a colon or comma that may precede the next token.func ( stateMachine) ( []byte, Kind) []byte {switch {case .Last.needImplicitColon():returnappend(, ':')case .Last.needImplicitComma() && len(.Stack) != 0: // comma not needed for top-level valuesreturnappend(, ',')default:return }}// needDelim reports whether a colon or comma token should be implicitly emitted// before the next token of the specified kind.// A zero value means no delimiter should be emitted.func ( stateMachine) ( Kind) ( byte) {switch {case .Last.needImplicitColon():return':'case .Last.needImplicitComma() && len(.Stack) != 0: // comma not needed for top-level valuesreturn','default:return0 }}// InvalidateDisabledNamespaces marks all disabled namespaces as invalid.//// For efficiency, Marshal and Unmarshal may disable namespaces since there are// more efficient ways to track duplicate names. However, if an error occurs,// the namespaces in Encoder or Decoder will be left in an inconsistent state.// Mark the namespaces as invalid so that future method calls on// Encoder or Decoder will return an error.func ( *stateMachine) () {for := range .Depth() { := .index()if !.isActiveNamespace() { .invalidateNamespace() } }}// stateEntry encodes several artifacts within a single unsigned integer:// - whether this represents a JSON object or array,// - whether this object should check for duplicate names, and// - how many elements are in this JSON object or array.type stateEntry uint64const (// The type mask (1 bit) records whether this is a JSON object or array. stateTypeMask stateEntry = 0x8000_0000_0000_0000 stateTypeObject stateEntry = 0x8000_0000_0000_0000 stateTypeArray stateEntry = 0x0000_0000_0000_0000// The name check mask (2 bit) records whether to update // the namespaces for the current JSON object and // whether the namespace is valid. stateNamespaceMask stateEntry = 0x6000_0000_0000_0000 stateDisableNamespace stateEntry = 0x4000_0000_0000_0000 stateInvalidNamespace stateEntry = 0x2000_0000_0000_0000// The count mask (61 bits) records the number of elements. stateCountMask stateEntry = 0x1fff_ffff_ffff_ffff stateCountLSBMask stateEntry = 0x0000_0000_0000_0001 stateCountOdd stateEntry = 0x0000_0000_0000_0001 stateCountEven stateEntry = 0x0000_0000_0000_0000)// Length reports the number of elements in the JSON object or array.// Each name and value in an object entry is treated as a separate element.func ( stateEntry) () int64 {returnint64( & stateCountMask)}// isObject reports whether this is a JSON object.func ( stateEntry) () bool {return &stateTypeMask == stateTypeObject}// isArray reports whether this is a JSON array.func ( stateEntry) () bool {return &stateTypeMask == stateTypeArray}// NeedObjectName reports whether the next token must be a JSON string,// which is necessary for JSON object names.func ( stateEntry) () bool {return &(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountEven}// needImplicitColon reports whether an colon should occur next,// which always occurs after JSON object names.func ( stateEntry) () bool {return .needObjectValue()}// needObjectValue reports whether the next token must be a JSON value,// which is necessary after every JSON object name.func ( stateEntry) () bool {return &(stateTypeMask|stateCountLSBMask) == stateTypeObject|stateCountOdd}// needImplicitComma reports whether an comma should occur next,// which always occurs after a value in a JSON object or array// before the next value (or name).func ( stateEntry) ( Kind) bool {return !.needObjectValue() && .Length() > 0 && != '}' && != ']'}// Increment increments the number of elements for the current object or array.// This assumes that overflow won't practically be an issue since// 1<<bits.OnesCount(stateCountMask) is sufficiently large.func ( *stateEntry) () { (*)++}// decrement decrements the number of elements for the current object or array.// It is the callers responsibility to ensure that e.length > 0.func ( *stateEntry) () { (*)--}// DisableNamespace disables the JSON object namespace such that the// Encoder or Decoder no longer updates the namespace.func ( *stateEntry) () { * |= stateDisableNamespace}// isActiveNamespace reports whether the JSON object namespace is actively// being updated and used for duplicate name checks.func ( stateEntry) () bool {return &(stateDisableNamespace) == 0}// invalidateNamespace marks the JSON object namespace as being invalid.func ( *stateEntry) () { * |= stateInvalidNamespace}// isValidNamespace reports whether the JSON object namespace is valid.func ( stateEntry) () bool {return &(stateInvalidNamespace) == 0}// objectNameStack is a stack of names when descending into a JSON object.// In contrast to objectNamespaceStack, this only has to remember a single name// per JSON object.//// This data structure may contain offsets to encodeBuffer or decodeBuffer.// It violates clean abstraction of layers, but is significantly more efficient.// This ensures that popping and pushing in the common case is a trivial// push/pop of an offset integer.//// The zero value is an empty names stack ready for use.type objectNameStack struct {// offsets is a stack of offsets for each name. // A non-negative offset is the ending offset into the local names buffer. // A negative offset is the bit-wise inverse of a starting offset into // a remote buffer (e.g., encodeBuffer or decodeBuffer). // A math.MinInt offset at the end implies that the last object is empty. // Invariant: Positive offsets always occur before negative offsets. offsets []int// unquotedNames is a back-to-back concatenation of names. unquotedNames []byte}func ( *objectNameStack) () { .offsets = .offsets[:0] .unquotedNames = .unquotedNames[:0]ifcap(.offsets) > 1<<6 { .offsets = nil// avoid pinning arbitrarily large amounts of memory }ifcap(.unquotedNames) > 1<<10 { .unquotedNames = nil// avoid pinning arbitrarily large amounts of memory }}func ( *objectNameStack) () int {returnlen(.offsets)}// getUnquoted retrieves the ith unquoted name in the stack.// It returns an empty string if the last object is empty.//// Invariant: Must call copyQuotedBuffer beforehand.func ( *objectNameStack) ( int) []byte { .ensureCopiedBuffer()if == 0 {return .unquotedNames[:.offsets[0]] } else {return .unquotedNames[.offsets[-1]:.offsets[-0]] }}// invalidOffset indicates that the last JSON object currently has no name.const invalidOffset = math.MinInt// push descends into a nested JSON object.func ( *objectNameStack) () { .offsets = append(.offsets, invalidOffset)}// ReplaceLastQuotedOffset replaces the last name with the starting offset// to the quoted name in some remote buffer. All offsets provided must be// relative to the same buffer until copyQuotedBuffer is called.func ( *objectNameStack) ( int) {// Use bit-wise inversion instead of naive multiplication by -1 to avoid // ambiguity regarding zero (which is a valid offset into the names field). // Bit-wise inversion is mathematically equivalent to -i-1, // such that 0 becomes -1, 1 becomes -2, and so forth. // This ensures that remote offsets are always negative. .offsets[len(.offsets)-1] = ^}// replaceLastUnquotedName replaces the last name with the provided name.//// Invariant: Must call copyQuotedBuffer beforehand.func ( *objectNameStack) ( string) { .ensureCopiedBuffer()varintiflen(.offsets) > 1 { = .offsets[len(.offsets)-2] } .unquotedNames = append(.unquotedNames[:], ...) .offsets[len(.offsets)-1] = len(.unquotedNames)}// clearLast removes any name in the last JSON object.// It is semantically equivalent to ns.push followed by ns.pop.func ( *objectNameStack) () { .offsets[len(.offsets)-1] = invalidOffset}// pop ascends out of a nested JSON object.func ( *objectNameStack) () { .offsets = .offsets[:len(.offsets)-1]}// copyQuotedBuffer copies names from the remote buffer into the local names// buffer so that there are no more offset references into the remote buffer.// This allows the remote buffer to change contents without affecting// the names that this data structure is trying to remember.func ( *objectNameStack) ( []byte) {// Find the first negative offset.varintfor = len(.offsets) - 1; >= 0 && .offsets[] < 0; -- {continue }// Copy each name from the remote buffer into the local buffer.for = + 1; < len(.offsets); ++ {if == len(.offsets)-1 && .offsets[] == invalidOffset {if == 0 { .offsets[] = 0 } else { .offsets[] = .offsets[-1] }break// last JSON object had a push without any names }// As a form of Hyrum proofing, we write an invalid character into the // buffer to make misuse of Decoder.ReadToken more obvious. // We need to undo that mutation here. := [^.offsets[]:]if [0] == invalidateBufferByte { [0] = '"' }// Append the unquoted name to the local buffer.varintif > 0 { = .offsets[-1] }if := jsonwire.ConsumeSimpleString(); > 0 { .unquotedNames = append(.unquotedNames[:], [len(`"`):-len(`"`)]...) } else { .unquotedNames, _ = jsonwire.AppendUnquote(.unquotedNames[:], ) } .offsets[] = len(.unquotedNames) }}func ( *objectNameStack) () {iflen(.offsets) > 0 && .offsets[len(.offsets)-1] < 0 {panic("BUG: copyQuotedBuffer not called beforehand") }}// objectNamespaceStack is a stack of object namespaces.// This data structure assists in detecting duplicate names.type objectNamespaceStack []objectNamespace// reset resets the object namespace stack.func ( *objectNamespaceStack) () {ifcap(*) > 1<<10 { * = nil } * = (*)[:0]}// push starts a new namespace for a nested JSON object.func ( *objectNamespaceStack) () {ifcap(*) > len(*) { * = (*)[:len(*)+1] .Last().reset() } else { * = append(*, objectNamespace{}) }}// Last returns a pointer to the last JSON object namespace.func ( objectNamespaceStack) () *objectNamespace {return &[len()-1]}// pop terminates the namespace for a nested JSON object.func ( *objectNamespaceStack) () { * = (*)[:len(*)-1]}// objectNamespace is the namespace for a JSON object.// In contrast to objectNameStack, this needs to remember a all names// per JSON object.//// The zero value is an empty namespace ready for use.type objectNamespace struct {// It relies on a linear search over all the names before switching // to use a Go map for direct lookup.// endOffsets is a list of offsets to the end of each name in buffers. // The length of offsets is the number of names in the namespace. endOffsets []uint// allUnquotedNames is a back-to-back concatenation of every name in the namespace. allUnquotedNames []byte// mapNames is a Go map containing every name in the namespace. // Only valid if non-nil. mapNames map[string]struct{}}// reset resets the namespace to be empty.func ( *objectNamespace) () { .endOffsets = .endOffsets[:0] .allUnquotedNames = .allUnquotedNames[:0] .mapNames = nilifcap(.endOffsets) > 1<<6 { .endOffsets = nil// avoid pinning arbitrarily large amounts of memory }ifcap(.allUnquotedNames) > 1<<10 { .allUnquotedNames = nil// avoid pinning arbitrarily large amounts of memory }}// length reports the number of names in the namespace.func ( *objectNamespace) () int {returnlen(.endOffsets)}// getUnquoted retrieves the ith unquoted name in the namespace.func ( *objectNamespace) ( int) []byte {if == 0 {return .allUnquotedNames[:.endOffsets[0]] } else {return .allUnquotedNames[.endOffsets[-1]:.endOffsets[-0]] }}// lastUnquoted retrieves the last name in the namespace.func ( *objectNamespace) () []byte {return .getUnquoted(.length() - 1)}// insertQuoted inserts a name and reports whether it was inserted,// which only occurs if name is not already in the namespace.// The provided name must be a valid JSON string.func ( *objectNamespace) ( []byte, bool) bool {if { = [len(`"`) : len()-len(`"`)] }return .insert(, !)}func ( *objectNamespace) ( []byte) bool {return .insert(, false)}func ( *objectNamespace) ( []byte, bool) bool {var []byteif { , _ = jsonwire.AppendUnquote(.allUnquotedNames, ) } else { = append(.allUnquotedNames, ...) } = [len(.allUnquotedNames):]// Switch to a map if the buffer is too large for linear search. // This does not add the current name to the map.if .mapNames == nil && (.length() > 64 || len(.allUnquotedNames) > 1024) { .mapNames = make(map[string]struct{})varuintfor , := range .endOffsets { := .allUnquotedNames[:] .mapNames[string()] = struct{}{} // allocates a new string = } }if .mapNames == nil {// Perform linear search over the buffer to find matching names. // It provides O(n) lookup, but does not require any allocations.varuintfor , := range .endOffsets {ifstring(.allUnquotedNames[:]) == string() {returnfalse } = } } else {// Use the map if it is populated. // It provides O(1) lookup, but requires a string allocation per name.if , := .mapNames[string()]; {returnfalse } .mapNames[string()] = struct{}{} // allocates a new string } .allUnquotedNames = .endOffsets = append(.endOffsets, uint(len(.allUnquotedNames)))returntrue}// removeLast removes the last name in the namespace.func ( *objectNamespace) () {if .mapNames != nil {delete(.mapNames, string(.lastUnquoted())) }if .length()-1 == 0 { .endOffsets = .endOffsets[:0] .allUnquotedNames = .allUnquotedNames[:0] } else { .endOffsets = .endOffsets[:.length()-1] .allUnquotedNames = .allUnquotedNames[:.endOffsets[.length()-1]] }}
The pages are generated with Goldsv0.7.9-preview. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.