// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

// Package jsonwire implements stateless functionality for handling JSON text.
package jsonwire import ( ) // TrimSuffixWhitespace trims JSON from the end of b. func ( []byte) []byte { // NOTE: The arguments and logic are kept simple to keep this inlinable. := len() - 1 for >= 0 && ([] == ' ' || [] == '\t' || [] == '\r' || [] == '\n') { -- } return [:+1] } // TrimSuffixString trims a valid JSON string at the end of b. // The behavior is undefined if there is not a valid JSON string present. func ( []byte) []byte { // NOTE: The arguments and logic are kept simple to keep this inlinable. if len() > 0 && [len()-1] == '"' { = [:len()-1] } for len() >= 2 && !([len()-1] == '"' && [len()-2] != '\\') { = [:len()-1] // trim all characters except an unescaped quote } if len() > 0 && [len()-1] == '"' { = [:len()-1] } return } // HasSuffixByte reports whether b ends with c. func ( []byte, byte) bool { // NOTE: The arguments and logic are kept simple to keep this inlinable. return len() > 0 && [len()-1] == } // TrimSuffixByte removes c from the end of b if it is present. func ( []byte, byte) []byte { // NOTE: The arguments and logic are kept simple to keep this inlinable. if len() > 0 && [len()-1] == { return [:len()-1] } return } // QuoteRune quotes the first rune in the input. func [ ~[]byte | ~string]( ) string { , := utf8.DecodeRuneInString(string(truncateMaxUTF8())) if == utf8.RuneError && == 1 { return `'\x` + strconv.FormatUint(uint64([0]), 16) + `'` } return strconv.QuoteRune() } // CompareUTF16 lexicographically compares x to y according // to the UTF-16 codepoints of the UTF-8 encoded input strings. // This implements the ordering specified in RFC 8785, section 3.2.3. func [ ~[]byte | ~string](, ) int { // NOTE: This is an optimized, mostly allocation-free implementation // of CompareUTF16Simple in wire_test.go. FuzzCompareUTF16 verifies that the // two implementations agree on the result of comparing any two strings. := func( rune) bool { return ('\u0000' <= && <= '\uD7FF') || ('\uE000' <= && <= '\uFFFF') } for { if len() == 0 || len() == 0 { return cmp.Compare(len(), len()) } // ASCII fast-path. if [0] < utf8.RuneSelf || [0] < utf8.RuneSelf { if [0] != [0] { return cmp.Compare([0], [0]) } , = [1:], [1:] continue } // Decode next pair of runes as UTF-8. , := utf8.DecodeRuneInString(string(truncateMaxUTF8())) , := utf8.DecodeRuneInString(string(truncateMaxUTF8())) := () := () switch { // The x rune is a single UTF-16 codepoint, while // the y rune is a surrogate pair of UTF-16 codepoints. case && !: , _ = utf16.EncodeRune() // The y rune is a single UTF-16 codepoint, while // the x rune is a surrogate pair of UTF-16 codepoints. case && !: , _ = utf16.EncodeRune() } if != { return cmp.Compare(, ) } // Check for invalid UTF-8, in which case, // we just perform a byte-for-byte comparison. if isInvalidUTF8(, ) || isInvalidUTF8(, ) { if [0] != [0] { return cmp.Compare([0], [0]) } } , = [:], [:] } } // truncateMaxUTF8 truncates b such it contains at least one rune. // // The utf8 package currently lacks generic variants, which complicates // generic functions that operates on either []byte or string. // As a hack, we always call the utf8 function operating on strings, // but always truncate the input such that the result is identical. // // Example usage: // // utf8.DecodeRuneInString(string(truncateMaxUTF8(b))) // // Converting a []byte to a string is stack allocated since // truncateMaxUTF8 guarantees that the []byte is short. func truncateMaxUTF8[ ~[]byte | ~string]( ) { // TODO(https://go.dev/issue/56948): Remove this function and // instead directly call generic utf8 functions wherever used. if len() > utf8.UTFMax { return [:utf8.UTFMax] } return } // TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead. var ErrInvalidUTF8 = errors.New("invalid UTF-8") func [ ~[]byte | ~string]( , string) error { := QuoteRune() return errors.New("invalid character " + + " " + ) } func [ ~[]byte | ~string]( ) error { := "escape sequence" if len() > 6 { = "surrogate pair" } := strings.IndexFunc(string(), func( rune) bool { return == '`' || == utf8.RuneError || unicode.IsSpace() || !unicode.IsPrint() }) >= 0 if { return errors.New("invalid " + + " " + strconv.Quote(string()) + " in string") } else { return errors.New("invalid " + + " `" + string() + "` in string") } } // TruncatePointer optionally truncates the JSON pointer, // enforcing that the length roughly does not exceed n. func ( string, int) string { if len() <= { return } := / 2 := len() - /2 // Avoid truncating a name if there are multiple names present. if := strings.LastIndexByte([:], '/'); > 0 { = } if := strings.IndexByte([:], '/'); >= 0 { += + len("/") } // Avoid truncation in the middle of a UTF-8 rune. for > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString([:])) { -- } for < len() && isInvalidUTF8(utf8.DecodeRuneInString([:])) { ++ } // Determine the right middle fragment to use. var string switch strings.Count([:], "/") { case 0: = "…" case 1: = "…/…" default: = "…/…/…" } if strings.HasPrefix([:], "/") && != "…" { = strings.TrimPrefix(, "…") } if strings.HasSuffix([:], "/") && != "…" { = strings.TrimSuffix(, "…") } return [:] + + [:] } func isInvalidUTF8( rune, int) bool { return == utf8.RuneError && == 1 }