// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build goexperiment.jsonv2

package jsonwire

import (
	"io"
	"math"
	"slices"
	"strconv"
	"unicode/utf16"
	"unicode/utf8"
)

// ValueFlags is a set of bit flags.
// The flags declared below describe properties of a JSON string;
// the zero value has none of them set.
type ValueFlags uint

const (
	// The blank constant absorbs the zero value, (1<<0)/2 == 0,
	// so that the named flags below take the values 1, 2, and so on.
	_ ValueFlags = (1 << iota) / 2 // powers of two starting with zero

	stringNonVerbatim  // string cannot be naively treated as valid UTF-8
	stringNonCanonical // string not formatted according to RFC 8785, section 3.2.2.2.
	// TODO: Track whether a number is a non-integer?
)

// NOTE(review): the receiver, method, and parameter names appear to have been
// stripped from this copy of the file; these three methods do not compile as
// shown and their names must be restored from the upstream source.
//
// The first method ORs the flags passed as its argument into the pointed-to
// receiver. Presumably it is the Join method invoked by the string consumers
// in this file — confirm against upstream.
func ( *ValueFlags) ( ValueFlags) { * |=  }
// Reports whether the stringNonVerbatim bit is clear.
func ( ValueFlags) () bool    { return &stringNonVerbatim == 0 }
// Reports whether the stringNonCanonical bit is clear.
func ( ValueFlags) () bool   { return &stringNonCanonical == 0 }

// ConsumeWhitespace consumes leading JSON whitespace per RFC 7159, section 2.
// It returns the number of leading bytes of b that are a space, horizontal
// tab, carriage return, or line feed. It returns 0 if b is empty or begins
// with any other byte.
func ConsumeWhitespace(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	for len(b) > n && (b[n] == ' ' || b[n] == '\t' || b[n] == '\r' || b[n] == '\n') {
		n++
	}
	return n
}

// ConsumeNull consumes the next JSON null literal per RFC 7159, section 3.
// It returns the length of the literal if b begins with "null".
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used
// to obtain a detailed error.
func ConsumeNull(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "null"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeFalse consumes the next JSON false literal per RFC 7159, section 3.
// It returns the length of the literal if b begins with "false".
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used
// to obtain a detailed error.
func ConsumeFalse(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "false"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeTrue consumes the next JSON true literal per RFC 7159, section 3.
// It returns the length of the literal if b begins with "true".
// It returns 0 if it is invalid, in which case ConsumeLiteral should be used
// to obtain a detailed error.
func ConsumeTrue(b []byte) int {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	const literal = "true"
	if len(b) >= len(literal) && string(b[:len(literal)]) == literal {
		return len(literal)
	}
	return 0
}

// ConsumeLiteral consumes the next JSON literal per RFC 7159, section 3.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
func ( []byte,  string) ( int,  error) {
	for  := 0;  < len() &&  < len(); ++ {
		if [] != [] {
			return , NewInvalidCharacterError([:], "in literal "++" (expecting "+strconv.QuoteRune(rune([]))+")")
		}
	}
	if len() < len() {
		return len(), io.ErrUnexpectedEOF
	}
	return len(), nil
}

// ConsumeSimpleString consumes the next JSON string per RFC 7159, section 7
// but is limited to the grammar for an ASCII string without escape sequences.
// It returns 0 if it is invalid or more complicated than a simple string,
// in which case consumeString should be called.
//
// It rejects '<', '>', and '&' for compatibility reasons since these were
// always escaped in the v1 implementation. Thus, if this function reports
// non-zero then we know that the string would be encoded the same way
// under both v1 or v2 escape semantics.
func ( []byte) ( int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len() > 0 && [0] == '"' {
		++
		for len() >  && [] < utf8.RuneSelf && escapeASCII[[]] == 0 {
			++
		}
		if uint(len()) > uint() && [] == '"' {
			++
			return 
		}
	}
	return 0
}

// ConsumeString consumes the next JSON string per RFC 7159, section 7.
// If validateUTF8 is false, then this allows the presence of invalid UTF-8
// characters within the string itself.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
func ( *ValueFlags,  []byte,  bool) ( int,  error) {
	return ConsumeStringResumable(, , 0, )
}

// ConsumeStringResumable is identical to ConsumeString but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// NOTE(review): most identifiers (function, parameter, and local names) appear
// to have been stripped from this copy of the file, so this block does not
// compile as shown; restore them from the upstream source before building.
func ( *ValueFlags,  []byte,  int,  bool) ( int,  error) {
	// Consume the leading double quote.
	switch {
	case  > 0:
		 =  // already handled the leading quote
	case uint(len()) == 0:
		return , io.ErrUnexpectedEOF
	case [0] == '"':
		++
	default:
		return , NewInvalidCharacterError([:], `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len()) > uint() {
		// Optimize for long sequences of unescaped characters.
		 := func( byte) bool {
			return  < utf8.RuneSelf && ' ' <=  &&  != '\\' &&  != '"'
		}
		for uint(len()) > uint() && ([]) {
			++
		}
		if uint(len()) <= uint() {
			return , io.ErrUnexpectedEOF
		}

		// Check for terminating double quote.
		if [] == '"' {
			++
			return , nil
		}

		switch ,  := utf8.DecodeRune([:]); {
		// Handle UTF-8 encoded byte sequence.
		// Due to specialized handling of ASCII above, we know that
		// all normal sequences at this point must be 2 bytes or larger.
		case  > 1:
			 += 
		// Handle escape sequence.
		case  == '\\':
			// Any escape sequence marks the string as non-verbatim.
			.Join(stringNonVerbatim)
			 = 
			// An escape needs the backslash plus at least one more byte;
			// anything shorter is treated as truncated input.
			if uint(len()) < uint(+2) {
				return , io.ErrUnexpectedEOF
			}
			switch  := [+1];  {
			case '/':
				// Forward slash is the only character with 3 representations.
				// Per RFC 8785, section 3.2.2.2., this must not be escaped.
				.Join(stringNonCanonical)
				 += 2
			case '"', '\\', 'b', 'f', 'n', 'r', 't':
				 += 2
			case 'u':
				// A \u escape spans six bytes: the backslash, 'u',
				// and four hexadecimal digits.
				if uint(len()) < uint(+6) {
					if hasEscapedUTF16Prefix([:], false) {
						return , io.ErrUnexpectedEOF
					}
					.Join(stringNonCanonical)
					return , NewInvalidEscapeSequenceError([:])
				}
				,  := parseHexUint16([+2 : +6])
				if ! {
					.Join(stringNonCanonical)
					return , NewInvalidEscapeSequenceError([ : +6])
				}
				// Only certain control characters can use the \uFFFF notation
				// for canonical formatting (per RFC 8785, section 3.2.2.2.).
				switch  {
				// \uFFFF notation not permitted for these characters.
				case '\b', '\f', '\n', '\r', '\t':
					.Join(stringNonCanonical)
				default:
					// \uFFFF notation only permitted for control characters.
					if  >= ' ' {
						.Join(stringNonCanonical)
					} else {
						// \uFFFF notation must be lower case.
						for ,  := range [+2 : +6] {
							if 'A' <=  &&  <= 'F' {
								.Join(stringNonCanonical)
							}
						}
					}
				}
				 += 6

				// A surrogate half is checked for a following \uXXXX escape
				// that pairs with it. The check is gated on a boolean whose
				// name was stripped (presumably the UTF-8 validation
				// parameter — confirm against upstream).
				 := rune()
				if  && utf16.IsSurrogate() {
					if uint(len()) < uint(+6) {
						if hasEscapedUTF16Prefix([:], true) {
							return , io.ErrUnexpectedEOF
						}
						.Join(stringNonCanonical)
						// The reported offset and error span start six bytes
						// back, i.e., at the first escape of the pair.
						return  - 6, NewInvalidEscapeSequenceError([-6:])
					} else if ,  := parseHexUint16([+2 : +6]); [] != '\\' || [+1] != 'u' || ! {
						.Join(stringNonCanonical)
						return  - 6, NewInvalidEscapeSequenceError([-6 : +6])
					} else if  = utf16.DecodeRune(rune(), rune());  == utf8.RuneError {
						.Join(stringNonCanonical)
						return  - 6, NewInvalidEscapeSequenceError([-6 : +6])
					} else {
						 += 6
					}
				}
			default:
				.Join(stringNonCanonical)
				return , NewInvalidEscapeSequenceError([ : +2])
			}
		// Handle invalid UTF-8.
		case  == utf8.RuneError:
			// An incomplete rune at the end of the input is reported as
			// truncation rather than as invalid UTF-8.
			if !utf8.FullRune([:]) {
				return , io.ErrUnexpectedEOF
			}
			.Join(stringNonVerbatim | stringNonCanonical)
			if  {
				return , ErrInvalidUTF8
			}
			++
		// Handle invalid control characters.
		case  < ' ':
			.Join(stringNonVerbatim | stringNonCanonical)
			return , NewInvalidCharacterError([:], "in string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune([:]))
		}
	}
	// The input ended before a terminating double quote was found.
	return , io.ErrUnexpectedEOF
}

// AppendUnquote appends the unescaped form of a JSON string in src to dst.
// Any invalid UTF-8 within the string will be replaced with utf8.RuneError,
// and ErrInvalidUTF8 is recorded as the error in that case.
// The input must be an entire JSON string with no surrounding whitespace;
// any data after the closing quote is reported as an invalid character error.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// NOTE(review): most identifiers (function, type parameter, parameter, and
// local names) appear to have been stripped from this copy of the file, so
// this block does not compile as shown; restore them from the upstream source
// before building.
func [ ~[]byte | ~string]( []byte,  ) ( []byte,  error) {
	// Pre-grow a buffer with slices.Grow (arguments stripped; presumably the
	// destination grown by the length of the source — confirm).
	 = slices.Grow(, len())

	// Consume the leading double quote.
	var ,  int
	switch {
	case uint(len()) == 0:
		return , io.ErrUnexpectedEOF
	case [0] == '"':
		,  = 1, 1
	default:
		return , NewInvalidCharacterError(, `at start of string (expecting '"')`)
	}

	// Consume every character in the string.
	for uint(len()) > uint() {
		// Optimize for long sequences of unescaped characters.
		 := func( byte) bool {
			return  < utf8.RuneSelf && ' ' <=  &&  != '\\' &&  != '"'
		}
		for uint(len()) > uint() && ([]) {
			++
		}
		if uint(len()) <= uint() {
			 = append(, [:]...)
			return , io.ErrUnexpectedEOF
		}

		// Check for terminating double quote.
		if [] == '"' {
			 = append(, [:]...)
			++
			// Anything after the closing quote is reported as an
			// invalid character error.
			if  < len() {
				 = NewInvalidCharacterError([:], "after string value")
			}
			return , 
		}

		switch ,  := utf8.DecodeRuneInString(string(truncateMaxUTF8([:]))); {
		// Handle UTF-8 encoded byte sequence.
		// Due to specialized handling of ASCII above, we know that
		// all normal sequences at this point must be 2 bytes or larger.
		case  > 1:
			 += 
		// Handle escape sequence.
		case  == '\\':
			 = append(, [:]...)

			// An escape needs the backslash plus at least one more byte;
			// anything shorter is treated as truncated input.
			if uint(len()) < uint(+2) {
				return , io.ErrUnexpectedEOF
			}
			switch  := [+1];  {
			case '"', '\\', '/':
				 = append(, )
				 += 2
			case 'b':
				 = append(, '\b')
				 += 2
			case 'f':
				 = append(, '\f')
				 += 2
			case 'n':
				 = append(, '\n')
				 += 2
			case 'r':
				 = append(, '\r')
				 += 2
			case 't':
				 = append(, '\t')
				 += 2
			case 'u':
				// A \u escape spans six bytes: the backslash, 'u',
				// and four hexadecimal digits.
				if uint(len()) < uint(+6) {
					if hasEscapedUTF16Prefix([:], false) {
						return , io.ErrUnexpectedEOF
					}
					return , NewInvalidEscapeSequenceError([:])
				}
				,  := parseHexUint16([+2 : +6])
				if ! {
					return , NewInvalidEscapeSequenceError([ : +6])
				}
				 += 6

				// Check whether this is a surrogate half.
				 := rune()
				if utf16.IsSurrogate() {
					 = utf8.RuneError // assume failure unless the following succeeds
					if uint(len()) < uint(+6) {
						if hasEscapedUTF16Prefix([:], true) {
							return utf8.AppendRune(, ), io.ErrUnexpectedEOF
						}
						 = NewInvalidEscapeSequenceError([-6:])
					} else if ,  := parseHexUint16([+2 : +6]); [] != '\\' || [+1] != 'u' || ! {
						 = NewInvalidEscapeSequenceError([-6 : +6])
					} else if  = utf16.DecodeRune(rune(), rune());  == utf8.RuneError {
						 = NewInvalidEscapeSequenceError([-6 : +6])
					} else {
						 += 6
					}
				}

				 = utf8.AppendRune(, )
			default:
				return , NewInvalidEscapeSequenceError([ : +2])
			}
			 = 
		// Handle invalid UTF-8.
		case  == utf8.RuneError:
			 = append(, [:]...)
			// An incomplete rune at the end of the input is reported as
			// truncation rather than as invalid UTF-8.
			if !utf8.FullRuneInString(string(truncateMaxUTF8([:]))) {
				return , io.ErrUnexpectedEOF
			}
			// NOTE: An unescaped string may be longer than the escaped string
			// because invalid UTF-8 bytes are being replaced.
			 = append(, "\uFFFD"...)
			 += 
			 = 
			// This case does not return: ErrInvalidUTF8 is recorded and
			// scanning continues with the rest of the input.
			 = ErrInvalidUTF8
		// Handle invalid control characters.
		case  < ' ':
			 = append(, [:]...)
			return , NewInvalidCharacterError([:], "in string (expecting non-control character)")
		default:
			panic("BUG: unhandled character " + QuoteRune([:]))
		}
	}
	// The input ended before a terminating double quote was found.
	 = append(, [:]...)
	return , io.ErrUnexpectedEOF
}

// hasEscapedUTF16Prefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
//
// Only the first six bytes are inspected: a backslash, a 'u', and up to four
// hexadecimal digits. An empty b is a valid prefix. If lowerSurrogateHalf is
// true, the hexadecimal digits seen so far must additionally be consistent
// with a value in the range ['\uDC00':'\uDFFF'].
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
	for i := 0; i < len(b); i++ {
		switch c := b[i]; {
		case i == 0 && c != '\\':
			return false
		case i == 1 && c != 'u':
			return false
		case i == 2 && lowerSurrogateHalf && c != 'd' && c != 'D':
			return false // not within ['\uDC00':'\uDFFF']
		case i == 3 && lowerSurrogateHalf && !('c' <= c && c <= 'f') && !('C' <= c && c <= 'F'):
			return false // not within ['\uDC00':'\uDFFF']
		case i >= 2 && i < 6 && !('0' <= c && c <= '9') && !('a' <= c && c <= 'f') && !('A' <= c && c <= 'F'):
			return false
		}
	}
	return true
}

// UnquoteMayCopy returns the unescaped form of b.
// If there are no escaped characters, the output is simply a subslice of
// the input with the surrounding quotes removed.
// Otherwise, a new buffer is allocated for the output.
// It assumes the input is valid.
func ( []byte,  bool) []byte {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if  {
		return [len(`"`) : len()-len(`"`)]
	}
	, _ = AppendUnquote(nil, )
	return 
}

// ConsumeSimpleNumber consumes the next JSON number per RFC 7159, section 6
// but is limited to the grammar for a positive integer.
// It returns the number of bytes consumed.
// It returns 0 if it is invalid or more complicated than a simple integer
// (a leading minus sign, a fraction, or an exponent),
// in which case ConsumeNumber should be called.
func ConsumeSimpleNumber(b []byte) (n int) {
	// NOTE: The arguments and logic are kept simple to keep this inlinable.
	if len(b) > 0 {
		if b[0] == '0' {
			// A leading zero is a complete integer on its own.
			n++
		} else if '1' <= b[0] && b[0] <= '9' {
			n++
			for len(b) > n && ('0' <= b[n] && b[n] <= '9') {
				n++
			}
		} else {
			return 0
		}
		// A following '.', 'e', or 'E' means this is not a simple integer.
		if uint(len(b)) <= uint(n) || (b[n] != '.' && b[n] != 'e' && b[n] != 'E') {
			return n
		}
	}
	return 0
}

// ConsumeNumberState identifies a position within the JSON number grammar.
// It is passed to and returned from the resumable number consumer so that
// a scan can continue from where a previous call stopped.
type ConsumeNumberState uint

// The order of these values matters: each "withinX" state is immediately
// followed by the "beforeY" state of the next component, and the resumable
// number consumer relies on this when it increments the state.
const (
	consumeNumberInit ConsumeNumberState = iota
	beforeIntegerDigits
	withinIntegerDigits
	beforeFractionalDigits
	withinFractionalDigits
	beforeExponentDigits
	withinExponentDigits
)

// ConsumeNumber consumes the next JSON number per RFC 7159, section 6.
// It reports the number of bytes consumed and whether an error was encountered.
// If the input appears truncated, it returns io.ErrUnexpectedEOF.
//
// Note that JSON numbers are not self-terminating.
// If the entire input is consumed, then the caller needs to consider whether
// there may be subsequent unread data that may still be part of this number.
func ( []byte) ( int,  error) {
	, _,  = ConsumeNumberResumable(, 0, consumeNumberInit)
	return , 
}

// ConsumeNumberResumable is identical to ConsumeNumber but supports resuming
// from a previous call that returned io.ErrUnexpectedEOF.
//
// NOTE(review): most identifiers (function, parameter, result, local, and
// label names, including the goto targets) appear to have been stripped from
// this copy of the file, so this block does not compile as shown; restore
// them from the upstream source before building.
func ( []byte,  int,  ConsumeNumberState) ( int,  ConsumeNumberState,  error) {
	// Jump to the right state when resuming from a partial consumption.
	 = 
	if  > consumeNumberInit {
		switch  {
		case withinIntegerDigits, withinFractionalDigits, withinExponentDigits:
			// Consume leading digits.
			for uint(len()) > uint() && ('0' <= [] && [] <= '9') {
				++
			}
			if uint(len()) <= uint() {
				return , , nil // still within the same state
			}
			++ // switches "withinX" to "beforeY" where Y is the state after X
		}
		switch  {
		case beforeIntegerDigits:
			goto 
		case beforeFractionalDigits:
			goto 
		case beforeExponentDigits:
			goto 
		default:
			return , , nil
		}
	}

	// Consume required integer component (with optional minus sign).
:
	 = 
	if uint(len()) > 0 && [0] == '-' {
		++
	}
	switch {
	case uint(len()) <= uint():
		return , beforeIntegerDigits, io.ErrUnexpectedEOF
	case [] == '0':
		// A leading zero is a complete integer component, so the next
		// thing that may follow is the fractional component.
		++
		 = beforeFractionalDigits
	case '1' <= [] && [] <= '9':
		++
		for uint(len()) > uint() && ('0' <= [] && [] <= '9') {
			++
		}
		 = withinIntegerDigits
	default:
		return , , NewInvalidCharacterError([:], "in number (expecting digit)")
	}

	// Consume optional fractional component.
:
	if uint(len()) > uint() && [] == '.' {
		 = 
		++
		// At least one digit is required after the decimal point.
		switch {
		case uint(len()) <= uint():
			return , beforeFractionalDigits, io.ErrUnexpectedEOF
		case '0' <= [] && [] <= '9':
			++
		default:
			return , , NewInvalidCharacterError([:], "in number (expecting digit)")
		}
		for uint(len()) > uint() && ('0' <= [] && [] <= '9') {
			++
		}
		 = withinFractionalDigits
	}

	// Consume optional exponent component.
:
	if uint(len()) > uint() && ([] == 'e' || [] == 'E') {
		 = 
		++
		// The exponent may carry an optional sign.
		if uint(len()) > uint() && ([] == '-' || [] == '+') {
			++
		}
		// At least one digit is required in the exponent.
		switch {
		case uint(len()) <= uint():
			return , beforeExponentDigits, io.ErrUnexpectedEOF
		case '0' <= [] && [] <= '9':
			++
		default:
			return , , NewInvalidCharacterError([:], "in number (expecting digit)")
		}
		for uint(len()) > uint() && ('0' <= [] && [] <= '9') {
			++
		}
		 = withinExponentDigits
	}

	return , , nil
}

// parseHexUint16 is similar to strconv.ParseUint,
// but operates directly on []byte and is optimized for base-16.
// The input must be exactly four hexadecimal digits (either case);
// otherwise it returns (0, false).
// See https://go.dev/issue/42429.
func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
	if len(b) != 4 {
		return 0, false
	}
	for i := 0; i < 4; i++ {
		c := b[i]
		// Convert the ASCII digit to its numeric value in place.
		switch {
		case '0' <= c && c <= '9':
			c = c - '0'
		case 'a' <= c && c <= 'f':
			c = 10 + c - 'a'
		case 'A' <= c && c <= 'F':
			c = 10 + c - 'A'
		default:
			return 0, false
		}
		v = v*16 + uint16(c)
	}
	return v, true
}

// ParseUint parses b as a decimal unsigned integer according to
// a strict subset of the JSON number grammar, returning the value if valid.
// It returns (0, false) if there is a syntax error and
// returns (math.MaxUint64, false) if there is an overflow.
//
// The whole of b must consist of decimal digits, with no sign and
// no leading zero other than the single digit "0".
func ParseUint(b []byte) (v uint64, ok bool) {
	const overflowLength = 20 // len(fmt.Sprint(uint64(math.MaxUint64)))
	var n int
	for ; len(b) > n && ('0' <= b[n] && b[n] <= '9'); n++ {
		v = 10*v + uint64(b[n]-'0')
	}
	switch {
	case n == 0 || len(b) != n || (b[0] == '0' && string(b) != "0"):
		return 0, false
	// A 20-digit value fits only if it starts with '1' and the accumulated
	// value did not wrap around below 1e19; anything longer always overflows.
	case n >= overflowLength && (b[0] != '1' || v < 1e19 || n > overflowLength):
		return math.MaxUint64, false
	}
	return v, true
}

// ParseFloat parses a floating point number according to the Go float grammar.
// Note that the JSON number grammar is a strict subset.
// The bits argument is the float precision (32 or 64) handed to
// strconv.ParseFloat, and ok reports whether strconv.ParseFloat
// returned a nil error.
//
// If the number overflows the finite representation of a float,
// then we return MaxFloat since any finite value will always be infinitely
// more accurate at representing another finite value than an infinite value.
// In that case ok is still false because strconv.ParseFloat reports
// the overflow as an error.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
	fv, err := strconv.ParseFloat(string(b), bits)
	if math.IsInf(fv, 0) {
		// Clamp an infinite result to the largest finite value of the
		// requested precision, preserving the sign.
		switch {
		case bits == 32 && math.IsInf(fv, +1):
			fv = +math.MaxFloat32
		case bits == 64 && math.IsInf(fv, +1):
			fv = +math.MaxFloat64
		case bits == 32 && math.IsInf(fv, -1):
			fv = -math.MaxFloat32
		case bits == 64 && math.IsInf(fv, -1):
			fv = -math.MaxFloat64
		}
	}
	return fv, err == nil
}