// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import 

const (
	maxNonStarters = 30
	// The maximum number of characters needed for a buffer is
	// maxNonStarters + 1 for the starter + 1 for the GCJ
	maxBufferSize    = maxNonStarters + 2
	maxNFCExpansion  = 3  // NFC(0x1D160)
	maxNFKCExpansion = 18 // NFKC(0xFDFA)

	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)

// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int

const (
	// Indicates a rune was successfully added to the segment.
	ssSuccess ssState = iota
	// Indicates a rune starts a new segment and should not be added.
	ssStarter
	// Indicates a rune caused a segment overflow and a CGJ should be inserted.
	ssOverflow
)

// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8

// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func ( *streamSafe) ( Properties) {
	* = streamSafe(.nTrailingNonStarters())
}

// insert returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func ( *streamSafe) ( Properties) ssState {
	if * > maxNonStarters {
		panic("streamSafe was not reset")
	}
	 := .nLeadingNonStarters()
	if * += streamSafe(); * > maxNonStarters {
		* = 0
		return ssOverflow
	}
	// The Stream-Safe Text Processing prescribes that the counting can stop
	// as soon as a starter is encountered. However, there are some starters,
	// like Jamo V and T, that can combine with other runes, leaving their
	// successive non-starters appended to the previous, possibly causing an
	// overflow. We will therefore consider any rune with a non-zero nLead to
	// be a non-starter. Note that it always hold that if nLead > 0 then
	// nLead == nTrail.
	if  == 0 {
		* = streamSafe(.nTrailingNonStarters())
		return ssStarter
	}
	return ssSuccess
}

// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func ( *streamSafe) ( Properties) ssState {
	if * > maxNonStarters {
		panic("streamSafe was not reset")
	}
	 := * + streamSafe(.nTrailingNonStarters())
	if  > maxNonStarters {
		return ssOverflow
	}
	* = 
	if .nLeadingNonStarters() == 0 {
		return ssStarter
	}
	return ssSuccess
}

func ( streamSafe) () bool {
	return  == maxNonStarters
}

// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"

// reorderBuffer is used to normalize a single segment.  Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters.  Note that the byte buffer does not hold
// the UTF-8 characters in order.  Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
	rune  [maxBufferSize]Properties // Per character info.
	byte  [maxByteBufferSize]byte   // UTF-8 buffer. Referenced by runeInfo.pos.
	nbyte uint8                     // Number or bytes.
	ss    streamSafe                // For limiting length of non-starter sequence.
	nrune int                       // Number of runeInfos.
	f     formInfo

	src      input
	nsrc     int
	tmpBytes input

	out    []byte
	flushF func(*reorderBuffer) bool
}

func ( *reorderBuffer) ( Form,  []byte) {
	.f = *formTable[]
	.src.setBytes()
	.nsrc = len()
	.ss = 0
}

func ( *reorderBuffer) ( Form,  string) {
	.f = *formTable[]
	.src.setString()
	.nsrc = len()
	.ss = 0
}

func ( *reorderBuffer) ( []byte,  func(*reorderBuffer) bool) {
	.out = 
	.flushF = 
}

// reset discards all characters from the buffer.
func ( *reorderBuffer) () {
	.nrune = 0
	.nbyte = 0
}

func ( *reorderBuffer) () bool {
	if .f.composing {
		.compose()
	}
	 := .flushF()
	.reset()
	return 
}

// appendFlush appends the normalized segment to rb.out.
func appendFlush( *reorderBuffer) bool {
	for  := 0;  < .nrune; ++ {
		 := .rune[].pos
		 :=  + .rune[].size
		.out = append(.out, .byte[:]...)
	}
	return true
}

// flush appends the normalized segment to out and resets rb.
func ( *reorderBuffer) ( []byte) []byte {
	for  := 0;  < .nrune; ++ {
		 := .rune[].pos
		 :=  + .rune[].size
		 = append(, .byte[:]...)
	}
	.reset()
	return 
}

// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func ( *reorderBuffer) ( []byte) int {
	 := 0
	for  := 0;  < .nrune; ++ {
		 := .rune[]
		 += copy([:], .byte[.pos:.pos+.size])
	}
	.reset()
	return 
}

// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func ( *reorderBuffer) ( Properties) {
	 := .nrune
	 := .rune[:]
	 := .ccc
	if  > 0 {
		// Find insertion position + move elements to make room.
		for ;  > 0; -- {
			if [-1].ccc <=  {
				break
			}
			[] = [-1]
		}
	}
	.nrune += 1
	 := uint8(.nbyte)
	.nbyte += utf8.UTFMax
	.pos = 
	[] = 
}

// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int

const (
	iSuccess insertErr = -iota
	iShortDst
	iShortSrc
)

// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments are encountered, they leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func ( *reorderBuffer) ( input,  int,  Properties) insertErr {
	if  := .hangul();  != 0 {
		.decomposeHangul()
		return iSuccess
	}
	if .hasDecomposition() {
		return .insertDecomposed(.Decomposition())
	}
	.insertSingle(, , )
	return iSuccess
}

// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func ( *reorderBuffer) ( input,  int,  Properties) {
	if  := .hangul();  != 0 {
		.decomposeHangul()
	}
	if .hasDecomposition() {
		// TODO: inline.
		.insertDecomposed(.Decomposition())
	} else {
		.insertSingle(, , )
	}
}

// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func ( *reorderBuffer) ( []byte) insertErr {
	.tmpBytes.setBytes()
	// As the streamSafe accounting already handles the counting for modifiers,
	// we don't have to call next. However, we do need to keep the accounting
	// intact when flushing the buffer.
	for  := 0;  < len(); {
		 := .f.info(.tmpBytes, )
		if .BoundaryBefore() && .nrune > 0 && !.doFlush() {
			return iShortDst
		}
		 += copy(.byte[.nbyte:], [:+int(.size)])
		.insertOrdered()
	}
	return iSuccess
}

// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func ( *reorderBuffer) ( input,  int,  Properties) {
	.copySlice(.byte[.nbyte:], , +int(.size))
	.insertOrdered()
}

// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func ( *reorderBuffer) () {
	.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}

// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func ( *reorderBuffer) ( rune) {
	 := .nbyte
	 := utf8.EncodeRune(.byte[:], rune())
	.nbyte += utf8.UTFMax
	.rune[.nrune] = Properties{pos: , size: uint8()}
	.nrune++
}

// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func ( *reorderBuffer) ( int,  rune) {
	 := .rune[].pos
	 := utf8.EncodeRune(.byte[:], rune())
	.rune[] = Properties{pos: , size: uint8()}
}

// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func ( *reorderBuffer) ( int) rune {
	 := .rune[]
	,  := utf8.DecodeRune(.byte[.pos : .pos+.size])
	return 
}

// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func ( *reorderBuffer) ( int) []byte {
	 := .rune[]
	return .byte[.pos : int(.pos)+int(.size)]
}

// For Hangul we combine algorithmically, instead of using tables.
const (
	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
	hangulBase0 = 0xEA
	hangulBase1 = 0xB0
	hangulBase2 = 0x80

	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
	hangulEnd0 = 0xED
	hangulEnd1 = 0x9E
	hangulEnd2 = 0xA4

	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
	jamoLBase0 = 0xE1
	jamoLBase1 = 0x84
	jamoLEnd   = 0x1113
	jamoVBase  = 0x1161
	jamoVEnd   = 0x1176
	jamoTBase  = 0x11A7
	jamoTEnd   = 0x11C3

	jamoTCount   = 28
	jamoVCount   = 21
	jamoVTCount  = 21 * 28
	jamoLVTCount = 19 * 21 * 28
)

const hangulUTF8Size = 3

func isHangul( []byte) bool {
	if len() < hangulUTF8Size {
		return false
	}
	 := [0]
	if  < hangulBase0 {
		return false
	}
	 := [1]
	switch {
	case  == hangulBase0:
		return  >= hangulBase1
	case  < hangulEnd0:
		return true
	case  > hangulEnd0:
		return false
	case  < hangulEnd1:
		return true
	}
	return  == hangulEnd1 && [2] < hangulEnd2
}

func isHangulString( string) bool {
	if len() < hangulUTF8Size {
		return false
	}
	 := [0]
	if  < hangulBase0 {
		return false
	}
	 := [1]
	switch {
	case  == hangulBase0:
		return  >= hangulBase1
	case  < hangulEnd0:
		return true
	case  > hangulEnd0:
		return false
	case  < hangulEnd1:
		return true
	}
	return  == hangulEnd1 && [2] < hangulEnd2
}

// Caller must ensure len(b) >= 2.
func isJamoVT( []byte) bool {
	// True if (rune & 0xff00) == jamoLBase
	return [0] == jamoLBase0 && ([1]&0xFC) == jamoLBase1
}

func isHangulWithoutJamoT( []byte) bool {
	,  := utf8.DecodeRune()
	 -= hangulBase
	return  < jamoLVTCount && %jamoTCount == 0
}

// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written.  len(buf) should be at least 9.
func decomposeHangul( []byte,  rune) int {
	const  = 3
	 -= hangulBase
	 :=  % jamoTCount
	 /= jamoTCount
	utf8.EncodeRune(, jamoLBase+/jamoVCount)
	utf8.EncodeRune([:], jamoVBase+%jamoVCount)
	if  != 0 {
		utf8.EncodeRune([2*:], jamoTBase+)
		return 3 * 
	}
	return 2 * 
}

// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See https://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func ( *reorderBuffer) ( rune) {
	 -= hangulBase
	 :=  % jamoTCount
	 /= jamoTCount
	.appendRune(jamoLBase + /jamoVCount)
	.appendRune(jamoVBase + %jamoVCount)
	if  != 0 {
		.appendRune(jamoTBase + )
	}
}

// combineHangul algorithmically combines Jamo character components into Hangul.
// See https://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func ( *reorderBuffer) (, ,  int) {
	 := .rune[:]
	 := .nrune
	for ;  < ; ++ {
		 := [-1].ccc
		 := [].ccc
		if  == 0 {
			 =  - 1
		}
		if  != -1 &&  >=  {
			// b[i] is blocked by greater-equal cccX below it
			[] = []
			++
		} else {
			 := .runeAt() // also used to compare to hangulBase
			 := .runeAt() // also used to compare to jamoT
			switch {
			case jamoLBase <=  &&  < jamoLEnd &&
				jamoVBase <=  &&  < jamoVEnd:
				// 11xx plus 116x to LV
				.assignRune(, hangulBase+
					(-jamoLBase)*jamoVTCount+(-jamoVBase)*jamoTCount)
			case hangulBase <=  &&  < hangulEnd &&
				jamoTBase <  &&  < jamoTEnd &&
				((-hangulBase)%jamoTCount) == 0:
				// ACxx plus 11Ax to LVT
				.assignRune(, +-jamoTBase)
			default:
				[] = []
				++
			}
		}
	}
	.nrune = 
}

// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func ( *reorderBuffer) () {
	// Lazily load the map used by the combine func below, but do
	// it outside of the loop.
	recompMapOnce.Do(buildRecompMap)

	// UAX #15, section X5 , including Corrigendum #5
	// "In any character sequence beginning with starter S, a character C is
	//  blocked from S if and only if there is some character B between S
	//  and C, and either B is a starter or it has the same or higher
	//  combining class as C."
	 := .nrune
	if  == 0 {
		return
	}
	 := 1
	 := .rune[:]
	for ,  := 0, 1;  < ; ++ {
		if isJamoVT(.bytesAt()) {
			// Redo from start in Hangul mode. Necessary to support
			// U+320E..U+321E in NFKC mode.
			.combineHangul(, , )
			return
		}
		 := []
		// We can only use combineForward as a filter if we later
		// get the info for the combined character. This is more
		// expensive than using the filter. Using combinesBackward()
		// is safe.
		if .combinesBackward() {
			 := [-1].ccc
			 := .ccc
			 := false // b[i] blocked by starter or greater or equal CCC?
			if  == 0 {
				 =  - 1
			} else {
				 =  != -1 &&  >= 
			}
			if ! {
				 := combine(.runeAt(), .runeAt())
				if  != 0 {
					.assignRune(, )
					continue
				}
			}
		}
		[] = []
		++
	}
	.nrune = 
}