// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import (
	
	
)

// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
// sequence of starter and non-starter runes for the purpose of normalization.
// It is an exported alias for the internal maxByteBufferSize constant, which
// is also the size of the scratch buffer embedded in Iter.
const MaxSegmentSize = maxByteBufferSize

// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
//
// The iterator is a small state machine: the next field holds the function
// that produces the following segment, and the individual step functions in
// this file replace it as they move between ASCII, Hangul, multi-segment and
// general processing.
type Iter struct {
	// rb is the reorder buffer. The code in this file also reaches through it
	// for the input (rb.src, rb.nsrc), the form data (rb.f) and the
	// per-segment state (rb.ss).
	rb     reorderBuffer
	// buf is scratch space for segments that cannot be returned as a direct
	// slice of the input (string input, decompositions, reordered output).
	buf    [maxByteBufferSize]byte
	info   Properties // first character saved from previous iteration
	next   iterFunc   // implementation of next depends on form
	// asciiF is the ASCII fast-path step; the initializers set it to
	// nextASCIIBytes or nextASCIIString depending on the input type.
	asciiF iterFunc

	p        int    // current position in input source
	multiSeg []byte // remainder of multi-segment decomposition
}

// iterFunc is one step of the iterator state machine: it returns the next
// normalized segment and may update the iterator (including its next field).
type iterFunc func(*Iter) []byte

// Init initializes i to iterate over src after normalizing it to Form f.
//
// For empty input the iterator is only marked as done and the recorded source
// length is cleared. Otherwise the reorder buffer is initialized for the byte
// slice, the form's main step function is installed, the byte-slice ASCII
// fast path is selected, and the properties of the first character are loaded.
//
// NOTE(review): receiver, method and parameter names are missing in this copy
// of the file; restore them before building.
func ( *Iter) ( Form,  []byte) {
	.p = 0
	if len() == 0 {
		// setDone copies the (possibly stale) rb.nsrc into p before nsrc is
		// cleared; Done still reports true because p >= 0 == nsrc.
		.setDone()
		.rb.nsrc = 0
		return
	}
	.multiSeg = nil
	.rb.init(, )
	.next = .rb.f.nextMain
	.asciiF = nextASCIIBytes
	// Prime the state with the first character so Next can start immediately.
	.info = .rb.f.info(.rb.src, .p)
	.rb.ss.first(.info)
}

// InitString initializes i to iterate over src after normalizing it to Form f.
//
// It mirrors Init for string input: the reorder buffer is set up through
// initString and the string variant of the ASCII fast path is selected, so
// returned segments are copied into the iterator's buffer rather than sliced
// from the input.
//
// NOTE(review): receiver, method and parameter names are missing in this copy
// of the file; restore them before building.
func ( *Iter) ( Form,  string) {
	.p = 0
	if len() == 0 {
		// Empty input: mark the iterator as finished and clear the length.
		.setDone()
		.rb.nsrc = 0
		return
	}
	.multiSeg = nil
	.rb.initString(, )
	.next = .rb.f.nextMain
	.asciiF = nextASCIIString
	// Prime the state with the first character so Next can start immediately.
	.info = .rb.f.info(.rb.src, .p)
	.rb.ss.first(.info)
}

// Seek sets the segment to be returned by the next call to Next to start
// at position p.  It is the responsibility of the caller to set p to the
// start of a segment.
//
// The second argument selects how the target position is computed:
// 0 uses an absolute position, 1 is relative to the current position, and
// 2 is relative to the length of the input. Any other value yields an error,
// as does a resulting negative position. A target at or beyond the end of the
// input marks the iterator as done and returns the end position with a nil
// error. On success the iterator state (pending multi-segment data, step
// function, saved character properties) is reset for the new position.
//
// NOTE(review): method, parameter and local names are missing in this copy of
// the file; the description of the relative cases assumes the stripped
// operand is the offset argument. Confirm when restoring.
func ( *Iter) ( int64,  int) (int64, error) {
	var  int64
	switch  {
	case 0:
		 = 
	case 1:
		 = int64(.p) + 
	case 2:
		 = int64(.rb.nsrc) + 
	default:
		return 0, fmt.Errorf("norm: invalid whence")
	}
	if  < 0 {
		return 0, fmt.Errorf("norm: negative position")
	}
	// NOTE(review): the int64 position is narrowed to int here; on platforms
	// with 32-bit int a very large value could wrap. Confirm this is acceptable.
	if int() >= .rb.nsrc {
		.setDone()
		return int64(.p), nil
	}
	.p = int()
	// Drop any half-consumed multi-segment decomposition and restart from the
	// form's main step function at the new position.
	.multiSeg = nil
	.next = .rb.f.nextMain
	.info = .rb.f.info(.rb.src, .p)
	.rb.ss.first(.info)
	return , nil
}

// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
//
// The input is treated as a string whenever rb.src.bytes is nil. In that case
// the result aliases the iterator's buffer and is only valid until the buffer
// is written again; copy truncates silently to the buffer's length.
//
// NOTE(review): the slice bounds and names are missing in this copy of the
// file; restore them before building.
func ( *Iter) (,  int) []byte {
	if .rb.src.bytes == nil {
		return .buf[:copy(.buf[:], .rb.src.str[:])]
	}
	return .rb.src.bytes[:]
}

// Pos returns the byte position at which the next call to Next will commence processing.
// It is a plain read of the iterator's p field.
func ( *Iter) () int {
	return .p
}

// setDone marks the iterator as exhausted: subsequent steps go through
// nextDone (which returns nil) and the position is moved to the end of the
// input so that Done reports true.
func ( *Iter) () {
	.next = nextDone
	.p = .rb.nsrc
}

// Done returns true if there is no more input to process, that is, when the
// current position has reached or passed the recorded length of the input.
func ( *Iter) () bool {
	return .p >= .rb.nsrc
}

// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
//
// Next simply dispatches to the step function currently stored in the next
// field. Once the iterator is done that function is nextDone, so Next
// returns nil.
func ( *Iter) () []byte {
	return .next()
}

// nextASCIIBytes is the ASCII fast path for byte-slice input. It looks one
// byte past the current position:
//   - at the end of the input it marks the iterator done and returns a slice
//     of the input;
//   - if the following byte is below utf8.RuneSelf (ASCII) it advances the
//     position and returns a slice of the input without copying;
//   - otherwise it reloads the character properties at the current position
//     and hands control back to the form's main step function.
//
// NOTE(review): parameter, local names and slice bounds are missing in this
// copy of the file; restore them before building.
func nextASCIIBytes( *Iter) []byte {
	 := .p + 1
	if  >= .rb.nsrc {
		// Remember the start before setDone overwrites p.
		 := .p
		.setDone()
		return .rb.src.bytes[:]
	}
	if .rb.src.bytes[] < utf8.RuneSelf {
		 := .p
		.p = 
		return .rb.src.bytes[:]
	}
	// A non-ASCII byte follows: leave the fast path.
	.info = .rb.f.info(.rb.src, .p)
	.next = .rb.f.nextMain
	return .next()
}

// nextASCIIString is the ASCII fast path for string input. It behaves like
// nextASCIIBytes, but because a string cannot be returned as a []byte without
// copying, the single byte at the current position is written to buf[0] and
// buf[:1] is returned. When the following byte is not ASCII, it reloads the
// character properties and hands control back to the form's main step
// function.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func nextASCIIString( *Iter) []byte {
	 := .p + 1
	if  >= .rb.nsrc {
		// Copy the byte out before setDone moves p to the end.
		.buf[0] = .rb.src.str[.p]
		.setDone()
		return .buf[:1]
	}
	if .rb.src.str[] < utf8.RuneSelf {
		.buf[0] = .rb.src.str[.p]
		.p = 
		return .buf[:1]
	}
	// A non-ASCII byte follows: leave the fast path.
	.info = .rb.f.info(.rb.src, .p)
	.next = .rb.f.nextMain
	return .next()
}

// nextHangul is the step function installed by nextDecomposed while the input
// consists of consecutive Hangul syllables. It decomposes the syllable at the
// current position into buf and advances by hangulUTF8Size bytes.
//
// If the input ends after this syllable the iterator is marked done before
// the decomposition is returned. If the probe via rb.src.hangul reports 0
// (no Hangul syllable), the segment state is advanced, the character
// properties are reloaded and control returns to the form's main step
// function instead.
//
// NOTE(review): parameter, local names and the hangul() arguments are missing
// in this copy of the file; which position each hangul() call probes cannot
// be determined from here.
func nextHangul( *Iter) []byte {
	 := .p
	 :=  + hangulUTF8Size
	if  >= .rb.nsrc {
		.setDone()
	} else if .rb.src.hangul() == 0 {
		.rb.ss.next(.info)
		.info = .rb.f.info(.rb.src, .p)
		.next = .rb.f.nextMain
		return .next()
	}
	.p = 
	return .buf[:decomposeHangul(.buf[:], .rb.src.hangul())]
}

// nextDone is the terminal step function installed by setDone. It always
// returns nil and leaves the iterator untouched.
func nextDone( *Iter) []byte {
	return nil
}

// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
//
// It works on the pending decomposition stored in multiSeg: after skipping
// the first rune it scans forward rune by rune until it finds one whose
// properties report a boundary before it. The part before that boundary is
// returned and the rest is kept in multiSeg for the next call. When no
// further boundary exists, the remainder is the last segment and is handed to
// the form's main step function.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func nextMulti( *Iter) []byte {
	 := 0
	 := .multiSeg
	// skip first rune
	for  = 1;  < len() && !utf8.RuneStart([]); ++ {
	}
	for  < len() {
		 := .rb.f.info(input{bytes: }, )
		if .BoundaryBefore() {
			.multiSeg = [:]
			return [:]
		}
		 += int(.size)
	}
	// treat last segment as normal decomposition
	.next = .rb.f.nextMain
	return .next()
}

// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
//
// Runes from the pending decomposition in multiSeg are inserted into the
// reorder buffer until a rune with a boundary before it is found. At that
// point the buffered runes are composed and flushed into buf, the boundary
// rune is inserted as the start of the following segment, multiSeg is
// advanced past it, and the flushed segment is returned. Once multiSeg is
// exhausted the iterator switches back to nextComposed and finishes the
// current segment through doNormComposed.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func nextMultiNorm( *Iter) []byte {
	 := 0
	 := .multiSeg
	for  < len() {
		 := .rb.f.info(input{bytes: }, )
		if .BoundaryBefore() {
			// Flush what has been collected so far before starting the next
			// segment with the boundary rune.
			.rb.compose()
			 := .buf[:.rb.flushCopy(.buf[:])]
			.rb.insertUnsafe(input{bytes: }, , )
			.multiSeg = [+int(.size):]
			return 
		}
		.rb.insertUnsafe(input{bytes: }, , )
		 += int(.size)
	}
	.multiSeg = nil
	.next = nextComposed
	return doNormComposed()
}

// nextDecomposed is the implementation of Next for forms NFD and NFKD.
//
// Each loop iteration handles the character described by the saved info in
// one of four ways:
//   - size <= 1: an ASCII or illegal byte, consumed one byte at a time, with a
//     switch to the ASCII fast path when the following byte is ASCII too;
//   - the character has a decomposition: the decomposition is emitted,
//     possibly after copying pending input into buf, and multi-segment
//     decompositions are delegated to nextMulti;
//   - the character is a Hangul syllable: it is decomposed into buf, with a
//     switch to nextHangul when another syllable follows;
//   - otherwise the character is passed through unchanged.
//
// After each character the segment state (rb.ss) decides whether the segment
// ends (ssStarter), whether the limit on combining characters was hit
// (ssOverflow, which installs nextCGJDecompose), or whether it continues.
// If the next character's ccc is lower than the preceding tccc, reordering is
// required and control jumps to the labelled tail, which moves the segment
// into the reorder buffer and finishes it with doNormDecomposed.
//
// NOTE(review): parameter names, local names and the goto label are missing
// in this copy of the file, so the roles of the individual counters are
// inferred from the surviving comments and calls; confirm when restoring.
func nextDecomposed( *Iter) ( []byte) {
	 := 0
	,  := .p, 0
	for {
		if  := int(.info.size);  <= 1 {
			.rb.ss = 0
			 := .p
			.p++ // ASCII or illegal byte.  Either way, advance by 1.
			if .p >= .rb.nsrc {
				.setDone()
				return .returnSlice(, .p)
			} else if .rb.src._byte(.p) < utf8.RuneSelf {
				// Next byte is ASCII as well: continue on the fast path.
				.next = .asciiF
				return .returnSlice(, .p)
			}
			++
		} else if  := .info.Decomposition();  != nil {
			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
			// Case 1: there is a leftover to copy.  In this case the decomposition
			// must begin with a modifier and should always be appended.
			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
			 :=  + len()
			if  > 0 {
				.rb.src.copySlice(.buf[:], , .p)
				// TODO: this condition should not be possible, but we leave it
				// in for defensive purposes.
				if  > len(.buf) {
					return .buf[:]
				}
			} else if .info.multiSegment() {
				// outp must be 0 as multi-segment decompositions always
				// start a new segment.
				if .multiSeg == nil {
					.multiSeg = 
					.next = nextMulti
					return nextMulti()
				}
				// We are in the last segment.  Treat as normal decomposition.
				 = .multiSeg
				.multiSeg = nil
				 = len()
			}
			// Save the trailing ccc before info is replaced by the next character.
			 := .info.tccc
			if .p += ; .p >= .rb.nsrc {
				.setDone()
				.info = Properties{} // Force BoundaryBefore to succeed.
			} else {
				.info = .rb.f.info(.rb.src, .p)
			}
			switch .rb.ss.next(.info) {
			case ssOverflow:
				.next = nextCGJDecompose
				fallthrough
			case ssStarter:
				if  > 0 {
					copy(.buf[:], )
					return .buf[:]
				}
				return 
			}
			// The segment continues: append the decomposition to buf and move on.
			copy(.buf[:], )
			 = 
			,  = .p, 
			if .info.ccc <  {
				goto 
			}
			continue
		} else if  := .rb.src.hangul(.p);  != 0 {
			 = decomposeHangul(.buf[:], )
			.p += hangulUTF8Size
			,  = .p, 
			if .p >= .rb.nsrc {
				.setDone()
				break
			} else if .rb.src.hangul(.p) != 0 {
				// Another syllable follows: let nextHangul handle the run.
				.next = nextHangul
				return .buf[:]
			}
		} else {
			// No decomposition: the character is passed through as is, unless
			// it would no longer fit into buf.
			 :=  + 
			if  > len(.buf) {
				break
			}
			 = 
			.p += 
		}
		if .p >= .rb.nsrc {
			.setDone()
			break
		}
		 := .info.tccc
		.info = .rb.f.info(.rb.src, .p)
		if  := .rb.ss.next(.info);  == ssStarter {
			break
		} else if  == ssOverflow {
			.next = nextCGJDecompose
			break
		}
		if .info.ccc <  {
			goto 
		}
	}
	// Fast exit: either return the input slice directly, or copy whatever input
	// has not been copied yet behind the data already in buf.
	if  == 0 {
		return .returnSlice(, .p)
	} else if  < .p {
		.rb.src.copySlice(.buf[:], , .p)
	}
	return .buf[:]
:
	// Insert what we have decomposed so far in the reorderBuffer.
	// As we will only reorder, there will always be enough room.
	.rb.src.copySlice(.buf[:], , .p)
	.rb.insertDecomposed(.buf[0:])
	return doNormDecomposed()
}

// doNormDecomposed completes a decomposed segment through the reorder buffer.
// It keeps inserting the current character (described by the saved info) and
// advancing until the input ends, a character with ccc == 0 is reached, or
// the segment state reports ssOverflow (in which case nextCGJDecompose is
// installed for the following call). The reorder buffer is then flushed into
// buf and the flushed bytes are returned.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func doNormDecomposed( *Iter) []byte {
	for {
		.rb.insertUnsafe(.rb.src, .p, .info)
		if .p += int(.info.size); .p >= .rb.nsrc {
			.setDone()
			break
		}
		.info = .rb.f.info(.rb.src, .p)
		if .info.ccc == 0 {
			break
		}
		if  := .rb.ss.next(.info);  == ssOverflow {
			.next = nextCGJDecompose
			break
		}
	}
	// new segment or too many combining characters: exit normalization
	return .buf[:.rb.flushCopy(.buf[:])]
}

// nextCGJDecompose is installed after the segment state reported ssOverflow
// while decomposing. It resets the segment state, inserts a CGJ into the
// reorder buffer (presumably U+034F COMBINING GRAPHEME JOINER; see
// insertCGJ), restores nextDecomposed as the step function, restarts the
// segment state from the saved character and finishes the segment through
// doNormDecomposed.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func nextCGJDecompose( *Iter) []byte {
	.rb.ss = 0
	.rb.insertCGJ()
	.next = nextDecomposed
	.rb.ss.first(.info)
	 := doNormDecomposed()
	return 
}

// nextComposed is the implementation of Next for forms NFC and NFKC.
//
// The loop is a fast path that only advances over characters for which
// isYesC reports true, so the segment can be returned as a plain slice of the
// input. It stops at the end of the input, when an ASCII byte follows (the
// ASCII fast path is installed), when the segment state reports a new
// starter, on ssOverflow (nextCGJCompose is installed), or when the segment
// would no longer fit into buf.
//
// If a character is not isYesC, or if a character's ccc is lower than the
// preceding tccc, the fast path is abandoned: the position is rewound to the
// start of the segment and the segment is rebuilt through the reorder buffer,
// using nextMultiNorm for multi-segment decompositions and doNormComposed
// otherwise.
//
// NOTE(review): parameter names, local names and the goto label are missing
// in this copy of the file; restore them before building.
func nextComposed( *Iter) []byte {
	,  := 0, .p
	var  uint8
	for {
		if !.info.isYesC() {
			goto 
		}
		 = .info.tccc
		 := int(.info.size)
		if  == 0 {
			 = 1 // illegal rune: copy byte-by-byte
		}
		 :=  + 
		if  > len(.buf) {
			break
		}
		 = 
		.p += 
		if .p >= .rb.nsrc {
			.setDone()
			break
		} else if .rb.src._byte(.p) < utf8.RuneSelf {
			// Next byte is ASCII: end the segment and switch to the fast path.
			.rb.ss = 0
			.next = .asciiF
			break
		}
		.info = .rb.f.info(.rb.src, .p)
		if  := .rb.ss.next(.info);  == ssStarter {
			break
		} else if  == ssOverflow {
			.next = nextCGJCompose
			break
		}
		if .info.ccc <  {
			goto 
		}
	}
	return .returnSlice(, .p)
:
	// reset to start position
	.p = 
	.info = .rb.f.info(.rb.src, .p)
	.rb.ss.first(.info)
	if .info.multiSegment() {
		// Seed the reorder buffer with the first rune of the decomposition and
		// let nextMultiNorm walk the remainder.
		 := .info.Decomposition()
		 := .rb.f.info(input{bytes: }, 0)
		.rb.insertUnsafe(input{bytes: }, 0, )
		.multiSeg = [int(.size):]
		.next = nextMultiNorm
		return nextMultiNorm()
	}
	// NOTE(review): ss.first was already called on the same info a few lines
	// above; this second call looks redundant. Confirm before removing.
	.rb.ss.first(.info)
	.rb.insertUnsafe(.rb.src, .p, .info)
	return doNormComposed()
}

// doNormComposed completes a composed segment through the reorder buffer.
// The caller must already have inserted the first rune. The function advances
// past the current character and inserts following characters until the input
// ends, the segment state reports a new starter, or it reports ssOverflow
// (in which case nextCGJCompose is installed for the following call). The
// buffered runes are then composed, flushed into buf and returned.
//
// NOTE(review): parameter and local names are missing in this copy of the
// file; restore them before building.
func doNormComposed( *Iter) []byte {
	// First rune should already be inserted.
	for {
		if .p += int(.info.size); .p >= .rb.nsrc {
			.setDone()
			break
		}
		.info = .rb.f.info(.rb.src, .p)
		if  := .rb.ss.next(.info);  == ssStarter {
			break
		} else if  == ssOverflow {
			.next = nextCGJCompose
			break
		}
		.rb.insertUnsafe(.rb.src, .p, .info)
	}
	.rb.compose()
	 := .buf[:.rb.flushCopy(.buf[:])]
	return 
}

// nextCGJCompose is installed after the segment state reported ssOverflow
// while composing. It resets the segment state, inserts a CGJ into the
// reorder buffer (see insertCGJ), restores nextComposed as the step function,
// restarts the segment state from the saved character, inserts that character
// and finishes the segment through doNormComposed.
//
// NOTE(review): parameter names are missing in this copy of the file; restore
// them before building.
func nextCGJCompose( *Iter) []byte {
	.rb.ss = 0 // instead of first
	.rb.insertCGJ()
	.next = nextComposed
	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
	// even if they are not. This is particularly dubious for U+FF9E and UFF9A.
	// If we ever change that, insert a check here.
	// NOTE(review): "UFF9A" above lacks the "U+" prefix and may be a typo for
	// U+FF9F, the code point that directly follows U+FF9E; confirm against the
	// Unicode data.
	.rb.ss.first(.info)
	.rb.insertUnsafe(.rb.src, .p, .info)
	return doNormComposed()
}