// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import "encoding/binary"

// This file contains Form-specific logic and wrappers for data in tables.go.

// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie.  Each trie maps
// a rune to a uint16. The values take two forms.  For v >= 0x8000:
//   bits
//   15:    1 (inverse of NFD_QC bit of qcInfo)
//   13..8: qcInfo (see below). isYesD is always true (no decomposition).
//    7..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
//    <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below).  The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero.  The value of v determines which ccc are appended
// to the sequences.  For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.

// Masks used when decoding qcInfo values and decomposition header bytes.
const (
	qcInfoMask      = 0x3F // to clear all but the relevant bits in a qcInfo
	headerLenMask   = 0x3F // extract the length value from the header byte
	headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)

// Properties provides access to normalization properties of a rune.
//
// The ccc and tccc fields hold compressed values that are translated through
// the ccc table by the CCC, LeadCCC and TrailCCC accessors.
type Properties struct {
	pos   uint8  // start position in reorderBuffer; used in composition.go
	size  uint8  // length of UTF-8 encoding of this rune
	ccc   uint8  // leading canonical combining class (ccc if not decomposition)
	tccc  uint8  // trailing canonical combining class (ccc if not decomposition)
	nLead uint8  // number of leading non-starters.
	flags qcInfo // quick check flags
	index uint16 // offset of the decomposition header in decomps; 0 if there is no decomposition
}

// functions dispatchable per form

// lookupFunc returns the Properties for input b at position i.
// NOTE(review): i is presumably the byte offset at which the rune starts;
// confirm against the input type's charinfo methods.
type lookupFunc func(b input, i int) Properties

// formInfo holds Form-specific functions and tables.
type formInfo struct {
	form                     Form       // the normalization form described by this entry
	composing, compatibility bool       // form type
	info                     lookupFunc // rune property lookup for this form
	nextMain                 iterFunc   // iteration function for this form (iterFunc is declared elsewhere)
}

// formTable holds one formInfo per normalization form, in the order
// NFC, NFD, NFKC, NFKD. A composing form and its decomposing counterpart
// share the same lookup function (lookupInfoNFC for NFC/NFD, lookupInfoNFKC
// for NFKC/NFKD) and differ only in their composing flag and nextMain.
// NOTE(review): presumably indexed by Form value, so the order of the
// entries matters; confirm against the Form constants.
var formTable = []*formInfo{{
	form:          NFC,
	composing:     true,
	compatibility: false,
	info:          lookupInfoNFC,
	nextMain:      nextComposed,
}, {
	form:          NFD,
	composing:     false,
	compatibility: false,
	info:          lookupInfoNFC,
	nextMain:      nextDecomposed,
}, {
	form:          NFKC,
	composing:     true,
	compatibility: true,
	info:          lookupInfoNFKC,
	nextMain:      nextComposed,
}, {
	form:          NFKD,
	composing:     false,
	compatibility: true,
	info:          lookupInfoNFKC,
	nextMain:      nextDecomposed,
}}

// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user.  For example, in NFD, there is a boundary
// after 'a'.  However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.

// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func ( Properties) () bool {
	if .ccc == 0 && !.combinesBackward() {
		return true
	}
	// We assume that the CCC of the first character in a decomposition
	// is always non-zero if different from info.ccc and that we can return
	// false at this point. This is verified by maketables.
	return false
}

// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func ( Properties) () bool {
	// TODO: loosen these conditions.
	return .isInert()
}

// We pack quick check data in 6 bits:
//
//	5:    Combines forward  (0 == false, 1 == true)
//	4..3: NFC_QC Yes(00), No (10), or Maybe (11)
//	2:    NFD_QC Yes (0) or No (1). No also means there is a decomposition.
//	1..0: Number of trailing non-starters.
//
// When all 6 bits are zero (and the CCC is zero), the character is inert,
// meaning it is never influenced by normalization.
type qcInfo uint8

func ( Properties) () bool { return .flags&0x10 == 0 }
func ( Properties) () bool { return .flags&0x4 == 0 }

func ( Properties) () bool  { return .flags&0x20 != 0 }
func ( Properties) () bool { return .flags&0x8 != 0 } // == isMaybe
func ( Properties) () bool { return .flags&0x4 != 0 } // == isNoD

func ( Properties) () bool {
	return .flags&qcInfoMask == 0 && .ccc == 0
}

func ( Properties) () bool {
	return .index >= firstMulti && .index < endMulti
}

func ( Properties) () uint8 {
	return .nLead
}

func ( Properties) () uint8 {
	return uint8(.flags & 0x03)
}

// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func ( Properties) () []byte {
	// TODO: create the decomposition for Hangul?
	if .index == 0 {
		return nil
	}
	 := .index
	 := decomps[] & headerLenMask
	++
	return decomps[ : +uint16()]
}

// Size returns the length of UTF-8 encoding of the rune.
func ( Properties) () int {
	return int(.size)
}

// CCC returns the canonical combining class of the underlying rune.
func ( Properties) () uint8 {
	if .index >= firstCCCZeroExcept {
		return 0
	}
	return ccc[.ccc]
}

// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func ( Properties) () uint8 {
	return ccc[.ccc]
}

// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func ( Properties) () uint8 {
	return ccc[.tccc]
}

func buildRecompMap() {
	recompMap = make(map[uint32]rune, len(recompMapPacked)/8)
	var  [8]byte
	for  := 0;  < len(recompMapPacked);  += 8 {
		copy([:], recompMapPacked[:+8])
		 := binary.BigEndian.Uint32([:4])
		 := binary.BigEndian.Uint32([4:])
		recompMap[] = rune()
	}
}

// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.

// combine returns the combined rune or 0 if it doesn't exist.
//
// The caller is responsible for calling
// recompMapOnce.Do(buildRecompMap) sometime before this is called.
func combine(,  rune) rune {
	 := uint32(uint16())<<16 + uint32(uint16())
	if recompMap == nil {
		panic("caller error") // see func comment
	}
	return recompMap[]
}

func lookupInfoNFC( input,  int) Properties {
	,  := .charinfoNFC()
	return compInfo(, )
}

func lookupInfoNFKC( input,  int) Properties {
	,  := .charinfoNFKC()
	return compInfo(, )
}

// Properties returns properties for the first rune in s.
func ( Form) ( []byte) Properties {
	if  == NFC ||  == NFD {
		return compInfo(nfcData.lookup())
	}
	return compInfo(nfkcData.lookup())
}

// PropertiesString returns properties for the first rune in s.
func ( Form) ( string) Properties {
	if  == NFC ||  == NFD {
		return compInfo(nfcData.lookupString())
	}
	return compInfo(nfkcData.lookupString())
}

// compInfo converts the information contained in v and sz
// to a Properties.  See the comment at the top of the file
// for more information on the format.
func compInfo( uint16,  int) Properties {
	if  == 0 {
		return Properties{size: uint8()}
	} else if  >= 0x8000 {
		 := Properties{
			size:  uint8(),
			ccc:   uint8(),
			tccc:  uint8(),
			flags: qcInfo( >> 8),
		}
		if .ccc > 0 || .combinesBackward() {
			.nLead = uint8(.flags & 0x3)
		}
		return 
	}
	// has decomposition
	 := decomps[]
	 := (qcInfo(&headerFlagsMask) >> 2) | 0x4
	 := Properties{size: uint8(), flags: , index: }
	if  >= firstCCC {
		 += uint16(&headerLenMask) + 1
		 := decomps[]
		.tccc =  >> 2
		.flags |= qcInfo( & 0x3)
		if  >= firstLeadingCCC {
			.nLead =  & 0x3
			if  >= firstStarterWithNLead {
				// We were tricked. Remove the decomposition.
				.flags &= 0x03
				.index = 0
				return 
			}
			.ccc = decomps[+1]
		}
	}
	return 
}