// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

import (
	"strconv"
	"strings"
	"unicode"
)

// Compiled program.
// May not belong in this package, but convenient for now.

// A Prog is a compiled regular expression program.
type Prog struct {
	Inst   []Inst
	Start  int // index of start instruction
	NumCap int // number of InstCapture insts in re
}

// An InstOp is an instruction opcode.
type InstOp uint8

const (
	InstAlt InstOp = iota
	InstAltMatch
	InstCapture
	InstEmptyWidth
	InstMatch
	InstFail
	InstNop
	InstRune
	InstRune1
	InstRuneAny
	InstRuneAnyNotNL
)

var instOpNames = []string{
	"InstAlt",
	"InstAltMatch",
	"InstCapture",
	"InstEmptyWidth",
	"InstMatch",
	"InstFail",
	"InstNop",
	"InstRune",
	"InstRune1",
	"InstRuneAny",
	"InstRuneAnyNotNL",
}

func ( InstOp) () string {
	if uint() >= uint(len(instOpNames)) {
		return ""
	}
	return instOpNames[]
}

// An EmptyOp specifies a kind or mixture of zero-width assertions.
type EmptyOp uint8

const (
	EmptyBeginLine EmptyOp = 1 << iota
	EmptyEndLine
	EmptyBeginText
	EmptyEndText
	EmptyWordBoundary
	EmptyNoWordBoundary
)

// EmptyOpContext returns the zero-width assertions
// satisfied at the position between the runes r1 and r2.
// Passing r1 == -1 indicates that the position is
// at the beginning of the text.
// Passing r2 == -1 indicates that the position is
// at the end of the text.
func (,  rune) EmptyOp {
	var  EmptyOp = EmptyNoWordBoundary
	var  byte
	switch {
	case IsWordChar():
		 = 1
	case  == '\n':
		 |= EmptyBeginLine
	case  < 0:
		 |= EmptyBeginText | EmptyBeginLine
	}
	switch {
	case IsWordChar():
		 ^= 1
	case  == '\n':
		 |= EmptyEndLine
	case  < 0:
		 |= EmptyEndText | EmptyEndLine
	}
	if  != 0 { // IsWordChar(r1) != IsWordChar(r2)
		 ^= (EmptyWordBoundary | EmptyNoWordBoundary)
	}
	return 
}

// IsWordChar reports whether r is consider a ``word character''
// during the evaluation of the \b and \B zero-width assertions.
// These assertions are ASCII-only: the word characters are [A-Za-z0-9_].
func ( rune) bool {
	return 'A' <=  &&  <= 'Z' || 'a' <=  &&  <= 'z' || '0' <=  &&  <= '9' ||  == '_'
}

// An Inst is a single instruction in a regular expression program.
type Inst struct {
	Op   InstOp
	Out  uint32 // all but InstMatch, InstFail
	Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
	Rune []rune
}

func ( *Prog) () string {
	var  strings.Builder
	dumpProg(&, )
	return .String()
}

// skipNop follows any no-op or capturing instructions.
func ( *Prog) ( uint32) *Inst {
	 := &.Inst[]
	for .Op == InstNop || .Op == InstCapture {
		 = &.Inst[.Out]
	}
	return 
}

// op returns i.Op but merges all the Rune special cases into InstRune
func ( *Inst) () InstOp {
	 := .Op
	switch  {
	case InstRune1, InstRuneAny, InstRuneAnyNotNL:
		 = InstRune
	}
	return 
}

// Prefix returns a literal string that all matches for the
// regexp must start with. Complete is true if the prefix
// is the entire match.
func ( *Prog) () ( string,  bool) {
	 := .skipNop(uint32(.Start))

	// Avoid allocation of buffer if prefix is empty.
	if .op() != InstRune || len(.Rune) != 1 {
		return "", .Op == InstMatch
	}

	// Have prefix; gather characters.
	var  strings.Builder
	for .op() == InstRune && len(.Rune) == 1 && Flags(.Arg)&FoldCase == 0 {
		.WriteRune(.Rune[0])
		 = .skipNop(.Out)
	}
	return .String(), .Op == InstMatch
}

// StartCond returns the leading empty-width conditions that must
// be true in any match. It returns ^EmptyOp(0) if no matches are possible.
func ( *Prog) () EmptyOp {
	var  EmptyOp
	 := uint32(.Start)
	 := &.Inst[]
:
	for {
		switch .Op {
		case InstEmptyWidth:
			 |= EmptyOp(.Arg)
		case InstFail:
			return ^EmptyOp(0)
		case InstCapture, InstNop:
			// skip
		default:
			break 
		}
		 = .Out
		 = &.Inst[]
	}
	return 
}

const noMatch = -1

// MatchRune reports whether the instruction matches (and consumes) r.
// It should only be called when i.Op == InstRune.
func ( *Inst) ( rune) bool {
	return .MatchRunePos() != noMatch
}

// MatchRunePos checks whether the instruction matches (and consumes) r.
// If so, MatchRunePos returns the index of the matching rune pair
// (or, when len(i.Rune) == 1, rune singleton).
// If not, MatchRunePos returns -1.
// MatchRunePos should only be called when i.Op == InstRune.
func ( *Inst) ( rune) int {
	 := .Rune

	switch len() {
	case 0:
		return noMatch

	case 1:
		// Special case: single-rune slice is from literal string, not char class.
		 := [0]
		if  ==  {
			return 0
		}
		if Flags(.Arg)&FoldCase != 0 {
			for  := unicode.SimpleFold();  != ;  = unicode.SimpleFold() {
				if  ==  {
					return 0
				}
			}
		}
		return noMatch

	case 2:
		if  >= [0] &&  <= [1] {
			return 0
		}
		return noMatch

	case 4, 6, 8:
		// Linear search for a few pairs.
		// Should handle ASCII well.
		for  := 0;  < len();  += 2 {
			if  < [] {
				return noMatch
			}
			if  <= [+1] {
				return  / 2
			}
		}
		return noMatch
	}

	// Otherwise binary search.
	 := 0
	 := len() / 2
	for  <  {
		 :=  + (-)/2
		if  := [2*];  <=  {
			if  <= [2*+1] {
				return 
			}
			 =  + 1
		} else {
			 = 
		}
	}
	return noMatch
}

// MatchEmptyWidth reports whether the instruction matches
// an empty string between the runes before and after.
// It should only be called when i.Op == InstEmptyWidth.
func ( *Inst) ( rune,  rune) bool {
	switch EmptyOp(.Arg) {
	case EmptyBeginLine:
		return  == '\n' ||  == -1
	case EmptyEndLine:
		return  == '\n' ||  == -1
	case EmptyBeginText:
		return  == -1
	case EmptyEndText:
		return  == -1
	case EmptyWordBoundary:
		return IsWordChar() != IsWordChar()
	case EmptyNoWordBoundary:
		return IsWordChar() == IsWordChar()
	}
	panic("unknown empty width arg")
}

func ( *Inst) () string {
	var  strings.Builder
	dumpInst(&, )
	return .String()
}

func bw( *strings.Builder,  ...string) {
	for ,  := range  {
		.WriteString()
	}
}

func dumpProg( *strings.Builder,  *Prog) {
	for  := range .Inst {
		 := &.Inst[]
		 := strconv.Itoa()
		if len() < 3 {
			.WriteString("   "[len():])
		}
		if  == .Start {
			 += "*"
		}
		bw(, , "\t")
		dumpInst(, )
		bw(, "\n")
	}
}

func u32( uint32) string {
	return strconv.FormatUint(uint64(), 10)
}

func dumpInst( *strings.Builder,  *Inst) {
	switch .Op {
	case InstAlt:
		bw(, "alt -> ", u32(.Out), ", ", u32(.Arg))
	case InstAltMatch:
		bw(, "altmatch -> ", u32(.Out), ", ", u32(.Arg))
	case InstCapture:
		bw(, "cap ", u32(.Arg), " -> ", u32(.Out))
	case InstEmptyWidth:
		bw(, "empty ", u32(.Arg), " -> ", u32(.Out))
	case InstMatch:
		bw(, "match")
	case InstFail:
		bw(, "fail")
	case InstNop:
		bw(, "nop -> ", u32(.Out))
	case InstRune:
		if .Rune == nil {
			// shouldn't happen
			bw(, "rune <nil>")
		}
		bw(, "rune ", strconv.QuoteToASCII(string(.Rune)))
		if Flags(.Arg)&FoldCase != 0 {
			bw(, "/i")
		}
		bw(, " -> ", u32(.Out))
	case InstRune1:
		bw(, "rune1 ", strconv.QuoteToASCII(string(.Rune)), " -> ", u32(.Out))
	case InstRuneAny:
		bw(, "any -> ", u32(.Out))
	case InstRuneAnyNotNL:
		bw(, "anynotnl -> ", u32(.Out))
	}
}