// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

import (
	
	
	
	
)

// An Error describes a failure to parse a regular expression
// and gives the offending expression.
type Error struct {
	Code ErrorCode
	Expr string
}

func ( *Error) () string {
	return "error parsing regexp: " + .Code.String() + ": `" + .Expr + "`"
}

// An ErrorCode describes a failure to parse a regular expression.
type ErrorCode string

const (
	// Unexpected error
	ErrInternalError ErrorCode = "regexp/syntax: internal error"

	// Parse errors
	ErrInvalidCharClass      ErrorCode = "invalid character class"
	ErrInvalidCharRange      ErrorCode = "invalid character class range"
	ErrInvalidEscape         ErrorCode = "invalid escape sequence"
	ErrInvalidNamedCapture   ErrorCode = "invalid named capture"
	ErrInvalidPerlOp         ErrorCode = "invalid or unsupported Perl syntax"
	ErrInvalidRepeatOp       ErrorCode = "invalid nested repetition operator"
	ErrInvalidRepeatSize     ErrorCode = "invalid repeat count"
	ErrInvalidUTF8           ErrorCode = "invalid UTF-8"
	ErrMissingBracket        ErrorCode = "missing closing ]"
	ErrMissingParen          ErrorCode = "missing closing )"
	ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
	ErrTrailingBackslash     ErrorCode = "trailing backslash at end of expression"
	ErrUnexpectedParen       ErrorCode = "unexpected )"
	ErrNestingDepth          ErrorCode = "expression nests too deeply"
	ErrLarge                 ErrorCode = "expression too large"
)

func ( ErrorCode) () string {
	return string()
}

// Flags control the behavior of the parser and record information about regexp context.
type Flags uint16

const (
	FoldCase      Flags = 1 << iota // case-insensitive match
	Literal                         // treat pattern as literal string
	ClassNL                         // allow character classes like [^a-z] and [[:space:]] to match newline
	DotNL                           // allow . to match newline
	OneLine                         // treat ^ and $ as only matching at beginning and end of text
	NonGreedy                       // make repetition operators default to non-greedy
	PerlX                           // allow Perl extensions
	UnicodeGroups                   // allow \p{Han}, \P{Han} for Unicode group and negation
	WasDollar                       // regexp OpEndText was $, not \z
	Simple                          // regexp contains no counted repetition

	MatchNL = ClassNL | DotNL

	Perl        = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
	POSIX Flags = 0                                         // POSIX syntax
)

// Pseudo-ops for parsing stack.
const (
	opLeftParen = opPseudo + iota
	opVerticalBar
)

// maxHeight is the maximum height of a regexp parse tree.
// It is somewhat arbitrarily chosen, but the idea is to be large enough
// that no one will actually hit in real use but at the same time small enough
// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
// The maximum amount of stack for a single recursive frame is probably
// closer to 1kB, so this could potentially be raised, but it seems unlikely
// that people have regexps nested even this deeply.
// We ran a test on Google's C++ code base and turned up only
// a single use case with depth > 100; it had depth 128.
// Using depth 1000 should be plenty of margin.
// As an optimization, we don't even bother calculating heights
// until we've allocated at least maxHeight Regexp structures.
const maxHeight = 1000

// maxSize is the maximum size of a compiled regexp in Insts.
// It too is somewhat arbitrarily chosen, but the idea is to be large enough
// to allow significant regexps while at the same time small enough that
// the compiled form will not take up too much memory.
// 128 MB is enough for a 3.3 million Inst structures, which roughly
// corresponds to a 3.3 MB regexp.
const (
	maxSize  = 128 << 20 / instSize
	instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words
)

// maxRunes is the maximum number of runes allowed in a regexp tree
// counting the runes in all the nodes.
// Ignoring character classes p.numRunes is always less than the length of the regexp.
// Character classes can make it much larger: each \pL adds 1292 runes.
// 128 MB is enough for 32M runes, which is over 26k \pL instances.
// Note that repetitions do not make copies of the rune slices,
// so \pL{1000} is only one rune slice, not 1000.
// We could keep a cache of character classes we've seen,
// so that all the \pL we see use the same rune list,
// but that doesn't remove the problem entirely:
// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
// And because the Rune slice is exposed directly in the Regexp,
// there is not an opportunity to change the representation to allow
// partial sharing between different character classes.
// So the limit is the best we can do.
const (
	maxRunes = 128 << 20 / runeSize
	runeSize = 4 // rune is int32
)

type parser struct {
	flags       Flags     // parse mode flags
	stack       []*Regexp // stack of parsed expressions
	free        *Regexp
	numCap      int // number of capturing groups seen
	wholeRegexp string
	tmpClass    []rune            // temporary char class work space
	numRegexp   int               // number of regexps allocated
	numRunes    int               // number of runes in char classes
	repeats     int64             // product of all repetitions seen
	height      map[*Regexp]int   // regexp height, for height limit check
	size        map[*Regexp]int64 // regexp compiled size, for size limit check
}

func ( *parser) ( Op) *Regexp {
	 := .free
	if  != nil {
		.free = .Sub0[0]
		* = Regexp{}
	} else {
		 = new(Regexp)
		.numRegexp++
	}
	.Op = 
	return 
}

func ( *parser) ( *Regexp) {
	if .height != nil {
		delete(.height, )
	}
	.Sub0[0] = .free
	.free = 
}

func ( *parser) ( *Regexp) {
	if .numRunes > maxRunes {
		panic(ErrLarge)
	}
	.checkSize()
	.checkHeight()
}

func ( *parser) ( *Regexp) {
	if .size == nil {
		// We haven't started tracking size yet.
		// Do a relatively cheap check to see if we need to start.
		// Maintain the product of all the repeats we've seen
		// and don't track if the total number of regexp nodes
		// we've seen times the repeat product is in budget.
		if .repeats == 0 {
			.repeats = 1
		}
		if .Op == OpRepeat {
			 := .Max
			if  == -1 {
				 = .Min
			}
			if  <= 0 {
				 = 1
			}
			if int64() > maxSize/.repeats {
				.repeats = maxSize
			} else {
				.repeats *= int64()
			}
		}
		if int64(.numRegexp) < maxSize/.repeats {
			return
		}

		// We need to start tracking size.
		// Make the map and belatedly populate it
		// with info about everything we've constructed so far.
		.size = make(map[*Regexp]int64)
		for ,  := range .stack {
			.()
		}
	}

	if .calcSize(, true) > maxSize {
		panic(ErrLarge)
	}
}

func ( *parser) ( *Regexp,  bool) int64 {
	if ! {
		if ,  := .size[];  {
			return 
		}
	}

	var  int64
	switch .Op {
	case OpLiteral:
		 = int64(len(.Rune))
	case OpCapture, OpStar:
		// star can be 1+ or 2+; assume 2 pessimistically
		 = 2 + .(.Sub[0], false)
	case OpPlus, OpQuest:
		 = 1 + .(.Sub[0], false)
	case OpConcat:
		for ,  := range .Sub {
			 += .(, false)
		}
	case OpAlternate:
		for ,  := range .Sub {
			 += .(, false)
		}
		if len(.Sub) > 1 {
			 += int64(len(.Sub)) - 1
		}
	case OpRepeat:
		 := .(.Sub[0], false)
		if .Max == -1 {
			if .Min == 0 {
				 = 2 +  // x*
			} else {
				 = 1 + int64(.Min)* // xxx+
			}
			break
		}
		// x{2,5} = xx(x(x(x)?)?)?
		 = int64(.Max)* + int64(.Max-.Min)
	}

	 = max(1, )
	.size[] = 
	return 
}

func ( *parser) ( *Regexp) {
	if .numRegexp < maxHeight {
		return
	}
	if .height == nil {
		.height = make(map[*Regexp]int)
		for ,  := range .stack {
			.()
		}
	}
	if .calcHeight(, true) > maxHeight {
		panic(ErrNestingDepth)
	}
}

func ( *parser) ( *Regexp,  bool) int {
	if ! {
		if ,  := .height[];  {
			return 
		}
	}
	 := 1
	for ,  := range .Sub {
		 := .(, false)
		if  < 1+ {
			 = 1 + 
		}
	}
	.height[] = 
	return 
}

// Parse stack manipulation.

// push pushes the regexp re onto the parse stack and returns the regexp.
func ( *parser) ( *Regexp) *Regexp {
	.numRunes += len(.Rune)
	if .Op == OpCharClass && len(.Rune) == 2 && .Rune[0] == .Rune[1] {
		// Single rune.
		if .maybeConcat(.Rune[0], .flags&^FoldCase) {
			return nil
		}
		.Op = OpLiteral
		.Rune = .Rune[:1]
		.Flags = .flags &^ FoldCase
	} else if .Op == OpCharClass && len(.Rune) == 4 &&
		.Rune[0] == .Rune[1] && .Rune[2] == .Rune[3] &&
		unicode.SimpleFold(.Rune[0]) == .Rune[2] &&
		unicode.SimpleFold(.Rune[2]) == .Rune[0] ||
		.Op == OpCharClass && len(.Rune) == 2 &&
			.Rune[0]+1 == .Rune[1] &&
			unicode.SimpleFold(.Rune[0]) == .Rune[1] &&
			unicode.SimpleFold(.Rune[1]) == .Rune[0] {
		// Case-insensitive rune like [Aa] or [Δδ].
		if .maybeConcat(.Rune[0], .flags|FoldCase) {
			return nil
		}

		// Rewrite as (case-insensitive) literal.
		.Op = OpLiteral
		.Rune = .Rune[:1]
		.Flags = .flags | FoldCase
	} else {
		// Incremental concatenation.
		.maybeConcat(-1, 0)
	}

	.stack = append(.stack, )
	.checkLimits()
	return 
}

// maybeConcat implements incremental concatenation
// of literal runes into string nodes. The parser calls this
// before each push, so only the top fragment of the stack
// might need processing. Since this is called before a push,
// the topmost literal is no longer subject to operators like *
// (Otherwise ab* would turn into (ab)*.)
// If r >= 0 and there's a node left over, maybeConcat uses it
// to push r with the given flags.
// maybeConcat reports whether r was pushed.
func ( *parser) ( rune,  Flags) bool {
	 := len(.stack)
	if  < 2 {
		return false
	}

	 := .stack[-1]
	 := .stack[-2]
	if .Op != OpLiteral || .Op != OpLiteral || .Flags&FoldCase != .Flags&FoldCase {
		return false
	}

	// Push re1 into re2.
	.Rune = append(.Rune, .Rune...)

	// Reuse re1 if possible.
	if  >= 0 {
		.Rune = .Rune0[:1]
		.Rune[0] = 
		.Flags = 
		return true
	}

	.stack = .stack[:-1]
	.reuse()
	return false // did not push r
}

// literal pushes a literal regexp for the rune r on the stack.
func ( *parser) ( rune) {
	 := .newRegexp(OpLiteral)
	.Flags = .flags
	if .flags&FoldCase != 0 {
		 = minFoldRune()
	}
	.Rune0[0] = 
	.Rune = .Rune0[:1]
	.push()
}

// minFoldRune returns the minimum rune fold-equivalent to r.
func minFoldRune( rune) rune {
	if  < minFold ||  > maxFold {
		return 
	}
	 := 
	 := 
	for  = unicode.SimpleFold();  != ;  = unicode.SimpleFold() {
		 = min(, )
	}
	return 
}

// op pushes a regexp with the given op onto the stack
// and returns that regexp.
func ( *parser) ( Op) *Regexp {
	 := .newRegexp()
	.Flags = .flags
	return .push()
}

// repeat replaces the top stack element with itself repeated according to op, min, max.
// before is the regexp suffix starting at the repetition operator.
// after is the regexp suffix following after the repetition operator.
// repeat returns an updated 'after' and an error, if any.
func ( *parser) ( Op, ,  int, , ,  string) (string, error) {
	 := .flags
	if .flags&PerlX != 0 {
		if len() > 0 && [0] == '?' {
			 = [1:]
			 ^= NonGreedy
		}
		if  != "" {
			// In Perl it is not allowed to stack repetition operators:
			// a** is a syntax error, not a doubled star, and a++ means
			// something else entirely, which we don't support!
			return "", &Error{ErrInvalidRepeatOp, [:len()-len()]}
		}
	}
	 := len(.stack)
	if  == 0 {
		return "", &Error{ErrMissingRepeatArgument, [:len()-len()]}
	}
	 := .stack[-1]
	if .Op >= opPseudo {
		return "", &Error{ErrMissingRepeatArgument, [:len()-len()]}
	}

	 := .newRegexp()
	.Min = 
	.Max = 
	.Flags = 
	.Sub = .Sub0[:1]
	.Sub[0] = 
	.stack[-1] = 
	.checkLimits()

	if  == OpRepeat && ( >= 2 ||  >= 2) && !repeatIsValid(, 1000) {
		return "", &Error{ErrInvalidRepeatSize, [:len()-len()]}
	}

	return , nil
}

// repeatIsValid reports whether the repetition re is valid.
// Valid means that the combination of the top-level repetition
// and any inner repetitions does not exceed n copies of the
// innermost thing.
// This function rewalks the regexp tree and is called for every repetition,
// so we have to worry about inducing quadratic behavior in the parser.
// We avoid this by only calling repeatIsValid when min or max >= 2.
// In that case the depth of any >= 2 nesting can only get to 9 without
// triggering a parse error, so each subtree can only be rewalked 9 times.
func repeatIsValid( *Regexp,  int) bool {
	if .Op == OpRepeat {
		 := .Max
		if  == 0 {
			return true
		}
		if  < 0 {
			 = .Min
		}
		if  >  {
			return false
		}
		if  > 0 {
			 /= 
		}
	}
	for ,  := range .Sub {
		if !(, ) {
			return false
		}
	}
	return true
}

// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func ( *parser) () *Regexp {
	.maybeConcat(-1, 0)

	// Scan down to find pseudo-operator | or (.
	 := len(.stack)
	for  > 0 && .stack[-1].Op < opPseudo {
		--
	}
	 := .stack[:]
	.stack = .stack[:]

	// Empty concatenation is special case.
	if len() == 0 {
		return .push(.newRegexp(OpEmptyMatch))
	}

	return .push(.collapse(, OpConcat))
}

// alternate replaces the top of the stack (above the topmost '(') with its alternation.
func ( *parser) () *Regexp {
	// Scan down to find pseudo-operator (.
	// There are no | above (.
	 := len(.stack)
	for  > 0 && .stack[-1].Op < opPseudo {
		--
	}
	 := .stack[:]
	.stack = .stack[:]

	// Make sure top class is clean.
	// All the others already are (see swapVerticalBar).
	if len() > 0 {
		cleanAlt([len()-1])
	}

	// Empty alternate is special case
	// (shouldn't happen but easy to handle).
	if len() == 0 {
		return .push(.newRegexp(OpNoMatch))
	}

	return .push(.collapse(, OpAlternate))
}

// cleanAlt cleans re for eventual inclusion in an alternation.
func cleanAlt( *Regexp) {
	switch .Op {
	case OpCharClass:
		.Rune = cleanClass(&.Rune)
		if len(.Rune) == 2 && .Rune[0] == 0 && .Rune[1] == unicode.MaxRune {
			.Rune = nil
			.Op = OpAnyChar
			return
		}
		if len(.Rune) == 4 && .Rune[0] == 0 && .Rune[1] == '\n'-1 && .Rune[2] == '\n'+1 && .Rune[3] == unicode.MaxRune {
			.Rune = nil
			.Op = OpAnyCharNotNL
			return
		}
		if cap(.Rune)-len(.Rune) > 100 {
			// re.Rune will not grow any more.
			// Make a copy or inline to reclaim storage.
			.Rune = append(.Rune0[:0], .Rune...)
		}
	}
}

// collapse returns the result of applying op to sub.
// If sub contains op nodes, they all get hoisted up
// so that there is never a concat of a concat or an
// alternate of an alternate.
func ( *parser) ( []*Regexp,  Op) *Regexp {
	if len() == 1 {
		return [0]
	}
	 := .newRegexp()
	.Sub = .Sub0[:0]
	for ,  := range  {
		if .Op ==  {
			.Sub = append(.Sub, .Sub...)
			.reuse()
		} else {
			.Sub = append(.Sub, )
		}
	}
	if  == OpAlternate {
		.Sub = .factor(.Sub)
		if len(.Sub) == 1 {
			 := 
			 = .Sub[0]
			.reuse()
		}
	}
	return 
}

// factor factors common prefixes from the alternation list sub.
// It returns a replacement list that reuses the same storage and
// frees (passes to p.reuse) any removed *Regexps.
//
// For example,
//
//	ABC|ABD|AEF|BCX|BCY
//
// simplifies by literal prefix extraction to
//
//	A(B(C|D)|EF)|BC(X|Y)
//
// which simplifies by character class introduction to
//
//	A(B[CD]|EF)|BC[XY]
func ( *parser) ( []*Regexp) []*Regexp {
	if len() < 2 {
		return 
	}

	// Round 1: Factor out common literal prefixes.
	var  []rune
	var  Flags
	 := 0
	 := [:0]
	for  := 0;  <= len(); ++ {
		// Invariant: the Regexps that were in sub[0:start] have been
		// used or marked for reuse, and the slice space has been reused
		// for out (len(out) <= start).
		//
		// Invariant: sub[start:i] consists of regexps that all begin
		// with str as modified by strflags.
		var  []rune
		var  Flags
		if  < len() {
			,  = .leadingString([])
			if  ==  {
				 := 0
				for  < len() &&  < len() && [] == [] {
					++
				}
				if  > 0 {
					// Matches at least one rune in current range.
					// Keep going around.
					 = [:]
					continue
				}
			}
		}

		// Found end of a run with common leading literal string:
		// sub[start:i] all begin with str[0:len(str)], but sub[i]
		// does not even begin with str[0].
		//
		// Factor out common string and append factored expression to out.
		if  ==  {
			// Nothing to do - run of length 0.
		} else if  == +1 {
			// Just one: don't bother factoring.
			 = append(, [])
		} else {
			// Construct factored form: prefix(suffix1|suffix2|...)
			 := .newRegexp(OpLiteral)
			.Flags = 
			.Rune = append(.Rune[:0], ...)

			for  := ;  < ; ++ {
				[] = .removeLeadingString([], len())
				.checkLimits([])
			}
			 := .collapse([:], OpAlternate) // recurse

			 := .newRegexp(OpConcat)
			.Sub = append(.Sub[:0], , )
			 = append(, )
		}

		// Prepare for next iteration.
		 = 
		 = 
		 = 
	}
	 = 

	// Round 2: Factor out common simple prefixes,
	// just the first piece of each concatenation.
	// This will be good enough a lot of the time.
	//
	// Complex subexpressions (e.g. involving quantifiers)
	// are not safe to factor because that collapses their
	// distinct paths through the automaton, which affects
	// correctness in some cases.
	 = 0
	 = [:0]
	var  *Regexp
	for  := 0;  <= len(); ++ {
		// Invariant: the Regexps that were in sub[0:start] have been
		// used or marked for reuse, and the slice space has been reused
		// for out (len(out) <= start).
		//
		// Invariant: sub[start:i] consists of regexps that all begin with ifirst.
		var  *Regexp
		if  < len() {
			 = .leadingRegexp([])
			if  != nil && .Equal() &&
				// first must be a character class OR a fixed repeat of a character class.
				(isCharClass() || (.Op == OpRepeat && .Min == .Max && isCharClass(.Sub[0]))) {
				continue
			}
		}

		// Found end of a run with common leading regexp:
		// sub[start:i] all begin with first but sub[i] does not.
		//
		// Factor out common regexp and append factored expression to out.
		if  ==  {
			// Nothing to do - run of length 0.
		} else if  == +1 {
			// Just one: don't bother factoring.
			 = append(, [])
		} else {
			// Construct factored form: prefix(suffix1|suffix2|...)
			 := 
			for  := ;  < ; ++ {
				 :=  !=  // prefix came from sub[start]
				[] = .removeLeadingRegexp([], )
				.checkLimits([])
			}
			 := .collapse([:], OpAlternate) // recurse

			 := .newRegexp(OpConcat)
			.Sub = append(.Sub[:0], , )
			 = append(, )
		}

		// Prepare for next iteration.
		 = 
		 = 
	}
	 = 

	// Round 3: Collapse runs of single literals into character classes.
	 = 0
	 = [:0]
	for  := 0;  <= len(); ++ {
		// Invariant: the Regexps that were in sub[0:start] have been
		// used or marked for reuse, and the slice space has been reused
		// for out (len(out) <= start).
		//
		// Invariant: sub[start:i] consists of regexps that are either
		// literal runes or character classes.
		if  < len() && isCharClass([]) {
			continue
		}

		// sub[i] is not a char or char class;
		// emit char class for sub[start:i]...
		if  ==  {
			// Nothing to do - run of length 0.
		} else if  == +1 {
			 = append(, [])
		} else {
			// Make new char class.
			// Start with most complex regexp in sub[start].
			 := 
			for  :=  + 1;  < ; ++ {
				if [].Op < [].Op || [].Op == [].Op && len([].Rune) < len([].Rune) {
					 = 
				}
			}
			[], [] = [], []

			for  :=  + 1;  < ; ++ {
				mergeCharClass([], [])
				.reuse([])
			}
			cleanAlt([])
			 = append(, [])
		}

		// ... and then emit sub[i].
		if  < len() {
			 = append(, [])
		}
		 =  + 1
	}
	 = 

	// Round 4: Collapse runs of empty matches into a single empty match.
	 = 0
	 = [:0]
	for  := range  {
		if +1 < len() && [].Op == OpEmptyMatch && [+1].Op == OpEmptyMatch {
			continue
		}
		 = append(, [])
	}
	 = 

	return 
}

// leadingString returns the leading literal string that re begins with.
// The string refers to storage in re or its children.
func ( *parser) ( *Regexp) ([]rune, Flags) {
	if .Op == OpConcat && len(.Sub) > 0 {
		 = .Sub[0]
	}
	if .Op != OpLiteral {
		return nil, 0
	}
	return .Rune, .Flags & FoldCase
}

// removeLeadingString removes the first n leading runes
// from the beginning of re. It returns the replacement for re.
func ( *parser) ( *Regexp,  int) *Regexp {
	if .Op == OpConcat && len(.Sub) > 0 {
		// Removing a leading string in a concatenation
		// might simplify the concatenation.
		 := .Sub[0]
		 = .(, )
		.Sub[0] = 
		if .Op == OpEmptyMatch {
			.reuse()
			switch len(.Sub) {
			case 0, 1:
				// Impossible but handle.
				.Op = OpEmptyMatch
				.Sub = nil
			case 2:
				 := 
				 = .Sub[1]
				.reuse()
			default:
				copy(.Sub, .Sub[1:])
				.Sub = .Sub[:len(.Sub)-1]
			}
		}
		return 
	}

	if .Op == OpLiteral {
		.Rune = .Rune[:copy(.Rune, .Rune[:])]
		if len(.Rune) == 0 {
			.Op = OpEmptyMatch
		}
	}
	return 
}

// leadingRegexp returns the leading regexp that re begins with.
// The regexp refers to storage in re or its children.
func ( *parser) ( *Regexp) *Regexp {
	if .Op == OpEmptyMatch {
		return nil
	}
	if .Op == OpConcat && len(.Sub) > 0 {
		 := .Sub[0]
		if .Op == OpEmptyMatch {
			return nil
		}
		return 
	}
	return 
}

// removeLeadingRegexp removes the leading regexp in re.
// It returns the replacement for re.
// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.
func ( *parser) ( *Regexp,  bool) *Regexp {
	if .Op == OpConcat && len(.Sub) > 0 {
		if  {
			.reuse(.Sub[0])
		}
		.Sub = .Sub[:copy(.Sub, .Sub[1:])]
		switch len(.Sub) {
		case 0:
			.Op = OpEmptyMatch
			.Sub = nil
		case 1:
			 := 
			 = .Sub[0]
			.reuse()
		}
		return 
	}
	if  {
		.reuse()
	}
	return .newRegexp(OpEmptyMatch)
}

func literalRegexp( string,  Flags) *Regexp {
	 := &Regexp{Op: OpLiteral}
	.Flags = 
	.Rune = .Rune0[:0] // use local storage for small strings
	for ,  := range  {
		if len(.Rune) >= cap(.Rune) {
			// string is too long to fit in Rune0.  let Go handle it
			.Rune = []rune()
			break
		}
		.Rune = append(.Rune, )
	}
	return 
}

// Parsing.

// Parse parses a regular expression string s, controlled by the specified
// Flags, and returns a regular expression parse tree. The syntax is
// described in the top-level comment.
func ( string,  Flags) (*Regexp, error) {
	return parse(, )
}

func parse( string,  Flags) ( *Regexp,  error) {
	defer func() {
		switch  := recover();  {
		default:
			panic()
		case nil:
			// ok
		case ErrLarge: // too big
			 = &Error{Code: ErrLarge, Expr: }
		case ErrNestingDepth:
			 = &Error{Code: ErrNestingDepth, Expr: }
		}
	}()

	if &Literal != 0 {
		// Trivial parser for literal string.
		if  := checkUTF8();  != nil {
			return nil, 
		}
		return literalRegexp(, ), nil
	}

	// Otherwise, must do real work.
	var (
		          parser
		          rune
		         Op
		 string
	)
	.flags = 
	.wholeRegexp = 
	 := 
	for  != "" {
		 := ""
	:
		switch [0] {
		default:
			if , ,  = nextRune();  != nil {
				return nil, 
			}
			.literal()

		case '(':
			if .flags&PerlX != 0 && len() >= 2 && [1] == '?' {
				// Flag changes and non-capturing groups.
				if ,  = .parsePerlFlags();  != nil {
					return nil, 
				}
				break
			}
			.numCap++
			.op(opLeftParen).Cap = .numCap
			 = [1:]
		case '|':
			.parseVerticalBar()
			 = [1:]
		case ')':
			if  = .parseRightParen();  != nil {
				return nil, 
			}
			 = [1:]
		case '^':
			if .flags&OneLine != 0 {
				.op(OpBeginText)
			} else {
				.op(OpBeginLine)
			}
			 = [1:]
		case '$':
			if .flags&OneLine != 0 {
				.op(OpEndText).Flags |= WasDollar
			} else {
				.op(OpEndLine)
			}
			 = [1:]
		case '.':
			if .flags&DotNL != 0 {
				.op(OpAnyChar)
			} else {
				.op(OpAnyCharNotNL)
			}
			 = [1:]
		case '[':
			if ,  = .parseClass();  != nil {
				return nil, 
			}
		case '*', '+', '?':
			 := 
			switch [0] {
			case '*':
				 = OpStar
			case '+':
				 = OpPlus
			case '?':
				 = OpQuest
			}
			 := [1:]
			if ,  = .repeat(, 0, 0, , , );  != nil {
				return nil, 
			}
			 = 
			 = 
		case '{':
			 = OpRepeat
			 := 
			, , ,  := .parseRepeat()
			if ! {
				// If the repeat cannot be parsed, { is a literal.
				.literal('{')
				 = [1:]
				break
			}
			if  < 0 ||  > 1000 ||  > 1000 ||  >= 0 &&  >  {
				// Numbers were too big, or max is present and min > max.
				return nil, &Error{ErrInvalidRepeatSize, [:len()-len()]}
			}
			if ,  = .repeat(, , , , , );  != nil {
				return nil, 
			}
			 = 
			 = 
		case '\\':
			if .flags&PerlX != 0 && len() >= 2 {
				switch [1] {
				case 'A':
					.op(OpBeginText)
					 = [2:]
					break 
				case 'b':
					.op(OpWordBoundary)
					 = [2:]
					break 
				case 'B':
					.op(OpNoWordBoundary)
					 = [2:]
					break 
				case 'C':
					// any byte; not supported
					return nil, &Error{ErrInvalidEscape, [:2]}
				case 'Q':
					// \Q ... \E: the ... is always literals
					var  string
					, , _ = strings.Cut([2:], `\E`)
					for  != "" {
						, ,  := nextRune()
						if  != nil {
							return nil, 
						}
						.literal()
						 = 
					}
					break 
				case 'z':
					.op(OpEndText)
					 = [2:]
					break 
				}
			}

			 := .newRegexp(OpCharClass)
			.Flags = .flags

			// Look for Unicode character group like \p{Han}
			if len() >= 2 && ([1] == 'p' || [1] == 'P') {
				, ,  := .parseUnicodeClass(, .Rune0[:0])
				if  != nil {
					return nil, 
				}
				if  != nil {
					.Rune = 
					 = 
					.push()
					break 
				}
			}

			// Perl character class escape.
			if ,  := .parsePerlClassEscape(, .Rune0[:0]);  != nil {
				.Rune = 
				 = 
				.push()
				break 
			}
			.reuse()

			// Ordinary single-character escape.
			if , ,  = .parseEscape();  != nil {
				return nil, 
			}
			.literal()
		}
		 = 
	}

	.concat()
	if .swapVerticalBar() {
		// pop vertical bar
		.stack = .stack[:len(.stack)-1]
	}
	.alternate()

	 := len(.stack)
	if  != 1 {
		return nil, &Error{ErrMissingParen, }
	}
	return .stack[0], nil
}

// parseRepeat parses {min} (max=min) or {min,} (max=-1) or {min,max}.
// If s is not of that form, it returns ok == false.
// If s has the right form but the values are too big, it returns min == -1, ok == true.
func ( *parser) ( string) (,  int,  string,  bool) {
	if  == "" || [0] != '{' {
		return
	}
	 = [1:]
	var  bool
	if , ,  = .parseInt(); ! {
		return
	}
	if  == "" {
		return
	}
	if [0] != ',' {
		 = 
	} else {
		 = [1:]
		if  == "" {
			return
		}
		if [0] == '}' {
			 = -1
		} else if , ,  = .parseInt(); ! {
			return
		} else if  < 0 {
			// parseInt found too big a number
			 = -1
		}
	}
	if  == "" || [0] != '}' {
		return
	}
	 = [1:]
	 = true
	return
}

// parsePerlFlags parses a Perl flag setting or non-capturing group or both,
// like (?i) or (?: or (?i:.  It removes the prefix from s and updates the parse state.
// The caller must have ensured that s begins with "(?".
func ( *parser) ( string) ( string,  error) {
	 := 

	// Check for named captures, first introduced in Python's regexp library.
	// As usual, there are three slightly different syntaxes:
	//
	//   (?P<name>expr)   the original, introduced by Python
	//   (?<name>expr)    the .NET alteration, adopted by Perl 5.10
	//   (?'name'expr)    another .NET alteration, adopted by Perl 5.10
	//
	// Perl 5.10 gave in and implemented the Python version too,
	// but they claim that the last two are the preferred forms.
	// PCRE and languages based on it (specifically, PHP and Ruby)
	// support all three as well. EcmaScript 4 uses only the Python form.
	//
	// In both the open source world (via Code Search) and the
	// Google source tree, (?P<expr>name) and (?<expr>name) are the
	// dominant forms of named captures and both are supported.
	 := len() > 4 && [2] == 'P' && [3] == '<'
	 := len() > 3 && [2] == '<'

	if  ||  {
		// position of expr start
		 := 4
		if  {
			 = 3
		}

		// Pull out name.
		 := strings.IndexRune(, '>')
		if  < 0 {
			if  = checkUTF8();  != nil {
				return "", 
			}
			return "", &Error{ErrInvalidNamedCapture, }
		}

		 := [:+1]        // "(?P<name>" or "(?<name>"
		 := [:] // "name"
		if  = checkUTF8();  != nil {
			return "", 
		}
		if !isValidCaptureName() {
			return "", &Error{ErrInvalidNamedCapture, }
		}

		// Like ordinary capture, but named.
		.numCap++
		 := .op(opLeftParen)
		.Cap = .numCap
		.Name = 
		return [+1:], nil
	}

	// Non-capturing group. Might also twiddle Perl flags.
	var  rune
	 = [2:] // skip (?
	 := .flags
	 := +1
	 := false
:
	for  != "" {
		if , ,  = nextRune();  != nil {
			return "", 
		}
		switch  {
		default:
			break 

		// Flags.
		case 'i':
			 |= FoldCase
			 = true
		case 'm':
			 &^= OneLine
			 = true
		case 's':
			 |= DotNL
			 = true
		case 'U':
			 |= NonGreedy
			 = true

		// Switch to negation.
		case '-':
			if  < 0 {
				break 
			}
			 = -1
			// Invert flags so that | above turn into &^ and vice versa.
			// We'll invert flags again before using it below.
			 = ^
			 = false

		// End of flags, starting group or not.
		case ':', ')':
			if  < 0 {
				if ! {
					break 
				}
				 = ^
			}
			if  == ':' {
				// Open new group
				.op(opLeftParen)
			}
			.flags = 
			return , nil
		}
	}

	return "", &Error{ErrInvalidPerlOp, [:len()-len()]}
}

// isValidCaptureName reports whether name
// is a valid capture name: [A-Za-z0-9_]+.
// PCRE limits names to 32 bytes.
// Python rejects names starting with digits.
// We don't enforce either of those.
func isValidCaptureName( string) bool {
	if  == "" {
		return false
	}
	for ,  := range  {
		if  != '_' && !isalnum() {
			return false
		}
	}
	return true
}

// parseInt parses a decimal integer.
func ( *parser) ( string) ( int,  string,  bool) {
	if  == "" || [0] < '0' || '9' < [0] {
		return
	}
	// Disallow leading zeros.
	if len() >= 2 && [0] == '0' && '0' <= [1] && [1] <= '9' {
		return
	}
	 := 
	for  != "" && '0' <= [0] && [0] <= '9' {
		 = [1:]
	}
	 = 
	 = true
	// Have digits, compute value.
	 = [:len()-len()]
	for  := 0;  < len(); ++ {
		// Avoid overflow.
		if  >= 1e8 {
			 = -1
			break
		}
		 = *10 + int([]) - '0'
	}
	return
}

// can this be represented as a character class?
// single-rune literal string, char class, ., and .|\n.
func isCharClass( *Regexp) bool {
	return .Op == OpLiteral && len(.Rune) == 1 ||
		.Op == OpCharClass ||
		.Op == OpAnyCharNotNL ||
		.Op == OpAnyChar
}

// does re match r?
func matchRune( *Regexp,  rune) bool {
	switch .Op {
	case OpLiteral:
		return len(.Rune) == 1 && .Rune[0] == 
	case OpCharClass:
		for  := 0;  < len(.Rune);  += 2 {
			if .Rune[] <=  &&  <= .Rune[+1] {
				return true
			}
		}
		return false
	case OpAnyCharNotNL:
		return  != '\n'
	case OpAnyChar:
		return true
	}
	return false
}

// parseVerticalBar handles a | in the input.
func ( *parser) () {
	.concat()

	// The concatenation we just parsed is on top of the stack.
	// If it sits above an opVerticalBar, swap it below
	// (things below an opVerticalBar become an alternation).
	// Otherwise, push a new vertical bar.
	if !.swapVerticalBar() {
		.op(opVerticalBar)
	}
}

// mergeCharClass makes dst = dst|src.
// The caller must ensure that dst.Op >= src.Op,
// to reduce the amount of copying.
func mergeCharClass(,  *Regexp) {
	switch .Op {
	case OpAnyChar:
		// src doesn't add anything.
	case OpAnyCharNotNL:
		// src might add \n
		if matchRune(, '\n') {
			.Op = OpAnyChar
		}
	case OpCharClass:
		// src is simpler, so either literal or char class
		if .Op == OpLiteral {
			.Rune = appendLiteral(.Rune, .Rune[0], .Flags)
		} else {
			.Rune = appendClass(.Rune, .Rune)
		}
	case OpLiteral:
		// both literal
		if .Rune[0] == .Rune[0] && .Flags == .Flags {
			break
		}
		.Op = OpCharClass
		.Rune = appendLiteral(.Rune[:0], .Rune[0], .Flags)
		.Rune = appendLiteral(.Rune, .Rune[0], .Flags)
	}
}

// If the top of the stack is an element followed by an opVerticalBar
// swapVerticalBar swaps the two and returns true.
// Otherwise it returns false.
func ( *parser) () bool {
	// If above and below vertical bar are literal or char class,
	// can merge into a single char class.
	 := len(.stack)
	if  >= 3 && .stack[-2].Op == opVerticalBar && isCharClass(.stack[-1]) && isCharClass(.stack[-3]) {
		 := .stack[-1]
		 := .stack[-3]
		// Make re3 the more complex of the two.
		if .Op > .Op {
			,  = , 
			.stack[-3] = 
		}
		mergeCharClass(, )
		.reuse()
		.stack = .stack[:-1]
		return true
	}

	if  >= 2 {
		 := .stack[-1]
		 := .stack[-2]
		if .Op == opVerticalBar {
			if  >= 3 {
				// Now out of reach.
				// Clean opportunistically.
				cleanAlt(.stack[-3])
			}
			.stack[-2] = 
			.stack[-1] = 
			return true
		}
	}
	return false
}

// parseRightParen handles a ) in the input.
func ( *parser) () error {
	.concat()
	if .swapVerticalBar() {
		// pop vertical bar
		.stack = .stack[:len(.stack)-1]
	}
	.alternate()

	 := len(.stack)
	if  < 2 {
		return &Error{ErrUnexpectedParen, .wholeRegexp}
	}
	 := .stack[-1]
	 := .stack[-2]
	.stack = .stack[:-2]
	if .Op != opLeftParen {
		return &Error{ErrUnexpectedParen, .wholeRegexp}
	}
	// Restore flags at time of paren.
	.flags = .Flags
	if .Cap == 0 {
		// Just for grouping.
		.push()
	} else {
		.Op = OpCapture
		.Sub = .Sub0[:1]
		.Sub[0] = 
		.push()
	}
	return nil
}

// parseEscape parses an escape sequence at the beginning of s
// and returns the rune.
func ( *parser) ( string) ( rune,  string,  error) {
	 := [1:]
	if  == "" {
		return 0, "", &Error{ErrTrailingBackslash, ""}
	}
	, ,  := nextRune()
	if  != nil {
		return 0, "", 
	}

:
	switch  {
	default:
		if  < utf8.RuneSelf && !isalnum() {
			// Escaped non-word characters are always themselves.
			// PCRE is not quite so rigorous: it accepts things like
			// \q, but we don't. We once rejected \_, but too many
			// programs and people insist on using it, so allow \_.
			return , , nil
		}

	// Octal escapes.
	case '1', '2', '3', '4', '5', '6', '7':
		// Single non-zero digit is a backreference; not supported
		if  == "" || [0] < '0' || [0] > '7' {
			break
		}
		fallthrough
	case '0':
		// Consume up to three octal digits; already have one.
		 =  - '0'
		for  := 1;  < 3; ++ {
			if  == "" || [0] < '0' || [0] > '7' {
				break
			}
			 = *8 + rune([0]) - '0'
			 = [1:]
		}
		return , , nil

	// Hexadecimal escapes.
	case 'x':
		if  == "" {
			break
		}
		if , ,  = nextRune();  != nil {
			return 0, "", 
		}
		if  == '{' {
			// Any number of digits in braces.
			// Perl accepts any text at all; it ignores all text
			// after the first non-hex digit. We require only hex digits,
			// and at least one.
			 := 0
			 = 0
			for {
				if  == "" {
					break 
				}
				if , ,  = nextRune();  != nil {
					return 0, "", 
				}
				if  == '}' {
					break
				}
				 := unhex()
				if  < 0 {
					break 
				}
				 = *16 + 
				if  > unicode.MaxRune {
					break 
				}
				++
			}
			if  == 0 {
				break 
			}
			return , , nil
		}

		// Easy case: two hex digits.
		 := unhex()
		if , ,  = nextRune();  != nil {
			return 0, "", 
		}
		 := unhex()
		if  < 0 ||  < 0 {
			break
		}
		return *16 + , , nil

	// C escapes. There is no case 'b', to avoid misparsing
	// the Perl word-boundary \b as the C backspace \b
	// when in POSIX mode. In Perl, /\b/ means word-boundary
	// but /[\b]/ means backspace. We don't support that.
	// If you want a backspace, embed a literal backspace
	// character or use \x08.
	case 'a':
		return '\a', , 
	case 'f':
		return '\f', , 
	case 'n':
		return '\n', , 
	case 'r':
		return '\r', , 
	case 't':
		return '\t', , 
	case 'v':
		return '\v', , 
	}
	return 0, "", &Error{ErrInvalidEscape, [:len()-len()]}
}

// parseClassChar parses a character class character at the beginning of s
// and returns it.
func ( *parser) (,  string) ( rune,  string,  error) {
	if  == "" {
		return 0, "", &Error{Code: ErrMissingBracket, Expr: }
	}

	// Allow regular escape sequences even though
	// many need not be escaped in this context.
	if [0] == '\\' {
		return .parseEscape()
	}

	return nextRune()
}

type charGroup struct {
	sign  int
	class []rune
}

//go:generate perl make_perl_groups.pl perl_groups.go

// parsePerlClassEscape parses a leading Perl character class escape like \d
// from the beginning of s. If one is present, it appends the characters to r
// and returns the new slice r and the remainder of the string.
func ( *parser) ( string,  []rune) ( []rune,  string) {
	if .flags&PerlX == 0 || len() < 2 || [0] != '\\' {
		return
	}
	 := perlGroup[[0:2]]
	if .sign == 0 {
		return
	}
	return .appendGroup(, ), [2:]
}

// parseNamedClass parses a leading POSIX named character class like [:alnum:]
// from the beginning of s. If one is present, it appends the characters to r
// and returns the new slice r and the remainder of the string.
func ( *parser) ( string,  []rune) ( []rune,  string,  error) {
	if len() < 2 || [0] != '[' || [1] != ':' {
		return
	}

	 := strings.Index([2:], ":]")
	if  < 0 {
		return
	}
	 += 2
	,  := [0:+2], [+2:]
	 := posixGroup[]
	if .sign == 0 {
		return nil, "", &Error{ErrInvalidCharRange, }
	}
	return .appendGroup(, ), , nil
}

func ( *parser) ( []rune,  charGroup) []rune {
	if .flags&FoldCase == 0 {
		if .sign < 0 {
			 = appendNegatedClass(, .class)
		} else {
			 = appendClass(, .class)
		}
	} else {
		 := .tmpClass[:0]
		 = appendFoldedClass(, .class)
		.tmpClass = 
		 = cleanClass(&.tmpClass)
		if .sign < 0 {
			 = appendNegatedClass(, )
		} else {
			 = appendClass(, )
		}
	}
	return 
}

var anyTable = &unicode.RangeTable{
	R16: []unicode.Range16{{Lo: 0, Hi: 1<<16 - 1, Stride: 1}},
	R32: []unicode.Range32{{Lo: 1 << 16, Hi: unicode.MaxRune, Stride: 1}},
}

// unicodeTable returns the unicode.RangeTable identified by name
// and the table of additional fold-equivalent code points.
func unicodeTable( string) (*unicode.RangeTable, *unicode.RangeTable) {
	// Special case: "Any" means any.
	if  == "Any" {
		return anyTable, anyTable
	}
	if  := unicode.Categories[];  != nil {
		return , unicode.FoldCategory[]
	}
	if  := unicode.Scripts[];  != nil {
		return , unicode.FoldScript[]
	}
	return nil, nil
}

// parseUnicodeClass parses a leading Unicode character class like \p{Han}
// from the beginning of s. If one is present, it appends the characters to r
// and returns the new slice r and the remainder of the string.
func ( *parser) ( string,  []rune) ( []rune,  string,  error) {
	if .flags&UnicodeGroups == 0 || len() < 2 || [0] != '\\' || [1] != 'p' && [1] != 'P' {
		return
	}

	// Committed to parse or return error.
	 := +1
	if [1] == 'P' {
		 = -1
	}
	 := [2:]
	, ,  := nextRune()
	if  != nil {
		return
	}
	var ,  string
	if  != '{' {
		// Single-letter name.
		 = [:len()-len()]
		 = [2:]
	} else {
		// Name is in braces.
		 := strings.IndexRune(, '}')
		if  < 0 {
			if  = checkUTF8();  != nil {
				return
			}
			return nil, "", &Error{ErrInvalidCharRange, }
		}
		,  = [:+1], [+1:]
		 = [3:]
		if  = checkUTF8();  != nil {
			return
		}
	}

	// Group can have leading negation too.  \p{^Han} == \P{Han}, \P{^Han} == \p{Han}.
	if  != "" && [0] == '^' {
		 = -
		 = [1:]
	}

	,  := unicodeTable()
	if  == nil {
		return nil, "", &Error{ErrInvalidCharRange, }
	}

	if .flags&FoldCase == 0 ||  == nil {
		if  > 0 {
			 = appendTable(, )
		} else {
			 = appendNegatedTable(, )
		}
	} else {
		// Merge and clean tab and fold in a temporary buffer.
		// This is necessary for the negative case and just tidy
		// for the positive case.
		 := .tmpClass[:0]
		 = appendTable(, )
		 = appendTable(, )
		.tmpClass = 
		 = cleanClass(&.tmpClass)
		if  > 0 {
			 = appendClass(, )
		} else {
			 = appendNegatedClass(, )
		}
	}
	return , , nil
}

// parseClass parses a character class at the beginning of s
// and pushes it onto the parse stack.
func ( *parser) ( string) ( string,  error) {
	 := [1:] // chop [
	 := .newRegexp(OpCharClass)
	.Flags = .flags
	.Rune = .Rune0[:0]

	 := +1
	if  != "" && [0] == '^' {
		 = -1
		 = [1:]

		// If character class does not match \n, add it here,
		// so that negation later will do the right thing.
		if .flags&ClassNL == 0 {
			.Rune = append(.Rune, '\n', '\n')
		}
	}

	 := .Rune
	 := true // ] and - are okay as first char in class
	for  == "" || [0] != ']' ||  {
		// POSIX: - is only okay unescaped as first or last in class.
		// Perl: - is okay anywhere.
		if  != "" && [0] == '-' && .flags&PerlX == 0 && ! && (len() == 1 || [1] != ']') {
			,  := utf8.DecodeRuneInString([1:])
			return "", &Error{Code: ErrInvalidCharRange, Expr: [:1+]}
		}
		 = false

		// Look for POSIX [:alnum:] etc.
		if len() > 2 && [0] == '[' && [1] == ':' {
			, ,  := .parseNamedClass(, )
			if  != nil {
				return "", 
			}
			if  != nil {
				,  = , 
				continue
			}
		}

		// Look for Unicode character group like \p{Han}.
		, ,  := .parseUnicodeClass(, )
		if  != nil {
			return "", 
		}
		if  != nil {
			,  = , 
			continue
		}

		// Look for Perl character class symbols (extension).
		if ,  := .parsePerlClassEscape(, );  != nil {
			,  = , 
			continue
		}

		// Single character or simple range.
		 := 
		var ,  rune
		if , ,  = .parseClassChar(, );  != nil {
			return "", 
		}
		 = 
		// [a-] means (a|-) so check for final ].
		if len() >= 2 && [0] == '-' && [1] != ']' {
			 = [1:]
			if , ,  = .parseClassChar(, );  != nil {
				return "", 
			}
			if  <  {
				 = [:len()-len()]
				return "", &Error{Code: ErrInvalidCharRange, Expr: }
			}
		}
		if .flags&FoldCase == 0 {
			 = appendRange(, , )
		} else {
			 = appendFoldedRange(, , )
		}
	}
	 = [1:] // chop ]

	// Use &re.Rune instead of &class to avoid allocation.
	.Rune = 
	 = cleanClass(&.Rune)
	if  < 0 {
		 = negateClass()
	}
	.Rune = 
	.push()
	return , nil
}

// cleanClass sorts the ranges (pairs of elements of r),
// merges them, and eliminates duplicates.
func cleanClass( *[]rune) []rune {

	// Sort by lo increasing, hi decreasing to break ties.
	sort.Sort(ranges{})

	 := *
	if len() < 2 {
		return 
	}

	// Merge abutting, overlapping.
	 := 2 // write index
	for  := 2;  < len();  += 2 {
		,  := [], [+1]
		if  <= [-1]+1 {
			// merge with previous range
			if  > [-1] {
				[-1] = 
			}
			continue
		}
		// new disjoint range
		[] = 
		[+1] = 
		 += 2
	}

	return [:]
}

// inCharClass reports whether r is in the class.
// It assumes the class has been cleaned by cleanClass.
func inCharClass( rune,  []rune) bool {
	,  := sort.Find(len()/2, func( int) int {
		,  := [2*], [2*+1]
		if  >  {
			return +1
		}
		if  <  {
			return -1
		}
		return 0
	})
	return 
}

// appendLiteral returns the result of appending the literal x to the class r.
func appendLiteral( []rune,  rune,  Flags) []rune {
	if &FoldCase != 0 {
		return appendFoldedRange(, , )
	}
	return appendRange(, , )
}

// appendRange returns the result of appending the range lo-hi to the class r.
func appendRange( []rune, ,  rune) []rune {
	// Expand last range or next to last range if it overlaps or abuts.
	// Checking two ranges helps when appending case-folded
	// alphabets, so that one range can be expanding A-Z and the
	// other expanding a-z.
	 := len()
	for  := 2;  <= 4;  += 2 { // twice, using i=2, i=4
		if  >=  {
			,  := [-], [-+1]
			if  <= +1 &&  <= +1 {
				if  <  {
					[-] = 
				}
				if  >  {
					[-+1] = 
				}
				return 
			}
		}
	}

	return append(, , )
}

const (
	// minimum and maximum runes involved in folding.
	// checked during test.
	minFold = 0x0041
	maxFold = 0x1e943
)

// appendFoldedRange returns the result of appending the range lo-hi
// and its case folding-equivalent runes to the class r.
func appendFoldedRange( []rune, ,  rune) []rune {
	// Optimizations.
	if  <= minFold &&  >= maxFold {
		// Range is full: folding can't add more.
		return appendRange(, , )
	}
	if  < minFold ||  > maxFold {
		// Range is outside folding possibilities.
		return appendRange(, , )
	}
	if  < minFold {
		// [lo, minFold-1] needs no folding.
		 = appendRange(, , minFold-1)
		 = minFold
	}
	if  > maxFold {
		// [maxFold+1, hi] needs no folding.
		 = appendRange(, maxFold+1, )
		 = maxFold
	}

	// Brute force. Depend on appendRange to coalesce ranges on the fly.
	for  := ;  <= ; ++ {
		 = appendRange(, , )
		 := unicode.SimpleFold()
		for  !=  {
			 = appendRange(, , )
			 = unicode.SimpleFold()
		}
	}
	return 
}

// appendClass returns the result of appending the class x to the class r.
// It assume x is clean.
func appendClass( []rune,  []rune) []rune {
	for  := 0;  < len();  += 2 {
		 = appendRange(, [], [+1])
	}
	return 
}

// appendFoldedClass returns the result of appending the case folding of the class x to the class r.
func appendFoldedClass( []rune,  []rune) []rune {
	for  := 0;  < len();  += 2 {
		 = appendFoldedRange(, [], [+1])
	}
	return 
}

// appendNegatedClass returns the result of appending the negation of the class x to the class r.
// It assumes x is clean.
func appendNegatedClass( []rune,  []rune) []rune {
	 := '\u0000'
	for  := 0;  < len();  += 2 {
		,  := [], [+1]
		if  <= -1 {
			 = appendRange(, , -1)
		}
		 =  + 1
	}
	if  <= unicode.MaxRune {
		 = appendRange(, , unicode.MaxRune)
	}
	return 
}

// appendTable returns the result of appending x to the class r.
func appendTable( []rune,  *unicode.RangeTable) []rune {
	for ,  := range .R16 {
		, ,  := rune(.Lo), rune(.Hi), rune(.Stride)
		if  == 1 {
			 = appendRange(, , )
			continue
		}
		for  := ;  <= ;  +=  {
			 = appendRange(, , )
		}
	}
	for ,  := range .R32 {
		, ,  := rune(.Lo), rune(.Hi), rune(.Stride)
		if  == 1 {
			 = appendRange(, , )
			continue
		}
		for  := ;  <= ;  +=  {
			 = appendRange(, , )
		}
	}
	return 
}

// appendNegatedTable returns the result of appending the negation of x to the class r.
func appendNegatedTable( []rune,  *unicode.RangeTable) []rune {
	 := '\u0000' // lo end of next class to add
	for ,  := range .R16 {
		, ,  := rune(.Lo), rune(.Hi), rune(.Stride)
		if  == 1 {
			if  <= -1 {
				 = appendRange(, , -1)
			}
			 =  + 1
			continue
		}
		for  := ;  <= ;  +=  {
			if  <= -1 {
				 = appendRange(, , -1)
			}
			 =  + 1
		}
	}
	for ,  := range .R32 {
		, ,  := rune(.Lo), rune(.Hi), rune(.Stride)
		if  == 1 {
			if  <= -1 {
				 = appendRange(, , -1)
			}
			 =  + 1
			continue
		}
		for  := ;  <= ;  +=  {
			if  <= -1 {
				 = appendRange(, , -1)
			}
			 =  + 1
		}
	}
	if  <= unicode.MaxRune {
		 = appendRange(, , unicode.MaxRune)
	}
	return 
}

// negateClass overwrites r and returns r's negation.
// It assumes the class r is already clean.
func negateClass( []rune) []rune {
	 := '\u0000' // lo end of next class to add
	 := 0             // write index
	for  := 0;  < len();  += 2 {
		,  := [], [+1]
		if  <= -1 {
			[] = 
			[+1] =  - 1
			 += 2
		}
		 =  + 1
	}
	 = [:]
	if  <= unicode.MaxRune {
		// It's possible for the negation to have one more
		// range - this one - than the original class, so use append.
		 = append(, , unicode.MaxRune)
	}
	return 
}

// ranges implements sort.Interface on a []rune.
// The choice of receiver type definition is strange
// but avoids an allocation since we already have
// a *[]rune.
type ranges struct {
	p *[]rune
}

func ( ranges) (,  int) bool {
	 := *.p
	 *= 2
	 *= 2
	return [] < [] || [] == [] && [+1] > [+1]
}

func ( ranges) () int {
	return len(*.p) / 2
}

func ( ranges) (,  int) {
	 := *.p
	 *= 2
	 *= 2
	[], [+1], [], [+1] = [], [+1], [], [+1]
}

func checkUTF8( string) error {
	for  != "" {
		,  := utf8.DecodeRuneInString()
		if  == utf8.RuneError &&  == 1 {
			return &Error{Code: ErrInvalidUTF8, Expr: }
		}
		 = [:]
	}
	return nil
}

func nextRune( string) ( rune,  string,  error) {
	,  := utf8.DecodeRuneInString()
	if  == utf8.RuneError &&  == 1 {
		return 0, "", &Error{Code: ErrInvalidUTF8, Expr: }
	}
	return , [:], nil
}

func isalnum( rune) bool {
	return '0' <=  &&  <= '9' || 'A' <=  &&  <= 'Z' || 'a' <=  &&  <= 'z'
}

func unhex( rune) rune {
	if '0' <=  &&  <= '9' {
		return  - '0'
	}
	if 'a' <=  &&  <= 'f' {
		return  - 'a' + 10
	}
	if 'A' <=  &&  <= 'F' {
		return  - 'A' + 10
	}
	return -1
}