package syntax
import (
"slices"
"strconv"
"strings"
"unicode"
)
// A Regexp is one node of a regular expression syntax tree.
// Which fields are meaningful depends on Op.
type Regexp struct {
Op Op // operator: selects the kind of node
Flags Flags // bit flags; the code in this file tests FoldCase, NonGreedy and WasDollar
Sub []*Regexp // subexpressions; unary operators and OpCapture use Sub[0]
Sub0 [1 ]*Regexp // NOTE(review): presumably inline backing storage for a one-element Sub; not referenced in this part of the file
Rune []rune // runes of an OpLiteral, or (lo, hi) range pairs of an OpCharClass
Rune0 [2 ]rune // NOTE(review): presumably inline backing storage for a short Rune; not referenced in this part of the file
Min, Max int // repetition bounds of an OpRepeat; a negative Max is printed as an open upper bound
Cap int // capture index of an OpCapture
Name string // capture name of an OpCapture; empty for an unnamed group
}
// An Op is a single regular expression operator, stored in Regexp.Op.
type Op uint8
// Operator values. Numbering starts at 1, so the zero Op is none of these.
// The order matters: writeRegexp treats every operator declared after
// OpCapture as needing a non-capturing group when it is repeated.
const (
OpNoMatch Op = 1 + iota // printed as [^\x00-\x{10FFFF}]
OpEmptyMatch // printed as (?:)
OpLiteral // sequence of literal runes held in Rune
OpCharClass // character class; Rune holds (lo, hi) range pairs
OpAnyCharNotNL // printed as . and reported by calcFlags as incompatible with the s flag
OpAnyChar // printed as . and reported by calcFlags as requiring the s flag
OpBeginLine // printed as ^ and reported by calcFlags as requiring the m flag
OpEndLine // printed as $ and reported by calcFlags as requiring the m flag
OpBeginText // printed as \A
OpEndText // printed as $ when Flags has WasDollar set, otherwise as \z
OpWordBoundary // printed as \b
OpNoWordBoundary // printed as \B
OpCapture // capturing group around Sub[0], with index Cap and optional Name
OpStar // Sub[0] followed by *
OpPlus // Sub[0] followed by +
OpQuest // Sub[0] followed by ?
OpRepeat // Sub[0] followed by {Min}, {Min,} or {Min,Max}
OpConcat // the elements of Sub printed one after another
OpAlternate // the elements of Sub separated by |
)
// NOTE(review): opPseudo is not referenced in this part of the file; presumably
// values at or above it are reserved for internal pseudo-operators. Confirm
// against its users.
const opPseudo Op = 128
// Equal reports whether x and y have identical structure.
//
// Two nil pointers are equal; a nil pointer never equals a non-nil one.
// Nodes with different operators are never equal. Beyond that, only the
// fields relevant to the operator are compared:
//
//   - OpEndText: the WasDollar flag.
//   - OpLiteral, OpCharClass: the Rune slices.
//   - OpAlternate, OpConcat: the Sub slices, element by element.
//   - OpStar, OpPlus, OpQuest: the NonGreedy flag and Sub[0].
//   - OpRepeat: the NonGreedy flag, Min, Max and Sub[0].
//   - OpCapture: Cap, Name and Sub[0].
//
// Every other operator carries no extra state, so matching operators suffice.
func (x *Regexp) Equal(y *Regexp) bool {
	if x == nil || y == nil {
		return x == y
	}
	if x.Op != y.Op {
		return false
	}

	// Greediness is part of the identity of every repetition operator.
	sameGreed := x.Flags&NonGreedy == y.Flags&NonGreedy

	switch x.Op {
	case OpEndText:
		return x.Flags&WasDollar == y.Flags&WasDollar

	case OpLiteral, OpCharClass:
		return slices.Equal(x.Rune, y.Rune)

	case OpAlternate, OpConcat:
		return slices.EqualFunc(x.Sub, y.Sub, (*Regexp).Equal)

	case OpStar, OpPlus, OpQuest:
		// The cheap scalar checks come first so Sub[0] is only
		// touched once they have all passed.
		return sameGreed && x.Sub[0].Equal(y.Sub[0])

	case OpRepeat:
		return sameGreed &&
			x.Min == y.Min &&
			x.Max == y.Max &&
			x.Sub[0].Equal(y.Sub[0])

	case OpCapture:
		return x.Cap == y.Cap &&
			x.Name == y.Name &&
			x.Sub[0].Equal(y.Sub[0])
	}
	return true
}
// printFlags is a bit set that tells writeRegexp which flag-group syntax and
// grouping parentheses to emit around a node.
type printFlags uint8
const (
flagI printFlags = 1 << iota // write i in the (?flags: prefix
flagM // write m in the (?flags: prefix
flagS // write s in the (?flags: prefix
flagOff // write a closing ) after the node
flagPrec // wrap the node in (?: and )
negShift = 5 // flagM<<negShift and flagS<<negShift are written after a - in the prefix
)
// addSpan records that the flag group f opens at start and closes at last.
// The map behind flags is allocated on first use. start's entry is replaced
// by f; last's entry only gains flagOff, so when start and last are the same
// node it ends up holding both the flags and the close marker.
func addSpan(start, last *Regexp, f printFlags, flags *map[*Regexp]printFlags) {
	spans := *flags
	if spans == nil {
		spans = make(map[*Regexp]printFlags)
		*flags = spans
	}
	spans[start] = f
	spans[last] = spans[last] | flagOff
}
// calcFlags works out which flags the printed form of re depends on.
//
// It returns must, the flags that String will write as enabled, and cant,
// the flags that must not be enabled around re (String writes the m and s
// bits of cant as disabled). When the children of a concatenation or
// alternation disagree, calcFlags does not report must upward; instead it
// records flag groups for runs of children in *flags through addSpan.
func calcFlags(re *Regexp , flags *map [*Regexp ]printFlags ) (must , cant printFlags ) {
switch re .Op {
default :
return 0 , 0
case OpLiteral :
// The first rune inside [minFold, maxFold] whose SimpleFold differs from
// itself decides the answer: the literal needs i when FoldCase is set and
// must not have i otherwise. A literal with no such rune is indifferent.
for _ , r := range re .Rune {
if minFold <= r && r <= maxFold && unicode .SimpleFold (r ) != r {
if re .Flags &FoldCase != 0 {
return flagI , 0
} else {
return 0 , flagI
}
}
}
return 0 , 0
case OpCharClass :
// Rune is read as (lo, hi) pairs, each clamped to [minFold, maxFold].
// For every rune in a range, walk its SimpleFold orbit; if any member of
// the orbit is outside the current range and inCharClass reports false
// for it, the class must not be printed under i.
// NOTE(review): inCharClass is defined elsewhere; presumably it reports
// whether f lies in one of the ranges in re.Rune.
for i := 0 ; i < len (re .Rune ); i += 2 {
lo := max (minFold , re .Rune [i ])
hi := min (maxFold , re .Rune [i +1 ])
for r := lo ; r <= hi ; r ++ {
for f := unicode .SimpleFold (r ); f != r ; f = unicode .SimpleFold (f ) {
if !(lo <= f && f <= hi ) && !inCharClass (f , re .Rune ) {
return 0 , flagI
}
}
}
}
return 0 , 0
case OpAnyCharNotNL :
return 0 , flagS
case OpAnyChar :
return flagS , 0
case OpBeginLine , OpEndLine :
return flagM , 0
case OpEndText :
// Only an end-of-text that is printed as $ is sensitive to m.
if re .Flags &WasDollar != 0 {
return 0 , flagM
}
return 0 , 0
case OpCapture , OpStar , OpPlus , OpQuest , OpRepeat :
// Single-child operators have exactly the needs of their child.
return calcFlags (re .Sub [0 ], flags )
case OpConcat , OpAlternate :
// must and cant accumulate over the current run of compatible children;
// allCant accumulates cant over every child. start is the index of the
// first child of the current run, last is the index of the most recent
// child that contributed a must flag, and did records whether any
// conflict was seen.
var must , cant , allCant printFlags
start := 0
last := 0
did := false
for i , sub := range re .Sub {
subMust , subCant := calcFlags (sub , flags )
if must &subCant != 0 || subMust &cant != 0 {
// This child conflicts with the run so far: close the run as a flag
// group (if it needs any flags) and begin a new run at this child.
if must != 0 {
addSpan (re .Sub [start ], re .Sub [last ], must , flags )
}
must = 0
cant = 0
start = i
did = true
}
must |= subMust
cant |= subCant
allCant |= subCant
if subMust != 0 {
last = i
}
// While the run still needs no flags, keep moving its start forward so
// that a recorded group begins at the first child that needs one.
if must == 0 && start == i {
start ++
}
}
if !did {
// No conflict: hand the accumulated needs to the caller.
return must , cant
}
if must != 0 {
// Close the final run.
addSpan (re .Sub [start ], re .Sub [last ], must , flags )
}
// All needed flags have been recorded as groups; only the restrictions
// are reported upward.
return 0 , allCant
}
}
// writeRegexp appends the textual form of re to b.
//
// f holds the print flags requested by the caller; the entry for re in flags
// (if any) is merged into it. The flag bits select a (?flags: prefix, a
// closing ) and/or a (?: ) wrapper around the node. flags is also consulted
// for every descendant as the function recurses.
func writeRegexp(b *strings .Builder , re *Regexp , f printFlags , flags map [*Regexp ]printFlags ) {
f |= flags [re ]
// When this node both opens and closes a flag group, that group's own
// parentheses already enclose it, so the extra (?: ) is dropped.
if f &flagPrec != 0 && f &^(flagOff |flagPrec ) != 0 && f &flagOff != 0 {
f &^= flagPrec
}
// Any bit other than flagOff and flagPrec means a flag group opens here.
if f &^(flagOff |flagPrec ) != 0 {
b .WriteString (`(?` )
if f &flagI != 0 {
b .WriteString (`i` )
}
if f &flagM != 0 {
b .WriteString (`m` )
}
if f &flagS != 0 {
b .WriteString (`s` )
}
// Bits shifted up by negShift are flags to switch off; they follow a -.
if f &((flagM |flagS )<<negShift ) != 0 {
b .WriteString (`-` )
if f &(flagM <<negShift ) != 0 {
b .WriteString (`m` )
}
if f &(flagS <<negShift ) != 0 {
b .WriteString (`s` )
}
}
b .WriteString (`:` )
}
// Deferred calls run in reverse order at return, so the ) of the (?: )
// wrapper below is written before the ) that closes the flag group.
if f &flagOff != 0 {
defer b .WriteString (`)` )
}
if f &flagPrec != 0 {
b .WriteString (`(?:` )
defer b .WriteString (`)` )
}
switch re .Op {
default :
b .WriteString ("<invalid op" + strconv .Itoa (int (re .Op )) + ">" )
case OpNoMatch :
b .WriteString (`[^\x00-\x{10FFFF}]` )
case OpEmptyMatch :
b .WriteString (`(?:)` )
case OpLiteral :
for _ , r := range re .Rune {
escape (b , r , false )
}
case OpCharClass :
// Rune must hold complete (lo, hi) pairs.
if len (re .Rune )%2 != 0 {
b .WriteString (`[invalid char class]` )
break
}
b .WriteRune ('[' )
if len (re .Rune ) == 0 {
b .WriteString (`^\x00-\x{10FFFF}` )
} else if re .Rune [0 ] == 0 && re .Rune [len (re .Rune )-1 ] == unicode .MaxRune && len (re .Rune ) > 2 {
// The class starts at 0, ends at MaxRune and has more than one range:
// print it in negated form by listing the gaps between the ranges.
b .WriteRune ('^' )
for i := 1 ; i < len (re .Rune )-1 ; i += 2 {
lo , hi := re .Rune [i ]+1 , re .Rune [i +1 ]-1
escape (b , lo , lo == '-' )
if lo != hi {
// Two adjacent runes are written side by side without a dash.
if hi != lo +1 {
b .WriteRune ('-' )
}
escape (b , hi , hi == '-' )
}
}
} else {
for i := 0 ; i < len (re .Rune ); i += 2 {
lo , hi := re .Rune [i ], re .Rune [i +1 ]
escape (b , lo , lo == '-' )
if lo != hi {
// Two adjacent runes are written side by side without a dash.
if hi != lo +1 {
b .WriteRune ('-' )
}
escape (b , hi , hi == '-' )
}
}
}
b .WriteRune (']' )
case OpAnyCharNotNL , OpAnyChar :
// Both print as a dot; the s flag computed by calcFlags tells them apart.
b .WriteString (`.` )
case OpBeginLine :
b .WriteString (`^` )
case OpEndLine :
b .WriteString (`$` )
case OpBeginText :
b .WriteString (`\A` )
case OpEndText :
if re .Flags &WasDollar != 0 {
b .WriteString (`$` )
} else {
b .WriteString (`\z` )
}
case OpWordBoundary :
b .WriteString (`\b` )
case OpNoWordBoundary :
b .WriteString (`\B` )
case OpCapture :
if re .Name != "" {
b .WriteString (`(?P<` )
b .WriteString (re .Name )
b .WriteRune ('>' )
} else {
b .WriteRune ('(' )
}
// An empty-match body is omitted, so the group prints with nothing
// between its parentheses rather than containing (?:).
if re .Sub [0 ].Op != OpEmptyMatch {
writeRegexp (b , re .Sub [0 ], flags [re .Sub [0 ]], flags )
}
b .WriteRune (')' )
case OpStar , OpPlus , OpQuest , OpRepeat :
p := printFlags (0 )
sub := re .Sub [0 ]
// Operators declared after OpCapture (the repetitions, OpConcat and
// OpAlternate) and literals of more than one rune are wrapped in (?: )
// so the repetition suffix applies to the whole operand.
if sub .Op > OpCapture || sub .Op == OpLiteral && len (sub .Rune ) > 1 {
p = flagPrec
}
writeRegexp (b , sub , p , flags )
switch re .Op {
case OpStar :
b .WriteRune ('*' )
case OpPlus :
b .WriteRune ('+' )
case OpQuest :
b .WriteRune ('?' )
case OpRepeat :
// Prints {Min} when Max equals Min, {Min,} when Max is negative,
// and {Min,Max} otherwise.
b .WriteRune ('{' )
b .WriteString (strconv .Itoa (re .Min ))
if re .Max != re .Min {
b .WriteRune (',' )
if re .Max >= 0 {
b .WriteString (strconv .Itoa (re .Max ))
}
}
b .WriteRune ('}' )
}
// A trailing ? marks the repetition as non-greedy.
if re .Flags &NonGreedy != 0 {
b .WriteRune ('?' )
}
case OpConcat :
for _ , sub := range re .Sub {
p := printFlags (0 )
// An alternation inside a concatenation has to be grouped.
if sub .Op == OpAlternate {
p = flagPrec
}
writeRegexp (b , sub , p , flags )
}
case OpAlternate :
for i , sub := range re .Sub {
if i > 0 {
b .WriteRune ('|' )
}
writeRegexp (b , sub , 0 , flags )
}
}
}
// String returns the textual form of re.
//
// calcFlags first determines which flags the whole expression needs and
// which it cannot have. The needed flags, together with the m and s flags
// that must be switched off, become a flag group around the entire output.
// Case-insensitivity is never switched off explicitly.
func (re *Regexp) String() string {
	var spans map[*Regexp]printFlags
	need, forbid := calcFlags(re, &spans)

	// Forbidden m/s flags are moved into the "negated" bit positions.
	top := need | (forbid&^flagI)<<negShift
	if top != 0 {
		// A group opened at the root has to be closed at the root as well.
		top |= flagOff
	}

	var out strings.Builder
	writeRegexp(&out, re, top, spans)
	return out.String()
}
// meta lists the printable characters that must be backslash-escaped to be
// taken literally.
const meta = `\.+*?()|[]{}^$`

// escape appends r to b in a form that reads back as the literal rune r.
//
// A printable rune is written as itself, preceded by a backslash when it is
// listed in meta or when force is set. The control characters \a, \f, \n,
// \r, \t and \v use their short escapes. Any other rune is written in
// hexadecimal: \xNN (at least two digits) below 0x100, \x{N...} otherwise.
func escape(b *strings.Builder, r rune, force bool) {
	if unicode.IsPrint(r) {
		if force || strings.ContainsRune(meta, r) {
			b.WriteRune('\\')
		}
		b.WriteRune(r)
		return
	}

	// Control characters that have a conventional short escape.
	var short string
	switch r {
	case '\a':
		short = `\a`
	case '\f':
		short = `\f`
	case '\n':
		short = `\n`
	case '\r':
		short = `\r`
	case '\t':
		short = `\t`
	case '\v':
		short = `\v`
	}
	if short != "" {
		b.WriteString(short)
		return
	}

	// Everything else is spelled out in hexadecimal.
	digits := strconv.FormatInt(int64(r), 16)
	if r >= 0x100 {
		b.WriteString(`\x{`)
		b.WriteString(digits)
		b.WriteString(`}`)
		return
	}
	b.WriteString(`\x`)
	if len(digits) == 1 {
		// Pad to two digits.
		b.WriteRune('0')
	}
	b.WriteString(digits)
}
// MaxCap walks the tree rooted at re and returns the largest capture index
// found on any OpCapture node, or 0 when no node has a larger one.
func (re *Regexp) MaxCap() int {
	highest := 0
	if re.Op == OpCapture {
		highest = re.Cap
	}
	for i := range re.Sub {
		highest = max(highest, re.Sub[i].MaxCap())
	}
	return highest
}
// CapNames walks the tree rooted at re and returns the names of its capturing
// groups. The result has MaxCap()+1 entries and is indexed by capture index;
// entries for unnamed groups, and for indexes no group uses, are empty.
func (re *Regexp) CapNames() []string {
	size := re.MaxCap() + 1
	out := make([]string, size)
	re.capNames(out)
	return out
}
// capNames stores the name of every capturing group in the tree rooted at re
// into names, at the position given by the group's capture index. names must
// be long enough to be indexed by every capture index in the tree.
func (re *Regexp) capNames(names []string) {
	if re.Op == OpCapture {
		names[re.Cap] = re.Name
	}
	for i := range re.Sub {
		re.Sub[i].capNames(names)
	}
}
The pages are generated with Golds v0.7.0-preview (GOOS=linux GOARCH=amd64).
Golds is a Go 101 project developed by Tapir Liu.
PRs and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.