package syntax
import (
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// Prog is a program made of a flat list of instructions.
type Prog struct {
// Inst is the instruction list; instructions refer to each other by index.
Inst []Inst
// Start is the index in Inst of the instruction where execution begins.
Start int
// NumCap is not referenced by the code visible here; presumably the number
// of capture slots used by InstCapture instructions — TODO confirm.
NumCap int
}
// InstOp is an instruction opcode.
type InstOp uint8
// Opcode values. They are numbered consecutively from zero by iota, and
// InstOp.String uses the value as an index into instOpNames, so the order
// here must stay in step with that table.
const (
InstAlt InstOp = iota
InstAltMatch
InstCapture
InstEmptyWidth
InstMatch
InstFail
InstNop
InstRune
InstRune1
InstRuneAny
InstRuneAnyNotNL
)
// instOpNames maps each InstOp value to its printable name. It is indexed
// directly by the opcode value, so entries must be listed in the same order
// as the InstOp constants.
var instOpNames = []string {
"InstAlt" ,
"InstAltMatch" ,
"InstCapture" ,
"InstEmptyWidth" ,
"InstMatch" ,
"InstFail" ,
"InstNop" ,
"InstRune" ,
"InstRune1" ,
"InstRuneAny" ,
"InstRuneAnyNotNL" ,
}
// String returns the name recorded for the opcode in instOpNames, or the
// empty string when the opcode lies beyond the end of that table.
func (i InstOp) String() string {
	if idx := int(i); idx < len(instOpNames) {
		return instOpNames[idx]
	}
	return ""
}
// EmptyOp is a bit set of zero-width assertions.
type EmptyOp uint8
// Each constant occupies its own bit (1 << iota), so several of them can be
// OR-ed into one EmptyOp value, as EmptyOpContext and StartCond do.
const (
EmptyBeginLine EmptyOp = 1 << iota
EmptyEndLine
EmptyBeginText
EmptyEndText
EmptyWordBoundary
EmptyNoWordBoundary
)
// EmptyOpContext returns the set of zero-width assertions that hold at the
// position between the runes r1 and r2.
//
// A negative r1 is treated as the beginning of the text and a negative r2 as
// the end of the text. Exactly one of EmptyWordBoundary and
// EmptyNoWordBoundary is always present in the result.
func EmptyOpContext(r1, r2 rune) EmptyOp {
	var op EmptyOp

	// Left-hand side: a word character contributes only to the boundary
	// decision; otherwise check for a line or text start.
	leftWord := IsWordChar(r1)
	if !leftWord {
		if r1 == '\n' {
			op |= EmptyBeginLine
		} else if r1 < 0 {
			op |= EmptyBeginText | EmptyBeginLine
		}
	}

	// Right-hand side, mirrored for line or text end.
	rightWord := IsWordChar(r2)
	if !rightWord {
		if r2 == '\n' {
			op |= EmptyEndLine
		} else if r2 < 0 {
			op |= EmptyEndText | EmptyEndLine
		}
	}

	// A boundary exists exactly when one side is a word character and the
	// other is not.
	if leftWord != rightWord {
		op |= EmptyWordBoundary
	} else {
		op |= EmptyNoWordBoundary
	}
	return op
}
// IsWordChar reports whether r is an ASCII letter, an ASCII digit, or an
// underscore.
func IsWordChar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case 'A' <= r && r <= 'Z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '_':
		return true
	}
	return false
}
// Inst is a single instruction of a Prog.
type Inst struct {
// Op selects the instruction kind.
Op InstOp
// Out is the index in Prog.Inst of the instruction to continue with.
Out uint32
// Arg is an opcode-specific operand. The visible code reads it as an
// EmptyOp for InstEmptyWidth, tests it against FoldCase (as Flags) for rune
// instructions, and prints it next to Out for InstAlt/InstAltMatch
// (presumably the alternative branch target) and for InstCapture.
Arg uint32
// Rune holds either a single literal rune or a list of inclusive lo/hi
// pairs; MatchRunePos searches the pairs assuming ascending order.
Rune []rune
}
// String returns the textual listing of the program produced by dumpProg.
func (p *Prog) String() string {
	out := new(strings.Builder)
	dumpProg(out, p)
	return out.String()
}
// skipNop follows the Out links starting at pc and returns the first
// instruction that is neither an InstNop nor an InstCapture.
func (p *Prog) skipNop(pc uint32) *Inst {
	for {
		inst := &p.Inst[pc]
		if inst.Op != InstNop && inst.Op != InstCapture {
			return inst
		}
		pc = inst.Out
	}
}
// op returns i.Op, except that the specialised rune opcodes (InstRune1,
// InstRuneAny, InstRuneAnyNotNL) are all reported as InstRune.
func (i *Inst) op() InstOp {
	if i.Op == InstRune1 || i.Op == InstRuneAny || i.Op == InstRuneAnyNotNL {
		return InstRune
	}
	return i.Op
}
// Prefix collects the literal runes that the program must match first.
//
// Starting at p.Start (skipping no-ops and captures), it gathers consecutive
// single-rune instructions that are not case-folding and do not hold
// utf8.RuneError. complete reports whether the instruction reached after the
// gathered prefix is InstMatch.
func (p *Prog) Prefix() (prefix string, complete bool) {
	inst := p.skipNop(uint32(p.Start))

	// No literal at the start: return without touching a builder.
	if inst.op() != InstRune || len(inst.Rune) != 1 {
		return "", inst.Op == InstMatch
	}

	var lit strings.Builder
	for {
		if inst.op() != InstRune || len(inst.Rune) != 1 {
			break
		}
		if Flags(inst.Arg)&FoldCase != 0 || inst.Rune[0] == utf8.RuneError {
			break
		}
		lit.WriteRune(inst.Rune[0])
		inst = p.skipNop(inst.Out)
	}
	return lit.String(), inst.Op == InstMatch
}
// StartCond returns the union of the empty-width conditions found on the
// straight-line path from p.Start, walking through InstEmptyWidth,
// InstCapture and InstNop instructions until any other opcode is reached.
// If that path hits InstFail, it returns ^EmptyOp(0) (every bit set).
func (p *Prog) StartCond() EmptyOp {
	var cond EmptyOp
	for inst := &p.Inst[uint32(p.Start)]; ; inst = &p.Inst[inst.Out] {
		switch inst.Op {
		case InstEmptyWidth:
			cond |= EmptyOp(inst.Arg)
		case InstFail:
			return ^EmptyOp(0)
		case InstCapture, InstNop:
			// Nothing to record; just follow Out.
		default:
			return cond
		}
	}
}
// noMatch is the position MatchRunePos returns when the rune is not matched.
const noMatch = -1
// MatchRune reports whether the instruction matches r, i.e. whether
// MatchRunePos finds a position for it.
func (i *Inst) MatchRune(r rune) bool {
	pos := i.MatchRunePos(r)
	return pos != noMatch
}
// MatchRunePos checks whether the instruction matches r.
//
// It returns noMatch when r is not matched. Otherwise it returns the index of
// the matching lo/hi pair in i.Rune, or 0 for a single-rune instruction
// (which, when the FoldCase flag is set in i.Arg, also matches the simple
// case folds of that rune).
func (i *Inst) MatchRunePos(r rune) int {
	ranges := i.Rune
	n := len(ranges)

	switch {
	case n == 0:
		return noMatch

	case n == 1:
		// A one-element slice is a literal rune rather than a range pair.
		lit := ranges[0]
		if r == lit {
			return 0
		}
		if Flags(i.Arg)&FoldCase == 0 {
			return noMatch
		}
		// Walk the fold orbit of the literal until it wraps around.
		for f := unicode.SimpleFold(lit); f != lit; f = unicode.SimpleFold(f) {
			if f == r {
				return 0
			}
		}
		return noMatch

	case n == 2:
		if ranges[0] <= r && r <= ranges[1] {
			return 0
		}
		return noMatch

	case n == 4, n == 6, n == 8:
		// Few pairs: scan them in order, stopping as soon as r falls below
		// the start of a pair.
		for j := 0; j < n; j += 2 {
			if r < ranges[j] {
				return noMatch
			}
			if r <= ranges[j+1] {
				return j / 2
			}
		}
		return noMatch
	}

	// Every other length: binary search over the pairs.
	lo, hi := 0, n/2
	for lo < hi {
		mid := int(uint(lo+hi) >> 1)
		switch {
		case r < ranges[2*mid]:
			hi = mid
		case r <= ranges[2*mid+1]:
			return mid
		default:
			lo = mid + 1
		}
	}
	return noMatch
}
// MatchEmptyWidth reports whether the single empty-width assertion stored in
// i.Arg holds between the runes before and after. A value of -1 for either
// rune stands for the edge of the text.
//
// It panics if i.Arg is not exactly one of the EmptyOp constants.
func (i *Inst) MatchEmptyWidth(before rune, after rune) bool {
	op := EmptyOp(i.Arg)

	if op == EmptyBeginLine {
		return before == '\n' || before == -1
	}
	if op == EmptyEndLine {
		return after == '\n' || after == -1
	}
	if op == EmptyBeginText {
		return before == -1
	}
	if op == EmptyEndText {
		return after == -1
	}
	if op == EmptyWordBoundary {
		wordBefore, wordAfter := IsWordChar(before), IsWordChar(after)
		return wordBefore != wordAfter
	}
	if op == EmptyNoWordBoundary {
		wordBefore, wordAfter := IsWordChar(before), IsWordChar(after)
		return wordBefore == wordAfter
	}
	panic("unknown empty width arg")
}
// String returns the one-line textual form of the instruction produced by
// dumpInst.
func (i *Inst) String() string {
	out := new(strings.Builder)
	dumpInst(out, i)
	return out.String()
}
// bw appends each of args to b, in order.
func bw(b *strings.Builder, args ...string) {
	for idx := 0; idx < len(args); idx++ {
		b.WriteString(args[idx])
	}
}
// dumpProg writes a listing of p to b, one instruction per line.
//
// Each line consists of the instruction index, left-padded with spaces to a
// width of three, followed by "*" if it is the start instruction, a tab, the
// instruction text from dumpInst, and a newline.
func dumpProg(b *strings.Builder, p *Prog) {
	for j := range p.Inst {
		i := &p.Inst[j]
		pc := strconv.Itoa(j)
		if len(pc) < 3 {
			// The pad literal must be three characters long: it is sliced
			// at len(pc), which can be 1 or 2 here. A shorter literal makes
			// the slice expression panic for two-digit indices.
			b.WriteString("   "[len(pc):])
		}
		if j == p.Start {
			pc += "*"
		}
		bw(b, pc, "\t")
		dumpInst(b, i)
		bw(b, "\n")
	}
}
// u32 formats i as a base-10 string.
func u32(i uint32) string {
	wide := uint64(i)
	return strconv.FormatUint(wide, 10)
}
// dumpInst writes the textual form of instruction i to b. Opcodes that are
// not listed below produce no output.
func dumpInst(b *strings.Builder, i *Inst) {
	// Both operands are rendered in decimal wherever they are printed.
	out, arg := u32(i.Out), u32(i.Arg)

	switch i.Op {
	case InstAlt:
		b.WriteString("alt -> " + out + ", " + arg)
	case InstAltMatch:
		b.WriteString("altmatch -> " + out + ", " + arg)
	case InstCapture:
		b.WriteString("cap " + arg + " -> " + out)
	case InstEmptyWidth:
		b.WriteString("empty " + arg + " -> " + out)
	case InstMatch:
		b.WriteString("match")
	case InstFail:
		b.WriteString("fail")
	case InstNop:
		b.WriteString("nop -> " + out)
	case InstRune:
		// A nil rune slice is flagged first; the regular rendering below
		// is still written after the marker.
		if i.Rune == nil {
			b.WriteString("rune <nil>")
		}
		b.WriteString("rune " + strconv.QuoteToASCII(string(i.Rune)))
		if Flags(i.Arg)&FoldCase != 0 {
			b.WriteString("/i")
		}
		b.WriteString(" -> " + out)
	case InstRune1:
		b.WriteString("rune1 " + strconv.QuoteToASCII(string(i.Rune)) + " -> " + out)
	case InstRuneAny:
		b.WriteString("any -> " + out)
	case InstRuneAnyNotNL:
		b.WriteString("anynotnl -> " + out)
	}
}
// The pages are generated with Golds v0.7.0-preview. (GOOS=linux GOARCH=amd64)
// Golds is a Go 101 project developed by Tapir Liu.
// PR and bug reports are welcome and can be submitted to the issue list.
// Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.