// Copyright 2015 The Go Authors. All rights reserved.// Use of this source code is governed by a BSD-style// license that can be found in the LICENSE file.// backtrack is a regular expression search with submatch// tracking for small regular expressions and texts. It allocates// a bit vector with (length of input) * (length of prog) bits,// to make sure it never explores the same (character position, instruction)// state multiple times. This limits the search to run in time linear in// the length of the test.//// backtrack is a fast replacement for the NFA code on small// regexps when onepass cannot be used.package regexpimport ()// A job is an entry on the backtracker's job stack. It holds// the instruction pc and the position in the input.type job struct { pc uint32 arg bool pos int}const ( visitedBits = 32 maxBacktrackProg = 500// len(prog.Inst) <= max maxBacktrackVector = 256 * 1024// bit vector size <= max (bits))// bitState holds state for the backtracker.type bitState struct { end int cap []int matchcap []int jobs []job visited []uint32 inputs inputs}var bitStatePool sync.Poolfunc newBitState() *bitState { , := bitStatePool.Get().(*bitState)if ! { = new(bitState) }return}func freeBitState( *bitState) { .inputs.clear()bitStatePool.Put()}// maxBitStateLen returns the maximum length of a string to search with// the backtracker using prog.func maxBitStateLen( *syntax.Prog) int {if !shouldBacktrack() {return0 }returnmaxBacktrackVector / len(.Inst)}// shouldBacktrack reports whether the program is too// long for the backtracker to run.func shouldBacktrack( *syntax.Prog) bool {returnlen(.Inst) <= maxBacktrackProg}// reset resets the state of the backtracker.// end is the end position in the input.// ncap is the number of captures.func ( *bitState) ( *syntax.Prog, int, int) { .end = ifcap(.jobs) == 0 { .jobs = make([]job, 0, 256) } else { .jobs = .jobs[:0] } := (len(.Inst)*(+1) + visitedBits - 1) / visitedBitsifcap(.visited) < { .visited = make([]uint32, , maxBacktrackVector/visitedBits) } else { .visited = .visited[:]clear(.visited) // set to 0 }ifcap(.cap) < { .cap = make([]int, ) } else { .cap = .cap[:] }for := range .cap { .cap[] = -1 }ifcap(.matchcap) < { .matchcap = make([]int, ) } else { .matchcap = .matchcap[:] }for := range .matchcap { .matchcap[] = -1 }}// shouldVisit reports whether the combination of (pc, pos) has not// been visited yet.func ( *bitState) ( uint32, int) bool { := uint(int()*(.end+1) + )if .visited[/visitedBits]&(1<<(&(visitedBits-1))) != 0 {returnfalse } .visited[/visitedBits] |= 1 << ( & (visitedBits - 1))returntrue}// push pushes (pc, pos, arg) onto the job stack if it should be// visited.func ( *bitState) ( *Regexp, uint32, int, bool) {// Only check shouldVisit when arg is false. // When arg is true, we are continuing a previous visit.if .prog.Inst[].Op != syntax.InstFail && ( || .shouldVisit(, )) { .jobs = append(.jobs, job{pc: , arg: , pos: }) }}// tryBacktrack runs a backtracking search starting at pos.func ( *Regexp) ( *bitState, input, uint32, int) bool { := .longest .push(, , , false)forlen(.jobs) > 0 { := len(.jobs) - 1// Pop job off the stack. := .jobs[].pc := .jobs[].pos := .jobs[].arg .jobs = .jobs[:]// Optimization: rather than push and pop, // code that is going to Push and continue // the loop simply updates ip, p, and arg // and jumps to CheckAndLoop. We have to // do the ShouldVisit check that Push // would have, but we avoid the stack // manipulation.goto :if !.shouldVisit(, ) {continue } : := &.prog.Inst[]switch .Op {default:panic("bad inst")casesyntax.InstFail:panic("unexpected InstFail")casesyntax.InstAlt:// Cannot just // b.push(inst.Out, pos, false) // b.push(inst.Arg, pos, false) // If during the processing of inst.Out, we encounter // inst.Arg via another path, we want to process it then. // Pushing it here will inhibit that. Instead, re-push // inst with arg==true as a reminder to push inst.Arg out // later.if {// Finished inst.Out; try inst.Arg. = false = .Arggoto } else { .push(, , , true) = .Outgoto }casesyntax.InstAltMatch:// One opcode consumes runes; the other leads to match.switch .prog.Inst[.Out].Op {casesyntax.InstRune, syntax.InstRune1, syntax.InstRuneAny, syntax.InstRuneAnyNotNL:// inst.Arg is the match. .push(, .Arg, , false) = .Arg = .endgoto }// inst.Out is the match - non-greedy .push(, .Out, .end, false) = .Outgotocasesyntax.InstRune: , := .step()if !.MatchRune() {continue } += = .Outgotocasesyntax.InstRune1: , := .step()if != .Rune[0] {continue } += = .Outgotocasesyntax.InstRuneAnyNotNL: , := .step()if == '\n' || == endOfText {continue } += = .Outgotocasesyntax.InstRuneAny: , := .step()if == endOfText {continue } += = .Outgotocasesyntax.InstCapture:if {// Finished inst.Out; restore the old value. .cap[.Arg] = continue } else {if .Arg < uint32(len(.cap)) {// Capture pos to register, but save old value. .push(, , .cap[.Arg], true) // come back when we're done. .cap[.Arg] = } = .Outgoto }casesyntax.InstEmptyWidth: := .context()if !.match(syntax.EmptyOp(.Arg)) {continue } = .Outgotocasesyntax.InstNop: = .Outgotocasesyntax.InstMatch:// We found a match. If the caller doesn't care // where the match is, no point going further.iflen(.cap) == 0 {returntrue }// Record best match so far. // Only need to check end point, because this entire // call is only considering one start position.iflen(.cap) > 1 { .cap[1] = }if := .matchcap[1]; == -1 || ( && > 0 && > ) {copy(.matchcap, .cap) }// If going for first match, we're done.if ! {returntrue }// If we used the entire text, no longer match is possible.if == .end {returntrue }// Otherwise, continue on in hope of a longer match.continue } }return && len(.matchcap) > 1 && .matchcap[1] >= 0}// backtrack runs a backtracking search of prog on the input starting at pos.func ( *Regexp) ( []byte, string, int, int, []int) []int { := .condif == ^syntax.EmptyOp(0) { // impossiblereturnnil }if &syntax.EmptyBeginText != 0 && != 0 {// Anchored match, past beginning of text.returnnil } := newBitState() , := .inputs.init(nil, , ) .reset(.prog, , )// Anchored search must start at the beginning of the inputif &syntax.EmptyBeginText != 0 {iflen(.cap) > 0 { .cap[0] = }if !.tryBacktrack(, , uint32(.prog.Start), ) {freeBitState()returnnil } } else {// Unanchored search, starting from each possible text position. // Notice that we have to try the empty string at the end of // the text, so the loop condition is pos <= end, not pos < end. // This looks like it's quadratic in the size of the text, // but we are not clearing visited between calls to TrySearch, // so no work is duplicated and it ends up still being linear. := -1for ; <= && != 0; += {iflen(.prefix) > 0 {// Match requires literal prefix; fast search for it. := .index(, )if < 0 {freeBitState()returnnil } += }iflen(.cap) > 0 { .cap[0] = }if .tryBacktrack(, , uint32(.prog.Start), ) {// Match must be leftmost; done.goto } _, = .step() }freeBitState()returnnil }: = append(, .matchcap...)freeBitState()return}
The pages are generated with Goldsv0.7.3. (GOOS=linux GOARCH=amd64)
Golds is a Go 101 project developed by Tapir Liu.
PR and bug reports are welcome and can be submitted to the issue list.
Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds.