// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package bisect can be used by compilers and other programs // to serve as a target for the bisect debugging tool. // See [golang.org/x/tools/cmd/bisect] for details about using the tool. // // To be a bisect target, allowing bisect to help determine which of a set of independent // changes provokes a failure, a program needs to: // // 1. Define a way to accept a change pattern on its command line or in its environment. // The most common mechanism is a command-line flag. // The pattern can be passed to [New] to create a [Matcher], the compiled form of a pattern. // // 2. Assign each change a unique ID. One possibility is to use a sequence number, // but the most common mechanism is to hash some kind of identifying information // like the file and line number where the change might be applied. // [Hash] hashes its arguments to compute an ID. // // 3. Enable each change that the pattern says should be enabled. // The [Matcher.ShouldEnable] method answers this question for a given change ID. // // 4. Print a report identifying each change that the pattern says should be printed. // The [Matcher.ShouldPrint] method answers this question for a given change ID. // The report consists of one more lines on standard error or standard output // that contain a “match marker”. [Marker] returns the match marker for a given ID. // When bisect reports a change as causing the failure, it identifies the change // by printing the report lines with the match marker removed. // // # Example Usage // // A program starts by defining how it receives the pattern. In this example, we will assume a flag. // The next step is to compile the pattern: // // m, err := bisect.New(patternFlag) // if err != nil { // log.Fatal(err) // } // // Then, each time a potential change is considered, the program computes // a change ID by hashing identifying information (source file and line, in this case) // and then calls m.ShouldPrint and m.ShouldEnable to decide whether to // print and enable the change, respectively. The two can return different values // depending on whether bisect is trying to find a minimal set of changes to // disable or to enable to provoke the failure. // // It is usually helpful to write a helper function that accepts the identifying information // and then takes care of hashing, printing, and reporting whether the identified change // should be enabled. For example, a helper for changes identified by a file and line number // would be: // // func ShouldEnable(file string, line int) { // h := bisect.Hash(file, line) // if m.ShouldPrint(h) { // fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line) // } // return m.ShouldEnable(h) // } // // Finally, note that New returns a nil Matcher when there is no pattern, // meaning that the target is not running under bisect at all, // so all changes should be enabled and none should be printed. // In that common case, the computation of the hash can be avoided entirely // by checking for m == nil first: // // func ShouldEnable(file string, line int) bool { // if m == nil { // return true // } // h := bisect.Hash(file, line) // if m.ShouldPrint(h) { // fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line) // } // return m.ShouldEnable(h) // } // // When the identifying information is expensive to format, this code can call // [Matcher.MarkerOnly] to find out whether short report lines containing only the // marker are permitted for a given run. (Bisect permits such lines when it is // still exploring the space of possible changes and will not be showing the // output to the user.) If so, the client can choose to print only the marker: // // func ShouldEnable(file string, line int) bool { // if m == nil { // return true // } // h := bisect.Hash(file, line) // if m.ShouldPrint(h) { // if m.MarkerOnly() { // bisect.PrintMarker(os.Stderr, h) // } else { // fmt.Fprintf(os.Stderr, "%v %s:%d\n", bisect.Marker(h), file, line) // } // } // return m.ShouldEnable(h) // } // // This specific helper – deciding whether to enable a change identified by // file and line number and printing about the change when necessary – is // provided by the [Matcher.FileLine] method. // // Another common usage is deciding whether to make a change in a function // based on the caller's stack, to identify the specific calling contexts that the // change breaks. The [Matcher.Stack] method takes care of obtaining the stack, // printing it when necessary, and reporting whether to enable the change // based on that stack. // // # Pattern Syntax // // Patterns are generated by the bisect tool and interpreted by [New]. // Users should not have to understand the patterns except when // debugging a target's bisect support or debugging the bisect tool itself. // // The pattern syntax selecting a change is a sequence of bit strings // separated by + and - operators. Each bit string denotes the set of // changes with IDs ending in those bits, + is set addition, - is set subtraction, // and the expression is evaluated in the usual left-to-right order. // The special binary number “y” denotes the set of all changes, // standing in for the empty bit string. // In the expression, all the + operators must appear before all the - operators. // A leading + adds to an empty set. A leading - subtracts from the set of all // possible suffixes. // // For example: // // - “01+10” and “+01+10” both denote the set of changes // with IDs ending with the bits 01 or 10. // // - “01+10-1001” denotes the set of changes with IDs // ending with the bits 01 or 10, but excluding those ending in 1001. // // - “-01-1000” and “y-01-1000 both denote the set of all changes // with IDs not ending in 01 nor 1000. // // - “0+1-01+001” is not a valid pattern, because all the + operators do not // appear before all the - operators. // // In the syntaxes described so far, the pattern specifies the changes to // enable and report. If a pattern is prefixed by a “!”, the meaning // changes: the pattern specifies the changes to DISABLE and report. This // mode of operation is needed when a program passes with all changes // enabled but fails with no changes enabled. In this case, bisect // searches for minimal sets of changes to disable. // Put another way, the leading “!” inverts the result from [Matcher.ShouldEnable] // but does not invert the result from [Matcher.ShouldPrint]. // // As a convenience for manual debugging, “n” is an alias for “!y”, // meaning to disable and report all changes. // // Finally, a leading “v” in the pattern indicates that the reports will be shown // to the user of bisect to describe the changes involved in a failure. // At the API level, the leading “v” causes [Matcher.Visible] to return true. // See the next section for details. // // # Match Reports // // The target program must enable only those changed matched // by the pattern, and it must print a match report for each such change. // A match report consists of one or more lines of text that will be // printed by the bisect tool to describe a change implicated in causing // a failure. Each line in the report for a given change must contain a // match marker with that change ID, as returned by [Marker]. // The markers are elided when displaying the lines to the user. // // A match marker has the form “[bisect-match 0x1234]” where // 0x1234 is the change ID in hexadecimal. // An alternate form is “[bisect-match 010101]”, giving the change ID in binary. // // When [Matcher.Visible] returns false, the match reports are only // being processed by bisect to learn the set of enabled changes, // not shown to the user, meaning that each report can be a match // marker on a line by itself, eliding the usual textual description. // When the textual description is expensive to compute, // checking [Matcher.Visible] can help the avoid that expense // in most runs.
package bisect import ( ) // New creates and returns a new Matcher implementing the given pattern. // The pattern syntax is defined in the package doc comment. // // In addition to the pattern syntax syntax, New("") returns nil, nil. // The nil *Matcher is valid for use: it returns true from ShouldEnable // and false from ShouldPrint for all changes. Callers can avoid calling // [Hash], [Matcher.ShouldEnable], and [Matcher.ShouldPrint] entirely // when they recognize the nil Matcher. func ( string) (*Matcher, error) { if == "" { return nil, nil } := new(Matcher) := // Special case for leading 'q' so that 'qn' quietly disables, e.g. fmahash=qn to disable fma // Any instance of 'v' disables 'q'. if len() > 0 && [0] == 'q' { .quiet = true = [1:] if == "" { return nil, &parseError{"invalid pattern syntax: " + } } } // Allow multiple v, so that “bisect cmd vPATTERN” can force verbose all the time. for len() > 0 && [0] == 'v' { .verbose = true .quiet = false = [1:] if == "" { return nil, &parseError{"invalid pattern syntax: " + } } } // Allow multiple !, each negating the last, so that “bisect cmd !PATTERN” works // even when bisect chooses to add its own !. .enable = true for len() > 0 && [0] == '!' { .enable = !.enable = [1:] if == "" { return nil, &parseError{"invalid pattern syntax: " + } } } if == "n" { // n is an alias for !y. .enable = !.enable = "y" } // Parse actual pattern syntax. := true := uint64(0) := 0 := 1 // 1-bit (binary); sometimes 4-bit (hex) for := 0; <= len(); ++ { // Imagine a trailing - at the end of the pattern to flush final suffix := byte('-') if < len() { = [] } if == && == 1 && == 'x' { // leading x for hex = + 1 = 4 continue } switch { default: return nil, &parseError{"invalid pattern syntax: " + } case '2', '3', '4', '5', '6', '7', '8', '9': if != 4 { return nil, &parseError{"invalid pattern syntax: " + } } fallthrough case '0', '1': <<= |= uint64( - '0') case 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F': if != 4 { return nil, &parseError{"invalid pattern syntax: " + } } <<= 4 |= uint64(&^0x20 - 'A' + 10) case 'y': if +1 < len() && ([+1] == '0' || [+1] == '1') { return nil, &parseError{"invalid pattern syntax: " + } } = 0 case '+', '-': if == '+' && == false { // Have already seen a -. Should be - from here on. return nil, &parseError{"invalid pattern syntax (+ after -): " + } } if > 0 { := ( - ) * if > 64 { return nil, &parseError{"pattern bits too long: " + } } if <= 0 { return nil, &parseError{"invalid pattern syntax: " + } } if [] == 'y' { = 0 } := uint64(1)<< - 1 .list = append(.list, cond{, , }) } else if == '-' { // leading - subtracts from complete set .list = append(.list, cond{0, 0, true}) } = 0 = == '+' = + 1 = 1 } } return , nil } // A Matcher is the parsed, compiled form of a PATTERN string. // The nil *Matcher is valid: it has all changes enabled but none reported. type Matcher struct { verbose bool // annotate reporting with human-helpful information quiet bool // disables all reporting. reset if verbose is true. use case is -d=fmahash=qn enable bool // when true, list is for “enable and report” (when false, “disable and report”) list []cond // conditions; later ones win over earlier ones dedup atomicPointerDedup } // atomicPointerDedup is an atomic.Pointer[dedup], // but we are avoiding using Go 1.19's atomic.Pointer // until the bootstrap toolchain can be relied upon to have it. type atomicPointerDedup struct { p unsafe.Pointer } func ( *atomicPointerDedup) () *dedup { return (*dedup)(atomic.LoadPointer(&.p)) } func ( *atomicPointerDedup) (, *dedup) bool { return atomic.CompareAndSwapPointer(&.p, unsafe.Pointer(), unsafe.Pointer()) } // A cond is a single condition in the matcher. // Given an input id, if id&mask == bits, return the result. type cond struct { mask uint64 bits uint64 result bool } // MarkerOnly reports whether it is okay to print only the marker for // a given change, omitting the identifying information. // MarkerOnly returns true when bisect is using the printed reports // only for an intermediate search step, not for showing to users. func ( *Matcher) () bool { return !.verbose } // ShouldEnable reports whether the change with the given id should be enabled. func ( *Matcher) ( uint64) bool { if == nil { return true } return .matchResult() == .enable } // ShouldPrint reports whether to print identifying information about the change with the given id. func ( *Matcher) ( uint64) bool { if == nil || .quiet { return false } return .matchResult() } // matchResult returns the result from the first condition that matches id. func ( *Matcher) ( uint64) bool { for := len(.list) - 1; >= 0; -- { := &.list[] if &.mask == .bits { return .result } } return false } // FileLine reports whether the change identified by file and line should be enabled. // If the change should be printed, FileLine prints a one-line report to w. func ( *Matcher) ( Writer, string, int) bool { if == nil { return true } return .fileLine(, , ) } // fileLine does the real work for FileLine. // This lets FileLine's body handle m == nil and potentially be inlined. func ( *Matcher) ( Writer, string, int) bool { := Hash(, ) if .ShouldPrint() { if .MarkerOnly() { PrintMarker(, ) } else { printFileLine(, , , ) } } return .ShouldEnable() } // printFileLine prints a non-marker-only report for file:line to w. func printFileLine( Writer, uint64, string, int) error { const = 40 // overestimate := make([]byte, 0, +len()+24) = AppendMarker(, ) = appendFileLine(, , ) = append(, '\n') , := .Write() return } // appendFileLine appends file:line to dst, returning the extended slice. func appendFileLine( []byte, string, int) []byte { = append(, ...) = append(, ':') := uint() if < 0 { = append(, '-') = - } var [24]byte := len() for == len() || > 0 { -- [] = '0' + byte(%10) /= 10 } = append(, [:]...) return } // MatchStack assigns the current call stack a change ID. // If the stack should be printed, MatchStack prints it. // Then MatchStack reports whether a change at the current call stack should be enabled. func ( *Matcher) ( Writer) bool { if == nil { return true } return .stack() } // stack does the real work for Stack. // This lets stack's body handle m == nil and potentially be inlined. func ( *Matcher) ( Writer) bool { const = 16 var []uintptr := runtime.Callers(2, [:]) // caller #2 is not for printing; need it to normalize PCs if ASLR. if <= 1 { return false } := [0] // normalize PCs for := range [:] { [] -= } := Hash([:]) if .ShouldPrint() { var *dedup for { = .dedup.Load() if != nil { break } = new(dedup) if .dedup.CompareAndSwap(nil, ) { break } } if .MarkerOnly() { if !.seenLossy() { PrintMarker(, ) } } else { if !.seen() { // Restore PCs in stack for printing for := range [:] { [] += } printStack(, , [1:]) } } } return .ShouldEnable() } // Writer is the same interface as io.Writer. // It is duplicated here to avoid importing io. type Writer interface { Write([]byte) (int, error) } // PrintMarker prints to w a one-line report containing only the marker for h. // It is appropriate to use when [Matcher.ShouldPrint] and [Matcher.MarkerOnly] both return true. func ( Writer, uint64) error { var [50]byte := AppendMarker([:0], ) = append(, '\n') , := .Write() return } // printStack prints to w a multi-line report containing a formatting of the call stack stk, // with each line preceded by the marker for h. func printStack( Writer, uint64, []uintptr) error { := make([]byte, 0, 2048) var [100]byte := AppendMarker([:0], ) := runtime.CallersFrames() for { , := .Next() = append(, ...) = append(, .Func.Name()...) = append(, "()\n"...) = append(, ...) = append(, '\t') = appendFileLine(, .File, .Line) = append(, '\n') if ! { break } } = append(, ...) = append(, '\n') , := .Write() return } // Marker returns the match marker text to use on any line reporting details // about a match of the given ID. // It always returns the hexadecimal format. func ( uint64) string { return string(AppendMarker(nil, )) } // AppendMarker is like [Marker] but appends the marker to dst. func ( []byte, uint64) []byte { const = "[bisect-match 0x" var [len() + 16 + 1]byte copy([:], ) for := 0; < 16; ++ { [len()+] = "0123456789abcdef"[>>60] <<= 4 } [len()+16] = ']' return append(, [:]...) } // CutMarker finds the first match marker in line and removes it, // returning the shortened line (with the marker removed), // the ID from the match marker, // and whether a marker was found at all. // If there is no marker, CutMarker returns line, 0, false. func ( string) ( string, uint64, bool) { // Find first instance of prefix. := "[bisect-match " := 0 for ; ; ++ { if >= len()-len() { return , 0, false } if [] == '[' && [:+len()] == { break } } // Scan to ]. := + len() for < len() && [] != ']' { ++ } if >= len() { return , 0, false } // Parse id. := [+len() : ] if len() >= 3 && [:2] == "0x" { // parse hex if len() > 2+16 { // max 0x + 16 digits return , 0, false } for := 2; < len(); ++ { <<= 4 switch := []; { case '0' <= && <= '9': |= uint64( - '0') case 'a' <= && <= 'f': |= uint64( - 'a' + 10) case 'A' <= && <= 'F': |= uint64( - 'A' + 10) } } } else { if == "" || len() > 64 { // min 1 digit, max 64 digits return , 0, false } // parse binary for := 0; < len(); ++ { <<= 1 switch := []; { default: return , 0, false case '0', '1': |= uint64( - '0') } } } // Construct shortened line. // Remove at most one space from around the marker, // so that "foo [marker] bar" shortens to "foo bar". ++ // skip ] if > 0 && [-1] == ' ' { -- } else if < len() && [] == ' ' { ++ } = [:] + [:] return , , true } // Hash computes a hash of the data arguments, // each of which must be of type string, byte, int, uint, int32, uint32, int64, uint64, uintptr, or a slice of one of those types. func ( ...any) uint64 { := offset64 for , := range { switch v := .(type) { default: // Note: Not printing the type, because reflect.ValueOf(v) // would make the interfaces prepared by the caller escape // and therefore allocate. This way, Hash(file, line) runs // without any allocation. It should be clear from the // source code calling Hash what the bad argument was. panic("bisect.Hash: unexpected argument type") case string: = fnvString(, ) case byte: = fnv(, ) case int: = fnvUint64(, uint64()) case uint: = fnvUint64(, uint64()) case int32: = fnvUint32(, uint32()) case uint32: = fnvUint32(, ) case int64: = fnvUint64(, uint64()) case uint64: = fnvUint64(, ) case uintptr: = fnvUint64(, uint64()) case []string: for , := range { = fnvString(, ) } case []byte: for , := range { = fnv(, ) } case []int: for , := range { = fnvUint64(, uint64()) } case []uint: for , := range { = fnvUint64(, uint64()) } case []int32: for , := range { = fnvUint32(, uint32()) } case []uint32: for , := range { = fnvUint32(, ) } case []int64: for , := range { = fnvUint64(, uint64()) } case []uint64: for , := range { = fnvUint64(, ) } case []uintptr: for , := range { = fnvUint64(, uint64()) } } } return } // Trivial error implementation, here to avoid importing errors. // parseError is a trivial error implementation, // defined here to avoid importing errors. type parseError struct{ text string } func ( *parseError) () string { return .text } // FNV-1a implementation. See Go's hash/fnv/fnv.go. // Copied here for simplicity (can handle integers more directly) // and to avoid importing hash/fnv. const ( offset64 uint64 = 14695981039346656037 prime64 uint64 = 1099511628211 ) func fnv( uint64, byte) uint64 { ^= uint64() *= prime64 return } func fnvString( uint64, string) uint64 { for := 0; < len(); ++ { ^= uint64([]) *= prime64 } return } func fnvUint64( uint64, uint64) uint64 { for := 0; < 8; ++ { ^= & 0xFF >>= 8 *= prime64 } return } func fnvUint32( uint64, uint32) uint64 { for := 0; < 4; ++ { ^= uint64( & 0xFF) >>= 8 *= prime64 } return } // A dedup is a deduplicator for call stacks, so that we only print // a report for new call stacks, not for call stacks we've already // reported. // // It has two modes: an approximate but lock-free mode that // may still emit some duplicates, and a precise mode that uses // a lock and never emits duplicates. type dedup struct { // 128-entry 4-way, lossy cache for seenLossy recent [128][4]uint64 // complete history for seen mu sync.Mutex m map[uint64]bool } // seen records that h has now been seen and reports whether it was seen before. // When seen returns false, the caller is expected to print a report for h. func ( *dedup) ( uint64) bool { .mu.Lock() if .m == nil { .m = make(map[uint64]bool) } := .m[] .m[] = true .mu.Unlock() return } // seenLossy is a variant of seen that avoids a lock by using a cache of recently seen hashes. // Each cache entry is N-way set-associative: h can appear in any of the slots. // If h does not appear in any of them, then it is inserted into a random slot, // overwriting whatever was there before. func ( *dedup) ( uint64) bool { := &.recent[uint()%uint(len(.recent))] for := 0; < len(); ++ { if atomic.LoadUint64(&[]) == { return true } } // Compute index in set to evict as hash of current set. := offset64 for , := range { = fnvUint64(, ) } atomic.StoreUint64(&[uint()%uint(len())], ) return false }