// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package oldtrace implements a parser for Go execution traces from versions
// 1.11–1.21.
//
// The package started as a copy of Go 1.19's internal/trace, but has been
// optimized to be faster while using less memory and fewer allocations. It has
// been further modified for the specific purpose of converting traces to the
// new 1.22+ format.
package oldtrace

import (
	"bytes"
	"cmp"
	"encoding/binary"
	"errors"
	"fmt"
	"internal/trace/event"
	"internal/trace/version"
	"io"
	"math"
	"slices"
	"sort"
)

// Timestamp represents a count of nanoseconds since the beginning of the trace.
// They can only be meaningfully compared with other timestamps from the same
// trace.
type Timestamp int64

// Event describes one event in the trace.
type Event struct {
	// The Event type is carefully laid out to optimize its size and to avoid
	// pointers, the latter so that the garbage collector won't have to scan any
	// memory of our millions of events.

	Ts    Timestamp  // timestamp in nanoseconds
	G     uint64     // G on which the event happened
	Args  [4]uint64  // event-type-specific arguments
	StkID uint32     // unique stack ID
	P     int32      // P on which the event happened (can be a real P or one of TimerP, NetpollP, SyscallP)
	Type  event.Type // one of Ev*
}

// Frame is a frame in stack traces.
type Frame struct {
	PC uint64
	// string ID of the function name
	Fn uint64
	// string ID of the file name
	File uint64
	Line int
}

const (
	// Special P identifiers:
	FakeP    = 1000000 + iota
	TimerP   // contains timer unblocks
	NetpollP // contains network unblocks
	SyscallP // contains returns from syscalls
	GCP      // contains GC state
	ProfileP // contains recording of CPU profile samples
)

// Trace is the result of Parse.
type Trace struct {
	Version version.Version

	// Events is the sorted list of Events in the trace.
	Events Events
	// Stacks is the stack traces (stored as slices of PCs), keyed by stack IDs
	// from the trace.
	Stacks        map[uint32][]uint64
	PCs           map[uint64]Frame
	Strings       map[uint64]string
	InlineStrings []string
}

// batchOffset records the byte offset of, and number of events in, a batch. A
// batch is a sequence of events emitted by a P. Events within a single batch
// are sorted by time.
type batchOffset struct {
	offset    int
	numEvents int
}

type parser struct {
	ver  version.Version
	data []byte
	off  int

	strings              map[uint64]string
	inlineStrings        []string
	inlineStringsMapping map[string]int
	// map from Ps to their batch offsets
	batchOffsets map[int32][]batchOffset

	stacks     map[uint32][]uint64
	stacksData []uint64

	ticksPerSec int64
	pcs         map[uint64]Frame
	cpuSamples  []Event
	timerGoids  map[uint64]bool

	// state for readRawEvent
	args []uint64

	// state for parseEvent
	lastTs Timestamp
	lastG  uint64
	// map from Ps to the last Gs that ran on them
	lastGs map[int32]uint64
	lastP  int32
}

func (p *parser) discard(n uint64) bool {
	if n > math.MaxInt {
		return false
	}
	if noff := p.off + int(n); noff < p.off || noff > len(p.data) {
		return false
	} else {
		p.off = noff
	}
	return true
}

func newParser(r io.Reader, ver version.Version) (*parser, error) {
	var buf []byte
	if seeker, ok := r.(io.Seeker); ok {
		// Determine the size of the reader so that we can allocate a buffer
		// without having to grow it later.
		cur, err := seeker.Seek(0, io.SeekCurrent)
		if err != nil {
			return nil, err
		}
		end, err := seeker.Seek(0, io.SeekEnd)
		if err != nil {
			return nil, err
		}
		_, err = seeker.Seek(cur, io.SeekStart)
		if err != nil {
			return nil, err
		}

		buf = make([]byte, end-cur)
		_, err = io.ReadFull(r, buf)
		if err != nil {
			return nil, err
		}
	} else {
		var err error
		buf, err = io.ReadAll(r)
		if err != nil {
			return nil, err
		}
	}
	return &parser{data: buf, ver: ver, timerGoids: make(map[uint64]bool)}, nil
}
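
// A minimal usage sketch for Parse (for illustration only; it assumes
// version.ReadHeader consumes the file header and returns the trace's
// version, and it elides error handling):
//
//	f, _ := os.Open("trace.out")
//	ver, _ := version.ReadHeader(f)
//	tr, err := oldtrace.Parse(f, ver)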

// Parse parses Go execution traces from versions 1.11–1.21. The provided reader
// will be read to completion and the entire trace will be materialized in
// memory. That is, this function does not allow incremental parsing.
//
// The reader has to be positioned just after the trace header and vers needs to
// be the version of the trace. This can be achieved by using
// version.ReadHeader.
func Parse(r io.Reader, vers version.Version) (Trace, error) {
	// We accept the version as an argument because internal/trace will have
	// already read the version to determine which parser to use.
	p, err := newParser(r, vers)
	if err != nil {
		return Trace{}, err
	}
	return p.parse()
}

// parse parses, post-processes and verifies the trace.
func (p *parser) parse() (Trace, error) {
	defer func() {
		p.data = nil
	}()

	// We parse a trace by running the following steps in order:
	//
	// 1. In the initial pass we collect information about batches (their
	//    locations and sizes.) We also parse CPU profiling samples in this
	//    step, simply to reduce the number of full passes that we need.
	//
	// 2. In the second pass we parse batches and merge them into a globally
	//    ordered event stream. This uses the batch information from the first
	//    pass to quickly find batches.
	//
	// 3. After all events have been parsed we convert their timestamps from CPU
	//    ticks to wall time. Furthermore we move timers and syscalls to
	//    dedicated, fake Ps.
	//
	// 4. Finally, we validate the trace.

	p.strings = make(map[uint64]string)
	p.batchOffsets = make(map[int32][]batchOffset)
	p.lastGs = make(map[int32]uint64)
	p.stacks = make(map[uint32][]uint64)
	p.pcs = make(map[uint64]Frame)
	p.inlineStringsMapping = make(map[string]int)

	if err := p.collectBatchesAndCPUSamples(); err != nil {
		return Trace{}, err
	}

	events, err := p.parseEventBatches()
	if err != nil {
		return Trace{}, err
	}

	if p.ticksPerSec == 0 {
		return Trace{}, errors.New("no EvFrequency event")
	}

	if events.Len() > 0 {
		// Translate cpu ticks to real time.
		minTs := events.Ptr(0).Ts
		// Use floating point to avoid integer overflows.
		freq := 1e9 / float64(p.ticksPerSec)
		for i := 0; i < events.Len(); i++ {
			ev := events.Ptr(i)
			ev.Ts = Timestamp(float64(ev.Ts-minTs) * freq)
			// Move timers and syscalls to separate fake Ps.
			if p.timerGoids[ev.G] && ev.Type == EvGoUnblock {
				ev.P = TimerP
			}
			if ev.Type == EvGoSysExit {
				ev.P = SyscallP
			}
		}
	}

	if err := p.postProcessTrace(events); err != nil {
		return Trace{}, err
	}

	res := Trace{
		Version:       p.ver,
		Events:        events,
		Stacks:        p.stacks,
		Strings:       p.strings,
		InlineStrings: p.inlineStrings,
		PCs:           p.pcs,
	}
	return res, nil
}

// rawEvent is a helper type used during parsing.
type rawEvent struct {
	typ   event.Type
	args  []uint64
	sargs []string

	// if typ == EvBatch, these fields describe the batch.
	batchPid    int32
	batchOffset int
}

type proc struct {
	pid int32
	// the remaining events in the current batch
	events []Event
	// buffer for reading batches into, aliased by proc.events
	buf []Event

	// there are no more batches left
	done bool
}

const eventsBucketSize = 524288 // 32 MiB of events

type Events struct {
	// Events is a slice of slices that grows one slice of size eventsBucketSize
	// at a time. This avoids the O(n) cost of slice growth in append, and
	// additionally allows consumers to drop references to parts of the data,
	// freeing memory piecewise.
	n       int
	buckets []*[eventsBucketSize]Event
	off     int
}

// grow grows the slice by one and returns a pointer to the new element, without
// overwriting it.
func (l *Events) grow() *Event {
	a, b := l.index(l.n)
	if a >= len(l.buckets) {
		l.buckets = append(l.buckets, new([eventsBucketSize]Event))
	}
	ptr := &l.buckets[a][b]
	l.n++
	return ptr
}

// append appends v to the slice and returns a pointer to the new element.
func (l *Events) append(v Event) *Event {
	ptr := l.grow()
	*ptr = v
	return ptr
}

func (l *Events) Ptr(i int) *Event {
	a, b := l.index(i + l.off)
	return &l.buckets[a][b]
}
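
// A worked example of the index math below: eventsBucketSize is 524288
// (1<<19), so index(1<<20 + 5) returns bucket 2, element 5. Because the
// bucket size is a power of two, the unsigned division and modulus compile
// down to a shift and a mask.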

func (l *Events) index(i int) (int, int) {
	// Doing the division on uint instead of int compiles this function to a
	// shift and an AND (for power of 2 bucket sizes), versus a whole bunch of
	// instructions for int.
	return int(uint(i) / eventsBucketSize), int(uint(i) % eventsBucketSize)
}

func (l *Events) Len() int {
	return l.n - l.off
}

func (l *Events) Less(i, j int) bool {
	return l.Ptr(i).Ts < l.Ptr(j).Ts
}

func (l *Events) Swap(i, j int) {
	*l.Ptr(i), *l.Ptr(j) = *l.Ptr(j), *l.Ptr(i)
}

func (l *Events) Pop() (*Event, bool) {
	if l.off == l.n {
		return nil, false
	}
	a, b := l.index(l.off)
	ptr := &l.buckets[a][b]
	l.off++
	if b == eventsBucketSize-1 || l.off == l.n {
		// We've consumed the last event from the bucket, so drop the bucket and
		// allow GC to collect it.
		l.buckets[a] = nil
	}
	return ptr, true
}

func (l *Events) All() func(yield func(ev *Event) bool) {
	return func(yield func(ev *Event) bool) {
		for i := 0; i < l.Len(); i++ {
			a, b := l.index(i + l.off)
			ptr := &l.buckets[a][b]
			if !yield(ptr) {
				return
			}
		}
	}
}
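
// A sketch of consuming All as a range-over-func iterator (assuming Go 1.23+
// language support; on older toolchains the returned function can instead be
// called directly with a yield callback):
//
//	for ev := range tr.Events.All() {
//		fmt.Println(ev.Ts, ev.G)
//	}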

// parseEventBatches reads per-P event batches and merges them into a single, consistent
// stream. The high level idea is as follows. Events within an individual batch
// are in correct order, because they are emitted by a single P. So we need to
// produce a correct interleaving of the batches. To do this we take the first
// unmerged event from each batch (the frontier). Then we choose the subset that
// is "ready" to be merged, that is, events for which all dependencies are
// already merged. Then we choose the event with the lowest timestamp from the
// subset, merge it and repeat. This approach ensures that we form a consistent
// stream even if timestamps are incorrect (a condition observed on some
// machines).
func (p *parser) parseEventBatches() (Events, error) {
	// The ordering of CPU profile sample events in the data stream is based on
	// when each run of the signal handler was able to acquire the spinlock,
	// with original timestamps corresponding to when ReadTrace pulled the data
	// off of the profBuf queue. Re-sort them by the timestamp we captured
	// inside the signal handler.
	slices.SortFunc(p.cpuSamples, func(a, b Event) int {
		return cmp.Compare(a.Ts, b.Ts)
	})

	allProcs := make([]proc, 0, len(p.batchOffsets))
	for pid := range p.batchOffsets {
		allProcs = append(allProcs, proc{pid: pid})
	}
	allProcs = append(allProcs, proc{pid: ProfileP, events: p.cpuSamples})

	events := Events{}

	// Merge events as long as at least one P has more events
	gs := make(map[uint64]gState)

	// Note: technically we don't need a priority queue here. We're only ever
	// interested in the earliest eligible event, which means we just have to
	// track the smallest element. However, in practice, the priority queue
	// performs better, because for each event we only have to compute its state
	// transition once, not on each iteration. If it was eligible before, it'll
	// already be in the queue. Furthermore, on average, we only have one P to
	// look at in each iteration, because all other Ps are already in the queue.
	var frontier orderEventList

	availableProcs := make([]*proc, len(allProcs))
	for i := range allProcs {
		availableProcs[i] = &allProcs[i]
	}
	for {
	pidLoop:
		for i := 0; i < len(availableProcs); i++ {
			pr := availableProcs[i]

			for len(pr.events) == 0 {
				// Call loadBatch in a loop because sometimes batches are empty
				evs, err := p.loadBatch(pr.pid, pr.buf[:0])
				pr.buf = evs[:0]
				if err == io.EOF {
					// This P has no more events
					pr.done = true
					availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
					availableProcs = availableProcs[:len(availableProcs)-1]
					// We swapped the element at i with another proc, so look at
					// the index again
					i--
					continue pidLoop
				} else if err != nil {
					return Events{}, err
				} else {
					pr.events = evs
				}
			}

			ev := &pr.events[0]
			g, init, _ := stateTransition(ev)

			// TODO(dh): This implementation matches the behavior of the
			// upstream 'go tool trace', and works in practice, but has run into
			// the following inconsistency during fuzzing: what happens if
			// multiple Ps have events for the same G? While building the
			// frontier we will check all of the events against the current
			// state of the G. However, when we process the frontier, the state
			// of the G changes, and a transition that was valid while building
			// the frontier may no longer be valid when processing the frontier.
			// Is this something that can happen for real, valid traces, or is
			// this only possible with corrupt data?
			if !transitionReady(g, gs[g], init) {
				continue
			}
			pr.events = pr.events[1:]
			availableProcs[i], availableProcs[len(availableProcs)-1] = availableProcs[len(availableProcs)-1], availableProcs[i]
			availableProcs = availableProcs[:len(availableProcs)-1]
			frontier.Push(orderEvent{*ev, pr})

			// We swapped the element at i with another proc, so look at the
			// index again
			i--
		}
		if len(frontier) == 0 {
			for i := range allProcs {
				if !allProcs[i].done {
					return Events{}, fmt.Errorf("no consistent ordering of events possible")
				}
			}
			break
		}
		f := frontier.Pop()

		// We're computing the state transition twice, once when computing the
		// frontier, and now to apply the transition. This is fine because
		// stateTransition is a pure function. Computing it again is cheaper
		// than storing large items in the frontier.
		g, init, next := stateTransition(&f.ev)

		// Get rid of "Local" events, they are intended merely for ordering.
		switch f.ev.Type {
		case EvGoStartLocal:
			f.ev.Type = EvGoStart
		case EvGoUnblockLocal:
			f.ev.Type = EvGoUnblock
		case EvGoSysExitLocal:
			f.ev.Type = EvGoSysExit
		}
		events.append(f.ev)

		if err := transition(gs, g, init, next); err != nil {
			return Events{}, err
		}
		availableProcs = append(availableProcs, f.proc)
	}

	// At this point we have a consistent stream of events. Make sure time
	// stamps respect the ordering. The tests will skip (not fail) the test case
	// if they see this error.
	if !sort.IsSorted(&events) {
		return Events{}, ErrTimeOrder
	}

	// The last part is giving correct timestamps to EvGoSysExit events. The
	// problem with EvGoSysExit is that actual syscall exit timestamp
	// (ev.Args[2]) is potentially acquired long before event emission. So far
	// we've used timestamp of event emission (ev.Ts). We could not set ev.Ts =
	// ev.Args[2] earlier, because it would produce seemingly broken timestamps
	// (misplaced event). We also can't simply update the timestamp and resort
	// events, because if timestamps are broken we will misplace the event and
	// later report logically broken trace (instead of reporting broken
	// timestamps).
	lastSysBlock := make(map[uint64]Timestamp)
	for i := 0; i < events.Len(); i++ {
		ev := events.Ptr(i)
		switch ev.Type {
		case EvGoSysBlock, EvGoInSyscall:
			lastSysBlock[ev.G] = ev.Ts
		case EvGoSysExit:
			ts := Timestamp(ev.Args[2])
			if ts == 0 {
				continue
			}
			block := lastSysBlock[ev.G]
			if block == 0 {
				return Events{}, fmt.Errorf("stray syscall exit")
			}
			if ts < block {
				return Events{}, ErrTimeOrder
			}
			ev.Ts = ts
		}
	}
	sort.Stable(&events)

	return events, nil
}
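
// To illustrate the merge above (a hypothetical example, not data from a real
// trace): if P0's next event is "GoCreate g2" at tick 10 and P1's next event
// is "GoStart g2" at tick 5, the GoStart is not "ready" until the GoCreate
// has been merged, so the GoCreate is emitted first despite its larger
// timestamp. This dependency ordering is what keeps the stream consistent
// when per-P timestamps disagree.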

// collectBatchesAndCPUSamples records the offsets of batches and parses CPU samples.
func (p *parser) collectBatchesAndCPUSamples() error {
	// Read events.
	var raw rawEvent
	var curP int32
	for n := uint64(0); ; n++ {
		err := p.readRawEvent(skipArgs|skipStrings, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		if raw.typ == EvNone {
			continue
		}

		if raw.typ == EvBatch {
			bo := batchOffset{offset: raw.batchOffset}
			p.batchOffsets[raw.batchPid] = append(p.batchOffsets[raw.batchPid], bo)
			curP = raw.batchPid
		}

		batches := p.batchOffsets[curP]
		if len(batches) == 0 {
			return fmt.Errorf("read event %d with current P of %d, but P has no batches yet", raw.typ, curP)
		}
		batches[len(batches)-1].numEvents++

		if raw.typ == EvCPUSample {
			e := Event{Type: raw.typ}

			argOffset := 1
			narg := raw.argNum()
			if len(raw.args) != narg {
				return fmt.Errorf("CPU sample has wrong number of arguments: want %d, got %d", narg, len(raw.args))
			}
			for i := argOffset; i < narg; i++ {
				if i == narg-1 {
					e.StkID = uint32(raw.args[i])
				} else {
					e.Args[i-argOffset] = raw.args[i]
				}
			}

			e.Ts = Timestamp(e.Args[0])
			e.P = int32(e.Args[1])
			e.G = e.Args[2]
			e.Args[0] = 0

			// Most events are written out by the active P at the exact moment
			// they describe. CPU profile samples are different because they're
			// written to the tracing log after some delay, by a separate worker
			// goroutine, into a separate buffer.
			//
			// We keep these in their own batch until all of the batches are
			// merged in timestamp order. We also (right before the merge)
			// re-sort these events by the timestamp captured in the profiling
			// signal handler.
			//
			// Note that we're not concerned about the memory usage of storing
			// all CPU samples during the indexing phase. There are orders of
			// magnitude fewer CPU samples than runtime events.
			p.cpuSamples = append(p.cpuSamples, e)
		}
	}

	return nil
}

const (
	skipArgs = 1 << iota
	skipStrings
)

func (p *parser) readByte() (byte, bool) {
	if p.off < len(p.data) && p.off >= 0 {
		b := p.data[p.off]
		p.off++
		return b, true
	} else {
		return 0, false
	}
}

func (p *parser) readFull(n int) ([]byte, error) {
	if p.off >= len(p.data) || p.off < 0 || p.off+n > len(p.data) {
		// p.off < 0 is impossible but makes BCE happy.
		//
		// We do fail outright if there's not enough data, we don't care about
		// partial results.
		return nil, io.ErrUnexpectedEOF
	}
	buf := p.data[p.off : p.off+n]
	p.off += n
	return buf, nil
}
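
// A worked example of the event header decoding in readRawEvent below: the
// header byte 0b01010101 yields typ == 0b010101 == 21 (EvGoUnblock) via
// "b << 2 >> 2", and narg == 0b01 + 1 == 2 via "b>>6 + 1".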

// readRawEvent reads a raw event into ev. The slices in ev are only valid until
// the next call to readRawEvent, even when storing to a different location.
func (p *parser) readRawEvent(flags uint, ev *rawEvent) error {
	// The number of arguments is encoded using two bits and can thus only
	// represent the values 0–3. The value 3 (on the wire) indicates that
	// arguments are prefixed by their byte length, to encode >=3 arguments.
	const inlineArgs = 3

	// Read event type and number of arguments (1 byte).
	b, ok := p.readByte()
	if !ok {
		return io.EOF
	}
	typ := event.Type(b << 2 >> 2)
	// Most events have a timestamp before the actual arguments, so we add 1 and
	// parse it like it's the first argument. EvString has a special format and
	// the number of arguments doesn't matter. EvBatch writes '1' as the number
	// of arguments, but actually has two: a pid and a timestamp, but here the
	// timestamp is the second argument, not the first; adding 1 happens to come
	// up with the correct number, but it doesn't matter, because EvBatch has
	// custom logic for parsing.
	//
	// Note that because we're adding 1, inlineArgs == 3 describes the largest
	// number of logical arguments that isn't length-prefixed, even though the
	// value 3 on the wire indicates length-prefixing. For us, that becomes narg
	// == 4.
	narg := b>>6 + 1
	if typ == EvNone || typ >= EvCount || EventDescriptions[typ].minVersion > p.ver {
		return fmt.Errorf("unknown event type %d", typ)
	}

	switch typ {
	case EvString:
		if flags&skipStrings != 0 {
			// String dictionary entry [ID, length, string].
			if _, err := p.readVal(); err != nil {
				return errMalformedVarint
			}
			ln, err := p.readVal()
			if err != nil {
				return err
			}
			if !p.discard(ln) {
				return fmt.Errorf("failed to read trace: %w", io.EOF)
			}
		} else {
			// String dictionary entry [ID, length, string].
			id, err := p.readVal()
			if err != nil {
				return err
			}
			if id == 0 {
				return errors.New("string has invalid id 0")
			}
			if p.strings[id] != "" {
				return fmt.Errorf("string has duplicate id %d", id)
			}
			var ln uint64
			ln, err = p.readVal()
			if err != nil {
				return err
			}
			if ln == 0 {
				return errors.New("string has invalid length 0")
			}
			if ln > 1e6 {
				return fmt.Errorf("string has too large length %d", ln)
			}
			buf, err := p.readFull(int(ln))
			if err != nil {
				return fmt.Errorf("failed to read trace: %w", err)
			}
			p.strings[id] = string(buf)
		}

		ev.typ = EvNone
		return nil
	case EvBatch:
		if want := byte(2); narg != want {
			return fmt.Errorf("EvBatch has wrong number of arguments: got %d, want %d", narg, want)
		}

		// -1 because we've already read the first byte of the batch
		off := p.off - 1

		pid, err := p.readVal()
		if err != nil {
			return err
		}
		if pid != math.MaxUint64 && pid > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", pid, uint64(math.MaxInt32))
		}

		var pid32 int32
		if pid == math.MaxUint64 {
			pid32 = -1
		} else {
			pid32 = int32(pid)
		}

		v, err := p.readVal()
		if err != nil {
			return err
		}

		*ev = rawEvent{
			typ:         EvBatch,
			args:        p.args[:0],
			batchPid:    pid32,
			batchOffset: off,
		}
		ev.args = append(ev.args, pid, v)
		return nil
	default:
		*ev = rawEvent{typ: typ, args: p.args[:0]}
		if narg <= inlineArgs {
			if flags&skipArgs == 0 {
				for i := 0; i < int(narg); i++ {
					v, err := p.readVal()
					if err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, err)
					}
					ev.args = append(ev.args, v)
				}
			} else {
				for i := 0; i < int(narg); i++ {
					if _, err := p.readVal(); err != nil {
						return fmt.Errorf("failed to read event %d argument: %w", typ, errMalformedVarint)
					}
				}
			}
		} else {
			// More than inlineArgs args, the first value is length of the event
			// in bytes.
			v, err := p.readVal()
			if err != nil {
				return fmt.Errorf("failed to read event %d argument: %w", typ, err)
			}
			if limit := uint64(2048); v > limit {
				// At the time of Go 1.19, v seems to be at most 128. Set 2048
				// as a generous upper limit and guard against malformed traces.
				return fmt.Errorf("failed to read event %d argument: length-prefixed argument too big: %d bytes, limit is %d", typ, v, limit)
			}

			if flags&skipArgs == 0 || typ == EvCPUSample {
				buf, err := p.readFull(int(v))
				if err != nil {
					return fmt.Errorf("failed to read trace: %w", err)
				}
				for len(buf) > 0 {
					var v uint64
					v, buf, err = readValFrom(buf)
					if err != nil {
						return err
					}
					ev.args = append(ev.args, v)
				}
			} else {
				// Skip over arguments
				if !p.discard(v) {
					return fmt.Errorf("failed to read trace: %w", io.EOF)
				}
			}
			if typ == EvUserLog {
				// EvUserLog records are followed by a value string
				if flags&skipArgs == 0 {
					// Read string
					s, err := p.readStr()
					if err != nil {
						return err
					}
					ev.sargs = append(ev.sargs, s)
				} else {
					// Skip string
					v, err := p.readVal()
					if err != nil {
						return err
					}
					if !p.discard(v) {
						return io.EOF
					}
				}
			}
		}

		p.args = ev.args[:0]
		return nil
	}
}
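
// Note how the flags tie the two passes together: collectBatchesAndCPUSamples
// skims the file with readRawEvent(skipArgs|skipStrings, ...), while
// loadBatch below re-reads each batch with readRawEvent(0, ...) to fully
// decode arguments. CPU sample arguments are decoded even when skipArgs is
// set, because the indexing pass is also what collects them.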

// loadBatch loads the next batch for pid and appends its contents to events.
func (p *parser) loadBatch(pid int32, events []Event) ([]Event, error) {
	offsets := p.batchOffsets[pid]
	if len(offsets) == 0 {
		return nil, io.EOF
	}
	n := offsets[0].numEvents
	offset := offsets[0].offset
	offsets = offsets[1:]
	p.batchOffsets[pid] = offsets

	p.off = offset

	if cap(events) < n {
		events = make([]Event, 0, n)
	}

	gotHeader := false
	var raw rawEvent
	var ev Event
	for {
		err := p.readRawEvent(0, &raw)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		if raw.typ == EvNone || raw.typ == EvCPUSample {
			continue
		}
		if raw.typ == EvBatch {
			if gotHeader {
				break
			} else {
				gotHeader = true
			}
		}

		err = p.parseEvent(&raw, &ev)
		if err != nil {
			return nil, err
		}
		if ev.Type != EvNone {
			events = append(events, ev)
		}
	}

	return events, nil
}

func (p *parser) readStr() (string, error) {
	sz, err := p.readVal()
	if err != nil {
		return "", err
	}
	if sz == 0 {
		return "", nil
	}
	if sz > 1e6 {
		return "", fmt.Errorf("string is too large (len=%d)", sz)
	}
	buf, err := p.readFull(int(sz))
	if err != nil {
		return "", fmt.Errorf("failed to read trace: %w", err)
	}
	return string(buf), nil
}

// parseEvent transforms raw events into events.
// It analyzes and verifies per-event-type arguments.
func (p *parser) parseEvent(raw *rawEvent, ev *Event) error {
	desc := &EventDescriptions[raw.typ]
	if desc.Name == "" {
		return fmt.Errorf("missing description for event type %d", raw.typ)
	}
	narg := raw.argNum()
	if len(raw.args) != narg {
		return fmt.Errorf("%s has wrong number of arguments: want %d, got %d", desc.Name, narg, len(raw.args))
	}
	switch raw.typ {
	case EvBatch:
		p.lastGs[p.lastP] = p.lastG
		if raw.args[0] != math.MaxUint64 && raw.args[0] > math.MaxInt32 {
			return fmt.Errorf("processor ID %d is larger than maximum of %d", raw.args[0], uint64(math.MaxInt32))
		}
		if raw.args[0] == math.MaxUint64 {
			p.lastP = -1
		} else {
			p.lastP = int32(raw.args[0])
		}
		p.lastG = p.lastGs[p.lastP]
		p.lastTs = Timestamp(raw.args[1])
	case EvFrequency:
		p.ticksPerSec = int64(raw.args[0])
		if p.ticksPerSec <= 0 {
			// The most likely cause for this is tick skew on different CPUs.
			// For example, solaris/amd64 seems to have wildly different
			// ticks on different CPUs.
			return ErrTimeOrder
		}
	case EvTimerGoroutine:
		p.timerGoids[raw.args[0]] = true
	case EvStack:
		if len(raw.args) < 2 {
			return fmt.Errorf("EvStack has wrong number of arguments: want at least 2, got %d", len(raw.args))
		}
		size := raw.args[1]
		if size > 1000 {
			return fmt.Errorf("EvStack has bad number of frames: %d", size)
		}
		want := 2 + 4*size
		if uint64(len(raw.args)) != want {
			return fmt.Errorf("EvStack has wrong number of arguments: want %d, got %d", want, len(raw.args))
		}
		id := uint32(raw.args[0])
		if id != 0 && size > 0 {
			stk := p.allocateStack(size)
			for i := 0; i < int(size); i++ {
				pc := raw.args[2+i*4+0]
				fn := raw.args[2+i*4+1]
				file := raw.args[2+i*4+2]
				line := raw.args[2+i*4+3]
				stk[i] = pc
				if _, ok := p.pcs[pc]; !ok {
					p.pcs[pc] = Frame{PC: pc, Fn: fn, File: file, Line: int(line)}
				}
			}
			p.stacks[id] = stk
		}
	case EvCPUSample:
		// These events get parsed during the indexing step and don't strictly
		// belong to the batch.
	default:
		*ev = Event{Type: raw.typ, P: p.lastP, G: p.lastG}
		ev.Ts = p.lastTs + Timestamp(raw.args[0])
		argOffset := 1
		p.lastTs = ev.Ts
		for i := argOffset; i < narg; i++ {
			if i == narg-1 && desc.Stack {
				ev.StkID = uint32(raw.args[i])
			} else {
				ev.Args[i-argOffset] = raw.args[i]
			}
		}
		switch raw.typ {
		case EvGoStart, EvGoStartLocal, EvGoStartLabel:
			p.lastG = ev.Args[0]
			ev.G = p.lastG
		case EvGoEnd, EvGoStop, EvGoSched, EvGoPreempt,
			EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet,
			EvGoSysBlock, EvGoBlockGC:
			p.lastG = 0
		case EvGoSysExit, EvGoWaiting, EvGoInSyscall:
			ev.G = ev.Args[0]
		case EvUserTaskCreate:
			// e.Args 0: taskID, 1:parentID, 2:nameID
		case EvUserRegion:
			// e.Args 0: taskID, 1: mode, 2:nameID
		case EvUserLog:
			// e.Args 0: taskID, 1:keyID, 2: stackID, 3: messageID
			// raw.sargs 0: message
			if id, ok := p.inlineStringsMapping[raw.sargs[0]]; ok {
				ev.Args[3] = uint64(id)
			} else {
				id := len(p.inlineStrings)
				p.inlineStringsMapping[raw.sargs[0]] = id
				p.inlineStrings = append(p.inlineStrings, raw.sargs[0])
				ev.Args[3] = uint64(id)
			}
		}
		return nil
	}

	ev.Type = EvNone
	return nil
}

// ErrTimeOrder is returned by Parse when the trace contains
// time stamps that do not respect actual event ordering.
var ErrTimeOrder = errors.New("time stamps out of order")

// postProcessTrace does inter-event verification and information restoration.
// The resulting trace is guaranteed to be consistent
// (for example, a P does not run two Gs at the same time, or a G is indeed
// blocked before an unblock event).
func (p *parser) postProcessTrace(events Events) error {
	const (
		gDead = iota
		gRunnable
		gRunning
		gWaiting
	)
	type gdesc struct {
		state        int
		ev           *Event
		evStart      *Event
		evCreate     *Event
		evMarkAssist *Event
	}
	type pdesc struct {
		running bool
		g       uint64
		evSweep *Event
	}

	gs := make(map[uint64]gdesc)
	ps := make(map[int32]pdesc)
	tasks := make(map[uint64]*Event)           // task id to task creation events
	activeRegions := make(map[uint64][]*Event) // goroutine id to stack of regions
	gs[0] = gdesc{state: gRunning}
	var evGC, evSTW *Event

	checkRunning := func(pd pdesc, g gdesc, ev *Event, allowG0 bool) error {
		name := EventDescriptions[ev.Type].Name
		if g.state != gRunning {
			return fmt.Errorf("g %d is not running while %s (time %d)", ev.G, name, ev.Ts)
		}
		if pd.g != ev.G {
			return fmt.Errorf("p %d is not running g %d while %s (time %d)", ev.P, ev.G, name, ev.Ts)
		}
		if !allowG0 && ev.G == 0 {
			return fmt.Errorf("g 0 did %s (time %d)", name, ev.Ts)
		}
		return nil
	}

	for i := 0; i < events.Len(); i++ {
		ev := events.Ptr(i)

		switch ev.Type {
		case EvProcStart:
			pd := ps[ev.P]
			if pd.running {
				return fmt.Errorf("p %d is running before start (time %d)", ev.P, ev.Ts)
			}
			pd.running = true

			ps[ev.P] = pd
		case EvProcStop:
			pd := ps[ev.P]
			if !pd.running {
				return fmt.Errorf("p %d is not running before stop (time %d)", ev.P, ev.Ts)
			}
			if pd.g != 0 {
				return fmt.Errorf("p %d is running a goroutine %d during stop (time %d)", ev.P, pd.g, ev.Ts)
			}
			pd.running = false

			ps[ev.P] = pd
		case EvGCStart:
			if evGC != nil {
				return fmt.Errorf("previous GC is not ended before a new one (time %d)", ev.Ts)
			}
			evGC = ev
			// Attribute this to the global GC state.
			ev.P = GCP
		case EvGCDone:
			if evGC == nil {
				return fmt.Errorf("bogus GC end (time %d)", ev.Ts)
			}
			evGC = nil
		case EvSTWStart:
			evp := &evSTW
			if *evp != nil {
				return fmt.Errorf("previous STW is not ended before a new one (time %d)", ev.Ts)
			}
			*evp = ev
		case EvSTWDone:
			evp := &evSTW
			if *evp == nil {
				return fmt.Errorf("bogus STW end (time %d)", ev.Ts)
			}
			*evp = nil
		case EvGCSweepStart:
			pd := ps[ev.P]
			if pd.evSweep != nil {
				return fmt.Errorf("previous sweeping is not ended before a new one (time %d)", ev.Ts)
			}
			pd.evSweep = ev

			ps[ev.P] = pd
		case EvGCMarkAssistStart:
			g := gs[ev.G]
			if g.evMarkAssist != nil {
				return fmt.Errorf("previous mark assist is not ended before a new one (time %d)", ev.Ts)
			}
			g.evMarkAssist = ev

			gs[ev.G] = g
		case EvGCMarkAssistDone:
			// Unlike most events, mark assists can be in progress when a
			// goroutine starts tracing, so we can't report an error here.
			g := gs[ev.G]
			if g.evMarkAssist != nil {
				g.evMarkAssist = nil
			}

			gs[ev.G] = g
		case EvGCSweepDone:
			pd := ps[ev.P]
			if pd.evSweep == nil {
				return fmt.Errorf("bogus sweeping end (time %d)", ev.Ts)
			}
			pd.evSweep = nil

			ps[ev.P] = pd
		case EvGoWaiting:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoWaiting (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoInSyscall:
			g := gs[ev.G]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before EvGoInSyscall (time %d)", ev.G, ev.Ts)
			}
			g.state = gWaiting
			g.ev = ev

			gs[ev.G] = g
		case EvGoCreate:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, true); err != nil {
				return err
			}
			if _, ok := gs[ev.Args[0]]; ok {
				return fmt.Errorf("g %d already exists (time %d)", ev.Args[0], ev.Ts)
			}
			gs[ev.Args[0]] = gdesc{state: gRunnable, ev: ev, evCreate: ev}
		case EvGoStart, EvGoStartLabel:
			g := gs[ev.G]
			pd := ps[ev.P]
			if g.state != gRunnable {
				return fmt.Errorf("g %d is not runnable before start (time %d)", ev.G, ev.Ts)
			}
			if pd.g != 0 {
				return fmt.Errorf("p %d is already running g %d while start g %d (time %d)", ev.P, pd.g, ev.G, ev.Ts)
			}
			g.state = gRunning
			g.evStart = ev
			pd.g = ev.G
			if g.evCreate != nil {
				ev.StkID = uint32(g.evCreate.Args[1])
				g.evCreate = nil
			}

			if g.ev != nil {
				g.ev = nil
			}

			gs[ev.G] = g
			ps[ev.P] = pd
		case EvGoEnd, EvGoStop:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, false); err != nil {
				return err
			}
			g.evStart = nil
			g.state = gDead
			pd.g = 0

			if ev.Type == EvGoEnd {
				// flush all active regions
				delete(activeRegions, ev.G)
			}

			gs[ev.G] = g
			ps[ev.P] = pd
		case EvGoSched, EvGoPreempt:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, false); err != nil {
				return err
			}
			g.state = gRunnable
			g.evStart = nil
			pd.g = 0
			g.ev = ev

			gs[ev.G] = g
			ps[ev.P] = pd
		case EvGoUnblock:
			g := gs[ev.G]
			pd := ps[ev.P]
			if g.state != gRunning {
				return fmt.Errorf("g %d is not running while unpark (time %d)", ev.G, ev.Ts)
			}
			if ev.P != TimerP && pd.g != ev.G {
				return fmt.Errorf("p %d is not running g %d while unpark (time %d)", ev.P, ev.G, ev.Ts)
			}
			g1 := gs[ev.Args[0]]
			if g1.state != gWaiting {
				return fmt.Errorf("g %d is not waiting before unpark (time %d)", ev.Args[0], ev.Ts)
			}
			if g1.ev != nil && g1.ev.Type == EvGoBlockNet {
				ev.P = NetpollP
			}
			g1.state = gRunnable
			g1.ev = ev
			gs[ev.Args[0]] = g1
		case EvGoSysCall:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, false); err != nil {
				return err
			}
			g.ev = ev

			gs[ev.G] = g
		case EvGoSysBlock:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.evStart = nil
			pd.g = 0

			gs[ev.G] = g
			ps[ev.P] = pd
		case EvGoSysExit:
			g := gs[ev.G]
			if g.state != gWaiting {
				return fmt.Errorf("g %d is not waiting during syscall exit (time %d)", ev.G, ev.Ts)
			}
			g.state = gRunnable
			g.ev = ev

			gs[ev.G] = g
		case EvGoSleep, EvGoBlock, EvGoBlockSend, EvGoBlockRecv,
			EvGoBlockSelect, EvGoBlockSync, EvGoBlockCond, EvGoBlockNet, EvGoBlockGC:
			g := gs[ev.G]
			pd := ps[ev.P]
			if err := checkRunning(pd, g, ev, false); err != nil {
				return err
			}
			g.state = gWaiting
			g.ev = ev
			g.evStart = nil
			pd.g = 0

			gs[ev.G] = g
			ps[ev.P] = pd
		case EvUserTaskCreate:
			taskid := ev.Args[0]
			if prev, ok := tasks[taskid]; ok {
				return fmt.Errorf("task id conflicts (id:%d), %q vs %q", taskid, ev, prev)
			}
			tasks[ev.Args[0]] = ev
		case EvUserTaskEnd:
			taskid := ev.Args[0]
			delete(tasks, taskid)
		case EvUserRegion:
			mode := ev.Args[1]
			regions := activeRegions[ev.G]
			if mode == 0 { // region start
				activeRegions[ev.G] = append(regions, ev) // push
			} else if mode == 1 { // region end
				n := len(regions)
				if n > 0 { // matching region start event is in the trace.
					s := regions[n-1]
					if s.Args[0] != ev.Args[0] || s.Args[2] != ev.Args[2] { // task id, region name mismatch
						return fmt.Errorf("misuse of region in goroutine %d: span end %q when the inner-most active span start event is %q", ev.G, ev, s)
					}

					if n > 1 {
						activeRegions[ev.G] = regions[:n-1]
					} else {
						delete(activeRegions, ev.G)
					}
				}
			} else {
				return fmt.Errorf("invalid user region mode: %q", ev)
			}
		}

		if ev.StkID != 0 && len(p.stacks[ev.StkID]) == 0 {
			// Make sure events don't refer to stacks that don't exist or to
			// stacks with zero frames. Neither of these should be possible, but
			// better be safe than sorry.
			ev.StkID = 0
		}
	}

	// TODO(mknyszek): restore stacks for EvGoStart events.
	return nil
}
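
// Trace values are unsigned base-128 (LEB128) varints: seven payload bits
// per byte, least-significant group first, with the high bit set on every
// byte except the last. As a worked example, the bytes 0xE5 0x8E 0x26
// decode to 0x65 + 0x0E<<7 + 0x26<<14 == 624485.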

var errMalformedVarint = errors.New("malformatted base-128 varint")

// readVal reads an unsigned base-128 value from the parser's data.
func (p *parser) readVal() (uint64, error) {
	v, n := binary.Uvarint(p.data[p.off:])
	if n <= 0 {
		return 0, errMalformedVarint
	}
	p.off += n
	return v, nil
}

func readValFrom(buf []byte) (v uint64, rest []byte, err error) {
	v, n := binary.Uvarint(buf)
	if n <= 0 {
		return 0, nil, errMalformedVarint
	}
	return v, buf[n:], nil
}

func (ev *Event) String() string {
	desc := &EventDescriptions[ev.Type]
	w := new(bytes.Buffer)
	fmt.Fprintf(w, "%d %s p=%d g=%d stk=%d", ev.Ts, desc.Name, ev.P, ev.G, ev.StkID)
	for i, a := range desc.Args {
		fmt.Fprintf(w, " %s=%d", a, ev.Args[i])
	}
	return w.String()
}

// argNum returns total number of args for the event accounting for timestamps,
// sequence numbers and differences between trace format versions.
func (raw *rawEvent) argNum() int {
	desc := &EventDescriptions[raw.typ]
	if raw.typ == EvStack {
		return len(raw.args)
	}
	narg := len(desc.Args)
	if desc.Stack {
		narg++
	}
	switch raw.typ {
	case EvBatch, EvFrequency, EvTimerGoroutine:
		return narg
	}
	narg++ // timestamp
	return narg
}
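
// A worked example for argNum: EvGoUnblock is declared below with
// Args == {"g", "seq"} and Stack == true, so argNum returns
// 2 + 1 (stack ID) + 1 (timestamp) == 4.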

// Event types in the trace.
// Verbatim copy from src/runtime/trace.go with the "trace" prefix removed.
const (
	EvNone           event.Type = 0  // unused
	EvBatch          event.Type = 1  // start of per-P batch of events [pid, timestamp]
	EvFrequency      event.Type = 2  // contains tracer timer frequency [frequency (ticks per second)]
	EvStack          event.Type = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	EvGomaxprocs     event.Type = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	EvProcStart      event.Type = 5  // start of P [timestamp, thread id]
	EvProcStop       event.Type = 6  // stop of P [timestamp]
	EvGCStart        event.Type = 7  // GC start [timestamp, seq, stack id]
	EvGCDone         event.Type = 8  // GC done [timestamp]
	EvSTWStart       event.Type = 9  // GC mark termination start [timestamp, kind]
	EvSTWDone        event.Type = 10 // GC mark termination done [timestamp]
	EvGCSweepStart   event.Type = 11 // GC sweep start [timestamp, stack id]
	EvGCSweepDone    event.Type = 12 // GC sweep done [timestamp, swept, reclaimed]
	EvGoCreate       event.Type = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	EvGoStart        event.Type = 14 // goroutine starts running [timestamp, goroutine id, seq]
	EvGoEnd          event.Type = 15 // goroutine ends [timestamp]
	EvGoStop         event.Type = 16 // goroutine stops (like in select{}) [timestamp, stack]
	EvGoSched        event.Type = 17 // goroutine calls Gosched [timestamp, stack]
	EvGoPreempt      event.Type = 18 // goroutine is preempted [timestamp, stack]
	EvGoSleep        event.Type = 19 // goroutine calls Sleep [timestamp, stack]
	EvGoBlock        event.Type = 20 // goroutine blocks [timestamp, stack]
	EvGoUnblock      event.Type = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	EvGoBlockSend    event.Type = 22 // goroutine blocks on chan send [timestamp, stack]
	EvGoBlockRecv    event.Type = 23 // goroutine blocks on chan recv [timestamp, stack]
	EvGoBlockSelect  event.Type = 24 // goroutine blocks on select [timestamp, stack]
	EvGoBlockSync    event.Type = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	EvGoBlockCond    event.Type = 26 // goroutine blocks on Cond [timestamp, stack]
	EvGoBlockNet     event.Type = 27 // goroutine blocks on network [timestamp, stack]
	EvGoSysCall      event.Type = 28 // syscall enter [timestamp, stack]
	EvGoSysExit      event.Type = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	EvGoSysBlock     event.Type = 30 // syscall blocks [timestamp]
	EvGoWaiting      event.Type = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	EvGoInSyscall    event.Type = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	EvHeapAlloc      event.Type = 33 // gcController.heapLive change [timestamp, heap live bytes]
	EvHeapGoal       event.Type = 34 // gcController.heapGoal change [timestamp, heap goal bytes]
	EvTimerGoroutine event.Type = 35 // denotes timer goroutine [timer goroutine id]
	EvFutileWakeup   event.Type = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
	EvString         event.Type = 37 // string dictionary entry [ID, length, string]
	EvGoStartLocal   event.Type = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	EvGoUnblockLocal event.Type = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	EvGoSysExitLocal event.Type = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	EvGoStartLabel   event.Type = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	EvGoBlockGC      event.Type = 42 // goroutine blocks on GC assist [timestamp, stack]
	EvGCMarkAssistStart event.Type = 43 // GC mark assist start [timestamp, stack]
	EvGCMarkAssistDone  event.Type = 44 // GC mark assist done [timestamp]
	EvUserTaskCreate    event.Type = 45 // trace.NewTask [timestamp, internal task id, internal parent id, stack, name string]
	EvUserTaskEnd       event.Type = 46 // end of task [timestamp, internal task id, stack]
	EvUserRegion        event.Type = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), name string]
	EvUserLog           event.Type = 48 // trace.Log [timestamp, internal id, key string id, stack, value string]
	EvCPUSample         event.Type = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
	EvCount             event.Type = 50
)

var EventDescriptions = [256]struct {
	Name       string
	minVersion version.Version
	Stack      bool
	Args       []string
	SArgs      []string // string arguments
}{
	EvNone:           {"None", 5, false, []string{}, nil},
	EvBatch:          {"Batch", 5, false, []string{"p", "ticks"}, nil}, // in 1.5 format it was {"p", "seq", "ticks"}
	EvFrequency:      {"Frequency", 5, false, []string{"freq"}, nil},   // in 1.5 format it was {"freq", "unused"}
	EvStack:          {"Stack", 5, false, []string{"id", "siz"}, nil},
	EvGomaxprocs:     {"Gomaxprocs", 5, true, []string{"procs"}, nil},
	EvProcStart:      {"ProcStart", 5, false, []string{"thread"}, nil},
	EvProcStop:       {"ProcStop", 5, false, []string{}, nil},
	EvGCStart:        {"GCStart", 5, true, []string{"seq"}, nil}, // in 1.5 format it was {}
	EvGCDone:         {"GCDone", 5, false, []string{}, nil},
	EvSTWStart:       {"GCSTWStart", 5, false, []string{"kindid"}, []string{"kind"}}, // <= 1.9, args was {} (implicitly {0})
	EvSTWDone:        {"GCSTWDone", 5, false, []string{}, nil},
	EvGCSweepStart:   {"GCSweepStart", 5, true, []string{}, nil},
	EvGCSweepDone:    {"GCSweepDone", 5, false, []string{"swept", "reclaimed"}, nil}, // before 1.9, format was {}
	EvGoCreate:       {"GoCreate", 5, true, []string{"g", "stack"}, nil},
	EvGoStart:        {"GoStart", 5, false, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoEnd:          {"GoEnd", 5, false, []string{}, nil},
	EvGoStop:         {"GoStop", 5, true, []string{}, nil},
	EvGoSched:        {"GoSched", 5, true, []string{}, nil},
	EvGoPreempt:      {"GoPreempt", 5, true, []string{}, nil},
	EvGoSleep:        {"GoSleep", 5, true, []string{}, nil},
	EvGoBlock:        {"GoBlock", 5, true, []string{}, nil},
	EvGoUnblock:      {"GoUnblock", 5, true, []string{"g", "seq"}, nil}, // in 1.5 format it was {"g"}
	EvGoBlockSend:    {"GoBlockSend", 5, true, []string{}, nil},
	EvGoBlockRecv:    {"GoBlockRecv", 5, true, []string{}, nil},
	EvGoBlockSelect:  {"GoBlockSelect", 5, true, []string{}, nil},
	EvGoBlockSync:    {"GoBlockSync", 5, true, []string{}, nil},
	EvGoBlockCond:    {"GoBlockCond", 5, true, []string{}, nil},
	EvGoBlockNet:     {"GoBlockNet", 5, true, []string{}, nil},
	EvGoSysCall:      {"GoSysCall", 5, true, []string{}, nil},
	EvGoSysExit:      {"GoSysExit", 5, false, []string{"g", "seq", "ts"}, nil},
	EvGoSysBlock:     {"GoSysBlock", 5, false, []string{}, nil},
	EvGoWaiting:      {"GoWaiting", 5, false, []string{"g"}, nil},
	EvGoInSyscall:    {"GoInSyscall", 5, false, []string{"g"}, nil},
	EvHeapAlloc:      {"HeapAlloc", 5, false, []string{"mem"}, nil},
	EvHeapGoal:       {"HeapGoal", 5, false, []string{"mem"}, nil},
	EvTimerGoroutine: {"TimerGoroutine", 5, false, []string{"g"}, nil}, // in 1.5 format it was {"g", "unused"}
	EvFutileWakeup:   {"FutileWakeup", 5, false, []string{}, nil},
	EvString:         {"String", 7, false, []string{}, nil},
	EvGoStartLocal:   {"GoStartLocal", 7, false, []string{"g"}, nil},
	EvGoUnblockLocal: {"GoUnblockLocal", 7, true, []string{"g"}, nil},
	EvGoSysExitLocal: {"GoSysExitLocal", 7, false, []string{"g", "ts"}, nil},
	EvGoStartLabel:      {"GoStartLabel", 8, false, []string{"g", "seq", "labelid"}, []string{"label"}},
	EvGoBlockGC:         {"GoBlockGC", 8, true, []string{}, nil},
	EvGCMarkAssistStart: {"GCMarkAssistStart", 9, true, []string{}, nil},
	EvGCMarkAssistDone:  {"GCMarkAssistDone", 9, false, []string{}, nil},
	EvUserTaskCreate:    {"UserTaskCreate", 11, true, []string{"taskid", "pid", "typeid"}, []string{"name"}},
	EvUserTaskEnd:       {"UserTaskEnd", 11, true, []string{"taskid"}, nil},
	EvUserRegion:        {"UserRegion", 11, true, []string{"taskid", "mode", "typeid"}, []string{"name"}},
	EvUserLog:           {"UserLog", 11, true, []string{"id", "keyid"}, []string{"category", "message"}},
	EvCPUSample:         {"CPUSample", 19, true, []string{"ts", "p", "g"}, nil},
}

//gcassert:inline
func (p *parser) allocateStack(size uint64) []uint64 {
	if size == 0 {
		return nil
	}

	// Stacks are plentiful but small. For our "Staticcheck on std" trace with
	// 11e6 events, we have roughly 500,000 stacks, using 200 MiB of memory. To
	// avoid making 500,000 small allocations we allocate backing arrays 1 MiB
	// at a time.
	buf := p.stacksData
	if uint64(len(buf)) < size {
		buf = make([]uint64, 1024*128)
	}
	p.stacksData = buf[size:]
	return buf[:size:size]
}

func (tr *Trace) STWReason(kindID uint64) STWReason {
	if tr.Version < 21 {
		if kindID == 0 || kindID == 1 {
			return STWReason(kindID + 1)
		} else {
			return STWUnknown
		}
	} else if tr.Version == 21 {
		if kindID < NumSTWReasons {
			return STWReason(kindID)
		} else {
			return STWUnknown
		}
	} else {
		return STWUnknown
	}
}

type STWReason int

const (
	STWUnknown                 STWReason = 0
	STWGCMarkTermination       STWReason = 1
	STWGCSweepTermination      STWReason = 2
	STWWriteHeapDump           STWReason = 3
	STWGoroutineProfile        STWReason = 4
	STWGoroutineProfileCleanup STWReason = 5
	STWAllGoroutinesStackTrace STWReason = 6
	STWReadMemStats            STWReason = 7
	STWAllThreadsSyscall       STWReason = 8
	STWGOMAXPROCS              STWReason = 9
	STWStartTrace              STWReason = 10
	STWStopTrace               STWReason = 11
	STWCountPagesInUse         STWReason = 12
	STWReadMetricsSlow         STWReason = 13
	STWReadMemStatsSlow        STWReason = 14
	STWPageCachePagesLeaked    STWReason = 15
	STWResetDebugLog           STWReason = 16

	NumSTWReasons = 17
)