// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Go execution tracer.
// The tracer captures a wide range of execution events like goroutine
// creation/blocking/unblocking, syscall enter/exit/block, GC-related events,
// changes of heap size, processor start/stop, etc., and writes them to a buffer
// in a compact form. A precise nanosecond-precision timestamp and a stack
// trace is captured for most events.
// See https://golang.org/s/go15trace for more info.

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

// Event types in the trace, args are given in square brackets.
const (
	traceEvNone              = 0  // unused
	traceEvBatch             = 1  // start of per-P batch of events [pid, timestamp]
	traceEvFrequency         = 2  // contains tracer timer frequency [frequency (ticks per second)]
	traceEvStack             = 3  // stack [stack id, number of PCs, array of {PC, func string ID, file string ID, line}]
	traceEvGomaxprocs        = 4  // current value of GOMAXPROCS [timestamp, GOMAXPROCS, stack id]
	traceEvProcStart         = 5  // start of P [timestamp, thread id]
	traceEvProcStop          = 6  // stop of P [timestamp]
	traceEvGCStart           = 7  // GC start [timestamp, seq, stack id]
	traceEvGCDone            = 8  // GC done [timestamp]
	traceEvGCSTWStart        = 9  // GC STW start [timestamp, kind]
	traceEvGCSTWDone         = 10 // GC STW done [timestamp]
	traceEvGCSweepStart      = 11 // GC sweep start [timestamp, stack id]
	traceEvGCSweepDone       = 12 // GC sweep done [timestamp, swept, reclaimed]
	traceEvGoCreate          = 13 // goroutine creation [timestamp, new goroutine id, new stack id, stack id]
	traceEvGoStart           = 14 // goroutine starts running [timestamp, goroutine id, seq]
	traceEvGoEnd             = 15 // goroutine ends [timestamp]
	traceEvGoStop            = 16 // goroutine stops (like in select{}) [timestamp, stack]
	traceEvGoSched           = 17 // goroutine calls Gosched [timestamp, stack]
	traceEvGoPreempt         = 18 // goroutine is preempted [timestamp, stack]
	traceEvGoSleep           = 19 // goroutine calls Sleep [timestamp, stack]
	traceEvGoBlock           = 20 // goroutine blocks [timestamp, stack]
	traceEvGoUnblock         = 21 // goroutine is unblocked [timestamp, goroutine id, seq, stack]
	traceEvGoBlockSend       = 22 // goroutine blocks on chan send [timestamp, stack]
	traceEvGoBlockRecv       = 23 // goroutine blocks on chan recv [timestamp, stack]
	traceEvGoBlockSelect     = 24 // goroutine blocks on select [timestamp, stack]
	traceEvGoBlockSync       = 25 // goroutine blocks on Mutex/RWMutex [timestamp, stack]
	traceEvGoBlockCond       = 26 // goroutine blocks on Cond [timestamp, stack]
	traceEvGoBlockNet        = 27 // goroutine blocks on network [timestamp, stack]
	traceEvGoSysCall         = 28 // syscall enter [timestamp, stack]
	traceEvGoSysExit         = 29 // syscall exit [timestamp, goroutine id, seq, real timestamp]
	traceEvGoSysBlock        = 30 // syscall blocks [timestamp]
	traceEvGoWaiting         = 31 // denotes that goroutine is blocked when tracing starts [timestamp, goroutine id]
	traceEvGoInSyscall       = 32 // denotes that goroutine is in syscall when tracing starts [timestamp, goroutine id]
	traceEvHeapAlloc         = 33 // gcController.heapLive change [timestamp, heap_alloc]
	traceEvHeapGoal          = 34 // gcController.heapGoal() (formerly next_gc) change [timestamp, heap goal in bytes]
	traceEvTimerGoroutine    = 35 // not currently used; previously denoted timer goroutine [timer goroutine id]
	traceEvFutileWakeup      = 36 // denotes that the previous wakeup of this goroutine was futile [timestamp]
	traceEvString            = 37 // string dictionary entry [ID, length, string]
	traceEvGoStartLocal      = 38 // goroutine starts running on the same P as the last event [timestamp, goroutine id]
	traceEvGoUnblockLocal    = 39 // goroutine is unblocked on the same P as the last event [timestamp, goroutine id, stack]
	traceEvGoSysExitLocal    = 40 // syscall exit on the same P as the last event [timestamp, goroutine id, real timestamp]
	traceEvGoStartLabel      = 41 // goroutine starts running with label [timestamp, goroutine id, seq, label string id]
	traceEvGoBlockGC         = 42 // goroutine blocks on GC assist [timestamp, stack]
	traceEvGCMarkAssistStart = 43 // GC mark assist start [timestamp, stack]
	traceEvGCMarkAssistDone  = 44 // GC mark assist done [timestamp]
	traceEvUserTaskCreate    = 45 // trace.NewContext [timestamp, internal task id, internal parent task id, stack, name string]
	traceEvUserTaskEnd       = 46 // end of a task [timestamp, internal task id, stack]
	traceEvUserRegion        = 47 // trace.WithRegion [timestamp, internal task id, mode(0:start, 1:end), stack, name string]
	traceEvUserLog           = 48 // trace.Log [timestamp, internal task id, key string id, stack, value string]
	traceEvCPUSample         = 49 // CPU profiling sample [timestamp, stack, real timestamp, real P id (-1 when absent), goroutine id]
	traceEvCount             = 50
	// Byte is used but only 6 bits are available for event type.
	// The remaining 2 bits are used to specify the number of arguments.
	// That means, the max event type value is 63.
)
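The first event byte packs the event type into the low 6 bits and the inline-argument count into the top 2 bits (traceArgCountShift), which is why event type values must stay below 64. A minimal decoding sketch, outside the runtime and with a hypothetical helper name (unpackEventByte is not part of this file):

package main

import "fmt"

const traceArgCountShift = 6 // mirrors the runtime constant above

// unpackEventByte splits an event's first byte into its type (low 6 bits)
// and its encoded argument count (top 2 bits; 3 means "a length byte follows").
func unpackEventByte(b byte) (evType, narg byte) {
	evType = b & (1<<traceArgCountShift - 1)
	narg = b >> traceArgCountShift
	return
}

func main() {
	// A batch header byte as written by traceFlush: traceEvBatch(=1) | 1<<traceArgCountShift.
	fmt.Println(unpackEventByte(1 | 1<<traceArgCountShift)) // 1 1
}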
const (
	// Timestamps in trace are cputicks/traceTickDiv.
	// This makes absolute values of timestamp diffs smaller,
	// and so they are encoded in fewer bytes.
	// 64 on x86 is somewhat arbitrary (one tick is ~20ns on a 3GHz machine).
	// The suggested increment frequency for PowerPC's time base register is
	// 512 MHz according to Power ISA v2.07 section 6.2, so we use 16 on ppc64
	// and ppc64le.
	// Tracing won't work reliably for architectures where cputicks is emulated
	// by nanotime, so the value doesn't matter for those architectures.
	traceTickDiv = 16 + 48*(goarch.Is386|goarch.IsAmd64)
	// Maximum number of PCs in a single stack trace.
	// Since events contain only stack id rather than whole stack trace,
	// we can allow quite large values here.
	traceStackSize = 128
	// Identifier of a fake P that is used when we trace without a real P.
	traceGlobProc = -1
	// Maximum number of bytes to encode uint64 in base-128.
	traceBytesPerNumber = 10
	// Shift of the number of arguments in the first event byte.
	traceArgCountShift = 6
	// Flag passed to traceGoPark to denote that the previous wakeup of this
	// goroutine was futile. For example, a goroutine was unblocked on a mutex,
	// but another goroutine got ahead and acquired the mutex before the first
	// goroutine is scheduled, so the first goroutine has to block again.
	// Such wakeups happen on buffered channels and sync.Mutex,
	// but are generally not interesting for the end user.
	traceFutileWakeup byte = 128
)
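As a quick sanity check on these values: with traceTickDiv = 64 on 386/amd64 (16 + 48*1) and an assumed 3 GHz cputicks clock, one stored tick corresponds to roughly 21 ns, matching the "~20ns" note above; traceBytesPerNumber is 10 because a 64-bit value needs at most ceil(64/7) = 10 base-128 bytes. An illustrative sketch (the 3 GHz figure is an assumption for the example only):

package main

import "fmt"

func main() {
	const cpuHz = 3e9       // assumed clock rate, not something the runtime measures here
	const traceTickDiv = 64 // 16 + 48*(goarch.Is386|goarch.IsAmd64) on x86
	fmt.Printf("one divided tick ≈ %.1f ns\n", traceTickDiv/cpuHz*1e9) // ≈ 21.3 ns
	fmt.Println("max varint bytes for uint64:", (64+6)/7)              // 10
}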
// trace is global tracing context.
var trace struct {
	// trace.lock must only be acquired on the system stack where
	// stack splits cannot happen while it is held.
	lock          mutex       // protects the following members
	lockOwner     *g          // to avoid deadlocks during recursive lock locks
	enabled       bool        // when set runtime traces events
	shutdown      bool        // set when we are waiting for trace reader to finish after setting enabled to false
	headerWritten bool        // whether ReadTrace has emitted trace header
	footerWritten bool        // whether ReadTrace has emitted trace footer
	shutdownSema  uint32      // used to wait for ReadTrace completion
	seqStart      uint64      // sequence number when tracing was started
	ticksStart    int64       // cputicks when tracing was started
	ticksEnd      int64       // cputicks when tracing was stopped
	timeStart     int64       // nanotime when tracing was started
	timeEnd       int64       // nanotime when tracing was stopped
	seqGC         uint64      // GC start/done sequencer
	reading       traceBufPtr // buffer currently handed off to user
	empty         traceBufPtr // stack of empty buffers
	fullHead      traceBufPtr // queue of full buffers
	fullTail      traceBufPtr
	stackTab      traceStackTable // maps stack traces to unique ids
	// cpuLogRead accepts CPU profile samples from the signal handler where
	// they're generated. It uses a two-word header to hold the IDs of the P and
	// G (respectively) that were active at the time of the sample. Because
	// profBuf uses a record with all zeros in its header to indicate overflow,
	// we make sure to make the P field always non-zero: The ID of a real P will
	// start at bit 1, and bit 0 will be set. Samples that arrive while no P is
	// running (such as near syscalls) will set the first header field to 0b10.
	// This careful handling of the first header field allows us to store ID of
	// the active G directly in the second field, even though that will be 0
	// when sampling g0.
	cpuLogRead *profBuf
	// cpuLogBuf is a trace buffer to hold events corresponding to CPU profile
	// samples, which arrive out of band and not directly connected to a
	// specific P.
	cpuLogBuf traceBufPtr

	reader atomic.Pointer[g] // goroutine that called ReadTrace, or nil

	signalLock  atomic.Uint32 // protects use of the following member, only usable in signal handlers
	cpuLogWrite *profBuf      // copy of cpuLogRead for use in signal handlers, set without signalLock

	// Dictionary for traceEvString.
	//
	// TODO: central lock to access the map is not ideal.
	//   option: pre-assign ids to all user annotation region names and tags
	//   option: per-P cache
	//   option: sync.Map like data structure
	stringsLock mutex
	strings     map[string]uint64
	stringSeq   uint64

	// markWorkerLabels maps gcMarkWorkerMode to string ID.
	markWorkerLabels [len(gcMarkWorkerModeStrings)]uint64

	bufLock mutex       // protects buf
	buf     traceBufPtr // global trace buffer, used when running without a p
}
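The cpuLogRead header layout described above can be made concrete with a small sketch. The encode/decode helpers below are hypothetical and only illustrate the bit layout; the real producer and consumer are traceCPUSample and traceReadCPU later in this file:

package main

import "fmt"

// encodeCPUSampleHeader stores a real P as id<<1|1 (never zero) and "no P" as 0b10,
// so an all-zero header can still be recognized as a profBuf overflow record.
func encodeCPUSampleHeader(pid int32, haveP bool, goid uint64) [2]uint64 {
	var hdr [2]uint64
	if haveP {
		hdr[0] = uint64(pid)<<1 | 0b1
	} else {
		hdr[0] = 0b10
	}
	hdr[1] = goid // 0 is fine for g0 or an unknown goroutine
	return hdr
}

// decodeCPUSampleHeader inverts the encoding; ^uint64(0) means "no P".
func decodeCPUSampleHeader(hdr [2]uint64) (pid, goid uint64) {
	pid = hdr[0] >> 1
	if hdr[0]&0b1 == 0 {
		pid = ^uint64(0)
	}
	return pid, hdr[1]
}

func main() {
	fmt.Println(decodeCPUSampleHeader(encodeCPUSampleHeader(5, true, 42))) // 5 42
}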
// traceBufHeader is per-P tracing buffer.
type traceBufHeader struct {
	link      traceBufPtr             // in trace.empty/full
	lastTicks uint64                  // when we wrote the last event
	pos       int                     // next write offset in arr
	stk       [traceStackSize]uintptr // scratch buffer for traceback
}

// traceBuf is per-P tracing buffer.
type traceBuf struct {
	_ sys.NotInHeap
	traceBufHeader
	arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for traceBufHeader.buf
}

// traceBufPtr is a *traceBuf that is not traced by the garbage
// collector and doesn't have write barriers. traceBufs are not
// allocated from the GC'd heap, so this is safe, and are often
// manipulated in contexts where write barriers are not allowed, so
// this is necessary.
//
// TODO: Since traceBuf is now embedded runtime/internal/sys.NotInHeap, this isn't necessary.
type traceBufPtr uintptr

func (tp traceBufPtr) ptr() *traceBuf   { return (*traceBuf)(unsafe.Pointer(tp)) }
func (tp *traceBufPtr) set(b *traceBuf) { *tp = traceBufPtr(unsafe.Pointer(b)) }
func traceBufPtrOf(b *traceBuf) traceBufPtr {
	return traceBufPtr(unsafe.Pointer(b))
}

// StartTrace enables tracing for the current process.
// While tracing, the data will be buffered and available via ReadTrace.
// StartTrace returns an error if tracing is already enabled.
// Most clients should use the runtime/trace package or the testing package's
// -test.trace flag instead of calling StartTrace directly.
func StartTrace() error {
	// Stop the world so that we can take a consistent snapshot
	// of all goroutines at the beginning of the trace.
	// Do not stop the world during GC so we ensure we always see
	// a consistent view of GC-related events (e.g. a start is always
	// paired with an end).
	stopTheWorldGC("start tracing")

	// Prevent sysmon from running any code that could generate events.
	lock(&sched.sysmonlock)

	// We are in stop-the-world, but syscalls can finish and write to trace concurrently.
	// Exitsyscall could check trace.enabled long before and then suddenly wake up
	// and decide to write to trace at a random point in time.
	// However, such syscall will use the global trace.buf buffer, because we've
	// acquired all p's by doing stop-the-world. So this protects us from such races.
	lock(&trace.bufLock)

	if trace.enabled || trace.shutdown {
		unlock(&trace.bufLock)
		unlock(&sched.sysmonlock)
		startTheWorldGC()
		return errorString("tracing is already enabled")
	}

	// Can't set trace.enabled yet. While the world is stopped, exitsyscall could
	// already emit a delayed event (see exitTicks in exitsyscall) if we set trace.enabled here.
	// That would lead to an inconsistent trace:
	// - either GoSysExit appears before EvGoInSyscall,
	// - or GoSysExit appears for a goroutine for which we don't emit EvGoInSyscall below.
	// To instruct traceEvent that it must not ignore events below, we set startingtrace.
	// trace.enabled is set afterwards once we have emitted all preliminary events.
	mp := getg().m
	mp.startingtrace = true

	// Obtain current stack ID to use in all traceEvGoCreate events below.
	stkBuf := make([]uintptr, traceStackSize)
	stackID := traceStackID(mp, stkBuf, 2)

	profBuf := newProfBuf(2, profBufWordCount, profBufTagCount) // after the timestamp, header is [pp.id, gp.goid]
	trace.cpuLogRead = profBuf

	// We must not acquire trace.signalLock outside of a signal handler: a
	// profiling signal may arrive at any time and try to acquire it, leading to
	// deadlock. Because we can't use that lock to protect updates to
	// trace.cpuLogWrite (only use of the structure it references), reads and
	// writes of the pointer must be atomic. (And although this field is never
	// the sole pointer to the profBuf value, it's best to allow a write barrier
	// here.)
	atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), unsafe.Pointer(profBuf))

	// World is stopped, no need to lock.
	forEachGRace(func(gp *g) {
		status := readgstatus(gp)
		if status != _Gdead {
			gp.traceseq = 0
			gp.tracelastp = getg().m.p
			// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
			id := trace.stackTab.put([]uintptr{startPCforTrace(gp.startpc) + sys.PCQuantum})
			traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
		}
		if status == _Gwaiting {
			// traceEvGoWaiting is implied to have seq=1.
			gp.traceseq++
			traceEvent(traceEvGoWaiting, -1, gp.goid)
		}
		if status == _Gsyscall {
			gp.traceseq++
			traceEvent(traceEvGoInSyscall, -1, gp.goid)
		} else if status == _Gdead && gp.m != nil && gp.m.isextra {
			// Trigger two trace events for the dead g in the extra m,
			// since the next event of the g will be traceEvGoSysExit in exitsyscall,
			// while calling from C thread to Go.
			gp.traceseq = 0
			gp.tracelastp = getg().m.p
			// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
			id := trace.stackTab.put([]uintptr{startPCforTrace(0) + sys.PCQuantum}) // no start pc
			traceEvent(traceEvGoCreate, -1, gp.goid, uint64(id), stackID)
			gp.traceseq++
			traceEvent(traceEvGoInSyscall, -1, gp.goid)
		} else {
			gp.sysblocktraced = false
		}
	})
	traceProcStart()
	traceGoStart()
	// Note: ticksStart needs to be set after we emit traceEvGoInSyscall events.
	// If we do it the other way around, it is possible that exitsyscall will
	// query sysexitticks after ticksStart but before traceEvGoInSyscall timestamp.
	// It will lead to a false conclusion that cputicks is broken.
	trace.ticksStart = cputicks()
	trace.timeStart = nanotime()
	trace.headerWritten = false
	trace.footerWritten = false

	// string to id mapping
	//  0 : reserved for an empty string
	//  remaining: other strings registered by traceString
	trace.stringSeq = 0
	trace.strings = make(map[string]uint64)

	trace.seqGC = 0
	mp.startingtrace = false
	trace.enabled = true

	// Register runtime goroutine labels.
	_, pid, bufp := traceAcquireBuffer()
	for i, label := range gcMarkWorkerModeStrings[:] {
		trace.markWorkerLabels[i], bufp = traceString(bufp, pid, label)
	}
	traceReleaseBuffer(pid)

	unlock(&trace.bufLock)

	unlock(&sched.sysmonlock)

	startTheWorldGC()
	return nil
}

// StopTrace stops tracing, if it was previously enabled.
// StopTrace only returns after all the reads for the trace have completed.
func StopTrace() {
	// Stop the world so that we can collect the trace buffers from all p's below,
	// and also to avoid races with traceEvent.
	stopTheWorldGC("stop tracing")

	// See the comment in StartTrace.
	lock(&sched.sysmonlock)

	// See the comment in StartTrace.
	lock(&trace.bufLock)

	if !trace.enabled {
		unlock(&trace.bufLock)
		unlock(&sched.sysmonlock)
		startTheWorldGC()
		return
	}

	traceGoSched()

	atomicstorep(unsafe.Pointer(&trace.cpuLogWrite), nil)
	trace.cpuLogRead.close()
	traceReadCPU()

	// Loop over all allocated Ps because dead Ps may still have
	// trace buffers.
	for _, p := range allp[:cap(allp)] {
		buf := p.tracebuf
		if buf != 0 {
			traceFullQueue(buf)
			p.tracebuf = 0
		}
	}
	if trace.buf != 0 {
		buf := trace.buf
		trace.buf = 0
		if buf.ptr().pos != 0 {
			traceFullQueue(buf)
		}
	}
	if trace.cpuLogBuf != 0 {
		buf := trace.cpuLogBuf
		trace.cpuLogBuf = 0
		if buf.ptr().pos != 0 {
			traceFullQueue(buf)
		}
	}

	for {
		trace.ticksEnd = cputicks()
		trace.timeEnd = nanotime()
		// Windows time can tick only every 15ms, wait for at least one tick.
		if trace.timeEnd != trace.timeStart {
			break
		}
		osyield()
	}

	trace.enabled = false
	trace.shutdown = true
	unlock(&trace.bufLock)

	unlock(&sched.sysmonlock)

	startTheWorldGC()

	// The world is started but we've set trace.shutdown, so new tracing can't start.
	// Wait for the trace reader to flush pending buffers and stop.
	semacquire(&trace.shutdownSema)
	if raceenabled {
		raceacquire(unsafe.Pointer(&trace.shutdownSema))
	}

	systemstack(func() {
		// The lock protects us from races with StartTrace/StopTrace because they do stop-the-world.
		lock(&trace.lock)
		for _, p := range allp[:cap(allp)] {
			if p.tracebuf != 0 {
				throw("trace: non-empty trace buffer in proc")
			}
		}
		if trace.buf != 0 {
			throw("trace: non-empty global trace buffer")
		}
		if trace.fullHead != 0 || trace.fullTail != 0 {
			throw("trace: non-empty full trace buffer")
		}
		if trace.reading != 0 || trace.reader.Load() != nil {
			throw("trace: reading after shutdown")
		}
		for trace.empty != 0 {
			buf := trace.empty
			trace.empty = buf.ptr().link
			sysFree(unsafe.Pointer(buf), unsafe.Sizeof(*buf.ptr()), &memstats.other_sys)
		}
		trace.strings = nil
		trace.shutdown = false
		trace.cpuLogRead = nil
		unlock(&trace.lock)
	})
}
// ReadTrace returns the next chunk of binary tracing data, blocking until data
// is available. If tracing is turned off and all the data accumulated while it
// was on has been returned, ReadTrace returns nil. The caller must copy the
// returned data before calling ReadTrace again.
// ReadTrace must be called from one goroutine at a time.
func ReadTrace() []byte {
top:
	var buf []byte
	var park bool
	systemstack(func() {
		buf, park = readTrace0()
	})
	if park {
		gopark(func(gp *g, _ unsafe.Pointer) bool {
			if !trace.reader.CompareAndSwapNoWB(nil, gp) {
				// We're racing with another reader.
				// Wake up and handle this case.
				return false
			}

			if g2 := traceReader(); gp == g2 {
				// New data arrived between unlocking
				// and the CAS and we won the wake-up
				// race, so wake up directly.
				return false
			} else if g2 != nil {
				printlock()
				println("runtime: got trace reader", g2, g2.goid)
				throw("unexpected trace reader")
			}

			return true
		}, nil, waitReasonTraceReaderBlocked, traceEvGoBlock, 2)
		goto top
	}

	return buf
}
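For context, most programs never call StartTrace, ReadTrace, and StopTrace directly; the public runtime/trace package drives exactly this loop, and `go test -trace` does the same. A minimal client-side sketch using that public API (error handling kept short):

package main

import (
	"os"
	"runtime/trace"
)

func main() {
	f, err := os.Create("trace.out")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// trace.Start calls StartTrace and then copies each chunk returned by
	// ReadTrace into f; trace.Stop calls StopTrace and waits for the reader.
	if err := trace.Start(f); err != nil {
		panic(err)
	}
	defer trace.Stop()

	// ... workload to be traced ...
}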
// readTrace0 is ReadTrace's continuation on g0. This must run on the
// system stack because it acquires trace.lock.
//
//go:systemstack
func readTrace0() (buf []byte, park bool) {
	if raceenabled {
		// g0 doesn't have a race context. Borrow the user G's.
		if getg().racectx != 0 {
			throw("expected racectx == 0")
		}
		getg().racectx = getg().m.curg.racectx
		// (This defer should get open-coded, which is safe on
		// the system stack.)
		defer func() { getg().racectx = 0 }()
	}

	// This function may need to lock trace.lock recursively
	// (goparkunlock -> traceGoPark -> traceEvent -> traceFlush).
	// To allow this we use trace.lockOwner.
	// Also this function must not allocate while holding trace.lock:
	// allocation can call heap allocate, which will try to emit a trace
	// event while holding heap lock.
	lock(&trace.lock)
	trace.lockOwner = getg().m.curg

	if trace.reader.Load() != nil {
		// More than one goroutine reads trace. This is bad.
		// But we rather do not crash the program because of tracing,
		// because tracing can be enabled at runtime on prod servers.
		trace.lockOwner = nil
		unlock(&trace.lock)
		println("runtime: ReadTrace called from multiple goroutines simultaneously")
		return nil, false
	}
	// Recycle the old buffer.
	if buf := trace.reading; buf != 0 {
		buf.ptr().link = trace.empty
		trace.empty = buf
		trace.reading = 0
	}
	// Write trace header.
	if !trace.headerWritten {
		trace.headerWritten = true
		trace.lockOwner = nil
		unlock(&trace.lock)
		return []byte("go 1.19 trace\x00\x00\x00"), false
	}
	// Optimistically look for CPU profile samples. This may write new stack
	// records, and may write new tracing buffers.
	if !trace.footerWritten && !trace.shutdown {
		traceReadCPU()
	}
	// Wait for new data.
	if trace.fullHead == 0 && !trace.shutdown {
		// We don't simply use a note because the scheduler
		// executes this goroutine directly when it wakes up
		// (also a note would consume an M).
		trace.lockOwner = nil
		unlock(&trace.lock)
		return nil, true
	}
newFull:
	assertLockHeld(&trace.lock)
	// Write a buffer.
	if trace.fullHead != 0 {
		buf := traceFullDequeue()
		trace.reading = buf
		trace.lockOwner = nil
		unlock(&trace.lock)
		return buf.ptr().arr[:buf.ptr().pos], false
	}

	// Write footer with timer frequency.
	if !trace.footerWritten {
		trace.footerWritten = true
		// Use float64 because (trace.ticksEnd - trace.ticksStart) * 1e9 can overflow int64.
		freq := float64(trace.ticksEnd-trace.ticksStart) * 1e9 / float64(trace.timeEnd-trace.timeStart) / traceTickDiv
		if freq <= 0 {
			throw("trace: ReadTrace got invalid frequency")
		}
		trace.lockOwner = nil
		unlock(&trace.lock)

		// Write frequency event.
		bufp := traceFlush(0, 0)
		buf := bufp.ptr()
		buf.byte(traceEvFrequency | 0<<traceArgCountShift)
		buf.varint(uint64(freq))

		// Dump stack table.
		// This will emit a bunch of full buffers, we will pick them up
		// on the next iteration.
		bufp = trace.stackTab.dump(bufp)

		// Flush final buffer.
		lock(&trace.lock)
		traceFullQueue(bufp)
		goto newFull // trace.lock should be held at newFull
	}
	// Done.
	if trace.shutdown {
		trace.lockOwner = nil
		unlock(&trace.lock)
		if raceenabled {
			// Model synchronization on trace.shutdownSema, which race
			// detector does not see. This is required to avoid false
			// race reports on writer passed to trace.Start.
			racerelease(unsafe.Pointer(&trace.shutdownSema))
		}
		// trace.enabled is already reset, so can call traceable functions.
		semrelease(&trace.shutdownSema)
		return nil, false
	}
	// Also bad, but see the comment above.
	trace.lockOwner = nil
	unlock(&trace.lock)
	println("runtime: spurious wakeup of trace reader")
	return nil, false
}

// traceReader returns the trace reader that should be woken up, if any.
// Callers should first check that trace.enabled or trace.shutdown is set.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceReader() *g {
	// Optimistic check first
	if traceReaderAvailable() == nil {
		return nil
	}
	lock(&trace.lock)
	gp := traceReaderAvailable()
	if gp == nil || !trace.reader.CompareAndSwapNoWB(gp, nil) {
		unlock(&trace.lock)
		return nil
	}
	unlock(&trace.lock)
	return gp
}
// traceReaderAvailable returns the trace reader if it is not currently
// scheduled and should be. Callers should first check that trace.enabled
// or trace.shutdown is set.
func traceReaderAvailable() *g {
	if trace.fullHead != 0 || trace.shutdown {
		return trace.reader.Load()
	}
	return nil
}

// traceProcFree frees trace buffer associated with pp.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceProcFree(pp *p) {
	buf := pp.tracebuf
	pp.tracebuf = 0
	if buf == 0 {
		return
	}
	lock(&trace.lock)
	traceFullQueue(buf)
	unlock(&trace.lock)
}

// traceFullQueue queues buf into queue of full buffers.
func traceFullQueue(buf traceBufPtr) {
	buf.ptr().link = 0
	if trace.fullHead == 0 {
		trace.fullHead = buf
	} else {
		trace.fullTail.ptr().link = buf
	}
	trace.fullTail = buf
}

// traceFullDequeue dequeues from queue of full buffers.
func traceFullDequeue() traceBufPtr {
	buf := trace.fullHead
	if buf == 0 {
		return 0
	}
	trace.fullHead = buf.ptr().link
	if trace.fullHead == 0 {
		trace.fullTail = 0
	}
	buf.ptr().link = 0
	return buf
}

// traceEvent writes a single event to trace buffer, flushing the buffer if necessary.
// ev is event type.
// If skip > 0, write current stack id as the last argument (skipping skip top frames).
// If skip = 0, this event type should contain a stack, but we don't want
// to collect and remember it for this particular call.
func traceEvent(ev byte, skip int, args ...uint64) {
	mp, pid, bufp := traceAcquireBuffer()
	// Double-check trace.enabled now that we've done m.locks++ and acquired bufLock.
	// This protects from races between traceEvent and StartTrace/StopTrace.
	// The caller checked that trace.enabled == true, but trace.enabled might have been
	// turned off between the check and now. Check again. traceLockBuffer did mp.locks++,
	// StopTrace does stopTheWorld, and stopTheWorld waits for mp.locks to go back to zero,
	// so if we see trace.enabled == true now, we know it's true for the rest of the function.
	// Exitsyscall can run even during stopTheWorld. The race with StartTrace/StopTrace
	// during tracing in exitsyscall is resolved by locking trace.bufLock in traceLockBuffer.
	//
	// Note trace_userTaskCreate runs the same check.
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(pid)
		return
	}

	if skip > 0 {
		if getg() == mp.curg {
			skip++ // +1 because stack is captured in traceEventLocked.
		}
	}
	traceEventLocked(0, mp, pid, bufp, ev, 0, skip, args...)
	traceReleaseBuffer(pid)
}

// traceEventLocked writes a single event of type ev to the trace buffer bufp,
// flushing the buffer if necessary. pid is the id of the current P, or
// traceGlobProc if we're tracing without a real P.
//
// Preemption is disabled, and if running without a real P the global tracing
// buffer is locked.
//
// Event types that do not include a stack set skip to -1. Event types that
// include a stack may explicitly reference a stackID from the trace.stackTab
// (obtained by an earlier call to traceStackID). Without an explicit stackID,
// this function will automatically capture the stack of the goroutine currently
// running on mp, skipping skip top frames or, if skip is 0, writing out an
// empty stack record.
//
// It records the event's args to the traceBuf, and also makes an effort to
// reserve extraBytes bytes of additional space immediately following the event,
// in the same traceBuf.
func traceEventLocked(extraBytes int, mp *m, pid int32, bufp *traceBufPtr, ev byte, stackID uint32, skip int, args ...uint64) {
	buf := bufp.ptr()
	// TODO: test on non-zero extraBytes param.
	maxSize := 2 + 5*traceBytesPerNumber + extraBytes // event type, length, sequence, timestamp, stack id and two add params
	if buf == nil || len(buf.arr)-buf.pos < maxSize {
		systemstack(func() {
			buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
		})
		bufp.set(buf)
	}

	// NOTE: ticks might be same after tick division, although the real cputicks is
	// linear growth.
	ticks := uint64(cputicks()) / traceTickDiv
	tickDiff := ticks - buf.lastTicks
	if tickDiff == 0 {
		ticks = buf.lastTicks + 1
		tickDiff = 1
	}

	buf.lastTicks = ticks
	narg := byte(len(args))
	if stackID != 0 || skip >= 0 {
		narg++
	}
	// We have only 2 bits for number of arguments.
	// If number is >= 3, then the event type is followed by event length in bytes.
	if narg > 3 {
		narg = 3
	}
	startPos := buf.pos
	buf.byte(ev | narg<<traceArgCountShift)
	var lenp *byte
	if narg == 3 {
		// Reserve the byte for length assuming that length < 128.
		buf.varint(0)
		lenp = &buf.arr[buf.pos-1]
	}
	buf.varint(tickDiff)
	for _, a := range args {
		buf.varint(a)
	}
	if stackID != 0 {
		buf.varint(uint64(stackID))
	} else if skip == 0 {
		buf.varint(0)
	} else if skip > 0 {
		buf.varint(traceStackID(mp, buf.stk[:], skip))
	}
	evSize := buf.pos - startPos
	if evSize > maxSize {
		throw("invalid length of trace event")
	}
	if lenp != nil {
		// Fill in actual length.
		*lenp = byte(evSize - 2)
	}
}

// traceCPUSample writes a CPU profile sample stack to the execution tracer's
// profiling buffer. It is called from a signal handler, so is limited in what
// it can do.
func traceCPUSample(gp *g, pp *p, stk []uintptr) {
	if !trace.enabled {
		// Tracing is usually turned off; don't spend time acquiring the signal
		// lock unless it's active.
		return
	}

	// Match the clock used in traceEventLocked
	now := cputicks()
	// The "header" here is the ID of the P that was running the profiled code,
	// followed by the ID of the goroutine. (For normal CPU profiling, it's
	// usually the number of samples with the given stack.) Near syscalls, pp
	// may be nil. Reporting goid of 0 is fine for either g0 or a nil gp.
	var hdr [2]uint64
	if pp != nil {
		// Overflow records in profBuf have all header values set to zero. Make
		// sure that real headers have at least one bit set.
		hdr[0] = uint64(pp.id)<<1 | 0b1
	} else {
		hdr[0] = 0b10
	}
	if gp != nil {
		hdr[1] = gp.goid
	}

	// Allow only one writer at a time
	for !trace.signalLock.CompareAndSwap(0, 1) {
		// TODO: Is it safe to osyield here? https://go.dev/issue/52672
		osyield()
	}

	if log := (*profBuf)(atomic.Loadp(unsafe.Pointer(&trace.cpuLogWrite))); log != nil {
		// Note: we don't pass a tag pointer here (how should profiling tags
		// interact with the execution tracer?), but if we did we'd need to be
		// careful about write barriers. See the long comment in profBuf.write.
		log.write(nil, now, hdr[:], stk)
	}

	trace.signalLock.Store(0)
}

func traceReadCPU() {
	bufp := &trace.cpuLogBuf

	for {
		data, tags, _ := trace.cpuLogRead.read(profBufNonBlocking)
		if len(data) == 0 {
			break
		}
		for len(data) > 0 {
			if len(data) < 4 || data[0] > uint64(len(data)) {
				break // truncated profile
			}
			if data[0] < 4 || tags != nil && len(tags) < 1 {
				break // malformed profile
			}
			if len(tags) < 1 {
				break // mismatched profile records and tags
			}
			timestamp := data[1]
			ppid := data[2] >> 1
			if hasP := (data[2] & 0b1) != 0; !hasP {
				ppid = ^uint64(0)
			}
			goid := data[3]
			stk := data[4:data[0]]
			empty := len(stk) == 1 && data[2] == 0 && data[3] == 0
			data = data[data[0]:]
			// No support here for reporting goroutine tags at the moment; if
			// that information is to be part of the execution trace, we'd
			// probably want to see when the tags are applied and when they
			// change, instead of only seeing them when we get a CPU sample.
			tags = tags[1:]

			if empty {
				// Looks like an overflow record from the profBuf. Not much to
				// do here, we only want to report full records.
				//
				// TODO: should we start a goroutine to drain the profBuf,
				// rather than relying on a high-enough volume of tracing events
				// to keep ReadTrace busy? https://go.dev/issue/52674
				continue
			}

			buf := bufp.ptr()
			if buf == nil {
				systemstack(func() {
					*bufp = traceFlush(*bufp, 0)
				})
				buf = bufp.ptr()
			}
			for i := range stk {
				if i >= len(buf.stk) {
					break
				}
				buf.stk[i] = uintptr(stk[i])
			}
			stackID := trace.stackTab.put(buf.stk[:len(stk)])

			traceEventLocked(0, nil, 0, bufp, traceEvCPUSample, stackID, 1, timestamp/traceTickDiv, ppid, goid)
		}
	}
}
func traceStackID(mp *m, pcBuf []uintptr, skip int) uint64 {
	gp := getg()
	curgp := mp.curg
	var nstk int
	if curgp == gp {
		nstk = callers(skip+1, pcBuf)
	} else if curgp != nil {
		nstk = gcallers(curgp, skip, pcBuf)
	}
	if nstk > 0 {
		nstk-- // skip runtime.goexit
	}
	if nstk > 0 && curgp.goid == 1 {
		nstk-- // skip runtime.main
	}
	id := trace.stackTab.put(pcBuf[:nstk])
	return uint64(id)
}

// traceAcquireBuffer returns trace buffer to use and, if necessary, locks it.
func traceAcquireBuffer() (mp *m, pid int32, bufp *traceBufPtr) {
	// Any time we acquire a buffer, we may end up flushing it,
	// but flushes are rare. Record the lock edge even if it
	// doesn't happen this time.
	lockRankMayTraceFlush()

	mp = acquirem()
	if p := mp.p.ptr(); p != nil {
		return mp, p.id, &p.tracebuf
	}
	lock(&trace.bufLock)
	return mp, traceGlobProc, &trace.buf
}

// traceReleaseBuffer releases a buffer previously acquired with traceAcquireBuffer.
func traceReleaseBuffer(pid int32) {
	if pid == traceGlobProc {
		unlock(&trace.bufLock)
	}
	releasem(getg().m)
}

// lockRankMayTraceFlush records the lock ranking effects of a
// potential call to traceFlush.
func lockRankMayTraceFlush() {
	owner := trace.lockOwner
	dolock := owner == nil || owner != getg().m.curg
	if dolock {
		lockWithRankMayAcquire(&trace.lock, getLockRank(&trace.lock))
	}
}

// traceFlush puts buf onto stack of full buffers and returns an empty buffer.
//
// This must run on the system stack because it acquires trace.lock.
//
//go:systemstack
func traceFlush(buf traceBufPtr, pid int32) traceBufPtr {
	owner := trace.lockOwner
	dolock := owner == nil || owner != getg().m.curg
	if dolock {
		lock(&trace.lock)
	}
	if buf != 0 {
		traceFullQueue(buf)
	}
	if trace.empty != 0 {
		buf = trace.empty
		trace.empty = buf.ptr().link
	} else {
		buf = traceBufPtr(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
		if buf == 0 {
			throw("trace: out of memory")
		}
	}
	bufp := buf.ptr()
	bufp.link.set(nil)
	bufp.pos = 0

	// initialize the buffer for a new batch
	ticks := uint64(cputicks()) / traceTickDiv
	if ticks == bufp.lastTicks {
		ticks = bufp.lastTicks + 1
	}
	bufp.lastTicks = ticks
	bufp.byte(traceEvBatch | 1<<traceArgCountShift)
	bufp.varint(uint64(pid))
	bufp.varint(ticks)

	if dolock {
		unlock(&trace.lock)
	}
	return buf
}

// traceString adds a string to the trace.strings and returns the id.
func traceString(bufp *traceBufPtr, pid int32, s string) (uint64, *traceBufPtr) {
	if s == "" {
		return 0, bufp
	}

	lock(&trace.stringsLock)
	if raceenabled {
		// raceacquire is necessary because the map access
		// below is race annotated.
		raceacquire(unsafe.Pointer(&trace.stringsLock))
	}

	if id, ok := trace.strings[s]; ok {
		if raceenabled {
			racerelease(unsafe.Pointer(&trace.stringsLock))
		}
		unlock(&trace.stringsLock)

		return id, bufp
	}

	trace.stringSeq++
	id := trace.stringSeq
	trace.strings[s] = id

	if raceenabled {
		racerelease(unsafe.Pointer(&trace.stringsLock))
	}
	unlock(&trace.stringsLock)

	// memory allocation in above may trigger tracing and
	// cause *bufp changes. Following code now works with *bufp,
	// so there must be no memory allocation or any activities
	// that causes tracing after this point.

	buf := bufp.ptr()
	size := 1 + 2*traceBytesPerNumber + len(s)
	if buf == nil || len(buf.arr)-buf.pos < size {
		systemstack(func() {
			buf = traceFlush(traceBufPtrOf(buf), pid).ptr()
			bufp.set(buf)
		})
	}
	buf.byte(traceEvString)
	buf.varint(id)

	// double-check the string and the length can fit.
	// Otherwise, truncate the string.
	slen := len(s)
	if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
		slen = room
	}

	buf.varint(uint64(slen))
	buf.pos += copy(buf.arr[buf.pos:], s[:slen])

	bufp.set(buf)
	return id, bufp
}

// varint appends v to buf in little-endian-base-128 encoding.
func (buf *traceBuf) varint(v uint64) {
	pos := buf.pos
	for ; v >= 0x80; v >>= 7 {
		buf.arr[pos] = 0x80 | byte(v)
		pos++
	}
	buf.arr[pos] = byte(v)
	pos++
	buf.pos = pos
}

// varintAt writes varint v at byte position pos in buf. This always
// consumes traceBytesPerNumber bytes. This is intended for when the
// caller needs to reserve space for a varint but can't populate it
// until later.
func (buf *traceBuf) varintAt(pos int, v uint64) {
	for i := 0; i < traceBytesPerNumber; i++ {
		if i < traceBytesPerNumber-1 {
			buf.arr[pos] = 0x80 | byte(v)
		} else {
			buf.arr[pos] = byte(v)
		}
		v >>= 7
		pos++
	}
}

// byte appends v to buf.
func (buf *traceBuf) byte(v byte) {
	buf.arr[buf.pos] = v
	buf.pos++
}
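A round-trip sketch of the little-endian base-128 encoding used by varint above. The decoder is written here only for illustration (decoding normally happens in the trace parser, not in the runtime):

package main

import "fmt"

// encodeVarint mirrors traceBuf.varint: 7 payload bits per byte, with the
// 0x80 continuation bit set on every byte except the last.
func encodeVarint(v uint64) []byte {
	var out []byte
	for ; v >= 0x80; v >>= 7 {
		out = append(out, 0x80|byte(v))
	}
	return append(out, byte(v))
}

// decodeVarint reverses the encoding, returning the value and bytes consumed.
func decodeVarint(b []byte) (v uint64, n int) {
	for shift := uint(0); n < len(b); shift += 7 {
		c := b[n]
		n++
		v |= uint64(c&0x7f) << shift
		if c&0x80 == 0 {
			return v, n
		}
	}
	return 0, 0 // truncated input
}

func main() {
	enc := encodeVarint(300)       // [0xac 0x02]
	fmt.Println(decodeVarint(enc)) // 300 2
}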
// traceStackTable maps stack traces (arrays of PC's) to unique uint32 ids.
// It is lock-free for reading.
type traceStackTable struct {
	lock mutex // Must be acquired on the system stack
	seq  uint32
	mem  traceAlloc
	tab  [1 << 13]traceStackPtr
}

// traceStack is a single stack in traceStackTable.
type traceStack struct {
	link traceStackPtr
	hash uintptr
	id   uint32
	n    int
	stk  [0]uintptr // real type [n]uintptr
}

type traceStackPtr uintptr

func (tp traceStackPtr) ptr() *traceStack { return (*traceStack)(unsafe.Pointer(tp)) }

// stack returns slice of PCs.
func (ts *traceStack) stack() []uintptr {
	return (*[traceStackSize]uintptr)(unsafe.Pointer(&ts.stk))[:ts.n]
}

// put returns a unique id for the stack trace pcs and caches it in the table,
// if it sees the trace for the first time.
func (tab *traceStackTable) put(pcs []uintptr) uint32 {
	if len(pcs) == 0 {
		return 0
	}
	hash := memhash(unsafe.Pointer(&pcs[0]), 0, uintptr(len(pcs))*unsafe.Sizeof(pcs[0]))
	// First, search the hashtable w/o the mutex.
	if id := tab.find(pcs, hash); id != 0 {
		return id
	}
	// Now, double check under the mutex.
	// Switch to the system stack so we can acquire tab.lock
	var id uint32
	systemstack(func() {
		lock(&tab.lock)
		if id = tab.find(pcs, hash); id != 0 {
			unlock(&tab.lock)
			return
		}
		// Create new record.
		tab.seq++
		stk := tab.newStack(len(pcs))
		stk.hash = hash
		stk.id = tab.seq
		id = stk.id
		stk.n = len(pcs)
		stkpc := stk.stack()
		for i, pc := range pcs {
			stkpc[i] = pc
		}
		part := int(hash % uintptr(len(tab.tab)))
		stk.link = tab.tab[part]
		atomicstorep(unsafe.Pointer(&tab.tab[part]), unsafe.Pointer(stk))
		unlock(&tab.lock)
	})
	return id
}

// find checks if the stack trace pcs is already present in the table.
func (tab *traceStackTable) find(pcs []uintptr, hash uintptr) uint32 {
	part := int(hash % uintptr(len(tab.tab)))
Search:
	for stk := tab.tab[part].ptr(); stk != nil; stk = stk.link.ptr() {
		if stk.hash == hash && stk.n == len(pcs) {
			for i, stkpc := range stk.stack() {
				if stkpc != pcs[i] {
					continue Search
				}
			}
			return stk.id
		}
	}
	return 0
}

// newStack allocates a new stack of size n.
func (tab *traceStackTable) newStack(n int) *traceStack {
	return (*traceStack)(tab.mem.alloc(unsafe.Sizeof(traceStack{}) + uintptr(n)*goarch.PtrSize))
}

// traceFrames returns the frames corresponding to pcs. It may
// allocate and may emit trace events.
func traceFrames(bufp traceBufPtr, pcs []uintptr) ([]traceFrame, traceBufPtr) {
	frames := make([]traceFrame, 0, len(pcs))
	ci := CallersFrames(pcs)
	for {
		var frame traceFrame
		f, more := ci.Next()
		frame, bufp = traceFrameForPC(bufp, 0, f)
		frames = append(frames, frame)
		if !more {
			return frames, bufp
		}
	}
}

// dump writes all previously cached stacks to trace buffers,
// releases all memory and resets state.
//
// This must run on the system stack because it calls traceFlush.
//
//go:systemstack
func (tab *traceStackTable) dump(bufp traceBufPtr) traceBufPtr {
	for i := range tab.tab {
		stk := tab.tab[i].ptr()
		for ; stk != nil; stk = stk.link.ptr() {
			var frames []traceFrame
			frames, bufp = traceFrames(bufp, stk.stack())

			// Estimate the size of this record. This
			// bound is pretty loose, but avoids counting
			// lots of varint sizes.
			maxSize := 1 + traceBytesPerNumber + (2+4*len(frames))*traceBytesPerNumber
			// Make sure we have enough buffer space.
			if buf := bufp.ptr(); len(buf.arr)-buf.pos < maxSize {
				bufp = traceFlush(bufp, 0)
			}

			// Emit header, with space reserved for length.
			buf := bufp.ptr()
			buf.byte(traceEvStack | 3<<traceArgCountShift)
			lenPos := buf.pos
			buf.pos += traceBytesPerNumber

			// Emit body.
			recPos := buf.pos
			buf.varint(uint64(stk.id))
			buf.varint(uint64(len(frames)))
			for _, frame := range frames {
				buf.varint(uint64(frame.PC))
				buf.varint(frame.funcID)
				buf.varint(frame.fileID)
				buf.varint(frame.line)
			}

			// Fill in size header.
			buf.varintAt(lenPos, uint64(buf.pos-recPos))
		}
	}

	tab.mem.drop()
	*tab = traceStackTable{}
	lockInit(&((*tab).lock), lockRankTraceStackTab)

	return bufp
}

type traceFrame struct {
	PC     uintptr
	funcID uint64
	fileID uint64
	line   uint64
}

// traceFrameForPC records the frame information.
// It may allocate memory.
func traceFrameForPC(buf traceBufPtr, pid int32, f Frame) (traceFrame, traceBufPtr) {
	bufp := &buf
	var frame traceFrame
	frame.PC = f.PC

	fn := f.Function
	const maxLen = 1 << 10
	if len(fn) > maxLen {
		fn = fn[len(fn)-maxLen:]
	}
	frame.funcID, bufp = traceString(bufp, pid, fn)
	frame.line = uint64(f.Line)
	file := f.File
	if len(file) > maxLen {
		file = file[len(file)-maxLen:]
	}
	frame.fileID, bufp = traceString(bufp, pid, file)
	return frame, (*bufp)
}

// traceAlloc is a non-thread-safe region allocator.
// It holds a linked list of traceAllocBlock.
type traceAlloc struct {
	head traceAllocBlockPtr
	off  uintptr
}

// traceAllocBlock is a block in traceAlloc.
//
// traceAllocBlock is allocated from non-GC'd memory, so it must not
// contain heap pointers. Writes to pointers to traceAllocBlocks do
// not need write barriers.
type traceAllocBlock struct {
	_    sys.NotInHeap
	next traceAllocBlockPtr
	data [64<<10 - goarch.PtrSize]byte
}

// TODO: Since traceAllocBlock is now embedded runtime/internal/sys.NotInHeap, this isn't necessary.
type traceAllocBlockPtr uintptr

func (p traceAllocBlockPtr) ptr() *traceAllocBlock   { return (*traceAllocBlock)(unsafe.Pointer(p)) }
func (p *traceAllocBlockPtr) set(x *traceAllocBlock) { *p = traceAllocBlockPtr(unsafe.Pointer(x)) }

// alloc allocates n-byte block.
func (a *traceAlloc) alloc(n uintptr) unsafe.Pointer {
	n = alignUp(n, goarch.PtrSize)
	if a.head == 0 || a.off+n > uintptr(len(a.head.ptr().data)) {
		if n > uintptr(len(a.head.ptr().data)) {
			throw("trace: alloc too large")
		}
		block := (*traceAllocBlock)(sysAlloc(unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys))
		if block == nil {
			throw("trace: out of memory")
		}
		block.next.set(a.head.ptr())
		a.head.set(block)
		a.off = 0
	}
	p := &a.head.ptr().data[a.off]
	a.off += n
	return unsafe.Pointer(p)
}

// drop frees all previously allocated memory and resets the allocator.
func (a *traceAlloc) drop() {
	for a.head != 0 {
		block := a.head.ptr()
		a.head.set(block.next.ptr())
		sysFree(unsafe.Pointer(block), unsafe.Sizeof(traceAllocBlock{}), &memstats.other_sys)
	}
}

// The following functions write specific events to trace.

func traceGomaxprocs(procs int32) {
	traceEvent(traceEvGomaxprocs, 1, uint64(procs))
}

func traceProcStart() {
	traceEvent(traceEvProcStart, -1, uint64(getg().m.id))
}

func traceProcStop(pp *p) {
	// Sysmon and stopTheWorld can stop Ps blocked in syscalls,
	// to handle this we temporarily employ the P.
	mp := acquirem()
	oldp := mp.p
	mp.p.set(pp)
	traceEvent(traceEvProcStop, -1)
	mp.p = oldp
	releasem(mp)
}

func traceGCStart() {
	traceEvent(traceEvGCStart, 3, trace.seqGC)
	trace.seqGC++
}

func traceGCDone() {
	traceEvent(traceEvGCDone, -1)
}

func traceGCSTWStart(kind int) {
	traceEvent(traceEvGCSTWStart, -1, uint64(kind))
}

func traceGCSTWDone() {
	traceEvent(traceEvGCSTWDone, -1)
}

// traceGCSweepStart prepares to trace a sweep loop. This does not
// emit any events until traceGCSweepSpan is called.
//
// traceGCSweepStart must be paired with traceGCSweepDone and there
// must be no preemption points between these two calls.
func traceGCSweepStart() {
	// Delay the actual GCSweepStart event until the first span
	// sweep. If we don't sweep anything, don't emit any events.
	pp := getg().m.p.ptr()
	if pp.traceSweep {
		throw("double traceGCSweepStart")
	}
	pp.traceSweep, pp.traceSwept, pp.traceReclaimed = true, 0, 0
}
// traceGCSweepSpan traces the sweep of a single page.
//
// This may be called outside a traceGCSweepStart/traceGCSweepDone
// pair; however, it will not emit any trace events in this case.
func traceGCSweepSpan(bytesSwept uintptr) {
	pp := getg().m.p.ptr()
	if pp.traceSweep {
		if pp.traceSwept == 0 {
			traceEvent(traceEvGCSweepStart, 1)
		}
		pp.traceSwept += bytesSwept
	}
}

func traceGCSweepDone() {
	pp := getg().m.p.ptr()
	if !pp.traceSweep {
		throw("missing traceGCSweepStart")
	}
	if pp.traceSwept != 0 {
		traceEvent(traceEvGCSweepDone, -1, uint64(pp.traceSwept), uint64(pp.traceReclaimed))
	}
	pp.traceSweep = false
}

func traceGCMarkAssistStart() {
	traceEvent(traceEvGCMarkAssistStart, 1)
}

func traceGCMarkAssistDone() {
	traceEvent(traceEvGCMarkAssistDone, -1)
}

func traceGoCreate(newg *g, pc uintptr) {
	newg.traceseq = 0
	newg.tracelastp = getg().m.p
	// +PCQuantum because traceFrameForPC expects return PCs and subtracts PCQuantum.
	id := trace.stackTab.put([]uintptr{startPCforTrace(pc) + sys.PCQuantum})
	traceEvent(traceEvGoCreate, 2, newg.goid, uint64(id))
}

func traceGoStart() {
	gp := getg().m.curg
	pp := gp.m.p
	gp.traceseq++
	if pp.ptr().gcMarkWorkerMode != gcMarkWorkerNotWorker {
		traceEvent(traceEvGoStartLabel, -1, gp.goid, gp.traceseq, trace.markWorkerLabels[pp.ptr().gcMarkWorkerMode])
	} else if gp.tracelastp == pp {
		traceEvent(traceEvGoStartLocal, -1, gp.goid)
	} else {
		gp.tracelastp = pp
		traceEvent(traceEvGoStart, -1, gp.goid, gp.traceseq)
	}
}

func traceGoEnd() {
	traceEvent(traceEvGoEnd, -1)
}

func traceGoSched() {
	gp := getg()
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoSched, 1)
}

func traceGoPreempt() {
	gp := getg()
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoPreempt, 1)
}

func traceGoPark(traceEv byte, skip int) {
	if traceEv&traceFutileWakeup != 0 {
		traceEvent(traceEvFutileWakeup, -1)
	}
	traceEvent(traceEv & ^traceFutileWakeup, skip)
}

func traceGoUnpark(gp *g, skip int) {
	pp := getg().m.p
	gp.traceseq++
	if gp.tracelastp == pp {
		traceEvent(traceEvGoUnblockLocal, skip, gp.goid)
	} else {
		gp.tracelastp = pp
		traceEvent(traceEvGoUnblock, skip, gp.goid, gp.traceseq)
	}
}

func traceGoSysCall() {
	traceEvent(traceEvGoSysCall, 1)
}

func traceGoSysExit(ts int64) {
	if ts != 0 && ts < trace.ticksStart {
		// There is a race between the code that initializes sysexitticks
		// (in exitsyscall, which runs without a P, and therefore is not
		// stopped with the rest of the world) and the code that initializes
		// a new trace. The recorded sysexitticks must therefore be treated
		// as "best effort". If they are valid for this trace, then great,
		// use them for greater accuracy. But if they're not valid for this
		// trace, assume that the trace was started after the actual syscall
		// exit (but before we actually managed to start the goroutine,
		// aka right now), and assign a fresh time stamp to keep the log consistent.
		ts = 0
	}
	gp := getg().m.curg
	gp.traceseq++
	gp.tracelastp = gp.m.p
	traceEvent(traceEvGoSysExit, -1, gp.goid, gp.traceseq, uint64(ts)/traceTickDiv)
}

func traceGoSysBlock(pp *p) {
	// Sysmon and stopTheWorld can declare syscalls running on remote Ps as blocked,
	// to handle this we temporarily employ the P.
	mp := acquirem()
	oldp := mp.p
	mp.p.set(pp)
	traceEvent(traceEvGoSysBlock, -1)
	mp.p = oldp
	releasem(mp)
}
func traceHeapAlloc(live uint64) {
	traceEvent(traceEvHeapAlloc, -1, live)
}

func traceHeapGoal() {
	heapGoal := gcController.heapGoal()
	if heapGoal == ^uint64(0) {
		// Heap-based triggering is disabled.
		traceEvent(traceEvHeapGoal, -1, 0)
	} else {
		traceEvent(traceEvHeapGoal, -1, heapGoal)
	}
}

// To access runtime functions from runtime/trace.
// See runtime/trace/annotation.go

//go:linkname trace_userTaskCreate runtime/trace.userTaskCreate
func trace_userTaskCreate(id, parentID uint64, taskType string) {
	if !trace.enabled {
		return
	}

	// Same as in traceEvent.
	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(pid)
		return
	}

	typeStringID, bufp := traceString(bufp, pid, taskType)
	traceEventLocked(0, mp, pid, bufp, traceEvUserTaskCreate, 0, 3, id, parentID, typeStringID)
	traceReleaseBuffer(pid)
}

//go:linkname trace_userTaskEnd runtime/trace.userTaskEnd
func trace_userTaskEnd(id uint64) {
	traceEvent(traceEvUserTaskEnd, 2, id)
}

//go:linkname trace_userRegion runtime/trace.userRegion
func trace_userRegion(id, mode uint64, name string) {
	if !trace.enabled {
		return
	}

	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(pid)
		return
	}

	nameStringID, bufp := traceString(bufp, pid, name)
	traceEventLocked(0, mp, pid, bufp, traceEvUserRegion, 0, 3, id, mode, nameStringID)
	traceReleaseBuffer(pid)
}

//go:linkname trace_userLog runtime/trace.userLog
func trace_userLog(id uint64, category, message string) {
	if !trace.enabled {
		return
	}

	mp, pid, bufp := traceAcquireBuffer()
	if !trace.enabled && !mp.startingtrace {
		traceReleaseBuffer(pid)
		return
	}

	categoryID, bufp := traceString(bufp, pid, category)

	extraSpace := traceBytesPerNumber + len(message) // extraSpace for the value string
	traceEventLocked(extraSpace, mp, pid, bufp, traceEvUserLog, 0, 3, id, categoryID)
	// traceEventLocked reserved extra space for val and len(val)
	// in buf, so buf now has room for the following.
	buf := bufp.ptr()

	// double-check the message and its length can fit.
	// Otherwise, truncate the message.
	slen := len(message)
	if room := len(buf.arr) - buf.pos; room < slen+traceBytesPerNumber {
		slen = room
	}
	buf.varint(uint64(slen))
	buf.pos += copy(buf.arr[buf.pos:], message[:slen])

	traceReleaseBuffer(pid)
}

// startPCforTrace returns the start PC of a goroutine for tracing purposes. If pc is a wrapper,
// it returns the PC of the wrapped function. Otherwise it returns pc.
func startPCforTrace(pc uintptr) uintptr {
	f := findfunc(pc)
	if !f.valid() {
		return pc // may happen for locked g in extra M since its pc is 0.
	}
	w := funcdata(f, _FUNCDATA_WrapInfo)
	if w == nil {
		return pc // not a wrapper
	}
	return f.datap.textAddr(*(*uint32)(w))
}
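The trace_userTaskCreate, trace_userRegion, and trace_userLog hooks above back the user-annotation API in the public runtime/trace package. A short usage sketch of that public API:

package main

import (
	"context"
	"runtime/trace"
)

func main() {
	ctx := context.Background()

	// NewTask funnels into trace_userTaskCreate; Task.End into trace_userTaskEnd.
	ctx, task := trace.NewTask(ctx, "request")
	defer task.End()

	// WithRegion emits the paired start/end traceEvUserRegion events.
	trace.WithRegion(ctx, "decode", func() {
		// ... work ...
	})

	// Log emits a traceEvUserLog event with a category and message.
	trace.Log(ctx, "status", "done")
}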