// Source file: mprof.go
// Package: runtime
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.
package runtime
import (
	"internal/abi"
	"internal/goarch"
	"internal/profilerecord"
	"internal/runtime/atomic"
	"internal/runtime/sys"
	"unsafe"
)
// NOTE(rsc): Everything here could use cas if contention became an issue.
var (
// profInsertLock protects changes to the start of all *bucket linked lists.
profInsertLock mutex
// profBlockLock protects the contents of every blockRecord struct.
profBlockLock mutex
// profMemActiveLock protects the active field of every memRecord struct.
profMemActiveLock mutex
// profMemFutureLock is a set of locks that protect the respective elements
// of the future array of every memRecord struct: profMemFutureLock[i]
// guards future[i] in all memRecords.
profMemFutureLock [len(memRecord{}.future)]mutex
)
// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.
const (
// Profile types. The values start at 1, so the zero bucketType is not a
// valid profile type.
memProfile bucketType = 1 + iota
blockProfile
mutexProfile
// buckHashSize is the number of slots in the bucket hash table.
buckHashSize = 179999
// maxSkip is to account for deferred inline expansion
// when using frame pointer unwinding. We record the stack
// with "physical" frame pointers but handle skipping "logical"
// frames at some point after collecting the stack. So
// we need extra space in order to avoid getting fewer than the
// desired maximum number of frames after expansion.
// This should be at least as large as the largest skip value
// used for profiling; otherwise stacks may be truncated inconsistently.
maxSkip = 6
// maxProfStackDepth is the highest valid value for debug.profstackdepth.
// It's used for the bucket.stk func, which views the stack words as an
// array of this length before slicing it down to the real length.
// TODO(fg): can we get rid of this?
maxProfStackDepth = 1024
)
// bucketType identifies the profile a bucket belongs to. Its values are
// memProfile, blockProfile, and mutexProfile.
type bucketType int
// A bucket holds per-call-stack profiling information.
// The representation is a bit sleazy, inherited from C.
// This struct defines the bucket header. It is followed in
// memory by the stack words and then the actual record
// data, either a memRecord or a blockRecord.
//
// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
//
// None of the fields in this bucket header are modified after
// creation, including its next and allnext links.
//
// No heap pointers.
type bucket struct {
_ sys.NotInHeap
// next links buckets that share a slot of the bucket hash table.
next *bucket
// allnext links every bucket of one profile (see mbuckets, bbuckets,
// xbuckets).
allnext *bucket
typ bucketType // memProfile, blockProfile, or mutexProfile
// hash is the hash of the stack and size, as computed by stkbucket.
hash uintptr
// size is the size value the bucket was looked up with; block and
// mutex events pass 0.
size uintptr
// nstk is the number of stack words stored after the header.
nstk uintptr
}
// A memRecord is the bucket data for a bucket of type memProfile,
// part of the memory profile.
type memRecord struct {
// The following complex 3-stage scheme of stats accumulation
// is required to obtain a consistent picture of mallocs and frees
// for some point in time.
// The problem is that mallocs come in real time, while frees
// come only after a GC during concurrent sweeping. So if we would
// naively count them, we would get a skew toward mallocs.
//
// Hence, we delay information to get consistent snapshots as
// of mark termination. Allocations count toward the next mark
// termination's snapshot, while sweep frees count toward the
// previous mark termination's snapshot:
//
//              MT          MT          MT          MT
//             .·|         .·|         .·|         .·|
//          .·˙  |      .·˙  |      .·˙  |      .·˙  |
//       .·˙     |   .·˙     |   .·˙     |   .·˙     |
//    .·˙        |.·˙        |.·˙        |.·˙        |
//
//       alloc → ▲ ← free
//               ┠┅┅┅┅┅┅┅┅┅┅┅P
//       C+2     →    C+1    →  C
//
//                   alloc → ▲ ← free
//                           ┠┅┅┅┅┅┅┅┅┅┅┅P
//                   C+2     →    C+1    →  C
//
// Since we can't publish a consistent snapshot until all of
// the sweep frees are accounted for, we wait until the next
// mark termination ("MT" above) to publish the previous mark
// termination's snapshot ("P" above). To do this, allocation
// and free events are accounted to *future* heap profile
// cycles ("C+n" above) and we only publish a cycle once all
// of the events from that cycle must be done. Specifically:
//
// Mallocs are accounted to cycle C+2.
// Explicit frees are accounted to cycle C+2.
// GC frees (done during sweeping) are accounted to cycle C+1.
//
// After mark termination, we increment the global heap
// profile cycle counter and accumulate the stats from cycle C
// into the active profile.

// active is the currently published profile. A profiling
// cycle can be accumulated into active once it's complete.
active memRecordCycle
// future records the profile events we're counting for cycles
// that have not yet been published. This is a ring buffer
// indexed by the global heap profile cycle C and stores
// cycles C, C+1, and C+2. Unlike active, these counts are
// only for a single cycle; they are not cumulative across
// cycles.
//
// We store cycle C here because there's a window between when
// C becomes the active cycle and when we've flushed it to
// active.
future [3]memRecordCycle
}
// memRecordCycle holds the allocation and free counters of a memRecord,
// either for a single heap profile cycle (memRecord.future) or accumulated
// over the published cycles (memRecord.active).
type memRecordCycle struct {
	allocs, frees           uintptr // number of objects allocated, freed
	alloc_bytes, free_bytes uintptr // number of bytes allocated, freed
}

// add accumulates b into a. It does not zero b.
func (a *memRecordCycle) add(b *memRecordCycle) {
	a.allocs += b.allocs
	a.frees += b.frees
	a.alloc_bytes += b.alloc_bytes
	a.free_bytes += b.free_bytes
}
// A blockRecord is the bucket data for a bucket of type blockProfile,
// which is used in blocking and mutex profiles.
type blockRecord struct {
// count is the number of events, scaled up by saveBlockEventStack to
// compensate for sampling (hence a float).
count float64
// cycles is the accumulated (sampling-adjusted) duration of the events.
cycles int64
}
var (
// Heads of the per-profile bucket lists, linked through bucket.allnext.
mbuckets atomic.UnsafePointer // *bucket, memory profile buckets
bbuckets atomic.UnsafePointer // *bucket, blocking profile buckets
xbuckets atomic.UnsafePointer // *bucket, mutex profile buckets
// buckhash is the bucket hash table; stkbucket allocates it on first use.
buckhash atomic.UnsafePointer // *buckhashArray
// mProfCycle is the global heap profile cycle counter.
mProfCycle mProfCycleHolder
)
// buckhashArray is the bucket hash table: each slot is the head of a list of
// buckets linked through bucket.next.
type buckhashArray [buckHashSize]atomic.UnsafePointer // *bucket
// mProfCycleWrap is the value at which the heap profile cycle counter wraps.
// It is a multiple of len(memRecord{}.future), so cycle % len(future) stays
// consistent across the wrap.
const mProfCycleWrap = uint32(len(memRecord{}.future)) * (2 << 24)
// mProfCycleHolder holds the global heap profile cycle number (wrapped at
// mProfCycleWrap, stored starting at bit 1), and a flag (stored at bit 0) to
// indicate whether future[cycle] in all buckets has been queued to flush into
// the active profile.
type mProfCycleHolder struct {
	value atomic.Uint32
}

// read returns the current cycle count.
func (c *mProfCycleHolder) read() (cycle uint32) {
	v := c.value.Load()
	// Drop the flushed flag in bit 0.
	cycle = v >> 1
	return cycle
}

// setFlushed sets the flushed flag. It returns the current cycle count and the
// previous value of the flushed flag.
func (c *mProfCycleHolder) setFlushed() (cycle uint32, alreadyFlushed bool) {
	// Retry until the flag is set on top of an unchanged value, so that the
	// returned cycle and flag describe the same state.
	for {
		prev := c.value.Load()
		cycle = prev >> 1
		alreadyFlushed = (prev & 0x1) != 0
		next := prev | 0x1
		if c.value.CompareAndSwap(prev, next) {
			return cycle, alreadyFlushed
		}
	}
}
// increment increases the cycle count by one, wrapping the value at
// mProfCycleWrap. It clears the flushed flag.
func ( *mProfCycleHolder) () {
// We explicitly wrap mProfCycle rather than depending on
// uint wraparound because the memRecord.future ring does not
// itself wrap at a power of two.
for {
:= .value.Load()
:= >> 1
= ( + 1) % mProfCycleWrap
:= << 1
if .value.CompareAndSwap(, ) {
break
}
}
}
// newBucket allocates a bucket with the given type and number of stack entries.
func newBucket( bucketType, int) *bucket {
:= unsafe.Sizeof(bucket{}) + uintptr()*unsafe.Sizeof(uintptr(0))
switch {
default:
throw("invalid profile bucket type")
case memProfile:
+= unsafe.Sizeof(memRecord{})
case blockProfile, mutexProfile:
+= unsafe.Sizeof(blockRecord{})
}
:= (*bucket)(persistentalloc(, 0, &memstats.buckhash_sys))
.typ =
.nstk = uintptr()
return
}
// stk returns the slice in b holding the stack. The caller can assume that the
// backing array is immutable.
func ( *bucket) () []uintptr {
:= (*[maxProfStackDepth]uintptr)(add(unsafe.Pointer(), unsafe.Sizeof(*)))
if .nstk > maxProfStackDepth {
// prove that slicing works; otherwise a failure requires a P
throw("bad profile stack count")
}
return [:.nstk:.nstk]
}
// mp returns the memRecord associated with the memProfile bucket b.
func ( *bucket) () *memRecord {
if .typ != memProfile {
throw("bad use of bucket.mp")
}
:= add(unsafe.Pointer(), unsafe.Sizeof(*)+.nstk*unsafe.Sizeof(uintptr(0)))
return (*memRecord)()
}
// bp returns the blockRecord associated with the blockProfile bucket b.
func ( *bucket) () *blockRecord {
if .typ != blockProfile && .typ != mutexProfile {
throw("bad use of bucket.bp")
}
:= add(unsafe.Pointer(), unsafe.Sizeof(*)+.nstk*unsafe.Sizeof(uintptr(0)))
return (*blockRecord)()
}
// Return the bucket for stk[0:nstk], allocating new bucket if needed.
func stkbucket( bucketType, uintptr, []uintptr, bool) *bucket {
:= (*buckhashArray)(buckhash.Load())
if == nil {
lock(&profInsertLock)
// check again under the lock
= (*buckhashArray)(buckhash.Load())
if == nil {
= (*buckhashArray)(sysAlloc(unsafe.Sizeof(buckhashArray{}), &memstats.buckhash_sys))
if == nil {
throw("runtime: cannot allocate memory")
}
buckhash.StoreNoWB(unsafe.Pointer())
}
unlock(&profInsertLock)
}
// Hash stack.
var uintptr
for , := range {
+=
+= << 10
^= >> 6
}
// hash in size
+=
+= << 10
^= >> 6
// finalize
+= << 3
^= >> 11
:= int( % buckHashSize)
// first check optimistically, without the lock
for := (*bucket)([].Load()); != nil; = .next {
if .typ == && .hash == && .size == && eqslice(.stk(), ) {
return
}
}
if ! {
return nil
}
lock(&profInsertLock)
// check again under the insertion lock
for := (*bucket)([].Load()); != nil; = .next {
if .typ == && .hash == && .size == && eqslice(.stk(), ) {
unlock(&profInsertLock)
return
}
}
// Create new bucket.
:= newBucket(, len())
copy(.stk(), )
.hash =
.size =
var *atomic.UnsafePointer
if == memProfile {
= &mbuckets
} else if == mutexProfile {
= &xbuckets
} else {
= &bbuckets
}
.next = (*bucket)([].Load())
.allnext = (*bucket)(.Load())
[].StoreNoWB(unsafe.Pointer())
.StoreNoWB(unsafe.Pointer())
unlock(&profInsertLock)
return
}
// eqslice reports whether x and y have the same length and the same elements
// in the same order.
func eqslice(x, y []uintptr) bool {
	if len(x) != len(y) {
		return false
	}
	for i, xi := range x {
		if xi != y[i] {
			return false
		}
	}
	return true
}
// mProf_NextCycle publishes the next heap profile cycle and creates a
// fresh heap profile cycle. This operation is fast and can be done
// during STW. The caller must call mProf_Flush before calling
// mProf_NextCycle again.
//
// This is called by mark termination during STW so allocations and
// frees after the world is started again count towards a new heap
// profiling cycle.
//
// It only advances the global cycle counter (which also clears the
// counter's flushed flag); no per-bucket data is touched here.
func mProf_NextCycle() {
mProfCycle.increment()
}
// mProf_Flush flushes the events from the current heap profiling
// cycle into the active profile. After this it is safe to start a new
// heap profiling cycle with mProf_NextCycle.
//
// This is called by GC after mark termination starts the world. In
// contrast with mProf_NextCycle, this is somewhat expensive, but safe
// to do concurrently.
func mProf_Flush() {
, := mProfCycle.setFlushed()
if {
return
}
:= % uint32(len(memRecord{}.future))
lock(&profMemActiveLock)
lock(&profMemFutureLock[])
mProf_FlushLocked()
unlock(&profMemFutureLock[])
unlock(&profMemActiveLock)
}
// mProf_FlushLocked flushes the events from the heap profiling cycle at index
// into the active profile. The caller must hold the lock for the active profile
// (profMemActiveLock) and for the profiling cycle at index
// (profMemFutureLock[index]).
func mProf_FlushLocked( uint32) {
assertLockHeld(&profMemActiveLock)
assertLockHeld(&profMemFutureLock[])
:= (*bucket)(mbuckets.Load())
for := ; != nil; = .allnext {
:= .mp()
// Flush cycle C into the published profile and clear
// it for reuse.
:= &.future[]
.active.add()
* = memRecordCycle{}
}
}
// mProf_PostSweep records that all sweep frees for this GC cycle have
// completed. This has the effect of publishing the heap profile
// snapshot as of the last mark termination without advancing the heap
// profile cycle.
func mProf_PostSweep() {
// Flush cycle C+1 to the active profile so everything as of
// the last mark termination becomes visible. *Don't* advance
// the cycle, since we're still accumulating allocs in cycle
// C+2, which have to become C+1 in the next mark termination
// and so on.
:= mProfCycle.read() + 1
:= % uint32(len(memRecord{}.future))
lock(&profMemActiveLock)
lock(&profMemFutureLock[])
mProf_FlushLocked()
unlock(&profMemFutureLock[])
unlock(&profMemActiveLock)
}
// Called by malloc to record a profiled block.
func mProf_Malloc( *m, unsafe.Pointer, uintptr) {
if .profStack == nil {
// mp.profStack is nil if we happen to sample an allocation during the
// initialization of mp. This case is rare, so we just ignore such
// allocations. Change MemProfileRate to 1 if you need to reproduce such
// cases for testing purposes.
return
}
// Only use the part of mp.profStack we need and ignore the extra space
// reserved for delayed inline expansion with frame pointer unwinding.
:= callers(5, .profStack[:debug.profstackdepth])
:= (mProfCycle.read() + 2) % uint32(len(memRecord{}.future))
:= stkbucket(memProfile, , .profStack[:], true)
:= .mp()
:= &.future[]
lock(&profMemFutureLock[])
.allocs++
.alloc_bytes +=
unlock(&profMemFutureLock[])
// Setprofilebucket locks a bunch of other mutexes, so we call it outside of
// the profiler locks. This reduces potential contention and chances of
// deadlocks. Since the object must be alive during the call to
// mProf_Malloc, it's fine to do this non-atomically.
systemstack(func() {
setprofilebucket(, )
})
}
// Called when freeing a profiled block.
func mProf_Free( *bucket, uintptr) {
:= (mProfCycle.read() + 1) % uint32(len(memRecord{}.future))
:= .mp()
:= &.future[]
lock(&profMemFutureLock[])
.frees++
.free_bytes +=
unlock(&profMemFutureLock[])
}
var blockprofilerate uint64 // in CPU ticks
// SetBlockProfileRate controls the fraction of goroutine blocking events
// that are reported in the blocking profile. The profiler aims to sample
// an average of one blocking event per rate nanoseconds spent blocked.
//
// To include every blocking event in the profile, pass rate = 1.
// To turn off profiling entirely, pass rate <= 0.
func ( int) {
var int64
if <= 0 {
= 0 // disable profiling
} else if == 1 {
= 1 // profile everything
} else {
// convert ns to cycles, use float64 to prevent overflow during multiplication
= int64(float64() * float64(ticksPerSecond()) / (1000 * 1000 * 1000))
if == 0 {
= 1
}
}
atomic.Store64(&blockprofilerate, uint64())
}
func blockevent( int64, int) {
if <= 0 {
= 1
}
:= int64(atomic.Load64(&blockprofilerate))
if blocksampled(, ) {
saveblockevent(, , +1, blockProfile)
}
}
// blocksampled returns true for all events where cycles >= rate. Shorter
// events have a cycles/rate random chance of returning true.
func blocksampled(, int64) bool {
if <= 0 || ( > && cheaprand64()% > ) {
return false
}
return true
}
// saveblockevent records a profile event of the type specified by which.
// cycles is the quantity associated with this event and rate is the sampling rate,
// used to adjust the cycles value in the manner determined by the profile type.
// skip is the number of frames to omit from the traceback associated with the event.
// The traceback will be recorded from the stack of the goroutine associated with the current m.
// skip should be positive if this event is recorded from the current stack
// (e.g. when this is not called from a system stack)
func saveblockevent(, int64, int, bucketType) {
if debug.profstackdepth == 0 {
// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
// can't record a stack trace.
return
}
if > maxSkip {
print("requested skip=", )
throw("invalid skip value")
}
:= getg()
:= acquirem() // we must not be preempted while accessing profstack
var int
if tracefpunwindoff() || .m.hasCgoOnStack() {
if .m.curg == nil || .m.curg == {
= callers(, .profStack)
} else {
= gcallers(.m.curg, , .profStack)
}
} else {
if .m.curg == nil || .m.curg == {
if > 0 {
// We skip one fewer frame than the provided value for frame
// pointer unwinding because the skip value includes the current
// frame, whereas the saved frame pointer will give us the
// caller's return address first (so, not including
// saveblockevent)
-= 1
}
= fpTracebackPartialExpand(, unsafe.Pointer(getfp()), .profStack)
} else {
.profStack[0] = .m.curg.sched.pc
= 1 + fpTracebackPartialExpand(, unsafe.Pointer(.m.curg.sched.bp), .profStack[1:])
}
}
saveBlockEventStack(, , .profStack[:], )
releasem()
}
// fpTracebackPartialExpand records a call stack obtained starting from fp.
// This function will skip the given number of frames, properly accounting for
// inlining, and save remaining frames as "physical" return addresses. The
// consumer should later use CallersFrames or similar to expand inline frames.
func fpTracebackPartialExpand( int, unsafe.Pointer, []uintptr) int {
var int
:= abi.FuncIDNormal
:= func( uintptr) bool {
if > 0 {
--
} else if < len() {
[] =
++
}
return < len()
}
for < len() && != nil {
// return addr sits one word above the frame pointer
:= *(*uintptr)(unsafe.Pointer(uintptr() + goarch.PtrSize))
if > 0 {
:= - 1
:= findfunc()
, := newInlineUnwinder(, )
for ; .valid(); = .next() {
:= .srcFunc()
if .funcID == abi.FuncIDWrapper && elideWrapperCalling() {
// ignore wrappers
} else if := (.pc + 1); ! {
return
}
= .funcID
}
} else {
// We've skipped the desired number of frames, so no need
// to perform further inline expansion now.
[] =
++
}
// follow the frame pointer to the next one
= unsafe.Pointer(*(*uintptr)())
}
return
}
// lockTimer assists with profiling contention on runtime-internal locks.
//
// There are several steps between the time that an M experiences contention and
// when that contention may be added to the profile. This comes from our
// constraints: We need to keep the critical section of each lock small,
// especially when those locks are contended. The reporting code cannot acquire
// new locks until the M has released all other locks, which means no memory
// allocations and encourages use of (temporary) M-local storage.
//
// The M will have space for storing one call stack that caused contention, and
// for the magnitude of that contention. It will also have space to store the
// magnitude of additional contention the M caused, since it only has space to
// remember one call stack and might encounter several contention events before
// it releases all of its locks and is thus able to transfer the local buffer
// into the profile.
//
// The M will collect the call stack when it unlocks the contended lock. That
// minimizes the impact on the critical section of the contended lock, and
// matches the mutex profile's behavior for contention in sync.Mutex: measured
// at the Unlock method.
//
// The profile for contention on sync.Mutex blames the caller of Unlock for the
// amount of contention experienced by the callers of Lock which had to wait.
// When there are several critical sections, this allows identifying which of
// them is responsible.
//
// Matching that behavior for runtime-internal locks will require identifying
// which Ms are blocked on the mutex. The semaphore-based implementation is
// ready to allow that, but the futex-based implementation will require a bit
// more work. Until then, we report contention on runtime-internal locks with a
// call stack taken from the unlock call (like the rest of the user-space
// "mutex" profile), but assign it a duration value based on how long the
// previous lock call took (like the user-space "block" profile).
//
// Thus, reporting the call stacks of runtime-internal lock contention is
// guarded by GODEBUG for now. Set GODEBUG=runtimecontentionstacks=1 to enable.
//
// TODO(rhysh): plumb through the delay duration, remove GODEBUG, update comment
//
// The M will track this by storing a pointer to the lock; lock/unlock pairs for
// runtime-internal locks are always on the same M.
//
// Together, that demands several steps for recording contention. First, when
// finally acquiring a contended lock, the M decides whether it should plan to
// profile that event by storing a pointer to the lock in its "to be profiled
// upon unlock" field. If that field is already set, it uses the relative
// magnitudes to weight a random choice between itself and the other lock, with
// the loser's time being added to the "additional contention" field. Otherwise
// if the M's call stack buffer is occupied, it does the comparison against that
// sample's magnitude.
//
// Second, having unlocked a mutex the M checks to see if it should capture the
// call stack into its local buffer. Finally, when the M unlocks its last mutex,
// it transfers the local buffer into the profile. As part of that step, it also
// transfers any "additional contention" time to the profile. Any lock
// contention that it experiences while adding samples to the profile will be
// recorded later as "additional contention" and not include a call stack, to
// avoid an echo.
type lockTimer struct {
// lock is the mutex being timed; it is passed on to the M's mLockProfile
// when a sampled wait ends.
lock *mutex
// timeRate is the sampling period chosen for the wall-clock measurement;
// a sampled wait time is multiplied by it.
timeRate int64
// timeStart is the nanotime reading at the start of the wait, or 0 if
// this wait was not selected for wall-clock sampling.
timeStart int64
// tickStart is the cputicks reading at the start of the wait, or 0 if
// this wait was not selected for the mutex profile.
tickStart int64
}
func ( *lockTimer) () {
:= int64(atomic.Load64(&mutexprofilerate))
.timeRate = gTrackingPeriod
if != 0 && < .timeRate {
.timeRate =
}
if int64(cheaprand())%.timeRate == 0 {
.timeStart = nanotime()
}
if > 0 && int64(cheaprand())% == 0 {
.tickStart = cputicks()
}
}
func ( *lockTimer) () {
:= getg()
if .timeStart != 0 {
:= nanotime()
.m.mLockProfile.waitTime.Add(( - .timeStart) * .timeRate)
}
if .tickStart != 0 {
:= cputicks()
.m.mLockProfile.recordLock(-.tickStart, .lock)
}
}
// mLockProfile is the per-M buffer for contention on runtime-internal locks.
// It holds at most one pending call stack plus the amount of contention that
// could not be attributed to a stack, until store moves them into the mutex
// profile.
type mLockProfile struct {
waitTime atomic.Int64 // total nanoseconds spent waiting in runtime.lockWithRank
stack []uintptr // stack that experienced contention in runtime.lockWithRank
pending uintptr // *mutex that experienced contention (to be traceback-ed)
cycles int64 // cycles attributable to "pending" (if set), otherwise to "stack"
cyclesLost int64 // contention for which we weren't able to record a call stack
haveStack bool // stack and cycles are to be added to the mutex profile
disabled bool // attribute all time to "lost"
}
func ( *mLockProfile) ( int64, *mutex) {
if < 0 {
= 0
}
if .disabled {
// We're experiencing contention while attempting to report contention.
// Make a note of its magnitude, but don't allow it to be the sole cause
// of another contention report.
.cyclesLost +=
return
}
if uintptr(unsafe.Pointer()) == .pending {
// Optimization: we'd already planned to profile this same lock (though
// possibly from a different unlock site).
.cycles +=
return
}
if := .cycles; > 0 {
// We can only store one call stack for runtime-internal lock contention
// on this M, and we've already got one. Decide which should stay, and
// add the other to the report for runtime._LostContendedRuntimeLock.
if == 0 {
return
}
:= uint64(cheaprand64()) % uint64()
:= uint64(cheaprand64()) % uint64()
if > {
.cyclesLost +=
return
} else {
.cyclesLost +=
}
}
// Saving the *mutex as a uintptr is safe because:
// - lockrank_on.go does this too, which gives it regular exercise
// - the lock would only move if it's stack allocated, which means it
// cannot experience multi-M contention
.pending = uintptr(unsafe.Pointer())
.cycles =
}
// From unlock2, we might not be holding a p in this code.
//
//go:nowritebarrierrec
func ( *mLockProfile) ( *mutex) {
if uintptr(unsafe.Pointer()) == .pending {
.captureStack()
}
if := getg(); .m.locks == 1 && .m.mLockProfile.haveStack {
.store()
}
}
func ( *mLockProfile) () {
if debug.profstackdepth == 0 {
// profstackdepth is set to 0 by the user, so mp.profStack is nil and we
// can't record a stack trace.
return
}
:= 3 // runtime.(*mLockProfile).recordUnlock runtime.unlock2 runtime.unlockWithRank
if staticLockRanking {
// When static lock ranking is enabled, we'll always be on the system
// stack at this point. There will be a runtime.unlockWithRank.func1
// frame, and if the call to runtime.unlock took place on a user stack
// then there'll also be a runtime.systemstack frame. To keep stack
// traces somewhat consistent whether or not static lock ranking is
// enabled, we'd like to skip those. But it's hard to tell how long
// we've been on the system stack so accept an extra frame in that case,
// with a leaf of "runtime.unlockWithRank runtime.unlock" instead of
// "runtime.unlock".
+= 1 // runtime.unlockWithRank.func1
}
.pending = 0
.haveStack = true
.stack[0] = logicalStackSentinel
if debug.runtimeContentionStacks.Load() == 0 {
.stack[1] = abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum
.stack[2] = 0
return
}
var int
:= getg()
:= sys.GetCallerSP()
:= sys.GetCallerPC()
systemstack(func() {
var unwinder
.initAt(, , 0, , unwindSilentErrors|unwindJumpStack)
= 1 + tracebackPCs(&, , .stack[1:])
})
if < len(.stack) {
.stack[] = 0
}
}
func ( *mLockProfile) () {
// Report any contention we experience within this function as "lost"; it's
// important that the act of reporting a contention event not lead to a
// reportable contention event. This also means we can use prof.stack
// without copying, since it won't change during this function.
:= acquirem()
.disabled = true
:= int(debug.profstackdepth)
for := 0; < ; ++ {
if := .stack[]; == 0 {
=
break
}
}
, := .cycles, .cyclesLost
.cycles, .cyclesLost = 0, 0
.haveStack = false
:= int64(atomic.Load64(&mutexprofilerate))
saveBlockEventStack(, , .stack[:], mutexProfile)
if > 0 {
:= [...]uintptr{
logicalStackSentinel,
abi.FuncPCABIInternal(_LostContendedRuntimeLock) + sys.PCQuantum,
}
saveBlockEventStack(, , [:], mutexProfile)
}
.disabled = false
releasem()
}
func saveBlockEventStack(, int64, []uintptr, bucketType) {
:= stkbucket(, 0, , true)
:= .bp()
lock(&profBlockLock)
// We want to up-scale the count and cycles according to the
// probability that the event was sampled. For block profile events,
// the sample probability is 1 if cycles >= rate, and cycles / rate
// otherwise. For mutex profile events, the sample probability is 1 / rate.
// We scale the events by 1 / (probability the event was sampled).
if == blockProfile && < {
// Remove sampling bias, see discussion on http://golang.org/cl/299991.
.count += float64() / float64()
.cycles +=
} else if == mutexProfile {
.count += float64()
.cycles += *
} else {
.count++
.cycles +=
}
unlock(&profBlockLock)
}
var mutexprofilerate uint64 // fraction sampled
// SetMutexProfileFraction controls the fraction of mutex contention events
// that are reported in the mutex profile. On average 1/rate events are
// reported. The previous rate is returned.
//
// To turn off profiling entirely, pass rate 0.
// To just read the current rate, pass rate < 0.
// (For n>1 the details of sampling may change.)
func ( int) int {
if < 0 {
return int(mutexprofilerate)
}
:= mutexprofilerate
atomic.Store64(&mutexprofilerate, uint64())
return int()
}
//go:linkname mutexevent sync.event
func mutexevent( int64, int) {
if < 0 {
= 0
}
:= int64(atomic.Load64(&mutexprofilerate))
if > 0 && cheaprand64()% == 0 {
saveblockevent(, , +1, mutexProfile)
}
}
// Go interface to profile data.
// A StackRecord describes a single execution stack.
type StackRecord struct {
	Stack0 [32]uintptr // stack trace for this record; ends at first 0 entry
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
//
// The prefix ends just before the first zero entry; if there is no zero
// entry, all of r.Stack0 is returned.
func (r *StackRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}
// MemProfileRate controls the fraction of memory allocations
// that are recorded and reported in the memory profile.
// The profiler aims to sample an average of
// one allocation per MemProfileRate bytes allocated.
//
// To include every allocated block in the profile, set MemProfileRate to 1.
// To turn off profiling entirely, set MemProfileRate to 0.
//
// The tools that process the memory profiles assume that the
// profile rate is constant across the lifetime of the program
// and equal to the current value. Programs that change the
// memory profiling rate should do so just once, as early as
// possible in the execution of the program (for example,
// at the beginning of main).
//
// The default is 512 KiB.
var MemProfileRate int = 512 * 1024
// disableMemoryProfiling is set by the linker if memory profiling
// is not used and the link type guarantees nobody else could use it
// elsewhere.
// The linker decides this by checking whether the
// runtime.memProfileInternal symbol is present.
var disableMemoryProfiling bool
// A MemProfileRecord describes the live objects allocated
// by a particular call sequence (stack trace).
type MemProfileRecord struct {
	AllocBytes, FreeBytes     int64       // number of bytes allocated, freed
	AllocObjects, FreeObjects int64       // number of objects allocated, freed
	Stack0                    [32]uintptr // stack trace for this record; ends at first 0 entry
}

// InUseBytes returns the number of bytes in use (AllocBytes - FreeBytes).
func (r *MemProfileRecord) InUseBytes() int64 { return r.AllocBytes - r.FreeBytes }

// InUseObjects returns the number of objects in use (AllocObjects - FreeObjects).
func (r *MemProfileRecord) InUseObjects() int64 {
	return r.AllocObjects - r.FreeObjects
}

// Stack returns the stack trace associated with the record,
// a prefix of r.Stack0.
//
// The prefix ends just before the first zero entry; if there is no zero
// entry, all of r.Stack0 is returned.
func (r *MemProfileRecord) Stack() []uintptr {
	for i, v := range r.Stack0 {
		if v == 0 {
			return r.Stack0[0:i]
		}
	}
	return r.Stack0[0:]
}
// MemProfile returns a profile of memory allocated and freed per allocation
// site.
//
// MemProfile returns n, the number of records in the current memory profile.
// If len(p) >= n, MemProfile copies the profile into p and returns n, true.
// If len(p) < n, MemProfile does not change p and returns n, false.
//
// If inuseZero is true, the profile includes allocation records
// where r.AllocBytes > 0 but r.AllocBytes == r.FreeBytes.
// These are sites where memory was allocated, but it has all
// been released back to the runtime.
//
// The returned profile may be up to two garbage collection cycles old.
// This is to avoid skewing the profile toward allocations; because
// allocations happen in real time but frees are delayed until the garbage
// collector performs sweeping, the profile only accounts for allocations
// that have had a chance to be freed by the garbage collector.
//
// Most clients should use the runtime/pprof package or
// the testing package's -test.memprofile flag instead
// of calling MemProfile directly.
func ( []MemProfileRecord, bool) ( int, bool) {
return memProfileInternal(len(), , func( profilerecord.MemProfileRecord) {
copyMemProfileRecord(&[0], )
= [1:]
})
}
// memProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
//
// The linker set disableMemoryProfiling to true to disable memory profiling
// if this function is not reachable. Mark it noinline to ensure the symbol exists.
// (This function is big and normally not inlined anyway.)
// See also disableMemoryProfiling above and cmd/link/internal/ld/lib.go:linksetup.
//
//go:noinline
func memProfileInternal( int, bool, func(profilerecord.MemProfileRecord)) ( int, bool) {
:= mProfCycle.read()
// If we're between mProf_NextCycle and mProf_Flush, take care
// of flushing to the active profile so we only have to look
// at the active profile below.
:= % uint32(len(memRecord{}.future))
lock(&profMemActiveLock)
lock(&profMemFutureLock[])
mProf_FlushLocked()
unlock(&profMemFutureLock[])
:= true
:= (*bucket)(mbuckets.Load())
for := ; != nil; = .allnext {
:= .mp()
if || .active.alloc_bytes != .active.free_bytes {
++
}
if .active.allocs != 0 || .active.frees != 0 {
= false
}
}
if {
// Absolutely no data, suggesting that a garbage collection
// has not yet happened. In order to allow profiling when
// garbage collection is disabled from the beginning of execution,
// accumulate all of the cycles, and recount buckets.
= 0
for := ; != nil; = .allnext {
:= .mp()
for := range .future {
lock(&profMemFutureLock[])
.active.add(&.future[])
.future[] = memRecordCycle{}
unlock(&profMemFutureLock[])
}
if || .active.alloc_bytes != .active.free_bytes {
++
}
}
}
if <= {
= true
for := ; != nil; = .allnext {
:= .mp()
if || .active.alloc_bytes != .active.free_bytes {
:= profilerecord.MemProfileRecord{
AllocBytes: int64(.active.alloc_bytes),
FreeBytes: int64(.active.free_bytes),
AllocObjects: int64(.active.allocs),
FreeObjects: int64(.active.frees),
Stack: .stk(),
}
()
}
}
}
unlock(&profMemActiveLock)
return
}
func copyMemProfileRecord( *MemProfileRecord, profilerecord.MemProfileRecord) {
.AllocBytes = .AllocBytes
.FreeBytes = .FreeBytes
.AllocObjects = .AllocObjects
.FreeObjects = .FreeObjects
if raceenabled {
racewriterangepc(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0), sys.GetCallerPC(), abi.FuncPCABIInternal(MemProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0))
}
if asanenabled {
asanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0))
}
:= copy(.Stack0[:], .Stack)
clear(.Stack0[:])
}
//go:linkname pprof_memProfileInternal
func pprof_memProfileInternal( []profilerecord.MemProfileRecord, bool) ( int, bool) {
return memProfileInternal(len(), , func( profilerecord.MemProfileRecord) {
[0] =
= [1:]
})
}
func iterate_memprof( func(*bucket, uintptr, *uintptr, uintptr, uintptr, uintptr)) {
lock(&profMemActiveLock)
:= (*bucket)(mbuckets.Load())
for := ; != nil; = .allnext {
:= .mp()
(, .nstk, &.stk()[0], .size, .active.allocs, .active.frees)
}
unlock(&profMemActiveLock)
}
// BlockProfileRecord describes blocking events originated
// at a particular call sequence (stack trace).
//
// It is the record type filled in by both BlockProfile and MutexProfile.
type BlockProfileRecord struct {
// Count is populated from the profile bucket's count field.
Count int64
// Cycles is populated from the profile bucket's cycles field.
// NOTE(review): presumably a duration in CPU ticks; confirm the unit.
Cycles int64
// StackRecord is embedded; it supplies the Stack0 array and Stack method
// that hold the call sequence for this record.
StackRecord
}
// BlockProfile returns n, the number of records in the current blocking profile.
// If len(p) >= n, BlockProfile copies the profile into p and returns n, true.
// If len(p) < n, BlockProfile does not change p and returns n, false.
//
// Most clients should use the [runtime/pprof] package or
// the [testing] package's -test.blockprofile flag instead
// of calling BlockProfile directly.
func ( []BlockProfileRecord) ( int, bool) {
var int
, = blockProfileInternal(len(), func( profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&[], )
++
})
if {
expandFrames([:])
}
return
}
func expandFrames( []BlockProfileRecord) {
:= makeProfStack()
for := range {
:= CallersFrames([].Stack())
:= 0
for < len() {
, := .Next()
// f.PC is a "call PC", but later consumers will expect
// "return PCs"
[] = .PC + 1
++
if ! {
break
}
}
:= copy([].Stack0[:], [:])
clear([].Stack0[:])
}
}
// blockProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
func blockProfileInternal( int, func(profilerecord.BlockProfileRecord)) ( int, bool) {
lock(&profBlockLock)
:= (*bucket)(bbuckets.Load())
for := ; != nil; = .allnext {
++
}
if <= {
= true
for := ; != nil; = .allnext {
:= .bp()
:= profilerecord.BlockProfileRecord{
Count: int64(.count),
Cycles: .cycles,
Stack: .stk(),
}
// Prevent callers from having to worry about division by zero errors.
// See discussion on http://golang.org/cl/299991.
if .Count == 0 {
.Count = 1
}
()
}
}
unlock(&profBlockLock)
return
}
// copyBlockProfileRecord copies the sample values and call stack from src to dst.
// The call stack is copied as-is. The caller is responsible for handling inline
// expansion, needed when the call stack was collected with frame pointer unwinding.
func copyBlockProfileRecord( *BlockProfileRecord, profilerecord.BlockProfileRecord) {
.Count = .Count
.Cycles = .Cycles
if raceenabled {
racewriterangepc(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0), sys.GetCallerPC(), abi.FuncPCABIInternal(BlockProfile))
}
if msanenabled {
msanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0))
}
if asanenabled {
asanwrite(unsafe.Pointer(&.Stack0[0]), unsafe.Sizeof(.Stack0))
}
// We just copy the stack here without inline expansion
// (needed if frame pointer unwinding is used)
// since this function is called under the profile lock,
// and doing something that might allocate can violate lock ordering.
:= copy(.Stack0[:], .Stack)
clear(.Stack0[:])
}
//go:linkname pprof_blockProfileInternal
func pprof_blockProfileInternal( []profilerecord.BlockProfileRecord) ( int, bool) {
return blockProfileInternal(len(), func( profilerecord.BlockProfileRecord) {
[0] =
= [1:]
})
}
// MutexProfile returns n, the number of records in the current mutex profile.
// If len(p) >= n, MutexProfile copies the profile into p and returns n, true.
// Otherwise, MutexProfile does not change p, and returns n, false.
//
// Most clients should use the [runtime/pprof] package
// instead of calling MutexProfile directly.
func ( []BlockProfileRecord) ( int, bool) {
var int
, = mutexProfileInternal(len(), func( profilerecord.BlockProfileRecord) {
copyBlockProfileRecord(&[], )
++
})
if {
expandFrames([:])
}
return
}
// mutexProfileInternal returns the number of records n in the profile. If there
// are less than size records, copyFn is invoked for each record, and ok returns
// true.
func mutexProfileInternal( int, func(profilerecord.BlockProfileRecord)) ( int, bool) {
lock(&profBlockLock)
:= (*bucket)(xbuckets.Load())
for := ; != nil; = .allnext {
++
}
if <= {
= true
for := ; != nil; = .allnext {
:= .bp()
:= profilerecord.BlockProfileRecord{
Count: int64(.count),
Cycles: .cycles,
Stack: .stk(),
}
()
}
}
unlock(&profBlockLock)
return
}
//go:linkname pprof_mutexProfileInternal
func pprof_mutexProfileInternal( []profilerecord.BlockProfileRecord) ( int, bool) {
return mutexProfileInternal(len(), func( profilerecord.BlockProfileRecord) {
[0] =
= [1:]
})
}
// ThreadCreateProfile returns n, the number of records in the thread creation profile.
// If len(p) >= n, ThreadCreateProfile copies the profile into p and returns n, true.
// If len(p) < n, ThreadCreateProfile does not change p and returns n, false.
//
// Most clients should use the runtime/pprof package instead
// of calling ThreadCreateProfile directly.
func ( []StackRecord) ( int, bool) {
return threadCreateProfileInternal(len(), func( profilerecord.StackRecord) {
:= copy([0].Stack0[:], .Stack)
clear([0].Stack0[:])
= [1:]
})
}
// threadCreateProfileInternal returns the number of records n in the profile.
// If there are less than size records, copyFn is invoked for each record, and
// ok returns true.
func threadCreateProfileInternal( int, func(profilerecord.StackRecord)) ( int, bool) {
:= (*m)(atomic.Loadp(unsafe.Pointer(&allm)))
for := ; != nil; = .alllink {
++
}
if <= {
= true
for := ; != nil; = .alllink {
:= profilerecord.StackRecord{Stack: .createstack[:]}
()
}
}
return
}
//go:linkname pprof_threadCreateInternal
func pprof_threadCreateInternal( []profilerecord.StackRecord) ( int, bool) {
return threadCreateProfileInternal(len(), func( profilerecord.StackRecord) {
[0] =
= [1:]
})
}
//go:linkname pprof_goroutineProfileWithLabels
func pprof_goroutineProfileWithLabels( []profilerecord.StackRecord, []unsafe.Pointer) ( int, bool) {
return goroutineProfileWithLabels(, )
}
// labels may be nil. If labels is non-nil, it must have the same length as p.
func goroutineProfileWithLabels( []profilerecord.StackRecord, []unsafe.Pointer) ( int, bool) {
if != nil && len() != len() {
= nil
}
return goroutineProfileWithLabelsConcurrent(, )
}
// goroutineProfile holds the shared state of the in-progress goroutine
// profile, if any.
var goroutineProfile = struct {
// sema is acquired for the duration of a concurrent goroutine profile and
// released when it finishes; it starts at 1.
sema uint32
// active is set while a profile is being collected and cleared afterwards.
active bool
// offset is the index of the next unused entry of records; each recorded
// goroutine claims a slot by atomically adding 1.
offset atomic.Int64
// records and labels are the caller-provided destination slices while a
// profile is active, and nil otherwise. labels may be nil even when active.
records []profilerecord.StackRecord
labels []unsafe.Pointer
}{
sema: 1,
}
// goroutineProfileState indicates the status of a goroutine's stack for the
// current in-progress goroutine profile. Goroutines' stacks are initially
// "Absent" from the profile, and end up "Satisfied" by the time the profile is
// complete. While a goroutine's stack is being captured, its
// goroutineProfileState will be "InProgress" and it will not be able to run
// until the capture completes and the state moves to "Satisfied".
//
// Some goroutines move directly to the "Satisfied" state: the finalizer
// goroutine (which at various times can be either a "system" or a "user"
// goroutine), the goroutine that is coordinating the profile, and any
// goroutines created during the profile.
type goroutineProfileState uint32
const (
// goroutineProfileAbsent: the stack has not been added to the profile.
goroutineProfileAbsent goroutineProfileState = iota
// goroutineProfileInProgress: the stack is being captured right now.
goroutineProfileInProgress
// goroutineProfileSatisfied: nothing more needs to be recorded for this
// goroutine in the current profile.
goroutineProfileSatisfied
)
type goroutineProfileStateHolder atomic.Uint32
func ( *goroutineProfileStateHolder) () goroutineProfileState {
return goroutineProfileState((*atomic.Uint32)().Load())
}
func ( *goroutineProfileStateHolder) ( goroutineProfileState) {
(*atomic.Uint32)().Store(uint32())
}
func ( *goroutineProfileStateHolder) (, goroutineProfileState) bool {
return (*atomic.Uint32)().CompareAndSwap(uint32(), uint32())
}
func goroutineProfileWithLabelsConcurrent( []profilerecord.StackRecord, []unsafe.Pointer) ( int, bool) {
if len() == 0 {
// An empty slice is obviously too small. Return a rough
// allocation estimate without bothering to STW. As long as
// this is close, then we'll only need to STW once (on the next
// call).
return int(gcount()), false
}
semacquire(&goroutineProfile.sema)
:= getg()
:= makeProfStack() // see saveg() for explanation
:= stopTheWorld(stwGoroutineProfile)
// Using gcount while the world is stopped should give us a consistent view
// of the number of live goroutines, minus the number of goroutines that are
// alive and permanently marked as "system". But to make this count agree
// with what we'd get from isSystemGoroutine, we need special handling for
// goroutines that can vary between user and system to ensure that the count
// doesn't change during the collection. So, check the finalizer goroutine
// in particular.
= int(gcount())
if fingStatus.Load()&fingRunningFinalizer != 0 {
++
}
if > len() {
// There's not enough space in p to store the whole profile, so (per the
// contract of runtime.GoroutineProfile) we're not allowed to write to p
// at all and must return n, false.
startTheWorld()
semrelease(&goroutineProfile.sema)
return , false
}
// Save current goroutine.
:= sys.GetCallerSP()
:= sys.GetCallerPC()
systemstack(func() {
saveg(, , , &[0], )
})
if != nil {
[0] = .labels
}
.goroutineProfiled.Store(goroutineProfileSatisfied)
goroutineProfile.offset.Store(1)
// Prepare for all other goroutines to enter the profile. Aside from ourg,
// every goroutine struct in the allgs list has its goroutineProfiled field
// cleared. Any goroutine created from this point on (while
// goroutineProfile.active is set) will start with its goroutineProfiled
// field set to goroutineProfileSatisfied.
goroutineProfile.active = true
goroutineProfile.records =
goroutineProfile.labels =
// The finalizer goroutine needs special handling because it can vary over
// time between being a user goroutine (eligible for this profile) and a
// system goroutine (to be excluded). Pick one before restarting the world.
if fing != nil {
fing.goroutineProfiled.Store(goroutineProfileSatisfied)
if readgstatus(fing) != _Gdead && !isSystemGoroutine(fing, false) {
doRecordGoroutineProfile(fing, )
}
}
startTheWorld()
// Visit each goroutine that existed as of the startTheWorld call above.
//
// New goroutines may not be in this list, but we didn't want to know about
// them anyway. If they do appear in this list (via reusing a dead goroutine
// struct, or racing to launch between the world restarting and us getting
// the list), they will already have their goroutineProfiled field set to
// goroutineProfileSatisfied before their state transitions out of _Gdead.
//
// Any goroutine that the scheduler tries to execute concurrently with this
// call will start by adding itself to the profile (before the act of
// executing can cause any changes in its stack).
forEachGRace(func( *g) {
tryRecordGoroutineProfile(, , Gosched)
})
= stopTheWorld(stwGoroutineProfileCleanup)
:= goroutineProfile.offset.Swap(0)
goroutineProfile.active = false
goroutineProfile.records = nil
goroutineProfile.labels = nil
startTheWorld()
// Restore the invariant that every goroutine struct in allgs has its
// goroutineProfiled field cleared.
forEachGRace(func( *g) {
.goroutineProfiled.Store(goroutineProfileAbsent)
})
if raceenabled {
raceacquire(unsafe.Pointer(&labelSync))
}
if != int() {
// It's a big surprise that the number of goroutines changed while we
// were collecting the profile. But probably better to return a
// truncated profile than to crash the whole process.
//
// For instance, needm moves a goroutine out of the _Gdead state and so
// might be able to change the goroutine count without interacting with
// the scheduler. For code like that, the race windows are small and the
// combination of features is uncommon, so it's hard to be (and remain)
// sure we've caught them all.
}
semrelease(&goroutineProfile.sema)
return , true
}
// tryRecordGoroutineProfileWB asserts that write barriers are allowed and calls
// tryRecordGoroutineProfile.
//
//go:yeswritebarrierrec
func tryRecordGoroutineProfileWB( *g) {
if getg().m.p.ptr() == nil {
throw("no P available, write barriers are forbidden")
}
tryRecordGoroutineProfile(, nil, osyield)
}
// tryRecordGoroutineProfile ensures that gp1 has the appropriate representation
// in the current goroutine profile: either that it should not be profiled, or
// that a snapshot of its call stack and labels are now in the profile.
func tryRecordGoroutineProfile( *g, []uintptr, func()) {
if readgstatus() == _Gdead {
// Dead goroutines should not appear in the profile. Goroutines that
// start while profile collection is active will get goroutineProfiled
// set to goroutineProfileSatisfied before transitioning out of _Gdead,
// so here we check _Gdead first.
return
}
if isSystemGoroutine(, true) {
// System goroutines should not appear in the profile. (The finalizer
// goroutine is marked as "already profiled".)
return
}
for {
:= .goroutineProfiled.Load()
if == goroutineProfileSatisfied {
// This goroutine is already in the profile (or is new since the
// start of collection, so shouldn't appear in the profile).
break
}
if == goroutineProfileInProgress {
// Something else is adding gp1 to the goroutine profile right now.
// Give that a moment to finish.
()
continue
}
// While we have gp1.goroutineProfiled set to
// goroutineProfileInProgress, gp1 may appear _Grunnable but will not
// actually be able to run. Disable preemption for ourselves, to make
// sure we finish profiling gp1 right away instead of leaving it stuck
// in this limbo.
:= acquirem()
if .goroutineProfiled.CompareAndSwap(goroutineProfileAbsent, goroutineProfileInProgress) {
doRecordGoroutineProfile(, )
.goroutineProfiled.Store(goroutineProfileSatisfied)
}
releasem()
}
}
// doRecordGoroutineProfile writes gp1's call stack and labels to an in-progress
// goroutine profile. Preemption is disabled.
//
// This may be called via tryRecordGoroutineProfile in two ways: by the
// goroutine that is coordinating the goroutine profile (running on its own
// stack), or from the scheduler in preparation to execute gp1 (running on the
// system stack).
func doRecordGoroutineProfile( *g, []uintptr) {
if readgstatus() == _Grunning {
print("doRecordGoroutineProfile gp1=", .goid, "\n")
throw("cannot read stack of running goroutine")
}
:= int(goroutineProfile.offset.Add(1)) - 1
if >= len(goroutineProfile.records) {
// Should be impossible, but better to return a truncated profile than
// to crash the entire process at this point. Instead, deal with it in
// goroutineProfileWithLabelsConcurrent where we have more context.
return
}
// saveg calls gentraceback, which may call cgo traceback functions. When
// called from the scheduler, this is on the system stack already so
// traceback.go:cgoContextPCs will avoid calling back into the scheduler.
//
// When called from the goroutine coordinating the profile, we still have
// set gp1.goroutineProfiled to goroutineProfileInProgress and so are still
// preventing it from being truly _Grunnable. So we'll use the system stack
// to avoid schedule delays.
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), , &goroutineProfile.records[], ) })
if goroutineProfile.labels != nil {
goroutineProfile.labels[] = .labels
}
}
func goroutineProfileWithLabelsSync( []profilerecord.StackRecord, []unsafe.Pointer) ( int, bool) {
:= getg()
:= func( *g) bool {
// Checking isSystemGoroutine here makes GoroutineProfile
// consistent with both NumGoroutine and Stack.
return != && readgstatus() != _Gdead && !isSystemGoroutine(, false)
}
:= makeProfStack() // see saveg() for explanation
:= stopTheWorld(stwGoroutineProfile)
// World is stopped, no locking required.
= 1
forEachGRace(func( *g) {
if () {
++
}
})
if <= len() {
= true
, := ,
// Save current goroutine.
:= sys.GetCallerSP()
:= sys.GetCallerPC()
systemstack(func() {
saveg(, , , &[0], )
})
= [1:]
// If we have a place to put our goroutine labelmap, insert it there.
if != nil {
[0] = .labels
= [1:]
}
// Save other goroutines.
forEachGRace(func( *g) {
if !() {
return
}
if len() == 0 {
// Should be impossible, but better to return a
// truncated profile than to crash the entire process.
return
}
// saveg calls gentraceback, which may call cgo traceback functions.
// The world is stopped, so it cannot use cgocall (which will be
// blocked at exitsyscall). Do it on the system stack so it won't
// call into the schedular (see traceback.go:cgoContextPCs).
systemstack(func() { saveg(^uintptr(0), ^uintptr(0), , &[0], ) })
if != nil {
[0] = .labels
= [1:]
}
= [1:]
})
}
if raceenabled {
raceacquire(unsafe.Pointer(&labelSync))
}
startTheWorld()
return ,
}
// GoroutineProfile returns n, the number of records in the active goroutine stack profile.
// If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
// If len(p) < n, GoroutineProfile does not change p and returns n, false.
//
// Most clients should use the [runtime/pprof] package instead
// of calling GoroutineProfile directly.
func ( []StackRecord) ( int, bool) {
:= make([]profilerecord.StackRecord, len())
, = goroutineProfileInternal()
if ! {
return
}
for , := range [0:] {
:= copy([].Stack0[:], .Stack)
clear([].Stack0[:])
}
return
}
func goroutineProfileInternal( []profilerecord.StackRecord) ( int, bool) {
return goroutineProfileWithLabels(, nil)
}
func saveg(, uintptr, *g, *profilerecord.StackRecord, []uintptr) {
// To reduce memory usage, we want to allocate a r.Stack that is just big
// enough to hold gp's stack trace. Naively we might achieve this by
// recording our stack trace into mp.profStack, and then allocating a
// r.Stack of the right size. However, mp.profStack is also used for
// allocation profiling, so it could get overwritten if the slice allocation
// gets profiled. So instead we record the stack trace into a temporary
// pcbuf which is usually given to us by our caller. When it's not, we have
// to allocate one here. This will only happen for goroutines that were in a
// syscall when the goroutine profile started or for goroutines that manage
// to execute before we finish iterating over all the goroutines.
if == nil {
= makeProfStack()
}
var unwinder
.initAt(, , 0, , unwindSilentErrors)
:= tracebackPCs(&, 0, )
.Stack = make([]uintptr, )
copy(.Stack, )
}
// Stack formats a stack trace of the calling goroutine into buf
// and returns the number of bytes written to buf.
// If all is true, Stack formats stack traces of all other goroutines
// into buf after the trace for the current goroutine.
func ( []byte, bool) int {
var worldStop
if {
= stopTheWorld(stwAllGoroutinesStack)
}
:= 0
if len() > 0 {
:= getg()
:= sys.GetCallerSP()
:= sys.GetCallerPC()
systemstack(func() {
:= getg()
// Force traceback=1 to override GOTRACEBACK setting,
// so that Stack's results are consistent.
// GOTRACEBACK is only about crash dumps.
.m.traceback = 1
.writebuf = [0:0:len()]
goroutineheader()
traceback(, , 0, )
if {
tracebackothers()
}
.m.traceback = 0
= len(.writebuf)
.writebuf = nil
})
}
if {
startTheWorld()
}
return
}
The pages are generated with Golds v0.7.3. (GOOS=linux GOARCH=amd64) Golds is a Go 101 project developed by Tapir Liu. PR and bug reports are welcome and can be submitted to the issue list. Please follow @zigo_101 (reachable from the left QR code) to get the latest news of Golds. |