Source File: lock_spinbit.go
Package: runtime
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !wasm

package runtime

import (
    "internal/goarch"
    "internal/runtime/atomic"
    "internal/runtime/gc"
    "unsafe"
)

// This implementation depends on OS-specific implementations of
//
//	func semacreate(mp *m)
//		Create a semaphore for mp, if it does not already have one.
//
//	func semasleep(ns int64) int32
//		If ns < 0, acquire m's semaphore and return 0.
//		If ns >= 0, try to acquire m's semaphore for at most ns nanoseconds.
//		Return 0 if the semaphore was acquired, -1 if interrupted or timed out.
//
//	func semawakeup(mp *m)
//		Wake up mp, which is or will soon be sleeping on its semaphore.

// The mutex state consists of four flags and a pointer. The flag at bit 0,
// mutexLocked, represents the lock itself. Bit 1, mutexSleeping, is a hint that
// the pointer is non-nil. The fast paths for locking and unlocking the mutex
// are based on atomic 8-bit swap operations on the low byte; bits 2 through 7
// are unused.
//
// Bit 8, mutexSpinning, is a try-lock that grants a waiting M permission to
// spin on the state word. Most other Ms must attempt to spend their time
// sleeping to reduce traffic on the cache line. This is the "spin bit" for
// which the implementation is named. (The anti-starvation mechanism also grants
// temporary permission for an M to spin.)
//
// Bit 9, mutexStackLocked, is a try-lock that grants an unlocking M permission
// to inspect the list of waiting Ms and to pop an M off of that stack.
//
// The upper bits hold a (partial) pointer to the M that most recently went to
// sleep. The sleeping Ms form a stack linked by their mWaitList.next fields.
// Because the fast paths use an 8-bit swap on the low byte of the state word,
// we'll need to reconstruct the full M pointer from the bits we have. Most Ms
// are allocated on the heap, and have a known alignment and base offset. (The
// offset is due to mallocgc's allocation headers.) The main program thread uses
// a static M value, m0. We check for m0 specifically and add a known offset
// otherwise.

const (
    active_spin     = 4  // referenced in proc.go for sync.Mutex implementation
    active_spin_cnt = 30 // referenced in proc.go for sync.Mutex implementation
)

const (
    mutexLocked      = 0x001
    mutexSleeping    = 0x002
    mutexSpinning    = 0x100
    mutexStackLocked = 0x200
    mutexMMask       = 0x3FF
    mutexMOffset     = gc.MallocHeaderSize // alignment of heap-allocated Ms (those other than m0)

    mutexActiveSpinCount  = 4
    mutexActiveSpinSize   = 30
    mutexPassiveSpinCount = 1

    mutexTailWakePeriod = 16
)

//go:nosplit
func key8(p *uintptr) *uint8 {
    if goarch.BigEndian {
        return &(*[8]uint8)(unsafe.Pointer(p))[goarch.PtrSize/1-1]
    }
    return &(*[8]uint8)(unsafe.Pointer(p))[0]
}

// mWaitList is part of the M struct, and holds the list of Ms that are waiting
// for a particular runtime.mutex.
//
// When an M is unable to immediately obtain a lock, it adds itself to the list
// of Ms waiting for the lock. It does that via this struct's next field,
// forming a singly-linked list with the mutex's key field pointing to the head
// of the list.
type mWaitList struct {
    next       muintptr // next m waiting for lock
    startTicks int64    // when this m started waiting for the current lock holder, in cputicks
}

// lockVerifyMSize confirms that we can recreate the low bits of the M pointer.
func lockVerifyMSize() {
    size := roundupsize(unsafe.Sizeof(mPadded{}), false) + gc.MallocHeaderSize
    if size&mutexMMask != 0 {
        print("M structure uses sizeclass ", size, "/", hex(size), " bytes; ",
            "incompatible with mutex flag mask ", hex(mutexMMask), "\n")
        throw("runtime.m memory alignment too small for spinbit mutex")
    }
}

// mutexWaitListHead recovers a full muintptr that was missing its low bits.
// With the exception of the static m0 value, it requires allocating runtime.m
// values in a size class with a particular minimum alignment. The 2048-byte
// size class allows recovering the full muintptr value even after overwriting
// the low 11 bits with flags. We can use those 11 bits as 3 flags and an
// atomically-swapped byte.
//
//go:nosplit
func mutexWaitListHead(v uintptr) muintptr {
    if highBits := v &^ mutexMMask; highBits == 0 {
        return 0
    } else if m0bits := muintptr(unsafe.Pointer(&m0)); highBits == uintptr(m0bits)&^mutexMMask {
        return m0bits
    } else {
        return muintptr(highBits + mutexMOffset)
    }
}

// mutexPreferLowLatency reports if this mutex prefers low latency at the risk
// of performance collapse. If so, we can allow all waiting threads to spin on
// the state word rather than go to sleep.
//
// TODO: We could have the waiting Ms each spin on their own private cache line,
// especially if we can put a bound on the on-CPU time that would consume.
//
// TODO: If there's a small set of mutex values with special requirements, they
// could make use of a more specialized lock2/unlock2 implementation. Otherwise,
// we're constrained to what we can fit within a single uintptr with no
// additional storage on the M for each lock held.
//
//go:nosplit
func mutexPreferLowLatency(l *mutex) bool {
    switch l {
    default:
        return false
    case &sched.lock:
        // We often expect sched.lock to pass quickly between Ms in a way that
        // each M has unique work to do: for instance when we stop-the-world
        // (bringing each P to idle) or add new netpoller-triggered work to the
        // global run queue.
        return true
    }
}

func mutexContended(l *mutex) bool {
    return atomic.Loaduintptr(&l.key)&^mutexMMask != 0
}

func lock(l *mutex) {
    lockWithRank(l, getLockRank(l))
}

func lock2(l *mutex) {
    gp := getg()
    if gp.m.locks < 0 {
        throw("runtime·lock: lock count")
    }
    gp.m.locks++

    k8 := key8(&l.key)

    // Speculative grab for lock.
    v8 := atomic.Xchg8(k8, mutexLocked)
    if v8&mutexLocked == 0 {
        if v8&mutexSleeping != 0 {
            atomic.Or8(k8, mutexSleeping)
        }
        return
    }
    semacreate(gp.m)

    var startTime int64
    // On uniprocessors, no point spinning.
    // On multiprocessors, spin for mutexActiveSpinCount attempts.
    spin := 0
    if numCPUStartup > 1 {
        spin = mutexActiveSpinCount
    }

    var weSpin, atTail, haveTimers bool
    v := atomic.Loaduintptr(&l.key)
tryAcquire:
    for i := 0; ; i++ {
        if v&mutexLocked == 0 {
            if weSpin {
                next := (v &^ mutexSpinning) | mutexSleeping | mutexLocked
                if next&^mutexMMask == 0 {
                    // The fast-path Xchg8 may have cleared mutexSleeping. Fix
                    // the hint so unlock2 knows when to use its slow path.
                    next = next &^ mutexSleeping
                }
                if atomic.Casuintptr(&l.key, v, next) {
                    gp.m.mLockProfile.end(startTime)
                    return
                }
            } else {
                prev8 := atomic.Xchg8(k8, mutexLocked|mutexSleeping)
                if prev8&mutexLocked == 0 {
                    gp.m.mLockProfile.end(startTime)
                    return
                }
            }
            v = atomic.Loaduintptr(&l.key)
            continue tryAcquire
        }

        if !weSpin && v&mutexSpinning == 0 && atomic.Casuintptr(&l.key, v, v|mutexSpinning) {
            v |= mutexSpinning
            weSpin = true
        }

        if weSpin || atTail || mutexPreferLowLatency(l) {
            if i < spin {
                procyield(mutexActiveSpinSize)
                v = atomic.Loaduintptr(&l.key)
                continue tryAcquire
            } else if i < spin+mutexPassiveSpinCount {
                osyield() // TODO: Consider removing this step. See https://go.dev/issue/69268.
                v = atomic.Loaduintptr(&l.key)
                continue tryAcquire
            }
        }

        // Go to sleep
        if v&mutexLocked == 0 {
            throw("runtime·lock: sleeping while lock is available")
        }

        // Collect times for mutex profile (seen in unlock2 only via mWaitList),
        // and for "/sync/mutex/wait/total:seconds" metric (to match).
        if !haveTimers {
            gp.m.mWaitList.startTicks = cputicks()
            startTime = gp.m.mLockProfile.start()
            haveTimers = true
        }
        // Store the current head of the list of sleeping Ms in our gp.m.mWaitList.next field
        gp.m.mWaitList.next = mutexWaitListHead(v)

        // Pack a (partial) pointer to this M with the current lock state bits
        next := (uintptr(unsafe.Pointer(gp.m)) &^ mutexMMask) | v&mutexMMask | mutexSleeping
        if weSpin { // If we were spinning, prepare to retire
            next = next &^ mutexSpinning
        }

        if atomic.Casuintptr(&l.key, v, next) {
            weSpin = false
            // We've pushed ourselves onto the stack of waiters. Wait.
            semasleep(-1)
            atTail = gp.m.mWaitList.next == 0 // we were at risk of starving
            i = 0
        }

        gp.m.mWaitList.next = 0
        v = atomic.Loaduintptr(&l.key)
    }
}

func unlock(l *mutex) {
    unlockWithRank(l)
}

// We might not be holding a p in this code.
//
//go:nowritebarrier
func unlock2(l *mutex) {
    gp := getg()

    var prev8 uint8
    var haveStackLock bool
    var endTicks int64
    if !mutexSampleContention() {
        // Not collecting a sample for the contention profile, do the quick release
        prev8 = atomic.Xchg8(key8(&l.key), 0)
    } else {
        // If there's contention, we'll sample it. Don't allow another
        // lock2/unlock2 pair to finish before us and take our blame. Prevent
        // that by trading for the stack lock with a CAS.
        v := atomic.Loaduintptr(&l.key)
        for {
            if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
                // No contention, or (stack lock unavailable) no way to calculate it
                prev8 = atomic.Xchg8(key8(&l.key), 0)
                endTicks = 0
                break
            }

            // There's contention, the stack lock appeared to be available, and
            // we'd like to collect a sample for the contention profile.
            if endTicks == 0 {
                // Read the time before releasing the lock. The profile will be
                // strictly smaller than what other threads would see by timing
                // their lock calls.
                endTicks = cputicks()
            }
            next := (v | mutexStackLocked) &^ (mutexLocked | mutexSleeping)
            if atomic.Casuintptr(&l.key, v, next) {
                haveStackLock = true
                prev8 = uint8(v)
                // The fast path of lock2 may have cleared mutexSleeping.
                // Restore it so we're sure to call unlock2Wake below.
                prev8 |= mutexSleeping
                break
            }
            v = atomic.Loaduintptr(&l.key)
        }
    }
    if prev8&mutexLocked == 0 {
        throw("unlock of unlocked lock")
    }

    if prev8&mutexSleeping != 0 {
        unlock2Wake(l, haveStackLock, endTicks)
    }

    gp.m.mLockProfile.store()
    gp.m.locks--
    if gp.m.locks < 0 {
        throw("runtime·unlock: lock count")
    }
    if gp.m.locks == 0 && gp.preempt { // restore the preemption request in case we've cleared it in newstack
        gp.stackguard0 = stackPreempt
    }
}

// mutexSampleContention returns whether the current mutex operation should
// report any contention it discovers.
func mutexSampleContention() bool {
    if rate := int64(atomic.Load64(&mutexprofilerate)); rate <= 0 {
        return false
    } else {
        // TODO: have SetMutexProfileFraction do the clamping
        rate32 := uint32(rate)
        if int64(rate32) != rate {
            rate32 = ^uint32(0)
        }
        return cheaprandn(rate32) == 0
    }
}

// unlock2Wake updates the list of Ms waiting on l, waking an M if necessary.
//
//go:nowritebarrier
func unlock2Wake(l *mutex, haveStackLock bool, endTicks int64) {
    v := atomic.Loaduintptr(&l.key)

    // On occasion, seek out and wake the M at the bottom of the stack so it
    // doesn't starve.
    antiStarve := cheaprandn(mutexTailWakePeriod) == 0

    if haveStackLock {
        goto useStackLock
    }

    if !(antiStarve || // avoiding starvation may require a wake
        v&mutexSpinning == 0 || // no spinners means we must wake
        mutexPreferLowLatency(l)) { // prefer waiters be awake as much as possible
        return
    }

    for {
        if v&^mutexMMask == 0 || v&mutexStackLocked != 0 {
            // No waiting Ms means nothing to do.
            //
            // If the stack lock is unavailable, its owner would make the same
            // wake decisions that we would, so there's nothing for us to do.
            //
            // Although: This thread may have a different call stack, which
            // would result in a different entry in the mutex contention profile
            // (upon completion of go.dev/issue/66999). That could lead to weird
            // results if a slow critical section ends but another thread
            // quickly takes the lock, finishes its own critical section,
            // releases the lock, and then grabs the stack lock. That quick
            // thread would then take credit (blame) for the delay that this
            // slow thread caused. The alternative is to have more expensive
            // atomic operations (a CAS) on the critical path of unlock2.
            return
        }

        // Other M's are waiting for the lock.
        // Obtain the stack lock, and pop off an M.
        next := v | mutexStackLocked
        if atomic.Casuintptr(&l.key, v, next) {
            break
        }
        v = atomic.Loaduintptr(&l.key)
    }

    // We own the mutexStackLocked flag. New Ms may push themselves onto the
    // stack concurrently, but we're now the only thread that can remove or
    // modify the Ms that are sleeping in the list.

useStackLock:
    if endTicks != 0 {
        // Find the M at the bottom of the stack of waiters, which has been
        // asleep for the longest. Take the average of its wait time and the
        // head M's wait time for the mutex contention profile, matching the
        // estimate we do in semrelease1 (for sync.Mutex contention).
        //
        // We don't keep track of the tail node (we don't need it often), so do
        // an O(N) walk on the list of sleeping Ms to find it.
        head := mutexWaitListHead(v).ptr()
        for node, n := head, 0; ; {
            n++
            next := node.mWaitList.next.ptr()
            if next == nil {
                cycles := ((endTicks - head.mWaitList.startTicks) + (endTicks - node.mWaitList.startTicks)) / 2
                node.mWaitList.startTicks = endTicks
                head.mWaitList.startTicks = endTicks
                getg().m.mLockProfile.recordUnlock(cycles * int64(n))
                break
            }
            node = next
        }
    }

    var committed *m // If we choose an M within the stack, we've made a promise to wake it
    for {
        headM := v &^ mutexMMask
        flags := v & (mutexMMask &^ mutexStackLocked) // preserve low bits, but release stack lock

        mp := mutexWaitListHead(v).ptr()
        wakem := committed
        if committed == nil {
            if v&mutexSpinning == 0 || mutexPreferLowLatency(l) {
                wakem = mp
            }
            if antiStarve {
                // Wake the M at the bottom of the stack of waiters. (This is
                // O(N) with the number of waiters.)
                wakem = mp
                prev := mp
                for {
                    next := wakem.mWaitList.next.ptr()
                    if next == nil {
                        break
                    }
                    prev, wakem = wakem, next
                }
                if wakem != mp {
                    committed = wakem
                    prev.mWaitList.next = wakem.mWaitList.next
                    // An M sets its own startTicks when it first goes to sleep.
                    // When an unlock operation is sampled for the mutex
                    // contention profile, it takes blame for the entire list of
                    // waiting Ms but only updates the startTicks value at the
                    // tail. Copy any updates to the next-oldest M.
                    prev.mWaitList.startTicks = wakem.mWaitList.startTicks
                }
            }
        }

        if wakem == mp {
            headM = uintptr(mp.mWaitList.next) &^ mutexMMask
        }

        next := headM | flags
        if atomic.Casuintptr(&l.key, v, next) {
            if wakem != nil {
                // Claimed an M. Wake it.
                semawakeup(wakem)
            }
            return
        }

        v = atomic.Loaduintptr(&l.key)
    }
}
The pages are generated with Golds v0.7.9-preview (GOOS=linux GOARCH=amd64).