// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains stub functions that are not meant to be called directly,
// but that will be assembled together using the inlining logic in runtime/_mkmalloc
// to produce a full mallocgc function that's specialized for a span class
// or specific size in the case of the tiny allocator.
//
// To generate the specialized mallocgc functions, do 'go run .' inside runtime/_mkmalloc.
//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub or tinyStub, depending on the parameters being specialized.
//
// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases)
// identifiers are replaced with the value of the parameter in the specialized case.
// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
// functions are also inlined by _mkmalloc.
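//
// As an illustration only (the exact generated code is determined by _mkmalloc),
// a small-noscan specialization comes out shaped roughly like:
//
//	func mallocgcSmallNoScanSC2(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
//		// ... body of mallocStub, with the inlinedMalloc call replaced by the
//		// inlined body of smallNoScanStub, and with sizeclass_, elemsize_, and
//		// noscanint_ replaced by their constant values for the span class
//		// (for size class 2, assuming the standard size class table: 2, 16, and 1).
//	}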

package runtime

import (
	"internal/goarch"
	"internal/goexperiment"
	"internal/runtime/sys"
	"unsafe"
)

// These identifiers will all be replaced by the inliner. So their values don't
// really matter: they just need to be set so that the stub functions, which
// will never be used on their own, can compile. elemsize_ can't be set to
// zero because we divide by it in nextFreeFastTiny, and the compiler would
// complain about a division by zero. Its replaced value will always be greater
// than zero.
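// size_ is only meaningful for the tiny specialization, where the requested
// size itself becomes a constant, and isTiny_ selects the tiny-only paths in
// mallocStub (such as the secret-mode fallback to the noscan allocator).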
const elemsize_ = 8
const sizeclass_ = 0
const noscanint_ = 0
const size_ = 0
const isTiny_ = false

func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// Short-circuit zero-sized allocation requests.
	return unsafe.Pointer(&zerobase)
}

func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	panic("not defined for sizeclass")
}

// WARNING: mallocStub does not do any work for sanitizers so callers need
// to steer out of this codepath early if sanitizers are enabled.
func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {

	if isTiny_ {
		// Secret code needs to avoid the tiny allocator, since the tiny allocator
		// might keep co-located values alive longer and prevent timely zeroing.
		//
		// Call directly into the NoScan allocator.
		// See go.dev/issue/76356
		gp := getg()
		if goexperiment.RuntimeSecret && gp.secret > 0 {
			return mallocgcSmallNoScanSC2(size, typ, needzero)
		}
		}
	}
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// It's possible for any malloc to trigger sweeping, which may in
	// turn queue finalizers. Record this dynamic lock edge.
	// N.B. Compiled away if lockrank experiment is not enabled.
	lockRankMayQueueFinalizer()

	// Pre-malloc debug hooks.
	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Assist the GC if needed. (On the reuse path, we currently compensate for this;
	// changes here might require changes there.)
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	// Actually do the allocation.
	x, elemsize := inlinedMalloc(size, typ, needzero)

	if !isTiny_ {
		gp := getg()
		if goexperiment.RuntimeSecret && gp.secret > 0 {
			// Mark any object allocated while in secret mode as secret.
			// This ensures we zero it immediately when freeing it.
			addSecret(x)
		}
	}

	// Notify valgrind, if enabled.
	// Unlike the other sanitizers, valgrind instrumentation is done here in the
	// runtime so that the compiler does not need to know about valgrind.
	if valgrindenabled {
		valgrindMalloc(x, size)
	}

	// Adjust our GC assist debt to account for internal fragmentation.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	// Post-malloc debug hooks.
	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}

// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
// when generating the size-specialized malloc function. See the comment at the top of this
// file for more information.
func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	return unsafe.Pointer(uintptr(0)), 0
}

func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ == nil || !typ.Pointers() {
		throw("noscan allocated in scan-only path")
	}
	if !heapBitsInSpan(size) {
		throw("heap bits in not in span for non-header-only path")
	}
}

func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, size)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// initHeapBits already set the pointer bits for the 8-byte sizeclass
		// on 64-bit platforms.
		c.scanAlloc += 8
	} else {
		dataSize := size // make the inliner happy
		xu := uintptr(x)
		scanSize := heapSetTypeNoHeaderStub(xu, dataSize, typ, span)
		c.scanAlloc += scanSize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	return x, elemsize
}

func doubleCheckSmallNoScan(typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan type for noscan alloc")
	}
}

func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	// TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant
	// sizeclass_ and elemsize_ value (instead, just set them to the expressions that look
	// up the size class and elemsize). We'd also need to teach mkmalloc that values that
	// are touched by these (specifically spc below) should turn into vars. This would
	// allow us to generate mallocgcSmallNoScan itself, so that its code could not diverge
	// from the generated functions.
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallNoScan(typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]

	// First, check for a reusable object.
	if runtimeFreegcEnabled && c.hasReusableNoscan(spc) {
		// We have a reusable object, use it.
		x := mallocgcSmallNoscanReuse(size, elemsize, needzero, c, spc)
		mp.mallocing = 0
		releasem(mp)

		// TODO(thepudds): note that the generated return path is essentially duplicated
		// by the generator. For example, see the two postMallocgcDebug calls and
		// related duplicated code on the return path currently in the generated
		// mallocgcSmallNoScanSC2 function. One set of those corresponds to this
		// return here. We might be able to de-duplicate the generated return path
		// by updating the generator, perhaps by jumping to a shared return or similar.
		return x, elemsize
	}

	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if needzero && span.needzero != 0 {
		memclrNoHeapPointers(x, size)
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	return x, elemsize
}

func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan for tiny alloc")
	}
}

func tinyStub(_ uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const size = size_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(size, typ, mp)
	}
	mp.mallocing = 1

	// Tiny allocator.
	//
	// Tiny allocator combines several tiny allocation requests
	// into a single memory block. The resulting memory block
	// is freed when all subobjects are unreachable. The subobjects
	// must be noscan (have no pointers); this ensures that
	// the amount of potentially wasted memory is bounded.
	//
	// Size of the memory block used for combining (maxTinySize) is tunable.
	// Current setting is 16 bytes, which relates to 2x worst case memory
	// wastage (when all but one subobjects are unreachable).
	// 8 bytes would result in no wastage at all, but provides less
	// opportunities for combining.
	// 32 bytes provides more opportunities for combining,
	// but can lead to 4x worst case wastage.
	// The best-case win is 8x regardless of block size.
	//
	// Objects obtained from tiny allocator must not be freed explicitly.
	// So when an object will be freed explicitly, we ensure that
	// its size >= maxTinySize.
	//
	// SetFinalizer has a special case for objects potentially coming
	// from the tiny allocator; in such a case it allows setting finalizers
	// for an inner byte of a memory block.
	//
	// The main targets of tiny allocator are small strings and
	// standalone escaping variables. On a json benchmark
	// the allocator reduces the number of allocations by ~12% and
	// reduces heap size by ~20%.
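	//
	// As a concrete illustration: three consecutive 4-byte noscan allocations
	// would typically land at offsets 0, 4, and 8 of a single 16-byte tiny
	// block, and that block can only be reclaimed once all three subobjects
	// are unreachable.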
	c := getMCache(mp)
	off := c.tinyoffset
	// Align tiny pointer for required (conservative) alignment.
	if size&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && size == 12 {
		// Conservatively align 12-byte objects to 8 bytes on 32-bit
		// systems so that objects whose first field is a 64-bit
		// value is aligned to 8 bytes and does not cause a fault on
		// atomic access. See issue 37262.
		// TODO(mknyszek): Remove this workaround if/when issue 36606
		// is resolved.
		off = alignUp(off, 8)
	} else if size&3 == 0 {
		off = alignUp(off, 4)
	} else if size&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+size <= maxTinySize && c.tiny != 0 {
		// The object fits into existing tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + size
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		return x, 0
	}
	// Allocate a new maxTinySize block.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]
	v := nextFreeFastTiny(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	(*[2]uint64)(x)[0] = 0 // Always zero
	(*[2]uint64)(x)[1] = 0
	// See if we need to replace the existing tiny block with the new one
	// based on amount of remaining free space.
	if !raceenabled && (size < c.tinyoffset || c.tiny == 0) {
		// Note: disabled when race detector is on, see comment near end of this function.
		c.tiny = uintptr(x)
		c.tinyoffset = size
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Pad tinysize allocations so they are aligned with the end
		// of the tinyalloc region. This ensures that any arithmetic
		// that goes off the top end of the object will be detectable
		// by checkptr (issue 38872).
		// Note that we disable tinyalloc when raceenabled for this to work.
		// TODO: This padding is only performed when the race detector
		// is enabled. It would be nice to enable it if any package
		// was compiled with checkptr, but there's no easy way to
		// detect that (especially at compile time).
		// TODO: enable this padding for all allocations, not just
		// tinyalloc ones. It's tricky because of pointer maps.
		// Maybe just all noscan objects?
		x = add(x, elemsize-size)
	}
	return x, elemsize
}

// TODO(matloob): Should we let the go compiler inline this instead of using mkmalloc?
// We won't be able to use elemsize_ but that's probably ok.
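// nextFreeFastTiny returns the next free object in the tiny span if one is
// quickly available in the span's allocCache, and 0 otherwise. It is the
// tiny-span analogue of nextFreeFastStub, except that the number of elements
// is computed as a constant from elemsize_ (assuming an 8 KiB span with inline
// mark bits at its end) rather than read from span.nelems.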
func nextFreeFastTiny(span *mspan) gclinkptr {
	const spanSize = 8192
	const nelems = uint16((spanSize - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_)
	var v gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				v = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return v
}

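// nextFreeFastStub mirrors the runtime's nextFreeFast fast path: it pops the
// next free object out of the span's allocCache when one is available without
// refilling, and returns 0 otherwise so the caller falls back to mcache.nextFree.
// It uses the elemsize_ constant so that _mkmalloc can specialize the multiply
// for each generated size class.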
func nextFreeFastStub(span *mspan) gclinkptr {
	var v gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				v = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return v
}

func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
	if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) {
		throw("tried to write heap bits, but no heap bits in span")
	}
	scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ)
	if doubleCheckHeapSetType {
		doubleCheckHeapType(x, dataSize, typ, nil, span)
	}
	return scanSize
}

// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is
// stored as a bitmap at the end of the span.
//
// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
//
//go:nosplit
func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr {
	// The objects here are always really small, so a single load is sufficient.
	src0 := readUintptr(getGCMask(typ))

	const elemsize = elemsize_

	// Create repetitions of the bitmap if we have a small slice backing store.
	scanSize := typ.PtrBytes
	src := src0
	if typ.Size_ == goarch.PtrSize {
		src = (1 << (dataSize / goarch.PtrSize)) - 1
	} else {
		// N.B. We rely on dataSize being an exact multiple of the type size.
		// The alternative is to be defensive and mask out src to the length
		// of dataSize. The purpose is to save on one additional masking operation.
		if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
			throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
		}
		for i := typ.Size_; i < dataSize; i += typ.Size_ {
			src |= src0 << (i / goarch.PtrSize)
			scanSize += typ.Size_
		}
	}

	// Since we're never writing more than one uintptr's worth of bits, we're either going
	// to do one or two writes.
	dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
	dst := unsafe.Pointer(dstBase)
	o := (x - span.base()) / goarch.PtrSize
	i := o / ptrBits
	j := o % ptrBits
	const bits uintptr = elemsize / goarch.PtrSize
	// In the if statement below, we have to do two uintptr writes if the bits
	// we need to write straddle across two different memory locations. But if
	// the number of bits we're writing divides evenly into the number of bits
	// in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo
	// is a compile-time constant in the generated code, in the case where the size is
	// a power of two less than or equal to ptrBits, the compiler can remove the
	// 'two writes' branch of the if statement and always do only one write without
	// the check.
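	// For instance (assuming a 64-bit platform), an elemsize of 16 gives bits = 2:
	// object starts are 16-byte aligned within the span, so j is always a multiple
	// of 2 and j+bits can never exceed ptrBits, and only the one-write branch is
	// kept in the generated code.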
	const bitsIsPowerOfTwo = bits&(bits-1) == 0
	if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
		// Two writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// One write.
		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
		*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // We take the min so this compiles on 32-bit platforms; if bits > ptrBits we always take the other branch.
	}

	const doubleCheck = false
	if doubleCheck {
		writeHeapBitsDoubleCheck(span, x, dataSize, src0, src, bits, i, j, typ)
	}
	return scanSize
}

func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src0, src, bits, i, j uintptr, typ *_type) {
	srcRead := span.heapBitsSmallForAddr(x)
	if srcRead != src {
		print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
		print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
		print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
		throw("bad pointer bits written for small object")
	}
}