// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (

// A spanSet is a set of *mspans.
// spanSet is safe for concurrent push and pop operations.
type spanSet struct {
	// A spanSet is a two-level data structure consisting of a
	// growable spine that points to fixed-sized blocks. The spine
	// can be accessed without locks, but adding a block or
	// growing it requires taking the spine lock.
	// Because each mspan covers at least 8K of heap and takes at
	// most 8 bytes in the spanSet, the growth of the spine is
	// quite limited.
	// The spine and all blocks are allocated off-heap, which
	// allows this to be used in the memory manager and avoids the
	// need for write barriers on all of these. spanSetBlocks are
	// managed in a pool, though never freed back to the operating
	// system. We never release spine memory because there could be
	// concurrent lock-free access and we're likely to reuse it
	// anyway. (In principle, we could do this during STW.)

	spineLock mutex
	spine     unsafe.Pointer // *[N]*spanSetBlock, accessed atomically
	spineLen  uintptr        // Spine array length, accessed atomically
	spineCap  uintptr        // Spine array cap, accessed under lock

	// index is the head and tail of the spanSet in a single field.
	// The head and the tail both represent an index into the logical
	// concatenation of all blocks, with the head always behind or
	// equal to the tail (indicating an empty set). This field is
	// always accessed atomically.
	// The head and the tail are only 32 bits wide, which means we
	// can only support up to 2^32 pushes before a reset. If every
	// span in the heap were stored in this set, and each span were
	// the minimum size (1 runtime page, 8 KiB), then roughly the
	// smallest heap which would be unrepresentable is 32 TiB in size.
	index headTailIndex

const (
	spanSetBlockEntries = 512 // 4KB on 64-bit
	spanSetInitSpineCap = 256 // Enough for 1GB heap on 64-bit

type spanSetBlock struct {
	// Free spanSetBlocks are managed via a lock-free stack.

	// popped is the number of pop operations that have occurred on
	// this block. This number is used to help determine when a block
	// may be safely recycled.
	popped uint32

	// spans is the set of spans in this block.
	spans [spanSetBlockEntries]*mspan

// push adds span s to buffer b. push is safe to call concurrently
// with other push and pop operations.
func ( *spanSet) ( *mspan) {
	// Obtain our slot.
	 := uintptr(.index.incTail().tail() - 1)
	,  := /spanSetBlockEntries, %spanSetBlockEntries

	// Do we need to add a block?
	 := atomic.Loaduintptr(&.spineLen)
	var  *spanSetBlock
	if  <  {
		 := atomic.Loadp(unsafe.Pointer(&.spine))
		 := add(, goarch.PtrSize*)
		 = (*spanSetBlock)(atomic.Loadp())
	} else {
		// Add a new block to the spine, potentially growing
		// the spine.
		// spineLen cannot change until we release the lock,
		// but may have changed while we were waiting.
		 = atomic.Loaduintptr(&.spineLen)
		if  <  {

		if  == .spineCap {
			// Grow the spine.
			 := .spineCap * 2
			if  == 0 {
				 = spanSetInitSpineCap
			 := persistentalloc(*goarch.PtrSize, cpu.CacheLineSize, &memstats.gcMiscSys)
			if .spineCap != 0 {
				// Blocks are allocated off-heap, so
				// no write barriers.
				memmove(, .spine, .spineCap*goarch.PtrSize)
			// Spine is allocated off-heap, so no write barrier.
			atomic.StorepNoWB(unsafe.Pointer(&.spine), )
			.spineCap = 
			// We can't immediately free the old spine
			// since a concurrent push with a lower index
			// could still be reading from it. We let it
			// leak because even a 1TB heap would waste
			// less than 2MB of memory on old spines. If
			// this is a problem, we could free old spines
			// during STW.

		// Allocate a new block from the pool.
		 = spanSetBlockPool.alloc()

		// Add it to the spine.
		 := add(.spine, goarch.PtrSize*)
		// Blocks are allocated off-heap, so no write barrier.
		atomic.StorepNoWB(, unsafe.Pointer())
		atomic.Storeuintptr(&.spineLen, +1)

	// We have a block. Insert the span atomically, since there may be
	// concurrent readers via the block API.
	atomic.StorepNoWB(unsafe.Pointer(&.spans[]), unsafe.Pointer())

// pop removes and returns a span from buffer b, or nil if b is empty.
// pop is safe to call concurrently with other pop and push operations.
func ( *spanSet) () *mspan {
	var ,  uint32
	for {
		 := .index.load()
		,  = .split()
		if  >=  {
			// The buf is empty, as far as we can tell.
			return nil
		// Check if the head position we want to claim is actually
		// backed by a block.
		 := atomic.Loaduintptr(&.spineLen)
		if  <= uintptr()/spanSetBlockEntries {
			// We're racing with a spine growth and the allocation of
			// a new block (and maybe a new spine!), and trying to grab
			// the span at the index which is currently being pushed.
			// Instead of spinning, let's just notify the caller that
			// there's nothing currently here. Spinning on this is
			// almost definitely not worth it.
			return nil
		// Try to claim the current head by CASing in an updated head.
		// This may fail transiently due to a push which modifies the
		// tail, so keep trying while the head isn't changing.
		for  ==  {
			if .index.cas(, makeHeadTailIndex(+1, )) {
			 = .index.load()
			,  = .split()
		// We failed to claim the spot we were after and the head changed,
		// meaning a popper got ahead of us. Try again from the top because
		// the buf may not be empty.
	,  := /spanSetBlockEntries, %spanSetBlockEntries

	// We may be reading a stale spine pointer, but because the length
	// grows monotonically and we've already verified it, we'll definitely
	// be reading from a valid block.
	 := atomic.Loadp(unsafe.Pointer(&.spine))
	 := add(, goarch.PtrSize*uintptr())

	// Given that the spine length is correct, we know we will never
	// see a nil block here, since the length is always updated after
	// the block is set.
	 := (*spanSetBlock)(atomic.Loadp())
	 := (*mspan)(atomic.Loadp(unsafe.Pointer(&.spans[])))
	for  == nil {
		// We raced with the span actually being set, but given that we
		// know a block for this span exists, the race window here is
		// extremely small. Try again.
		 = (*mspan)(atomic.Loadp(unsafe.Pointer(&.spans[])))
	// Clear the pointer. This isn't strictly necessary, but defensively
	// avoids accidentally re-using blocks which could lead to memory
	// corruption. This way, we'll get a nil pointer access instead.
	atomic.StorepNoWB(unsafe.Pointer(&.spans[]), nil)

	// Increase the popped count. If we are the last possible popper
	// in the block (note that bottom need not equal spanSetBlockEntries-1
	// due to races) then it's our responsibility to free the block.
	// If we increment popped to spanSetBlockEntries, we can be sure that
	// we're the last popper for this block, and it's thus safe to free it.
	// Every other popper must have crossed this barrier (and thus finished
	// popping its corresponding mspan) by the time we get here. Because
	// we're the last popper, we also don't have to worry about concurrent
	// pushers (there can't be any). Note that we may not be the popper
	// which claimed the last slot in the block, we're just the last one
	// to finish popping.
	if atomic.Xadd(&.popped, 1) == spanSetBlockEntries {
		// Clear the block's pointer.
		atomic.StorepNoWB(, nil)

		// Return the block to the block pool.

// reset resets a spanSet which is empty. It will also clean up
// any left over blocks.
// Throws if the buf is not empty.
// reset may not be called concurrently with any other operations
// on the span set.
func ( *spanSet) () {
	,  := .index.load().split()
	if  <  {
		print("head = ", , ", tail = ", , "\n")
		throw("attempt to clear non-empty span set")
	 :=  / spanSetBlockEntries
	if uintptr() < .spineLen {
		// If the head catches up to the tail and the set is empty,
		// we may not clean up the block containing the head and tail
		// since it may be pushed into again. In order to avoid leaking
		// memory since we're going to reset the head and tail, clean
		// up such a block now, if it exists.
		 := (**spanSetBlock)(add(.spine, goarch.PtrSize*uintptr()))
		 := *
		if  != nil {
			// Sanity check the popped value.
			if .popped == 0 {
				// popped should never be zero because that means we have
				// pushed at least one value but not yet popped if this
				// block pointer is not nil.
				throw("span set block with unpopped elements found in reset")
			if .popped == spanSetBlockEntries {
				// popped should also never be equal to spanSetBlockEntries
				// because the last popper should have made the block pointer
				// in this slot nil.
				throw("fully empty unfreed span set block found in reset")

			// Clear the pointer to the block.
			atomic.StorepNoWB(unsafe.Pointer(), nil)

			// Return the block to the block pool.
	atomic.Storeuintptr(&.spineLen, 0)

// spanSetBlockPool is a global pool of spanSetBlocks.
var spanSetBlockPool spanSetBlockAlloc

// spanSetBlockAlloc represents a concurrent pool of spanSetBlocks.
type spanSetBlockAlloc struct {
	stack lfstack

// alloc tries to grab a spanSetBlock out of the pool, and if it fails
// persistentallocs a new one and returns it.
func ( *spanSetBlockAlloc) () *spanSetBlock {
	if  := (*spanSetBlock)(.stack.pop());  != nil {
	return (*spanSetBlock)(persistentalloc(unsafe.Sizeof(spanSetBlock{}), cpu.CacheLineSize, &memstats.gcMiscSys))

// free returns a spanSetBlock back to the pool.
func ( *spanSetBlockAlloc) ( *spanSetBlock) {
	atomic.Store(&.popped, 0)

// haidTailIndex represents a combined 32-bit head and 32-bit tail
// of a queue into a single 64-bit value.
type headTailIndex uint64

// makeHeadTailIndex creates a headTailIndex value from a separate
// head and tail.
func makeHeadTailIndex(,  uint32) headTailIndex {
	return headTailIndex(uint64()<<32 | uint64())

// head returns the head of a headTailIndex value.
func ( headTailIndex) () uint32 {
	return uint32( >> 32)

// tail returns the tail of a headTailIndex value.
func ( headTailIndex) () uint32 {
	return uint32()

// split splits the headTailIndex value into its parts.
func ( headTailIndex) () ( uint32,  uint32) {
	return .head(), .tail()

// load atomically reads a headTailIndex value.
func ( *headTailIndex) () headTailIndex {
	return headTailIndex(atomic.Load64((*uint64)()))

// cas atomically compares-and-swaps a headTailIndex value.
func ( *headTailIndex) (,  headTailIndex) bool {
	return atomic.Cas64((*uint64)(), uint64(), uint64())

// incHead atomically increments the head of a headTailIndex.
func ( *headTailIndex) () headTailIndex {
	return headTailIndex(atomic.Xadd64((*uint64)(), (1 << 32)))

// decHead atomically decrements the head of a headTailIndex.
func ( *headTailIndex) () headTailIndex {
	return headTailIndex(atomic.Xadd64((*uint64)(), -(1 << 32)))

// incTail atomically increments the tail of a headTailIndex.
func ( *headTailIndex) () headTailIndex {
	 := headTailIndex(atomic.Xadd64((*uint64)(), +1))
	// Check for overflow.
	if .tail() == 0 {
		print("runtime: head = ", .head(), ", tail = ", .tail(), "\n")
		throw("headTailIndex overflow")

// reset clears the headTailIndex to (0, 0).
func ( *headTailIndex) () {
	atomic.Store64((*uint64)(), 0)