// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build unix

// Fork, exec, wait, etc.

package syscall

import (
	errorspkg 
	
	
	
	
)

// ForkLock is used to synchronize creation of new file descriptors
// with fork.
//
// We want the child in a fork/exec sequence to inherit only the
// file descriptors we intend. To do that, we mark all file
// descriptors close-on-exec and then, in the child, explicitly
// unmark the ones we want the exec'ed program to keep.
// Unix doesn't make this easy: there is, in general, no way to
// allocate a new file descriptor close-on-exec. Instead you
// have to allocate the descriptor and then mark it close-on-exec.
// If a fork happens between those two events, the child's exec
// will inherit an unwanted file descriptor.
//
// This lock solves that race: the create new fd/mark close-on-exec
// operation is done holding ForkLock for reading, and the fork itself
// is done holding ForkLock for writing. At least, that's the idea.
// There are two complications.
//
// First, some system calls that create new file descriptors can block
// for arbitrarily long times: open on a hung NFS server or named
// pipe, accept on a socket, and so on. We can't reasonably grab
// the lock across those operations.
//
// Second, it is worse to inherit some file descriptors than others.
// If a non-malicious child accidentally inherits an open ordinary file,
// that's not a big deal. On the other hand, if a long-lived child
// accidentally inherits the write end of a pipe, then the reader
// of that pipe will not see EOF until that child exits, potentially
// causing the parent program to hang. This is a common problem
// in threaded C programs that use popen.
//
// Luckily, the file descriptors that are most important not to
// inherit are not the ones that can take an arbitrarily long time
// to create: pipe returns instantly, and the net package uses
// non-blocking I/O to accept on a listening socket.
//
// The rules for which file descriptor-creating operations use the
// ForkLock are as follows:
//
//   - [Pipe]. Use pipe2 if available. Otherwise, does not block,
//     so use ForkLock.
//   - [Socket]. Use SOCK_CLOEXEC if available. Otherwise, does not
//     block, so use ForkLock.
//   - [Open]. Use [O_CLOEXEC] if available. Otherwise, may block,
//     so live with the race.
//   - [Dup]. Use [F_DUPFD_CLOEXEC] or dup3 if available. Otherwise,
//     does not block, so use ForkLock.
var ForkLock sync.RWMutex

// StringSlicePtr converts a slice of strings to a slice of pointers
// to NUL-terminated byte arrays. If any string contains a NUL byte
// this function panics instead of returning an error.
//
// The result is allocated with one more element than the input, and
// that extra trailing element is nil.
//
// Deprecated: Use [SlicePtrFromStrings] instead.
func ( []string) []*byte {
	// NOTE(review): the function name and all local identifiers are blank
	// in this listing; the doc comment above names it StringSlicePtr.
	// Confirm against the upstream source before relying on the notes below.
	//
	// Allocate one slot per input string plus one for the terminating nil.
	 := make([]*byte, len()+1)
	for  := 0;  < len(); ++ {
		// Each entry is produced by StringBytePtr; presumably that helper is
		// where the documented panic on an embedded NUL comes from (verify
		// against its definition).
		[] = StringBytePtr([])
	}
	// Explicit nil terminator (make has already zeroed this slot).
	[len()] = nil
	return 
}

// SlicePtrFromStrings converts a slice of strings to a slice of
// pointers to NUL-terminated byte arrays. If any string contains
// a NUL byte, it returns (nil, [EINVAL]).
//
// It works in two passes: the first validates the strings and sizes a
// byte buffer, the second copies the strings into that buffer and records
// a pointer to the start of each one. The pointer slice is allocated with
// one extra element, which is left nil.
func ( []string) ([]*byte, error) {
	// NOTE(review): the function name and all local identifiers are blank
	// in this listing; the doc comment above names it SlicePtrFromStrings.
	// Confirm against the upstream source before relying on the notes below.
	//
	// First pass: reject any string containing a NUL byte and total up the
	// space needed, counting one terminator byte per string.
	 := 0
	for ,  := range  {
		if bytealg.IndexByteString(, 0) != -1 {
			return nil, EINVAL
		}
		 += len() + 1 // +1 for NUL
	}
	// One pointer per string plus a trailing entry that stays nil.
	 := make([]*byte, len()+1)
	// The byte buffer is zeroed by make, which supplies the NUL that
	// follows each copied string.
	 := make([]byte, )
	// Reuse the running total as the write offset for the second pass.
	 = 0
	// Second pass: record the address of the current offset, copy the
	// string there, then advance past the string and its terminator.
	for ,  := range  {
		[] = &[]
		copy([:], )
		 += len() + 1
	}
	return , nil
}

// NOTE(review): the function name and parameter name are blank in this
// listing; confirm both against the upstream source.
//
// The visible body issues fcntl with the F_SETFD command and the FD_CLOEXEC
// flag on its int argument (presumably a file descriptor), i.e. it marks
// that descriptor close-on-exec. The results of fcntl are discarded, so a
// failure is silent.
func ( int) { fcntl(, F_SETFD, FD_CLOEXEC) }

// NOTE(review): the function name and all identifiers are blank in this
// listing; confirm them against the upstream source.
//
// The visible body switches the O_NONBLOCK status flag on or off for its
// int argument (presumably a file descriptor) according to its bool
// argument. It reads the current flags with fcntl(F_GETFL), returns nil
// without a second system call when the O_NONBLOCK bit already matches the
// request, and otherwise writes the updated flags back with fcntl(F_SETFL).
// Any error from either fcntl call is returned to the caller.
func ( int,  bool) ( error) {
	// Fetch the current file status flags.
	,  := fcntl(, F_GETFL, 0)
	if  != nil {
		return 
	}
	// Nothing to do if the bit is already in the requested state.
	if (&O_NONBLOCK != 0) ==  {
		return nil
	}
	// Set or clear only the O_NONBLOCK bit, leaving other flags intact.
	if  {
		 |= O_NONBLOCK
	} else {
		 &^= O_NONBLOCK
	}
	_,  = fcntl(, F_SETFL, )
	return 
}

// Credential describes the user and group identities that a child
// process started by [StartProcess] is to assume.
type Credential struct {
	// Uid is the user ID.
	Uid uint32
	// Gid is the group ID.
	Gid uint32
	// Groups lists the supplementary group IDs.
	Groups []uint32
	// NoSetGroups, if true, means the supplementary groups are not set.
	NoSetGroups bool
}

// ProcAttr collects the attributes that are applied to a new process
// started by [StartProcess].
type ProcAttr struct {
	// Dir is the current working directory.
	Dir string
	// Env is the environment.
	Env []string
	// Files holds the file descriptors.
	Files []uintptr
	// Sys points to the additional SysProcAttr settings.
	Sys *SysProcAttr
}

// zeroProcAttr and zeroSysProcAttr are zero-valued attribute structs.
// forkExec takes their addresses as stand-ins when it finds a nil value,
// presumably so the rest of that function can read attribute fields
// without further nil checks.
var zeroProcAttr ProcAttr
var zeroSysProcAttr SysProcAttr

// forkExec starts a new process and reports whether the exec in the child
// succeeded.
//
// It converts its string arguments to C form, validates the Setctty,
// Foreground and Ctty settings, and then, while holding the fork lock,
// creates a status pipe and calls forkAndExecInChild. After releasing the
// lock it reads from the pipe: per the comments in the body, EOF means the
// pipe was closed on exec and the exec succeeded, while any data or read
// error means the child failed, in which case the child is waited for and
// an error is returned. Every failure path returns 0 as the first result;
// on success the first result is the value obtained from forkAndExecInChild
// (presumably the child's process ID).
//
// NOTE(review): parameter and local identifiers are blank in this listing;
// confirm the details against the upstream source.
func forkExec( string,  []string,  *ProcAttr) ( int,  error) {
	var  [2]int
	var  int
	var  Errno
	var  WaitStatus

	// Fall back to the package-level zero values when no attributes (or no
	// Sys attributes) were supplied.
	if  == nil {
		 = &zeroProcAttr
	}
	 := .Sys
	if  == nil {
		 = &zeroSysProcAttr
	}

	// Convert args to C form.
	,  := BytePtrFromString()
	if  != nil {
		return 0, 
	}
	,  := SlicePtrFromStrings()
	if  != nil {
		return 0, 
	}
	,  := SlicePtrFromStrings(.Env)
	if  != nil {
		return 0, 
	}

	// NOTE(review): FreeBSD/DragonFly special case. The operands are blank
	// here; it appears to replace element 0 of a converted slice when the
	// first element of a non-empty slice is longer than another string.
	// Confirm the exact operands and the rationale upstream.
	if (runtime.GOOS == "freebsd" || runtime.GOOS == "dragonfly") && len() > 0 && len([0]) > len() {
		[0] = 
	}

	// Chroot and Dir are optional: each is converted only when non-empty,
	// otherwise the corresponding pointer stays nil.
	var  *byte
	if .Chroot != "" {
		,  = BytePtrFromString(.Chroot)
		if  != nil {
			return 0, 
		}
	}
	var  *byte
	if .Dir != "" {
		,  = BytePtrFromString(.Dir)
		if  != nil {
			return 0, 
		}
	}

	// Both Setctty and Foreground use the Ctty field,
	// but they give it slightly different meanings.
	if .Setctty && .Foreground {
		return 0, errorspkg.New("both Setctty and Foreground set in SysProcAttr")
	}
	// With Setctty, Ctty must be a valid index into Files.
	if .Setctty && .Ctty >= len(.Files) {
		return 0, errorspkg.New("Setctty set but Ctty not valid in child")
	}

	// The fork lock is held from here until forkAndExecInChild has
	// returned; each early return below releases it first.
	acquireForkLock()

	// Allocate child status pipe close on exec.
	if  = forkExecPipe([:]);  != nil {
		releaseForkLock()
		return 0, 
	}

	// Kick off child.
	,  = forkAndExecInChild(, , , , , , , [1])
	if  != 0 {
		// The fork step itself failed: close both pipe ends before
		// reporting the errno.
		Close([0])
		Close([1])
		releaseForkLock()
		return 0, Errno()
	}
	releaseForkLock()

	// Read child error status from pipe.
	// This process closes its copy of pipe end 1 (the end handed to the
	// child) and then reads from end 0, retrying while the read is
	// interrupted with EINTR.
	Close([1])
	for {
		,  = readlen([0], (*byte)(unsafe.Pointer(&)), int(unsafe.Sizeof()))
		if  != EINTR {
			break
		}
	}
	Close([0])
	if  != nil ||  != 0 {
		// A read of exactly the expected size is converted to an Errno;
		// if no error has been established by this point, EPIPE is used.
		if  == int(unsafe.Sizeof()) {
			 = Errno()
		}
		if  == nil {
			 = EPIPE
		}

		// Child failed; wait for it to exit, to make sure
		// the zombies don't accumulate.
		,  := Wait4(, &, 0, nil)
		for  == EINTR {
			_,  = Wait4(, &, 0, nil)
		}
		return 0, 
	}

	// Read got EOF, so pipe closed on exec, so exec succeeded.
	return , nil
}

// Combination of fork and exec, careful to be thread safe.
//
// This is a thin wrapper: it calls forkExec with its three arguments and
// returns that call's results unchanged.
//
// NOTE(review): the function name is blank in this listing; the
// StartProcess doc comment refers to a ForkExec, which is presumably this
// function. Confirm against the upstream source.
func ( string,  []string,  *ProcAttr) ( int,  error) {
	return forkExec(, , )
}

// StartProcess wraps [ForkExec] for package os.
//
// It calls forkExec and returns that call's int and error results. The
// middle uintptr result is always the constant 0.
//
// NOTE(review): the function name and parameter names are blank in this
// listing; the name is taken from the doc comment above.
func ( string,  []string,  *ProcAttr) ( int,  uintptr,  error) {
	,  = forkExec(, , )
	return , 0, 
}

// Implemented in runtime package.
//
// These are body-less declarations. In this file they bracket the exec
// system call: runtime_BeforeExec is called after the arguments have been
// converted and before the exec call is issued, and runtime_AfterExec is
// called once that call has returned.
func runtime_BeforeExec()
func runtime_AfterExec()

// execveLibc is non-nil on OS using libc syscall, set to execve in exec_libc.go; this
// avoids a build dependency for other platforms.
// In this file it is called when GOOS is solaris, illumos or aix.
var execveLibc func(path uintptr, argv uintptr, envp uintptr) Errno

// execveDarwin is the exec hook called when GOOS is darwin or ios.
// It is not assigned in the code visible here; presumably it is set by a
// platform-specific file, as execveLibc is.
var execveDarwin func(path *byte, argv **byte, envp **byte) error

// execveOpenBSD is the exec hook called when GOOS is openbsd and GOARCH is
// not mips64. It is not assigned in the code visible here; presumably it is
// set by a platform-specific file, as execveLibc is.
var execveOpenBSD func(path *byte, argv **byte, envp **byte) error

// Exec invokes the execve(2) system call.
//
// It converts its string argument and its two string-slice arguments to C
// form, returning early if any conversion fails. It then calls
// runtime_BeforeExec, applies the saved RLIMIT_NOFILE value if there is one,
// and issues the exec call through the mechanism selected by runtime.GOOS.
// If the exec call returns, runtime_AfterExec is called and the error from
// the exec call is returned.
//
// NOTE(review): the function name and all local identifiers are blank in
// this listing; the name is taken from the doc comment above.
func ( string,  []string,  []string) ( error) {
	,  := BytePtrFromString()
	if  != nil {
		return 
	}
	,  := SlicePtrFromStrings()
	if  != nil {
		return 
	}
	,  := SlicePtrFromStrings()
	if  != nil {
		return 
	}
	runtime_BeforeExec()

	// When origRlimitNofile holds a value, apply it as RLIMIT_NOFILE before
	// the exec (presumably restoring the limit the process started with;
	// confirm where origRlimitNofile is set). The result of Setrlimit is
	// ignored.
	 := origRlimitNofile.Load()
	if  != nil {
		Setrlimit(RLIMIT_NOFILE, )
	}

	// Pick the exec mechanism for the current operating system.
	var  error
	if runtime.GOOS == "solaris" || runtime.GOOS == "illumos" || runtime.GOOS == "aix" {
		// RawSyscall should never be used on Solaris, illumos, or AIX.
		 = execveLibc(
			uintptr(unsafe.Pointer()),
			uintptr(unsafe.Pointer(&[0])),
			uintptr(unsafe.Pointer(&[0])))
	} else if runtime.GOOS == "darwin" || runtime.GOOS == "ios" {
		// Similarly on Darwin.
		 = execveDarwin(, &[0], &[0])
	} else if runtime.GOOS == "openbsd" && runtime.GOARCH != "mips64" {
		// Similarly on OpenBSD.
		 = execveOpenBSD(, &[0], &[0])
	} else {
		// Everywhere else, issue SYS_EXECVE directly via RawSyscall.
		_, _,  = RawSyscall(SYS_EXECVE,
			uintptr(unsafe.Pointer()),
			uintptr(unsafe.Pointer(&[0])),
			uintptr(unsafe.Pointer(&[0])))
	}
	// Reached only if the exec call returned.
	runtime_AfterExec()
	return 
}