Home | History | Annotate | Line # | Download | only in kern
kern_exec.c revision 1.287
      1  1.287  christos /*	$NetBSD: kern_exec.c,v 1.287 2009/03/24 21:00:05 christos Exp $	*/
      2  1.277        ad 
      3  1.277        ad /*-
      4  1.277        ad  * Copyright (c) 2008 The NetBSD Foundation, Inc.
      5  1.277        ad  * All rights reserved.
      6  1.277        ad  *
      7  1.277        ad  * Redistribution and use in source and binary forms, with or without
      8  1.277        ad  * modification, are permitted provided that the following conditions
      9  1.277        ad  * are met:
     10  1.277        ad  * 1. Redistributions of source code must retain the above copyright
     11  1.277        ad  *    notice, this list of conditions and the following disclaimer.
     12  1.277        ad  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.277        ad  *    notice, this list of conditions and the following disclaimer in the
     14  1.277        ad  *    documentation and/or other materials provided with the distribution.
     15  1.277        ad  *
     16  1.277        ad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  1.277        ad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  1.277        ad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  1.277        ad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  1.277        ad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  1.277        ad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  1.277        ad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  1.277        ad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  1.277        ad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  1.277        ad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  1.277        ad  * POSSIBILITY OF SUCH DAMAGE.
     27  1.277        ad  */
     28   1.55       cgd 
     29   1.55       cgd /*-
     30   1.77       cgd  * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
     31   1.55       cgd  * Copyright (C) 1992 Wolfgang Solfrank.
     32   1.55       cgd  * Copyright (C) 1992 TooLs GmbH.
     33   1.55       cgd  * All rights reserved.
     34   1.55       cgd  *
     35   1.55       cgd  * Redistribution and use in source and binary forms, with or without
     36   1.55       cgd  * modification, are permitted provided that the following conditions
     37   1.55       cgd  * are met:
     38   1.55       cgd  * 1. Redistributions of source code must retain the above copyright
     39   1.55       cgd  *    notice, this list of conditions and the following disclaimer.
     40   1.55       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     41   1.55       cgd  *    notice, this list of conditions and the following disclaimer in the
     42   1.55       cgd  *    documentation and/or other materials provided with the distribution.
     43   1.55       cgd  * 3. All advertising materials mentioning features or use of this software
     44   1.55       cgd  *    must display the following acknowledgement:
     45   1.55       cgd  *	This product includes software developed by TooLs GmbH.
     46   1.55       cgd  * 4. The name of TooLs GmbH may not be used to endorse or promote products
     47   1.55       cgd  *    derived from this software without specific prior written permission.
     48   1.55       cgd  *
     49   1.55       cgd  * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
     50   1.55       cgd  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     51   1.55       cgd  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     52   1.55       cgd  * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     53   1.55       cgd  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     54   1.55       cgd  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     55   1.55       cgd  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     56   1.55       cgd  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     57   1.55       cgd  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     58   1.55       cgd  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     59   1.55       cgd  */
     60  1.146     lukem 
     61  1.146     lukem #include <sys/cdefs.h>
     62  1.287  christos __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.287 2009/03/24 21:00:05 christos Exp $");
     63   1.89       mrg 
     64   1.92   thorpej #include "opt_ktrace.h"
     65  1.285       apb #include "opt_modular.h"
     66  1.124  jdolecek #include "opt_syscall_debug.h"
     67  1.226    dogcow #include "veriexec.h"
     68  1.232      elad #include "opt_pax.h"
     69  1.279  wrstuden #include "opt_sa.h"
     70   1.55       cgd 
     71   1.55       cgd #include <sys/param.h>
     72   1.55       cgd #include <sys/systm.h>
     73   1.55       cgd #include <sys/filedesc.h>
     74   1.55       cgd #include <sys/kernel.h>
     75   1.55       cgd #include <sys/proc.h>
     76   1.55       cgd #include <sys/mount.h>
     77   1.55       cgd #include <sys/malloc.h>
     78  1.265      yamt #include <sys/kmem.h>
     79   1.55       cgd #include <sys/namei.h>
     80   1.55       cgd #include <sys/vnode.h>
     81   1.55       cgd #include <sys/file.h>
     82   1.55       cgd #include <sys/acct.h>
     83   1.55       cgd #include <sys/exec.h>
     84   1.55       cgd #include <sys/ktrace.h>
     85  1.278     pooka #include <sys/uidinfo.h>
     86   1.55       cgd #include <sys/wait.h>
     87   1.55       cgd #include <sys/mman.h>
     88  1.155  gmcgarry #include <sys/ras.h>
     89   1.55       cgd #include <sys/signalvar.h>
     90   1.55       cgd #include <sys/stat.h>
     91  1.124  jdolecek #include <sys/syscall.h>
     92  1.218      elad #include <sys/kauth.h>
     93  1.253        ad #include <sys/lwpctl.h>
     94  1.260  christos #include <sys/pax.h>
     95  1.263        ad #include <sys/cpu.h>
     96  1.282        ad #include <sys/module.h>
     97  1.279  wrstuden #include <sys/sa.h>
     98  1.279  wrstuden #include <sys/savar.h>
     99   1.56       cgd #include <sys/syscallargs.h>
    100  1.222      elad #if NVERIEXEC > 0
    101  1.197     blymn #include <sys/verified_exec.h>
    102  1.222      elad #endif /* NVERIEXEC > 0 */
    103   1.55       cgd 
    104   1.88       mrg #include <uvm/uvm_extern.h>
    105   1.88       mrg 
    106   1.55       cgd #include <machine/reg.h>
    107   1.55       cgd 
    108  1.244       dsl #include <compat/common/compat_util.h>
    109  1.244       dsl 
    110  1.171       chs static int exec_sigcode_map(struct proc *, const struct emul *);
    111  1.171       chs 
                         /*
                          * Debug tracing for the exec path.
                          *
                          * With DEBUG_EXEC defined, DPRINTF(a) expands to "uprintf a", so the
                          * macro argument must itself be a parenthesised argument list:
                          *
                          *	DPRINTF(("execve: check exec failed %d\n", error));
                          *
                          * Without DEBUG_EXEC the macro expands to nothing and its argument is
                          * discarded by the preprocessor, so it must not carry side effects.
                          */
    112  1.143  christos #ifdef DEBUG_EXEC
    113  1.143  christos #define DPRINTF(a) uprintf a
    114  1.143  christos #else
    115  1.143  christos #define DPRINTF(a)
    116  1.143  christos #endif /* DEBUG_EXEC */
    117  1.165   thorpej 
    118  1.130  jdolecek /*
    119  1.130  jdolecek  * Exec function switch:
    120  1.130  jdolecek  *
    121  1.130  jdolecek  * Note that each makecmds function is responsible for loading the
    122  1.130  jdolecek  * exec package with the necessary functions for any exec-type-specific
    123  1.130  jdolecek  * handling.
    124  1.130  jdolecek  *
    125  1.130  jdolecek  * Functions for specific exec types should be defined in their own
    126  1.130  jdolecek  * header file.
    127  1.130  jdolecek  */
                         /*
                          * execsw[] holds nexecs entries.  check_exec() walks it from index 0
                          * upwards and calls each entry's es_makecmds hook until one accepts
                          * the image.
                          */
    128  1.138     lukem static const struct execsw	**execsw = NULL;
    129  1.138     lukem static int			nexecs;
    130  1.138     lukem 
                         /*
                          * Size in bytes of the header buffer that execve1() allocates and
                          * records in ep_hdrlen before calling check_exec().
                          */
    131  1.282        ad u_int	exec_maxhdrsz;	 /* must not be static - used by netbsd32 */
    132  1.130  jdolecek 
    133  1.130  jdolecek /* list of dynamically loaded execsw entries */
    134  1.282        ad static LIST_HEAD(execlist_head, exec_entry) ex_head =
    135  1.282        ad     LIST_HEAD_INITIALIZER(ex_head);
                         /*
                          * Node of the ex_head list.
                          *  ex_list   linkage for the LIST headed by ex_head
                          *  ex_slist  a second, singly-linked linkage; its user is not visible
                          *            in this part of the file
                          *  ex_sw     presumably the execsw this node stands for - confirm
                          *            against the code that populates the list
                          */
    136  1.130  jdolecek struct exec_entry {
    137  1.138     lukem 	LIST_ENTRY(exec_entry)	ex_list;
    138  1.282        ad 	SLIST_ENTRY(exec_entry)	ex_slist;
    139  1.282        ad 	const struct execsw	*ex_sw;
    140  1.130  jdolecek };
    141  1.130  jdolecek 
    142  1.124  jdolecek #ifdef SYSCALL_DEBUG
    143  1.124  jdolecek extern const char * const syscallnames[];
    144  1.124  jdolecek #endif
    145  1.124  jdolecek 
    146  1.203  christos #ifndef __HAVE_SYSCALL_INTERN
    147  1.203  christos void	syscall(void);
    148  1.203  christos #endif
    149  1.203  christos 
                         /*
                          * sa_emul descriptor for the native emulation.  It is compiled only
                          * when KERN_SA is defined and is referenced from emul_netbsd.
                          *
                          * The initializer is positional, so the order of the values must
                          * track the member order of struct sa_emul, which is declared outside
                          * this file (presumably in <sys/savar.h> - confirm).  The first three
                          * values are sizes, the two NULLs leave optional hooks unset, and the
                          * getucontext_sa entry is cast to the hook's
                          * void (*)(struct lwp *, void *) type.
                          */
    150  1.280      matt #ifdef KERN_SA
    151  1.282        ad static struct sa_emul saemul_netbsd = {
    152  1.279  wrstuden 	sizeof(ucontext_t),
    153  1.279  wrstuden 	sizeof(struct sa_t),
    154  1.279  wrstuden 	sizeof(struct sa_t *),
    155  1.279  wrstuden 	NULL,
    156  1.279  wrstuden 	NULL,
    157  1.279  wrstuden 	cpu_upcall,
    158  1.279  wrstuden 	(void (*)(struct lwp *, void *))getucontext_sa,
    159  1.279  wrstuden 	sa_ucsp
    160  1.279  wrstuden };
    161  1.280      matt #endif /* KERN_SA */
    162  1.279  wrstuden 
    163  1.173  christos /* NetBSD emul struct */
                         /*
                          * Descriptor of the native "netbsd" emulation.
                          *
                          * The initializer is positional: every value lands in the member of
                          * struct emul at the same position, so both the order and the
                          * conditional sections below must track the struct declaration,
                          * which is not part of this file.  NOTE(review): a member added to
                          * struct emul without a matching line here would silently shift the
                          * values that follow it.
                          *
                          * Conditional sections:
                          *  - without __HAVE_MINIMAL_EMUL, four extra values are supplied
                          *    (EMUL_HAS_SYS___syscall, NULL, SYS_syscall, SYS_NSYSENT);
                          *  - SYSCALL_DEBUG selects syscallnames, otherwise NULL;
                          *  - __HAVE_SYSCALL_INTERN selects syscall_intern, otherwise syscall;
                          *  - KERN_SA selects &saemul_netbsd, otherwise NULL.
                          */
    164  1.282        ad struct emul emul_netbsd = {
    165  1.124  jdolecek 	"netbsd",
    166  1.127  jdolecek 	NULL,		/* emulation path */
    167  1.133   mycroft #ifndef __HAVE_MINIMAL_EMUL
    168  1.140      manu 	EMUL_HAS_SYS___syscall,
    169  1.124  jdolecek 	NULL,
    170  1.124  jdolecek 	SYS_syscall,
    171  1.161  jdolecek 	SYS_NSYSENT,
    172  1.133   mycroft #endif
    173  1.124  jdolecek 	sysent,
    174  1.124  jdolecek #ifdef SYSCALL_DEBUG
    175  1.124  jdolecek 	syscallnames,
    176  1.124  jdolecek #else
    177  1.124  jdolecek 	NULL,
    178  1.124  jdolecek #endif
    179  1.133   mycroft 	sendsig,
    180  1.142  christos 	trapsignal,
    181  1.180      fvdl 	NULL,
    182  1.173  christos 	NULL,
    183  1.173  christos 	NULL,
    184  1.173  christos 	NULL,
    185  1.145  jdolecek 	setregs,
    186  1.128  jdolecek 	NULL,
    187  1.128  jdolecek 	NULL,
    188  1.128  jdolecek 	NULL,
    189  1.179      manu 	NULL,
    190  1.179      manu 	NULL,
    191  1.133   mycroft #ifdef __HAVE_SYSCALL_INTERN
    192  1.133   mycroft 	syscall_intern,
    193  1.133   mycroft #else
    194  1.133   mycroft 	syscall,
    195  1.133   mycroft #endif
    196  1.156      manu 	NULL,
    197  1.156      manu 	NULL,
    198  1.195      fvdl 
    199  1.195      fvdl 	uvm_default_mapaddr,
    200  1.216      cube 	NULL,
    201  1.280      matt #ifdef KERN_SA
    202  1.279  wrstuden 	&saemul_netbsd,
    203  1.280      matt #else
    204  1.280      matt 	NULL,
    205  1.280      matt #endif
    206  1.237        ad 	sizeof(ucontext_t),
    207  1.239      cube 	startlwp,
    208  1.124  jdolecek };
    209  1.124  jdolecek 
    210   1.55       cgd /*
    211  1.130  jdolecek  * Exec lock. Used to control access to execsw[] structures.
    212  1.130  jdolecek  * This must not be static so that netbsd32 can access it, too.
    213  1.130  jdolecek  */
                         /*
                          * execve1() takes exec_lock as a reader before it calls check_exec(),
                          * which is where execsw[] is walked.
                          */
    214  1.237        ad krwlock_t exec_lock;
    215  1.183  junyoung 
                         /*
                          * NOTE(review): nothing in this part of the file uses sigobject_lock.
                          * By its name it presumably serialises creation of the shared sigcode
                          * object set up by exec_sigcode_map() - confirm at the use site.
                          */
    216  1.259        ad static kmutex_t sigobject_lock;
    217  1.259        ad 
    218  1.277        ad static void *
    219  1.277        ad exec_pool_alloc(struct pool *pp, int flags)
    220  1.277        ad {
    221  1.277        ad 
    222  1.277        ad 	return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
    223  1.277        ad 	    UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
    224  1.277        ad }
    225  1.277        ad 
    226  1.277        ad static void
    227  1.277        ad exec_pool_free(struct pool *pp, void *addr)
    228  1.277        ad {
    229  1.277        ad 
    230  1.277        ad 	uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
    231  1.277        ad }
    232  1.277        ad 
                         /*
                          * Pool that execve1() draws its argument buffer from
                          * (pool_get(&exec_pool, PR_WAITOK)).
                          */
    233  1.277        ad static struct pool exec_pool;
    234  1.277        ad 
                         /*
                          * Back-end allocator description: "pages" are NCARGS bytes each and
                          * are obtained and released through exec_pool_alloc() and
                          * exec_pool_free().
                          */
    235  1.277        ad static struct pool_allocator exec_palloc = {
    236  1.277        ad 	.pa_alloc = exec_pool_alloc,
    237  1.277        ad 	.pa_free = exec_pool_free,
    238  1.277        ad 	.pa_pagesz = NCARGS
    239  1.277        ad };
    240  1.277        ad 
    241  1.130  jdolecek /*
    242   1.55       cgd  * check exec:
    243   1.55       cgd  * given an "executable" described in the exec package's namei info,
    244   1.55       cgd  * see what we can do with it.
    245   1.55       cgd  *
    246   1.55       cgd  * ON ENTRY:
    247   1.55       cgd  *	exec package with appropriate namei info
    248  1.212  christos  *	lwp pointer of exec'ing lwp
    249   1.55       cgd  *	NO SELF-LOCKED VNODES
    250   1.55       cgd  *
    251   1.55       cgd  * ON EXIT:
    252   1.55       cgd  *	error:	nothing held, etc.  exec header still allocated.
    253   1.77       cgd  *	ok:	filled exec package, executable's vnode (unlocked).
    254   1.55       cgd  *
    255   1.55       cgd  * EXEC SWITCH ENTRY:
    256   1.55       cgd  * 	Locked vnode to check, exec package, proc.
    257   1.55       cgd  *
    258   1.55       cgd  * EXEC SWITCH EXIT:
    259   1.77       cgd  *	ok:	return 0, filled exec package, executable's vnode (unlocked).
    260   1.55       cgd  *	error:	destructive:
    261   1.55       cgd  *			everything deallocated execept exec header.
    262   1.76       cgd  *		non-destructive:
    263   1.77       cgd  *			error code, executable's vnode (unlocked),
    264   1.76       cgd  *			exec header unmodified.
    265   1.55       cgd  */
    266   1.55       cgd int
    267  1.205  christos /*ARGSUSED*/
    268  1.233      elad check_exec(struct lwp *l, struct exec_package *epp)
    269   1.55       cgd {
    270  1.138     lukem 	int		error, i;
    271  1.138     lukem 	struct vnode	*vp;
    272   1.55       cgd 	struct nameidata *ndp;
    273  1.138     lukem 	size_t		resid;
    274   1.55       cgd 
    275   1.55       cgd 	ndp = epp->ep_ndp;
    276   1.55       cgd 	ndp->ni_cnd.cn_nameiop = LOOKUP;
    277  1.244       dsl 	ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
    278   1.55       cgd 	/* first get the vnode */
    279   1.74  christos 	if ((error = namei(ndp)) != 0)
    280   1.55       cgd 		return error;
    281   1.55       cgd 	epp->ep_vp = vp = ndp->ni_vp;
    282   1.55       cgd 
    283   1.84   mycroft 	/* check access and type */
    284   1.55       cgd 	if (vp->v_type != VREG) {
    285   1.81    kleink 		error = EACCES;
    286   1.55       cgd 		goto bad1;
    287   1.55       cgd 	}
    288  1.254     pooka 	if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
    289   1.84   mycroft 		goto bad1;
    290   1.55       cgd 
    291   1.55       cgd 	/* get attributes */
    292  1.254     pooka 	if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
    293   1.55       cgd 		goto bad1;
    294   1.55       cgd 
    295   1.55       cgd 	/* Check mount point */
    296   1.55       cgd 	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
    297   1.55       cgd 		error = EACCES;
    298   1.55       cgd 		goto bad1;
    299   1.55       cgd 	}
    300  1.141   thorpej 	if (vp->v_mount->mnt_flag & MNT_NOSUID)
    301   1.83   mycroft 		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
    302   1.55       cgd 
    303   1.55       cgd 	/* try to open it */
    304  1.254     pooka 	if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
    305   1.55       cgd 		goto bad1;
    306   1.55       cgd 
    307   1.99  wrstuden 	/* unlock vp, since we need it unlocked from here on out. */
    308   1.90      fvdl 	VOP_UNLOCK(vp, 0);
    309   1.77       cgd 
    310  1.222      elad #if NVERIEXEC > 0
    311  1.236      elad 	error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
    312  1.233      elad 	    epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
    313  1.236      elad 	    NULL);
    314  1.236      elad 	if (error)
    315  1.234      elad 		goto bad2;
    316  1.222      elad #endif /* NVERIEXEC > 0 */
    317  1.160     blymn 
    318  1.232      elad #ifdef PAX_SEGVGUARD
    319  1.240   thorpej 	error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
    320  1.234      elad 	if (error)
    321  1.234      elad 		goto bad2;
    322  1.232      elad #endif /* PAX_SEGVGUARD */
    323  1.232      elad 
    324   1.55       cgd 	/* now we have the file, get the exec header */
    325   1.74  christos 	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
    326  1.223        ad 			UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
    327   1.74  christos 	if (error)
    328   1.55       cgd 		goto bad2;
    329   1.55       cgd 	epp->ep_hdrvalid = epp->ep_hdrlen - resid;
    330   1.55       cgd 
    331   1.55       cgd 	/*
    332  1.136       eeh 	 * Set up default address space limits.  Can be overridden
    333  1.136       eeh 	 * by individual exec packages.
    334  1.183  junyoung 	 *
    335  1.235    rillig 	 * XXX probably should be all done in the exec packages.
    336  1.136       eeh 	 */
    337  1.136       eeh 	epp->ep_vm_minaddr = VM_MIN_ADDRESS;
    338  1.136       eeh 	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
    339  1.136       eeh 	/*
    340   1.55       cgd 	 * set up the vmcmds for creation of the process
    341   1.55       cgd 	 * address space
    342   1.55       cgd 	 */
    343   1.55       cgd 	error = ENOEXEC;
    344  1.244       dsl 	for (i = 0; i < nexecs; i++) {
    345   1.68       cgd 		int newerror;
    346   1.68       cgd 
    347  1.130  jdolecek 		epp->ep_esch = execsw[i];
    348  1.212  christos 		newerror = (*execsw[i]->es_makecmds)(l, epp);
    349  1.244       dsl 
    350  1.244       dsl 		if (!newerror) {
    351  1.244       dsl 			/* Seems ok: check that entry point is sane */
    352  1.244       dsl 			if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
    353  1.244       dsl 				error = ENOEXEC;
    354  1.244       dsl 				break;
    355  1.244       dsl 			}
    356  1.244       dsl 
    357  1.244       dsl 			/* check limits */
    358  1.244       dsl 			if ((epp->ep_tsize > MAXTSIZ) ||
    359  1.244       dsl 			    (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
    360  1.244       dsl 						    [RLIMIT_DATA].rlim_cur)) {
    361  1.244       dsl 				error = ENOMEM;
    362  1.244       dsl 				break;
    363  1.244       dsl 			}
    364  1.244       dsl 			return 0;
    365  1.244       dsl 		}
    366  1.244       dsl 
    367  1.244       dsl 		if (epp->ep_emul_root != NULL) {
    368  1.244       dsl 			vrele(epp->ep_emul_root);
    369  1.244       dsl 			epp->ep_emul_root = NULL;
    370  1.244       dsl 		}
    371  1.244       dsl 		if (epp->ep_interp != NULL) {
    372  1.244       dsl 			vrele(epp->ep_interp);
    373  1.244       dsl 			epp->ep_interp = NULL;
    374  1.244       dsl 		}
    375  1.244       dsl 
    376   1.68       cgd 		/* make sure the first "interesting" error code is saved. */
    377  1.244       dsl 		if (error == ENOEXEC)
    378   1.68       cgd 			error = newerror;
    379  1.124  jdolecek 
    380  1.244       dsl 		if (epp->ep_flags & EXEC_DESTR)
    381  1.244       dsl 			/* Error from "#!" code, tidied up by recursive call */
    382   1.55       cgd 			return error;
    383   1.55       cgd 	}
    384   1.55       cgd 
    385  1.249     pooka 	/* not found, error */
    386  1.249     pooka 
    387   1.55       cgd 	/*
    388   1.55       cgd 	 * free any vmspace-creation commands,
    389   1.55       cgd 	 * and release their references
    390   1.55       cgd 	 */
    391   1.55       cgd 	kill_vmcmds(&epp->ep_vmcmds);
    392   1.55       cgd 
    393   1.55       cgd bad2:
    394   1.55       cgd 	/*
    395   1.99  wrstuden 	 * close and release the vnode, restore the old one, free the
    396   1.55       cgd 	 * pathname buf, and punt.
    397   1.55       cgd 	 */
    398   1.99  wrstuden 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
    399  1.254     pooka 	VOP_CLOSE(vp, FREAD, l->l_cred);
    400   1.99  wrstuden 	vput(vp);
    401  1.120   thorpej 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
    402   1.55       cgd 	return error;
    403   1.55       cgd 
    404   1.55       cgd bad1:
    405   1.55       cgd 	/*
    406   1.55       cgd 	 * free the namei pathname buffer, and put the vnode
    407   1.55       cgd 	 * (which we don't yet have open).
    408   1.55       cgd 	 */
    409   1.77       cgd 	vput(vp);				/* was still locked */
    410  1.120   thorpej 	PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
    411   1.55       cgd 	return error;
    412   1.55       cgd }
    413   1.55       cgd 
                         /*
                          * STACK_PTHREADSPACE is one page (NBPG) on machines whose stack grows
                          * upwards and 0 everywhere else.  NOTE(review): the use site is not in
                          * this part of the file; by its name this is presumably stack space
                          * set aside for the threading library - confirm there.
                          */
    414  1.188       chs #ifdef __MACHINE_STACK_GROWS_UP
    415  1.188       chs #define STACK_PTHREADSPACE NBPG
    416  1.188       chs #else
    417  1.188       chs #define STACK_PTHREADSPACE 0
    418  1.188       chs #endif
    419  1.188       chs 
    420  1.204      cube static int
    421  1.204      cube execve_fetch_element(char * const *array, size_t index, char **value)
    422  1.204      cube {
    423  1.204      cube 	return copyin(array + index, value, sizeof(*value));
    424  1.204      cube }
    425  1.204      cube 
    426   1.55       cgd /*
    427   1.55       cgd  * exec system call
    428   1.55       cgd  */
    429   1.55       cgd /* ARGSUSED */
    430   1.75  christos int
    431  1.258       dsl sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
    432   1.71   thorpej {
    433  1.258       dsl 	/* {
    434  1.138     lukem 		syscallarg(const char *)	path;
    435  1.138     lukem 		syscallarg(char * const *)	argp;
    436  1.138     lukem 		syscallarg(char * const *)	envp;
    437  1.258       dsl 	} */
    438  1.204      cube 
    439  1.204      cube 	return execve1(l, SCARG(uap, path), SCARG(uap, argp),
    440  1.204      cube 	    SCARG(uap, envp), execve_fetch_element);
    441  1.204      cube }
    442  1.204      cube 
/*
 * exec_autoload:
 *
 *	Try to autoload modules that might let us execute an image we
 *	do not understand.  While no execsw entries exist at all, only
 *	the modules likely to be needed for native images are tried;
 *	otherwise every module that could possibly help is tried.
 *	XXX lame
 *
 *	Autoload failures are ignored.  After each successful load,
 *	module_lock is dropped around a yield() and then taken again.
 *	The function is empty in kernels built without MODULAR.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_irix",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	char const * const *modname;

	mutex_enter(&module_lock);
	modname = (nexecs == 0) ? native : compat;
	for (; *modname != NULL; modname++) {
		if (module_autoload(*modname, MODULE_CLASS_MISC) == 0) {
			/* Loaded: drop the lock around a yield. */
			mutex_exit(&module_lock);
			yield();
			mutex_enter(&module_lock);
		}
	}
	mutex_exit(&module_lock);
#endif
}
    496  1.282        ad 
    497  1.204      cube int
    498  1.204      cube execve1(struct lwp *l, const char *path, char * const *args,
    499  1.204      cube     char * const *envs, execve_fetch_element_t fetch_element)
    500  1.204      cube {
    501  1.153   thorpej 	int			error;
    502  1.138     lukem 	struct exec_package	pack;
    503  1.138     lukem 	struct nameidata	nid;
    504  1.138     lukem 	struct vattr		attr;
    505  1.164   thorpej 	struct proc		*p;
    506  1.138     lukem 	char			*argp;
    507  1.138     lukem 	char			*dp, *sp;
    508  1.138     lukem 	long			argc, envc;
    509  1.248  christos 	size_t			i, len;
    510  1.138     lukem 	char			*stack;
    511  1.138     lukem 	struct ps_strings	arginfo;
    512  1.213      manu 	struct ps_strings	*aip = &arginfo;
    513  1.138     lukem 	struct vmspace		*vm;
    514  1.265      yamt 	struct exec_fakearg	*tmpfap;
    515  1.138     lukem 	int			szsigcode;
    516  1.138     lukem 	struct exec_vmcmd	*base_vcp;
    517  1.279  wrstuden 	int			oldlwpflags;
    518  1.237        ad 	ksiginfo_t		ksi;
    519  1.237        ad 	ksiginfoq_t		kq;
    520  1.260  christos 	char			*pathbuf;
    521  1.255  christos 	size_t			pathbuflen;
    522  1.282        ad 	u_int			modgen;
    523   1.55       cgd 
    524  1.237        ad 	p = l->l_proc;
    525  1.282        ad  	modgen = 0;
    526  1.164   thorpej 
    527  1.149  christos 	/*
    528  1.269  christos 	 * Check if we have exceeded our number of processes limit.
    529  1.269  christos 	 * This is so that we handle the case where a root daemon
    530  1.269  christos 	 * forked, ran setuid to become the desired user and is trying
    531  1.269  christos 	 * to exec. The obvious place to do the reference counting check
    532  1.269  christos 	 * is setuid(), but we don't do the reference counting check there
    533  1.269  christos 	 * like other OS's do because then all the programs that use setuid()
    534  1.269  christos 	 * must be modified to check the return code of setuid() and exit().
    535  1.269  christos 	 * It is dangerous to make setuid() fail, because it fails open and
    536  1.269  christos 	 * the program will continue to run as root. If we make it succeed
    537  1.269  christos 	 * and return an error code, again we are not enforcing the limit.
    538  1.269  christos 	 * The best place to enforce the limit is here, when the process tries
    539  1.269  christos 	 * to execute a new image, because eventually the process will need
    540  1.269  christos 	 * to call exec in order to do something useful.
    541  1.269  christos 	 */
    542  1.282        ad  retry:
    543  1.287  christos 	if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
    544  1.287  christos 	    KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
    545  1.287  christos 	    l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
    546  1.269  christos 		return EAGAIN;
    547  1.269  christos 
    548  1.279  wrstuden 	oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
    549  1.279  wrstuden 	if (l->l_flag & LW_SA) {
    550  1.279  wrstuden 		lwp_lock(l);
    551  1.279  wrstuden 		l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
    552  1.279  wrstuden 		lwp_unlock(l);
    553  1.279  wrstuden 	}
    554  1.279  wrstuden 
    555  1.269  christos 	/*
    556  1.237        ad 	 * Drain existing references and forbid new ones.  The process
    557  1.237        ad 	 * should be left alone until we're done here.  This is necessary
    558  1.237        ad 	 * to avoid race conditions - e.g. in ptrace() - that might allow
    559  1.237        ad 	 * a local user to illicitly obtain elevated privileges.
    560  1.237        ad 	 */
    561  1.252        ad 	rw_enter(&p->p_reflock, RW_WRITER);
    562  1.149  christos 
    563  1.138     lukem 	base_vcp = NULL;
    564   1.55       cgd 	/*
    565  1.129  jdolecek 	 * Init the namei data to point the file user's program name.
    566  1.129  jdolecek 	 * This is done here rather than in check_exec(), so that it's
    567  1.129  jdolecek 	 * possible to override this settings if any of makecmd/probe
    568  1.129  jdolecek 	 * functions call check_exec() recursively - for example,
    569  1.129  jdolecek 	 * see exec_script_makecmds().
    570  1.129  jdolecek 	 */
    571  1.260  christos 	pathbuf = PNBUF_GET();
    572  1.260  christos 	error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
    573  1.248  christos 	if (error) {
    574  1.248  christos 		DPRINTF(("execve: copyinstr path %d", error));
    575  1.200      elad 		goto clrflg;
    576  1.248  christos 	}
    577  1.200      elad 
    578  1.257     pooka 	NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
    579   1.55       cgd 
    580   1.55       cgd 	/*
    581   1.55       cgd 	 * initialize the fields of the exec package.
    582   1.55       cgd 	 */
    583  1.204      cube 	pack.ep_name = path;
    584  1.265      yamt 	pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
    585   1.55       cgd 	pack.ep_hdrlen = exec_maxhdrsz;
    586   1.55       cgd 	pack.ep_hdrvalid = 0;
    587   1.55       cgd 	pack.ep_ndp = &nid;
    588   1.67  christos 	pack.ep_emul_arg = NULL;
    589   1.55       cgd 	pack.ep_vmcmds.evs_cnt = 0;
    590   1.55       cgd 	pack.ep_vmcmds.evs_used = 0;
    591   1.55       cgd 	pack.ep_vap = &attr;
    592   1.55       cgd 	pack.ep_flags = 0;
    593  1.244       dsl 	pack.ep_emul_root = NULL;
    594  1.244       dsl 	pack.ep_interp = NULL;
    595  1.244       dsl 	pack.ep_esch = NULL;
    596  1.273        ad 	pack.ep_pax_flags = 0;
    597   1.55       cgd 
    598  1.237        ad 	rw_enter(&exec_lock, RW_READER);
    599  1.130  jdolecek 
    600   1.55       cgd 	/* see if we can run it. */
    601  1.248  christos 	if ((error = check_exec(l, &pack)) != 0) {
    602  1.261   xtraeme 		if (error != ENOENT) {
    603  1.260  christos 			DPRINTF(("execve: check exec failed %d\n", error));
    604  1.261   xtraeme 		}
    605   1.55       cgd 		goto freehdr;
    606  1.248  christos 	}
    607   1.55       cgd 
    608   1.55       cgd 	/* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
    609   1.55       cgd 
    610   1.55       cgd 	/* allocate an argument buffer */
    611  1.277        ad 	argp = pool_get(&exec_pool, PR_WAITOK);
    612  1.277        ad 	KASSERT(argp != NULL);
    613   1.55       cgd 	dp = argp;
    614   1.55       cgd 	argc = 0;
    615   1.55       cgd 
    616   1.55       cgd 	/* copy the fake args list, if there's one, freeing it as we go */
    617   1.55       cgd 	if (pack.ep_flags & EXEC_HASARGL) {
    618   1.55       cgd 		tmpfap = pack.ep_fa;
    619  1.265      yamt 		while (tmpfap->fa_arg != NULL) {
    620  1.265      yamt 			const char *cp;
    621   1.55       cgd 
    622  1.265      yamt 			cp = tmpfap->fa_arg;
    623   1.55       cgd 			while (*cp)
    624   1.55       cgd 				*dp++ = *cp++;
    625  1.276        ad 			*dp++ = '\0';
    626   1.55       cgd 
    627  1.265      yamt 			kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
    628   1.55       cgd 			tmpfap++; argc++;
    629   1.55       cgd 		}
    630  1.265      yamt 		kmem_free(pack.ep_fa, pack.ep_fa_len);
    631   1.55       cgd 		pack.ep_flags &= ~EXEC_HASARGL;
    632   1.55       cgd 	}
    633   1.55       cgd 
    634   1.55       cgd 	/* Now get argv & environment */
    635  1.204      cube 	if (args == NULL) {
    636  1.248  christos 		DPRINTF(("execve: null args\n"));
    637   1.55       cgd 		error = EINVAL;
    638   1.55       cgd 		goto bad;
    639   1.55       cgd 	}
    640  1.204      cube 	/* 'i' will index the argp/envp element to be retrieved */
    641  1.204      cube 	i = 0;
    642   1.55       cgd 	if (pack.ep_flags & EXEC_SKIPARG)
    643  1.204      cube 		i++;
    644   1.55       cgd 
    645   1.55       cgd 	while (1) {
    646   1.55       cgd 		len = argp + ARG_MAX - dp;
    647  1.248  christos 		if ((error = (*fetch_element)(args, i, &sp)) != 0) {
    648  1.248  christos 			DPRINTF(("execve: fetch_element args %d\n", error));
    649   1.55       cgd 			goto bad;
    650  1.248  christos 		}
    651   1.55       cgd 		if (!sp)
    652   1.55       cgd 			break;
    653   1.74  christos 		if ((error = copyinstr(sp, dp, len, &len)) != 0) {
    654  1.248  christos 			DPRINTF(("execve: copyinstr args %d\n", error));
    655   1.55       cgd 			if (error == ENAMETOOLONG)
    656   1.55       cgd 				error = E2BIG;
    657   1.55       cgd 			goto bad;
    658   1.55       cgd 		}
    659  1.247        ad 		ktrexecarg(dp, len - 1);
    660   1.55       cgd 		dp += len;
    661  1.204      cube 		i++;
    662   1.55       cgd 		argc++;
    663   1.55       cgd 	}
    664   1.55       cgd 
    665   1.55       cgd 	envc = 0;
    666   1.74  christos 	/* environment need not be there */
    667  1.204      cube 	if (envs != NULL) {
    668  1.204      cube 		i = 0;
    669   1.55       cgd 		while (1) {
    670   1.55       cgd 			len = argp + ARG_MAX - dp;
    671  1.248  christos 			if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
    672  1.248  christos 				DPRINTF(("execve: fetch_element env %d\n", error));
    673   1.55       cgd 				goto bad;
    674  1.248  christos 			}
    675   1.55       cgd 			if (!sp)
    676   1.55       cgd 				break;
    677   1.74  christos 			if ((error = copyinstr(sp, dp, len, &len)) != 0) {
    678  1.248  christos 				DPRINTF(("execve: copyinstr env %d\n", error));
    679   1.55       cgd 				if (error == ENAMETOOLONG)
    680   1.55       cgd 					error = E2BIG;
    681   1.55       cgd 				goto bad;
    682   1.55       cgd 			}
    683  1.247        ad 			ktrexecenv(dp, len - 1);
    684   1.55       cgd 			dp += len;
    685  1.204      cube 			i++;
    686   1.55       cgd 			envc++;
    687   1.55       cgd 		}
    688   1.55       cgd 	}
    689   1.61   mycroft 
    690   1.61   mycroft 	dp = (char *) ALIGN(dp);
    691   1.55       cgd 
    692  1.244       dsl 	szsigcode = pack.ep_esch->es_emul->e_esigcode -
    693  1.244       dsl 	    pack.ep_esch->es_emul->e_sigcode;
    694   1.65      fvdl 
    695  1.267       dsl #ifdef __MACHINE_STACK_GROWS_UP
    696  1.267       dsl /* See big comment lower down */
    697  1.267       dsl #define	RTLD_GAP	32
    698  1.267       dsl #else
    699  1.267       dsl #define	RTLD_GAP	0
    700  1.267       dsl #endif
    701  1.267       dsl 
    702   1.55       cgd 	/* Now check if args & environ fit into new stack */
    703  1.105       eeh 	if (pack.ep_flags & EXEC_32)
    704  1.244       dsl 		len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
    705  1.267       dsl 		    sizeof(int) + sizeof(int) + dp + RTLD_GAP +
    706  1.188       chs 		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
    707  1.188       chs 		    - argp;
    708  1.105       eeh 	else
    709  1.244       dsl 		len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
    710  1.267       dsl 		    sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
    711  1.188       chs 		    szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
    712  1.188       chs 		    - argp;
    713   1.67  christos 
    714  1.262      elad #ifdef PAX_ASLR
    715  1.262      elad 	if (pax_aslr_active(l))
    716  1.262      elad 		len += (arc4random() % PAGE_SIZE);
    717  1.262      elad #endif /* PAX_ASLR */
    718  1.262      elad 
    719  1.243      matt #ifdef STACKLALIGN	/* arm, etc. */
    720  1.243      matt 	len = STACKALIGN(len);	/* make the stack "safely" aligned */
    721  1.243      matt #else
    722   1.55       cgd 	len = ALIGN(len);	/* make the stack "safely" aligned */
    723  1.243      matt #endif
    724   1.55       cgd 
    725   1.55       cgd 	if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
    726  1.248  christos 		DPRINTF(("execve: stack limit exceeded %zu\n", len));
    727   1.55       cgd 		error = ENOMEM;
    728   1.55       cgd 		goto bad;
    729   1.55       cgd 	}
    730   1.55       cgd 
    731  1.237        ad 	/* Get rid of other LWPs. */
    732  1.279  wrstuden 	if (p->p_sa || p->p_nlwps > 1) {
    733  1.272        ad 		mutex_enter(p->p_lock);
    734  1.237        ad 		exit_lwps(l);
    735  1.272        ad 		mutex_exit(p->p_lock);
    736  1.237        ad 	}
    737  1.164   thorpej 	KDASSERT(p->p_nlwps == 1);
    738  1.164   thorpej 
    739  1.253        ad 	/* Destroy any lwpctl info. */
    740  1.253        ad 	if (p->p_lwpctl != NULL)
    741  1.253        ad 		lwp_ctl_exit();
    742  1.253        ad 
    743  1.164   thorpej 	/* This is now LWP 1 */
    744  1.164   thorpej 	l->l_lid = 1;
    745  1.164   thorpej 	p->p_nlwpid = 1;
    746  1.164   thorpej 
    747  1.279  wrstuden #ifdef KERN_SA
    748  1.279  wrstuden 	/* Release any SA state. */
    749  1.279  wrstuden 	if (p->p_sa)
    750  1.279  wrstuden 		sa_release(p);
    751  1.279  wrstuden #endif /* KERN_SA */
    752  1.279  wrstuden 
    753  1.164   thorpej 	/* Remove POSIX timers */
    754  1.164   thorpej 	timers_free(p, TIMERS_POSIX);
    755  1.164   thorpej 
    756   1.55       cgd 	/* adjust "active stack depth" for process VSZ */
    757   1.55       cgd 	pack.ep_ssize = len;	/* maybe should go elsewhere, but... */
    758   1.55       cgd 
    759   1.86   thorpej 	/*
    760   1.86   thorpej 	 * Do whatever is necessary to prepare the address space
    761   1.86   thorpej 	 * for remapping.  Note that this might replace the current
    762   1.86   thorpej 	 * vmspace with another!
    763   1.86   thorpej 	 */
    764  1.164   thorpej 	uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
    765   1.55       cgd 
    766  1.186       chs 	/* record proc's vnode, for use by procfs and others */
    767  1.186       chs         if (p->p_textvp)
    768  1.186       chs                 vrele(p->p_textvp);
    769  1.186       chs 	VREF(pack.ep_vp);
    770  1.186       chs 	p->p_textvp = pack.ep_vp;
    771  1.186       chs 
    772   1.55       cgd 	/* Now map address space */
    773   1.86   thorpej 	vm = p->p_vmspace;
    774  1.241    dogcow 	vm->vm_taddr = (void *)pack.ep_taddr;
    775   1.55       cgd 	vm->vm_tsize = btoc(pack.ep_tsize);
    776  1.241    dogcow 	vm->vm_daddr = (void*)pack.ep_daddr;
    777   1.55       cgd 	vm->vm_dsize = btoc(pack.ep_dsize);
    778   1.55       cgd 	vm->vm_ssize = btoc(pack.ep_ssize);
    779  1.241    dogcow 	vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
    780  1.241    dogcow 	vm->vm_minsaddr = (void *)pack.ep_minsaddr;
    781   1.55       cgd 
    782  1.260  christos #ifdef PAX_ASLR
    783  1.260  christos 	pax_aslr_init(l, vm);
    784  1.260  christos #endif /* PAX_ASLR */
    785  1.260  christos 
    786   1.55       cgd 	/* create the new process's VM space by running the vmcmds */
    787   1.55       cgd #ifdef DIAGNOSTIC
    788   1.55       cgd 	if (pack.ep_vmcmds.evs_used == 0)
    789   1.55       cgd 		panic("execve: no vmcmds");
    790   1.55       cgd #endif
    791   1.55       cgd 	for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
    792   1.55       cgd 		struct exec_vmcmd *vcp;
    793   1.55       cgd 
    794   1.55       cgd 		vcp = &pack.ep_vmcmds.evs_cmds[i];
    795  1.114      matt 		if (vcp->ev_flags & VMCMD_RELATIVE) {
    796  1.114      matt #ifdef DIAGNOSTIC
    797  1.114      matt 			if (base_vcp == NULL)
    798  1.114      matt 				panic("execve: relative vmcmd with no base");
    799  1.114      matt 			if (vcp->ev_flags & VMCMD_BASE)
    800  1.114      matt 				panic("execve: illegal base & relative vmcmd");
    801  1.114      matt #endif
    802  1.114      matt 			vcp->ev_addr += base_vcp->ev_addr;
    803  1.114      matt 		}
    804  1.212  christos 		error = (*vcp->ev_proc)(l, vcp);
    805  1.143  christos #ifdef DEBUG_EXEC
    806  1.111      matt 		if (error) {
    807  1.248  christos 			size_t j;
    808  1.143  christos 			struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
    809  1.143  christos 			for (j = 0; j <= i; j++)
    810  1.143  christos 				uprintf(
    811  1.248  christos 			"vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
    812  1.143  christos 				    j, vp[j].ev_addr, vp[j].ev_len,
    813  1.143  christos 				    vp[j].ev_offset, vp[j].ev_prot,
    814  1.143  christos 				    vp[j].ev_flags);
    815  1.111      matt 		}
    816  1.143  christos #endif /* DEBUG_EXEC */
    817  1.114      matt 		if (vcp->ev_flags & VMCMD_BASE)
    818  1.114      matt 			base_vcp = vcp;
    819   1.55       cgd 	}
    820   1.55       cgd 
    821   1.55       cgd 	/* free the vmspace-creation commands, and release their references */
    822   1.55       cgd 	kill_vmcmds(&pack.ep_vmcmds);
    823   1.55       cgd 
    824  1.186       chs 	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
    825  1.254     pooka 	VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
    826  1.186       chs 	vput(pack.ep_vp);
    827  1.186       chs 
    828   1.55       cgd 	/* if an error happened, deallocate and punt */
    829  1.111      matt 	if (error) {
    830  1.248  christos 		DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
    831   1.55       cgd 		goto exec_abort;
    832  1.111      matt 	}
    833   1.55       cgd 
    834   1.55       cgd 	/* remember information about the process */
    835   1.55       cgd 	arginfo.ps_nargvstr = argc;
    836   1.55       cgd 	arginfo.ps_nenvstr = envc;
    837   1.55       cgd 
    838  1.255  christos 	/* set command name & other accounting info */
    839  1.255  christos 	i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
    840  1.255  christos 	(void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
    841  1.255  christos 	p->p_comm[i] = '\0';
    842  1.255  christos 
    843  1.255  christos 	dp = PNBUF_GET();
    844  1.255  christos 	/*
    845  1.255  christos 	 * If the path starts with /, we don't need to do any work.
    846  1.255  christos 	 * This handles the majority of the cases.
    847  1.255  christos 	 * In the future perhaps we could canonicalize it?
    848  1.255  christos 	 */
    849  1.255  christos 	if (pathbuf[0] == '/')
    850  1.255  christos 		(void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
    851  1.255  christos #ifdef notyet
    852  1.255  christos 	/*
    853  1.255  christos 	 * Although this works most of the time [since the entry was just
    854  1.255  christos 	 * entered in the cache] we don't use it because it theoretically
    855  1.255  christos 	 * can fail and it is not the cleanest interface, because there
    856  1.255  christos 	 * could be races. When the namei cache is re-written, this can
    857  1.255  christos 	 * be changed to use the appropriate function.
    858  1.255  christos 	 */
    859  1.255  christos 	else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
    860  1.255  christos 		pack.ep_path = dp;
    861  1.255  christos #endif
    862  1.255  christos 	else {
    863  1.256  christos #ifdef notyet
    864  1.255  christos 		printf("Cannot get path for pid %d [%s] (error %d)",
    865  1.255  christos 		    (int)p->p_pid, p->p_comm, error);
    866  1.255  christos #endif
    867  1.255  christos 		pack.ep_path = NULL;
    868  1.255  christos 		PNBUF_PUT(dp);
    869  1.255  christos 	}
    870  1.255  christos 
    871  1.163       chs 	stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
    872  1.188       chs 		STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
    873  1.163       chs 		len - (sizeof(struct ps_strings) + szsigcode));
    874  1.267       dsl 
    875  1.163       chs #ifdef __MACHINE_STACK_GROWS_UP
    876  1.163       chs 	/*
    877  1.163       chs 	 * The copyargs call always copies into lower addresses
    878  1.163       chs 	 * first, moving towards higher addresses, starting with
    879  1.183  junyoung 	 * the stack pointer that we give.  When the stack grows
    880  1.183  junyoung 	 * down, this puts argc/argv/envp very shallow on the
    881  1.267       dsl 	 * stack, right at the first user stack pointer.
    882  1.267       dsl 	 * When the stack grows up, the situation is reversed.
    883  1.163       chs 	 *
    884  1.163       chs 	 * Normally, this is no big deal.  But the ld_elf.so _rtld()
    885  1.183  junyoung 	 * function expects to be called with a single pointer to
    886  1.183  junyoung 	 * a region that has a few words it can stash values into,
    887  1.163       chs 	 * followed by argc/argv/envp.  When the stack grows down,
    888  1.163       chs 	 * it's easy to decrement the stack pointer a little bit to
    889  1.163       chs 	 * allocate the space for these few words and pass the new
    890  1.163       chs 	 * stack pointer to _rtld.  When the stack grows up, however,
    891  1.171       chs 	 * a few words before argc is part of the signal trampoline, XXX
    892  1.163       chs 	 * so we have a problem.
    893  1.163       chs 	 *
    894  1.183  junyoung 	 * Instead of changing how _rtld works, we take the easy way
    895  1.267       dsl 	 * out and steal 32 bytes before we call copyargs.
    896  1.267       dsl 	 * This extra space was allowed for when 'len' was calculated.
    897  1.163       chs 	 */
    898  1.267       dsl 	stack += RTLD_GAP;
    899  1.163       chs #endif /* __MACHINE_STACK_GROWS_UP */
    900  1.163       chs 
    901   1.55       cgd 	/* Now copy argc, args & environ to new stack */
    902  1.244       dsl 	error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
    903  1.255  christos 	if (pack.ep_path) {
    904  1.255  christos 		PNBUF_PUT(pack.ep_path);
    905  1.255  christos 		pack.ep_path = NULL;
    906  1.255  christos 	}
    907  1.144  christos 	if (error) {
    908  1.144  christos 		DPRINTF(("execve: copyargs failed %d\n", error));
    909   1.55       cgd 		goto exec_abort;
    910  1.111      matt 	}
    911  1.144  christos 	/* Move the stack back to original point */
    912  1.163       chs 	stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
    913   1.55       cgd 
    914  1.121       eeh 	/* fill process ps_strings info */
    915  1.188       chs 	p->p_psstr = (struct ps_strings *)
    916  1.188       chs 	    STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
    917  1.163       chs 	    sizeof(struct ps_strings));
    918  1.121       eeh 	p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
    919  1.121       eeh 	p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
    920  1.121       eeh 	p->p_psenv = offsetof(struct ps_strings, ps_envstr);
    921  1.121       eeh 	p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
    922  1.121       eeh 
    923   1.55       cgd 	/* copy out the process's ps_strings structure */
    924  1.213      manu 	if ((error = copyout(aip, (char *)p->p_psstr,
    925  1.144  christos 	    sizeof(arginfo))) != 0) {
    926  1.143  christos 		DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
    927  1.213      manu 		       aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
    928   1.55       cgd 		goto exec_abort;
    929  1.111      matt 	}
    930  1.109    simonb 
    931  1.270        ad 	fd_closeexec();		/* handle close on exec */
    932   1.55       cgd 	execsigs(p);		/* reset catched signals */
    933  1.183  junyoung 
    934  1.164   thorpej 	l->l_ctxlink = NULL;	/* reset ucontext link */
    935   1.55       cgd 
    936  1.255  christos 
    937   1.55       cgd 	p->p_acflag &= ~AFORK;
    938  1.272        ad 	mutex_enter(p->p_lock);
    939  1.238     pavel 	p->p_flag |= PK_EXEC;
    940  1.272        ad 	mutex_exit(p->p_lock);
    941  1.237        ad 
    942  1.237        ad 	/*
    943  1.237        ad 	 * Stop profiling.
    944  1.237        ad 	 */
    945  1.237        ad 	if ((p->p_stflag & PST_PROFIL) != 0) {
    946  1.237        ad 		mutex_spin_enter(&p->p_stmutex);
    947  1.237        ad 		stopprofclock(p);
    948  1.237        ad 		mutex_spin_exit(&p->p_stmutex);
    949  1.237        ad 	}
    950  1.237        ad 
    951  1.237        ad 	/*
    952  1.275        ad 	 * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
    953  1.237        ad 	 * exited and exec()/exit() are the only places it will be cleared.
    954  1.237        ad 	 */
    955  1.275        ad 	if ((p->p_lflag & PL_PPWAIT) != 0) {
    956  1.271        ad 		mutex_enter(proc_lock);
    957  1.275        ad 		p->p_lflag &= ~PL_PPWAIT;
    958  1.237        ad 		cv_broadcast(&p->p_pptr->p_waitcv);
    959  1.271        ad 		mutex_exit(proc_lock);
    960   1.55       cgd 	}
    961   1.55       cgd 
    962   1.55       cgd 	/*
    963  1.237        ad 	 * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
    964  1.237        ad 	 * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
    965  1.237        ad 	 * out additional references on the process for the moment.
    966   1.55       cgd 	 */
    967  1.237        ad 	if ((p->p_slflag & PSL_TRACED) == 0 &&
    968  1.141   thorpej 
    969  1.141   thorpej 	    (((attr.va_mode & S_ISUID) != 0 &&
    970  1.221        ad 	      kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
    971  1.141   thorpej 
    972  1.141   thorpej 	     ((attr.va_mode & S_ISGID) != 0 &&
    973  1.221        ad 	      kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
    974  1.141   thorpej 		/*
    975  1.141   thorpej 		 * Mark the process as SUGID before we do
    976  1.141   thorpej 		 * anything that might block.
    977  1.141   thorpej 		 */
    978  1.237        ad 		proc_crmod_enter();
    979  1.240   thorpej 		proc_crmod_leave(NULL, NULL, true);
    980  1.152  christos 
    981  1.152  christos 		/* Make sure file descriptors 0..2 are in use. */
    982  1.270        ad 		if ((error = fd_checkstd()) != 0) {
    983  1.209  christos 			DPRINTF(("execve: fdcheckstd failed %d\n", error));
    984  1.152  christos 			goto exec_abort;
    985  1.209  christos 		}
    986  1.141   thorpej 
    987  1.220        ad 		/*
    988  1.220        ad 		 * Copy the credential so other references don't see our
    989  1.220        ad 		 * changes.
    990  1.220        ad 		 */
    991  1.221        ad 		l->l_cred = kauth_cred_copy(l->l_cred);
    992   1.55       cgd #ifdef KTRACE
    993   1.55       cgd 		/*
    994  1.268      elad 		 * If the persistent trace flag isn't set, turn off.
    995   1.55       cgd 		 */
    996  1.237        ad 		if (p->p_tracep) {
    997  1.247        ad 			mutex_enter(&ktrace_lock);
    998  1.268      elad 			if (!(p->p_traceflag & KTRFAC_PERSISTENT))
    999  1.237        ad 				ktrderef(p);
   1000  1.247        ad 			mutex_exit(&ktrace_lock);
   1001  1.237        ad 		}
   1002   1.55       cgd #endif
   1003   1.83   mycroft 		if (attr.va_mode & S_ISUID)
   1004  1.221        ad 			kauth_cred_seteuid(l->l_cred, attr.va_uid);
   1005   1.83   mycroft 		if (attr.va_mode & S_ISGID)
   1006  1.221        ad 			kauth_cred_setegid(l->l_cred, attr.va_gid);
   1007  1.210  christos 	} else {
   1008  1.221        ad 		if (kauth_cred_geteuid(l->l_cred) ==
   1009  1.221        ad 		    kauth_cred_getuid(l->l_cred) &&
   1010  1.221        ad 		    kauth_cred_getegid(l->l_cred) ==
   1011  1.221        ad 		    kauth_cred_getgid(l->l_cred))
   1012  1.238     pavel 			p->p_flag &= ~PK_SUGID;
   1013  1.210  christos 	}
   1014  1.220        ad 
   1015  1.220        ad 	/*
   1016  1.220        ad 	 * Copy the credential so other references don't see our changes.
   1017  1.220        ad 	 * Test to see if this is necessary first, since in the common case
   1018  1.220        ad 	 * we won't need a private reference.
   1019  1.220        ad 	 */
   1020  1.221        ad 	if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
   1021  1.221        ad 	    kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
   1022  1.221        ad 		l->l_cred = kauth_cred_copy(l->l_cred);
   1023  1.221        ad 		kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
   1024  1.221        ad 		kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
   1025  1.220        ad 	}
   1026  1.155  gmcgarry 
   1027  1.221        ad 	/* Update the master credentials. */
   1028  1.227        ad 	if (l->l_cred != p->p_cred) {
   1029  1.227        ad 		kauth_cred_t ocred;
   1030  1.227        ad 
   1031  1.227        ad 		kauth_cred_hold(l->l_cred);
   1032  1.272        ad 		mutex_enter(p->p_lock);
   1033  1.227        ad 		ocred = p->p_cred;
   1034  1.227        ad 		p->p_cred = l->l_cred;
   1035  1.272        ad 		mutex_exit(p->p_lock);
   1036  1.227        ad 		kauth_cred_free(ocred);
   1037  1.227        ad 	}
   1038  1.221        ad 
   1039  1.155  gmcgarry #if defined(__HAVE_RAS)
   1040  1.155  gmcgarry 	/*
   1041  1.155  gmcgarry 	 * Remove all RASs from the address space.
   1042  1.155  gmcgarry 	 */
   1043  1.251        ad 	ras_purgeall();
   1044  1.155  gmcgarry #endif
   1045  1.107      fvdl 
   1046  1.107      fvdl 	doexechooks(p);
   1047   1.55       cgd 
   1048   1.55       cgd 	/* setup new registers and do misc. setup. */
   1049  1.244       dsl 	(*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
   1050  1.244       dsl 	if (pack.ep_esch->es_setregs)
   1051  1.244       dsl 		(*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
   1052   1.55       cgd 
   1053  1.171       chs 	/* map the process's signal trampoline code */
   1054  1.244       dsl 	if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
   1055  1.209  christos 		DPRINTF(("execve: map sigcode failed %d\n", error));
   1056  1.171       chs 		goto exec_abort;
   1057  1.209  christos 	}
   1058  1.171       chs 
   1059  1.277        ad 	pool_put(&exec_pool, argp);
   1060  1.276        ad 
   1061  1.276        ad 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
   1062  1.276        ad 
   1063  1.276        ad 	/* notify others that we exec'd */
   1064  1.276        ad 	KNOTE(&p->p_klist, NOTE_EXEC);
   1065  1.276        ad 
   1066  1.265      yamt 	kmem_free(pack.ep_hdr, pack.ep_hdrlen);
   1067  1.122  jdolecek 
   1068  1.244       dsl 	/* The emulation root will usually have been found when we looked
   1069  1.244       dsl 	 * for the elf interpreter (or similar), if not look now. */
   1070  1.244       dsl 	if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
   1071  1.244       dsl 		emul_find_root(l, &pack);
   1072  1.244       dsl 
   1073  1.244       dsl 	/* Any old emulation root got removed by fdcloseexec */
   1074  1.259        ad 	rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
   1075  1.244       dsl 	p->p_cwdi->cwdi_edir = pack.ep_emul_root;
   1076  1.259        ad 	rw_exit(&p->p_cwdi->cwdi_lock);
   1077  1.244       dsl 	pack.ep_emul_root = NULL;
   1078  1.244       dsl 	if (pack.ep_interp != NULL)
   1079  1.244       dsl 		vrele(pack.ep_interp);
   1080  1.244       dsl 
   1081  1.122  jdolecek 	/*
   1082  1.194     peter 	 * Call emulation specific exec hook. This can setup per-process
   1083  1.122  jdolecek 	 * p->p_emuldata or do any other per-process stuff an emulation needs.
   1084  1.122  jdolecek 	 *
   1085  1.122  jdolecek 	 * If we are executing process of different emulation than the
   1086  1.122  jdolecek 	 * original forked process, call e_proc_exit() of the old emulation
   1087  1.122  jdolecek 	 * first, then e_proc_exec() of new emulation. If the emulation is
   1088  1.122  jdolecek 	 * same, the exec hook code should deallocate any old emulation
   1089  1.122  jdolecek 	 * resources held previously by this process.
   1090  1.122  jdolecek 	 */
   1091  1.124  jdolecek 	if (p->p_emul && p->p_emul->e_proc_exit
   1092  1.244       dsl 	    && p->p_emul != pack.ep_esch->es_emul)
   1093  1.122  jdolecek 		(*p->p_emul->e_proc_exit)(p);
   1094  1.122  jdolecek 
   1095  1.123  jdolecek 	/*
   1096  1.123  jdolecek 	 * Call exec hook. Emulation code may NOT store reference to anything
   1097  1.123  jdolecek 	 * from &pack.
   1098  1.123  jdolecek 	 */
   1099  1.244       dsl         if (pack.ep_esch->es_emul->e_proc_exec)
   1100  1.244       dsl                 (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
   1101  1.122  jdolecek 
   1102  1.122  jdolecek 	/* update p_emul, the old value is no longer needed */
   1103  1.244       dsl 	p->p_emul = pack.ep_esch->es_emul;
   1104  1.148   thorpej 
   1105  1.148   thorpej 	/* ...and the same for p_execsw */
   1106  1.244       dsl 	p->p_execsw = pack.ep_esch;
   1107  1.148   thorpej 
   1108  1.133   mycroft #ifdef __HAVE_SYSCALL_INTERN
   1109  1.133   mycroft 	(*p->p_emul->e_syscall_intern)(p);
   1110  1.133   mycroft #endif
   1111  1.247        ad 	ktremul();
   1112   1.85   mycroft 
   1113  1.252        ad 	/* Allow new references from the debugger/procfs. */
   1114  1.252        ad 	rw_exit(&p->p_reflock);
   1115  1.237        ad 	rw_exit(&exec_lock);
   1116  1.162      manu 
   1117  1.271        ad 	mutex_enter(proc_lock);
   1118  1.237        ad 
   1119  1.237        ad 	if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
   1120  1.237        ad 		KSI_INIT_EMPTY(&ksi);
   1121  1.237        ad 		ksi.ksi_signo = SIGTRAP;
   1122  1.237        ad 		ksi.ksi_lid = l->l_lid;
   1123  1.237        ad 		kpsignal(p, &ksi, NULL);
   1124  1.237        ad 	}
   1125  1.162      manu 
   1126  1.237        ad 	if (p->p_sflag & PS_STOPEXEC) {
   1127  1.237        ad 		KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
   1128  1.175       dsl 		p->p_pptr->p_nstopchild++;
   1129  1.237        ad 		p->p_pptr->p_waited = 0;
   1130  1.272        ad 		mutex_enter(p->p_lock);
   1131  1.237        ad 		ksiginfo_queue_init(&kq);
   1132  1.237        ad 		sigclearall(p, &contsigmask, &kq);
   1133  1.237        ad 		lwp_lock(l);
   1134  1.237        ad 		l->l_stat = LSSTOP;
   1135  1.162      manu 		p->p_stat = SSTOP;
   1136  1.164   thorpej 		p->p_nrlwps--;
   1137  1.272        ad 		mutex_exit(p->p_lock);
   1138  1.271        ad 		mutex_exit(proc_lock);
   1139  1.245      yamt 		mi_switch(l);
   1140  1.237        ad 		ksiginfo_queue_drain(&kq);
   1141  1.237        ad 		KERNEL_LOCK(l->l_biglocks, l);
   1142  1.237        ad 	} else {
   1143  1.271        ad 		mutex_exit(proc_lock);
   1144  1.162      manu 	}
   1145  1.162      manu 
   1146  1.260  christos 	PNBUF_PUT(pathbuf);
   1147   1.85   mycroft 	return (EJUSTRETURN);
   1148   1.55       cgd 
   1149  1.138     lukem  bad:
   1150   1.55       cgd 	/* free the vmspace-creation commands, and release their references */
   1151   1.55       cgd 	kill_vmcmds(&pack.ep_vmcmds);
   1152   1.55       cgd 	/* kill any opened file descriptor, if necessary */
   1153   1.55       cgd 	if (pack.ep_flags & EXEC_HASFD) {
   1154   1.55       cgd 		pack.ep_flags &= ~EXEC_HASFD;
   1155  1.270        ad 		fd_close(pack.ep_fd);
   1156   1.55       cgd 	}
   1157   1.55       cgd 	/* close and put the exec'd file */
   1158   1.99  wrstuden 	vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
   1159  1.254     pooka 	VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
   1160   1.99  wrstuden 	vput(pack.ep_vp);
   1161  1.120   thorpej 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
   1162  1.277        ad 	pool_put(&exec_pool, argp);
   1163   1.55       cgd 
   1164  1.138     lukem  freehdr:
   1165  1.265      yamt 	kmem_free(pack.ep_hdr, pack.ep_hdrlen);
   1166  1.244       dsl 	if (pack.ep_emul_root != NULL)
   1167  1.244       dsl 		vrele(pack.ep_emul_root);
   1168  1.244       dsl 	if (pack.ep_interp != NULL)
   1169  1.244       dsl 		vrele(pack.ep_interp);
   1170  1.200      elad 
   1171  1.274        ad 	rw_exit(&exec_lock);
   1172  1.274        ad 
   1173  1.200      elad  clrflg:
   1174  1.279  wrstuden 	lwp_lock(l);
   1175  1.279  wrstuden 	l->l_flag |= oldlwpflags;
   1176  1.279  wrstuden 	lwp_unlock(l);
   1177  1.260  christos 	PNBUF_PUT(pathbuf);
   1178  1.252        ad 	rw_exit(&p->p_reflock);
   1179  1.130  jdolecek 
   1180  1.282        ad 	if (modgen != module_gen && error == ENOEXEC) {
   1181  1.282        ad 		modgen = module_gen;
   1182  1.282        ad 		exec_autoload();
   1183  1.282        ad 		goto retry;
   1184  1.282        ad 	}
   1185  1.282        ad 
   1186   1.55       cgd 	return error;
   1187   1.55       cgd 
   1188  1.138     lukem  exec_abort:
   1189  1.260  christos 	PNBUF_PUT(pathbuf);
   1190  1.252        ad 	rw_exit(&p->p_reflock);
   1191  1.237        ad 	rw_exit(&exec_lock);
   1192  1.130  jdolecek 
   1193   1.55       cgd 	/*
   1194   1.55       cgd 	 * the old process doesn't exist anymore.  exit gracefully.
   1195   1.55       cgd 	 * get rid of the (new) address space we have created, if any, get rid
   1196   1.55       cgd 	 * of our namei data and vnode, and exit noting failure
   1197   1.55       cgd 	 */
   1198   1.88       mrg 	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
   1199   1.88       mrg 		VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
   1200   1.73   mycroft 	if (pack.ep_emul_arg)
   1201  1.284    cegger 		free(pack.ep_emul_arg, M_TEMP);
   1202  1.120   thorpej 	PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
   1203  1.277        ad 	pool_put(&exec_pool, argp);
   1204  1.265      yamt 	kmem_free(pack.ep_hdr, pack.ep_hdrlen);
   1205  1.244       dsl 	if (pack.ep_emul_root != NULL)
   1206  1.244       dsl 		vrele(pack.ep_emul_root);
   1207  1.244       dsl 	if (pack.ep_interp != NULL)
   1208  1.244       dsl 		vrele(pack.ep_interp);
   1209  1.237        ad 
   1210  1.252        ad 	/* Acquire the sched-state mutex (exit1() will release it). */
   1211  1.272        ad 	mutex_enter(p->p_lock);
   1212  1.164   thorpej 	exit1(l, W_EXITCODE(error, SIGABRT));
   1213   1.55       cgd 
   1214   1.55       cgd 	/* NOTREACHED */
   1215   1.55       cgd 	return 0;
   1216   1.67  christos }
   1217   1.67  christos 
   1218   1.67  christos 
   1219  1.144  christos int
   1220  1.231      yamt copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
   1221  1.231      yamt     char **stackp, void *argp)
   1222   1.67  christos {
   1223  1.138     lukem 	char	**cpp, *dp, *sp;
   1224  1.138     lukem 	size_t	len;
   1225  1.138     lukem 	void	*nullp;
   1226  1.138     lukem 	long	argc, envc;
   1227  1.144  christos 	int	error;
   1228  1.138     lukem 
   1229  1.144  christos 	cpp = (char **)*stackp;
   1230  1.138     lukem 	nullp = NULL;
   1231  1.138     lukem 	argc = arginfo->ps_nargvstr;
   1232  1.138     lukem 	envc = arginfo->ps_nenvstr;
   1233  1.144  christos 	if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
   1234  1.144  christos 		return error;
   1235   1.67  christos 
   1236  1.244       dsl 	dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
   1237   1.67  christos 	sp = argp;
   1238   1.67  christos 
   1239   1.67  christos 	/* XXX don't copy them out, remap them! */
   1240   1.69   mycroft 	arginfo->ps_argvstr = cpp; /* remember location of argv for later */
   1241   1.67  christos 
   1242   1.67  christos 	for (; --argc >= 0; sp += len, dp += len)
   1243  1.144  christos 		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
   1244  1.144  christos 		    (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
   1245  1.144  christos 			return error;
   1246   1.67  christos 
   1247  1.144  christos 	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
   1248  1.144  christos 		return error;
   1249   1.67  christos 
   1250   1.69   mycroft 	arginfo->ps_envstr = cpp; /* remember location of envp for later */
   1251   1.67  christos 
   1252   1.67  christos 	for (; --envc >= 0; sp += len, dp += len)
   1253  1.144  christos 		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
   1254  1.144  christos 		    (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
   1255  1.144  christos 			return error;
   1256   1.67  christos 
   1257  1.144  christos 	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
   1258  1.144  christos 		return error;
   1259   1.67  christos 
   1260  1.144  christos 	*stackp = (char *)cpp;
   1261  1.144  christos 	return 0;
   1262   1.55       cgd }
   1263  1.130  jdolecek 
   1264  1.130  jdolecek 
   1265  1.130  jdolecek /*
   1266  1.282        ad  * Add execsw[] entries.
   1267  1.130  jdolecek  */
   1268  1.130  jdolecek int
   1269  1.282        ad exec_add(struct execsw *esp, int count)
   1270  1.130  jdolecek {
   1271  1.282        ad 	struct exec_entry	*it;
   1272  1.282        ad 	int			i;
   1273  1.130  jdolecek 
   1274  1.283        ad 	if (count == 0) {
   1275  1.283        ad 		return 0;
   1276  1.283        ad 	}
   1277  1.130  jdolecek 
   1278  1.282        ad 	/* Check for duplicates. */
   1279  1.237        ad 	rw_enter(&exec_lock, RW_WRITER);
   1280  1.282        ad 	for (i = 0; i < count; i++) {
   1281  1.282        ad 		LIST_FOREACH(it, &ex_head, ex_list) {
   1282  1.282        ad 			/* assume unique (makecmds, probe_func, emulation) */
   1283  1.282        ad 			if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
   1284  1.282        ad 			    it->ex_sw->u.elf_probe_func ==
   1285  1.282        ad 			    esp[i].u.elf_probe_func &&
   1286  1.282        ad 			    it->ex_sw->es_emul == esp[i].es_emul) {
   1287  1.282        ad 				rw_exit(&exec_lock);
   1288  1.282        ad 				return EEXIST;
   1289  1.130  jdolecek 			}
   1290  1.130  jdolecek 		}
   1291  1.130  jdolecek 	}
   1292  1.130  jdolecek 
   1293  1.282        ad 	/* Allocate new entries. */
   1294  1.282        ad 	for (i = 0; i < count; i++) {
   1295  1.282        ad 		it = kmem_alloc(sizeof(*it), KM_SLEEP);
   1296  1.282        ad 		it->ex_sw = &esp[i];
   1297  1.282        ad 		LIST_INSERT_HEAD(&ex_head, it, ex_list);
   1298  1.130  jdolecek 	}
   1299  1.130  jdolecek 
   1300  1.130  jdolecek 	/* update execsw[] */
   1301  1.130  jdolecek 	exec_init(0);
   1302  1.237        ad 	rw_exit(&exec_lock);
   1303  1.282        ad 	return 0;
   1304  1.130  jdolecek }
   1305  1.130  jdolecek 
   1306  1.130  jdolecek /*
   1307  1.130  jdolecek  * Remove execsw[] entry.
   1308  1.130  jdolecek  */
   1309  1.130  jdolecek int
   1310  1.282        ad exec_remove(struct execsw *esp, int count)
   1311  1.130  jdolecek {
   1312  1.282        ad 	struct exec_entry	*it, *next;
   1313  1.282        ad 	int			i;
   1314  1.282        ad 	const struct proclist_desc *pd;
   1315  1.282        ad 	proc_t			*p;
   1316  1.282        ad 
   1317  1.283        ad 	if (count == 0) {
   1318  1.283        ad 		return 0;
   1319  1.283        ad 	}
   1320  1.130  jdolecek 
   1321  1.282        ad 	/* Abort if any are busy. */
   1322  1.237        ad 	rw_enter(&exec_lock, RW_WRITER);
   1323  1.282        ad 	for (i = 0; i < count; i++) {
   1324  1.282        ad 		mutex_enter(proc_lock);
   1325  1.282        ad 		for (pd = proclists; pd->pd_list != NULL; pd++) {
   1326  1.282        ad 			PROCLIST_FOREACH(p, pd->pd_list) {
   1327  1.282        ad 				if (p->p_execsw == &esp[i]) {
   1328  1.282        ad 					mutex_exit(proc_lock);
   1329  1.282        ad 					rw_exit(&exec_lock);
   1330  1.282        ad 					return EBUSY;
   1331  1.282        ad 				}
   1332  1.282        ad 			}
   1333  1.282        ad 		}
   1334  1.282        ad 		mutex_exit(proc_lock);
   1335  1.282        ad 	}
   1336  1.130  jdolecek 
   1337  1.282        ad 	/* None are busy, so remove them all. */
   1338  1.282        ad 	for (i = 0; i < count; i++) {
   1339  1.282        ad 		for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
   1340  1.282        ad 			next = LIST_NEXT(it, ex_list);
   1341  1.282        ad 			if (it->ex_sw == &esp[i]) {
   1342  1.282        ad 				LIST_REMOVE(it, ex_list);
   1343  1.282        ad 				kmem_free(it, sizeof(*it));
   1344  1.282        ad 				break;
   1345  1.282        ad 			}
   1346  1.282        ad 		}
   1347  1.130  jdolecek 	}
   1348  1.130  jdolecek 
   1349  1.130  jdolecek 	/* update execsw[] */
   1350  1.130  jdolecek 	exec_init(0);
   1351  1.237        ad 	rw_exit(&exec_lock);
   1352  1.282        ad 	return 0;
   1353  1.130  jdolecek }
   1354  1.130  jdolecek 
   1355  1.130  jdolecek /*
   1356  1.130  jdolecek  * Initialize exec structures. If init_boot is true, also does necessary
   1357  1.130  jdolecek  * one-time initialization (it's called from main() that way).
   1358  1.147  jdolecek  * Once system is multiuser, this should be called with exec_lock held,
   1359  1.130  jdolecek  * i.e. via exec_{add|remove}().
   1360  1.130  jdolecek  */
   1361  1.130  jdolecek int
   1362  1.138     lukem exec_init(int init_boot)
   1363  1.130  jdolecek {
   1364  1.282        ad 	const struct execsw 	**sw;
   1365  1.282        ad 	struct exec_entry	*ex;
   1366  1.282        ad 	SLIST_HEAD(,exec_entry)	first;
   1367  1.282        ad 	SLIST_HEAD(,exec_entry)	any;
   1368  1.282        ad 	SLIST_HEAD(,exec_entry)	last;
   1369  1.282        ad 	int			i, sz;
   1370  1.130  jdolecek 
   1371  1.130  jdolecek 	if (init_boot) {
   1372  1.130  jdolecek 		/* do one-time initializations */
   1373  1.237        ad 		rw_init(&exec_lock);
   1374  1.259        ad 		mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
   1375  1.277        ad 		pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
   1376  1.277        ad 		    "execargs", &exec_palloc, IPL_NONE);
   1377  1.277        ad 		pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
   1378  1.282        ad 	} else {
   1379  1.282        ad 		KASSERT(rw_write_held(&exec_lock));
   1380  1.282        ad 	}
   1381  1.130  jdolecek 
   1382  1.282        ad 	/* Sort each entry onto the appropriate queue. */
   1383  1.282        ad 	SLIST_INIT(&first);
   1384  1.282        ad 	SLIST_INIT(&any);
   1385  1.282        ad 	SLIST_INIT(&last);
   1386  1.282        ad 	sz = 0;
   1387  1.282        ad 	LIST_FOREACH(ex, &ex_head, ex_list) {
   1388  1.282        ad 		switch(ex->ex_sw->es_prio) {
   1389  1.282        ad 		case EXECSW_PRIO_FIRST:
   1390  1.282        ad 			SLIST_INSERT_HEAD(&first, ex, ex_slist);
   1391  1.282        ad 			break;
   1392  1.282        ad 		case EXECSW_PRIO_ANY:
   1393  1.282        ad 			SLIST_INSERT_HEAD(&any, ex, ex_slist);
   1394  1.282        ad 			break;
   1395  1.282        ad 		case EXECSW_PRIO_LAST:
   1396  1.282        ad 			SLIST_INSERT_HEAD(&last, ex, ex_slist);
   1397  1.282        ad 			break;
   1398  1.282        ad 		default:
   1399  1.282        ad 			panic("exec_init");
   1400  1.282        ad 			break;
   1401  1.130  jdolecek 		}
   1402  1.282        ad 		sz++;
   1403  1.130  jdolecek 	}
   1404  1.130  jdolecek 
   1405  1.130  jdolecek 	/*
   1406  1.282        ad 	 * Create new execsw[].  Ensure we do not try a zero-sized
   1407  1.282        ad 	 * allocation.
   1408  1.130  jdolecek 	 */
   1409  1.282        ad 	sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
   1410  1.282        ad 	i = 0;
   1411  1.282        ad 	SLIST_FOREACH(ex, &first, ex_slist) {
   1412  1.282        ad 		sw[i++] = ex->ex_sw;
   1413  1.282        ad 	}
   1414  1.282        ad 	SLIST_FOREACH(ex, &any, ex_slist) {
   1415  1.282        ad 		sw[i++] = ex->ex_sw;
   1416  1.282        ad 	}
   1417  1.282        ad 	SLIST_FOREACH(ex, &last, ex_slist) {
   1418  1.282        ad 		sw[i++] = ex->ex_sw;
   1419  1.130  jdolecek 	}
   1420  1.183  junyoung 
   1421  1.282        ad 	/* Replace old execsw[] and free used memory. */
   1422  1.282        ad 	if (execsw != NULL) {
   1423  1.282        ad 		kmem_free(__UNCONST(execsw),
   1424  1.282        ad 		    nexecs * sizeof(struct execsw *) + 1);
   1425  1.130  jdolecek 	}
   1426  1.282        ad 	execsw = sw;
   1427  1.282        ad 	nexecs = sz;
   1428  1.130  jdolecek 
   1429  1.282        ad 	/* Figure out the maximum size of an exec header. */
   1430  1.282        ad 	exec_maxhdrsz = sizeof(int);
   1431  1.130  jdolecek 	for (i = 0; i < nexecs; i++) {
   1432  1.130  jdolecek 		if (execsw[i]->es_hdrsz > exec_maxhdrsz)
   1433  1.130  jdolecek 			exec_maxhdrsz = execsw[i]->es_hdrsz;
   1434  1.130  jdolecek 	}
   1435  1.130  jdolecek 
   1436  1.130  jdolecek 	return 0;
   1437  1.130  jdolecek }
   1438  1.171       chs 
   1439  1.171       chs static int
   1440  1.171       chs exec_sigcode_map(struct proc *p, const struct emul *e)
   1441  1.171       chs {
   1442  1.171       chs 	vaddr_t va;
   1443  1.171       chs 	vsize_t sz;
   1444  1.171       chs 	int error;
   1445  1.171       chs 	struct uvm_object *uobj;
   1446  1.171       chs 
   1447  1.184  drochner 	sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
   1448  1.184  drochner 
   1449  1.184  drochner 	if (e->e_sigobject == NULL || sz == 0) {
   1450  1.171       chs 		return 0;
   1451  1.171       chs 	}
   1452  1.171       chs 
   1453  1.171       chs 	/*
   1454  1.171       chs 	 * If we don't have a sigobject for this emulation, create one.
   1455  1.171       chs 	 *
   1456  1.171       chs 	 * sigobject is an anonymous memory object (just like SYSV shared
   1457  1.171       chs 	 * memory) that we keep a permanent reference to and that we map
   1458  1.171       chs 	 * in all processes that need this sigcode. The creation is simple,
   1459  1.171       chs 	 * we create an object, add a permanent reference to it, map it in
   1460  1.171       chs 	 * kernel space, copy out the sigcode to it and unmap it.
   1461  1.189  jdolecek 	 * We map it with PROT_READ|PROT_EXEC into the process just
   1462  1.189  jdolecek 	 * the way sys_mmap() would map it.
   1463  1.171       chs 	 */
   1464  1.171       chs 
   1465  1.171       chs 	uobj = *e->e_sigobject;
   1466  1.171       chs 	if (uobj == NULL) {
   1467  1.259        ad 		mutex_enter(&sigobject_lock);
   1468  1.259        ad 		if ((uobj = *e->e_sigobject) == NULL) {
   1469  1.259        ad 			uobj = uao_create(sz, 0);
   1470  1.259        ad 			(*uobj->pgops->pgo_reference)(uobj);
   1471  1.259        ad 			va = vm_map_min(kernel_map);
   1472  1.259        ad 			if ((error = uvm_map(kernel_map, &va, round_page(sz),
   1473  1.259        ad 			    uobj, 0, 0,
   1474  1.259        ad 			    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
   1475  1.259        ad 			    UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
   1476  1.259        ad 				printf("kernel mapping failed %d\n", error);
   1477  1.259        ad 				(*uobj->pgops->pgo_detach)(uobj);
   1478  1.259        ad 				mutex_exit(&sigobject_lock);
   1479  1.259        ad 				return (error);
   1480  1.259        ad 			}
   1481  1.259        ad 			memcpy((void *)va, e->e_sigcode, sz);
   1482  1.171       chs #ifdef PMAP_NEED_PROCWR
   1483  1.259        ad 			pmap_procwr(&proc0, va, sz);
   1484  1.171       chs #endif
   1485  1.259        ad 			uvm_unmap(kernel_map, va, va + round_page(sz));
   1486  1.259        ad 			*e->e_sigobject = uobj;
   1487  1.259        ad 		}
   1488  1.259        ad 		mutex_exit(&sigobject_lock);
   1489  1.171       chs 	}
   1490  1.171       chs 
   1491  1.172     enami 	/* Just a hint to uvm_map where to put it. */
   1492  1.195      fvdl 	va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
   1493  1.195      fvdl 	    round_page(sz));
   1494  1.187       chs 
   1495  1.187       chs #ifdef __alpha__
   1496  1.187       chs 	/*
   1497  1.187       chs 	 * Tru64 puts /sbin/loader at the end of user virtual memory,
   1498  1.187       chs 	 * which causes the above calculation to put the sigcode at
   1499  1.187       chs 	 * an invalid address.  Put it just below the text instead.
   1500  1.187       chs 	 */
   1501  1.193       jmc 	if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
   1502  1.187       chs 		va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
   1503  1.187       chs 	}
   1504  1.187       chs #endif
   1505  1.187       chs 
   1506  1.171       chs 	(*uobj->pgops->pgo_reference)(uobj);
   1507  1.171       chs 	error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
   1508  1.171       chs 			uobj, 0, 0,
   1509  1.171       chs 			UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
   1510  1.171       chs 				    UVM_ADV_RANDOM, 0));
   1511  1.171       chs 	if (error) {
   1512  1.171       chs 		(*uobj->pgops->pgo_detach)(uobj);
   1513  1.171       chs 		return (error);
   1514  1.171       chs 	}
   1515  1.171       chs 	p->p_sigctx.ps_sigcode = (void *)va;
   1516  1.171       chs 	return (0);
   1517  1.171       chs }
   1518