1 1.531 kre /* $NetBSD: kern_exec.c,v 1.531 2025/07/16 19:14:13 kre Exp $ */ 2 1.277 ad 3 1.277 ad /*- 4 1.493 ad * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc. 5 1.277 ad * All rights reserved. 6 1.277 ad * 7 1.484 ad * This code is derived from software contributed to The NetBSD Foundation 8 1.484 ad * by Andrew Doran. 9 1.484 ad * 10 1.277 ad * Redistribution and use in source and binary forms, with or without 11 1.277 ad * modification, are permitted provided that the following conditions 12 1.277 ad * are met: 13 1.277 ad * 1. Redistributions of source code must retain the above copyright 14 1.277 ad * notice, this list of conditions and the following disclaimer. 15 1.277 ad * 2. Redistributions in binary form must reproduce the above copyright 16 1.277 ad * notice, this list of conditions and the following disclaimer in the 17 1.277 ad * documentation and/or other materials provided with the distribution. 18 1.277 ad * 19 1.277 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.277 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.277 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.277 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.277 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.277 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.277 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.277 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.277 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.277 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.277 ad * POSSIBILITY OF SUCH DAMAGE. 30 1.277 ad */ 31 1.55 cgd 32 1.55 cgd /*- 33 1.77 cgd * Copyright (C) 1993, 1994, 1996 Christopher G. 
Demetriou 34 1.55 cgd * Copyright (C) 1992 Wolfgang Solfrank. 35 1.55 cgd * Copyright (C) 1992 TooLs GmbH. 36 1.55 cgd * All rights reserved. 37 1.55 cgd * 38 1.55 cgd * Redistribution and use in source and binary forms, with or without 39 1.55 cgd * modification, are permitted provided that the following conditions 40 1.55 cgd * are met: 41 1.55 cgd * 1. Redistributions of source code must retain the above copyright 42 1.55 cgd * notice, this list of conditions and the following disclaimer. 43 1.55 cgd * 2. Redistributions in binary form must reproduce the above copyright 44 1.55 cgd * notice, this list of conditions and the following disclaimer in the 45 1.55 cgd * documentation and/or other materials provided with the distribution. 46 1.55 cgd * 3. All advertising materials mentioning features or use of this software 47 1.55 cgd * must display the following acknowledgement: 48 1.55 cgd * This product includes software developed by TooLs GmbH. 49 1.55 cgd * 4. The name of TooLs GmbH may not be used to endorse or promote products 50 1.55 cgd * derived from this software without specific prior written permission. 51 1.55 cgd * 52 1.55 cgd * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR 53 1.55 cgd * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 54 1.55 cgd * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 55 1.55 cgd * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 56 1.55 cgd * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 57 1.55 cgd * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 58 1.55 cgd * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 59 1.55 cgd * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 60 1.55 cgd * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 61 1.55 cgd * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
62 1.55 cgd */ 63 1.146 lukem 64 1.146 lukem #include <sys/cdefs.h> 65 1.531 kre __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.531 2025/07/16 19:14:13 kre Exp $"); 66 1.89 mrg 67 1.325 jmcneill #include "opt_exec.h" 68 1.360 christos #include "opt_execfmt.h" 69 1.92 thorpej #include "opt_ktrace.h" 70 1.285 apb #include "opt_modular.h" 71 1.523 riastrad #include "opt_pax.h" 72 1.124 jdolecek #include "opt_syscall_debug.h" 73 1.226 dogcow #include "veriexec.h" 74 1.55 cgd 75 1.55 cgd #include <sys/param.h> 76 1.523 riastrad #include <sys/types.h> 77 1.523 riastrad 78 1.55 cgd #include <sys/acct.h> 79 1.337 martin #include <sys/atomic.h> 80 1.523 riastrad #include <sys/cprng.h> 81 1.523 riastrad #include <sys/cpu.h> 82 1.55 cgd #include <sys/exec.h> 83 1.523 riastrad #include <sys/file.h> 84 1.523 riastrad #include <sys/filedesc.h> 85 1.507 thorpej #include <sys/futex.h> 86 1.523 riastrad #include <sys/kauth.h> 87 1.523 riastrad #include <sys/kernel.h> 88 1.523 riastrad #include <sys/kmem.h> 89 1.55 cgd #include <sys/ktrace.h> 90 1.523 riastrad #include <sys/lwpctl.h> 91 1.55 cgd #include <sys/mman.h> 92 1.523 riastrad #include <sys/module.h> 93 1.523 riastrad #include <sys/mount.h> 94 1.523 riastrad #include <sys/namei.h> 95 1.523 riastrad #include <sys/pax.h> 96 1.523 riastrad #include <sys/proc.h> 97 1.523 riastrad #include <sys/prot.h> 98 1.523 riastrad #include <sys/ptrace.h> 99 1.155 gmcgarry #include <sys/ras.h> 100 1.523 riastrad #include <sys/sdt.h> 101 1.55 cgd #include <sys/signalvar.h> 102 1.523 riastrad #include <sys/spawn.h> 103 1.55 cgd #include <sys/stat.h> 104 1.124 jdolecek #include <sys/syscall.h> 105 1.523 riastrad #include <sys/syscallargs.h> 106 1.289 pooka #include <sys/syscallvar.h> 107 1.523 riastrad #include <sys/systm.h> 108 1.523 riastrad #include <sys/uidinfo.h> 109 1.222 elad #if NVERIEXEC > 0 110 1.197 blymn #include <sys/verified_exec.h> 111 1.222 elad #endif /* NVERIEXEC > 0 */ 112 1.523 riastrad #include <sys/vfs_syscalls.h> 113 1.523 
riastrad #include <sys/vnode.h> 114 1.523 riastrad #include <sys/wait.h> 115 1.55 cgd 116 1.88 mrg #include <uvm/uvm_extern.h> 117 1.88 mrg 118 1.55 cgd #include <machine/reg.h> 119 1.55 cgd 120 1.244 dsl #include <compat/common/compat_util.h> 121 1.244 dsl 122 1.364 martin #ifndef MD_TOPDOWN_INIT 123 1.370 christos #ifdef __USE_TOPDOWN_VM 124 1.364 martin #define MD_TOPDOWN_INIT(epp) (epp)->ep_flags |= EXEC_TOPDOWN_VM 125 1.364 martin #else 126 1.364 martin #define MD_TOPDOWN_INIT(epp) 127 1.364 martin #endif 128 1.364 martin #endif 129 1.364 martin 130 1.391 uebayasi struct execve_data; 131 1.391 uebayasi 132 1.436 maxv extern int user_va0_disable; 133 1.436 maxv 134 1.396 uebayasi static size_t calcargs(struct execve_data * restrict, const size_t); 135 1.396 uebayasi static size_t calcstack(struct execve_data * restrict, const size_t); 136 1.399 uebayasi static int copyoutargs(struct execve_data * restrict, struct lwp *, 137 1.399 uebayasi char * const); 138 1.398 uebayasi static int copyoutpsstrs(struct execve_data * restrict, struct proc *); 139 1.391 uebayasi static int copyinargs(struct execve_data * restrict, char * const *, 140 1.391 uebayasi char * const *, execve_fetch_element_t, char **); 141 1.392 uebayasi static int copyinargstrs(struct execve_data * restrict, char * const *, 142 1.392 uebayasi execve_fetch_element_t, char **, size_t *, void (*)(const void *, size_t)); 143 1.171 chs static int exec_sigcode_map(struct proc *, const struct emul *); 144 1.171 chs 145 1.429 ozaki #if defined(DEBUG) && !defined(DEBUG_EXEC) 146 1.428 christos #define DEBUG_EXEC 147 1.428 christos #endif 148 1.143 christos #ifdef DEBUG_EXEC 149 1.305 matt #define DPRINTF(a) printf a 150 1.312 christos #define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \ 151 1.312 christos __LINE__, (s), (a), (b)) 152 1.388 uebayasi static void dump_vmcmds(const struct exec_package * const, size_t, int); 153 1.388 uebayasi #define DUMPVMCMDS(p, x, e) do { 
dump_vmcmds((p), (x), (e)); } while (0) 154 1.143 christos #else 155 1.143 christos #define DPRINTF(a) 156 1.312 christos #define COPYPRINTF(s, a, b) 157 1.388 uebayasi #define DUMPVMCMDS(p, x, e) do {} while (0) 158 1.143 christos #endif /* DEBUG_EXEC */ 159 1.165 thorpej 160 1.130 jdolecek /* 161 1.294 darran * DTrace SDT provider definitions 162 1.294 darran */ 163 1.418 christos SDT_PROVIDER_DECLARE(proc); 164 1.418 christos SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *"); 165 1.418 christos SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); 166 1.418 christos SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int"); 167 1.294 darran 168 1.294 darran /* 169 1.130 jdolecek * Exec function switch: 170 1.130 jdolecek * 171 1.130 jdolecek * Note that each makecmds function is responsible for loading the 172 1.130 jdolecek * exec package with the necessary functions for any exec-type-specific 173 1.130 jdolecek * handling. 174 1.130 jdolecek * 175 1.130 jdolecek * Functions for specific exec types should be defined in their own 176 1.130 jdolecek * header file. 
177 1.130 jdolecek */ 178 1.138 lukem static const struct execsw **execsw = NULL; 179 1.138 lukem static int nexecs; 180 1.138 lukem 181 1.282 ad u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */ 182 1.130 jdolecek 183 1.130 jdolecek /* list of dynamically loaded execsw entries */ 184 1.282 ad static LIST_HEAD(execlist_head, exec_entry) ex_head = 185 1.282 ad LIST_HEAD_INITIALIZER(ex_head); 186 1.130 jdolecek struct exec_entry { 187 1.138 lukem LIST_ENTRY(exec_entry) ex_list; 188 1.282 ad SLIST_ENTRY(exec_entry) ex_slist; 189 1.282 ad const struct execsw *ex_sw; 190 1.130 jdolecek }; 191 1.130 jdolecek 192 1.203 christos #ifndef __HAVE_SYSCALL_INTERN 193 1.203 christos void syscall(void); 194 1.203 christos #endif 195 1.203 christos 196 1.423 pgoyette /* NetBSD autoloadable syscalls */ 197 1.423 pgoyette #ifdef MODULAR 198 1.423 pgoyette #include <kern/syscalls_autoload.c> 199 1.423 pgoyette #endif 200 1.423 pgoyette 201 1.173 christos /* NetBSD emul struct */ 202 1.282 ad struct emul emul_netbsd = { 203 1.291 rmind .e_name = "netbsd", 204 1.371 manu #ifdef EMUL_NATIVEROOT 205 1.371 manu .e_path = EMUL_NATIVEROOT, 206 1.371 manu #else 207 1.371 manu .e_path = NULL, 208 1.371 manu #endif 209 1.133 mycroft #ifndef __HAVE_MINIMAL_EMUL 210 1.291 rmind .e_flags = EMUL_HAS_SYS___syscall, 211 1.291 rmind .e_errno = NULL, 212 1.291 rmind .e_nosys = SYS_syscall, 213 1.291 rmind .e_nsysent = SYS_NSYSENT, 214 1.133 mycroft #endif 215 1.423 pgoyette #ifdef MODULAR 216 1.423 pgoyette .e_sc_autoload = netbsd_syscalls_autoload, 217 1.423 pgoyette #endif 218 1.291 rmind .e_sysent = sysent, 219 1.460 pgoyette .e_nomodbits = sysent_nomodbits, 220 1.124 jdolecek #ifdef SYSCALL_DEBUG 221 1.291 rmind .e_syscallnames = syscallnames, 222 1.124 jdolecek #else 223 1.291 rmind .e_syscallnames = NULL, 224 1.124 jdolecek #endif 225 1.291 rmind .e_sendsig = sendsig, 226 1.291 rmind .e_trapsignal = trapsignal, 227 1.291 rmind .e_sigcode = NULL, 228 1.291 rmind .e_esigcode = NULL, 
229 1.291 rmind .e_sigobject = NULL, 230 1.291 rmind .e_setregs = setregs, 231 1.291 rmind .e_proc_exec = NULL, 232 1.291 rmind .e_proc_fork = NULL, 233 1.291 rmind .e_proc_exit = NULL, 234 1.291 rmind .e_lwp_fork = NULL, 235 1.291 rmind .e_lwp_exit = NULL, 236 1.133 mycroft #ifdef __HAVE_SYSCALL_INTERN 237 1.291 rmind .e_syscall_intern = syscall_intern, 238 1.133 mycroft #else 239 1.291 rmind .e_syscall = syscall, 240 1.133 mycroft #endif 241 1.291 rmind .e_sysctlovly = NULL, 242 1.291 rmind .e_vm_default_addr = uvm_default_mapaddr, 243 1.291 rmind .e_usertrap = NULL, 244 1.291 rmind .e_ucsize = sizeof(ucontext_t), 245 1.291 rmind .e_startlwp = startlwp 246 1.124 jdolecek }; 247 1.124 jdolecek 248 1.55 cgd /* 249 1.130 jdolecek * Exec lock. Used to control access to execsw[] structures. 250 1.130 jdolecek * This must not be static so that netbsd32 can access it, too. 251 1.130 jdolecek */ 252 1.489 ad krwlock_t exec_lock __cacheline_aligned; 253 1.352 rmind 254 1.337 martin /* 255 1.337 martin * Data used between a loadvm and execve part of an "exec" operation 256 1.337 martin */ 257 1.337 martin struct execve_data { 258 1.337 martin struct exec_package ed_pack; 259 1.337 martin struct pathbuf *ed_pathbuf; 260 1.337 martin struct vattr ed_attr; 261 1.337 martin struct ps_strings ed_arginfo; 262 1.337 martin char *ed_argp; 263 1.337 martin const char *ed_pathstring; 264 1.480 christos char *ed_resolvedname; 265 1.337 martin size_t ed_ps_strings_sz; 266 1.337 martin int ed_szsigcode; 267 1.396 uebayasi size_t ed_argslen; 268 1.337 martin long ed_argc; 269 1.337 martin long ed_envc; 270 1.337 martin }; 271 1.337 martin 272 1.337 martin /* 273 1.337 martin * data passed from parent lwp to child during a posix_spawn() 274 1.337 martin */ 275 1.337 martin struct spawn_exec_data { 276 1.337 martin struct execve_data sed_exec; 277 1.348 martin struct posix_spawn_file_actions 278 1.337 martin *sed_actions; 279 1.337 martin struct posix_spawnattr *sed_attrs; 280 1.337 
martin struct proc *sed_parent; 281 1.337 martin kcondvar_t sed_cv_child_ready; 282 1.337 martin kmutex_t sed_mtx_child; 283 1.337 martin int sed_error; 284 1.526 riastrad bool sed_child_ready; 285 1.348 martin volatile uint32_t sed_refcnt; 286 1.337 martin }; 287 1.337 martin 288 1.448 riastrad static struct vm_map *exec_map; 289 1.448 riastrad static struct pool exec_pool; 290 1.448 riastrad 291 1.277 ad static void * 292 1.277 ad exec_pool_alloc(struct pool *pp, int flags) 293 1.277 ad { 294 1.277 ad 295 1.448 riastrad return (void *)uvm_km_alloc(exec_map, NCARGS, 0, 296 1.277 ad UVM_KMF_PAGEABLE | UVM_KMF_WAITVA); 297 1.277 ad } 298 1.277 ad 299 1.277 ad static void 300 1.277 ad exec_pool_free(struct pool *pp, void *addr) 301 1.277 ad { 302 1.277 ad 303 1.448 riastrad uvm_km_free(exec_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE); 304 1.277 ad } 305 1.277 ad 306 1.277 ad static struct pool_allocator exec_palloc = { 307 1.277 ad .pa_alloc = exec_pool_alloc, 308 1.277 ad .pa_free = exec_pool_free, 309 1.277 ad .pa_pagesz = NCARGS 310 1.277 ad }; 311 1.277 ad 312 1.479 christos static void 313 1.479 christos exec_path_free(struct execve_data *data) 314 1.525 riastrad { 315 1.479 christos pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring); 316 1.479 christos pathbuf_destroy(data->ed_pathbuf); 317 1.480 christos if (data->ed_resolvedname) 318 1.480 christos PNBUF_PUT(data->ed_resolvedname); 319 1.479 christos } 320 1.479 christos 321 1.491 christos static int 322 1.480 christos exec_resolvename(struct lwp *l, struct exec_package *epp, struct vnode *vp, 323 1.480 christos char **rpath) 324 1.480 christos { 325 1.480 christos int error; 326 1.480 christos char *p; 327 1.480 christos 328 1.480 christos KASSERT(rpath != NULL); 329 1.480 christos 330 1.480 christos *rpath = PNBUF_GET(); 331 1.480 christos error = vnode_to_path(*rpath, MAXPATHLEN, vp, l, l->l_proc); 332 1.480 christos if (error) { 333 1.491 christos DPRINTF(("%s: can't resolve name for %s, 
error %d\n", 334 1.491 christos __func__, epp->ep_kname, error)); 335 1.480 christos PNBUF_PUT(*rpath); 336 1.480 christos *rpath = NULL; 337 1.491 christos return error; 338 1.480 christos } 339 1.480 christos epp->ep_resolvedname = *rpath; 340 1.480 christos if ((p = strrchr(*rpath, '/')) != NULL) 341 1.480 christos epp->ep_kname = p + 1; 342 1.491 christos return 0; 343 1.480 christos } 344 1.480 christos 345 1.480 christos 346 1.130 jdolecek /* 347 1.55 cgd * check exec: 348 1.55 cgd * given an "executable" described in the exec package's namei info, 349 1.55 cgd * see what we can do with it. 350 1.55 cgd * 351 1.55 cgd * ON ENTRY: 352 1.55 cgd * exec package with appropriate namei info 353 1.212 christos * lwp pointer of exec'ing lwp 354 1.55 cgd * NO SELF-LOCKED VNODES 355 1.55 cgd * 356 1.55 cgd * ON EXIT: 357 1.55 cgd * error: nothing held, etc. exec header still allocated. 358 1.77 cgd * ok: filled exec package, executable's vnode (unlocked). 359 1.55 cgd * 360 1.55 cgd * EXEC SWITCH ENTRY: 361 1.55 cgd * Locked vnode to check, exec package, proc. 362 1.55 cgd * 363 1.55 cgd * EXEC SWITCH EXIT: 364 1.77 cgd * ok: return 0, filled exec package, executable's vnode (unlocked). 365 1.55 cgd * error: destructive: 366 1.55 cgd * everything deallocated execept exec header. 367 1.76 cgd * non-destructive: 368 1.77 cgd * error code, executable's vnode (unlocked), 369 1.76 cgd * exec header unmodified. 370 1.55 cgd */ 371 1.55 cgd int 372 1.352 rmind /*ARGSUSED*/ 373 1.480 christos check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb, 374 1.480 christos char **rpath) 375 1.55 cgd { 376 1.138 lukem int error, i; 377 1.138 lukem struct vnode *vp; 378 1.138 lukem size_t resid; 379 1.55 cgd 380 1.480 christos if (epp->ep_resolvedname) { 381 1.480 christos struct nameidata nd; 382 1.480 christos 383 1.480 christos // grab the absolute pathbuf here before namei() trashes it. 
384 1.480 christos pathbuf_copystring(pb, epp->ep_resolvedname, PATH_MAX); 385 1.480 christos NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 386 1.295 dholland 387 1.480 christos /* first get the vnode */ 388 1.480 christos if ((error = namei(&nd)) != 0) 389 1.480 christos return error; 390 1.295 dholland 391 1.480 christos epp->ep_vp = vp = nd.ni_vp; 392 1.296 dholland #ifdef DIAGNOSTIC 393 1.480 christos /* paranoia (take this out once namei stuff stabilizes) */ 394 1.480 christos memset(nd.ni_pnbuf, '~', PATH_MAX); 395 1.295 dholland #endif 396 1.480 christos } else { 397 1.480 christos struct file *fp; 398 1.480 christos 399 1.480 christos if ((error = fd_getvnode(epp->ep_xfd, &fp)) != 0) 400 1.480 christos return error; 401 1.480 christos epp->ep_vp = vp = fp->f_vnode; 402 1.480 christos vref(vp); 403 1.480 christos fd_putfile(epp->ep_xfd); 404 1.491 christos if ((error = exec_resolvename(l, epp, vp, rpath)) != 0) 405 1.491 christos return error; 406 1.480 christos vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 407 1.480 christos } 408 1.55 cgd 409 1.84 mycroft /* check access and type */ 410 1.55 cgd if (vp->v_type != VREG) { 411 1.524 riastrad error = SET_ERROR(EACCES); 412 1.55 cgd goto bad1; 413 1.55 cgd } 414 1.254 pooka if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0) 415 1.84 mycroft goto bad1; 416 1.55 cgd 417 1.55 cgd /* get attributes */ 418 1.487 ad /* XXX VOP_GETATTR is the only thing that needs LK_EXCLUSIVE here */ 419 1.254 pooka if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0) 420 1.55 cgd goto bad1; 421 1.55 cgd 422 1.55 cgd /* Check mount point */ 423 1.55 cgd if (vp->v_mount->mnt_flag & MNT_NOEXEC) { 424 1.524 riastrad error = SET_ERROR(EACCES); 425 1.55 cgd goto bad1; 426 1.55 cgd } 427 1.141 thorpej if (vp->v_mount->mnt_flag & MNT_NOSUID) 428 1.83 mycroft epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID); 429 1.55 cgd 430 1.55 cgd /* try to open it */ 431 1.254 pooka if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0) 432 1.55 
cgd goto bad1; 433 1.55 cgd 434 1.487 ad /* now we have the file, get the exec header */ 435 1.487 ad error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0, 436 1.487 ad UIO_SYSSPACE, IO_NODELOCKED, l->l_cred, &resid, NULL); 437 1.487 ad if (error) 438 1.487 ad goto bad1; 439 1.487 ad 440 1.99 wrstuden /* unlock vp, since we need it unlocked from here on out. */ 441 1.298 hannken VOP_UNLOCK(vp); 442 1.77 cgd 443 1.222 elad #if NVERIEXEC > 0 444 1.480 christos error = veriexec_verify(l, vp, 445 1.480 christos epp->ep_resolvedname ? epp->ep_resolvedname : epp->ep_kname, 446 1.233 elad epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT, 447 1.236 elad NULL); 448 1.236 elad if (error) 449 1.234 elad goto bad2; 450 1.222 elad #endif /* NVERIEXEC > 0 */ 451 1.160 blymn 452 1.232 elad #ifdef PAX_SEGVGUARD 453 1.295 dholland error = pax_segvguard(l, vp, epp->ep_resolvedname, false); 454 1.234 elad if (error) 455 1.234 elad goto bad2; 456 1.232 elad #endif /* PAX_SEGVGUARD */ 457 1.232 elad 458 1.55 cgd epp->ep_hdrvalid = epp->ep_hdrlen - resid; 459 1.55 cgd 460 1.55 cgd /* 461 1.136 eeh * Set up default address space limits. Can be overridden 462 1.136 eeh * by individual exec packages. 463 1.136 eeh */ 464 1.436 maxv epp->ep_vm_minaddr = exec_vm_minaddr(VM_MIN_ADDRESS); 465 1.136 eeh epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS; 466 1.436 maxv 467 1.136 eeh /* 468 1.55 cgd * set up the vmcmds for creation of the process 469 1.55 cgd * address space 470 1.55 cgd */ 471 1.524 riastrad error = nexecs == 0 ? 
SET_ERROR(ENOEXEC) : ENOEXEC; 472 1.244 dsl for (i = 0; i < nexecs; i++) { 473 1.68 cgd int newerror; 474 1.68 cgd 475 1.130 jdolecek epp->ep_esch = execsw[i]; 476 1.212 christos newerror = (*execsw[i]->es_makecmds)(l, epp); 477 1.244 dsl 478 1.244 dsl if (!newerror) { 479 1.318 reinoud /* Seems ok: check that entry point is not too high */ 480 1.456 maxv if (epp->ep_entry >= epp->ep_vm_maxaddr) { 481 1.322 reinoud #ifdef DIAGNOSTIC 482 1.329 reinoud printf("%s: rejecting %p due to " 483 1.456 maxv "too high entry address (>= %p)\n", 484 1.331 christos __func__, (void *)epp->ep_entry, 485 1.331 christos (void *)epp->ep_vm_maxaddr); 486 1.322 reinoud #endif 487 1.524 riastrad error = SET_ERROR(ENOEXEC); 488 1.318 reinoud break; 489 1.318 reinoud } 490 1.318 reinoud /* Seems ok: check that entry point is not too low */ 491 1.323 reinoud if (epp->ep_entry < epp->ep_vm_minaddr) { 492 1.322 reinoud #ifdef DIAGNOSTIC 493 1.329 reinoud printf("%s: rejecting %p due to " 494 1.331 christos "too low entry address (< %p)\n", 495 1.331 christos __func__, (void *)epp->ep_entry, 496 1.331 christos (void *)epp->ep_vm_minaddr); 497 1.322 reinoud #endif 498 1.524 riastrad error = SET_ERROR(ENOEXEC); 499 1.244 dsl break; 500 1.244 dsl } 501 1.244 dsl 502 1.244 dsl /* check limits */ 503 1.503 wiz #ifdef DIAGNOSTIC 504 1.503 wiz #define LMSG "%s: rejecting due to %s limit (%ju > %ju)\n" 505 1.503 wiz #endif 506 1.502 christos #ifdef MAXTSIZ 507 1.502 christos if (epp->ep_tsize > MAXTSIZ) { 508 1.322 reinoud #ifdef DIAGNOSTIC 509 1.502 christos printf(LMSG, __func__, "text", 510 1.502 christos (uintmax_t)epp->ep_tsize, 511 1.502 christos (uintmax_t)MAXTSIZ); 512 1.502 christos #endif 513 1.524 riastrad error = SET_ERROR(ENOMEM); 514 1.502 christos break; 515 1.502 christos } 516 1.502 christos #endif 517 1.502 christos vsize_t dlimit = 518 1.502 christos (vsize_t)l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur; 519 1.502 christos if (epp->ep_dsize > dlimit) { 520 1.502 christos #ifdef 
DIAGNOSTIC 521 1.502 christos printf(LMSG, __func__, "data", 522 1.502 christos (uintmax_t)epp->ep_dsize, 523 1.502 christos (uintmax_t)dlimit); 524 1.322 reinoud #endif 525 1.524 riastrad error = SET_ERROR(ENOMEM); 526 1.244 dsl break; 527 1.244 dsl } 528 1.244 dsl return 0; 529 1.244 dsl } 530 1.244 dsl 531 1.421 maxv /* 532 1.421 maxv * Reset all the fields that may have been modified by the 533 1.421 maxv * loader. 534 1.421 maxv */ 535 1.421 maxv KASSERT(epp->ep_emul_arg == NULL); 536 1.244 dsl if (epp->ep_emul_root != NULL) { 537 1.244 dsl vrele(epp->ep_emul_root); 538 1.244 dsl epp->ep_emul_root = NULL; 539 1.244 dsl } 540 1.244 dsl if (epp->ep_interp != NULL) { 541 1.244 dsl vrele(epp->ep_interp); 542 1.244 dsl epp->ep_interp = NULL; 543 1.244 dsl } 544 1.421 maxv epp->ep_pax_flags = 0; 545 1.244 dsl 546 1.68 cgd /* make sure the first "interesting" error code is saved. */ 547 1.244 dsl if (error == ENOEXEC) 548 1.68 cgd error = newerror; 549 1.124 jdolecek 550 1.244 dsl if (epp->ep_flags & EXEC_DESTR) 551 1.244 dsl /* Error from "#!" code, tidied up by recursive call */ 552 1.55 cgd return error; 553 1.55 cgd } 554 1.55 cgd 555 1.249 pooka /* not found, error */ 556 1.249 pooka 557 1.55 cgd /* 558 1.55 cgd * free any vmspace-creation commands, 559 1.55 cgd * and release their references 560 1.55 cgd */ 561 1.55 cgd kill_vmcmds(&epp->ep_vmcmds); 562 1.55 cgd 563 1.487 ad #if NVERIEXEC > 0 || defined(PAX_SEGVGUARD) 564 1.55 cgd bad2: 565 1.487 ad #endif 566 1.55 cgd /* 567 1.99 wrstuden * close and release the vnode, restore the old one, free the 568 1.55 cgd * pathname buf, and punt. 569 1.55 cgd */ 570 1.99 wrstuden vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 571 1.254 pooka VOP_CLOSE(vp, FREAD, l->l_cred); 572 1.99 wrstuden vput(vp); 573 1.55 cgd return error; 574 1.55 cgd 575 1.55 cgd bad1: 576 1.55 cgd /* 577 1.55 cgd * free the namei pathname buffer, and put the vnode 578 1.55 cgd * (which we don't yet have open). 
579 1.55 cgd */ 580 1.77 cgd vput(vp); /* was still locked */ 581 1.55 cgd return error; 582 1.55 cgd } 583 1.55 cgd 584 1.188 chs #ifdef __MACHINE_STACK_GROWS_UP 585 1.188 chs #define STACK_PTHREADSPACE NBPG 586 1.188 chs #else 587 1.188 chs #define STACK_PTHREADSPACE 0 588 1.188 chs #endif 589 1.188 chs 590 1.204 cube static int 591 1.204 cube execve_fetch_element(char * const *array, size_t index, char **value) 592 1.204 cube { 593 1.204 cube return copyin(array + index, value, sizeof(*value)); 594 1.204 cube } 595 1.204 cube 596 1.55 cgd /* 597 1.55 cgd * exec system call 598 1.55 cgd */ 599 1.75 christos int 600 1.258 dsl sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval) 601 1.71 thorpej { 602 1.258 dsl /* { 603 1.138 lukem syscallarg(const char *) path; 604 1.138 lukem syscallarg(char * const *) argp; 605 1.138 lukem syscallarg(char * const *) envp; 606 1.258 dsl } */ 607 1.204 cube 608 1.481 christos return execve1(l, true, SCARG(uap, path), -1, SCARG(uap, argp), 609 1.204 cube SCARG(uap, envp), execve_fetch_element); 610 1.204 cube } 611 1.204 cube 612 1.376 maxv int 613 1.317 manu sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap, 614 1.317 manu register_t *retval) 615 1.317 manu { 616 1.317 manu /* { 617 1.317 manu syscallarg(int) fd; 618 1.317 manu syscallarg(char * const *) argp; 619 1.317 manu syscallarg(char * const *) envp; 620 1.317 manu } */ 621 1.317 manu 622 1.481 christos return execve1(l, false, NULL, SCARG(uap, fd), SCARG(uap, argp), 623 1.480 christos SCARG(uap, envp), execve_fetch_element); 624 1.317 manu } 625 1.317 manu 626 1.282 ad /* 627 1.282 ad * Load modules to try and execute an image that we do not understand. 628 1.282 ad * If no execsw entries are present, we load those likely to be needed 629 1.282 ad * in order to run native images only. Otherwise, we autoload all 630 1.282 ad * possible modules that could let us run the binary. 
XXX lame 631 1.282 ad */ 632 1.282 ad static void 633 1.282 ad exec_autoload(void) 634 1.282 ad { 635 1.282 ad #ifdef MODULAR 636 1.282 ad static const char * const native[] = { 637 1.282 ad "exec_elf32", 638 1.282 ad "exec_elf64", 639 1.282 ad "exec_script", 640 1.282 ad NULL 641 1.282 ad }; 642 1.282 ad static const char * const compat[] = { 643 1.282 ad "exec_elf32", 644 1.282 ad "exec_elf64", 645 1.282 ad "exec_script", 646 1.282 ad "exec_aout", 647 1.282 ad "exec_coff", 648 1.282 ad "exec_ecoff", 649 1.282 ad "compat_aoutm68k", 650 1.282 ad "compat_netbsd32", 651 1.491 christos #if 0 652 1.491 christos "compat_linux", 653 1.491 christos "compat_linux32", 654 1.491 christos #endif 655 1.282 ad "compat_sunos", 656 1.282 ad "compat_sunos32", 657 1.282 ad "compat_ultrix", 658 1.282 ad NULL 659 1.282 ad }; 660 1.282 ad char const * const *list; 661 1.282 ad int i; 662 1.282 ad 663 1.491 christos list = nexecs == 0 ? native : compat; 664 1.282 ad for (i = 0; list[i] != NULL; i++) { 665 1.363 christos if (module_autoload(list[i], MODULE_CLASS_EXEC) != 0) { 666 1.376 maxv continue; 667 1.282 ad } 668 1.376 maxv yield(); 669 1.282 ad } 670 1.282 ad #endif 671 1.282 ad } 672 1.282 ad 673 1.470 christos /* 674 1.470 christos * Copy the user or kernel supplied upath to the allocated pathbuffer pbp 675 1.470 christos * making it absolute in the process, by prepending the current working 676 1.471 wiz * directory if it is not. If offs is supplied it will contain the offset 677 1.470 christos * where the original supplied copy of upath starts. 
678 1.470 christos */ 679 1.457 christos int 680 1.457 christos exec_makepathbuf(struct lwp *l, const char *upath, enum uio_seg seg, 681 1.457 christos struct pathbuf **pbp, size_t *offs) 682 1.414 christos { 683 1.414 christos char *path, *bp; 684 1.415 christos size_t len, tlen; 685 1.414 christos int error; 686 1.498 ad struct cwdinfo *cwdi; 687 1.414 christos 688 1.414 christos path = PNBUF_GET(); 689 1.457 christos if (seg == UIO_SYSSPACE) { 690 1.457 christos error = copystr(upath, path, MAXPATHLEN, &len); 691 1.457 christos } else { 692 1.457 christos error = copyinstr(upath, path, MAXPATHLEN, &len); 693 1.457 christos } 694 1.474 maxv if (error) 695 1.472 christos goto err; 696 1.414 christos 697 1.415 christos if (path[0] == '/') { 698 1.457 christos if (offs) 699 1.457 christos *offs = 0; 700 1.414 christos goto out; 701 1.415 christos } 702 1.414 christos 703 1.414 christos len++; 704 1.477 maxv if (len + 1 >= MAXPATHLEN) { 705 1.524 riastrad error = SET_ERROR(ENAMETOOLONG); 706 1.475 christos goto err; 707 1.477 maxv } 708 1.414 christos bp = path + MAXPATHLEN - len; 709 1.414 christos memmove(bp, path, len); 710 1.414 christos *(--bp) = '/'; 711 1.414 christos 712 1.498 ad cwdi = l->l_proc->p_cwdi; 713 1.498 ad rw_enter(&cwdi->cwdi_lock, RW_READER); 714 1.498 ad error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2, 715 1.414 christos GETCWD_CHECK_ACCESS, l); 716 1.498 ad rw_exit(&cwdi->cwdi_lock); 717 1.414 christos 718 1.474 maxv if (error) 719 1.472 christos goto err; 720 1.415 christos tlen = path + MAXPATHLEN - bp; 721 1.414 christos 722 1.415 christos memmove(path, bp, tlen); 723 1.473 christos path[tlen - 1] = '\0'; 724 1.457 christos if (offs) 725 1.457 christos *offs = tlen - len; 726 1.414 christos out: 727 1.415 christos *pbp = pathbuf_assimilate(path); 728 1.415 christos return 0; 729 1.472 christos err: 730 1.472 christos PNBUF_PUT(path); 731 1.472 christos return error; 732 1.414 christos } 733 1.414 christos 734 1.436 
maxv vaddr_t 735 1.436 maxv exec_vm_minaddr(vaddr_t va_min) 736 1.436 maxv { 737 1.436 maxv /* 738 1.436 maxv * Increase va_min if we don't want NULL to be mappable by the 739 1.436 maxv * process. 740 1.436 maxv */ 741 1.437 christos #define VM_MIN_GUARD PAGE_SIZE 742 1.436 maxv if (user_va0_disable && (va_min < VM_MIN_GUARD)) 743 1.436 maxv return VM_MIN_GUARD; 744 1.436 maxv return va_min; 745 1.436 maxv } 746 1.436 maxv 747 1.337 martin static int 748 1.481 christos execve_loadvm(struct lwp *l, bool has_path, const char *path, int fd, 749 1.481 christos char * const *args, char * const *envs, 750 1.481 christos execve_fetch_element_t fetch_element, 751 1.337 martin struct execve_data * restrict data) 752 1.204 cube { 753 1.378 uebayasi struct exec_package * const epp = &data->ed_pack; 754 1.153 thorpej int error; 755 1.164 thorpej struct proc *p; 756 1.391 uebayasi char *dp; 757 1.282 ad u_int modgen; 758 1.337 martin 759 1.337 martin KASSERT(data != NULL); 760 1.55 cgd 761 1.237 ad p = l->l_proc; 762 1.376 maxv modgen = 0; 763 1.164 thorpej 764 1.418 christos SDT_PROBE(proc, kernel, , exec, path, 0, 0, 0, 0); 765 1.294 darran 766 1.149 christos /* 767 1.269 christos * Check if we have exceeded our number of processes limit. 768 1.269 christos * This is so that we handle the case where a root daemon 769 1.269 christos * forked, ran setuid to become the desired user and is trying 770 1.269 christos * to exec. The obvious place to do the reference counting check 771 1.269 christos * is setuid(), but we don't do the reference counting check there 772 1.269 christos * like other OS's do because then all the programs that use setuid() 773 1.269 christos * must be modified to check the return code of setuid() and exit(). 774 1.269 christos * It is dangerous to make setuid() fail, because it fails open and 775 1.269 christos * the program will continue to run as root. 
If we make it succeed 776 1.269 christos * and return an error code, again we are not enforcing the limit. 777 1.269 christos * The best place to enforce the limit is here, when the process tries 778 1.269 christos * to execute a new image, because eventually the process will need 779 1.269 christos * to call exec in order to do something useful. 780 1.269 christos */ 781 1.282 ad retry: 782 1.347 elad if (p->p_flag & PK_SUGID) { 783 1.347 elad if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT, 784 1.524 riastrad p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS), 785 1.524 riastrad &p->p_rlimit[RLIMIT_NPROC], 786 1.524 riastrad KAUTH_ARG(RLIMIT_NPROC)) != 0 && 787 1.347 elad chgproccnt(kauth_cred_getuid(l->l_cred), 0) > 788 1.524 riastrad p->p_rlimit[RLIMIT_NPROC].rlim_cur) 789 1.524 riastrad return SET_ERROR(EAGAIN); 790 1.347 elad } 791 1.269 christos 792 1.269 christos /* 793 1.352 rmind * Drain existing references and forbid new ones. The process 794 1.352 rmind * should be left alone until we're done here. This is necessary 795 1.352 rmind * to avoid race conditions - e.g. in ptrace() - that might allow 796 1.352 rmind * a local user to illicitly obtain elevated privileges. 797 1.352 rmind */ 798 1.352 rmind rw_enter(&p->p_reflock, RW_WRITER); 799 1.352 rmind 800 1.481 christos if (has_path) { 801 1.480 christos size_t offs; 802 1.480 christos /* 803 1.480 christos * Init the namei data to point the file user's program name. 804 1.480 christos * This is done here rather than in check_exec(), so that it's 805 1.480 christos * possible to override this settings if any of makecmd/probe 806 1.480 christos * functions call check_exec() recursively - for example, 807 1.480 christos * see exec_script_makecmds(). 
808 1.480 christos */ 809 1.480 christos if ((error = exec_makepathbuf(l, path, UIO_USERSPACE, 810 1.480 christos &data->ed_pathbuf, &offs)) != 0) 811 1.480 christos goto clrflg; 812 1.480 christos data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf); 813 1.480 christos epp->ep_kname = data->ed_pathstring + offs; 814 1.480 christos data->ed_resolvedname = PNBUF_GET(); 815 1.480 christos epp->ep_resolvedname = data->ed_resolvedname; 816 1.480 christos epp->ep_xfd = -1; 817 1.481 christos } else { 818 1.481 christos data->ed_pathbuf = pathbuf_assimilate(strcpy(PNBUF_GET(), "/")); 819 1.481 christos data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf); 820 1.481 christos epp->ep_kname = "*fexecve*"; 821 1.481 christos data->ed_resolvedname = NULL; 822 1.481 christos epp->ep_resolvedname = NULL; 823 1.481 christos epp->ep_xfd = fd; 824 1.480 christos } 825 1.480 christos 826 1.55 cgd 827 1.55 cgd /* 828 1.55 cgd * initialize the fields of the exec package. 829 1.55 cgd */ 830 1.378 uebayasi epp->ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP); 831 1.378 uebayasi epp->ep_hdrlen = exec_maxhdrsz; 832 1.378 uebayasi epp->ep_hdrvalid = 0; 833 1.378 uebayasi epp->ep_emul_arg = NULL; 834 1.378 uebayasi epp->ep_emul_arg_free = NULL; 835 1.378 uebayasi memset(&epp->ep_vmcmds, 0, sizeof(epp->ep_vmcmds)); 836 1.378 uebayasi epp->ep_vap = &data->ed_attr; 837 1.411 christos epp->ep_flags = (p->p_flag & PK_32) ? EXEC_FROM32 : 0; 838 1.378 uebayasi MD_TOPDOWN_INIT(epp); 839 1.378 uebayasi epp->ep_emul_root = NULL; 840 1.378 uebayasi epp->ep_interp = NULL; 841 1.378 uebayasi epp->ep_esch = NULL; 842 1.378 uebayasi epp->ep_pax_flags = 0; 843 1.378 uebayasi memset(epp->ep_machine_arch, 0, sizeof(epp->ep_machine_arch)); 844 1.55 cgd 845 1.237 ad rw_enter(&exec_lock, RW_READER); 846 1.130 jdolecek 847 1.55 cgd /* see if we can run it. 
*/ 848 1.480 christos if ((error = check_exec(l, epp, data->ed_pathbuf, 849 1.480 christos &data->ed_resolvedname)) != 0) { 850 1.454 christos if (error != ENOENT && error != EACCES && error != ENOEXEC) { 851 1.447 martin DPRINTF(("%s: check exec failed for %s, error %d\n", 852 1.447 martin __func__, epp->ep_kname, error)); 853 1.261 xtraeme } 854 1.352 rmind goto freehdr; 855 1.248 christos } 856 1.55 cgd 857 1.55 cgd /* allocate an argument buffer */ 858 1.337 martin data->ed_argp = pool_get(&exec_pool, PR_WAITOK); 859 1.337 martin KASSERT(data->ed_argp != NULL); 860 1.337 martin dp = data->ed_argp; 861 1.55 cgd 862 1.391 uebayasi if ((error = copyinargs(data, args, envs, fetch_element, &dp)) != 0) { 863 1.55 cgd goto bad; 864 1.55 cgd } 865 1.61 mycroft 866 1.379 uebayasi /* 867 1.379 uebayasi * Calculate the new stack size. 868 1.379 uebayasi */ 869 1.379 uebayasi 870 1.267 dsl #ifdef __MACHINE_STACK_GROWS_UP 871 1.386 uebayasi /* 872 1.386 uebayasi * copyargs() fills argc/argv/envp from the lower address even on 873 1.386 uebayasi * __MACHINE_STACK_GROWS_UP machines. Reserve a few words just below the SP 874 1.386 uebayasi * so that _rtld() use it. 
875 1.386 uebayasi */ 876 1.267 dsl #define RTLD_GAP 32 877 1.267 dsl #else 878 1.267 dsl #define RTLD_GAP 0 879 1.267 dsl #endif 880 1.267 dsl 881 1.396 uebayasi const size_t argenvstrlen = (char *)ALIGN(dp) - data->ed_argp; 882 1.386 uebayasi 883 1.396 uebayasi data->ed_argslen = calcargs(data, argenvstrlen); 884 1.386 uebayasi 885 1.430 christos const size_t len = calcstack(data, pax_aslr_stack_gap(epp) + RTLD_GAP); 886 1.55 cgd 887 1.396 uebayasi if (len > epp->ep_ssize) { 888 1.337 martin /* in effect, compare to initial limit */ 889 1.396 uebayasi DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len)); 890 1.524 riastrad error = SET_ERROR(ENOMEM); 891 1.55 cgd goto bad; 892 1.55 cgd } 893 1.337 martin /* adjust "active stack depth" for process VSZ */ 894 1.396 uebayasi epp->ep_ssize = len; 895 1.337 martin 896 1.337 martin return 0; 897 1.337 martin 898 1.352 rmind bad: 899 1.352 rmind /* free the vmspace-creation commands, and release their references */ 900 1.378 uebayasi kill_vmcmds(&epp->ep_vmcmds); 901 1.352 rmind /* kill any opened file descriptor, if necessary */ 902 1.378 uebayasi if (epp->ep_flags & EXEC_HASFD) { 903 1.378 uebayasi epp->ep_flags &= ~EXEC_HASFD; 904 1.378 uebayasi fd_close(epp->ep_fd); 905 1.352 rmind } 906 1.352 rmind /* close and put the exec'd file */ 907 1.378 uebayasi vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY); 908 1.378 uebayasi VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred); 909 1.378 uebayasi vput(epp->ep_vp); 910 1.352 rmind pool_put(&exec_pool, data->ed_argp); 911 1.352 rmind 912 1.352 rmind freehdr: 913 1.378 uebayasi kmem_free(epp->ep_hdr, epp->ep_hdrlen); 914 1.378 uebayasi if (epp->ep_emul_root != NULL) 915 1.378 uebayasi vrele(epp->ep_emul_root); 916 1.378 uebayasi if (epp->ep_interp != NULL) 917 1.378 uebayasi vrele(epp->ep_interp); 918 1.352 rmind 919 1.337 martin rw_exit(&exec_lock); 920 1.352 rmind 921 1.479 christos exec_path_free(data); 922 1.352 rmind 923 1.352 rmind clrflg: 924 1.351 rmind rw_exit(&p->p_reflock); 
925 1.337 martin 926 1.337 martin if (modgen != module_gen && error == ENOEXEC) { 927 1.337 martin modgen = module_gen; 928 1.337 martin exec_autoload(); 929 1.337 martin goto retry; 930 1.337 martin } 931 1.337 martin 932 1.418 christos SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); 933 1.337 martin return error; 934 1.337 martin } 935 1.337 martin 936 1.401 uebayasi static int 937 1.401 uebayasi execve_dovmcmds(struct lwp *l, struct execve_data * restrict data) 938 1.401 uebayasi { 939 1.401 uebayasi struct exec_package * const epp = &data->ed_pack; 940 1.401 uebayasi struct proc *p = l->l_proc; 941 1.401 uebayasi struct exec_vmcmd *base_vcp; 942 1.401 uebayasi int error = 0; 943 1.407 riastrad size_t i; 944 1.401 uebayasi 945 1.401 uebayasi /* record proc's vnode, for use by procfs and others */ 946 1.401 uebayasi if (p->p_textvp) 947 1.401 uebayasi vrele(p->p_textvp); 948 1.401 uebayasi vref(epp->ep_vp); 949 1.401 uebayasi p->p_textvp = epp->ep_vp; 950 1.401 uebayasi 951 1.401 uebayasi /* create the new process's VM space by running the vmcmds */ 952 1.401 uebayasi KASSERTMSG(epp->ep_vmcmds.evs_used != 0, "%s: no vmcmds", __func__); 953 1.401 uebayasi 954 1.428 christos #ifdef TRACE_EXEC 955 1.401 uebayasi DUMPVMCMDS(epp, 0, 0); 956 1.428 christos #endif 957 1.401 uebayasi 958 1.401 uebayasi base_vcp = NULL; 959 1.401 uebayasi 960 1.401 uebayasi for (i = 0; i < epp->ep_vmcmds.evs_used && !error; i++) { 961 1.401 uebayasi struct exec_vmcmd *vcp; 962 1.401 uebayasi 963 1.401 uebayasi vcp = &epp->ep_vmcmds.evs_cmds[i]; 964 1.401 uebayasi if (vcp->ev_flags & VMCMD_RELATIVE) { 965 1.401 uebayasi KASSERTMSG(base_vcp != NULL, 966 1.401 uebayasi "%s: relative vmcmd with no base", __func__); 967 1.401 uebayasi KASSERTMSG((vcp->ev_flags & VMCMD_BASE) == 0, 968 1.401 uebayasi "%s: illegal base & relative vmcmd", __func__); 969 1.401 uebayasi vcp->ev_addr += base_vcp->ev_addr; 970 1.401 uebayasi } 971 1.401 uebayasi error = (*vcp->ev_proc)(l, vcp); 972 1.401 
uebayasi if (error) 973 1.401 uebayasi DUMPVMCMDS(epp, i, error); 974 1.401 uebayasi if (vcp->ev_flags & VMCMD_BASE) 975 1.401 uebayasi base_vcp = vcp; 976 1.401 uebayasi } 977 1.401 uebayasi 978 1.401 uebayasi /* free the vmspace-creation commands, and release their references */ 979 1.401 uebayasi kill_vmcmds(&epp->ep_vmcmds); 980 1.401 uebayasi 981 1.401 uebayasi vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY); 982 1.401 uebayasi VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred); 983 1.401 uebayasi vput(epp->ep_vp); 984 1.401 uebayasi 985 1.401 uebayasi /* if an error happened, deallocate and punt */ 986 1.401 uebayasi if (error != 0) { 987 1.401 uebayasi DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error)); 988 1.401 uebayasi } 989 1.401 uebayasi return error; 990 1.401 uebayasi } 991 1.401 uebayasi 992 1.352 rmind static void 993 1.352 rmind execve_free_data(struct execve_data *data) 994 1.352 rmind { 995 1.378 uebayasi struct exec_package * const epp = &data->ed_pack; 996 1.352 rmind 997 1.352 rmind /* free the vmspace-creation commands, and release their references */ 998 1.378 uebayasi kill_vmcmds(&epp->ep_vmcmds); 999 1.352 rmind /* kill any opened file descriptor, if necessary */ 1000 1.378 uebayasi if (epp->ep_flags & EXEC_HASFD) { 1001 1.378 uebayasi epp->ep_flags &= ~EXEC_HASFD; 1002 1.378 uebayasi fd_close(epp->ep_fd); 1003 1.352 rmind } 1004 1.352 rmind 1005 1.352 rmind /* close and put the exec'd file */ 1006 1.378 uebayasi vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY); 1007 1.378 uebayasi VOP_CLOSE(epp->ep_vp, FREAD, curlwp->l_cred); 1008 1.378 uebayasi vput(epp->ep_vp); 1009 1.352 rmind pool_put(&exec_pool, data->ed_argp); 1010 1.352 rmind 1011 1.378 uebayasi kmem_free(epp->ep_hdr, epp->ep_hdrlen); 1012 1.378 uebayasi if (epp->ep_emul_root != NULL) 1013 1.378 uebayasi vrele(epp->ep_emul_root); 1014 1.378 uebayasi if (epp->ep_interp != NULL) 1015 1.378 uebayasi vrele(epp->ep_interp); 1016 1.352 rmind 1017 1.479 christos exec_path_free(data); 1018 
1.352 rmind } 1019 1.352 rmind 1020 1.400 uebayasi static void 1021 1.450 christos pathexec(struct proc *p, const char *resolvedname) 1022 1.400 uebayasi { 1023 1.480 christos /* set command name & other accounting info */ 1024 1.480 christos const char *cmdname; 1025 1.400 uebayasi 1026 1.480 christos if (resolvedname == NULL) { 1027 1.480 christos cmdname = "*fexecve*"; 1028 1.480 christos resolvedname = "/"; 1029 1.480 christos } else { 1030 1.480 christos cmdname = strrchr(resolvedname, '/') + 1; 1031 1.480 christos } 1032 1.480 christos KASSERTMSG(resolvedname[0] == '/', "bad resolvedname `%s'", 1033 1.480 christos resolvedname); 1034 1.480 christos 1035 1.480 christos strlcpy(p->p_comm, cmdname, sizeof(p->p_comm)); 1036 1.400 uebayasi 1037 1.450 christos kmem_strfree(p->p_path); 1038 1.450 christos p->p_path = kmem_strdupsize(resolvedname, NULL, KM_SLEEP); 1039 1.400 uebayasi } 1040 1.400 uebayasi 1041 1.387 uebayasi /* XXX elsewhere */ 1042 1.387 uebayasi static int 1043 1.515 christos credexec(struct lwp *l, struct execve_data *data) 1044 1.387 uebayasi { 1045 1.387 uebayasi struct proc *p = l->l_proc; 1046 1.515 christos struct vattr *attr = &data->ed_attr; 1047 1.387 uebayasi int error; 1048 1.387 uebayasi 1049 1.387 uebayasi /* 1050 1.387 uebayasi * Deal with set[ug]id. MNT_NOSUID has already been used to disable 1051 1.387 uebayasi * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked 1052 1.387 uebayasi * out additional references on the process for the moment. 
1053 1.387 uebayasi */ 1054 1.387 uebayasi if ((p->p_slflag & PSL_TRACED) == 0 && 1055 1.387 uebayasi 1056 1.387 uebayasi (((attr->va_mode & S_ISUID) != 0 && 1057 1.387 uebayasi kauth_cred_geteuid(l->l_cred) != attr->va_uid) || 1058 1.387 uebayasi 1059 1.387 uebayasi ((attr->va_mode & S_ISGID) != 0 && 1060 1.387 uebayasi kauth_cred_getegid(l->l_cred) != attr->va_gid))) { 1061 1.387 uebayasi /* 1062 1.387 uebayasi * Mark the process as SUGID before we do 1063 1.387 uebayasi * anything that might block. 1064 1.387 uebayasi */ 1065 1.387 uebayasi proc_crmod_enter(); 1066 1.387 uebayasi proc_crmod_leave(NULL, NULL, true); 1067 1.515 christos if (data->ed_argc == 0) { 1068 1.515 christos DPRINTF(( 1069 1.515 christos "%s: not executing set[ug]id binary with no args\n", 1070 1.515 christos __func__)); 1071 1.524 riastrad return SET_ERROR(EINVAL); 1072 1.515 christos } 1073 1.387 uebayasi 1074 1.387 uebayasi /* Make sure file descriptors 0..2 are in use. */ 1075 1.387 uebayasi if ((error = fd_checkstd()) != 0) { 1076 1.387 uebayasi DPRINTF(("%s: fdcheckstd failed %d\n", 1077 1.387 uebayasi __func__, error)); 1078 1.387 uebayasi return error; 1079 1.387 uebayasi } 1080 1.387 uebayasi 1081 1.387 uebayasi /* 1082 1.387 uebayasi * Copy the credential so other references don't see our 1083 1.387 uebayasi * changes. 1084 1.387 uebayasi */ 1085 1.387 uebayasi l->l_cred = kauth_cred_copy(l->l_cred); 1086 1.387 uebayasi #ifdef KTRACE 1087 1.387 uebayasi /* 1088 1.387 uebayasi * If the persistent trace flag isn't set, turn off. 
1089 1.387 uebayasi */ 1090 1.387 uebayasi if (p->p_tracep) { 1091 1.387 uebayasi mutex_enter(&ktrace_lock); 1092 1.387 uebayasi if (!(p->p_traceflag & KTRFAC_PERSISTENT)) 1093 1.387 uebayasi ktrderef(p); 1094 1.387 uebayasi mutex_exit(&ktrace_lock); 1095 1.387 uebayasi } 1096 1.387 uebayasi #endif 1097 1.387 uebayasi if (attr->va_mode & S_ISUID) 1098 1.387 uebayasi kauth_cred_seteuid(l->l_cred, attr->va_uid); 1099 1.387 uebayasi if (attr->va_mode & S_ISGID) 1100 1.387 uebayasi kauth_cred_setegid(l->l_cred, attr->va_gid); 1101 1.387 uebayasi } else { 1102 1.387 uebayasi if (kauth_cred_geteuid(l->l_cred) == 1103 1.387 uebayasi kauth_cred_getuid(l->l_cred) && 1104 1.387 uebayasi kauth_cred_getegid(l->l_cred) == 1105 1.387 uebayasi kauth_cred_getgid(l->l_cred)) 1106 1.387 uebayasi p->p_flag &= ~PK_SUGID; 1107 1.387 uebayasi } 1108 1.387 uebayasi 1109 1.387 uebayasi /* 1110 1.387 uebayasi * Copy the credential so other references don't see our changes. 1111 1.387 uebayasi * Test to see if this is necessary first, since in the common case 1112 1.387 uebayasi * we won't need a private reference. 1113 1.387 uebayasi */ 1114 1.387 uebayasi if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) || 1115 1.387 uebayasi kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) { 1116 1.387 uebayasi l->l_cred = kauth_cred_copy(l->l_cred); 1117 1.387 uebayasi kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred)); 1118 1.387 uebayasi kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred)); 1119 1.387 uebayasi } 1120 1.387 uebayasi 1121 1.387 uebayasi /* Update the master credentials. 
*/ 1122 1.387 uebayasi if (l->l_cred != p->p_cred) { 1123 1.387 uebayasi kauth_cred_t ocred; 1124 1.387 uebayasi mutex_enter(p->p_lock); 1125 1.387 uebayasi ocred = p->p_cred; 1126 1.520 ad p->p_cred = kauth_cred_hold(l->l_cred); 1127 1.387 uebayasi mutex_exit(p->p_lock); 1128 1.387 uebayasi kauth_cred_free(ocred); 1129 1.387 uebayasi } 1130 1.387 uebayasi 1131 1.387 uebayasi return 0; 1132 1.387 uebayasi } 1133 1.387 uebayasi 1134 1.406 uebayasi static void 1135 1.406 uebayasi emulexec(struct lwp *l, struct exec_package *epp) 1136 1.406 uebayasi { 1137 1.406 uebayasi struct proc *p = l->l_proc; 1138 1.406 uebayasi 1139 1.406 uebayasi /* The emulation root will usually have been found when we looked 1140 1.406 uebayasi * for the elf interpreter (or similar), if not look now. */ 1141 1.406 uebayasi if (epp->ep_esch->es_emul->e_path != NULL && 1142 1.406 uebayasi epp->ep_emul_root == NULL) 1143 1.406 uebayasi emul_find_root(l, epp); 1144 1.406 uebayasi 1145 1.406 uebayasi /* Any old emulation root got removed by fdcloseexec */ 1146 1.498 ad rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER); 1147 1.498 ad p->p_cwdi->cwdi_edir = epp->ep_emul_root; 1148 1.498 ad rw_exit(&p->p_cwdi->cwdi_lock); 1149 1.406 uebayasi epp->ep_emul_root = NULL; 1150 1.406 uebayasi if (epp->ep_interp != NULL) 1151 1.406 uebayasi vrele(epp->ep_interp); 1152 1.406 uebayasi 1153 1.406 uebayasi /* 1154 1.406 uebayasi * Call emulation specific exec hook. This can setup per-process 1155 1.406 uebayasi * p->p_emuldata or do any other per-process stuff an emulation needs. 1156 1.406 uebayasi * 1157 1.406 uebayasi * If we are executing process of different emulation than the 1158 1.406 uebayasi * original forked process, call e_proc_exit() of the old emulation 1159 1.406 uebayasi * first, then e_proc_exec() of new emulation. If the emulation is 1160 1.406 uebayasi * same, the exec hook code should deallocate any old emulation 1161 1.406 uebayasi * resources held previously by this process. 
1162 1.406 uebayasi */ 1163 1.406 uebayasi if (p->p_emul && p->p_emul->e_proc_exit 1164 1.406 uebayasi && p->p_emul != epp->ep_esch->es_emul) 1165 1.406 uebayasi (*p->p_emul->e_proc_exit)(p); 1166 1.406 uebayasi 1167 1.406 uebayasi /* 1168 1.406 uebayasi * Call exec hook. Emulation code may NOT store reference to anything 1169 1.406 uebayasi * from &pack. 1170 1.406 uebayasi */ 1171 1.406 uebayasi if (epp->ep_esch->es_emul->e_proc_exec) 1172 1.406 uebayasi (*epp->ep_esch->es_emul->e_proc_exec)(p, epp); 1173 1.406 uebayasi 1174 1.406 uebayasi /* update p_emul, the old value is no longer needed */ 1175 1.406 uebayasi p->p_emul = epp->ep_esch->es_emul; 1176 1.406 uebayasi 1177 1.406 uebayasi /* ...and the same for p_execsw */ 1178 1.406 uebayasi p->p_execsw = epp->ep_esch; 1179 1.406 uebayasi 1180 1.406 uebayasi #ifdef __HAVE_SYSCALL_INTERN 1181 1.406 uebayasi (*p->p_emul->e_syscall_intern)(p); 1182 1.406 uebayasi #endif 1183 1.406 uebayasi ktremul(); 1184 1.406 uebayasi } 1185 1.406 uebayasi 1186 1.337 martin static int 1187 1.348 martin execve_runproc(struct lwp *l, struct execve_data * restrict data, 1188 1.348 martin bool no_local_exec_lock, bool is_spawn) 1189 1.337 martin { 1190 1.378 uebayasi struct exec_package * const epp = &data->ed_pack; 1191 1.352 rmind int error = 0; 1192 1.352 rmind struct proc *p; 1193 1.488 ad struct vmspace *vm; 1194 1.337 martin 1195 1.348 martin /* 1196 1.348 martin * In case of a posix_spawn operation, the child doing the exec 1197 1.348 martin * might not hold the reader lock on exec_lock, but the parent 1198 1.348 martin * will do this instead. 1199 1.348 martin */ 1200 1.348 martin KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock)); 1201 1.381 uebayasi KASSERT(!no_local_exec_lock || is_spawn); 1202 1.337 martin KASSERT(data != NULL); 1203 1.352 rmind 1204 1.352 rmind p = l->l_proc; 1205 1.337 martin 1206 1.237 ad /* Get rid of other LWPs. 
*/ 1207 1.340 rmind if (p->p_nlwps > 1) { 1208 1.272 ad mutex_enter(p->p_lock); 1209 1.237 ad exit_lwps(l); 1210 1.272 ad mutex_exit(p->p_lock); 1211 1.237 ad } 1212 1.164 thorpej KDASSERT(p->p_nlwps == 1); 1213 1.164 thorpej 1214 1.507 thorpej /* 1215 1.507 thorpej * All of the other LWPs got rid of their robust futexes 1216 1.507 thorpej * when they exited above, but we might still have some 1217 1.507 thorpej * to dispose of. Do that now. 1218 1.507 thorpej */ 1219 1.507 thorpej if (__predict_false(l->l_robust_head != 0)) { 1220 1.508 thorpej futex_release_all_lwp(l); 1221 1.509 thorpej /* 1222 1.509 thorpej * Since this LWP will live on with a different 1223 1.509 thorpej * program image, we need to clear the robust 1224 1.509 thorpej * futex list pointer here. 1225 1.509 thorpej */ 1226 1.509 thorpej l->l_robust_head = 0; 1227 1.507 thorpej } 1228 1.507 thorpej 1229 1.253 ad /* Destroy any lwpctl info. */ 1230 1.253 ad if (p->p_lwpctl != NULL) 1231 1.253 ad lwp_ctl_exit(); 1232 1.253 ad 1233 1.164 thorpej /* Remove POSIX timers */ 1234 1.504 thorpej ptimers_free(p, TIMERS_POSIX); 1235 1.164 thorpej 1236 1.417 maxv /* Set the PaX flags. */ 1237 1.431 christos pax_set_flags(epp, p); 1238 1.417 maxv 1239 1.86 thorpej /* 1240 1.86 thorpej * Do whatever is necessary to prepare the address space 1241 1.86 thorpej * for remapping. Note that this might replace the current 1242 1.86 thorpej * vmspace with another! 1243 1.488 ad * 1244 1.488 ad * vfork(): do not touch any user space data in the new child 1245 1.488 ad * until we have awoken the parent below, or it will defeat 1246 1.488 ad * lazy pmap switching (on x86). 
1247 1.86 thorpej */ 1248 1.528 riastrad uvmspace_exec(l, epp->ep_vm_minaddr, epp->ep_vm_maxaddr, 1249 1.528 riastrad epp->ep_flags & EXEC_TOPDOWN_VM); 1250 1.488 ad vm = p->p_vmspace; 1251 1.55 cgd 1252 1.378 uebayasi vm->vm_taddr = (void *)epp->ep_taddr; 1253 1.378 uebayasi vm->vm_tsize = btoc(epp->ep_tsize); 1254 1.378 uebayasi vm->vm_daddr = (void*)epp->ep_daddr; 1255 1.378 uebayasi vm->vm_dsize = btoc(epp->ep_dsize); 1256 1.378 uebayasi vm->vm_ssize = btoc(epp->ep_ssize); 1257 1.288 mrg vm->vm_issize = 0; 1258 1.378 uebayasi vm->vm_maxsaddr = (void *)epp->ep_maxsaddr; 1259 1.378 uebayasi vm->vm_minsaddr = (void *)epp->ep_minsaddr; 1260 1.55 cgd 1261 1.424 khorben pax_aslr_init_vm(l, vm, epp); 1262 1.260 christos 1263 1.307 pooka cwdexec(p); 1264 1.531 kre fd_closeexec(); /* handle close on exec & close on fork */ 1265 1.315 alnsn 1266 1.315 alnsn if (__predict_false(ktrace_on)) 1267 1.315 alnsn fd_ktrexecfd(); 1268 1.315 alnsn 1269 1.438 kamil execsigs(p); /* reset caught signals */ 1270 1.183 junyoung 1271 1.380 uebayasi mutex_enter(p->p_lock); 1272 1.164 thorpej l->l_ctxlink = NULL; /* reset ucontext link */ 1273 1.55 cgd p->p_acflag &= ~AFORK; 1274 1.238 pavel p->p_flag |= PK_EXEC; 1275 1.272 ad mutex_exit(p->p_lock); 1276 1.237 ad 1277 1.515 christos error = credexec(l, data); 1278 1.488 ad if (error) 1279 1.488 ad goto exec_abort; 1280 1.488 ad 1281 1.488 ad #if defined(__HAVE_RAS) 1282 1.488 ad /* 1283 1.488 ad * Remove all RASs from the address space. 1284 1.488 ad */ 1285 1.488 ad ras_purgeall(); 1286 1.488 ad #endif 1287 1.488 ad 1288 1.237 ad /* 1289 1.237 ad * Stop profiling. 
1290 1.237 ad */ 1291 1.237 ad if ((p->p_stflag & PST_PROFIL) != 0) { 1292 1.237 ad mutex_spin_enter(&p->p_stmutex); 1293 1.237 ad stopprofclock(p); 1294 1.237 ad mutex_spin_exit(&p->p_stmutex); 1295 1.237 ad } 1296 1.237 ad 1297 1.237 ad /* 1298 1.275 ad * It's OK to test PL_PPWAIT unlocked here, as other LWPs have 1299 1.237 ad * exited and exec()/exit() are the only places it will be cleared. 1300 1.488 ad * 1301 1.488 ad * Once the parent has been awoken, curlwp may teleport to a new CPU 1302 1.488 ad * in sched_vforkexec(), and it's then OK to start messing with user 1303 1.488 ad * data. See comment above. 1304 1.237 ad */ 1305 1.275 ad if ((p->p_lflag & PL_PPWAIT) != 0) { 1306 1.488 ad bool samecpu; 1307 1.467 kamil lwp_t *lp; 1308 1.467 kamil 1309 1.501 ad mutex_enter(&proc_lock); 1310 1.467 kamil lp = p->p_vforklwp; 1311 1.467 kamil p->p_vforklwp = NULL; 1312 1.488 ad l->l_lwpctl = NULL; /* was on loan from blocked parent */ 1313 1.467 kamil 1314 1.488 ad /* Clear flags after cv_broadcast() (scheduler needs them). */ 1315 1.354 christos p->p_lflag &= ~PL_PPWAIT; 1316 1.467 kamil lp->l_vforkwaiting = false; 1317 1.467 kamil 1318 1.488 ad /* If parent is still on same CPU, teleport curlwp elsewhere. */ 1319 1.488 ad samecpu = (lp->l_cpu == curlwp->l_cpu); 1320 1.521 ad cv_broadcast(&lp->l_waitcv); 1321 1.501 ad mutex_exit(&proc_lock); 1322 1.488 ad 1323 1.488 ad /* Give the parent its CPU back - find a new home. */ 1324 1.488 ad KASSERT(!is_spawn); 1325 1.488 ad sched_vforkexec(l, samecpu); 1326 1.55 cgd } 1327 1.55 cgd 1328 1.488 ad /* Now map address space. 
*/ 1329 1.488 ad error = execve_dovmcmds(l, data); 1330 1.488 ad if (error != 0) 1331 1.387 uebayasi goto exec_abort; 1332 1.221 ad 1333 1.488 ad pathexec(p, epp->ep_resolvedname); 1334 1.488 ad 1335 1.488 ad char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize); 1336 1.488 ad 1337 1.488 ad error = copyoutargs(data, l, newstack); 1338 1.488 ad if (error != 0) 1339 1.488 ad goto exec_abort; 1340 1.107 fvdl 1341 1.107 fvdl doexechooks(p); 1342 1.55 cgd 1343 1.390 uebayasi /* 1344 1.390 uebayasi * Set initial SP at the top of the stack. 1345 1.390 uebayasi * 1346 1.390 uebayasi * Note that on machines where stack grows up (e.g. hppa), SP points to 1347 1.390 uebayasi * the end of arg/env strings. Userland guesses the address of argc 1348 1.390 uebayasi * via ps_strings::ps_argvstr. 1349 1.390 uebayasi */ 1350 1.390 uebayasi 1351 1.390 uebayasi /* Setup new registers and do misc. setup. */ 1352 1.397 uebayasi (*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack); 1353 1.378 uebayasi if (epp->ep_esch->es_setregs) 1354 1.397 uebayasi (*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack); 1355 1.55 cgd 1356 1.309 joerg /* Provide a consistent LWP private setting */ 1357 1.309 joerg (void)lwp_setprivate(l, NULL); 1358 1.309 joerg 1359 1.316 matt /* Discard all PCU state; need to start fresh */ 1360 1.316 matt pcu_discard_all(l); 1361 1.316 matt 1362 1.171 chs /* map the process's signal trampoline code */ 1363 1.378 uebayasi if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) { 1364 1.312 christos DPRINTF(("%s: map sigcode failed %d\n", __func__, error)); 1365 1.171 chs goto exec_abort; 1366 1.209 christos } 1367 1.171 chs 1368 1.337 martin pool_put(&exec_pool, data->ed_argp); 1369 1.276 ad 1370 1.510 thorpej /* 1371 1.510 thorpej * Notify anyone who might care that we've exec'd. 
1372 1.510 thorpej * 1373 1.510 thorpej * This is slightly racy; someone could sneak in and 1374 1.510 thorpej * attach a knote after we've decided not to notify, 1375 1.510 thorpej * or vice-versa, but that's not particularly bothersome. 1376 1.510 thorpej * knote_proc_exec() will acquire p->p_lock as needed. 1377 1.510 thorpej */ 1378 1.510 thorpej if (!SLIST_EMPTY(&p->p_klist)) { 1379 1.510 thorpej knote_proc_exec(p); 1380 1.510 thorpej } 1381 1.276 ad 1382 1.378 uebayasi kmem_free(epp->ep_hdr, epp->ep_hdrlen); 1383 1.122 jdolecek 1384 1.418 christos SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0); 1385 1.294 darran 1386 1.406 uebayasi emulexec(l, epp); 1387 1.85 mycroft 1388 1.252 ad /* Allow new references from the debugger/procfs. */ 1389 1.341 martin rw_exit(&p->p_reflock); 1390 1.348 martin if (!no_local_exec_lock) 1391 1.348 martin rw_exit(&exec_lock); 1392 1.162 manu 1393 1.501 ad mutex_enter(&proc_lock); 1394 1.237 ad 1395 1.466 kamil /* posix_spawn(3) reports a single event with implied exec(3) */ 1396 1.466 kamil if ((p->p_slflag & PSL_TRACED) && !is_spawn) { 1397 1.459 kamil mutex_enter(p->p_lock); 1398 1.482 kamil eventswitch(TRAP_EXEC, 0, 0); 1399 1.501 ad mutex_enter(&proc_lock); 1400 1.237 ad } 1401 1.162 manu 1402 1.237 ad if (p->p_sflag & PS_STOPEXEC) { 1403 1.383 uebayasi ksiginfoq_t kq; 1404 1.383 uebayasi 1405 1.519 ad KASSERT(l->l_blcnt == 0); 1406 1.175 dsl p->p_pptr->p_nstopchild++; 1407 1.419 pgoyette p->p_waited = 0; 1408 1.272 ad mutex_enter(p->p_lock); 1409 1.237 ad ksiginfo_queue_init(&kq); 1410 1.237 ad sigclearall(p, &contsigmask, &kq); 1411 1.237 ad lwp_lock(l); 1412 1.237 ad l->l_stat = LSSTOP; 1413 1.162 manu p->p_stat = SSTOP; 1414 1.164 thorpej p->p_nrlwps--; 1415 1.304 rmind lwp_unlock(l); 1416 1.272 ad mutex_exit(p->p_lock); 1417 1.501 ad mutex_exit(&proc_lock); 1418 1.304 rmind lwp_lock(l); 1419 1.485 ad spc_lock(l->l_cpu); 1420 1.245 yamt mi_switch(l); 1421 1.237 ad ksiginfo_queue_drain(&kq); 1422 1.237 
ad } else { 1423 1.501 ad mutex_exit(&proc_lock); 1424 1.162 manu } 1425 1.162 manu 1426 1.479 christos exec_path_free(data); 1427 1.428 christos #ifdef TRACE_EXEC 1428 1.327 reinoud DPRINTF(("%s finished\n", __func__)); 1429 1.428 christos #endif 1430 1.374 martin return EJUSTRETURN; 1431 1.55 cgd 1432 1.138 lukem exec_abort: 1433 1.418 christos SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0); 1434 1.297 rmind rw_exit(&p->p_reflock); 1435 1.348 martin if (!no_local_exec_lock) 1436 1.348 martin rw_exit(&exec_lock); 1437 1.297 rmind 1438 1.479 christos exec_path_free(data); 1439 1.352 rmind 1440 1.55 cgd /* 1441 1.55 cgd * the old process doesn't exist anymore. exit gracefully. 1442 1.55 cgd * get rid of the (new) address space we have created, if any, get rid 1443 1.55 cgd * of our namei data and vnode, and exit noting failure 1444 1.55 cgd */ 1445 1.488 ad if (vm != NULL) { 1446 1.488 ad uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS, 1447 1.488 ad VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS); 1448 1.488 ad } 1449 1.348 martin 1450 1.378 uebayasi exec_free_emul_arg(epp); 1451 1.337 martin pool_put(&exec_pool, data->ed_argp); 1452 1.378 uebayasi kmem_free(epp->ep_hdr, epp->ep_hdrlen); 1453 1.378 uebayasi if (epp->ep_emul_root != NULL) 1454 1.378 uebayasi vrele(epp->ep_emul_root); 1455 1.378 uebayasi if (epp->ep_interp != NULL) 1456 1.378 uebayasi vrele(epp->ep_interp); 1457 1.237 ad 1458 1.252 ad /* Acquire the sched-state mutex (exit1() will release it). 
*/ 1459 1.348 martin if (!is_spawn) { 1460 1.337 martin mutex_enter(p->p_lock); 1461 1.426 christos exit1(l, error, SIGABRT); 1462 1.337 martin } 1463 1.55 cgd 1464 1.348 martin return error; 1465 1.67 christos } 1466 1.67 christos 1467 1.144 christos int 1468 1.481 christos execve1(struct lwp *l, bool has_path, const char *path, int fd, 1469 1.481 christos char * const *args, char * const *envs, 1470 1.481 christos execve_fetch_element_t fetch_element) 1471 1.337 martin { 1472 1.337 martin struct execve_data data; 1473 1.337 martin int error; 1474 1.337 martin 1475 1.481 christos error = execve_loadvm(l, has_path, path, fd, args, envs, fetch_element, 1476 1.481 christos &data); 1477 1.337 martin if (error) 1478 1.337 martin return error; 1479 1.348 martin error = execve_runproc(l, &data, false, false); 1480 1.337 martin return error; 1481 1.337 martin } 1482 1.337 martin 1483 1.396 uebayasi static size_t 1484 1.411 christos fromptrsz(const struct exec_package *epp) 1485 1.411 christos { 1486 1.411 christos return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *); 1487 1.411 christos } 1488 1.411 christos 1489 1.411 christos static size_t 1490 1.409 christos ptrsz(const struct exec_package *epp) 1491 1.409 christos { 1492 1.411 christos return (epp->ep_flags & EXEC_32) ? 
sizeof(int) : sizeof(char *); 1493 1.409 christos } 1494 1.409 christos 1495 1.409 christos static size_t 1496 1.396 uebayasi calcargs(struct execve_data * restrict data, const size_t argenvstrlen) 1497 1.396 uebayasi { 1498 1.396 uebayasi struct exec_package * const epp = &data->ed_pack; 1499 1.396 uebayasi 1500 1.396 uebayasi const size_t nargenvptrs = 1501 1.402 uebayasi 1 + /* long argc */ 1502 1.396 uebayasi data->ed_argc + /* char *argv[] */ 1503 1.396 uebayasi 1 + /* \0 */ 1504 1.396 uebayasi data->ed_envc + /* char *env[] */ 1505 1.441 christos 1; /* \0 */ 1506 1.396 uebayasi 1507 1.441 christos return (nargenvptrs * ptrsz(epp)) /* pointers */ 1508 1.441 christos + argenvstrlen /* strings */ 1509 1.441 christos + epp->ep_esch->es_arglen; /* auxinfo */ 1510 1.396 uebayasi } 1511 1.396 uebayasi 1512 1.396 uebayasi static size_t 1513 1.396 uebayasi calcstack(struct execve_data * restrict data, const size_t gaplen) 1514 1.396 uebayasi { 1515 1.396 uebayasi struct exec_package * const epp = &data->ed_pack; 1516 1.396 uebayasi 1517 1.396 uebayasi data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode - 1518 1.396 uebayasi epp->ep_esch->es_emul->e_sigcode; 1519 1.396 uebayasi 1520 1.396 uebayasi data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ? 
1521 1.396 uebayasi sizeof(struct ps_strings32) : sizeof(struct ps_strings); 1522 1.396 uebayasi 1523 1.396 uebayasi const size_t sigcode_psstr_sz = 1524 1.396 uebayasi data->ed_szsigcode + /* sigcode */ 1525 1.396 uebayasi data->ed_ps_strings_sz + /* ps_strings */ 1526 1.396 uebayasi STACK_PTHREADSPACE; /* pthread space */ 1527 1.396 uebayasi 1528 1.396 uebayasi const size_t stacklen = 1529 1.396 uebayasi data->ed_argslen + 1530 1.396 uebayasi gaplen + 1531 1.396 uebayasi sigcode_psstr_sz; 1532 1.396 uebayasi 1533 1.396 uebayasi /* make the stack "safely" aligned */ 1534 1.396 uebayasi return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES); 1535 1.396 uebayasi } 1536 1.396 uebayasi 1537 1.391 uebayasi static int 1538 1.399 uebayasi copyoutargs(struct execve_data * restrict data, struct lwp *l, 1539 1.399 uebayasi char * const newstack) 1540 1.399 uebayasi { 1541 1.399 uebayasi struct exec_package * const epp = &data->ed_pack; 1542 1.399 uebayasi struct proc *p = l->l_proc; 1543 1.399 uebayasi int error; 1544 1.399 uebayasi 1545 1.462 maxv memset(&data->ed_arginfo, 0, sizeof(data->ed_arginfo)); 1546 1.462 maxv 1547 1.399 uebayasi /* remember information about the process */ 1548 1.399 uebayasi data->ed_arginfo.ps_nargvstr = data->ed_argc; 1549 1.399 uebayasi data->ed_arginfo.ps_nenvstr = data->ed_envc; 1550 1.399 uebayasi 1551 1.399 uebayasi /* 1552 1.399 uebayasi * Allocate the stack address passed to the newly execve()'ed process. 1553 1.399 uebayasi * 1554 1.399 uebayasi * The new stack address will be set to the SP (stack pointer) register 1555 1.399 uebayasi * in setregs(). 
1556 1.399 uebayasi */ 1557 1.399 uebayasi 1558 1.399 uebayasi char *newargs = STACK_ALLOC( 1559 1.399 uebayasi STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen); 1560 1.399 uebayasi 1561 1.399 uebayasi error = (*epp->ep_esch->es_copyargs)(l, epp, 1562 1.399 uebayasi &data->ed_arginfo, &newargs, data->ed_argp); 1563 1.399 uebayasi 1564 1.399 uebayasi if (error) { 1565 1.399 uebayasi DPRINTF(("%s: copyargs failed %d\n", __func__, error)); 1566 1.399 uebayasi return error; 1567 1.399 uebayasi } 1568 1.399 uebayasi 1569 1.399 uebayasi error = copyoutpsstrs(data, p); 1570 1.399 uebayasi if (error != 0) 1571 1.399 uebayasi return error; 1572 1.399 uebayasi 1573 1.399 uebayasi return 0; 1574 1.399 uebayasi } 1575 1.399 uebayasi 1576 1.399 uebayasi static int 1577 1.398 uebayasi copyoutpsstrs(struct execve_data * restrict data, struct proc *p) 1578 1.398 uebayasi { 1579 1.398 uebayasi struct exec_package * const epp = &data->ed_pack; 1580 1.398 uebayasi struct ps_strings32 arginfo32; 1581 1.398 uebayasi void *aip; 1582 1.398 uebayasi int error; 1583 1.398 uebayasi 1584 1.398 uebayasi /* fill process ps_strings info */ 1585 1.398 uebayasi p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr, 1586 1.398 uebayasi STACK_PTHREADSPACE), data->ed_ps_strings_sz); 1587 1.398 uebayasi 1588 1.398 uebayasi if (epp->ep_flags & EXEC_32) { 1589 1.398 uebayasi aip = &arginfo32; 1590 1.398 uebayasi arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr; 1591 1.398 uebayasi arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr; 1592 1.398 uebayasi arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr; 1593 1.398 uebayasi arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr; 1594 1.398 uebayasi } else 1595 1.398 uebayasi aip = &data->ed_arginfo; 1596 1.398 uebayasi 1597 1.398 uebayasi /* copy out the process's ps_strings structure */ 1598 1.398 uebayasi if ((error = copyout(aip, (void *)p->p_psstrp, data->ed_ps_strings_sz)) 1599 1.398 uebayasi != 0) { 1600 1.398 
uebayasi DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n", 1601 1.398 uebayasi __func__, aip, (void *)p->p_psstrp, data->ed_ps_strings_sz)); 1602 1.398 uebayasi return error; 1603 1.398 uebayasi } 1604 1.398 uebayasi 1605 1.398 uebayasi return 0; 1606 1.398 uebayasi } 1607 1.398 uebayasi 1608 1.398 uebayasi static int 1609 1.391 uebayasi copyinargs(struct execve_data * restrict data, char * const *args, 1610 1.391 uebayasi char * const *envs, execve_fetch_element_t fetch_element, char **dpp) 1611 1.391 uebayasi { 1612 1.391 uebayasi struct exec_package * const epp = &data->ed_pack; 1613 1.392 uebayasi char *dp; 1614 1.391 uebayasi size_t i; 1615 1.391 uebayasi int error; 1616 1.391 uebayasi 1617 1.391 uebayasi dp = *dpp; 1618 1.391 uebayasi 1619 1.391 uebayasi data->ed_argc = 0; 1620 1.391 uebayasi 1621 1.391 uebayasi /* copy the fake args list, if there's one, freeing it as we go */ 1622 1.391 uebayasi if (epp->ep_flags & EXEC_HASARGL) { 1623 1.405 uebayasi struct exec_fakearg *fa = epp->ep_fa; 1624 1.391 uebayasi 1625 1.405 uebayasi while (fa->fa_arg != NULL) { 1626 1.394 uebayasi const size_t maxlen = ARG_MAX - (dp - data->ed_argp); 1627 1.394 uebayasi size_t len; 1628 1.391 uebayasi 1629 1.405 uebayasi len = strlcpy(dp, fa->fa_arg, maxlen); 1630 1.394 uebayasi /* Count NUL into len. 
*/ 1631 1.394 uebayasi if (len < maxlen) 1632 1.394 uebayasi len++; 1633 1.404 uebayasi else { 1634 1.405 uebayasi while (fa->fa_arg != NULL) { 1635 1.405 uebayasi kmem_free(fa->fa_arg, fa->fa_len); 1636 1.405 uebayasi fa++; 1637 1.404 uebayasi } 1638 1.404 uebayasi kmem_free(epp->ep_fa, epp->ep_fa_len); 1639 1.404 uebayasi epp->ep_flags &= ~EXEC_HASARGL; 1640 1.524 riastrad return SET_ERROR(E2BIG); 1641 1.404 uebayasi } 1642 1.405 uebayasi ktrexecarg(fa->fa_arg, len - 1); 1643 1.394 uebayasi dp += len; 1644 1.391 uebayasi 1645 1.405 uebayasi kmem_free(fa->fa_arg, fa->fa_len); 1646 1.405 uebayasi fa++; 1647 1.391 uebayasi data->ed_argc++; 1648 1.391 uebayasi } 1649 1.391 uebayasi kmem_free(epp->ep_fa, epp->ep_fa_len); 1650 1.391 uebayasi epp->ep_flags &= ~EXEC_HASARGL; 1651 1.391 uebayasi } 1652 1.391 uebayasi 1653 1.392 uebayasi /* 1654 1.392 uebayasi * Read and count argument strings from user. 1655 1.392 uebayasi */ 1656 1.392 uebayasi 1657 1.391 uebayasi if (args == NULL) { 1658 1.391 uebayasi DPRINTF(("%s: null args\n", __func__)); 1659 1.524 riastrad return SET_ERROR(EINVAL); 1660 1.391 uebayasi } 1661 1.392 uebayasi if (epp->ep_flags & EXEC_SKIPARG) 1662 1.411 christos args = (const void *)((const char *)args + fromptrsz(epp)); 1663 1.391 uebayasi i = 0; 1664 1.392 uebayasi error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg); 1665 1.392 uebayasi if (error != 0) { 1666 1.392 uebayasi DPRINTF(("%s: copyin arg %d\n", __func__, error)); 1667 1.392 uebayasi return error; 1668 1.392 uebayasi } 1669 1.392 uebayasi data->ed_argc += i; 1670 1.392 uebayasi 1671 1.392 uebayasi /* 1672 1.392 uebayasi * Read and count environment strings from user. 
1673 1.392 uebayasi */ 1674 1.392 uebayasi 1675 1.392 uebayasi data->ed_envc = 0; 1676 1.392 uebayasi /* environment need not be there */ 1677 1.392 uebayasi if (envs == NULL) 1678 1.392 uebayasi goto done; 1679 1.392 uebayasi i = 0; 1680 1.392 uebayasi error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv); 1681 1.392 uebayasi if (error != 0) { 1682 1.392 uebayasi DPRINTF(("%s: copyin env %d\n", __func__, error)); 1683 1.392 uebayasi return error; 1684 1.392 uebayasi } 1685 1.392 uebayasi data->ed_envc += i; 1686 1.392 uebayasi 1687 1.392 uebayasi done: 1688 1.392 uebayasi *dpp = dp; 1689 1.392 uebayasi 1690 1.392 uebayasi return 0; 1691 1.392 uebayasi } 1692 1.392 uebayasi 1693 1.392 uebayasi static int 1694 1.392 uebayasi copyinargstrs(struct execve_data * restrict data, char * const *strs, 1695 1.392 uebayasi execve_fetch_element_t fetch_element, char **dpp, size_t *ip, 1696 1.392 uebayasi void (*ktr)(const void *, size_t)) 1697 1.392 uebayasi { 1698 1.392 uebayasi char *dp, *sp; 1699 1.392 uebayasi size_t i; 1700 1.392 uebayasi int error; 1701 1.392 uebayasi 1702 1.392 uebayasi dp = *dpp; 1703 1.391 uebayasi 1704 1.392 uebayasi i = 0; 1705 1.391 uebayasi while (1) { 1706 1.394 uebayasi const size_t maxlen = ARG_MAX - (dp - data->ed_argp); 1707 1.391 uebayasi size_t len; 1708 1.391 uebayasi 1709 1.392 uebayasi if ((error = (*fetch_element)(strs, i, &sp)) != 0) { 1710 1.391 uebayasi return error; 1711 1.391 uebayasi } 1712 1.391 uebayasi if (!sp) 1713 1.391 uebayasi break; 1714 1.391 uebayasi if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) { 1715 1.391 uebayasi if (error == ENAMETOOLONG) 1716 1.524 riastrad error = SET_ERROR(E2BIG); 1717 1.391 uebayasi return error; 1718 1.391 uebayasi } 1719 1.392 uebayasi if (__predict_false(ktrace_on)) 1720 1.392 uebayasi (*ktr)(dp, len - 1); 1721 1.391 uebayasi dp += len; 1722 1.391 uebayasi i++; 1723 1.391 uebayasi } 1724 1.391 uebayasi 1725 1.391 uebayasi *dpp = dp; 1726 1.392 uebayasi *ip = i; 1727 
1.391 uebayasi 1728 1.391 uebayasi return 0; 1729 1.391 uebayasi } 1730 1.391 uebayasi 1731 1.382 uebayasi /* 1732 1.382 uebayasi * Copy argv and env strings from kernel buffer (argp) to the new stack. 1733 1.382 uebayasi * Those strings are located just after auxinfo. 1734 1.382 uebayasi */ 1735 1.337 martin int 1736 1.231 yamt copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo, 1737 1.231 yamt char **stackp, void *argp) 1738 1.67 christos { 1739 1.138 lukem char **cpp, *dp, *sp; 1740 1.138 lukem size_t len; 1741 1.138 lukem void *nullp; 1742 1.138 lukem long argc, envc; 1743 1.144 christos int error; 1744 1.138 lukem 1745 1.144 christos cpp = (char **)*stackp; 1746 1.138 lukem nullp = NULL; 1747 1.138 lukem argc = arginfo->ps_nargvstr; 1748 1.138 lukem envc = arginfo->ps_nenvstr; 1749 1.382 uebayasi 1750 1.382 uebayasi /* argc on stack is long */ 1751 1.382 uebayasi CTASSERT(sizeof(*cpp) == sizeof(argc)); 1752 1.382 uebayasi 1753 1.382 uebayasi dp = (char *)(cpp + 1754 1.402 uebayasi 1 + /* long argc */ 1755 1.402 uebayasi argc + /* char *argv[] */ 1756 1.382 uebayasi 1 + /* \0 */ 1757 1.402 uebayasi envc + /* char *env[] */ 1758 1.441 christos 1) + /* \0 */ 1759 1.441 christos pack->ep_esch->es_arglen; /* auxinfo */ 1760 1.382 uebayasi sp = argp; 1761 1.382 uebayasi 1762 1.305 matt if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) { 1763 1.312 christos COPYPRINTF("", cpp - 1, sizeof(argc)); 1764 1.144 christos return error; 1765 1.305 matt } 1766 1.67 christos 1767 1.67 christos /* XXX don't copy them out, remap them! 
*/ 1768 1.69 mycroft arginfo->ps_argvstr = cpp; /* remember location of argv for later */ 1769 1.67 christos 1770 1.305 matt for (; --argc >= 0; sp += len, dp += len) { 1771 1.305 matt if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) { 1772 1.312 christos COPYPRINTF("", cpp - 1, sizeof(dp)); 1773 1.305 matt return error; 1774 1.305 matt } 1775 1.305 matt if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) { 1776 1.313 jakllsch COPYPRINTF("str", dp, (size_t)ARG_MAX); 1777 1.144 christos return error; 1778 1.305 matt } 1779 1.305 matt } 1780 1.67 christos 1781 1.305 matt if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) { 1782 1.312 christos COPYPRINTF("", cpp - 1, sizeof(nullp)); 1783 1.144 christos return error; 1784 1.305 matt } 1785 1.67 christos 1786 1.69 mycroft arginfo->ps_envstr = cpp; /* remember location of envp for later */ 1787 1.67 christos 1788 1.305 matt for (; --envc >= 0; sp += len, dp += len) { 1789 1.305 matt if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) { 1790 1.312 christos COPYPRINTF("", cpp - 1, sizeof(dp)); 1791 1.144 christos return error; 1792 1.305 matt } 1793 1.305 matt if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) { 1794 1.313 jakllsch COPYPRINTF("str", dp, (size_t)ARG_MAX); 1795 1.305 matt return error; 1796 1.305 matt } 1797 1.337 martin 1798 1.305 matt } 1799 1.67 christos 1800 1.305 matt if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) { 1801 1.312 christos COPYPRINTF("", cpp - 1, sizeof(nullp)); 1802 1.144 christos return error; 1803 1.305 matt } 1804 1.67 christos 1805 1.144 christos *stackp = (char *)cpp; 1806 1.144 christos return 0; 1807 1.55 cgd } 1808 1.130 jdolecek 1809 1.130 jdolecek 1810 1.130 jdolecek /* 1811 1.282 ad * Add execsw[] entries. 
1812 1.130 jdolecek */ 1813 1.130 jdolecek int 1814 1.282 ad exec_add(struct execsw *esp, int count) 1815 1.130 jdolecek { 1816 1.282 ad struct exec_entry *it; 1817 1.514 ryo int i, error = 0; 1818 1.130 jdolecek 1819 1.283 ad if (count == 0) { 1820 1.283 ad return 0; 1821 1.283 ad } 1822 1.130 jdolecek 1823 1.282 ad /* Check for duplicates. */ 1824 1.237 ad rw_enter(&exec_lock, RW_WRITER); 1825 1.282 ad for (i = 0; i < count; i++) { 1826 1.282 ad LIST_FOREACH(it, &ex_head, ex_list) { 1827 1.282 ad /* assume unique (makecmds, probe_func, emulation) */ 1828 1.282 ad if (it->ex_sw->es_makecmds == esp[i].es_makecmds && 1829 1.282 ad it->ex_sw->u.elf_probe_func == 1830 1.282 ad esp[i].u.elf_probe_func && 1831 1.282 ad it->ex_sw->es_emul == esp[i].es_emul) { 1832 1.282 ad rw_exit(&exec_lock); 1833 1.524 riastrad return SET_ERROR(EEXIST); 1834 1.130 jdolecek } 1835 1.130 jdolecek } 1836 1.130 jdolecek } 1837 1.130 jdolecek 1838 1.282 ad /* Allocate new entries. */ 1839 1.282 ad for (i = 0; i < count; i++) { 1840 1.282 ad it = kmem_alloc(sizeof(*it), KM_SLEEP); 1841 1.282 ad it->ex_sw = &esp[i]; 1842 1.514 ryo error = exec_sigcode_alloc(it->ex_sw->es_emul); 1843 1.514 ryo if (error != 0) { 1844 1.514 ryo kmem_free(it, sizeof(*it)); 1845 1.514 ryo break; 1846 1.514 ryo } 1847 1.282 ad LIST_INSERT_HEAD(&ex_head, it, ex_list); 1848 1.130 jdolecek } 1849 1.514 ryo /* If even one fails, remove them all back. 
*/ 1850 1.514 ryo if (error != 0) { 1851 1.514 ryo for (i--; i >= 0; i--) { 1852 1.514 ryo it = LIST_FIRST(&ex_head); 1853 1.514 ryo LIST_REMOVE(it, ex_list); 1854 1.514 ryo exec_sigcode_free(it->ex_sw->es_emul); 1855 1.514 ryo kmem_free(it, sizeof(*it)); 1856 1.514 ryo } 1857 1.522 mlelstv rw_exit(&exec_lock); 1858 1.514 ryo return error; 1859 1.514 ryo } 1860 1.130 jdolecek 1861 1.130 jdolecek /* update execsw[] */ 1862 1.130 jdolecek exec_init(0); 1863 1.237 ad rw_exit(&exec_lock); 1864 1.282 ad return 0; 1865 1.130 jdolecek } 1866 1.130 jdolecek 1867 1.130 jdolecek /* 1868 1.130 jdolecek * Remove execsw[] entry. 1869 1.130 jdolecek */ 1870 1.130 jdolecek int 1871 1.282 ad exec_remove(struct execsw *esp, int count) 1872 1.130 jdolecek { 1873 1.282 ad struct exec_entry *it, *next; 1874 1.282 ad int i; 1875 1.282 ad const struct proclist_desc *pd; 1876 1.282 ad proc_t *p; 1877 1.282 ad 1878 1.283 ad if (count == 0) { 1879 1.283 ad return 0; 1880 1.283 ad } 1881 1.130 jdolecek 1882 1.282 ad /* Abort if any are busy. */ 1883 1.237 ad rw_enter(&exec_lock, RW_WRITER); 1884 1.282 ad for (i = 0; i < count; i++) { 1885 1.501 ad mutex_enter(&proc_lock); 1886 1.282 ad for (pd = proclists; pd->pd_list != NULL; pd++) { 1887 1.282 ad PROCLIST_FOREACH(p, pd->pd_list) { 1888 1.282 ad if (p->p_execsw == &esp[i]) { 1889 1.501 ad mutex_exit(&proc_lock); 1890 1.282 ad rw_exit(&exec_lock); 1891 1.524 riastrad return SET_ERROR(EBUSY); 1892 1.282 ad } 1893 1.282 ad } 1894 1.282 ad } 1895 1.501 ad mutex_exit(&proc_lock); 1896 1.282 ad } 1897 1.130 jdolecek 1898 1.282 ad /* None are busy, so remove them all. 
*/ 1899 1.282 ad for (i = 0; i < count; i++) { 1900 1.282 ad for (it = LIST_FIRST(&ex_head); it != NULL; it = next) { 1901 1.282 ad next = LIST_NEXT(it, ex_list); 1902 1.282 ad if (it->ex_sw == &esp[i]) { 1903 1.282 ad LIST_REMOVE(it, ex_list); 1904 1.514 ryo exec_sigcode_free(it->ex_sw->es_emul); 1905 1.282 ad kmem_free(it, sizeof(*it)); 1906 1.282 ad break; 1907 1.282 ad } 1908 1.282 ad } 1909 1.130 jdolecek } 1910 1.130 jdolecek 1911 1.130 jdolecek /* update execsw[] */ 1912 1.130 jdolecek exec_init(0); 1913 1.237 ad rw_exit(&exec_lock); 1914 1.282 ad return 0; 1915 1.130 jdolecek } 1916 1.130 jdolecek 1917 1.130 jdolecek /* 1918 1.130 jdolecek * Initialize exec structures. If init_boot is true, also does necessary 1919 1.130 jdolecek * one-time initialization (it's called from main() that way). 1920 1.147 jdolecek * Once system is multiuser, this should be called with exec_lock held, 1921 1.130 jdolecek * i.e. via exec_{add|remove}(). 1922 1.130 jdolecek */ 1923 1.130 jdolecek int 1924 1.138 lukem exec_init(int init_boot) 1925 1.130 jdolecek { 1926 1.282 ad const struct execsw **sw; 1927 1.282 ad struct exec_entry *ex; 1928 1.282 ad SLIST_HEAD(,exec_entry) first; 1929 1.282 ad SLIST_HEAD(,exec_entry) any; 1930 1.282 ad SLIST_HEAD(,exec_entry) last; 1931 1.282 ad int i, sz; 1932 1.130 jdolecek 1933 1.130 jdolecek if (init_boot) { 1934 1.130 jdolecek /* do one-time initializations */ 1935 1.449 riastrad vaddr_t vmin = 0, vmax; 1936 1.448 riastrad 1937 1.237 ad rw_init(&exec_lock); 1938 1.448 riastrad exec_map = uvm_km_suballoc(kernel_map, &vmin, &vmax, 1939 1.448 riastrad maxexec*NCARGS, VM_MAP_PAGEABLE, false, NULL); 1940 1.277 ad pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH, 1941 1.277 ad "execargs", &exec_palloc, IPL_NONE); 1942 1.277 ad pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0); 1943 1.282 ad } else { 1944 1.282 ad KASSERT(rw_write_held(&exec_lock)); 1945 1.282 ad } 1946 1.130 jdolecek 1947 1.282 ad /* Sort each entry onto 
the appropriate queue. */ 1948 1.282 ad SLIST_INIT(&first); 1949 1.282 ad SLIST_INIT(&any); 1950 1.282 ad SLIST_INIT(&last); 1951 1.282 ad sz = 0; 1952 1.282 ad LIST_FOREACH(ex, &ex_head, ex_list) { 1953 1.282 ad switch(ex->ex_sw->es_prio) { 1954 1.282 ad case EXECSW_PRIO_FIRST: 1955 1.282 ad SLIST_INSERT_HEAD(&first, ex, ex_slist); 1956 1.282 ad break; 1957 1.282 ad case EXECSW_PRIO_ANY: 1958 1.282 ad SLIST_INSERT_HEAD(&any, ex, ex_slist); 1959 1.282 ad break; 1960 1.282 ad case EXECSW_PRIO_LAST: 1961 1.282 ad SLIST_INSERT_HEAD(&last, ex, ex_slist); 1962 1.282 ad break; 1963 1.282 ad default: 1964 1.312 christos panic("%s", __func__); 1965 1.282 ad break; 1966 1.130 jdolecek } 1967 1.282 ad sz++; 1968 1.130 jdolecek } 1969 1.130 jdolecek 1970 1.130 jdolecek /* 1971 1.282 ad * Create new execsw[]. Ensure we do not try a zero-sized 1972 1.282 ad * allocation. 1973 1.130 jdolecek */ 1974 1.282 ad sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP); 1975 1.282 ad i = 0; 1976 1.282 ad SLIST_FOREACH(ex, &first, ex_slist) { 1977 1.282 ad sw[i++] = ex->ex_sw; 1978 1.282 ad } 1979 1.282 ad SLIST_FOREACH(ex, &any, ex_slist) { 1980 1.282 ad sw[i++] = ex->ex_sw; 1981 1.282 ad } 1982 1.282 ad SLIST_FOREACH(ex, &last, ex_slist) { 1983 1.282 ad sw[i++] = ex->ex_sw; 1984 1.130 jdolecek } 1985 1.183 junyoung 1986 1.282 ad /* Replace old execsw[] and free used memory. */ 1987 1.282 ad if (execsw != NULL) { 1988 1.282 ad kmem_free(__UNCONST(execsw), 1989 1.282 ad nexecs * sizeof(struct execsw *) + 1); 1990 1.130 jdolecek } 1991 1.282 ad execsw = sw; 1992 1.282 ad nexecs = sz; 1993 1.130 jdolecek 1994 1.282 ad /* Figure out the maximum size of an exec header. 
*/ 1995 1.282 ad exec_maxhdrsz = sizeof(int); 1996 1.130 jdolecek for (i = 0; i < nexecs; i++) { 1997 1.130 jdolecek if (execsw[i]->es_hdrsz > exec_maxhdrsz) 1998 1.130 jdolecek exec_maxhdrsz = execsw[i]->es_hdrsz; 1999 1.130 jdolecek } 2000 1.130 jdolecek 2001 1.130 jdolecek return 0; 2002 1.130 jdolecek } 2003 1.171 chs 2004 1.514 ryo int 2005 1.514 ryo exec_sigcode_alloc(const struct emul *e) 2006 1.171 chs { 2007 1.171 chs vaddr_t va; 2008 1.171 chs vsize_t sz; 2009 1.171 chs int error; 2010 1.171 chs struct uvm_object *uobj; 2011 1.171 chs 2012 1.514 ryo KASSERT(rw_lock_held(&exec_lock)); 2013 1.514 ryo 2014 1.514 ryo if (e == NULL || e->e_sigobject == NULL) 2015 1.514 ryo return 0; 2016 1.514 ryo 2017 1.513 ryo sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode; 2018 1.514 ryo if (sz == 0) 2019 1.171 chs return 0; 2020 1.171 chs 2021 1.171 chs /* 2022 1.514 ryo * Create a sigobject for this emulation. 2023 1.171 chs * 2024 1.171 chs * sigobject is an anonymous memory object (just like SYSV shared 2025 1.171 chs * memory) that we keep a permanent reference to and that we map 2026 1.171 chs * in all processes that need this sigcode. The creation is simple, 2027 1.171 chs * we create an object, add a permanent reference to it, map it in 2028 1.171 chs * kernel space, copy out the sigcode to it and unmap it. 2029 1.189 jdolecek * We map it with PROT_READ|PROT_EXEC into the process just 2030 1.189 jdolecek * the way sys_mmap() would map it. 
2031 1.171 chs */ 2032 1.514 ryo if (*e->e_sigobject == NULL) { 2033 1.514 ryo uobj = uao_create(sz, 0); 2034 1.514 ryo (*uobj->pgops->pgo_reference)(uobj); 2035 1.514 ryo va = vm_map_min(kernel_map); 2036 1.514 ryo if ((error = uvm_map(kernel_map, &va, round_page(sz), 2037 1.514 ryo uobj, 0, 0, 2038 1.514 ryo UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW, 2039 1.514 ryo UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) { 2040 1.514 ryo printf("sigcode kernel mapping failed %d\n", error); 2041 1.514 ryo (*uobj->pgops->pgo_detach)(uobj); 2042 1.514 ryo return error; 2043 1.514 ryo } 2044 1.514 ryo memcpy((void *)va, e->e_sigcode, sz); 2045 1.512 ryo #ifdef PMAP_NEED_PROCWR 2046 1.514 ryo pmap_procwr(&proc0, va, sz); 2047 1.512 ryo #endif 2048 1.514 ryo uvm_unmap(kernel_map, va, va + round_page(sz)); 2049 1.514 ryo *e->e_sigobject = uobj; 2050 1.514 ryo KASSERT(uobj->uo_refs == 1); 2051 1.514 ryo } else { 2052 1.514 ryo /* if already created, reference++ */ 2053 1.514 ryo uobj = *e->e_sigobject; 2054 1.514 ryo (*uobj->pgops->pgo_reference)(uobj); 2055 1.513 ryo } 2056 1.171 chs 2057 1.514 ryo return 0; 2058 1.514 ryo } 2059 1.514 ryo 2060 1.514 ryo void 2061 1.514 ryo exec_sigcode_free(const struct emul *e) 2062 1.514 ryo { 2063 1.514 ryo struct uvm_object *uobj; 2064 1.514 ryo 2065 1.514 ryo KASSERT(rw_lock_held(&exec_lock)); 2066 1.514 ryo 2067 1.514 ryo if (e == NULL || e->e_sigobject == NULL) 2068 1.514 ryo return; 2069 1.514 ryo 2070 1.514 ryo uobj = *e->e_sigobject; 2071 1.514 ryo if (uobj == NULL) 2072 1.514 ryo return; 2073 1.514 ryo 2074 1.514 ryo if (uobj->uo_refs == 1) 2075 1.514 ryo *e->e_sigobject = NULL; /* I'm the last person to reference. 
*/ 2076 1.514 ryo (*uobj->pgops->pgo_detach)(uobj); 2077 1.514 ryo } 2078 1.514 ryo 2079 1.514 ryo static int 2080 1.514 ryo exec_sigcode_map(struct proc *p, const struct emul *e) 2081 1.514 ryo { 2082 1.514 ryo vaddr_t va; 2083 1.514 ryo vsize_t sz; 2084 1.514 ryo int error; 2085 1.514 ryo struct uvm_object *uobj; 2086 1.514 ryo 2087 1.514 ryo sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode; 2088 1.514 ryo if (e->e_sigobject == NULL || sz == 0) 2089 1.514 ryo return 0; 2090 1.514 ryo 2091 1.514 ryo uobj = *e->e_sigobject; 2092 1.514 ryo if (uobj == NULL) 2093 1.514 ryo return 0; 2094 1.514 ryo 2095 1.172 enami /* Just a hint to uvm_map where to put it. */ 2096 1.195 fvdl va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr, 2097 1.422 martin round_page(sz), p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN); 2098 1.187 chs 2099 1.187 chs #ifdef __alpha__ 2100 1.187 chs /* 2101 1.187 chs * Tru64 puts /sbin/loader at the end of user virtual memory, 2102 1.187 chs * which causes the above calculation to put the sigcode at 2103 1.187 chs * an invalid address. Put it just below the text instead. 
2104 1.187 chs */ 2105 1.193 jmc if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) { 2106 1.187 chs va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz); 2107 1.187 chs } 2108 1.187 chs #endif 2109 1.187 chs 2110 1.171 chs (*uobj->pgops->pgo_reference)(uobj); 2111 1.171 chs error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz), 2112 1.171 chs uobj, 0, 0, 2113 1.171 chs UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE, 2114 1.171 chs UVM_ADV_RANDOM, 0)); 2115 1.171 chs if (error) { 2116 1.312 christos DPRINTF(("%s, %d: map %p " 2117 1.305 matt "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n", 2118 1.312 christos __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz), 2119 1.312 christos va, error)); 2120 1.171 chs (*uobj->pgops->pgo_detach)(uobj); 2121 1.374 martin return error; 2122 1.171 chs } 2123 1.171 chs p->p_sigctx.ps_sigcode = (void *)va; 2124 1.374 martin return 0; 2125 1.171 chs } 2126 1.336 matt 2127 1.337 martin /* 2128 1.348 martin * Release a refcount on spawn_exec_data and destroy memory, if this 2129 1.348 martin * was the last one. 
2130 1.348 martin */ 2131 1.348 martin static void 2132 1.348 martin spawn_exec_data_release(struct spawn_exec_data *data) 2133 1.348 martin { 2134 1.516 riastrad 2135 1.517 riastrad membar_release(); 2136 1.348 martin if (atomic_dec_32_nv(&data->sed_refcnt) != 0) 2137 1.348 martin return; 2138 1.517 riastrad membar_acquire(); 2139 1.348 martin 2140 1.348 martin cv_destroy(&data->sed_cv_child_ready); 2141 1.348 martin mutex_destroy(&data->sed_mtx_child); 2142 1.348 martin 2143 1.348 martin if (data->sed_actions) 2144 1.348 martin posix_spawn_fa_free(data->sed_actions, 2145 1.348 martin data->sed_actions->len); 2146 1.348 martin if (data->sed_attrs) 2147 1.348 martin kmem_free(data->sed_attrs, 2148 1.348 martin sizeof(*data->sed_attrs)); 2149 1.348 martin kmem_free(data, sizeof(*data)); 2150 1.348 martin } 2151 1.348 martin 2152 1.494 christos static int 2153 1.494 christos handle_posix_spawn_file_actions(struct posix_spawn_file_actions *actions) 2154 1.494 christos { 2155 1.494 christos struct lwp *l = curlwp; 2156 1.494 christos register_t retval; 2157 1.530 martin int error = 0, newfd; 2158 1.494 christos 2159 1.494 christos if (actions == NULL) 2160 1.494 christos return 0; 2161 1.494 christos 2162 1.494 christos for (size_t i = 0; i < actions->len; i++) { 2163 1.494 christos const struct posix_spawn_file_actions_entry *fae = 2164 1.494 christos &actions->fae[i]; 2165 1.494 christos switch (fae->fae_action) { 2166 1.494 christos case FAE_OPEN: 2167 1.494 christos if (fd_getfile(fae->fae_fildes) != NULL) { 2168 1.494 christos error = fd_close(fae->fae_fildes); 2169 1.494 christos if (error) 2170 1.494 christos return error; 2171 1.494 christos } 2172 1.494 christos error = fd_open(fae->fae_path, fae->fae_oflag, 2173 1.494 christos fae->fae_mode, &newfd); 2174 1.494 christos if (error) 2175 1.494 christos return error; 2176 1.494 christos if (newfd != fae->fae_fildes) { 2177 1.494 christos error = dodup(l, newfd, 2178 1.494 christos fae->fae_fildes, 0, &retval); 
2179 1.494 christos if (fd_getfile(newfd) != NULL) 2180 1.494 christos fd_close(newfd); 2181 1.494 christos } 2182 1.494 christos break; 2183 1.494 christos case FAE_DUP2: 2184 1.494 christos error = dodup(l, fae->fae_fildes, 2185 1.494 christos fae->fae_newfildes, 0, &retval); 2186 1.494 christos break; 2187 1.494 christos case FAE_CLOSE: 2188 1.529 martin /* 2189 1.529 martin * posix specifies failures from close() due to 2190 1.529 martin * already closed file descriptors should be ignored. 2191 1.529 martin * out of range filedescriptors would have been 2192 1.529 martin * caught earlier already. 2193 1.529 martin */ 2194 1.529 martin if (fd_getfile(fae->fae_fildes) != NULL) 2195 1.529 martin fd_close(fae->fae_fildes); 2196 1.494 christos break; 2197 1.511 christos case FAE_CHDIR: 2198 1.511 christos error = do_sys_chdir(l, fae->fae_chdir_path, 2199 1.511 christos UIO_SYSSPACE, &retval); 2200 1.511 christos break; 2201 1.511 christos case FAE_FCHDIR: 2202 1.511 christos error = do_sys_fchdir(l, fae->fae_fildes, &retval); 2203 1.511 christos break; 2204 1.494 christos } 2205 1.494 christos if (error) 2206 1.494 christos return error; 2207 1.494 christos } 2208 1.494 christos return 0; 2209 1.494 christos } 2210 1.494 christos 2211 1.494 christos static int 2212 1.494 christos handle_posix_spawn_attrs(struct posix_spawnattr *attrs, struct proc *parent) 2213 1.494 christos { 2214 1.494 christos struct sigaction sigact; 2215 1.527 joe int error = 0; 2216 1.494 christos struct proc *p = curproc; 2217 1.494 christos struct lwp *l = curlwp; 2218 1.494 christos 2219 1.494 christos if (attrs == NULL) 2220 1.494 christos return 0; 2221 1.494 christos 2222 1.494 christos memset(&sigact, 0, sizeof(sigact)); 2223 1.494 christos sigact._sa_u._sa_handler = SIG_DFL; 2224 1.494 christos sigact.sa_flags = 0; 2225 1.494 christos 2226 1.525 riastrad /* 2227 1.494 christos * set state to SSTOP so that this proc can be found by pid. 
2228 1.494 christos * see proc_enterprp, do_sched_setparam below 2229 1.494 christos */ 2230 1.501 ad mutex_enter(&proc_lock); 2231 1.494 christos /* 2232 1.494 christos * p_stat should be SACTIVE, so we need to adjust the 2233 1.494 christos * parent's p_nstopchild here. For safety, just make 2234 1.494 christos * we're on the good side of SDEAD before we adjust. 2235 1.494 christos */ 2236 1.494 christos int ostat = p->p_stat; 2237 1.494 christos KASSERT(ostat < SSTOP); 2238 1.494 christos p->p_stat = SSTOP; 2239 1.494 christos p->p_waited = 0; 2240 1.494 christos p->p_pptr->p_nstopchild++; 2241 1.501 ad mutex_exit(&proc_lock); 2242 1.494 christos 2243 1.494 christos /* Set process group */ 2244 1.494 christos if (attrs->sa_flags & POSIX_SPAWN_SETPGROUP) { 2245 1.494 christos pid_t mypid = p->p_pid; 2246 1.494 christos pid_t pgrp = attrs->sa_pgroup; 2247 1.494 christos 2248 1.494 christos if (pgrp == 0) 2249 1.494 christos pgrp = mypid; 2250 1.494 christos 2251 1.494 christos error = proc_enterpgrp(parent, mypid, pgrp, false); 2252 1.494 christos if (error) 2253 1.494 christos goto out; 2254 1.494 christos } 2255 1.494 christos 2256 1.494 christos /* Set scheduler policy */ 2257 1.494 christos if (attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER) 2258 1.494 christos error = do_sched_setparam(p->p_pid, 0, attrs->sa_schedpolicy, 2259 1.494 christos &attrs->sa_schedparam); 2260 1.494 christos else if (attrs->sa_flags & POSIX_SPAWN_SETSCHEDPARAM) { 2261 1.494 christos error = do_sched_setparam(parent->p_pid, 0, 2262 1.494 christos SCHED_NONE, &attrs->sa_schedparam); 2263 1.494 christos } 2264 1.494 christos if (error) 2265 1.494 christos goto out; 2266 1.494 christos 2267 1.494 christos /* Reset user ID's */ 2268 1.494 christos if (attrs->sa_flags & POSIX_SPAWN_RESETIDS) { 2269 1.505 martin error = do_setresgid(l, -1, kauth_cred_getgid(l->l_cred), -1, 2270 1.494 christos ID_E_EQ_R | ID_E_EQ_S); 2271 1.494 christos if (error) 2272 1.494 christos return error; 2273 1.494 
christos error = do_setresuid(l, -1, kauth_cred_getuid(l->l_cred), -1, 2274 1.494 christos ID_E_EQ_R | ID_E_EQ_S); 2275 1.494 christos if (error) 2276 1.494 christos goto out; 2277 1.494 christos } 2278 1.494 christos 2279 1.494 christos /* Set signal masks/defaults */ 2280 1.494 christos if (attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) { 2281 1.494 christos mutex_enter(p->p_lock); 2282 1.494 christos error = sigprocmask1(l, SIG_SETMASK, &attrs->sa_sigmask, NULL); 2283 1.494 christos mutex_exit(p->p_lock); 2284 1.494 christos if (error) 2285 1.494 christos goto out; 2286 1.494 christos } 2287 1.494 christos 2288 1.494 christos if (attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) { 2289 1.494 christos /* 2290 1.494 christos * The following sigaction call is using a sigaction 2291 1.494 christos * version 0 trampoline which is in the compatibility 2292 1.494 christos * code only. This is not a problem because for SIG_DFL 2293 1.494 christos * and SIG_IGN, the trampolines are now ignored. If they 2294 1.494 christos * were not, this would be a problem because we are 2295 1.494 christos * holding the exec_lock, and the compat code needs 2296 1.494 christos * to do the same in order to replace the trampoline 2297 1.494 christos * code of the process. 
2298 1.494 christos */ 2299 1.494 christos for (int i = 1; i <= NSIG; i++) { 2300 1.494 christos if (sigismember(&attrs->sa_sigdefault, i)) 2301 1.494 christos sigaction1(l, i, &sigact, NULL, NULL, 0); 2302 1.494 christos } 2303 1.494 christos } 2304 1.494 christos out: 2305 1.501 ad mutex_enter(&proc_lock); 2306 1.494 christos p->p_stat = ostat; 2307 1.494 christos p->p_pptr->p_nstopchild--; 2308 1.501 ad mutex_exit(&proc_lock); 2309 1.494 christos return error; 2310 1.494 christos } 2311 1.494 christos 2312 1.348 martin /* 2313 1.337 martin * A child lwp of a posix_spawn operation starts here and ends up in 2314 1.337 martin * cpu_spawn_return, dealing with all filedescriptor and scheduler 2315 1.337 martin * manipulations in between. 2316 1.369 christos * The parent waits for the child, as it is not clear whether the child 2317 1.369 christos * will be able to acquire its own exec_lock. If it can, the parent can 2318 1.348 martin * be released early and continue running in parallel. If not (or if the 2319 1.348 martin * magic debug flag is passed in the scheduler attribute struct), the 2320 1.369 christos * child rides on the parent's exec lock until it is ready to return to 2321 1.348 martin * to userland - and only then releases the parent. This method loses 2322 1.348 martin * concurrency, but improves error reporting. 2323 1.337 martin */ 2324 1.337 martin static void 2325 1.337 martin spawn_return(void *arg) 2326 1.337 martin { 2327 1.337 martin struct spawn_exec_data *spawn_data = arg; 2328 1.337 martin struct lwp *l = curlwp; 2329 1.466 kamil struct proc *p = l->l_proc; 2330 1.494 christos int error; 2331 1.341 martin bool have_reflock; 2332 1.348 martin bool parent_is_waiting = true; 2333 1.345 martin 2334 1.341 martin /* 2335 1.348 martin * Check if we can release parent early. 
2336 1.348 martin * We either need to have no sed_attrs, or sed_attrs does not 2337 1.348 martin * have POSIX_SPAWN_RETURNERROR or one of the flags, that require 2338 1.348 martin * safe access to the parent proc (passed in sed_parent). 2339 1.348 martin * We then try to get the exec_lock, and only if that works, we can 2340 1.348 martin * release the parent here already. 2341 1.348 martin */ 2342 1.494 christos struct posix_spawnattr *attrs = spawn_data->sed_attrs; 2343 1.494 christos if ((!attrs || (attrs->sa_flags 2344 1.494 christos & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0) 2345 1.348 martin && rw_tryenter(&exec_lock, RW_READER)) { 2346 1.348 martin parent_is_waiting = false; 2347 1.348 martin mutex_enter(&spawn_data->sed_mtx_child); 2348 1.526 riastrad KASSERT(!spawn_data->sed_child_ready); 2349 1.526 riastrad spawn_data->sed_error = 0; 2350 1.526 riastrad spawn_data->sed_child_ready = true; 2351 1.348 martin cv_signal(&spawn_data->sed_cv_child_ready); 2352 1.348 martin mutex_exit(&spawn_data->sed_mtx_child); 2353 1.348 martin } 2354 1.341 martin 2355 1.352 rmind /* don't allow debugger access yet */ 2356 1.466 kamil rw_enter(&p->p_reflock, RW_WRITER); 2357 1.352 rmind have_reflock = true; 2358 1.352 rmind 2359 1.506 martin /* handle posix_spawnattr */ 2360 1.506 martin error = handle_posix_spawn_attrs(attrs, spawn_data->sed_parent); 2361 1.494 christos if (error) 2362 1.494 christos goto report_error; 2363 1.337 martin 2364 1.506 martin /* handle posix_spawn_file_actions */ 2365 1.506 martin error = handle_posix_spawn_file_actions(spawn_data->sed_actions); 2366 1.494 christos if (error) 2367 1.494 christos goto report_error; 2368 1.337 martin 2369 1.352 rmind /* now do the real exec */ 2370 1.348 martin error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting, 2371 1.348 martin true); 2372 1.341 martin have_reflock = false; 2373 1.352 rmind if (error == EJUSTRETURN) 2374 1.352 rmind error = 0; 2375 1.352 rmind else if (error) 2376 
1.337 martin goto report_error; 2377 1.337 martin 2378 1.348 martin if (parent_is_waiting) { 2379 1.348 martin mutex_enter(&spawn_data->sed_mtx_child); 2380 1.526 riastrad KASSERT(!spawn_data->sed_child_ready); 2381 1.526 riastrad spawn_data->sed_error = 0; 2382 1.526 riastrad spawn_data->sed_child_ready = true; 2383 1.348 martin cv_signal(&spawn_data->sed_cv_child_ready); 2384 1.348 martin mutex_exit(&spawn_data->sed_mtx_child); 2385 1.348 martin } 2386 1.345 martin 2387 1.348 martin /* release our refcount on the data */ 2388 1.348 martin spawn_exec_data_release(spawn_data); 2389 1.337 martin 2390 1.500 kamil if ((p->p_slflag & (PSL_TRACED|PSL_TRACEDCHILD)) == 2391 1.500 kamil (PSL_TRACED|PSL_TRACEDCHILD)) { 2392 1.494 christos eventswitchchild(p, TRAP_CHLD, PTRACE_POSIX_SPAWN); 2393 1.500 kamil } 2394 1.466 kamil 2395 1.369 christos /* and finally: leave to userland for the first time */ 2396 1.337 martin cpu_spawn_return(l); 2397 1.337 martin 2398 1.337 martin /* NOTREACHED */ 2399 1.337 martin return; 2400 1.337 martin 2401 1.337 martin report_error: 2402 1.376 maxv if (have_reflock) { 2403 1.376 maxv /* 2404 1.350 martin * We have not passed through execve_runproc(), 2405 1.350 martin * which would have released the p_reflock and also 2406 1.350 martin * taken ownership of the sed_exec part of spawn_data, 2407 1.350 martin * so release/free both here. 
2408 1.350 martin */ 2409 1.466 kamil rw_exit(&p->p_reflock); 2410 1.350 martin execve_free_data(&spawn_data->sed_exec); 2411 1.350 martin } 2412 1.341 martin 2413 1.348 martin if (parent_is_waiting) { 2414 1.348 martin /* pass error to parent */ 2415 1.348 martin mutex_enter(&spawn_data->sed_mtx_child); 2416 1.526 riastrad KASSERT(!spawn_data->sed_child_ready); 2417 1.348 martin spawn_data->sed_error = error; 2418 1.526 riastrad spawn_data->sed_child_ready = true; 2419 1.348 martin cv_signal(&spawn_data->sed_cv_child_ready); 2420 1.348 martin mutex_exit(&spawn_data->sed_mtx_child); 2421 1.348 martin } else { 2422 1.348 martin rw_exit(&exec_lock); 2423 1.337 martin } 2424 1.337 martin 2425 1.348 martin /* release our refcount on the data */ 2426 1.348 martin spawn_exec_data_release(spawn_data); 2427 1.348 martin 2428 1.352 rmind /* done, exit */ 2429 1.466 kamil mutex_enter(p->p_lock); 2430 1.348 martin /* 2431 1.352 rmind * Posix explicitly asks for an exit code of 127 if we report 2432 1.348 martin * errors from the child process - so, unfortunately, there 2433 1.348 martin * is no way to report a more exact error code. 2434 1.348 martin * A NetBSD specific workaround is POSIX_SPAWN_RETURNERROR as 2435 1.348 martin * flag bit in the attrp argument to posix_spawn(2), see above. 
	 */
	exit1(l, 127, 0);
}

/*
 * Return the address of the pathname pointer stored in a file action
 * entry: fae_path for FAE_OPEN, fae_chdir_path for FAE_CHDIR.
 * Any other action yields NULL.
 */
static __inline char **
posix_spawn_fae_path(struct posix_spawn_file_actions_entry *fae)
{
	switch (fae->fae_action) {
	case FAE_OPEN:
		return &fae->fae_path;
	case FAE_CHDIR:
		return &fae->fae_chdir_path;
	default:
		return NULL;
	}
}

/*
 * Free a kernel copy of a file actions list.
 *
 * The pathname strings of the first 'len' entries are released with
 * kmem_strfree(); callers pass the number of entries whose paths are
 * kernel allocations (posix_spawn_fa_alloc() passes the index it
 * reached before failing).  The entry array is sized by fa->len, not
 * by 'len', and is freed together with fa itself.
 */
void
posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
{

	for (size_t i = 0; i < len; i++) {
		char **pathp = posix_spawn_fae_path(&fa->fae[i]);
		if (pathp)
			kmem_strfree(*pathp);
	}
	if (fa->len > 0)
		kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
	kmem_free(fa, sizeof(*fa));
}

/*
 * Copy a file actions list in from 'ufa' into freshly allocated
 * kernel memory.
 *
 * On success with at least one entry, *fap points to the copy.  If the
 * list is empty (len == 0) the function returns 0 and leaves *fap
 * untouched.  A list with more than 'lim' entries is rejected with
 * EINVAL.  On any failure everything allocated here is freed again.
 */
static int
posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
    const struct posix_spawn_file_actions *ufa, rlim_t lim)
{
	struct posix_spawn_file_actions *fa;
	struct posix_spawn_file_actions_entry *fae;
	char *pbuf = NULL;
	int error;
	size_t i = 0;

	fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
	error = copyin(ufa, fa, sizeof(*fa));
	if (error || fa->len == 0) {
		kmem_free(fa, sizeof(*fa));
		return error;	/* 0 if not an error, and len == 0 */
	}

	if (fa->len > lim) {
		kmem_free(fa, sizeof(*fa));
		return SET_ERROR(EINVAL);
	}

	fa->size = fa->len;
	size_t fal = fa->len * sizeof(*fae);
	/* Remember the source array pointer before replacing it. */
	fae = fa->fae;
	fa->fae = kmem_alloc(fal, KM_SLEEP);
	error = copyin(fae, fa->fae, fal);
	if (error)
		goto out;

	/*
	 * Replace each pathname pointer copied in above with a kernel
	 * copy of the string.  'i' counts the entries already
	 * processed, so the error path frees only those strings.
	 */
	pbuf = PNBUF_GET();
	for (; i < fa->len; i++) {
		char **pathp = posix_spawn_fae_path(&fa->fae[i]);
		if (pathp == NULL)
			continue;
		/* fal is reused here to receive the copied string length. */
		error = copyinstr(*pathp, pbuf, MAXPATHLEN, &fal);
		if (error)
			goto out;
		*pathp = kmem_alloc(fal, KM_SLEEP);
		memcpy(*pathp, pbuf, fal);
	}
	PNBUF_PUT(pbuf);

	*fap = fa;
	return 0;
out:
	if (pbuf)
		PNBUF_PUT(pbuf);
	posix_spawn_fa_free(fa, i);
	return error;
}

/*
 * Check whether the process of l1 may create another process:
 * the global maxproc limit, the KAUTH_PROCESS_FORK authorization and
 * the per-uid RLIMIT_NPROC limit.  Returns 0 or EAGAIN.
 *
 * N.B. increments nprocs upon success.  Callers need to drop nprocs if
 * they fail for some other reason.  The per-uid process count bumped
 * with chgproccnt() also stays incremented on success.
 */
int
check_posix_spawn(struct lwp *l1)
{
	int error, tnprocs, count;
	uid_t uid;
	struct proc *p1;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		/* Undo the nprocs increment; any failure is EAGAIN. */
		atomic_dec_uint(&nprocs);
		return SET_ERROR(EAGAIN);
	}

	/*
	 * Enforce limits.  The RLIMIT_NPROC check applies only when
	 * the bypass request is not authorized.
	 */
	count = chgproccnt(uid, 1);
	if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
	     p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
	     &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
	    __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return SET_ERROR(EAGAIN);
	}

	return 0;
}

/*
 * Create a child process running the image at 'path'.
 *
 * l1		calling (parent) LWP
 * pid_res	receives the child's pid once the child has reported
 *		back; it is written even if the child reported an error
 * child_ok	set to true once the child LWP has been created and
 *		shares spawn_data with the parent
 * path		pathname handed to execve_loadvm()
 * fa, sa	file actions and attributes; stored in spawn_data as
 *		sed_actions and sed_attrs
 * argv, envp, fetch
 *		passed through to execve_loadvm()
 *
 * Returns 0, an error from the setup steps here, or the sed_error
 * value the child stored before signalling sed_cv_child_ready.
 */
int
do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
    struct posix_spawn_file_actions *fa,
    struct posix_spawnattr *sa,
    char *const *argv, char *const *envp,
    execve_fetch_element_t fetch)
{

	struct proc *p1, *p2;
	struct lwp *l2;
	int error;
	struct spawn_exec_data *spawn_data;
	vaddr_t uaddr = 0;
	pid_t pid;
	bool have_exec_lock = false;

	p1 = l1->l_proc;

	/*
	 * Allocate and init spawn_data.  sed_mtx_child is held from
	 * here until the wait for the child below (or error_exit).
	 */
	spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
	spawn_data->sed_refcnt = 1; /* only parent so far */
	cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
	mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&spawn_data->sed_mtx_child);

	/*
	 * Do the first part of the exec now, collect state
	 * in spawn_data.
	 */
	error = execve_loadvm(l1, true, path, -1, argv,
	    envp, fetch, &spawn_data->sed_exec);
	if (error == EJUSTRETURN)
		error = 0;
	else if (error)
		goto error_exit;

	/*
	 * From here on the error path must also free sed_exec and
	 * release p1->p_reflock and exec_lock (presumably taken by
	 * execve_loadvm() -- confirm there).
	 */
	have_exec_lock = true;

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		error = SET_ERROR(ENOMEM);
		goto error_exit;
	}

	/*
	 * Allocate new proc. Borrow proc0 vmspace for it, we will
	 * replace it with its own before returning to userland
	 * in the child.
	 *
	 * NOTE(review): an empty vmspace is allocated with
	 * uvmspace_alloc() a few statements below; confirm whether
	 * the "borrow proc0 vmspace" remark still applies.
	 */
	p2 = proc_alloc();
	if (p2 == NULL) {
		/* We were unable to allocate a process ID. */
		error = SET_ERROR(EAGAIN);
		goto error_exit;
	}

	/*
	 * This is a point of no return, we will have to go through
	 * the child proc to properly clean it up past this point.
	 */
	pid = p2->p_pid;

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	/*
	 * Allocate an empty user vmspace for the new process now.
	 * The min/max and topdown parameters given here are just placeholders,
	 * the right values will be assigned in uvmspace_exec().
	 */
	p2->p_vmspace = uvmspace_alloc(exec_vm_minaddr(VM_MIN_ADDRESS),
	    VM_MAXUSER_ADDRESS, true);

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
	p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need increase pl_refcnt.
	 */
	if (!p1->p_limit->pl_writeable) {
		lim_addref(p1->p_limit);
		p2->p_limit = p1->p_limit;
	} else {
		p2->p_limit = lim_copy(p1->p_limit);
	}

	p2->p_lflag = 0;
	l1->l_vforkwaiting = false;
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	p2->p_pptr = p1;
	p2->p_ppid = p1->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, 0);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/* copy over machdep flags to the new proc */
	cpu_proc_fork(p1, p2);

	/*
	 * Prepare remaining parts of spawn data
	 */
	spawn_data->sed_actions = fa;
	spawn_data->sed_attrs = sa;

	spawn_data->sed_parent = p1;

	/* create LWP; it starts in spawn_return() with spawn_data as argument */
	lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
	    &l2, l1->l_class, &l1->l_sigmask, &l1->l_sigstk);
	l2->l_ctxlink = NULL;	/* reset ucontext link */

	/*
	 * Copy the credential so other references don't see our changes.
	 * Test to see if this is necessary first, since in the common case
	 * we won't need a private reference.
	 */
	if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
	    kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
		l2->l_cred = kauth_cred_copy(l2->l_cred);
		kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
		kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
	}

	/* Update the master credentials. */
	if (l2->l_cred != p2->p_cred) {
		kauth_cred_t ocred;
		mutex_enter(p2->p_lock);
		ocred = p2->p_cred;
		p2->p_cred = kauth_cred_hold(l2->l_cred);
		mutex_exit(p2->p_lock);
		kauth_cred_free(ocred);
	}

	/*
	 * Tell the caller the child now exists; sys_posix_spawn()
	 * skips its own cleanup of fa/sa and the process counts once
	 * this is set.
	 */
	*child_ok = true;
	spawn_data->sed_refcnt = 2;	/* child gets it as well */
#if 0
	l2->l_nopreempt = 1; /* start it non-preemptable */
#endif

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(&proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	p2->p_exitsig = SIGCHLD;	/* signal for parent on exit */

	/*
	 * If the parent is traced with PSL_TRACEPOSIX_SPAWN set, the
	 * child is reparented to p1's parent and marked as a traced
	 * child.
	 */
	if ((p1->p_slflag & (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) ==
	    (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) {
		proc_changeparent(p2, p1->p_pptr);
		SET(p2->p_slflag, PSL_TRACEDCHILD);
	}

	p2->p_oppid = p1->p_pid;  /* Remember the original parent id. */

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 *
	 * NOTE(review): the code below makes the child runnable
	 * unconditionally; confirm whether the SSTOP remark is stale.
	 */
	mutex_enter(p2->p_lock);

	getmicrotime(&p2->p_stats->p_start);

	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	KASSERT(l2->l_stat == LSIDL);
	p2->p_nrlwps = 1;
	p2->p_stat = SACTIVE;
	setrunnable(l2);
	/* LWP now unlocked */

	mutex_exit(p2->p_lock);
	mutex_exit(&proc_lock);

	/*
	 * Wait, with sed_mtx_child held since initialisation above,
	 * until the child sets sed_child_ready, then pick up the
	 * error code it stored.
	 */
	while (!spawn_data->sed_child_ready) {
		cv_wait(&spawn_data->sed_cv_child_ready,
		    &spawn_data->sed_mtx_child);
	}
	error = spawn_data->sed_error;
	mutex_exit(&spawn_data->sed_mtx_child);
	/* drop the parent's reference on spawn_data */
	spawn_exec_data_release(spawn_data);

	rw_exit(&p1->p_reflock);
	rw_exit(&exec_lock);
	have_exec_lock = false;

	/* The pid is handed back even if the child reported an error. */
	*pid_res = pid;

	if (error)
		return error;

	if (p1->p_slflag & PSL_TRACED) {
		/* Paranoid check */
		mutex_enter(&proc_lock);
		if ((p1->p_slflag & (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) !=
		    (PSL_TRACEPOSIX_SPAWN|PSL_TRACED)) {
			mutex_exit(&proc_lock);
			return 0;
		}

		/*
		 * NOTE(review): proc_lock and p1->p_lock are not
		 * released here; presumably eventswitch() drops both
		 * -- confirm.
		 */
		mutex_enter(p1->p_lock);
		eventswitch(TRAP_CHLD, PTRACE_POSIX_SPAWN, pid);
	}
	return 0;

 error_exit:
	/* Reached only before the child LWP was created. */
	if (have_exec_lock) {
		execve_free_data(&spawn_data->sed_exec);
		rw_exit(&p1->p_reflock);
		rw_exit(&exec_lock);
	}
	mutex_exit(&spawn_data->sed_mtx_child);
	spawn_exec_data_release(spawn_data);
	if (uaddr != 0)
		uvm_uarea_free(uaddr);

	return error;
}

/*
 * posix_spawn(2) system call entry point.
 *
 * Copies the optional file actions and attributes in from userland
 * and hands them to do_posix_spawn().  The error number is delivered
 * through *retval; the function itself always returns 0.
 */
int
sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t *) pid;
		syscallarg(const char *) path;
		syscallarg(const struct posix_spawn_file_actions *) file_actions;
		syscallarg(const struct posix_spawnattr *) attrp;
		syscallarg(char *const *) argv;
		syscallarg(char *const *) envp;
	} */

	int error;
	struct posix_spawn_file_actions *fa = NULL;
	struct posix_spawnattr *sa = NULL;
	pid_t pid;
	bool child_ok = false;
	rlim_t max_fileactions;
	proc_t *p = l1->l_proc;

	/* check_posix_spawn() increments nprocs for us. */
	error = check_posix_spawn(l1);
	if (error) {
		*retval = error;
		return 0;
	}

	/* copy in file_actions struct */
	if (SCARG(uap, file_actions) != NULL) {
		/*
		 * Cap the number of file actions at twice the smaller
		 * of the RLIMIT_NOFILE soft limit and maxfiles.
		 */
		max_fileactions = 2 * uimin(p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
		    maxfiles);
		error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions),
		    max_fileactions);
		if (error)
			goto error_exit;
	}

	/* copyin posix_spawnattr struct */
	if (SCARG(uap, attrp) != NULL) {
		sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
		error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
		if (error)
			goto error_exit;
	}

	/*
	 * Do the spawn
	 */
	error = do_posix_spawn(l1, &pid, &child_ok, SCARG(uap, path), fa, sa,
	    SCARG(uap, argv), SCARG(uap, envp), execve_fetch_element);
	if (error)
		goto error_exit;

	/* Report the child's pid if the caller supplied a location for it. */
	if (error == 0 && SCARG(uap, pid) != NULL)
		error = copyout(&pid, SCARG(uap, pid), sizeof(pid));

	*retval = error;
	return 0;

 error_exit:
	/*
	 * Only clean up here if no child was created: undo the counts
	 * taken by check_posix_spawn() and free fa/sa.  Once child_ok
	 * is set this function leaves them alone.
	 */
	if (!child_ok) {
		(void)chgproccnt(kauth_cred_getuid(l1->l_cred), -1);
		atomic_dec_uint(&nprocs);

		if (sa)
			kmem_free(sa, sizeof(*sa));
		if (fa)
			posix_spawn_fa_free(fa, fa->len);
	}

	*retval = error;
	return 0;
}

void 2951 1.336 matt exec_free_emul_arg(struct exec_package *epp) 2952 1.336 matt { 2953 1.336 matt if (epp->ep_emul_arg_free != NULL) { 2954 1.336 matt KASSERT(epp->ep_emul_arg != NULL); 2955 1.336 matt (*epp->ep_emul_arg_free)(epp->ep_emul_arg); 2956 1.336 matt epp->ep_emul_arg_free = NULL; 2957 1.336 matt epp->ep_emul_arg = NULL; 2958 1.336 matt } else { 2959 1.336 matt KASSERT(epp->ep_emul_arg == NULL); 2960 1.336 matt } 2961 1.336 matt } 2962 1.388 uebayasi 2963 1.388 uebayasi #ifdef DEBUG_EXEC 2964 1.388 uebayasi static void 2965 1.388 uebayasi dump_vmcmds(const struct exec_package * const epp, size_t x, int error) 2966 1.388 uebayasi { 2967 1.388 uebayasi struct exec_vmcmd *vp = &epp->ep_vmcmds.evs_cmds[0]; 2968 1.388 uebayasi size_t j; 2969 1.388 uebayasi 2970 1.388 uebayasi if (error == 0) 2971 1.388 uebayasi DPRINTF(("vmcmds %u\n", epp->ep_vmcmds.evs_used)); 2972 1.388 uebayasi else 2973 1.525 riastrad DPRINTF(("vmcmds %zu/%u, error %d\n", x, 2974 1.388 uebayasi epp->ep_vmcmds.evs_used, error)); 2975 1.388 uebayasi 2976 1.388 uebayasi for (j = 0; j < epp->ep_vmcmds.evs_used; j++) { 2977 1.388 uebayasi DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#" 2978 1.388 uebayasi PRIxVADDR"/%#"PRIxVSIZE" fd@%#" 2979 1.388 uebayasi PRIxVSIZE" prot=0%o flags=%d\n", j, 2980 1.388 uebayasi vp[j].ev_proc == vmcmd_map_pagedvn ? 2981 1.388 uebayasi "pagedvn" : 2982 1.388 uebayasi vp[j].ev_proc == vmcmd_map_readvn ? 2983 1.388 uebayasi "readvn" : 2984 1.388 uebayasi vp[j].ev_proc == vmcmd_map_zero ? 2985 1.388 uebayasi "zero" : "*unknown*", 2986 1.388 uebayasi vp[j].ev_addr, vp[j].ev_len, 2987 1.388 uebayasi vp[j].ev_offset, vp[j].ev_prot, 2988 1.388 uebayasi vp[j].ev_flags)); 2989 1.388 uebayasi if (error != 0 && j == x) 2990 1.388 uebayasi DPRINTF((" ^--- failed\n")); 2991 1.388 uebayasi } 2992 1.388 uebayasi } 2993 1.388 uebayasi #endif 2994