kern_exec.c revision 1.282 1 1.282 ad /* $NetBSD: kern_exec.c,v 1.282 2008/11/19 18:36:06 ad Exp $ */
2 1.277 ad
3 1.277 ad /*-
4 1.277 ad * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 1.277 ad * All rights reserved.
6 1.277 ad *
7 1.277 ad * Redistribution and use in source and binary forms, with or without
8 1.277 ad * modification, are permitted provided that the following conditions
9 1.277 ad * are met:
10 1.277 ad * 1. Redistributions of source code must retain the above copyright
11 1.277 ad * notice, this list of conditions and the following disclaimer.
12 1.277 ad * 2. Redistributions in binary form must reproduce the above copyright
13 1.277 ad * notice, this list of conditions and the following disclaimer in the
14 1.277 ad * documentation and/or other materials provided with the distribution.
15 1.277 ad *
16 1.277 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.277 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.277 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.277 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.277 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.277 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.277 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.277 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.277 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.277 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.277 ad * POSSIBILITY OF SUCH DAMAGE.
27 1.277 ad */
28 1.55 cgd
29 1.55 cgd /*-
30 1.77 cgd * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
31 1.55 cgd * Copyright (C) 1992 Wolfgang Solfrank.
32 1.55 cgd * Copyright (C) 1992 TooLs GmbH.
33 1.55 cgd * All rights reserved.
34 1.55 cgd *
35 1.55 cgd * Redistribution and use in source and binary forms, with or without
36 1.55 cgd * modification, are permitted provided that the following conditions
37 1.55 cgd * are met:
38 1.55 cgd * 1. Redistributions of source code must retain the above copyright
39 1.55 cgd * notice, this list of conditions and the following disclaimer.
40 1.55 cgd * 2. Redistributions in binary form must reproduce the above copyright
41 1.55 cgd * notice, this list of conditions and the following disclaimer in the
42 1.55 cgd * documentation and/or other materials provided with the distribution.
43 1.55 cgd * 3. All advertising materials mentioning features or use of this software
44 1.55 cgd * must display the following acknowledgement:
45 1.55 cgd * This product includes software developed by TooLs GmbH.
46 1.55 cgd * 4. The name of TooLs GmbH may not be used to endorse or promote products
47 1.55 cgd * derived from this software without specific prior written permission.
48 1.55 cgd *
49 1.55 cgd * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50 1.55 cgd * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51 1.55 cgd * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52 1.55 cgd * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53 1.55 cgd * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54 1.55 cgd * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55 1.55 cgd * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56 1.55 cgd * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57 1.55 cgd * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58 1.55 cgd * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 1.55 cgd */
60 1.146 lukem
61 1.146 lukem #include <sys/cdefs.h>
62 1.282 ad __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.282 2008/11/19 18:36:06 ad Exp $");
63 1.89 mrg
64 1.92 thorpej #include "opt_ktrace.h"
65 1.124 jdolecek #include "opt_syscall_debug.h"
66 1.226 dogcow #include "veriexec.h"
67 1.232 elad #include "opt_pax.h"
68 1.279 wrstuden #include "opt_sa.h"
69 1.55 cgd
70 1.55 cgd #include <sys/param.h>
71 1.55 cgd #include <sys/systm.h>
72 1.55 cgd #include <sys/filedesc.h>
73 1.55 cgd #include <sys/kernel.h>
74 1.55 cgd #include <sys/proc.h>
75 1.55 cgd #include <sys/mount.h>
76 1.55 cgd #include <sys/malloc.h>
77 1.265 yamt #include <sys/kmem.h>
78 1.55 cgd #include <sys/namei.h>
79 1.55 cgd #include <sys/vnode.h>
80 1.55 cgd #include <sys/file.h>
81 1.55 cgd #include <sys/acct.h>
82 1.55 cgd #include <sys/exec.h>
83 1.55 cgd #include <sys/ktrace.h>
84 1.278 pooka #include <sys/uidinfo.h>
85 1.55 cgd #include <sys/wait.h>
86 1.55 cgd #include <sys/mman.h>
87 1.155 gmcgarry #include <sys/ras.h>
88 1.55 cgd #include <sys/signalvar.h>
89 1.55 cgd #include <sys/stat.h>
90 1.124 jdolecek #include <sys/syscall.h>
91 1.218 elad #include <sys/kauth.h>
92 1.253 ad #include <sys/lwpctl.h>
93 1.260 christos #include <sys/pax.h>
94 1.263 ad #include <sys/cpu.h>
95 1.282 ad #include <sys/module.h>
96 1.279 wrstuden #include <sys/sa.h>
97 1.279 wrstuden #include <sys/savar.h>
98 1.56 cgd #include <sys/syscallargs.h>
99 1.222 elad #if NVERIEXEC > 0
100 1.197 blymn #include <sys/verified_exec.h>
101 1.222 elad #endif /* NVERIEXEC > 0 */
102 1.55 cgd
103 1.88 mrg #include <uvm/uvm_extern.h>
104 1.88 mrg
105 1.55 cgd #include <machine/reg.h>
106 1.55 cgd
107 1.244 dsl #include <compat/common/compat_util.h>
108 1.244 dsl
109 1.171 chs static int exec_sigcode_map(struct proc *, const struct emul *);
110 1.171 chs
111 1.143 christos #ifdef DEBUG_EXEC /* DPRINTF((fmt, ...)) forwards its parenthesized argument list to uprintf */
112 1.143 christos #define DPRINTF(a) uprintf a
113 1.143 christos #else /* without DEBUG_EXEC the macro expands to nothing */
114 1.143 christos #define DPRINTF(a)
115 1.143 christos #endif /* DEBUG_EXEC */
116 1.165 thorpej
117 1.130 jdolecek /*
118 1.130 jdolecek * Exec function switch:
119 1.130 jdolecek *
120 1.130 jdolecek * Note that each makecmds function is responsible for loading the
121 1.130 jdolecek * exec package with the necessary functions for any exec-type-specific
122 1.130 jdolecek * handling.
123 1.130 jdolecek *
124 1.130 jdolecek * Functions for specific exec types should be defined in their own
125 1.130 jdolecek * header file.
126 1.130 jdolecek */
127 1.138 lukem static const struct execsw **execsw = NULL; /* table walked by check_exec() as execsw[0 .. nexecs-1] */
128 1.138 lukem static int nexecs; /* number of entries in execsw[] */
129 1.138 lukem /* exec_maxhdrsz is the size of the exec header buffer that execve1() allocates for ep_hdr */
130 1.282 ad u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */
131 1.130 jdolecek
132 1.130 jdolecek /* list of dynamically loaded execsw entries */
133 1.282 ad static LIST_HEAD(execlist_head, exec_entry) ex_head =
134 1.282 ad LIST_HEAD_INITIALIZER(ex_head);
135 1.130 jdolecek struct exec_entry {
136 1.138 lukem LIST_ENTRY(exec_entry) ex_list; /* list linkage; presumably for ex_head - users are outside this chunk */
137 1.282 ad SLIST_ENTRY(exec_entry) ex_slist; /* singly-linked linkage; NOTE(review): user not visible here - confirm */
138 1.282 ad const struct execsw *ex_sw; /* the execsw entry this node refers to */
139 1.130 jdolecek };
140 1.130 jdolecek
141 1.124 jdolecek #ifdef SYSCALL_DEBUG
142 1.124 jdolecek extern const char * const syscallnames[];
143 1.124 jdolecek #endif
144 1.124 jdolecek
145 1.203 christos #ifndef __HAVE_SYSCALL_INTERN
146 1.203 christos void syscall(void);
147 1.203 christos #endif
148 1.203 christos
149 1.280 matt #ifdef KERN_SA /* SA hooks for the "netbsd" emulation; referenced by emul_netbsd only when KERN_SA is defined */
150 1.282 ad static struct sa_emul saemul_netbsd = { /* positional initializer: order must match struct sa_emul (declared elsewhere) */
151 1.279 wrstuden sizeof(ucontext_t),
152 1.279 wrstuden sizeof(struct sa_t),
153 1.279 wrstuden sizeof(struct sa_t *),
154 1.279 wrstuden NULL,
155 1.279 wrstuden NULL,
156 1.279 wrstuden cpu_upcall,
157 1.279 wrstuden (void (*)(struct lwp *, void *))getucontext_sa, /* cast adapts getucontext_sa to the hook's function-pointer type */
158 1.279 wrstuden sa_ucsp
159 1.279 wrstuden };
160 1.280 matt #endif /* KERN_SA */
161 1.279 wrstuden
162 1.173 christos /* NetBSD emul struct. Members are initialized positionally, so the order below must match struct emul (declared elsewhere). */
163 1.282 ad struct emul emul_netbsd = {
164 1.124 jdolecek "netbsd", /* emulation name */
165 1.127 jdolecek NULL, /* emulation path */
166 1.133 mycroft #ifndef __HAVE_MINIMAL_EMUL /* these four members are omitted when __HAVE_MINIMAL_EMUL is defined */
167 1.140 manu EMUL_HAS_SYS___syscall,
168 1.124 jdolecek NULL,
169 1.124 jdolecek SYS_syscall,
170 1.161 jdolecek SYS_NSYSENT,
171 1.133 mycroft #endif
172 1.124 jdolecek sysent,
173 1.124 jdolecek #ifdef SYSCALL_DEBUG /* syscall name table is only supplied for SYSCALL_DEBUG kernels */
174 1.124 jdolecek syscallnames,
175 1.124 jdolecek #else
176 1.124 jdolecek NULL,
177 1.124 jdolecek #endif
178 1.133 mycroft sendsig,
179 1.142 christos trapsignal,
180 1.180 fvdl NULL,
181 1.173 christos NULL,
182 1.173 christos NULL,
183 1.173 christos NULL,
184 1.145 jdolecek setregs,
185 1.128 jdolecek NULL,
186 1.128 jdolecek NULL,
187 1.128 jdolecek NULL,
188 1.179 manu NULL,
189 1.179 manu NULL,
190 1.133 mycroft #ifdef __HAVE_SYSCALL_INTERN /* pick syscall_intern or the plain syscall entry, depending on the port */
191 1.133 mycroft syscall_intern,
192 1.133 mycroft #else
193 1.133 mycroft syscall,
194 1.133 mycroft #endif
195 1.156 manu NULL,
196 1.156 manu NULL,
197 1.195 fvdl
198 1.195 fvdl uvm_default_mapaddr,
199 1.216 cube NULL,
200 1.280 matt #ifdef KERN_SA /* SA hooks are present only when KERN_SA is configured */
201 1.279 wrstuden &saemul_netbsd,
202 1.280 matt #else
203 1.280 matt NULL,
204 1.280 matt #endif
205 1.237 ad sizeof(ucontext_t),
206 1.239 cube startlwp,
207 1.124 jdolecek };
208 1.124 jdolecek
209 1.55 cgd /*
210 1.130 jdolecek * Exec lock. Used to control access to execsw[] structures.
211 1.130 jdolecek * This must not be static so that netbsd32 can access it, too.
212 1.130 jdolecek * execve1() takes it as a reader before calling check_exec(). */
213 1.237 ad krwlock_t exec_lock;
214 1.183 junyoung /* NOTE(review): users of sigobject_lock are outside this chunk; presumably it guards sigcode object setup - confirm */
215 1.259 ad static kmutex_t sigobject_lock;
216 1.259 ad
217 1.277 ad static void * /* pa_alloc hook of exec_palloc */
218 1.277 ad exec_pool_alloc(struct pool *pp, int flags)
219 1.277 ad {
220 1.277 ad /* Hand back one NCARGS-byte pageable region of kernel_map; pp and flags are not consulted. */
221 1.277 ad return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
222 1.277 ad UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
223 1.277 ad }
224 1.277 ad
225 1.277 ad static void /* pa_free hook of exec_palloc */
226 1.277 ad exec_pool_free(struct pool *pp, void *addr)
227 1.277 ad {
228 1.277 ad /* Release the NCARGS-byte region at addr back to kernel_map; size and flag mirror exec_pool_alloc(). pp is unused. */
229 1.277 ad uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
230 1.277 ad }
231 1.277 ad
232 1.277 ad static struct pool exec_pool; /* execve1() draws its argument buffer from this pool via pool_get() */
233 1.277 ad /* Allocator description for exec_pool: each backing "page" is a single NCARGS-byte region. */
234 1.277 ad static struct pool_allocator exec_palloc = {
235 1.277 ad .pa_alloc = exec_pool_alloc,
236 1.277 ad .pa_free = exec_pool_free,
237 1.277 ad .pa_pagesz = NCARGS
238 1.277 ad };
239 1.277 ad
240 1.130 jdolecek /*
241 1.55 cgd * check exec:
242 1.55 cgd * given an "executable" described in the exec package's namei info,
243 1.55 cgd * see what we can do with it.
244 1.55 cgd *
245 1.55 cgd * ON ENTRY:
246 1.55 cgd * exec package with appropriate namei info
247 1.212 christos * lwp pointer of exec'ing lwp
248 1.55 cgd * NO SELF-LOCKED VNODES
249 1.55 cgd *
250 1.55 cgd * ON EXIT:
251 1.55 cgd * error: nothing held, etc. exec header still allocated.
252 1.77 cgd * ok: filled exec package, executable's vnode (unlocked).
253 1.55 cgd *
254 1.55 cgd * EXEC SWITCH ENTRY:
255 1.55 cgd * Locked vnode to check, exec package, proc.
256 1.55 cgd *
257 1.55 cgd * EXEC SWITCH EXIT:
258 1.77 cgd * ok: return 0, filled exec package, executable's vnode (unlocked).
259 1.55 cgd * error: destructive:
260 1.55 cgd * everything deallocated except exec header.
261 1.76 cgd * non-destructive:
262 1.77 cgd * error code, executable's vnode (unlocked),
263 1.76 cgd * exec header unmodified.
264 1.55 cgd */
265 1.55 cgd int
266 1.205 christos /*ARGSUSED*/
267 1.233 elad check_exec(struct lwp *l, struct exec_package *epp)
268 1.55 cgd {
269 1.138 lukem int error, i;
270 1.138 lukem struct vnode *vp;
271 1.55 cgd struct nameidata *ndp;
272 1.138 lukem size_t resid;
273 1.55 cgd
274 1.55 cgd ndp = epp->ep_ndp;
275 1.55 cgd ndp->ni_cnd.cn_nameiop = LOOKUP;
276 1.244 dsl ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
277 1.55 cgd /* first get the vnode; LOCKLEAF is requested, and the bad1 path below treats it as still locked */
278 1.74 christos if ((error = namei(ndp)) != 0)
279 1.55 cgd return error;
280 1.55 cgd epp->ep_vp = vp = ndp->ni_vp;
281 1.55 cgd
282 1.84 mycroft /* check access and type */
283 1.55 cgd if (vp->v_type != VREG) {
284 1.81 kleink error = EACCES;
285 1.55 cgd goto bad1;
286 1.55 cgd }
287 1.254 pooka if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
288 1.84 mycroft goto bad1;
289 1.55 cgd
290 1.55 cgd /* get attributes */
291 1.254 pooka if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
292 1.55 cgd goto bad1;
293 1.55 cgd
294 1.55 cgd /* Check mount point */
295 1.55 cgd if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
296 1.55 cgd error = EACCES;
297 1.55 cgd goto bad1;
298 1.55 cgd }
299 1.141 thorpej if (vp->v_mount->mnt_flag & MNT_NOSUID) /* nosuid mount: drop the set-id bits from the cached mode */
300 1.83 mycroft epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
301 1.55 cgd
302 1.55 cgd /* try to open it */
303 1.254 pooka if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
304 1.55 cgd goto bad1;
305 1.55 cgd
306 1.99 wrstuden /* unlock vp, since we need it unlocked from here on out. */
307 1.90 fvdl VOP_UNLOCK(vp, 0);
308 1.77 cgd /* From here on, failures go to bad2, which re-locks vp and closes it. */
309 1.222 elad #if NVERIEXEC > 0
310 1.236 elad error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
311 1.233 elad epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
312 1.236 elad NULL);
313 1.236 elad if (error)
314 1.234 elad goto bad2;
315 1.222 elad #endif /* NVERIEXEC > 0 */
316 1.160 blymn
317 1.232 elad #ifdef PAX_SEGVGUARD
318 1.240 thorpej error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
319 1.234 elad if (error)
320 1.234 elad goto bad2;
321 1.232 elad #endif /* PAX_SEGVGUARD */
322 1.232 elad
323 1.55 cgd /* now we have the file, get the exec header */
324 1.74 christos error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
325 1.223 ad UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
326 1.74 christos if (error)
327 1.55 cgd goto bad2;
328 1.55 cgd epp->ep_hdrvalid = epp->ep_hdrlen - resid; /* requested length minus the residual left unread */
329 1.55 cgd
330 1.55 cgd /*
331 1.136 eeh * Set up default address space limits. Can be overridden
332 1.136 eeh * by individual exec packages.
333 1.183 junyoung *
334 1.235 rillig * XXX probably should be all done in the exec packages.
335 1.136 eeh */
336 1.136 eeh epp->ep_vm_minaddr = VM_MIN_ADDRESS;
337 1.136 eeh epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
338 1.136 eeh /*
339 1.55 cgd * set up the vmcmds for creation of the process
340 1.55 cgd * address space
341 1.55 cgd */
342 1.55 cgd error = ENOEXEC;
343 1.244 dsl for (i = 0; i < nexecs; i++) {
344 1.68 cgd int newerror;
345 1.68 cgd
346 1.130 jdolecek epp->ep_esch = execsw[i];
347 1.212 christos newerror = (*execsw[i]->es_makecmds)(l, epp);
348 1.244 dsl /* es_makecmds returned 0: accept this entry, subject to the sanity checks below */
349 1.244 dsl if (!newerror) {
350 1.244 dsl /* Seems ok: check that entry point is sane */
351 1.244 dsl if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
352 1.244 dsl error = ENOEXEC;
353 1.244 dsl break;
354 1.244 dsl }
355 1.244 dsl
356 1.244 dsl /* check limits */
357 1.244 dsl if ((epp->ep_tsize > MAXTSIZ) ||
358 1.244 dsl (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
359 1.244 dsl [RLIMIT_DATA].rlim_cur)) {
360 1.244 dsl error = ENOMEM;
361 1.244 dsl break;
362 1.244 dsl }
363 1.244 dsl return 0;
364 1.244 dsl }
365 1.244 dsl /* es_makecmds failed: release any vnode references left in the package before trying the next entry */
366 1.244 dsl if (epp->ep_emul_root != NULL) {
367 1.244 dsl vrele(epp->ep_emul_root);
368 1.244 dsl epp->ep_emul_root = NULL;
369 1.244 dsl }
370 1.244 dsl if (epp->ep_interp != NULL) {
371 1.244 dsl vrele(epp->ep_interp);
372 1.244 dsl epp->ep_interp = NULL;
373 1.244 dsl }
374 1.244 dsl
375 1.68 cgd /* make sure the first "interesting" error code is saved. */
376 1.244 dsl if (error == ENOEXEC)
377 1.68 cgd error = newerror;
378 1.124 jdolecek
379 1.244 dsl if (epp->ep_flags & EXEC_DESTR)
380 1.244 dsl /* Error from "#!" code, tidied up by recursive call */
381 1.55 cgd return error;
382 1.55 cgd }
383 1.55 cgd
384 1.249 pooka /* not found, error */
385 1.249 pooka /* (also reached from the break statements above, with error already set) */
386 1.55 cgd /*
387 1.55 cgd * free any vmspace-creation commands,
388 1.55 cgd * and release their references
389 1.55 cgd */
390 1.55 cgd kill_vmcmds(&epp->ep_vmcmds);
391 1.55 cgd
392 1.55 cgd bad2:
393 1.55 cgd /*
394 1.99 wrstuden * close and release the vnode, restore the old one, free the
395 1.55 cgd * pathname buf, and punt.
396 1.55 cgd */
397 1.99 wrstuden vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
398 1.254 pooka VOP_CLOSE(vp, FREAD, l->l_cred);
399 1.99 wrstuden vput(vp);
400 1.120 thorpej PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
401 1.55 cgd return error;
402 1.55 cgd
403 1.55 cgd bad1:
404 1.55 cgd /*
405 1.55 cgd * free the namei pathname buffer, and put the vnode
406 1.55 cgd * (which we don't yet have open).
407 1.55 cgd */
408 1.77 cgd vput(vp); /* was still locked */
409 1.120 thorpej PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
410 1.55 cgd return error;
411 1.55 cgd }
412 1.55 cgd
413 1.188 chs #ifdef __MACHINE_STACK_GROWS_UP /* extra bytes added to execve1()'s initial stack size: NBPG on grows-up stacks, else 0 */
414 1.188 chs #define STACK_PTHREADSPACE NBPG
415 1.188 chs #else
416 1.188 chs #define STACK_PTHREADSPACE 0
417 1.188 chs #endif
418 1.188 chs
419 1.204 cube static int /* fetch_element callback that sys_execve() passes to execve1() */
420 1.204 cube execve_fetch_element(char * const *array, size_t index, char **value)
421 1.204 cube {
422 1.204 cube return copyin(array + index, value, sizeof(*value)); /* copy the pointer array[index] into *value; returns copyin()'s result */
423 1.204 cube }
424 1.204 cube
425 1.55 cgd /*
426 1.55 cgd * exec system call
427 1.55 cgd */
428 1.55 cgd /* ARGSUSED */
429 1.75 christos int
430 1.258 dsl sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
431 1.71 thorpej {
432 1.258 dsl /* {
433 1.138 lukem syscallarg(const char *) path;
434 1.138 lukem syscallarg(char * const *) argp;
435 1.138 lukem syscallarg(char * const *) envp;
436 1.258 dsl } */
437 1.204 cube /* Thin wrapper: hand path/argp/envp to execve1() with execve_fetch_element as the pointer fetcher; retval is not used here. */
438 1.204 cube return execve1(l, SCARG(uap, path), SCARG(uap, argp),
439 1.204 cube SCARG(uap, envp), execve_fetch_element);
440 1.204 cube }
441 1.204 cube
442 1.282 ad /*
443 1.282 ad * Load modules to try and execute an image that we do not understand.
444 1.282 ad * If no execsw entries are present, we load those likely to be needed
445 1.282 ad * in order to run native images only. Otherwise, we autoload all
446 1.282 ad * possible modules that could let us run the binary. XXX lame
447 1.282 ad * Without MODULAR the function body is empty. */
448 1.282 ad static void
449 1.282 ad exec_autoload(void)
450 1.282 ad {
451 1.282 ad #ifdef MODULAR
452 1.282 ad static const char * const native[] = { /* tried when execsw[] is still empty */
453 1.282 ad "exec_elf32",
454 1.282 ad "exec_elf64",
455 1.282 ad "exec_script",
456 1.282 ad NULL
457 1.282 ad };
458 1.282 ad static const char * const compat[] = { /* tried when at least one execsw entry exists; repeats the native names */
459 1.282 ad "exec_elf32",
460 1.282 ad "exec_elf64",
461 1.282 ad "exec_script",
462 1.282 ad "exec_aout",
463 1.282 ad "exec_coff",
464 1.282 ad "exec_ecoff",
465 1.282 ad "compat_aoutm68k",
466 1.282 ad "compat_freebsd",
467 1.282 ad "compat_ibcs2",
468 1.282 ad "compat_irix",
469 1.282 ad "compat_linux",
470 1.282 ad "compat_linux32",
471 1.282 ad "compat_netbsd32",
472 1.282 ad "compat_sunos",
473 1.282 ad "compat_sunos32",
474 1.282 ad "compat_svr4",
475 1.282 ad "compat_svr4_32",
476 1.282 ad "compat_ultrix",
477 1.282 ad NULL
478 1.282 ad };
479 1.282 ad char const * const *list;
480 1.282 ad int i;
481 1.282 ad /* module_autoload() is called with module_lock held; the lock is dropped only around yield(). */
482 1.282 ad mutex_enter(&module_lock);
483 1.282 ad list = (nexecs == 0 ? native : compat);
484 1.282 ad for (i = 0; list[i] != NULL; i++) {
485 1.282 ad if (module_autoload(list[i], MODULE_CLASS_MISC) != 0) { /* non-zero (presumably failure): move on without yielding */
486 1.282 ad continue;
487 1.282 ad }
488 1.282 ad mutex_exit(&module_lock);
489 1.282 ad yield();
490 1.282 ad mutex_enter(&module_lock);
491 1.282 ad }
492 1.282 ad mutex_exit(&module_lock);
493 1.282 ad #endif /* MODULAR */
494 1.282 ad }
495 1.282 ad
496 1.204 cube int
497 1.204 cube execve1(struct lwp *l, const char *path, char * const *args,
498 1.204 cube char * const *envs, execve_fetch_element_t fetch_element)
499 1.204 cube {
500 1.153 thorpej int error;
501 1.138 lukem struct exec_package pack;
502 1.138 lukem struct nameidata nid;
503 1.138 lukem struct vattr attr;
504 1.164 thorpej struct proc *p;
505 1.138 lukem char *argp;
506 1.138 lukem char *dp, *sp;
507 1.138 lukem long argc, envc;
508 1.248 christos size_t i, len;
509 1.138 lukem char *stack;
510 1.138 lukem struct ps_strings arginfo;
511 1.213 manu struct ps_strings *aip = &arginfo;
512 1.138 lukem struct vmspace *vm;
513 1.265 yamt struct exec_fakearg *tmpfap;
514 1.138 lukem int szsigcode;
515 1.138 lukem struct exec_vmcmd *base_vcp;
516 1.279 wrstuden int oldlwpflags;
517 1.237 ad ksiginfo_t ksi;
518 1.237 ad ksiginfoq_t kq;
519 1.260 christos char *pathbuf;
520 1.255 christos size_t pathbuflen;
521 1.282 ad u_int modgen;
522 1.55 cgd
523 1.237 ad p = l->l_proc;
524 1.282 ad modgen = 0;
525 1.164 thorpej
526 1.149 christos /*
527 1.269 christos * Check if we have exceeded our number of processes limit.
528 1.269 christos * This is so that we handle the case where a root daemon
529 1.269 christos * forked, ran setuid to become the desired user and is trying
530 1.269 christos * to exec. The obvious place to do the reference counting check
531 1.269 christos * is setuid(), but we don't do the reference counting check there
532 1.269 christos * like other OS's do because then all the programs that use setuid()
533 1.269 christos * must be modified to check the return code of setuid() and exit().
534 1.269 christos * It is dangerous to make setuid() fail, because it fails open and
535 1.269 christos * the program will continue to run as root. If we make it succeed
536 1.269 christos * and return an error code, again we are not enforcing the limit.
537 1.269 christos * The best place to enforce the limit is here, when the process tries
538 1.269 christos * to execute a new image, because eventually the process will need
539 1.269 christos * to call exec in order to do something useful.
540 1.269 christos */
541 1.282 ad retry:
542 1.269 christos if ((p->p_flag & PK_SUGID) &&
543 1.272 ad chgproccnt(kauth_cred_getuid(l->l_cred), 0) >
544 1.269 christos p->p_rlimit[RLIMIT_NPROC].rlim_cur)
545 1.269 christos return EAGAIN;
546 1.269 christos
547 1.279 wrstuden oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
548 1.279 wrstuden if (l->l_flag & LW_SA) {
549 1.279 wrstuden lwp_lock(l);
550 1.279 wrstuden l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
551 1.279 wrstuden lwp_unlock(l);
552 1.279 wrstuden }
553 1.279 wrstuden
554 1.269 christos /*
555 1.237 ad * Drain existing references and forbid new ones. The process
556 1.237 ad * should be left alone until we're done here. This is necessary
557 1.237 ad * to avoid race conditions - e.g. in ptrace() - that might allow
558 1.237 ad * a local user to illicitly obtain elevated privileges.
559 1.237 ad */
560 1.252 ad rw_enter(&p->p_reflock, RW_WRITER);
561 1.149 christos
562 1.138 lukem base_vcp = NULL;
563 1.55 cgd /*
564 1.129 jdolecek * Init the namei data to point to the user's program file name.
565 1.129 jdolecek * This is done here rather than in check_exec(), so that it's
566 1.129 jdolecek * possible to override these settings if any of makecmd/probe
567 1.129 jdolecek * functions call check_exec() recursively - for example,
568 1.129 jdolecek * see exec_script_makecmds().
569 1.129 jdolecek */
570 1.260 christos pathbuf = PNBUF_GET();
571 1.260 christos error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
572 1.248 christos if (error) {
573 1.248 christos DPRINTF(("execve: copyinstr path %d", error));
574 1.200 elad goto clrflg;
575 1.248 christos }
576 1.200 elad
577 1.257 pooka NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
578 1.55 cgd
579 1.55 cgd /*
580 1.55 cgd * initialize the fields of the exec package.
581 1.55 cgd */
582 1.204 cube pack.ep_name = path;
583 1.265 yamt pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
584 1.55 cgd pack.ep_hdrlen = exec_maxhdrsz;
585 1.55 cgd pack.ep_hdrvalid = 0;
586 1.55 cgd pack.ep_ndp = &nid;
587 1.67 christos pack.ep_emul_arg = NULL;
588 1.55 cgd pack.ep_vmcmds.evs_cnt = 0;
589 1.55 cgd pack.ep_vmcmds.evs_used = 0;
590 1.55 cgd pack.ep_vap = &attr;
591 1.55 cgd pack.ep_flags = 0;
592 1.244 dsl pack.ep_emul_root = NULL;
593 1.244 dsl pack.ep_interp = NULL;
594 1.244 dsl pack.ep_esch = NULL;
595 1.273 ad pack.ep_pax_flags = 0;
596 1.55 cgd
597 1.237 ad rw_enter(&exec_lock, RW_READER);
598 1.130 jdolecek
599 1.55 cgd /* see if we can run it. */
600 1.248 christos if ((error = check_exec(l, &pack)) != 0) {
601 1.261 xtraeme if (error != ENOENT) {
602 1.260 christos DPRINTF(("execve: check exec failed %d\n", error));
603 1.261 xtraeme }
604 1.55 cgd goto freehdr;
605 1.248 christos }
606 1.55 cgd
607 1.55 cgd /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
608 1.55 cgd
609 1.55 cgd /* allocate an argument buffer */
610 1.277 ad argp = pool_get(&exec_pool, PR_WAITOK);
611 1.277 ad KASSERT(argp != NULL);
612 1.55 cgd dp = argp;
613 1.55 cgd argc = 0;
614 1.55 cgd
615 1.55 cgd /* copy the fake args list, if there's one, freeing it as we go */
616 1.55 cgd if (pack.ep_flags & EXEC_HASARGL) {
617 1.55 cgd tmpfap = pack.ep_fa;
618 1.265 yamt while (tmpfap->fa_arg != NULL) {
619 1.265 yamt const char *cp;
620 1.55 cgd
621 1.265 yamt cp = tmpfap->fa_arg;
622 1.55 cgd while (*cp)
623 1.55 cgd *dp++ = *cp++;
624 1.276 ad *dp++ = '\0';
625 1.55 cgd
626 1.265 yamt kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
627 1.55 cgd tmpfap++; argc++;
628 1.55 cgd }
629 1.265 yamt kmem_free(pack.ep_fa, pack.ep_fa_len);
630 1.55 cgd pack.ep_flags &= ~EXEC_HASARGL;
631 1.55 cgd }
632 1.55 cgd
633 1.55 cgd /* Now get argv & environment */
634 1.204 cube if (args == NULL) {
635 1.248 christos DPRINTF(("execve: null args\n"));
636 1.55 cgd error = EINVAL;
637 1.55 cgd goto bad;
638 1.55 cgd }
639 1.204 cube /* 'i' will index the argp/envp element to be retrieved */
640 1.204 cube i = 0;
641 1.55 cgd if (pack.ep_flags & EXEC_SKIPARG)
642 1.204 cube i++;
643 1.55 cgd
644 1.55 cgd while (1) {
645 1.55 cgd len = argp + ARG_MAX - dp;
646 1.248 christos if ((error = (*fetch_element)(args, i, &sp)) != 0) {
647 1.248 christos DPRINTF(("execve: fetch_element args %d\n", error));
648 1.55 cgd goto bad;
649 1.248 christos }
650 1.55 cgd if (!sp)
651 1.55 cgd break;
652 1.74 christos if ((error = copyinstr(sp, dp, len, &len)) != 0) {
653 1.248 christos DPRINTF(("execve: copyinstr args %d\n", error));
654 1.55 cgd if (error == ENAMETOOLONG)
655 1.55 cgd error = E2BIG;
656 1.55 cgd goto bad;
657 1.55 cgd }
658 1.247 ad ktrexecarg(dp, len - 1);
659 1.55 cgd dp += len;
660 1.204 cube i++;
661 1.55 cgd argc++;
662 1.55 cgd }
663 1.55 cgd
664 1.55 cgd envc = 0;
665 1.74 christos /* environment need not be there */
666 1.204 cube if (envs != NULL) {
667 1.204 cube i = 0;
668 1.55 cgd while (1) {
669 1.55 cgd len = argp + ARG_MAX - dp;
670 1.248 christos if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
671 1.248 christos DPRINTF(("execve: fetch_element env %d\n", error));
672 1.55 cgd goto bad;
673 1.248 christos }
674 1.55 cgd if (!sp)
675 1.55 cgd break;
676 1.74 christos if ((error = copyinstr(sp, dp, len, &len)) != 0) {
677 1.248 christos DPRINTF(("execve: copyinstr env %d\n", error));
678 1.55 cgd if (error == ENAMETOOLONG)
679 1.55 cgd error = E2BIG;
680 1.55 cgd goto bad;
681 1.55 cgd }
682 1.247 ad ktrexecenv(dp, len - 1);
683 1.55 cgd dp += len;
684 1.204 cube i++;
685 1.55 cgd envc++;
686 1.55 cgd }
687 1.55 cgd }
688 1.61 mycroft
689 1.61 mycroft dp = (char *) ALIGN(dp);
690 1.55 cgd
691 1.244 dsl szsigcode = pack.ep_esch->es_emul->e_esigcode -
692 1.244 dsl pack.ep_esch->es_emul->e_sigcode;
693 1.65 fvdl
694 1.267 dsl #ifdef __MACHINE_STACK_GROWS_UP
695 1.267 dsl /* See big comment lower down */
696 1.267 dsl #define RTLD_GAP 32
697 1.267 dsl #else
698 1.267 dsl #define RTLD_GAP 0
699 1.267 dsl #endif
700 1.267 dsl
701 1.55 cgd /* Now check if args & environ fit into new stack */
702 1.105 eeh if (pack.ep_flags & EXEC_32)
703 1.244 dsl len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
704 1.267 dsl sizeof(int) + sizeof(int) + dp + RTLD_GAP +
705 1.188 chs szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
706 1.188 chs - argp;
707 1.105 eeh else
708 1.244 dsl len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
709 1.267 dsl sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
710 1.188 chs szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
711 1.188 chs - argp;
712 1.67 christos
713 1.262 elad #ifdef PAX_ASLR
714 1.262 elad if (pax_aslr_active(l))
715 1.262 elad len += (arc4random() % PAGE_SIZE);
716 1.262 elad #endif /* PAX_ASLR */
717 1.262 elad
718 1.243 matt #ifdef STACKLALIGN /* arm, etc. */
719 1.243 matt len = STACKALIGN(len); /* make the stack "safely" aligned */
720 1.243 matt #else
721 1.55 cgd len = ALIGN(len); /* make the stack "safely" aligned */
722 1.243 matt #endif
723 1.55 cgd
724 1.55 cgd if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
725 1.248 christos DPRINTF(("execve: stack limit exceeded %zu\n", len));
726 1.55 cgd error = ENOMEM;
727 1.55 cgd goto bad;
728 1.55 cgd }
729 1.55 cgd
730 1.237 ad /* Get rid of other LWPs. */
731 1.279 wrstuden if (p->p_sa || p->p_nlwps > 1) {
732 1.272 ad mutex_enter(p->p_lock);
733 1.237 ad exit_lwps(l);
734 1.272 ad mutex_exit(p->p_lock);
735 1.237 ad }
736 1.164 thorpej KDASSERT(p->p_nlwps == 1);
737 1.164 thorpej
738 1.253 ad /* Destroy any lwpctl info. */
739 1.253 ad if (p->p_lwpctl != NULL)
740 1.253 ad lwp_ctl_exit();
741 1.253 ad
742 1.164 thorpej /* This is now LWP 1 */
743 1.164 thorpej l->l_lid = 1;
744 1.164 thorpej p->p_nlwpid = 1;
745 1.164 thorpej
746 1.279 wrstuden #ifdef KERN_SA
747 1.279 wrstuden /* Release any SA state. */
748 1.279 wrstuden if (p->p_sa)
749 1.279 wrstuden sa_release(p);
750 1.279 wrstuden #endif /* KERN_SA */
751 1.279 wrstuden
752 1.164 thorpej /* Remove POSIX timers */
753 1.164 thorpej timers_free(p, TIMERS_POSIX);
754 1.164 thorpej
755 1.55 cgd /* adjust "active stack depth" for process VSZ */
756 1.55 cgd pack.ep_ssize = len; /* maybe should go elsewhere, but... */
757 1.55 cgd
758 1.86 thorpej /*
759 1.86 thorpej * Do whatever is necessary to prepare the address space
760 1.86 thorpej * for remapping. Note that this might replace the current
761 1.86 thorpej * vmspace with another!
762 1.86 thorpej */
763 1.164 thorpej uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
764 1.55 cgd
765 1.186 chs /* record proc's vnode, for use by procfs and others */
766 1.186 chs if (p->p_textvp)
767 1.186 chs vrele(p->p_textvp);
768 1.186 chs VREF(pack.ep_vp);
769 1.186 chs p->p_textvp = pack.ep_vp;
770 1.186 chs
771 1.55 cgd /* Now map address space */
772 1.86 thorpej vm = p->p_vmspace;
773 1.241 dogcow vm->vm_taddr = (void *)pack.ep_taddr;
774 1.55 cgd vm->vm_tsize = btoc(pack.ep_tsize);
775 1.241 dogcow vm->vm_daddr = (void*)pack.ep_daddr;
776 1.55 cgd vm->vm_dsize = btoc(pack.ep_dsize);
777 1.55 cgd vm->vm_ssize = btoc(pack.ep_ssize);
778 1.241 dogcow vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
779 1.241 dogcow vm->vm_minsaddr = (void *)pack.ep_minsaddr;
780 1.55 cgd
781 1.260 christos #ifdef PAX_ASLR
782 1.260 christos pax_aslr_init(l, vm);
783 1.260 christos #endif /* PAX_ASLR */
784 1.260 christos
785 1.55 cgd /* create the new process's VM space by running the vmcmds */
786 1.55 cgd #ifdef DIAGNOSTIC
787 1.55 cgd if (pack.ep_vmcmds.evs_used == 0)
788 1.55 cgd panic("execve: no vmcmds");
789 1.55 cgd #endif
790 1.55 cgd for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
791 1.55 cgd struct exec_vmcmd *vcp;
792 1.55 cgd
793 1.55 cgd vcp = &pack.ep_vmcmds.evs_cmds[i];
794 1.114 matt if (vcp->ev_flags & VMCMD_RELATIVE) {
795 1.114 matt #ifdef DIAGNOSTIC
796 1.114 matt if (base_vcp == NULL)
797 1.114 matt panic("execve: relative vmcmd with no base");
798 1.114 matt if (vcp->ev_flags & VMCMD_BASE)
799 1.114 matt panic("execve: illegal base & relative vmcmd");
800 1.114 matt #endif
801 1.114 matt vcp->ev_addr += base_vcp->ev_addr;
802 1.114 matt }
803 1.212 christos error = (*vcp->ev_proc)(l, vcp);
804 1.143 christos #ifdef DEBUG_EXEC
805 1.111 matt if (error) {
806 1.248 christos size_t j;
807 1.143 christos struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
808 1.143 christos for (j = 0; j <= i; j++)
809 1.143 christos uprintf(
810 1.248 christos "vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
811 1.143 christos j, vp[j].ev_addr, vp[j].ev_len,
812 1.143 christos vp[j].ev_offset, vp[j].ev_prot,
813 1.143 christos vp[j].ev_flags);
814 1.111 matt }
815 1.143 christos #endif /* DEBUG_EXEC */
816 1.114 matt if (vcp->ev_flags & VMCMD_BASE)
817 1.114 matt base_vcp = vcp;
818 1.55 cgd }
819 1.55 cgd
820 1.55 cgd /* free the vmspace-creation commands, and release their references */
821 1.55 cgd kill_vmcmds(&pack.ep_vmcmds);
822 1.55 cgd
823 1.186 chs vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
824 1.254 pooka VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
825 1.186 chs vput(pack.ep_vp);
826 1.186 chs
827 1.55 cgd /* if an error happened, deallocate and punt */
828 1.111 matt if (error) {
829 1.248 christos DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
830 1.55 cgd goto exec_abort;
831 1.111 matt }
832 1.55 cgd
833 1.55 cgd /* remember information about the process */
834 1.55 cgd arginfo.ps_nargvstr = argc;
835 1.55 cgd arginfo.ps_nenvstr = envc;
836 1.55 cgd
837 1.255 christos /* set command name & other accounting info */
838 1.255 christos i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
839 1.255 christos (void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
840 1.255 christos p->p_comm[i] = '\0';
841 1.255 christos
842 1.255 christos dp = PNBUF_GET();
843 1.255 christos /*
844 1.255 christos * If the path starts with /, we don't need to do any work.
845 1.255 christos * This handles the majority of the cases.
846 1.255 christos * In the future perhaps we could canonicalize it?
847 1.255 christos */
848 1.255 christos if (pathbuf[0] == '/')
849 1.255 christos (void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
850 1.255 christos #ifdef notyet
851 1.255 christos /*
852 1.255 christos * Although this works most of the time [since the entry was just
853 1.255 christos * entered in the cache] we don't use it because it theoretically
854 1.255 christos * can fail and it is not the cleanest interface, because there
855 1.255 christos * could be races. When the namei cache is re-written, this can
856 1.255 christos * be changed to use the appropriate function.
857 1.255 christos */
858 1.255 christos else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
859 1.255 christos pack.ep_path = dp;
860 1.255 christos #endif
861 1.255 christos else {
862 1.256 christos #ifdef notyet
863 1.255 christos printf("Cannot get path for pid %d [%s] (error %d)",
864 1.255 christos (int)p->p_pid, p->p_comm, error);
865 1.255 christos #endif
866 1.255 christos pack.ep_path = NULL;
867 1.255 christos PNBUF_PUT(dp);
868 1.255 christos }
869 1.255 christos
870 1.163 chs stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
871 1.188 chs STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
872 1.163 chs len - (sizeof(struct ps_strings) + szsigcode));
873 1.267 dsl
874 1.163 chs #ifdef __MACHINE_STACK_GROWS_UP
875 1.163 chs /*
876 1.163 chs * The copyargs call always copies into lower addresses
877 1.163 chs * first, moving towards higher addresses, starting with
878 1.183 junyoung * the stack pointer that we give. When the stack grows
879 1.183 junyoung * down, this puts argc/argv/envp very shallow on the
880 1.267 dsl * stack, right at the first user stack pointer.
881 1.267 dsl * When the stack grows up, the situation is reversed.
882 1.163 chs *
883 1.163 chs * Normally, this is no big deal. But the ld_elf.so _rtld()
884 1.183 junyoung * function expects to be called with a single pointer to
885 1.183 junyoung * a region that has a few words it can stash values into,
886 1.163 chs * followed by argc/argv/envp. When the stack grows down,
887 1.163 chs * it's easy to decrement the stack pointer a little bit to
888 1.163 chs * allocate the space for these few words and pass the new
889 1.163 chs * stack pointer to _rtld. When the stack grows up, however,
890 1.171 chs * a few words before argc is part of the signal trampoline, XXX
891 1.163 chs * so we have a problem.
892 1.163 chs *
893 1.183 junyoung * Instead of changing how _rtld works, we take the easy way
894 1.267 dsl * out and steal 32 bytes before we call copyargs.
895 1.267 dsl * This extra space was allowed for when 'len' was calculated.
896 1.163 chs */
897 1.267 dsl stack += RTLD_GAP;
898 1.163 chs #endif /* __MACHINE_STACK_GROWS_UP */
899 1.163 chs
900 1.55 cgd /* Now copy argc, args & environ to new stack */
901 1.244 dsl error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
902 1.255 christos if (pack.ep_path) {
903 1.255 christos PNBUF_PUT(pack.ep_path);
904 1.255 christos pack.ep_path = NULL;
905 1.255 christos }
906 1.144 christos if (error) {
907 1.144 christos DPRINTF(("execve: copyargs failed %d\n", error));
908 1.55 cgd goto exec_abort;
909 1.111 matt }
910 1.144 christos /* Move the stack back to original point */
911 1.163 chs stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
912 1.55 cgd
913 1.121 eeh /* fill process ps_strings info */
914 1.188 chs p->p_psstr = (struct ps_strings *)
915 1.188 chs STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
916 1.163 chs sizeof(struct ps_strings));
917 1.121 eeh p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
918 1.121 eeh p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
919 1.121 eeh p->p_psenv = offsetof(struct ps_strings, ps_envstr);
920 1.121 eeh p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
921 1.121 eeh
922 1.55 cgd /* copy out the process's ps_strings structure */
923 1.213 manu if ((error = copyout(aip, (char *)p->p_psstr,
924 1.144 christos sizeof(arginfo))) != 0) {
925 1.143 christos DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
926 1.213 manu aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
927 1.55 cgd goto exec_abort;
928 1.111 matt }
929 1.109 simonb
930 1.270 ad fd_closeexec(); /* handle close on exec */
931 1.55 cgd execsigs(p); /* reset catched signals */
932 1.183 junyoung
933 1.164 thorpej l->l_ctxlink = NULL; /* reset ucontext link */
934 1.55 cgd
935 1.255 christos
936 1.55 cgd p->p_acflag &= ~AFORK;
937 1.272 ad mutex_enter(p->p_lock);
938 1.238 pavel p->p_flag |= PK_EXEC;
939 1.272 ad mutex_exit(p->p_lock);
940 1.237 ad
941 1.237 ad /*
942 1.237 ad * Stop profiling.
943 1.237 ad */
944 1.237 ad if ((p->p_stflag & PST_PROFIL) != 0) {
945 1.237 ad mutex_spin_enter(&p->p_stmutex);
946 1.237 ad stopprofclock(p);
947 1.237 ad mutex_spin_exit(&p->p_stmutex);
948 1.237 ad }
949 1.237 ad
950 1.237 ad /*
951 1.275 ad * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
952 1.237 ad * exited and exec()/exit() are the only places it will be cleared.
953 1.237 ad */
954 1.275 ad if ((p->p_lflag & PL_PPWAIT) != 0) {
955 1.271 ad mutex_enter(proc_lock);
956 1.275 ad p->p_lflag &= ~PL_PPWAIT;
957 1.237 ad cv_broadcast(&p->p_pptr->p_waitcv);
958 1.271 ad mutex_exit(proc_lock);
959 1.55 cgd }
960 1.55 cgd
961 1.55 cgd /*
962 1.237 ad * Deal with set[ug]id. MNT_NOSUID has already been used to disable
963 1.237 ad * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
964 1.237 ad * out additional references on the process for the moment.
965 1.55 cgd */
966 1.237 ad if ((p->p_slflag & PSL_TRACED) == 0 &&
967 1.141 thorpej
968 1.141 thorpej (((attr.va_mode & S_ISUID) != 0 &&
969 1.221 ad kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
970 1.141 thorpej
971 1.141 thorpej ((attr.va_mode & S_ISGID) != 0 &&
972 1.221 ad kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
973 1.141 thorpej /*
974 1.141 thorpej * Mark the process as SUGID before we do
975 1.141 thorpej * anything that might block.
976 1.141 thorpej */
977 1.237 ad proc_crmod_enter();
978 1.240 thorpej proc_crmod_leave(NULL, NULL, true);
979 1.152 christos
980 1.152 christos /* Make sure file descriptors 0..2 are in use. */
981 1.270 ad if ((error = fd_checkstd()) != 0) {
982 1.209 christos DPRINTF(("execve: fdcheckstd failed %d\n", error));
983 1.152 christos goto exec_abort;
984 1.209 christos }
985 1.141 thorpej
986 1.220 ad /*
987 1.220 ad * Copy the credential so other references don't see our
988 1.220 ad * changes.
989 1.220 ad */
990 1.221 ad l->l_cred = kauth_cred_copy(l->l_cred);
991 1.55 cgd #ifdef KTRACE
992 1.55 cgd /*
993 1.268 elad * If the persistent trace flag isn't set, turn off.
994 1.55 cgd */
995 1.237 ad if (p->p_tracep) {
996 1.247 ad mutex_enter(&ktrace_lock);
997 1.268 elad if (!(p->p_traceflag & KTRFAC_PERSISTENT))
998 1.237 ad ktrderef(p);
999 1.247 ad mutex_exit(&ktrace_lock);
1000 1.237 ad }
1001 1.55 cgd #endif
1002 1.83 mycroft if (attr.va_mode & S_ISUID)
1003 1.221 ad kauth_cred_seteuid(l->l_cred, attr.va_uid);
1004 1.83 mycroft if (attr.va_mode & S_ISGID)
1005 1.221 ad kauth_cred_setegid(l->l_cred, attr.va_gid);
1006 1.210 christos } else {
1007 1.221 ad if (kauth_cred_geteuid(l->l_cred) ==
1008 1.221 ad kauth_cred_getuid(l->l_cred) &&
1009 1.221 ad kauth_cred_getegid(l->l_cred) ==
1010 1.221 ad kauth_cred_getgid(l->l_cred))
1011 1.238 pavel p->p_flag &= ~PK_SUGID;
1012 1.210 christos }
1013 1.220 ad
1014 1.220 ad /*
1015 1.220 ad * Copy the credential so other references don't see our changes.
1016 1.220 ad * Test to see if this is necessary first, since in the common case
1017 1.220 ad * we won't need a private reference.
1018 1.220 ad */
1019 1.221 ad if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
1020 1.221 ad kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1021 1.221 ad l->l_cred = kauth_cred_copy(l->l_cred);
1022 1.221 ad kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1023 1.221 ad kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1024 1.220 ad }
1025 1.155 gmcgarry
1026 1.221 ad /* Update the master credentials. */
1027 1.227 ad if (l->l_cred != p->p_cred) {
1028 1.227 ad kauth_cred_t ocred;
1029 1.227 ad
1030 1.227 ad kauth_cred_hold(l->l_cred);
1031 1.272 ad mutex_enter(p->p_lock);
1032 1.227 ad ocred = p->p_cred;
1033 1.227 ad p->p_cred = l->l_cred;
1034 1.272 ad mutex_exit(p->p_lock);
1035 1.227 ad kauth_cred_free(ocred);
1036 1.227 ad }
1037 1.221 ad
1038 1.155 gmcgarry #if defined(__HAVE_RAS)
1039 1.155 gmcgarry /*
1040 1.155 gmcgarry * Remove all RASs from the address space.
1041 1.155 gmcgarry */
1042 1.251 ad ras_purgeall();
1043 1.155 gmcgarry #endif
1044 1.107 fvdl
1045 1.107 fvdl doexechooks(p);
1046 1.55 cgd
1047 1.55 cgd /* setup new registers and do misc. setup. */
1048 1.244 dsl (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
1049 1.244 dsl if (pack.ep_esch->es_setregs)
1050 1.244 dsl (*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
1051 1.55 cgd
1052 1.171 chs /* map the process's signal trampoline code */
1053 1.244 dsl if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
1054 1.209 christos DPRINTF(("execve: map sigcode failed %d\n", error));
1055 1.171 chs goto exec_abort;
1056 1.209 christos }
1057 1.171 chs
1058 1.277 ad pool_put(&exec_pool, argp);
1059 1.276 ad
1060 1.276 ad PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1061 1.276 ad
1062 1.276 ad /* notify others that we exec'd */
1063 1.276 ad KNOTE(&p->p_klist, NOTE_EXEC);
1064 1.276 ad
1065 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1066 1.122 jdolecek
1067 1.244 dsl /* The emulation root will usually have been found when we looked
1068 1.244 dsl * for the elf interpreter (or similar), if not look now. */
1069 1.244 dsl if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
1070 1.244 dsl emul_find_root(l, &pack);
1071 1.244 dsl
1072 1.244 dsl /* Any old emulation root got removed by fdcloseexec */
1073 1.259 ad rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1074 1.244 dsl p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1075 1.259 ad rw_exit(&p->p_cwdi->cwdi_lock);
1076 1.244 dsl pack.ep_emul_root = NULL;
1077 1.244 dsl if (pack.ep_interp != NULL)
1078 1.244 dsl vrele(pack.ep_interp);
1079 1.244 dsl
1080 1.122 jdolecek /*
1081 1.194 peter * Call emulation specific exec hook. This can setup per-process
1082 1.122 jdolecek * p->p_emuldata or do any other per-process stuff an emulation needs.
1083 1.122 jdolecek *
1084 1.122 jdolecek * If we are executing process of different emulation than the
1085 1.122 jdolecek * original forked process, call e_proc_exit() of the old emulation
1086 1.122 jdolecek * first, then e_proc_exec() of new emulation. If the emulation is
1087 1.122 jdolecek * same, the exec hook code should deallocate any old emulation
1088 1.122 jdolecek * resources held previously by this process.
1089 1.122 jdolecek */
1090 1.124 jdolecek if (p->p_emul && p->p_emul->e_proc_exit
1091 1.244 dsl && p->p_emul != pack.ep_esch->es_emul)
1092 1.122 jdolecek (*p->p_emul->e_proc_exit)(p);
1093 1.122 jdolecek
1094 1.123 jdolecek /*
1095 1.123 jdolecek * Call exec hook. Emulation code may NOT store reference to anything
1096 1.123 jdolecek * from &pack.
1097 1.123 jdolecek */
1098 1.244 dsl if (pack.ep_esch->es_emul->e_proc_exec)
1099 1.244 dsl (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1100 1.122 jdolecek
1101 1.122 jdolecek /* update p_emul, the old value is no longer needed */
1102 1.244 dsl p->p_emul = pack.ep_esch->es_emul;
1103 1.148 thorpej
1104 1.148 thorpej /* ...and the same for p_execsw */
1105 1.244 dsl p->p_execsw = pack.ep_esch;
1106 1.148 thorpej
1107 1.133 mycroft #ifdef __HAVE_SYSCALL_INTERN
1108 1.133 mycroft (*p->p_emul->e_syscall_intern)(p);
1109 1.133 mycroft #endif
1110 1.247 ad ktremul();
1111 1.85 mycroft
1112 1.252 ad /* Allow new references from the debugger/procfs. */
1113 1.252 ad rw_exit(&p->p_reflock);
1114 1.237 ad rw_exit(&exec_lock);
1115 1.162 manu
1116 1.271 ad mutex_enter(proc_lock);
1117 1.237 ad
1118 1.237 ad if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1119 1.237 ad KSI_INIT_EMPTY(&ksi);
1120 1.237 ad ksi.ksi_signo = SIGTRAP;
1121 1.237 ad ksi.ksi_lid = l->l_lid;
1122 1.237 ad kpsignal(p, &ksi, NULL);
1123 1.237 ad }
1124 1.162 manu
1125 1.237 ad if (p->p_sflag & PS_STOPEXEC) {
1126 1.237 ad KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1127 1.175 dsl p->p_pptr->p_nstopchild++;
1128 1.237 ad p->p_pptr->p_waited = 0;
1129 1.272 ad mutex_enter(p->p_lock);
1130 1.237 ad ksiginfo_queue_init(&kq);
1131 1.237 ad sigclearall(p, &contsigmask, &kq);
1132 1.237 ad lwp_lock(l);
1133 1.237 ad l->l_stat = LSSTOP;
1134 1.162 manu p->p_stat = SSTOP;
1135 1.164 thorpej p->p_nrlwps--;
1136 1.272 ad mutex_exit(p->p_lock);
1137 1.271 ad mutex_exit(proc_lock);
1138 1.245 yamt mi_switch(l);
1139 1.237 ad ksiginfo_queue_drain(&kq);
1140 1.237 ad KERNEL_LOCK(l->l_biglocks, l);
1141 1.237 ad } else {
1142 1.271 ad mutex_exit(proc_lock);
1143 1.162 manu }
1144 1.162 manu
1145 1.260 christos PNBUF_PUT(pathbuf);
1146 1.85 mycroft return (EJUSTRETURN);
1147 1.55 cgd
1148 1.138 lukem bad:
1149 1.55 cgd /* free the vmspace-creation commands, and release their references */
1150 1.55 cgd kill_vmcmds(&pack.ep_vmcmds);
1151 1.55 cgd /* kill any opened file descriptor, if necessary */
1152 1.55 cgd if (pack.ep_flags & EXEC_HASFD) {
1153 1.55 cgd pack.ep_flags &= ~EXEC_HASFD;
1154 1.270 ad fd_close(pack.ep_fd);
1155 1.55 cgd }
1156 1.55 cgd /* close and put the exec'd file */
1157 1.99 wrstuden vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1158 1.254 pooka VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1159 1.99 wrstuden vput(pack.ep_vp);
1160 1.120 thorpej PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1161 1.277 ad pool_put(&exec_pool, argp);
1162 1.55 cgd
1163 1.138 lukem freehdr:
1164 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1165 1.244 dsl if (pack.ep_emul_root != NULL)
1166 1.244 dsl vrele(pack.ep_emul_root);
1167 1.244 dsl if (pack.ep_interp != NULL)
1168 1.244 dsl vrele(pack.ep_interp);
1169 1.200 elad
1170 1.274 ad rw_exit(&exec_lock);
1171 1.274 ad
1172 1.200 elad clrflg:
1173 1.279 wrstuden lwp_lock(l);
1174 1.279 wrstuden l->l_flag |= oldlwpflags;
1175 1.279 wrstuden lwp_unlock(l);
1176 1.260 christos PNBUF_PUT(pathbuf);
1177 1.252 ad rw_exit(&p->p_reflock);
1178 1.130 jdolecek
1179 1.282 ad if (modgen != module_gen && error == ENOEXEC) {
1180 1.282 ad modgen = module_gen;
1181 1.282 ad exec_autoload();
1182 1.282 ad goto retry;
1183 1.282 ad }
1184 1.282 ad
1185 1.55 cgd return error;
1186 1.55 cgd
1187 1.138 lukem exec_abort:
1188 1.260 christos PNBUF_PUT(pathbuf);
1189 1.252 ad rw_exit(&p->p_reflock);
1190 1.237 ad rw_exit(&exec_lock);
1191 1.130 jdolecek
1192 1.55 cgd /*
1193 1.55 cgd * the old process doesn't exist anymore. exit gracefully.
1194 1.55 cgd * get rid of the (new) address space we have created, if any, get rid
1195 1.55 cgd * of our namei data and vnode, and exit noting failure
1196 1.55 cgd */
1197 1.88 mrg uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1198 1.88 mrg VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1199 1.73 mycroft if (pack.ep_emul_arg)
1200 1.124 jdolecek FREE(pack.ep_emul_arg, M_TEMP);
1201 1.120 thorpej PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1202 1.277 ad pool_put(&exec_pool, argp);
1203 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1204 1.244 dsl if (pack.ep_emul_root != NULL)
1205 1.244 dsl vrele(pack.ep_emul_root);
1206 1.244 dsl if (pack.ep_interp != NULL)
1207 1.244 dsl vrele(pack.ep_interp);
1208 1.237 ad
1209 1.252 ad /* Acquire the sched-state mutex (exit1() will release it). */
1210 1.272 ad mutex_enter(p->p_lock);
1211 1.164 thorpej exit1(l, W_EXITCODE(error, SIGABRT));
1212 1.55 cgd
1213 1.55 cgd /* NOTREACHED */
1214 1.55 cgd return 0;
1215 1.67 christos }
1216 1.67 christos
1217 1.67 christos
1218 1.144 christos int
1219 1.231 yamt copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1220 1.231 yamt char **stackp, void *argp)
1221 1.67 christos {
1222 1.138 lukem char **cpp, *dp, *sp;
1223 1.138 lukem size_t len;
1224 1.138 lukem void *nullp;
1225 1.138 lukem long argc, envc;
1226 1.144 christos int error;
1227 1.138 lukem
1228 1.144 christos cpp = (char **)*stackp;
1229 1.138 lukem nullp = NULL;
1230 1.138 lukem argc = arginfo->ps_nargvstr;
1231 1.138 lukem envc = arginfo->ps_nenvstr;
1232 1.144 christos if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
1233 1.144 christos return error;
1234 1.67 christos
1235 1.244 dsl dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1236 1.67 christos sp = argp;
1237 1.67 christos
1238 1.67 christos /* XXX don't copy them out, remap them! */
1239 1.69 mycroft arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1240 1.67 christos
1241 1.67 christos for (; --argc >= 0; sp += len, dp += len)
1242 1.144 christos if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1243 1.144 christos (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1244 1.144 christos return error;
1245 1.67 christos
1246 1.144 christos if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1247 1.144 christos return error;
1248 1.67 christos
1249 1.69 mycroft arginfo->ps_envstr = cpp; /* remember location of envp for later */
1250 1.67 christos
1251 1.67 christos for (; --envc >= 0; sp += len, dp += len)
1252 1.144 christos if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1253 1.144 christos (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1254 1.144 christos return error;
1255 1.67 christos
1256 1.144 christos if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1257 1.144 christos return error;
1258 1.67 christos
1259 1.144 christos *stackp = (char *)cpp;
1260 1.144 christos return 0;
1261 1.55 cgd }
1262 1.130 jdolecek
1263 1.130 jdolecek
1264 1.130 jdolecek /*
1265 1.282 ad * Add execsw[] entries.
1266 1.130 jdolecek */
1267 1.130 jdolecek int
1268 1.282 ad exec_add(struct execsw *esp, int count)
1269 1.130 jdolecek {
1270 1.282 ad struct exec_entry *it;
1271 1.282 ad int i;
1272 1.130 jdolecek
1273 1.282 ad KASSERT(count > 0);
1274 1.130 jdolecek
1275 1.282 ad /* Check for duplicates. */
1276 1.237 ad rw_enter(&exec_lock, RW_WRITER);
1277 1.282 ad for (i = 0; i < count; i++) {
1278 1.282 ad LIST_FOREACH(it, &ex_head, ex_list) {
1279 1.282 ad /* assume unique (makecmds, probe_func, emulation) */
1280 1.282 ad if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1281 1.282 ad it->ex_sw->u.elf_probe_func ==
1282 1.282 ad esp[i].u.elf_probe_func &&
1283 1.282 ad it->ex_sw->es_emul == esp[i].es_emul) {
1284 1.282 ad rw_exit(&exec_lock);
1285 1.282 ad return EEXIST;
1286 1.130 jdolecek }
1287 1.130 jdolecek }
1288 1.130 jdolecek }
1289 1.130 jdolecek
1290 1.282 ad /* Allocate new entries. */
1291 1.282 ad for (i = 0; i < count; i++) {
1292 1.282 ad it = kmem_alloc(sizeof(*it), KM_SLEEP);
1293 1.282 ad it->ex_sw = &esp[i];
1294 1.282 ad LIST_INSERT_HEAD(&ex_head, it, ex_list);
1295 1.130 jdolecek }
1296 1.130 jdolecek
1297 1.130 jdolecek /* update execsw[] */
1298 1.130 jdolecek exec_init(0);
1299 1.237 ad rw_exit(&exec_lock);
1300 1.282 ad return 0;
1301 1.130 jdolecek }
1302 1.130 jdolecek
1303 1.130 jdolecek /*
1304 1.130 jdolecek * Remove execsw[] entry.
1305 1.130 jdolecek */
1306 1.130 jdolecek int
1307 1.282 ad exec_remove(struct execsw *esp, int count)
1308 1.130 jdolecek {
1309 1.282 ad struct exec_entry *it, *next;
1310 1.282 ad int i;
1311 1.282 ad const struct proclist_desc *pd;
1312 1.282 ad proc_t *p;
1313 1.282 ad
1314 1.282 ad KASSERT(count > 0);
1315 1.130 jdolecek
1316 1.282 ad /* Abort if any are busy. */
1317 1.237 ad rw_enter(&exec_lock, RW_WRITER);
1318 1.282 ad for (i = 0; i < count; i++) {
1319 1.282 ad mutex_enter(proc_lock);
1320 1.282 ad for (pd = proclists; pd->pd_list != NULL; pd++) {
1321 1.282 ad PROCLIST_FOREACH(p, pd->pd_list) {
1322 1.282 ad if (p->p_execsw == &esp[i]) {
1323 1.282 ad mutex_exit(proc_lock);
1324 1.282 ad rw_exit(&exec_lock);
1325 1.282 ad return EBUSY;
1326 1.282 ad }
1327 1.282 ad }
1328 1.282 ad }
1329 1.282 ad mutex_exit(proc_lock);
1330 1.282 ad }
1331 1.130 jdolecek
1332 1.282 ad /* None are busy, so remove them all. */
1333 1.282 ad for (i = 0; i < count; i++) {
1334 1.282 ad for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1335 1.282 ad next = LIST_NEXT(it, ex_list);
1336 1.282 ad if (it->ex_sw == &esp[i]) {
1337 1.282 ad LIST_REMOVE(it, ex_list);
1338 1.282 ad kmem_free(it, sizeof(*it));
1339 1.282 ad break;
1340 1.282 ad }
1341 1.282 ad }
1342 1.130 jdolecek }
1343 1.130 jdolecek
1344 1.130 jdolecek /* update execsw[] */
1345 1.130 jdolecek exec_init(0);
1346 1.237 ad rw_exit(&exec_lock);
1347 1.282 ad return 0;
1348 1.130 jdolecek }
1349 1.130 jdolecek
1350 1.130 jdolecek /*
1351 1.130 jdolecek * Initialize exec structures. If init_boot is true, also does necessary
1352 1.130 jdolecek * one-time initialization (it's called from main() that way).
1353 1.147 jdolecek * Once system is multiuser, this should be called with exec_lock held,
1354 1.130 jdolecek * i.e. via exec_{add|remove}().
1355 1.130 jdolecek */
1356 1.130 jdolecek int
1357 1.138 lukem exec_init(int init_boot)
1358 1.130 jdolecek {
1359 1.282 ad const struct execsw **sw;
1360 1.282 ad struct exec_entry *ex;
1361 1.282 ad SLIST_HEAD(,exec_entry) first;
1362 1.282 ad SLIST_HEAD(,exec_entry) any;
1363 1.282 ad SLIST_HEAD(,exec_entry) last;
1364 1.282 ad int i, sz;
1365 1.130 jdolecek
1366 1.130 jdolecek if (init_boot) {
1367 1.130 jdolecek /* do one-time initializations */
1368 1.237 ad rw_init(&exec_lock);
1369 1.259 ad mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1370 1.277 ad pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1371 1.277 ad "execargs", &exec_palloc, IPL_NONE);
1372 1.277 ad pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1373 1.282 ad } else {
1374 1.282 ad KASSERT(rw_write_held(&exec_lock));
1375 1.282 ad }
1376 1.130 jdolecek
1377 1.282 ad /* Sort each entry onto the appropriate queue. */
1378 1.282 ad SLIST_INIT(&first);
1379 1.282 ad SLIST_INIT(&any);
1380 1.282 ad SLIST_INIT(&last);
1381 1.282 ad sz = 0;
1382 1.282 ad LIST_FOREACH(ex, &ex_head, ex_list) {
1383 1.282 ad switch(ex->ex_sw->es_prio) {
1384 1.282 ad case EXECSW_PRIO_FIRST:
1385 1.282 ad SLIST_INSERT_HEAD(&first, ex, ex_slist);
1386 1.282 ad break;
1387 1.282 ad case EXECSW_PRIO_ANY:
1388 1.282 ad SLIST_INSERT_HEAD(&any, ex, ex_slist);
1389 1.282 ad break;
1390 1.282 ad case EXECSW_PRIO_LAST:
1391 1.282 ad SLIST_INSERT_HEAD(&last, ex, ex_slist);
1392 1.282 ad break;
1393 1.282 ad default:
1394 1.282 ad panic("exec_init");
1395 1.282 ad break;
1396 1.130 jdolecek }
1397 1.282 ad sz++;
1398 1.130 jdolecek }
1399 1.130 jdolecek
1400 1.130 jdolecek /*
1401 1.282 ad * Create new execsw[]. Ensure we do not try a zero-sized
1402 1.282 ad * allocation.
1403 1.130 jdolecek */
1404 1.282 ad sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1405 1.282 ad i = 0;
1406 1.282 ad SLIST_FOREACH(ex, &first, ex_slist) {
1407 1.282 ad sw[i++] = ex->ex_sw;
1408 1.282 ad }
1409 1.282 ad SLIST_FOREACH(ex, &any, ex_slist) {
1410 1.282 ad sw[i++] = ex->ex_sw;
1411 1.282 ad }
1412 1.282 ad SLIST_FOREACH(ex, &last, ex_slist) {
1413 1.282 ad sw[i++] = ex->ex_sw;
1414 1.130 jdolecek }
1415 1.183 junyoung
1416 1.282 ad /* Replace old execsw[] and free used memory. */
1417 1.282 ad if (execsw != NULL) {
1418 1.282 ad kmem_free(__UNCONST(execsw),
1419 1.282 ad nexecs * sizeof(struct execsw *) + 1);
1420 1.130 jdolecek }
1421 1.282 ad execsw = sw;
1422 1.282 ad nexecs = sz;
1423 1.130 jdolecek
1424 1.282 ad /* Figure out the maximum size of an exec header. */
1425 1.282 ad exec_maxhdrsz = sizeof(int);
1426 1.130 jdolecek for (i = 0; i < nexecs; i++) {
1427 1.130 jdolecek if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1428 1.130 jdolecek exec_maxhdrsz = execsw[i]->es_hdrsz;
1429 1.130 jdolecek }
1430 1.130 jdolecek
1431 1.130 jdolecek return 0;
1432 1.130 jdolecek }
1433 1.171 chs
1434 1.171 chs static int
1435 1.171 chs exec_sigcode_map(struct proc *p, const struct emul *e)
1436 1.171 chs {
1437 1.171 chs vaddr_t va;
1438 1.171 chs vsize_t sz;
1439 1.171 chs int error;
1440 1.171 chs struct uvm_object *uobj;
1441 1.171 chs
1442 1.184 drochner sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1443 1.184 drochner
1444 1.184 drochner if (e->e_sigobject == NULL || sz == 0) {
1445 1.171 chs return 0;
1446 1.171 chs }
1447 1.171 chs
1448 1.171 chs /*
1449 1.171 chs * If we don't have a sigobject for this emulation, create one.
1450 1.171 chs *
1451 1.171 chs * sigobject is an anonymous memory object (just like SYSV shared
1452 1.171 chs * memory) that we keep a permanent reference to and that we map
1453 1.171 chs * in all processes that need this sigcode. The creation is simple,
1454 1.171 chs * we create an object, add a permanent reference to it, map it in
1455 1.171 chs * kernel space, copy out the sigcode to it and unmap it.
1456 1.189 jdolecek * We map it with PROT_READ|PROT_EXEC into the process just
1457 1.189 jdolecek * the way sys_mmap() would map it.
1458 1.171 chs */
1459 1.171 chs
1460 1.171 chs uobj = *e->e_sigobject;
1461 1.171 chs if (uobj == NULL) {
1462 1.259 ad mutex_enter(&sigobject_lock);
1463 1.259 ad if ((uobj = *e->e_sigobject) == NULL) {
1464 1.259 ad uobj = uao_create(sz, 0);
1465 1.259 ad (*uobj->pgops->pgo_reference)(uobj);
1466 1.259 ad va = vm_map_min(kernel_map);
1467 1.259 ad if ((error = uvm_map(kernel_map, &va, round_page(sz),
1468 1.259 ad uobj, 0, 0,
1469 1.259 ad UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1470 1.259 ad UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1471 1.259 ad printf("kernel mapping failed %d\n", error);
1472 1.259 ad (*uobj->pgops->pgo_detach)(uobj);
1473 1.259 ad mutex_exit(&sigobject_lock);
1474 1.259 ad return (error);
1475 1.259 ad }
1476 1.259 ad memcpy((void *)va, e->e_sigcode, sz);
1477 1.171 chs #ifdef PMAP_NEED_PROCWR
1478 1.259 ad pmap_procwr(&proc0, va, sz);
1479 1.171 chs #endif
1480 1.259 ad uvm_unmap(kernel_map, va, va + round_page(sz));
1481 1.259 ad *e->e_sigobject = uobj;
1482 1.259 ad }
1483 1.259 ad mutex_exit(&sigobject_lock);
1484 1.171 chs }
1485 1.171 chs
1486 1.172 enami /* Just a hint to uvm_map where to put it. */
1487 1.195 fvdl va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1488 1.195 fvdl round_page(sz));
1489 1.187 chs
1490 1.187 chs #ifdef __alpha__
1491 1.187 chs /*
1492 1.187 chs * Tru64 puts /sbin/loader at the end of user virtual memory,
1493 1.187 chs * which causes the above calculation to put the sigcode at
1494 1.187 chs * an invalid address. Put it just below the text instead.
1495 1.187 chs */
1496 1.193 jmc if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1497 1.187 chs va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1498 1.187 chs }
1499 1.187 chs #endif
1500 1.187 chs
1501 1.171 chs (*uobj->pgops->pgo_reference)(uobj);
1502 1.171 chs error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1503 1.171 chs uobj, 0, 0,
1504 1.171 chs UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1505 1.171 chs UVM_ADV_RANDOM, 0));
1506 1.171 chs if (error) {
1507 1.171 chs (*uobj->pgops->pgo_detach)(uobj);
1508 1.171 chs return (error);
1509 1.171 chs }
1510 1.171 chs p->p_sigctx.ps_sigcode = (void *)va;
1511 1.171 chs return (0);
1512 1.171 chs }
1513