kern_exec.c revision 1.288 1 1.288 mrg /* $NetBSD: kern_exec.c,v 1.288 2009/03/29 01:02:50 mrg Exp $ */
2 1.277 ad
3 1.277 ad /*-
4 1.277 ad * Copyright (c) 2008 The NetBSD Foundation, Inc.
5 1.277 ad * All rights reserved.
6 1.277 ad *
7 1.277 ad * Redistribution and use in source and binary forms, with or without
8 1.277 ad * modification, are permitted provided that the following conditions
9 1.277 ad * are met:
10 1.277 ad * 1. Redistributions of source code must retain the above copyright
11 1.277 ad * notice, this list of conditions and the following disclaimer.
12 1.277 ad * 2. Redistributions in binary form must reproduce the above copyright
13 1.277 ad * notice, this list of conditions and the following disclaimer in the
14 1.277 ad * documentation and/or other materials provided with the distribution.
15 1.277 ad *
16 1.277 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 1.277 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 1.277 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 1.277 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 1.277 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 1.277 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 1.277 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 1.277 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 1.277 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 1.277 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 1.277 ad * POSSIBILITY OF SUCH DAMAGE.
27 1.277 ad */
28 1.55 cgd
29 1.55 cgd /*-
30 1.77 cgd * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
31 1.55 cgd * Copyright (C) 1992 Wolfgang Solfrank.
32 1.55 cgd * Copyright (C) 1992 TooLs GmbH.
33 1.55 cgd * All rights reserved.
34 1.55 cgd *
35 1.55 cgd * Redistribution and use in source and binary forms, with or without
36 1.55 cgd * modification, are permitted provided that the following conditions
37 1.55 cgd * are met:
38 1.55 cgd * 1. Redistributions of source code must retain the above copyright
39 1.55 cgd * notice, this list of conditions and the following disclaimer.
40 1.55 cgd * 2. Redistributions in binary form must reproduce the above copyright
41 1.55 cgd * notice, this list of conditions and the following disclaimer in the
42 1.55 cgd * documentation and/or other materials provided with the distribution.
43 1.55 cgd * 3. All advertising materials mentioning features or use of this software
44 1.55 cgd * must display the following acknowledgement:
45 1.55 cgd * This product includes software developed by TooLs GmbH.
46 1.55 cgd * 4. The name of TooLs GmbH may not be used to endorse or promote products
47 1.55 cgd * derived from this software without specific prior written permission.
48 1.55 cgd *
49 1.55 cgd * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
50 1.55 cgd * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
51 1.55 cgd * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
52 1.55 cgd * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
53 1.55 cgd * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
54 1.55 cgd * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
55 1.55 cgd * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
56 1.55 cgd * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
57 1.55 cgd * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
58 1.55 cgd * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
59 1.55 cgd */
60 1.146 lukem
61 1.146 lukem #include <sys/cdefs.h>
62 1.288 mrg __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.288 2009/03/29 01:02:50 mrg Exp $");
63 1.89 mrg
64 1.92 thorpej #include "opt_ktrace.h"
65 1.285 apb #include "opt_modular.h"
66 1.124 jdolecek #include "opt_syscall_debug.h"
67 1.226 dogcow #include "veriexec.h"
68 1.232 elad #include "opt_pax.h"
69 1.279 wrstuden #include "opt_sa.h"
70 1.55 cgd
71 1.55 cgd #include <sys/param.h>
72 1.55 cgd #include <sys/systm.h>
73 1.55 cgd #include <sys/filedesc.h>
74 1.55 cgd #include <sys/kernel.h>
75 1.55 cgd #include <sys/proc.h>
76 1.55 cgd #include <sys/mount.h>
77 1.55 cgd #include <sys/malloc.h>
78 1.265 yamt #include <sys/kmem.h>
79 1.55 cgd #include <sys/namei.h>
80 1.55 cgd #include <sys/vnode.h>
81 1.55 cgd #include <sys/file.h>
82 1.55 cgd #include <sys/acct.h>
83 1.55 cgd #include <sys/exec.h>
84 1.55 cgd #include <sys/ktrace.h>
85 1.278 pooka #include <sys/uidinfo.h>
86 1.55 cgd #include <sys/wait.h>
87 1.55 cgd #include <sys/mman.h>
88 1.155 gmcgarry #include <sys/ras.h>
89 1.55 cgd #include <sys/signalvar.h>
90 1.55 cgd #include <sys/stat.h>
91 1.124 jdolecek #include <sys/syscall.h>
92 1.218 elad #include <sys/kauth.h>
93 1.253 ad #include <sys/lwpctl.h>
94 1.260 christos #include <sys/pax.h>
95 1.263 ad #include <sys/cpu.h>
96 1.282 ad #include <sys/module.h>
97 1.279 wrstuden #include <sys/sa.h>
98 1.279 wrstuden #include <sys/savar.h>
99 1.56 cgd #include <sys/syscallargs.h>
100 1.222 elad #if NVERIEXEC > 0
101 1.197 blymn #include <sys/verified_exec.h>
102 1.222 elad #endif /* NVERIEXEC > 0 */
103 1.55 cgd
104 1.88 mrg #include <uvm/uvm_extern.h>
105 1.88 mrg
106 1.55 cgd #include <machine/reg.h>
107 1.55 cgd
108 1.244 dsl #include <compat/common/compat_util.h>
109 1.244 dsl
110 1.171 chs static int exec_sigcode_map(struct proc *, const struct emul *);
111 1.171 chs
/*
 * DPRINTF((fmt, ...)): debug trace for the exec path.
 *
 * The argument must be a complete, parenthesised uprintf() argument list.
 * Without DEBUG_EXEC the macro expands to nothing, so the arguments are
 * never evaluated.
 */
#ifndef DEBUG_EXEC
#define	DPRINTF(a)
#else
#define	DPRINTF(a)	uprintf a
#endif /* !DEBUG_EXEC */
117 1.165 thorpej
118 1.130 jdolecek /*
119 1.130 jdolecek * Exec function switch:
120 1.130 jdolecek *
121 1.130 jdolecek * Note that each makecmds function is responsible for loading the
122 1.130 jdolecek * exec package with the necessary functions for any exec-type-specific
123 1.130 jdolecek * handling.
124 1.130 jdolecek *
125 1.130 jdolecek * Functions for specific exec types should be defined in their own
126 1.130 jdolecek * header file.
127 1.130 jdolecek */
128 1.138 lukem static const struct execsw **execsw = NULL;
129 1.138 lukem static int nexecs;
130 1.138 lukem
131 1.282 ad u_int exec_maxhdrsz; /* must not be static - used by netbsd32 */
132 1.130 jdolecek
133 1.130 jdolecek /* list of dynamically loaded execsw entries */
134 1.282 ad static LIST_HEAD(execlist_head, exec_entry) ex_head =
135 1.282 ad LIST_HEAD_INITIALIZER(ex_head);
136 1.130 jdolecek struct exec_entry {
137 1.138 lukem LIST_ENTRY(exec_entry) ex_list;
138 1.282 ad SLIST_ENTRY(exec_entry) ex_slist;
139 1.282 ad const struct execsw *ex_sw;
140 1.130 jdolecek };
141 1.130 jdolecek
/* Syscall name table; only referenced when SYSCALL_DEBUG is configured. */
#if defined(SYSCALL_DEBUG)
extern const char * const syscallnames[];
#endif /* SYSCALL_DEBUG */

/* Ports without __HAVE_SYSCALL_INTERN provide a plain syscall() entry. */
#if !defined(__HAVE_SYSCALL_INTERN)
void syscall(void);
#endif /* !__HAVE_SYSCALL_INTERN */
149 1.203 christos
#if defined(KERN_SA)
/*
 * Scheduler-activation hooks referenced from emul_netbsd.
 *
 * This is a positional initializer: the entries must stay in the member
 * order of struct sa_emul, which is declared outside this file.
 */
static struct sa_emul saemul_netbsd = {
	sizeof(ucontext_t),		/* size of a ucontext_t */
	sizeof(struct sa_t),		/* size of a struct sa_t */
	sizeof(struct sa_t *),		/* size of a pointer to one */
	NULL,
	NULL,
	cpu_upcall,
	/* getucontext_sa is cast to the hook's function-pointer type */
	(void (*)(struct lwp *, void *))getucontext_sa,
	sa_ucsp,
};
#endif /* KERN_SA */
162 1.279 wrstuden
163 1.173 christos /* NetBSD emul struct */
164 1.282 ad struct emul emul_netbsd = {
165 1.124 jdolecek "netbsd",
166 1.127 jdolecek NULL, /* emulation path */
167 1.133 mycroft #ifndef __HAVE_MINIMAL_EMUL
168 1.140 manu EMUL_HAS_SYS___syscall,
169 1.124 jdolecek NULL,
170 1.124 jdolecek SYS_syscall,
171 1.161 jdolecek SYS_NSYSENT,
172 1.133 mycroft #endif
173 1.124 jdolecek sysent,
174 1.124 jdolecek #ifdef SYSCALL_DEBUG
175 1.124 jdolecek syscallnames,
176 1.124 jdolecek #else
177 1.124 jdolecek NULL,
178 1.124 jdolecek #endif
179 1.133 mycroft sendsig,
180 1.142 christos trapsignal,
181 1.180 fvdl NULL,
182 1.173 christos NULL,
183 1.173 christos NULL,
184 1.173 christos NULL,
185 1.145 jdolecek setregs,
186 1.128 jdolecek NULL,
187 1.128 jdolecek NULL,
188 1.128 jdolecek NULL,
189 1.179 manu NULL,
190 1.179 manu NULL,
191 1.133 mycroft #ifdef __HAVE_SYSCALL_INTERN
192 1.133 mycroft syscall_intern,
193 1.133 mycroft #else
194 1.133 mycroft syscall,
195 1.133 mycroft #endif
196 1.156 manu NULL,
197 1.156 manu NULL,
198 1.195 fvdl
199 1.195 fvdl uvm_default_mapaddr,
200 1.216 cube NULL,
201 1.280 matt #ifdef KERN_SA
202 1.279 wrstuden &saemul_netbsd,
203 1.280 matt #else
204 1.280 matt NULL,
205 1.280 matt #endif
206 1.237 ad sizeof(ucontext_t),
207 1.239 cube startlwp,
208 1.124 jdolecek };
209 1.124 jdolecek
210 1.55 cgd /*
211 1.130 jdolecek * Exec lock. Used to control access to execsw[] structures.
212 1.130 jdolecek * This must not be static so that netbsd32 can access it, too.
213 1.130 jdolecek */
214 1.237 ad krwlock_t exec_lock;
215 1.183 junyoung
216 1.259 ad static kmutex_t sigobject_lock;
217 1.259 ad
218 1.277 ad static void *
219 1.277 ad exec_pool_alloc(struct pool *pp, int flags)
220 1.277 ad {
221 1.277 ad
222 1.277 ad return (void *)uvm_km_alloc(kernel_map, NCARGS, 0,
223 1.277 ad UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
224 1.277 ad }
225 1.277 ad
226 1.277 ad static void
227 1.277 ad exec_pool_free(struct pool *pp, void *addr)
228 1.277 ad {
229 1.277 ad
230 1.277 ad uvm_km_free(kernel_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
231 1.277 ad }
232 1.277 ad
233 1.277 ad static struct pool exec_pool;
234 1.277 ad
235 1.277 ad static struct pool_allocator exec_palloc = {
236 1.277 ad .pa_alloc = exec_pool_alloc,
237 1.277 ad .pa_free = exec_pool_free,
238 1.277 ad .pa_pagesz = NCARGS
239 1.277 ad };
240 1.277 ad
241 1.130 jdolecek /*
242 1.55 cgd * check exec:
243 1.55 cgd * given an "executable" described in the exec package's namei info,
244 1.55 cgd * see what we can do with it.
245 1.55 cgd *
246 1.55 cgd * ON ENTRY:
247 1.55 cgd * exec package with appropriate namei info
248 1.212 christos * lwp pointer of exec'ing lwp
249 1.55 cgd * NO SELF-LOCKED VNODES
250 1.55 cgd *
251 1.55 cgd * ON EXIT:
252 1.55 cgd * error: nothing held, etc. exec header still allocated.
253 1.77 cgd * ok: filled exec package, executable's vnode (unlocked).
254 1.55 cgd *
255 1.55 cgd * EXEC SWITCH ENTRY:
256 1.55 cgd * Locked vnode to check, exec package, proc.
257 1.55 cgd *
258 1.55 cgd * EXEC SWITCH EXIT:
259 1.77 cgd * ok: return 0, filled exec package, executable's vnode (unlocked).
260 1.55 cgd * error: destructive:
261 1.55 cgd * everything deallocated execept exec header.
262 1.76 cgd * non-destructive:
263 1.77 cgd * error code, executable's vnode (unlocked),
264 1.76 cgd * exec header unmodified.
265 1.55 cgd */
266 1.55 cgd int
267 1.205 christos /*ARGSUSED*/
268 1.233 elad check_exec(struct lwp *l, struct exec_package *epp)
269 1.55 cgd {
270 1.138 lukem int error, i;
271 1.138 lukem struct vnode *vp;
272 1.55 cgd struct nameidata *ndp;
273 1.138 lukem size_t resid;
274 1.55 cgd
275 1.55 cgd ndp = epp->ep_ndp;
276 1.55 cgd ndp->ni_cnd.cn_nameiop = LOOKUP;
277 1.244 dsl ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME | TRYEMULROOT;
278 1.55 cgd /* first get the vnode */
279 1.74 christos if ((error = namei(ndp)) != 0)
280 1.55 cgd return error;
281 1.55 cgd epp->ep_vp = vp = ndp->ni_vp;
282 1.55 cgd
283 1.84 mycroft /* check access and type */
284 1.55 cgd if (vp->v_type != VREG) {
285 1.81 kleink error = EACCES;
286 1.55 cgd goto bad1;
287 1.55 cgd }
288 1.254 pooka if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
289 1.84 mycroft goto bad1;
290 1.55 cgd
291 1.55 cgd /* get attributes */
292 1.254 pooka if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
293 1.55 cgd goto bad1;
294 1.55 cgd
295 1.55 cgd /* Check mount point */
296 1.55 cgd if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
297 1.55 cgd error = EACCES;
298 1.55 cgd goto bad1;
299 1.55 cgd }
300 1.141 thorpej if (vp->v_mount->mnt_flag & MNT_NOSUID)
301 1.83 mycroft epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
302 1.55 cgd
303 1.55 cgd /* try to open it */
304 1.254 pooka if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
305 1.55 cgd goto bad1;
306 1.55 cgd
307 1.99 wrstuden /* unlock vp, since we need it unlocked from here on out. */
308 1.90 fvdl VOP_UNLOCK(vp, 0);
309 1.77 cgd
310 1.222 elad #if NVERIEXEC > 0
311 1.236 elad error = veriexec_verify(l, vp, ndp->ni_cnd.cn_pnbuf,
312 1.233 elad epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
313 1.236 elad NULL);
314 1.236 elad if (error)
315 1.234 elad goto bad2;
316 1.222 elad #endif /* NVERIEXEC > 0 */
317 1.160 blymn
318 1.232 elad #ifdef PAX_SEGVGUARD
319 1.240 thorpej error = pax_segvguard(l, vp, ndp->ni_cnd.cn_pnbuf, false);
320 1.234 elad if (error)
321 1.234 elad goto bad2;
322 1.232 elad #endif /* PAX_SEGVGUARD */
323 1.232 elad
324 1.55 cgd /* now we have the file, get the exec header */
325 1.74 christos error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
326 1.223 ad UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
327 1.74 christos if (error)
328 1.55 cgd goto bad2;
329 1.55 cgd epp->ep_hdrvalid = epp->ep_hdrlen - resid;
330 1.55 cgd
331 1.55 cgd /*
332 1.136 eeh * Set up default address space limits. Can be overridden
333 1.136 eeh * by individual exec packages.
334 1.183 junyoung *
335 1.235 rillig * XXX probably should be all done in the exec packages.
336 1.136 eeh */
337 1.136 eeh epp->ep_vm_minaddr = VM_MIN_ADDRESS;
338 1.136 eeh epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
339 1.136 eeh /*
340 1.55 cgd * set up the vmcmds for creation of the process
341 1.55 cgd * address space
342 1.55 cgd */
343 1.55 cgd error = ENOEXEC;
344 1.244 dsl for (i = 0; i < nexecs; i++) {
345 1.68 cgd int newerror;
346 1.68 cgd
347 1.130 jdolecek epp->ep_esch = execsw[i];
348 1.212 christos newerror = (*execsw[i]->es_makecmds)(l, epp);
349 1.244 dsl
350 1.244 dsl if (!newerror) {
351 1.244 dsl /* Seems ok: check that entry point is sane */
352 1.244 dsl if (epp->ep_entry > VM_MAXUSER_ADDRESS) {
353 1.244 dsl error = ENOEXEC;
354 1.244 dsl break;
355 1.244 dsl }
356 1.244 dsl
357 1.244 dsl /* check limits */
358 1.244 dsl if ((epp->ep_tsize > MAXTSIZ) ||
359 1.244 dsl (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
360 1.244 dsl [RLIMIT_DATA].rlim_cur)) {
361 1.244 dsl error = ENOMEM;
362 1.244 dsl break;
363 1.244 dsl }
364 1.244 dsl return 0;
365 1.244 dsl }
366 1.244 dsl
367 1.244 dsl if (epp->ep_emul_root != NULL) {
368 1.244 dsl vrele(epp->ep_emul_root);
369 1.244 dsl epp->ep_emul_root = NULL;
370 1.244 dsl }
371 1.244 dsl if (epp->ep_interp != NULL) {
372 1.244 dsl vrele(epp->ep_interp);
373 1.244 dsl epp->ep_interp = NULL;
374 1.244 dsl }
375 1.244 dsl
376 1.68 cgd /* make sure the first "interesting" error code is saved. */
377 1.244 dsl if (error == ENOEXEC)
378 1.68 cgd error = newerror;
379 1.124 jdolecek
380 1.244 dsl if (epp->ep_flags & EXEC_DESTR)
381 1.244 dsl /* Error from "#!" code, tidied up by recursive call */
382 1.55 cgd return error;
383 1.55 cgd }
384 1.55 cgd
385 1.249 pooka /* not found, error */
386 1.249 pooka
387 1.55 cgd /*
388 1.55 cgd * free any vmspace-creation commands,
389 1.55 cgd * and release their references
390 1.55 cgd */
391 1.55 cgd kill_vmcmds(&epp->ep_vmcmds);
392 1.55 cgd
393 1.55 cgd bad2:
394 1.55 cgd /*
395 1.99 wrstuden * close and release the vnode, restore the old one, free the
396 1.55 cgd * pathname buf, and punt.
397 1.55 cgd */
398 1.99 wrstuden vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
399 1.254 pooka VOP_CLOSE(vp, FREAD, l->l_cred);
400 1.99 wrstuden vput(vp);
401 1.120 thorpej PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
402 1.55 cgd return error;
403 1.55 cgd
404 1.55 cgd bad1:
405 1.55 cgd /*
406 1.55 cgd * free the namei pathname buffer, and put the vnode
407 1.55 cgd * (which we don't yet have open).
408 1.55 cgd */
409 1.77 cgd vput(vp); /* was still locked */
410 1.120 thorpej PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
411 1.55 cgd return error;
412 1.55 cgd }
413 1.55 cgd
/*
 * Extra bytes added to the stack size estimate in execve1(): one NBPG on
 * machines whose stack grows up, nothing otherwise.
 */
#ifndef __MACHINE_STACK_GROWS_UP
#define	STACK_PTHREADSPACE	0
#else
#define	STACK_PTHREADSPACE	NBPG
#endif /* !__MACHINE_STACK_GROWS_UP */
419 1.188 chs
420 1.204 cube static int
421 1.204 cube execve_fetch_element(char * const *array, size_t index, char **value)
422 1.204 cube {
423 1.204 cube return copyin(array + index, value, sizeof(*value));
424 1.204 cube }
425 1.204 cube
426 1.55 cgd /*
427 1.55 cgd * exec system call
428 1.55 cgd */
429 1.55 cgd /* ARGSUSED */
430 1.75 christos int
431 1.258 dsl sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
432 1.71 thorpej {
433 1.258 dsl /* {
434 1.138 lukem syscallarg(const char *) path;
435 1.138 lukem syscallarg(char * const *) argp;
436 1.138 lukem syscallarg(char * const *) envp;
437 1.258 dsl } */
438 1.204 cube
439 1.204 cube return execve1(l, SCARG(uap, path), SCARG(uap, argp),
440 1.204 cube SCARG(uap, envp), execve_fetch_element);
441 1.204 cube }
442 1.204 cube
/*
 * exec_autoload:
 *	Try to autoload modules that might let us execute an image we do
 *	not understand.
 *
 *	With no execsw entries registered at all, only the modules likely
 *	to be needed for native images are tried.  Otherwise every module
 *	that could possibly run the binary is tried.  XXX lame
 *
 *	Does nothing unless the kernel is built with MODULAR.
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_freebsd",
		"compat_ibcs2",
		"compat_irix",
		"compat_linux",
		"compat_linux32",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_svr4",
		"compat_svr4_32",
		"compat_ultrix",
		NULL
	};
	const char * const *modp;

	mutex_enter(&module_lock);
	modp = (nexecs != 0) ? compat : native;
	for (; *modp != NULL; modp++) {
		if (module_autoload(*modp, MODULE_CLASS_MISC) == 0) {
			/*
			 * The load worked: let go of module_lock and
			 * yield the CPU before trying the next name.
			 */
			mutex_exit(&module_lock);
			yield();
			mutex_enter(&module_lock);
		}
	}
	mutex_exit(&module_lock);
#endif /* MODULAR */
}
496 1.282 ad
497 1.204 cube int
498 1.204 cube execve1(struct lwp *l, const char *path, char * const *args,
499 1.204 cube char * const *envs, execve_fetch_element_t fetch_element)
500 1.204 cube {
501 1.153 thorpej int error;
502 1.138 lukem struct exec_package pack;
503 1.138 lukem struct nameidata nid;
504 1.138 lukem struct vattr attr;
505 1.164 thorpej struct proc *p;
506 1.138 lukem char *argp;
507 1.138 lukem char *dp, *sp;
508 1.138 lukem long argc, envc;
509 1.248 christos size_t i, len;
510 1.138 lukem char *stack;
511 1.138 lukem struct ps_strings arginfo;
512 1.213 manu struct ps_strings *aip = &arginfo;
513 1.138 lukem struct vmspace *vm;
514 1.265 yamt struct exec_fakearg *tmpfap;
515 1.138 lukem int szsigcode;
516 1.138 lukem struct exec_vmcmd *base_vcp;
517 1.279 wrstuden int oldlwpflags;
518 1.237 ad ksiginfo_t ksi;
519 1.237 ad ksiginfoq_t kq;
520 1.260 christos char *pathbuf;
521 1.255 christos size_t pathbuflen;
522 1.282 ad u_int modgen;
523 1.55 cgd
524 1.237 ad p = l->l_proc;
525 1.282 ad modgen = 0;
526 1.164 thorpej
527 1.149 christos /*
528 1.269 christos * Check if we have exceeded our number of processes limit.
529 1.269 christos * This is so that we handle the case where a root daemon
530 1.269 christos * forked, ran setuid to become the desired user and is trying
531 1.269 christos * to exec. The obvious place to do the reference counting check
532 1.269 christos * is setuid(), but we don't do the reference counting check there
533 1.269 christos * like other OS's do because then all the programs that use setuid()
534 1.269 christos * must be modified to check the return code of setuid() and exit().
535 1.269 christos * It is dangerous to make setuid() fail, because it fails open and
536 1.269 christos * the program will continue to run as root. If we make it succeed
537 1.269 christos * and return an error code, again we are not enforcing the limit.
538 1.269 christos * The best place to enforce the limit is here, when the process tries
539 1.269 christos * to execute a new image, because eventually the process will need
540 1.269 christos * to call exec in order to do something useful.
541 1.269 christos */
542 1.282 ad retry:
543 1.287 christos if ((p->p_flag & PK_SUGID) && kauth_authorize_generic(l->l_cred,
544 1.287 christos KAUTH_GENERIC_ISSUSER, NULL) != 0 && chgproccnt(kauth_cred_getuid(
545 1.287 christos l->l_cred), 0) > p->p_rlimit[RLIMIT_NPROC].rlim_cur)
546 1.269 christos return EAGAIN;
547 1.269 christos
548 1.279 wrstuden oldlwpflags = l->l_flag & (LW_SA | LW_SA_UPCALL);
549 1.279 wrstuden if (l->l_flag & LW_SA) {
550 1.279 wrstuden lwp_lock(l);
551 1.279 wrstuden l->l_flag &= ~(LW_SA | LW_SA_UPCALL);
552 1.279 wrstuden lwp_unlock(l);
553 1.279 wrstuden }
554 1.279 wrstuden
555 1.269 christos /*
556 1.237 ad * Drain existing references and forbid new ones. The process
557 1.237 ad * should be left alone until we're done here. This is necessary
558 1.237 ad * to avoid race conditions - e.g. in ptrace() - that might allow
559 1.237 ad * a local user to illicitly obtain elevated privileges.
560 1.237 ad */
561 1.252 ad rw_enter(&p->p_reflock, RW_WRITER);
562 1.149 christos
563 1.138 lukem base_vcp = NULL;
564 1.55 cgd /*
565 1.129 jdolecek * Init the namei data to point the file user's program name.
566 1.129 jdolecek * This is done here rather than in check_exec(), so that it's
567 1.129 jdolecek * possible to override this settings if any of makecmd/probe
568 1.129 jdolecek * functions call check_exec() recursively - for example,
569 1.129 jdolecek * see exec_script_makecmds().
570 1.129 jdolecek */
571 1.260 christos pathbuf = PNBUF_GET();
572 1.260 christos error = copyinstr(path, pathbuf, MAXPATHLEN, &pathbuflen);
573 1.248 christos if (error) {
574 1.248 christos DPRINTF(("execve: copyinstr path %d", error));
575 1.200 elad goto clrflg;
576 1.248 christos }
577 1.200 elad
578 1.257 pooka NDINIT(&nid, LOOKUP, NOFOLLOW | TRYEMULROOT, UIO_SYSSPACE, pathbuf);
579 1.55 cgd
580 1.55 cgd /*
581 1.55 cgd * initialize the fields of the exec package.
582 1.55 cgd */
583 1.204 cube pack.ep_name = path;
584 1.265 yamt pack.ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
585 1.55 cgd pack.ep_hdrlen = exec_maxhdrsz;
586 1.55 cgd pack.ep_hdrvalid = 0;
587 1.55 cgd pack.ep_ndp = &nid;
588 1.67 christos pack.ep_emul_arg = NULL;
589 1.55 cgd pack.ep_vmcmds.evs_cnt = 0;
590 1.55 cgd pack.ep_vmcmds.evs_used = 0;
591 1.55 cgd pack.ep_vap = &attr;
592 1.55 cgd pack.ep_flags = 0;
593 1.244 dsl pack.ep_emul_root = NULL;
594 1.244 dsl pack.ep_interp = NULL;
595 1.244 dsl pack.ep_esch = NULL;
596 1.273 ad pack.ep_pax_flags = 0;
597 1.55 cgd
598 1.237 ad rw_enter(&exec_lock, RW_READER);
599 1.130 jdolecek
600 1.55 cgd /* see if we can run it. */
601 1.248 christos if ((error = check_exec(l, &pack)) != 0) {
602 1.261 xtraeme if (error != ENOENT) {
603 1.260 christos DPRINTF(("execve: check exec failed %d\n", error));
604 1.261 xtraeme }
605 1.55 cgd goto freehdr;
606 1.248 christos }
607 1.55 cgd
608 1.55 cgd /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
609 1.55 cgd
610 1.55 cgd /* allocate an argument buffer */
611 1.277 ad argp = pool_get(&exec_pool, PR_WAITOK);
612 1.277 ad KASSERT(argp != NULL);
613 1.55 cgd dp = argp;
614 1.55 cgd argc = 0;
615 1.55 cgd
616 1.55 cgd /* copy the fake args list, if there's one, freeing it as we go */
617 1.55 cgd if (pack.ep_flags & EXEC_HASARGL) {
618 1.55 cgd tmpfap = pack.ep_fa;
619 1.265 yamt while (tmpfap->fa_arg != NULL) {
620 1.265 yamt const char *cp;
621 1.55 cgd
622 1.265 yamt cp = tmpfap->fa_arg;
623 1.55 cgd while (*cp)
624 1.55 cgd *dp++ = *cp++;
625 1.276 ad *dp++ = '\0';
626 1.55 cgd
627 1.265 yamt kmem_free(tmpfap->fa_arg, tmpfap->fa_len);
628 1.55 cgd tmpfap++; argc++;
629 1.55 cgd }
630 1.265 yamt kmem_free(pack.ep_fa, pack.ep_fa_len);
631 1.55 cgd pack.ep_flags &= ~EXEC_HASARGL;
632 1.55 cgd }
633 1.55 cgd
634 1.55 cgd /* Now get argv & environment */
635 1.204 cube if (args == NULL) {
636 1.248 christos DPRINTF(("execve: null args\n"));
637 1.55 cgd error = EINVAL;
638 1.55 cgd goto bad;
639 1.55 cgd }
640 1.204 cube /* 'i' will index the argp/envp element to be retrieved */
641 1.204 cube i = 0;
642 1.55 cgd if (pack.ep_flags & EXEC_SKIPARG)
643 1.204 cube i++;
644 1.55 cgd
645 1.55 cgd while (1) {
646 1.55 cgd len = argp + ARG_MAX - dp;
647 1.248 christos if ((error = (*fetch_element)(args, i, &sp)) != 0) {
648 1.248 christos DPRINTF(("execve: fetch_element args %d\n", error));
649 1.55 cgd goto bad;
650 1.248 christos }
651 1.55 cgd if (!sp)
652 1.55 cgd break;
653 1.74 christos if ((error = copyinstr(sp, dp, len, &len)) != 0) {
654 1.248 christos DPRINTF(("execve: copyinstr args %d\n", error));
655 1.55 cgd if (error == ENAMETOOLONG)
656 1.55 cgd error = E2BIG;
657 1.55 cgd goto bad;
658 1.55 cgd }
659 1.247 ad ktrexecarg(dp, len - 1);
660 1.55 cgd dp += len;
661 1.204 cube i++;
662 1.55 cgd argc++;
663 1.55 cgd }
664 1.55 cgd
665 1.55 cgd envc = 0;
666 1.74 christos /* environment need not be there */
667 1.204 cube if (envs != NULL) {
668 1.204 cube i = 0;
669 1.55 cgd while (1) {
670 1.55 cgd len = argp + ARG_MAX - dp;
671 1.248 christos if ((error = (*fetch_element)(envs, i, &sp)) != 0) {
672 1.248 christos DPRINTF(("execve: fetch_element env %d\n", error));
673 1.55 cgd goto bad;
674 1.248 christos }
675 1.55 cgd if (!sp)
676 1.55 cgd break;
677 1.74 christos if ((error = copyinstr(sp, dp, len, &len)) != 0) {
678 1.248 christos DPRINTF(("execve: copyinstr env %d\n", error));
679 1.55 cgd if (error == ENAMETOOLONG)
680 1.55 cgd error = E2BIG;
681 1.55 cgd goto bad;
682 1.55 cgd }
683 1.247 ad ktrexecenv(dp, len - 1);
684 1.55 cgd dp += len;
685 1.204 cube i++;
686 1.55 cgd envc++;
687 1.55 cgd }
688 1.55 cgd }
689 1.61 mycroft
690 1.61 mycroft dp = (char *) ALIGN(dp);
691 1.55 cgd
692 1.244 dsl szsigcode = pack.ep_esch->es_emul->e_esigcode -
693 1.244 dsl pack.ep_esch->es_emul->e_sigcode;
694 1.65 fvdl
695 1.267 dsl #ifdef __MACHINE_STACK_GROWS_UP
696 1.267 dsl /* See big comment lower down */
697 1.267 dsl #define RTLD_GAP 32
698 1.267 dsl #else
699 1.267 dsl #define RTLD_GAP 0
700 1.267 dsl #endif
701 1.267 dsl
702 1.55 cgd /* Now check if args & environ fit into new stack */
703 1.105 eeh if (pack.ep_flags & EXEC_32)
704 1.244 dsl len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
705 1.267 dsl sizeof(int) + sizeof(int) + dp + RTLD_GAP +
706 1.188 chs szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
707 1.188 chs - argp;
708 1.105 eeh else
709 1.244 dsl len = ((argc + envc + 2 + pack.ep_esch->es_arglen) *
710 1.267 dsl sizeof(char *) + sizeof(int) + dp + RTLD_GAP +
711 1.188 chs szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
712 1.188 chs - argp;
713 1.67 christos
714 1.262 elad #ifdef PAX_ASLR
715 1.262 elad if (pax_aslr_active(l))
716 1.262 elad len += (arc4random() % PAGE_SIZE);
717 1.262 elad #endif /* PAX_ASLR */
718 1.262 elad
719 1.243 matt #ifdef STACKLALIGN /* arm, etc. */
720 1.243 matt len = STACKALIGN(len); /* make the stack "safely" aligned */
721 1.243 matt #else
722 1.55 cgd len = ALIGN(len); /* make the stack "safely" aligned */
723 1.243 matt #endif
724 1.55 cgd
725 1.55 cgd if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
726 1.248 christos DPRINTF(("execve: stack limit exceeded %zu\n", len));
727 1.55 cgd error = ENOMEM;
728 1.55 cgd goto bad;
729 1.55 cgd }
730 1.55 cgd
731 1.237 ad /* Get rid of other LWPs. */
732 1.279 wrstuden if (p->p_sa || p->p_nlwps > 1) {
733 1.272 ad mutex_enter(p->p_lock);
734 1.237 ad exit_lwps(l);
735 1.272 ad mutex_exit(p->p_lock);
736 1.237 ad }
737 1.164 thorpej KDASSERT(p->p_nlwps == 1);
738 1.164 thorpej
739 1.253 ad /* Destroy any lwpctl info. */
740 1.253 ad if (p->p_lwpctl != NULL)
741 1.253 ad lwp_ctl_exit();
742 1.253 ad
743 1.164 thorpej /* This is now LWP 1 */
744 1.164 thorpej l->l_lid = 1;
745 1.164 thorpej p->p_nlwpid = 1;
746 1.164 thorpej
747 1.279 wrstuden #ifdef KERN_SA
748 1.279 wrstuden /* Release any SA state. */
749 1.279 wrstuden if (p->p_sa)
750 1.279 wrstuden sa_release(p);
751 1.279 wrstuden #endif /* KERN_SA */
752 1.279 wrstuden
753 1.164 thorpej /* Remove POSIX timers */
754 1.164 thorpej timers_free(p, TIMERS_POSIX);
755 1.164 thorpej
756 1.55 cgd /* adjust "active stack depth" for process VSZ */
757 1.55 cgd pack.ep_ssize = len; /* maybe should go elsewhere, but... */
758 1.55 cgd
759 1.86 thorpej /*
760 1.86 thorpej * Do whatever is necessary to prepare the address space
761 1.86 thorpej * for remapping. Note that this might replace the current
762 1.86 thorpej * vmspace with another!
763 1.86 thorpej */
764 1.164 thorpej uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
765 1.55 cgd
766 1.186 chs /* record proc's vnode, for use by procfs and others */
767 1.186 chs if (p->p_textvp)
768 1.186 chs vrele(p->p_textvp);
769 1.186 chs VREF(pack.ep_vp);
770 1.186 chs p->p_textvp = pack.ep_vp;
771 1.186 chs
772 1.55 cgd /* Now map address space */
773 1.86 thorpej vm = p->p_vmspace;
774 1.241 dogcow vm->vm_taddr = (void *)pack.ep_taddr;
775 1.55 cgd vm->vm_tsize = btoc(pack.ep_tsize);
776 1.241 dogcow vm->vm_daddr = (void*)pack.ep_daddr;
777 1.55 cgd vm->vm_dsize = btoc(pack.ep_dsize);
778 1.55 cgd vm->vm_ssize = btoc(pack.ep_ssize);
779 1.288 mrg vm->vm_issize = 0;
780 1.241 dogcow vm->vm_maxsaddr = (void *)pack.ep_maxsaddr;
781 1.241 dogcow vm->vm_minsaddr = (void *)pack.ep_minsaddr;
782 1.55 cgd
783 1.260 christos #ifdef PAX_ASLR
784 1.260 christos pax_aslr_init(l, vm);
785 1.260 christos #endif /* PAX_ASLR */
786 1.260 christos
787 1.55 cgd /* create the new process's VM space by running the vmcmds */
788 1.55 cgd #ifdef DIAGNOSTIC
789 1.55 cgd if (pack.ep_vmcmds.evs_used == 0)
790 1.55 cgd panic("execve: no vmcmds");
791 1.55 cgd #endif
792 1.55 cgd for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
793 1.55 cgd struct exec_vmcmd *vcp;
794 1.55 cgd
795 1.55 cgd vcp = &pack.ep_vmcmds.evs_cmds[i];
796 1.114 matt if (vcp->ev_flags & VMCMD_RELATIVE) {
797 1.114 matt #ifdef DIAGNOSTIC
798 1.114 matt if (base_vcp == NULL)
799 1.114 matt panic("execve: relative vmcmd with no base");
800 1.114 matt if (vcp->ev_flags & VMCMD_BASE)
801 1.114 matt panic("execve: illegal base & relative vmcmd");
802 1.114 matt #endif
803 1.114 matt vcp->ev_addr += base_vcp->ev_addr;
804 1.114 matt }
805 1.212 christos error = (*vcp->ev_proc)(l, vcp);
806 1.143 christos #ifdef DEBUG_EXEC
807 1.111 matt if (error) {
808 1.248 christos size_t j;
809 1.143 christos struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
810 1.143 christos for (j = 0; j <= i; j++)
811 1.143 christos uprintf(
812 1.248 christos "vmcmd[%zu] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
813 1.143 christos j, vp[j].ev_addr, vp[j].ev_len,
814 1.143 christos vp[j].ev_offset, vp[j].ev_prot,
815 1.143 christos vp[j].ev_flags);
816 1.111 matt }
817 1.143 christos #endif /* DEBUG_EXEC */
818 1.114 matt if (vcp->ev_flags & VMCMD_BASE)
819 1.114 matt base_vcp = vcp;
820 1.55 cgd }
821 1.55 cgd
822 1.55 cgd /* free the vmspace-creation commands, and release their references */
823 1.55 cgd kill_vmcmds(&pack.ep_vmcmds);
824 1.55 cgd
825 1.186 chs vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
826 1.254 pooka VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
827 1.186 chs vput(pack.ep_vp);
828 1.186 chs
829 1.55 cgd /* if an error happened, deallocate and punt */
830 1.111 matt if (error) {
831 1.248 christos DPRINTF(("execve: vmcmd %zu failed: %d\n", i - 1, error));
832 1.55 cgd goto exec_abort;
833 1.111 matt }
834 1.55 cgd
835 1.55 cgd /* remember information about the process */
836 1.55 cgd arginfo.ps_nargvstr = argc;
837 1.55 cgd arginfo.ps_nenvstr = envc;
838 1.55 cgd
839 1.255 christos /* set command name & other accounting info */
840 1.255 christos i = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
841 1.255 christos (void)memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, i);
842 1.255 christos p->p_comm[i] = '\0';
843 1.255 christos
844 1.255 christos dp = PNBUF_GET();
845 1.255 christos /*
846 1.255 christos * If the path starts with /, we don't need to do any work.
847 1.255 christos * This handles the majority of the cases.
848 1.255 christos * In the future perhaps we could canonicalize it?
849 1.255 christos */
850 1.255 christos if (pathbuf[0] == '/')
851 1.255 christos (void)strlcpy(pack.ep_path = dp, pathbuf, MAXPATHLEN);
852 1.255 christos #ifdef notyet
853 1.255 christos /*
854 1.255 christos * Although this works most of the time [since the entry was just
855 1.255 christos * entered in the cache] we don't use it because it theoretically
856 1.255 christos * can fail and it is not the cleanest interface, because there
857 1.255 christos * could be races. When the namei cache is re-written, this can
858 1.255 christos * be changed to use the appropriate function.
859 1.255 christos */
860 1.255 christos else if (!(error = vnode_to_path(dp, MAXPATHLEN, p->p_textvp, l, p)))
861 1.255 christos pack.ep_path = dp;
862 1.255 christos #endif
863 1.255 christos else {
864 1.256 christos #ifdef notyet
865 1.255 christos printf("Cannot get path for pid %d [%s] (error %d)",
866 1.255 christos (int)p->p_pid, p->p_comm, error);
867 1.255 christos #endif
868 1.255 christos pack.ep_path = NULL;
869 1.255 christos PNBUF_PUT(dp);
870 1.255 christos }
871 1.255 christos
872 1.163 chs stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
873 1.188 chs STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
874 1.163 chs len - (sizeof(struct ps_strings) + szsigcode));
875 1.267 dsl
876 1.163 chs #ifdef __MACHINE_STACK_GROWS_UP
877 1.163 chs /*
878 1.163 chs * The copyargs call always copies into lower addresses
879 1.163 chs * first, moving towards higher addresses, starting with
880 1.183 junyoung * the stack pointer that we give. When the stack grows
881 1.183 junyoung * down, this puts argc/argv/envp very shallow on the
882 1.267 dsl * stack, right at the first user stack pointer.
883 1.267 dsl * When the stack grows up, the situation is reversed.
884 1.163 chs *
885 1.163 chs * Normally, this is no big deal. But the ld_elf.so _rtld()
886 1.183 junyoung * function expects to be called with a single pointer to
887 1.183 junyoung * a region that has a few words it can stash values into,
888 1.163 chs * followed by argc/argv/envp. When the stack grows down,
889 1.163 chs * it's easy to decrement the stack pointer a little bit to
890 1.163 chs * allocate the space for these few words and pass the new
891 1.163 chs * stack pointer to _rtld. When the stack grows up, however,
892 1.171 chs * a few words before argc is part of the signal trampoline, XXX
893 1.163 chs * so we have a problem.
894 1.163 chs *
895 1.183 junyoung * Instead of changing how _rtld works, we take the easy way
896 1.267 dsl * out and steal 32 bytes before we call copyargs.
897 1.267 dsl * This extra space was allowed for when 'len' was calculated.
898 1.163 chs */
899 1.267 dsl stack += RTLD_GAP;
900 1.163 chs #endif /* __MACHINE_STACK_GROWS_UP */
901 1.163 chs
902 1.55 cgd /* Now copy argc, args & environ to new stack */
903 1.244 dsl error = (*pack.ep_esch->es_copyargs)(l, &pack, &arginfo, &stack, argp);
904 1.255 christos if (pack.ep_path) {
905 1.255 christos PNBUF_PUT(pack.ep_path);
906 1.255 christos pack.ep_path = NULL;
907 1.255 christos }
908 1.144 christos if (error) {
909 1.144 christos DPRINTF(("execve: copyargs failed %d\n", error));
910 1.55 cgd goto exec_abort;
911 1.111 matt }
912 1.144 christos /* Move the stack back to original point */
913 1.163 chs stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
914 1.55 cgd
915 1.121 eeh /* fill process ps_strings info */
916 1.188 chs p->p_psstr = (struct ps_strings *)
917 1.188 chs STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
918 1.163 chs sizeof(struct ps_strings));
919 1.121 eeh p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
920 1.121 eeh p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
921 1.121 eeh p->p_psenv = offsetof(struct ps_strings, ps_envstr);
922 1.121 eeh p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
923 1.121 eeh
924 1.55 cgd /* copy out the process's ps_strings structure */
925 1.213 manu if ((error = copyout(aip, (char *)p->p_psstr,
926 1.144 christos sizeof(arginfo))) != 0) {
927 1.143 christos DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
928 1.213 manu aip, (char *)p->p_psstr, (long)sizeof(arginfo)));
929 1.55 cgd goto exec_abort;
930 1.111 matt }
931 1.109 simonb
932 1.270 ad fd_closeexec(); /* handle close on exec */
933 1.55 cgd execsigs(p); /* reset catched signals */
934 1.183 junyoung
935 1.164 thorpej l->l_ctxlink = NULL; /* reset ucontext link */
936 1.55 cgd
937 1.255 christos
938 1.55 cgd p->p_acflag &= ~AFORK;
939 1.272 ad mutex_enter(p->p_lock);
940 1.238 pavel p->p_flag |= PK_EXEC;
941 1.272 ad mutex_exit(p->p_lock);
942 1.237 ad
943 1.237 ad /*
944 1.237 ad * Stop profiling.
945 1.237 ad */
946 1.237 ad if ((p->p_stflag & PST_PROFIL) != 0) {
947 1.237 ad mutex_spin_enter(&p->p_stmutex);
948 1.237 ad stopprofclock(p);
949 1.237 ad mutex_spin_exit(&p->p_stmutex);
950 1.237 ad }
951 1.237 ad
952 1.237 ad /*
953 1.275 ad * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
954 1.237 ad * exited and exec()/exit() are the only places it will be cleared.
955 1.237 ad */
956 1.275 ad if ((p->p_lflag & PL_PPWAIT) != 0) {
957 1.271 ad mutex_enter(proc_lock);
958 1.275 ad p->p_lflag &= ~PL_PPWAIT;
959 1.237 ad cv_broadcast(&p->p_pptr->p_waitcv);
960 1.271 ad mutex_exit(proc_lock);
961 1.55 cgd }
962 1.55 cgd
963 1.55 cgd /*
964 1.237 ad * Deal with set[ug]id. MNT_NOSUID has already been used to disable
965 1.237 ad * s[ug]id. It's OK to check for PSL_TRACED here as we have blocked
966 1.237 ad * out additional references on the process for the moment.
967 1.55 cgd */
968 1.237 ad if ((p->p_slflag & PSL_TRACED) == 0 &&
969 1.141 thorpej
970 1.141 thorpej (((attr.va_mode & S_ISUID) != 0 &&
971 1.221 ad kauth_cred_geteuid(l->l_cred) != attr.va_uid) ||
972 1.141 thorpej
973 1.141 thorpej ((attr.va_mode & S_ISGID) != 0 &&
974 1.221 ad kauth_cred_getegid(l->l_cred) != attr.va_gid))) {
975 1.141 thorpej /*
976 1.141 thorpej * Mark the process as SUGID before we do
977 1.141 thorpej * anything that might block.
978 1.141 thorpej */
979 1.237 ad proc_crmod_enter();
980 1.240 thorpej proc_crmod_leave(NULL, NULL, true);
981 1.152 christos
982 1.152 christos /* Make sure file descriptors 0..2 are in use. */
983 1.270 ad if ((error = fd_checkstd()) != 0) {
984 1.209 christos DPRINTF(("execve: fdcheckstd failed %d\n", error));
985 1.152 christos goto exec_abort;
986 1.209 christos }
987 1.141 thorpej
988 1.220 ad /*
989 1.220 ad * Copy the credential so other references don't see our
990 1.220 ad * changes.
991 1.220 ad */
992 1.221 ad l->l_cred = kauth_cred_copy(l->l_cred);
993 1.55 cgd #ifdef KTRACE
994 1.55 cgd /*
995 1.268 elad * If the persistent trace flag isn't set, turn off.
996 1.55 cgd */
997 1.237 ad if (p->p_tracep) {
998 1.247 ad mutex_enter(&ktrace_lock);
999 1.268 elad if (!(p->p_traceflag & KTRFAC_PERSISTENT))
1000 1.237 ad ktrderef(p);
1001 1.247 ad mutex_exit(&ktrace_lock);
1002 1.237 ad }
1003 1.55 cgd #endif
1004 1.83 mycroft if (attr.va_mode & S_ISUID)
1005 1.221 ad kauth_cred_seteuid(l->l_cred, attr.va_uid);
1006 1.83 mycroft if (attr.va_mode & S_ISGID)
1007 1.221 ad kauth_cred_setegid(l->l_cred, attr.va_gid);
1008 1.210 christos } else {
1009 1.221 ad if (kauth_cred_geteuid(l->l_cred) ==
1010 1.221 ad kauth_cred_getuid(l->l_cred) &&
1011 1.221 ad kauth_cred_getegid(l->l_cred) ==
1012 1.221 ad kauth_cred_getgid(l->l_cred))
1013 1.238 pavel p->p_flag &= ~PK_SUGID;
1014 1.210 christos }
1015 1.220 ad
1016 1.220 ad /*
1017 1.220 ad * Copy the credential so other references don't see our changes.
1018 1.220 ad * Test to see if this is necessary first, since in the common case
1019 1.220 ad * we won't need a private reference.
1020 1.220 ad */
1021 1.221 ad if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
1022 1.221 ad kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
1023 1.221 ad l->l_cred = kauth_cred_copy(l->l_cred);
1024 1.221 ad kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
1025 1.221 ad kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
1026 1.220 ad }
1027 1.155 gmcgarry
1028 1.221 ad /* Update the master credentials. */
1029 1.227 ad if (l->l_cred != p->p_cred) {
1030 1.227 ad kauth_cred_t ocred;
1031 1.227 ad
1032 1.227 ad kauth_cred_hold(l->l_cred);
1033 1.272 ad mutex_enter(p->p_lock);
1034 1.227 ad ocred = p->p_cred;
1035 1.227 ad p->p_cred = l->l_cred;
1036 1.272 ad mutex_exit(p->p_lock);
1037 1.227 ad kauth_cred_free(ocred);
1038 1.227 ad }
1039 1.221 ad
1040 1.155 gmcgarry #if defined(__HAVE_RAS)
1041 1.155 gmcgarry /*
1042 1.155 gmcgarry * Remove all RASs from the address space.
1043 1.155 gmcgarry */
1044 1.251 ad ras_purgeall();
1045 1.155 gmcgarry #endif
1046 1.107 fvdl
1047 1.107 fvdl doexechooks(p);
1048 1.55 cgd
1049 1.55 cgd /* setup new registers and do misc. setup. */
1050 1.244 dsl (*pack.ep_esch->es_emul->e_setregs)(l, &pack, (u_long) stack);
1051 1.244 dsl if (pack.ep_esch->es_setregs)
1052 1.244 dsl (*pack.ep_esch->es_setregs)(l, &pack, (u_long) stack);
1053 1.55 cgd
1054 1.171 chs /* map the process's signal trampoline code */
1055 1.244 dsl if (exec_sigcode_map(p, pack.ep_esch->es_emul)) {
1056 1.209 christos DPRINTF(("execve: map sigcode failed %d\n", error));
1057 1.171 chs goto exec_abort;
1058 1.209 christos }
1059 1.171 chs
1060 1.277 ad pool_put(&exec_pool, argp);
1061 1.276 ad
1062 1.276 ad PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1063 1.276 ad
1064 1.276 ad /* notify others that we exec'd */
1065 1.276 ad KNOTE(&p->p_klist, NOTE_EXEC);
1066 1.276 ad
1067 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1068 1.122 jdolecek
1069 1.244 dsl /* The emulation root will usually have been found when we looked
1070 1.244 dsl * for the elf interpreter (or similar), if not look now. */
1071 1.244 dsl if (pack.ep_esch->es_emul->e_path != NULL && pack.ep_emul_root == NULL)
1072 1.244 dsl emul_find_root(l, &pack);
1073 1.244 dsl
1074 1.244 dsl /* Any old emulation root got removed by fdcloseexec */
1075 1.259 ad rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
1076 1.244 dsl p->p_cwdi->cwdi_edir = pack.ep_emul_root;
1077 1.259 ad rw_exit(&p->p_cwdi->cwdi_lock);
1078 1.244 dsl pack.ep_emul_root = NULL;
1079 1.244 dsl if (pack.ep_interp != NULL)
1080 1.244 dsl vrele(pack.ep_interp);
1081 1.244 dsl
1082 1.122 jdolecek /*
1083 1.194 peter * Call emulation specific exec hook. This can setup per-process
1084 1.122 jdolecek * p->p_emuldata or do any other per-process stuff an emulation needs.
1085 1.122 jdolecek *
1086 1.122 jdolecek * If we are executing process of different emulation than the
1087 1.122 jdolecek * original forked process, call e_proc_exit() of the old emulation
1088 1.122 jdolecek * first, then e_proc_exec() of new emulation. If the emulation is
1089 1.122 jdolecek * same, the exec hook code should deallocate any old emulation
1090 1.122 jdolecek * resources held previously by this process.
1091 1.122 jdolecek */
1092 1.124 jdolecek if (p->p_emul && p->p_emul->e_proc_exit
1093 1.244 dsl && p->p_emul != pack.ep_esch->es_emul)
1094 1.122 jdolecek (*p->p_emul->e_proc_exit)(p);
1095 1.122 jdolecek
1096 1.123 jdolecek /*
1097 1.123 jdolecek * Call exec hook. Emulation code may NOT store reference to anything
1098 1.123 jdolecek * from &pack.
1099 1.123 jdolecek */
1100 1.244 dsl if (pack.ep_esch->es_emul->e_proc_exec)
1101 1.244 dsl (*pack.ep_esch->es_emul->e_proc_exec)(p, &pack);
1102 1.122 jdolecek
1103 1.122 jdolecek /* update p_emul, the old value is no longer needed */
1104 1.244 dsl p->p_emul = pack.ep_esch->es_emul;
1105 1.148 thorpej
1106 1.148 thorpej /* ...and the same for p_execsw */
1107 1.244 dsl p->p_execsw = pack.ep_esch;
1108 1.148 thorpej
1109 1.133 mycroft #ifdef __HAVE_SYSCALL_INTERN
1110 1.133 mycroft (*p->p_emul->e_syscall_intern)(p);
1111 1.133 mycroft #endif
1112 1.247 ad ktremul();
1113 1.85 mycroft
1114 1.252 ad /* Allow new references from the debugger/procfs. */
1115 1.252 ad rw_exit(&p->p_reflock);
1116 1.237 ad rw_exit(&exec_lock);
1117 1.162 manu
1118 1.271 ad mutex_enter(proc_lock);
1119 1.237 ad
1120 1.237 ad if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
1121 1.237 ad KSI_INIT_EMPTY(&ksi);
1122 1.237 ad ksi.ksi_signo = SIGTRAP;
1123 1.237 ad ksi.ksi_lid = l->l_lid;
1124 1.237 ad kpsignal(p, &ksi, NULL);
1125 1.237 ad }
1126 1.162 manu
1127 1.237 ad if (p->p_sflag & PS_STOPEXEC) {
1128 1.237 ad KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
1129 1.175 dsl p->p_pptr->p_nstopchild++;
1130 1.237 ad p->p_pptr->p_waited = 0;
1131 1.272 ad mutex_enter(p->p_lock);
1132 1.237 ad ksiginfo_queue_init(&kq);
1133 1.237 ad sigclearall(p, &contsigmask, &kq);
1134 1.237 ad lwp_lock(l);
1135 1.237 ad l->l_stat = LSSTOP;
1136 1.162 manu p->p_stat = SSTOP;
1137 1.164 thorpej p->p_nrlwps--;
1138 1.272 ad mutex_exit(p->p_lock);
1139 1.271 ad mutex_exit(proc_lock);
1140 1.245 yamt mi_switch(l);
1141 1.237 ad ksiginfo_queue_drain(&kq);
1142 1.237 ad KERNEL_LOCK(l->l_biglocks, l);
1143 1.237 ad } else {
1144 1.271 ad mutex_exit(proc_lock);
1145 1.162 manu }
1146 1.162 manu
1147 1.260 christos PNBUF_PUT(pathbuf);
1148 1.85 mycroft return (EJUSTRETURN);
1149 1.55 cgd
1150 1.138 lukem bad:
1151 1.55 cgd /* free the vmspace-creation commands, and release their references */
1152 1.55 cgd kill_vmcmds(&pack.ep_vmcmds);
1153 1.55 cgd /* kill any opened file descriptor, if necessary */
1154 1.55 cgd if (pack.ep_flags & EXEC_HASFD) {
1155 1.55 cgd pack.ep_flags &= ~EXEC_HASFD;
1156 1.270 ad fd_close(pack.ep_fd);
1157 1.55 cgd }
1158 1.55 cgd /* close and put the exec'd file */
1159 1.99 wrstuden vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
1160 1.254 pooka VOP_CLOSE(pack.ep_vp, FREAD, l->l_cred);
1161 1.99 wrstuden vput(pack.ep_vp);
1162 1.120 thorpej PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1163 1.277 ad pool_put(&exec_pool, argp);
1164 1.55 cgd
1165 1.138 lukem freehdr:
1166 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1167 1.244 dsl if (pack.ep_emul_root != NULL)
1168 1.244 dsl vrele(pack.ep_emul_root);
1169 1.244 dsl if (pack.ep_interp != NULL)
1170 1.244 dsl vrele(pack.ep_interp);
1171 1.200 elad
1172 1.274 ad rw_exit(&exec_lock);
1173 1.274 ad
1174 1.200 elad clrflg:
1175 1.279 wrstuden lwp_lock(l);
1176 1.279 wrstuden l->l_flag |= oldlwpflags;
1177 1.279 wrstuden lwp_unlock(l);
1178 1.260 christos PNBUF_PUT(pathbuf);
1179 1.252 ad rw_exit(&p->p_reflock);
1180 1.130 jdolecek
1181 1.282 ad if (modgen != module_gen && error == ENOEXEC) {
1182 1.282 ad modgen = module_gen;
1183 1.282 ad exec_autoload();
1184 1.282 ad goto retry;
1185 1.282 ad }
1186 1.282 ad
1187 1.55 cgd return error;
1188 1.55 cgd
1189 1.138 lukem exec_abort:
1190 1.260 christos PNBUF_PUT(pathbuf);
1191 1.252 ad rw_exit(&p->p_reflock);
1192 1.237 ad rw_exit(&exec_lock);
1193 1.130 jdolecek
1194 1.55 cgd /*
1195 1.55 cgd * the old process doesn't exist anymore. exit gracefully.
1196 1.55 cgd * get rid of the (new) address space we have created, if any, get rid
1197 1.55 cgd * of our namei data and vnode, and exit noting failure
1198 1.55 cgd */
1199 1.88 mrg uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
1200 1.88 mrg VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
1201 1.73 mycroft if (pack.ep_emul_arg)
1202 1.284 cegger free(pack.ep_emul_arg, M_TEMP);
1203 1.120 thorpej PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
1204 1.277 ad pool_put(&exec_pool, argp);
1205 1.265 yamt kmem_free(pack.ep_hdr, pack.ep_hdrlen);
1206 1.244 dsl if (pack.ep_emul_root != NULL)
1207 1.244 dsl vrele(pack.ep_emul_root);
1208 1.244 dsl if (pack.ep_interp != NULL)
1209 1.244 dsl vrele(pack.ep_interp);
1210 1.237 ad
1211 1.252 ad /* Acquire the sched-state mutex (exit1() will release it). */
1212 1.272 ad mutex_enter(p->p_lock);
1213 1.164 thorpej exit1(l, W_EXITCODE(error, SIGABRT));
1214 1.55 cgd
1215 1.55 cgd /* NOTREACHED */
1216 1.55 cgd return 0;
1217 1.67 christos }
1218 1.67 christos
1219 1.67 christos
1220 1.144 christos int
1221 1.231 yamt copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
1222 1.231 yamt char **stackp, void *argp)
1223 1.67 christos {
1224 1.138 lukem char **cpp, *dp, *sp;
1225 1.138 lukem size_t len;
1226 1.138 lukem void *nullp;
1227 1.138 lukem long argc, envc;
1228 1.144 christos int error;
1229 1.138 lukem
1230 1.144 christos cpp = (char **)*stackp;
1231 1.138 lukem nullp = NULL;
1232 1.138 lukem argc = arginfo->ps_nargvstr;
1233 1.138 lukem envc = arginfo->ps_nenvstr;
1234 1.144 christos if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
1235 1.144 christos return error;
1236 1.67 christos
1237 1.244 dsl dp = (char *) (cpp + argc + envc + 2 + pack->ep_esch->es_arglen);
1238 1.67 christos sp = argp;
1239 1.67 christos
1240 1.67 christos /* XXX don't copy them out, remap them! */
1241 1.69 mycroft arginfo->ps_argvstr = cpp; /* remember location of argv for later */
1242 1.67 christos
1243 1.67 christos for (; --argc >= 0; sp += len, dp += len)
1244 1.144 christos if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1245 1.144 christos (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1246 1.144 christos return error;
1247 1.67 christos
1248 1.144 christos if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1249 1.144 christos return error;
1250 1.67 christos
1251 1.69 mycroft arginfo->ps_envstr = cpp; /* remember location of envp for later */
1252 1.67 christos
1253 1.67 christos for (; --envc >= 0; sp += len, dp += len)
1254 1.144 christos if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
1255 1.144 christos (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
1256 1.144 christos return error;
1257 1.67 christos
1258 1.144 christos if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
1259 1.144 christos return error;
1260 1.67 christos
1261 1.144 christos *stackp = (char *)cpp;
1262 1.144 christos return 0;
1263 1.55 cgd }
1264 1.130 jdolecek
1265 1.130 jdolecek
1266 1.130 jdolecek /*
1267 1.282 ad * Add execsw[] entries.
1268 1.130 jdolecek */
1269 1.130 jdolecek int
1270 1.282 ad exec_add(struct execsw *esp, int count)
1271 1.130 jdolecek {
1272 1.282 ad struct exec_entry *it;
1273 1.282 ad int i;
1274 1.130 jdolecek
1275 1.283 ad if (count == 0) {
1276 1.283 ad return 0;
1277 1.283 ad }
1278 1.130 jdolecek
1279 1.282 ad /* Check for duplicates. */
1280 1.237 ad rw_enter(&exec_lock, RW_WRITER);
1281 1.282 ad for (i = 0; i < count; i++) {
1282 1.282 ad LIST_FOREACH(it, &ex_head, ex_list) {
1283 1.282 ad /* assume unique (makecmds, probe_func, emulation) */
1284 1.282 ad if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
1285 1.282 ad it->ex_sw->u.elf_probe_func ==
1286 1.282 ad esp[i].u.elf_probe_func &&
1287 1.282 ad it->ex_sw->es_emul == esp[i].es_emul) {
1288 1.282 ad rw_exit(&exec_lock);
1289 1.282 ad return EEXIST;
1290 1.130 jdolecek }
1291 1.130 jdolecek }
1292 1.130 jdolecek }
1293 1.130 jdolecek
1294 1.282 ad /* Allocate new entries. */
1295 1.282 ad for (i = 0; i < count; i++) {
1296 1.282 ad it = kmem_alloc(sizeof(*it), KM_SLEEP);
1297 1.282 ad it->ex_sw = &esp[i];
1298 1.282 ad LIST_INSERT_HEAD(&ex_head, it, ex_list);
1299 1.130 jdolecek }
1300 1.130 jdolecek
1301 1.130 jdolecek /* update execsw[] */
1302 1.130 jdolecek exec_init(0);
1303 1.237 ad rw_exit(&exec_lock);
1304 1.282 ad return 0;
1305 1.130 jdolecek }
1306 1.130 jdolecek
1307 1.130 jdolecek /*
1308 1.130 jdolecek * Remove execsw[] entry.
1309 1.130 jdolecek */
1310 1.130 jdolecek int
1311 1.282 ad exec_remove(struct execsw *esp, int count)
1312 1.130 jdolecek {
1313 1.282 ad struct exec_entry *it, *next;
1314 1.282 ad int i;
1315 1.282 ad const struct proclist_desc *pd;
1316 1.282 ad proc_t *p;
1317 1.282 ad
1318 1.283 ad if (count == 0) {
1319 1.283 ad return 0;
1320 1.283 ad }
1321 1.130 jdolecek
1322 1.282 ad /* Abort if any are busy. */
1323 1.237 ad rw_enter(&exec_lock, RW_WRITER);
1324 1.282 ad for (i = 0; i < count; i++) {
1325 1.282 ad mutex_enter(proc_lock);
1326 1.282 ad for (pd = proclists; pd->pd_list != NULL; pd++) {
1327 1.282 ad PROCLIST_FOREACH(p, pd->pd_list) {
1328 1.282 ad if (p->p_execsw == &esp[i]) {
1329 1.282 ad mutex_exit(proc_lock);
1330 1.282 ad rw_exit(&exec_lock);
1331 1.282 ad return EBUSY;
1332 1.282 ad }
1333 1.282 ad }
1334 1.282 ad }
1335 1.282 ad mutex_exit(proc_lock);
1336 1.282 ad }
1337 1.130 jdolecek
1338 1.282 ad /* None are busy, so remove them all. */
1339 1.282 ad for (i = 0; i < count; i++) {
1340 1.282 ad for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
1341 1.282 ad next = LIST_NEXT(it, ex_list);
1342 1.282 ad if (it->ex_sw == &esp[i]) {
1343 1.282 ad LIST_REMOVE(it, ex_list);
1344 1.282 ad kmem_free(it, sizeof(*it));
1345 1.282 ad break;
1346 1.282 ad }
1347 1.282 ad }
1348 1.130 jdolecek }
1349 1.130 jdolecek
1350 1.130 jdolecek /* update execsw[] */
1351 1.130 jdolecek exec_init(0);
1352 1.237 ad rw_exit(&exec_lock);
1353 1.282 ad return 0;
1354 1.130 jdolecek }
1355 1.130 jdolecek
1356 1.130 jdolecek /*
1357 1.130 jdolecek * Initialize exec structures. If init_boot is true, also does necessary
1358 1.130 jdolecek * one-time initialization (it's called from main() that way).
1359 1.147 jdolecek * Once system is multiuser, this should be called with exec_lock held,
1360 1.130 jdolecek * i.e. via exec_{add|remove}().
1361 1.130 jdolecek */
1362 1.130 jdolecek int
1363 1.138 lukem exec_init(int init_boot)
1364 1.130 jdolecek {
1365 1.282 ad const struct execsw **sw;
1366 1.282 ad struct exec_entry *ex;
1367 1.282 ad SLIST_HEAD(,exec_entry) first;
1368 1.282 ad SLIST_HEAD(,exec_entry) any;
1369 1.282 ad SLIST_HEAD(,exec_entry) last;
1370 1.282 ad int i, sz;
1371 1.130 jdolecek
1372 1.130 jdolecek if (init_boot) {
1373 1.130 jdolecek /* do one-time initializations */
1374 1.237 ad rw_init(&exec_lock);
1375 1.259 ad mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
1376 1.277 ad pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
1377 1.277 ad "execargs", &exec_palloc, IPL_NONE);
1378 1.277 ad pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
1379 1.282 ad } else {
1380 1.282 ad KASSERT(rw_write_held(&exec_lock));
1381 1.282 ad }
1382 1.130 jdolecek
1383 1.282 ad /* Sort each entry onto the appropriate queue. */
1384 1.282 ad SLIST_INIT(&first);
1385 1.282 ad SLIST_INIT(&any);
1386 1.282 ad SLIST_INIT(&last);
1387 1.282 ad sz = 0;
1388 1.282 ad LIST_FOREACH(ex, &ex_head, ex_list) {
1389 1.282 ad switch(ex->ex_sw->es_prio) {
1390 1.282 ad case EXECSW_PRIO_FIRST:
1391 1.282 ad SLIST_INSERT_HEAD(&first, ex, ex_slist);
1392 1.282 ad break;
1393 1.282 ad case EXECSW_PRIO_ANY:
1394 1.282 ad SLIST_INSERT_HEAD(&any, ex, ex_slist);
1395 1.282 ad break;
1396 1.282 ad case EXECSW_PRIO_LAST:
1397 1.282 ad SLIST_INSERT_HEAD(&last, ex, ex_slist);
1398 1.282 ad break;
1399 1.282 ad default:
1400 1.282 ad panic("exec_init");
1401 1.282 ad break;
1402 1.130 jdolecek }
1403 1.282 ad sz++;
1404 1.130 jdolecek }
1405 1.130 jdolecek
1406 1.130 jdolecek /*
1407 1.282 ad * Create new execsw[]. Ensure we do not try a zero-sized
1408 1.282 ad * allocation.
1409 1.130 jdolecek */
1410 1.282 ad sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
1411 1.282 ad i = 0;
1412 1.282 ad SLIST_FOREACH(ex, &first, ex_slist) {
1413 1.282 ad sw[i++] = ex->ex_sw;
1414 1.282 ad }
1415 1.282 ad SLIST_FOREACH(ex, &any, ex_slist) {
1416 1.282 ad sw[i++] = ex->ex_sw;
1417 1.282 ad }
1418 1.282 ad SLIST_FOREACH(ex, &last, ex_slist) {
1419 1.282 ad sw[i++] = ex->ex_sw;
1420 1.130 jdolecek }
1421 1.183 junyoung
1422 1.282 ad /* Replace old execsw[] and free used memory. */
1423 1.282 ad if (execsw != NULL) {
1424 1.282 ad kmem_free(__UNCONST(execsw),
1425 1.282 ad nexecs * sizeof(struct execsw *) + 1);
1426 1.130 jdolecek }
1427 1.282 ad execsw = sw;
1428 1.282 ad nexecs = sz;
1429 1.130 jdolecek
1430 1.282 ad /* Figure out the maximum size of an exec header. */
1431 1.282 ad exec_maxhdrsz = sizeof(int);
1432 1.130 jdolecek for (i = 0; i < nexecs; i++) {
1433 1.130 jdolecek if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1434 1.130 jdolecek exec_maxhdrsz = execsw[i]->es_hdrsz;
1435 1.130 jdolecek }
1436 1.130 jdolecek
1437 1.130 jdolecek return 0;
1438 1.130 jdolecek }
1439 1.171 chs
1440 1.171 chs static int
1441 1.171 chs exec_sigcode_map(struct proc *p, const struct emul *e)
1442 1.171 chs {
1443 1.171 chs vaddr_t va;
1444 1.171 chs vsize_t sz;
1445 1.171 chs int error;
1446 1.171 chs struct uvm_object *uobj;
1447 1.171 chs
1448 1.184 drochner sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1449 1.184 drochner
1450 1.184 drochner if (e->e_sigobject == NULL || sz == 0) {
1451 1.171 chs return 0;
1452 1.171 chs }
1453 1.171 chs
1454 1.171 chs /*
1455 1.171 chs * If we don't have a sigobject for this emulation, create one.
1456 1.171 chs *
1457 1.171 chs * sigobject is an anonymous memory object (just like SYSV shared
1458 1.171 chs * memory) that we keep a permanent reference to and that we map
1459 1.171 chs * in all processes that need this sigcode. The creation is simple,
1460 1.171 chs * we create an object, add a permanent reference to it, map it in
1461 1.171 chs * kernel space, copy out the sigcode to it and unmap it.
1462 1.189 jdolecek * We map it with PROT_READ|PROT_EXEC into the process just
1463 1.189 jdolecek * the way sys_mmap() would map it.
1464 1.171 chs */
1465 1.171 chs
1466 1.171 chs uobj = *e->e_sigobject;
1467 1.171 chs if (uobj == NULL) {
1468 1.259 ad mutex_enter(&sigobject_lock);
1469 1.259 ad if ((uobj = *e->e_sigobject) == NULL) {
1470 1.259 ad uobj = uao_create(sz, 0);
1471 1.259 ad (*uobj->pgops->pgo_reference)(uobj);
1472 1.259 ad va = vm_map_min(kernel_map);
1473 1.259 ad if ((error = uvm_map(kernel_map, &va, round_page(sz),
1474 1.259 ad uobj, 0, 0,
1475 1.259 ad UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1476 1.259 ad UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1477 1.259 ad printf("kernel mapping failed %d\n", error);
1478 1.259 ad (*uobj->pgops->pgo_detach)(uobj);
1479 1.259 ad mutex_exit(&sigobject_lock);
1480 1.259 ad return (error);
1481 1.259 ad }
1482 1.259 ad memcpy((void *)va, e->e_sigcode, sz);
1483 1.171 chs #ifdef PMAP_NEED_PROCWR
1484 1.259 ad pmap_procwr(&proc0, va, sz);
1485 1.171 chs #endif
1486 1.259 ad uvm_unmap(kernel_map, va, va + round_page(sz));
1487 1.259 ad *e->e_sigobject = uobj;
1488 1.259 ad }
1489 1.259 ad mutex_exit(&sigobject_lock);
1490 1.171 chs }
1491 1.171 chs
1492 1.172 enami /* Just a hint to uvm_map where to put it. */
1493 1.195 fvdl va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1494 1.195 fvdl round_page(sz));
1495 1.187 chs
1496 1.187 chs #ifdef __alpha__
1497 1.187 chs /*
1498 1.187 chs * Tru64 puts /sbin/loader at the end of user virtual memory,
1499 1.187 chs * which causes the above calculation to put the sigcode at
1500 1.187 chs * an invalid address. Put it just below the text instead.
1501 1.187 chs */
1502 1.193 jmc if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1503 1.187 chs va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1504 1.187 chs }
1505 1.187 chs #endif
1506 1.187 chs
1507 1.171 chs (*uobj->pgops->pgo_reference)(uobj);
1508 1.171 chs error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1509 1.171 chs uobj, 0, 0,
1510 1.171 chs UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1511 1.171 chs UVM_ADV_RANDOM, 0));
1512 1.171 chs if (error) {
1513 1.171 chs (*uobj->pgops->pgo_detach)(uobj);
1514 1.171 chs return (error);
1515 1.171 chs }
1516 1.171 chs p->p_sigctx.ps_sigcode = (void *)va;
1517 1.171 chs return (0);
1518 1.171 chs }
1519