/*	$NetBSD: kern_exec.c,v 1.195 2005/03/26 05:12:36 fvdl Exp $	*/
2
3 /*-
4 * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
5 * Copyright (C) 1992 Wolfgang Solfrank.
6 * Copyright (C) 1992 TooLs GmbH.
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed by TooLs GmbH.
20 * 4. The name of TooLs GmbH may not be used to endorse or promote products
21 * derived from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
24 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
25 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
26 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
29 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
30 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
31 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
32 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <sys/cdefs.h>
36 __KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.195 2005/03/26 05:12:36 fvdl Exp $");
37
38 #include "opt_ktrace.h"
39 #include "opt_syscall_debug.h"
40 #include "opt_compat_netbsd.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/filedesc.h>
45 #include <sys/kernel.h>
46 #include <sys/proc.h>
47 #include <sys/mount.h>
48 #include <sys/malloc.h>
49 #include <sys/namei.h>
50 #include <sys/vnode.h>
51 #include <sys/file.h>
52 #include <sys/acct.h>
53 #include <sys/exec.h>
54 #include <sys/ktrace.h>
55 #include <sys/resourcevar.h>
56 #include <sys/wait.h>
57 #include <sys/mman.h>
58 #include <sys/ras.h>
59 #include <sys/signalvar.h>
60 #include <sys/stat.h>
61 #include <sys/syscall.h>
62
63 #include <sys/sa.h>
64 #include <sys/savar.h>
65 #include <sys/syscallargs.h>
66
67 #include <uvm/uvm_extern.h>
68
69 #include <machine/cpu.h>
70 #include <machine/reg.h>
71
72 static int exec_sigcode_map(struct proc *, const struct emul *);
73
74 #ifdef DEBUG_EXEC
75 #define DPRINTF(a) uprintf a
76 #else
77 #define DPRINTF(a)
78 #endif /* DEBUG_EXEC */
79
80 MALLOC_DEFINE(M_EXEC, "exec", "argument lists & other mem used by exec");
81
82 /*
83 * Exec function switch:
84 *
85 * Note that each makecmds function is responsible for loading the
86 * exec package with the necessary functions for any exec-type-specific
87 * handling.
88 *
89 * Functions for specific exec types should be defined in their own
90 * header file.
91 */
92 extern const struct execsw execsw_builtin[];
93 extern int nexecs_builtin;
94 static const struct execsw **execsw = NULL;
95 static int nexecs;
96
97 u_int exec_maxhdrsz; /* must not be static - netbsd32 needs it */
98
99 #ifdef LKM
100 /* list of supported emulations */
101 static
102 LIST_HEAD(emlist_head, emul_entry) el_head = LIST_HEAD_INITIALIZER(el_head);
103 struct emul_entry {
104 LIST_ENTRY(emul_entry) el_list;
105 const struct emul *el_emul;
106 int ro_entry;
107 };
108
109 /* list of dynamically loaded execsw entries */
110 static
111 LIST_HEAD(execlist_head, exec_entry) ex_head = LIST_HEAD_INITIALIZER(ex_head);
112 struct exec_entry {
113 LIST_ENTRY(exec_entry) ex_list;
114 const struct execsw *es;
115 };
116
/* structure used for building execsw[] */
118 struct execsw_entry {
119 struct execsw_entry *next;
120 const struct execsw *es;
121 };
122 #endif /* LKM */
123
124 #ifdef SYSCALL_DEBUG
125 extern const char * const syscallnames[];
126 #endif
127 #ifdef __HAVE_SYSCALL_INTERN
128 void syscall_intern(struct proc *);
129 #else
130 void syscall(void);
131 #endif
132
133 #ifdef COMPAT_16
134 extern char sigcode[], esigcode[];
135 struct uvm_object *emul_netbsd_object;
136 #endif
137
138 /* NetBSD emul struct */
139 const struct emul emul_netbsd = {
140 "netbsd",
141 NULL, /* emulation path */
142 #ifndef __HAVE_MINIMAL_EMUL
143 EMUL_HAS_SYS___syscall,
144 NULL,
145 SYS_syscall,
146 SYS_NSYSENT,
147 #endif
148 sysent,
149 #ifdef SYSCALL_DEBUG
150 syscallnames,
151 #else
152 NULL,
153 #endif
154 sendsig,
155 trapsignal,
156 NULL,
157 #ifdef COMPAT_16
158 sigcode,
159 esigcode,
160 &emul_netbsd_object,
161 #else
162 NULL,
163 NULL,
164 NULL,
165 #endif
166 setregs,
167 NULL,
168 NULL,
169 NULL,
170 NULL,
171 NULL,
172 #ifdef __HAVE_SYSCALL_INTERN
173 syscall_intern,
174 #else
175 syscall,
176 #endif
177 NULL,
178 NULL,
179
180 uvm_default_mapaddr,
181 };
182
183 #ifdef LKM
184 /*
185 * Exec lock. Used to control access to execsw[] structures.
186 * This must not be static so that netbsd32 can access it, too.
187 */
188 struct lock exec_lock;
189
190 static void link_es(struct execsw_entry **, const struct execsw *);
191 #endif /* LKM */
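
/*
 * Locking notes (a summary of how exec_lock is used below, LKM kernels
 * only): readers of execsw[] - execve() itself as well as
 * emul_register()/emul_unregister() - take the lock shared, while
 * exec_add()/exec_remove(), which rebuild execsw[] via exec_init(0),
 * take it exclusive.  Without LKM the table is fixed at boot.
 */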
192
193 /*
194 * check exec:
195 * given an "executable" described in the exec package's namei info,
196 * see what we can do with it.
197 *
198 * ON ENTRY:
199 * exec package with appropriate namei info
200 * proc pointer of exec'ing proc
201 * iff verified exec enabled then flag indicating a direct exec or
202 * an indirect exec (i.e. for a shell script interpreter)
203 * NO SELF-LOCKED VNODES
204 *
205 * ON EXIT:
206 * error: nothing held, etc. exec header still allocated.
207 * ok: filled exec package, executable's vnode (unlocked).
208 *
209 * EXEC SWITCH ENTRY:
210 * Locked vnode to check, exec package, proc.
211 *
212 * EXEC SWITCH EXIT:
213 * ok: return 0, filled exec package, executable's vnode (unlocked).
214 * error: destructive:
 *			everything deallocated except exec header.
216 * non-destructive:
217 * error code, executable's vnode (unlocked),
218 * exec header unmodified.
219 */
220 int
221 #ifdef VERIFIED_EXEC
222 check_exec(struct proc *p, struct exec_package *epp, int direct_exec)
223 #else
224 check_exec(struct proc *p, struct exec_package *epp)
225 #endif
226 {
227 int error, i;
228 struct vnode *vp;
229 struct nameidata *ndp;
230 size_t resid;
231
232 ndp = epp->ep_ndp;
233 ndp->ni_cnd.cn_nameiop = LOOKUP;
234 ndp->ni_cnd.cn_flags = FOLLOW | LOCKLEAF | SAVENAME;
235 /* first get the vnode */
236 if ((error = namei(ndp)) != 0)
237 return error;
238 epp->ep_vp = vp = ndp->ni_vp;
239
240 /* check access and type */
241 if (vp->v_type != VREG) {
242 error = EACCES;
243 goto bad1;
244 }
245 if ((error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) != 0)
246 goto bad1;
247
248 /* get attributes */
249 if ((error = VOP_GETATTR(vp, epp->ep_vap, p->p_ucred, p)) != 0)
250 goto bad1;
251
252 /* Check mount point */
253 if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
254 error = EACCES;
255 goto bad1;
256 }
257 if (vp->v_mount->mnt_flag & MNT_NOSUID)
258 epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);
259
260 /* try to open it */
261 if ((error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) != 0)
262 goto bad1;
263
264 /* unlock vp, since we need it unlocked from here on out. */
265 VOP_UNLOCK(vp, 0);
266
267
268 #ifdef VERIFIED_EXEC
269 /* Evaluate signature for file... */
270 if ((error = check_veriexec(p, vp, epp, direct_exec)) != 0)
271 goto bad2;
272 #endif
273
274 /* now we have the file, get the exec header */
275 uvn_attach(vp, VM_PROT_READ);
276 error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
277 UIO_SYSSPACE, 0, p->p_ucred, &resid, NULL);
278 if (error)
279 goto bad2;
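	/*
	 * Record how much of the header was actually read, so the
	 * makecmds routines can check ep_hdrvalid before trusting
	 * header fields.
	 */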
280 epp->ep_hdrvalid = epp->ep_hdrlen - resid;
281
282 /*
283 * Set up default address space limits. Can be overridden
284 * by individual exec packages.
285 *
	 * XXX probably should be all done in the exec packages.
287 */
288 epp->ep_vm_minaddr = VM_MIN_ADDRESS;
289 epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;
290 /*
291 * set up the vmcmds for creation of the process
292 * address space
293 */
294 error = ENOEXEC;
295 for (i = 0; i < nexecs && error != 0; i++) {
296 int newerror;
297
298 epp->ep_esch = execsw[i];
299 newerror = (*execsw[i]->es_makecmds)(p, epp);
300 /* make sure the first "interesting" error code is saved. */
301 if (!newerror || error == ENOEXEC)
302 error = newerror;
303
304 /* if es_makecmds call was successful, update epp->ep_es */
305 if (!newerror && (epp->ep_flags & EXEC_HASES) == 0)
306 epp->ep_es = execsw[i];
307
308 if (epp->ep_flags & EXEC_DESTR && error != 0)
309 return error;
310 }
311 if (!error) {
312 /* check that entry point is sane */
313 if (epp->ep_entry > VM_MAXUSER_ADDRESS)
314 error = ENOEXEC;
315
316 /* check limits */
317 if ((epp->ep_tsize > MAXTSIZ) ||
318 (epp->ep_dsize >
319 (u_quad_t)p->p_rlimit[RLIMIT_DATA].rlim_cur))
320 error = ENOMEM;
321
322 if (!error)
323 return (0);
324 }
325
326 /*
327 * free any vmspace-creation commands,
328 * and release their references
329 */
330 kill_vmcmds(&epp->ep_vmcmds);
331
332 bad2:
333 /*
	 * close and release the vnode, free the pathname buf, and punt.
336 */
337 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
338 VOP_CLOSE(vp, FREAD, p->p_ucred, p);
339 vput(vp);
340 PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
341 return error;
342
343 bad1:
344 /*
345 * free the namei pathname buffer, and put the vnode
346 * (which we don't yet have open).
347 */
348 vput(vp); /* was still locked */
349 PNBUF_PUT(ndp->ni_cnd.cn_pnbuf);
350 return error;
351 }
352
353 #ifdef __MACHINE_STACK_GROWS_UP
354 #define STACK_PTHREADSPACE NBPG
355 #else
356 #define STACK_PTHREADSPACE 0
357 #endif
358
359 /*
360 * exec system call
361 */
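/*
 * Outline of the work done below: check the executable via
 * check_exec(), copy the argument and environment strings into a
 * kernel buffer, throw the old address space away with
 * uvmspace_exec(), build the new one by running the vmcmds, copy the
 * arguments onto the new stack, handle set-id credentials, and set up
 * the registers for the new image.  On success we return EJUSTRETURN
 * so that the normal syscall return path leaves the freshly set up
 * registers alone.
 */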
362 /* ARGSUSED */
363 int
364 sys_execve(struct lwp *l, void *v, register_t *retval)
365 {
366 struct sys_execve_args /* {
367 syscallarg(const char *) path;
368 syscallarg(char * const *) argp;
369 syscallarg(char * const *) envp;
370 } */ *uap = v;
371 int error;
372 u_int i;
373 struct exec_package pack;
374 struct nameidata nid;
375 struct vattr attr;
376 struct proc *p;
377 struct ucred *cred;
378 char *argp;
379 char * const *cpp;
380 char *dp, *sp;
381 long argc, envc;
382 size_t len;
383 char *stack;
384 struct ps_strings arginfo;
385 struct vmspace *vm;
386 char **tmpfap;
387 int szsigcode;
388 struct exec_vmcmd *base_vcp;
389 int oldlwpflags;
390
391 /* Disable scheduler activation upcalls. */
392 oldlwpflags = l->l_flag & (L_SA | L_SA_UPCALL);
393 if (l->l_flag & L_SA)
394 l->l_flag &= ~(L_SA | L_SA_UPCALL);
395
396 p = l->l_proc;
397 /*
398 * Lock the process and set the P_INEXEC flag to indicate that
399 * it should be left alone until we're done here. This is
400 * necessary to avoid race conditions - e.g. in ptrace() -
401 * that might allow a local user to illicitly obtain elevated
402 * privileges.
403 */
404 p->p_flag |= P_INEXEC;
405
406 cred = p->p_ucred;
407 base_vcp = NULL;
408 /*
	 * Init the namei data to point at the user's program name.
	 * This is done here rather than in check_exec(), so that it's
	 * possible to override this setting if any of the makecmd/probe
412 * functions call check_exec() recursively - for example,
413 * see exec_script_makecmds().
414 */
415 NDINIT(&nid, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p);
416
417 /*
418 * initialize the fields of the exec package.
419 */
420 pack.ep_name = SCARG(uap, path);
421 pack.ep_hdr = malloc(exec_maxhdrsz, M_EXEC, M_WAITOK);
422 pack.ep_hdrlen = exec_maxhdrsz;
423 pack.ep_hdrvalid = 0;
424 pack.ep_ndp = &nid;
425 pack.ep_emul_arg = NULL;
426 pack.ep_vmcmds.evs_cnt = 0;
427 pack.ep_vmcmds.evs_used = 0;
428 pack.ep_vap = &attr;
429 pack.ep_flags = 0;
430
431 #ifdef LKM
432 lockmgr(&exec_lock, LK_SHARED, NULL);
433 #endif
434
435 /* see if we can run it. */
436 #ifdef VERIFIED_EXEC
437 if ((error = check_exec(p, &pack, 1)) != 0)
438 /* if ((error = check_exec(p, &pack, 0)) != 0) */
439 #else
440 if ((error = check_exec(p, &pack)) != 0)
441 #endif
442 goto freehdr;
443
444 /* XXX -- THE FOLLOWING SECTION NEEDS MAJOR CLEANUP */
445
446 /* allocate an argument buffer */
447 argp = (char *) uvm_km_valloc_wait(exec_map, NCARGS);
448 #ifdef DIAGNOSTIC
	if (argp == NULL)
450 panic("execve: argp == NULL");
451 #endif
452 dp = argp;
453 argc = 0;
454
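	/*
	 * A makecmds routine may have built a fake argument list for us
	 * (the #! script handler, for instance, hands back the
	 * interpreter name and its optional argument this way).
	 */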
455 /* copy the fake args list, if there's one, freeing it as we go */
456 if (pack.ep_flags & EXEC_HASARGL) {
457 tmpfap = pack.ep_fa;
458 while (*tmpfap != NULL) {
459 char *cp;
460
461 cp = *tmpfap;
462 while (*cp)
463 *dp++ = *cp++;
464 dp++;
465
466 FREE(*tmpfap, M_EXEC);
467 tmpfap++; argc++;
468 }
469 FREE(pack.ep_fa, M_EXEC);
470 pack.ep_flags &= ~EXEC_HASARGL;
471 }
472
473 /* Now get argv & environment */
474 if (!(cpp = SCARG(uap, argp))) {
475 error = EINVAL;
476 goto bad;
477 }
478
479 if (pack.ep_flags & EXEC_SKIPARG)
480 cpp++;
481
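	/*
	 * Copy in the argument strings: fetch each user pointer, then
	 * copy the string it points to into the kernel buffer at dp.
	 * The remaining room is recomputed on every pass, so running
	 * past ARG_MAX shows up as ENAMETOOLONG from copyinstr() and is
	 * reported to the caller as E2BIG.
	 */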
482 while (1) {
483 len = argp + ARG_MAX - dp;
484 if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
485 goto bad;
486 if (!sp)
487 break;
488 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
489 if (error == ENAMETOOLONG)
490 error = E2BIG;
491 goto bad;
492 }
493 #ifdef KTRACE
494 if (KTRPOINT(p, KTR_EXEC_ARG))
495 ktrkmem(p, KTR_EXEC_ARG, dp, len - 1);
496 #endif
497 dp += len;
498 cpp++;
499 argc++;
500 }
501
502 envc = 0;
503 /* environment need not be there */
504 if ((cpp = SCARG(uap, envp)) != NULL ) {
505 while (1) {
506 len = argp + ARG_MAX - dp;
507 if ((error = copyin(cpp, &sp, sizeof(sp))) != 0)
508 goto bad;
509 if (!sp)
510 break;
511 if ((error = copyinstr(sp, dp, len, &len)) != 0) {
512 if (error == ENAMETOOLONG)
513 error = E2BIG;
514 goto bad;
515 }
516 #ifdef KTRACE
517 if (KTRPOINT(p, KTR_EXEC_ENV))
518 ktrkmem(p, KTR_EXEC_ENV, dp, len - 1);
519 #endif
520 dp += len;
521 cpp++;
522 envc++;
523 }
524 }
525
526 dp = (char *) ALIGN(dp);
527
528 szsigcode = pack.ep_es->es_emul->e_esigcode -
529 pack.ep_es->es_emul->e_sigcode;
530
531 /* Now check if args & environ fit into new stack */
532 if (pack.ep_flags & EXEC_32)
533 len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
534 sizeof(int) + sizeof(int) + dp + STACKGAPLEN +
535 szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
536 - argp;
537 else
538 len = ((argc + envc + 2 + pack.ep_es->es_arglen) *
539 sizeof(char *) + sizeof(int) + dp + STACKGAPLEN +
540 szsigcode + sizeof(struct ps_strings) + STACK_PTHREADSPACE)
541 - argp;
542
543 len = ALIGN(len); /* make the stack "safely" aligned */
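
	/*
	 * At this point len accounts for everything the new stack must
	 * hold: the strings accumulated between argp and dp, the argv
	 * and envp pointer vectors with their terminating NULLs, argc,
	 * any extra pointer slots the exec format asks for (es_arglen),
	 * the STACKGAPLEN gap, the signal trampoline, the ps_strings
	 * structure and STACK_PTHREADSPACE.
	 */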
544
545 if (len > pack.ep_ssize) { /* in effect, compare to initial limit */
546 error = ENOMEM;
547 goto bad;
548 }
549
	/* Get rid of other LWPs. */
551 p->p_flag |= P_WEXIT; /* XXX hack. lwp-exit stuff wants to see it. */
552 exit_lwps(l);
553 p->p_flag &= ~P_WEXIT;
554 KDASSERT(p->p_nlwps == 1);
555
556 /* This is now LWP 1 */
557 l->l_lid = 1;
558 p->p_nlwpid = 1;
559
560 /* Release any SA state. */
561 if (p->p_sa)
562 sa_release(p);
563
564 /* Remove POSIX timers */
565 timers_free(p, TIMERS_POSIX);
566
567 /* adjust "active stack depth" for process VSZ */
568 pack.ep_ssize = len; /* maybe should go elsewhere, but... */
569
570 /*
571 * Do whatever is necessary to prepare the address space
572 * for remapping. Note that this might replace the current
573 * vmspace with another!
574 */
575 uvmspace_exec(l, pack.ep_vm_minaddr, pack.ep_vm_maxaddr);
576
577 /* record proc's vnode, for use by procfs and others */
578 if (p->p_textvp)
579 vrele(p->p_textvp);
580 VREF(pack.ep_vp);
581 p->p_textvp = pack.ep_vp;
582
583 /* Now map address space */
584 vm = p->p_vmspace;
585 vm->vm_taddr = (caddr_t) pack.ep_taddr;
586 vm->vm_tsize = btoc(pack.ep_tsize);
587 vm->vm_daddr = (caddr_t) pack.ep_daddr;
588 vm->vm_dsize = btoc(pack.ep_dsize);
589 vm->vm_ssize = btoc(pack.ep_ssize);
590 vm->vm_maxsaddr = (caddr_t) pack.ep_maxsaddr;
591 vm->vm_minsaddr = (caddr_t) pack.ep_minsaddr;
592
593 /* create the new process's VM space by running the vmcmds */
594 #ifdef DIAGNOSTIC
595 if (pack.ep_vmcmds.evs_used == 0)
596 panic("execve: no vmcmds");
597 #endif
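	/*
	 * Each vmcmd maps, copies or zero-fills one piece of the new
	 * address space.  A command flagged VMCMD_BASE records its
	 * address as the base for later VMCMD_RELATIVE commands, whose
	 * ev_addr is an offset to be added to that base.
	 */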
598 for (i = 0; i < pack.ep_vmcmds.evs_used && !error; i++) {
599 struct exec_vmcmd *vcp;
600
601 vcp = &pack.ep_vmcmds.evs_cmds[i];
602 if (vcp->ev_flags & VMCMD_RELATIVE) {
603 #ifdef DIAGNOSTIC
604 if (base_vcp == NULL)
605 panic("execve: relative vmcmd with no base");
606 if (vcp->ev_flags & VMCMD_BASE)
607 panic("execve: illegal base & relative vmcmd");
608 #endif
609 vcp->ev_addr += base_vcp->ev_addr;
610 }
611 error = (*vcp->ev_proc)(p, vcp);
612 #ifdef DEBUG_EXEC
613 if (error) {
614 int j;
615 struct exec_vmcmd *vp = &pack.ep_vmcmds.evs_cmds[0];
616 for (j = 0; j <= i; j++)
617 uprintf(
618 "vmcmd[%d] = %#lx/%#lx fd@%#lx prot=0%o flags=%d\n",
619 j, vp[j].ev_addr, vp[j].ev_len,
620 vp[j].ev_offset, vp[j].ev_prot,
621 vp[j].ev_flags);
622 }
623 #endif /* DEBUG_EXEC */
624 if (vcp->ev_flags & VMCMD_BASE)
625 base_vcp = vcp;
626 }
627
628 /* free the vmspace-creation commands, and release their references */
629 kill_vmcmds(&pack.ep_vmcmds);
630
631 vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
632 VOP_CLOSE(pack.ep_vp, FREAD, cred, p);
633 vput(pack.ep_vp);
634
635 /* if an error happened, deallocate and punt */
636 if (error) {
637 DPRINTF(("execve: vmcmd %i failed: %d\n", i - 1, error));
638 goto exec_abort;
639 }
640
641 /* remember information about the process */
642 arginfo.ps_nargvstr = argc;
643 arginfo.ps_nenvstr = envc;
644
645 stack = (char *)STACK_ALLOC(STACK_GROW(vm->vm_minsaddr,
646 STACK_PTHREADSPACE + sizeof(struct ps_strings) + szsigcode),
647 len - (sizeof(struct ps_strings) + szsigcode));
648 #ifdef __MACHINE_STACK_GROWS_UP
649 /*
650 * The copyargs call always copies into lower addresses
651 * first, moving towards higher addresses, starting with
652 * the stack pointer that we give. When the stack grows
653 * down, this puts argc/argv/envp very shallow on the
654 * stack, right at the first user stack pointer, and puts
655 * STACKGAPLEN very deep in the stack. When the stack
656 * grows up, the situation is reversed.
657 *
658 * Normally, this is no big deal. But the ld_elf.so _rtld()
659 * function expects to be called with a single pointer to
660 * a region that has a few words it can stash values into,
661 * followed by argc/argv/envp. When the stack grows down,
662 * it's easy to decrement the stack pointer a little bit to
663 * allocate the space for these few words and pass the new
664 * stack pointer to _rtld. When the stack grows up, however,
665 * a few words before argc is part of the signal trampoline, XXX
666 * so we have a problem.
667 *
668 * Instead of changing how _rtld works, we take the easy way
669 * out and steal 32 bytes before we call copyargs. This
670 * space is effectively stolen from STACKGAPLEN.
671 */
672 stack += 32;
673 #endif /* __MACHINE_STACK_GROWS_UP */
674
675 /* Now copy argc, args & environ to new stack */
676 error = (*pack.ep_es->es_copyargs)(p, &pack, &arginfo, &stack, argp);
677 if (error) {
678 DPRINTF(("execve: copyargs failed %d\n", error));
679 goto exec_abort;
680 }
681 /* Move the stack back to original point */
682 stack = (char *)STACK_GROW(vm->vm_minsaddr, len);
683
684 /* fill process ps_strings info */
685 p->p_psstr = (struct ps_strings *)
686 STACK_ALLOC(STACK_GROW(vm->vm_minsaddr, STACK_PTHREADSPACE),
687 sizeof(struct ps_strings));
688 p->p_psargv = offsetof(struct ps_strings, ps_argvstr);
689 p->p_psnargv = offsetof(struct ps_strings, ps_nargvstr);
690 p->p_psenv = offsetof(struct ps_strings, ps_envstr);
691 p->p_psnenv = offsetof(struct ps_strings, ps_nenvstr);
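
	/*
	 * These offsets let ps(1), procfs and friends locate the argv
	 * and environment string vectors via the ps_strings structure
	 * placed at the top of the new stack.
	 */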
692
693 /* copy out the process's ps_strings structure */
694 if ((error = copyout(&arginfo, (char *)p->p_psstr,
695 sizeof(arginfo))) != 0) {
696 DPRINTF(("execve: ps_strings copyout %p->%p size %ld failed\n",
697 &arginfo, (char *)p->p_psstr, (long)sizeof(arginfo)));
698 goto exec_abort;
699 }
700
701 stopprofclock(p); /* stop profiling */
702 fdcloseexec(p); /* handle close on exec */
	execsigs(p);		/* reset caught signals */
704
705 l->l_ctxlink = NULL; /* reset ucontext link */
706
707 /* set command name & other accounting info */
708 len = min(nid.ni_cnd.cn_namelen, MAXCOMLEN);
709 memcpy(p->p_comm, nid.ni_cnd.cn_nameptr, len);
710 p->p_comm[len] = 0;
711 p->p_acflag &= ~AFORK;
712
713 p->p_flag |= P_EXEC;
714 if (p->p_flag & P_PPWAIT) {
715 p->p_flag &= ~P_PPWAIT;
716 wakeup((caddr_t) p->p_pptr);
717 }
718
719 /*
720 * deal with set[ug]id.
721 * MNT_NOSUID has already been used to disable s[ug]id.
722 */
723 if ((p->p_flag & P_TRACED) == 0 &&
724
725 (((attr.va_mode & S_ISUID) != 0 &&
726 p->p_ucred->cr_uid != attr.va_uid) ||
727
728 ((attr.va_mode & S_ISGID) != 0 &&
729 p->p_ucred->cr_gid != attr.va_gid))) {
730 /*
731 * Mark the process as SUGID before we do
732 * anything that might block.
733 */
734 p_sugid(p);
735
736 /* Make sure file descriptors 0..2 are in use. */
737 if ((error = fdcheckstd(p)) != 0)
738 goto exec_abort;
739
740 p->p_ucred = crcopy(cred);
741 #ifdef KTRACE
742 /*
		 * If the process is being ktraced, turn it off - unless
		 * root set it.
745 */
746 if (p->p_tracep && !(p->p_traceflag & KTRFAC_ROOT))
747 ktrderef(p);
748 #endif
749 if (attr.va_mode & S_ISUID)
750 p->p_ucred->cr_uid = attr.va_uid;
751 if (attr.va_mode & S_ISGID)
752 p->p_ucred->cr_gid = attr.va_gid;
753 } else
754 p->p_flag &= ~P_SUGID;
755 p->p_cred->p_svuid = p->p_ucred->cr_uid;
756 p->p_cred->p_svgid = p->p_ucred->cr_gid;
757
758 #if defined(__HAVE_RAS)
759 /*
760 * Remove all RASs from the address space.
761 */
762 ras_purgeall(p);
763 #endif
764
765 doexechooks(p);
766
767 uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);
768
769 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
770
771 /* notify others that we exec'd */
772 KNOTE(&p->p_klist, NOTE_EXEC);
773
774 /* setup new registers and do misc. setup. */
775 (*pack.ep_es->es_emul->e_setregs)(l, &pack, (u_long) stack);
776 if (pack.ep_es->es_setregs)
777 (*pack.ep_es->es_setregs)(l, &pack, (u_long) stack);
778
779 /* map the process's signal trampoline code */
780 if (exec_sigcode_map(p, pack.ep_es->es_emul))
781 goto exec_abort;
782
783 if (p->p_flag & P_TRACED)
784 psignal(p, SIGTRAP);
785
786 free(pack.ep_hdr, M_EXEC);
787
788 /*
789 * Call emulation specific exec hook. This can setup per-process
790 * p->p_emuldata or do any other per-process stuff an emulation needs.
791 *
	 * If we are executing a process of a different emulation than the
	 * original forked process, call e_proc_exit() of the old emulation
	 * first, then e_proc_exec() of the new emulation. If the emulation
	 * is the same, the exec hook code should deallocate any old
	 * emulation resources held previously by this process.
797 */
798 if (p->p_emul && p->p_emul->e_proc_exit
799 && p->p_emul != pack.ep_es->es_emul)
800 (*p->p_emul->e_proc_exit)(p);
801
802 /*
	 * Call exec hook. Emulation code may NOT store a reference to
	 * anything from &pack.
805 */
806 if (pack.ep_es->es_emul->e_proc_exec)
807 (*pack.ep_es->es_emul->e_proc_exec)(p, &pack);
808
809 /* update p_emul, the old value is no longer needed */
810 p->p_emul = pack.ep_es->es_emul;
811
812 /* ...and the same for p_execsw */
813 p->p_execsw = pack.ep_es;
814
815 #ifdef __HAVE_SYSCALL_INTERN
816 (*p->p_emul->e_syscall_intern)(p);
817 #endif
818 #ifdef KTRACE
819 if (KTRPOINT(p, KTR_EMUL))
820 ktremul(p);
821 #endif
822
823 #ifdef LKM
824 lockmgr(&exec_lock, LK_RELEASE, NULL);
825 #endif
826 p->p_flag &= ~P_INEXEC;
827
828 if (p->p_flag & P_STOPEXEC) {
829 int s;
830
831 sigminusset(&contsigmask, &p->p_sigctx.ps_siglist);
832 SCHED_LOCK(s);
833 p->p_pptr->p_nstopchild++;
834 p->p_stat = SSTOP;
835 l->l_stat = LSSTOP;
836 p->p_nrlwps--;
837 mi_switch(l, NULL);
838 SCHED_ASSERT_UNLOCKED();
839 splx(s);
840 }
841
842 return (EJUSTRETURN);
843
844 bad:
845 p->p_flag &= ~P_INEXEC;
846 /* free the vmspace-creation commands, and release their references */
847 kill_vmcmds(&pack.ep_vmcmds);
848 /* kill any opened file descriptor, if necessary */
849 if (pack.ep_flags & EXEC_HASFD) {
850 pack.ep_flags &= ~EXEC_HASFD;
851 (void) fdrelease(p, pack.ep_fd);
852 }
853 /* close and put the exec'd file */
854 vn_lock(pack.ep_vp, LK_EXCLUSIVE | LK_RETRY);
855 VOP_CLOSE(pack.ep_vp, FREAD, cred, p);
856 vput(pack.ep_vp);
857 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
858 uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);
859
860 freehdr:
861 l->l_flag |= oldlwpflags;
862 p->p_flag &= ~P_INEXEC;
863 #ifdef LKM
864 lockmgr(&exec_lock, LK_RELEASE, NULL);
865 #endif
866
867 free(pack.ep_hdr, M_EXEC);
868 return error;
869
870 exec_abort:
871 p->p_flag &= ~P_INEXEC;
872 #ifdef LKM
873 lockmgr(&exec_lock, LK_RELEASE, NULL);
874 #endif
875
876 /*
	 * The old process image doesn't exist anymore; exit gracefully.
	 * Get rid of the (new) address space we have created, if any, get
	 * rid of our namei data and vnode, and exit noting failure.
880 */
881 uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
882 VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);
883 if (pack.ep_emul_arg)
884 FREE(pack.ep_emul_arg, M_TEMP);
885 PNBUF_PUT(nid.ni_cnd.cn_pnbuf);
886 uvm_km_free_wakeup(exec_map, (vaddr_t) argp, NCARGS);
887 free(pack.ep_hdr, M_EXEC);
888 exit1(l, W_EXITCODE(error, SIGABRT));
889
890 /* NOTREACHED */
891 return 0;
892 }
893
894
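/*
 * Default es_copyargs routine.  Lay out the new stack at *stackp as:
 * argc, the argv pointer vector, a terminating NULL, the envp pointer
 * vector, another NULL, and finally the strings themselves, copied
 * from the kernel buffer argp.  es_arglen extra pointer slots are left
 * between the vectors and the strings; on return *stackp points just
 * past envp's NULL, i.e. at those slots, so exec formats that need
 * extra data there (an emulation's es_copyargs wrapper, say) can fill
 * them in.
 */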
895 int
896 copyargs(struct proc *p, struct exec_package *pack, struct ps_strings *arginfo,
897 char **stackp, void *argp)
898 {
899 char **cpp, *dp, *sp;
900 size_t len;
901 void *nullp;
902 long argc, envc;
903 int error;
904
905 cpp = (char **)*stackp;
906 nullp = NULL;
907 argc = arginfo->ps_nargvstr;
908 envc = arginfo->ps_nenvstr;
909 if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0)
910 return error;
911
912 dp = (char *) (cpp + argc + envc + 2 + pack->ep_es->es_arglen);
913 sp = argp;
914
915 /* XXX don't copy them out, remap them! */
916 arginfo->ps_argvstr = cpp; /* remember location of argv for later */
917
918 for (; --argc >= 0; sp += len, dp += len)
919 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
920 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
921 return error;
922
923 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
924 return error;
925
926 arginfo->ps_envstr = cpp; /* remember location of envp for later */
927
928 for (; --envc >= 0; sp += len, dp += len)
929 if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0 ||
930 (error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0)
931 return error;
932
933 if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0)
934 return error;
935
936 *stackp = (char *)cpp;
937 return 0;
938 }
939
940 #ifdef LKM
941 /*
 * Find an emulation of the given name in the list of emulations.
943 * Needs to be called with the exec_lock held.
944 */
945 const struct emul *
946 emul_search(const char *name)
947 {
948 struct emul_entry *it;
949
950 LIST_FOREACH(it, &el_head, el_list) {
951 if (strcmp(name, it->el_emul->e_name) == 0)
952 return it->el_emul;
953 }
954
955 return NULL;
956 }
957
958 /*
 * Add an emulation to the list, if it's not there already.
960 */
961 int
962 emul_register(const struct emul *emul, int ro_entry)
963 {
964 struct emul_entry *ee;
965 int error;
966
967 error = 0;
968 lockmgr(&exec_lock, LK_SHARED, NULL);
969
970 if (emul_search(emul->e_name)) {
971 error = EEXIST;
972 goto out;
973 }
974
975 MALLOC(ee, struct emul_entry *, sizeof(struct emul_entry),
976 M_EXEC, M_WAITOK);
977 ee->el_emul = emul;
978 ee->ro_entry = ro_entry;
979 LIST_INSERT_HEAD(&el_head, ee, el_list);
980
981 out:
982 lockmgr(&exec_lock, LK_RELEASE, NULL);
983 return error;
984 }
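
/*
 * An emulation LKM would typically use the entry points above together
 * with exec_add()/exec_remove() below from its load/unload hooks.  A
 * rough, illustrative sketch (the names are invented, not part of this
 * file); note the unload order matters, since emul_unregister()
 * refuses to remove an emulation still referenced by execsw[]:
 *
 *	extern const struct emul emul_foo;
 *	extern struct execsw foo_execsw;
 *
 *	int
 *	foo_mod_load(void)
 *	{
 *		int error;
 *
 *		if ((error = emul_register(&emul_foo, 0)) != 0)
 *			return error;
 *		if ((error = exec_add(&foo_execsw, "foo")) != 0)
 *			emul_unregister("foo");
 *		return error;
 *	}
 *
 *	int
 *	foo_mod_unload(void)
 *	{
 *		int error;
 *
 *		if ((error = exec_remove(&foo_execsw)) != 0)
 *			return error;
 *		return emul_unregister("foo");
 *	}
 */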
985
986 /*
987 * Remove emulation with name 'name' from list of supported emulations.
988 */
989 int
990 emul_unregister(const char *name)
991 {
992 const struct proclist_desc *pd;
993 struct emul_entry *it;
994 int i, error;
995 struct proc *ptmp;
996
997 error = 0;
998 lockmgr(&exec_lock, LK_SHARED, NULL);
999
1000 LIST_FOREACH(it, &el_head, el_list) {
1001 if (strcmp(it->el_emul->e_name, name) == 0)
1002 break;
1003 }
1004
1005 if (!it) {
1006 error = ENOENT;
1007 goto out;
1008 }
1009
1010 if (it->ro_entry) {
1011 error = EBUSY;
1012 goto out;
1013 }
1014
	/* test if any execsw[] entry is still using this */
1016 for(i=0; i < nexecs; i++) {
1017 if (execsw[i]->es_emul == it->el_emul) {
1018 error = EBUSY;
1019 goto out;
1020 }
1021 }
1022
1023 /*
1024 * Test if any process is running under this emulation - since
	 * emul_unregister() is run quite seldom, it's better
	 * to do an expensive check here than to use any locking.
1027 */
1028 proclist_lock_read();
1029 for (pd = proclists; pd->pd_list != NULL && !error; pd++) {
1030 PROCLIST_FOREACH(ptmp, pd->pd_list) {
1031 if (ptmp->p_emul == it->el_emul) {
1032 error = EBUSY;
1033 break;
1034 }
1035 }
1036 }
1037 proclist_unlock_read();
1038
1039 if (error)
1040 goto out;
1041
1042
1043 /* entry is not used, remove it */
1044 LIST_REMOVE(it, el_list);
1045 FREE(it, M_EXEC);
1046
1047 out:
1048 lockmgr(&exec_lock, LK_RELEASE, NULL);
1049 return error;
1050 }
1051
1052 /*
1053 * Add execsw[] entry.
1054 */
1055 int
1056 exec_add(struct execsw *esp, const char *e_name)
1057 {
1058 struct exec_entry *it;
1059 int error;
1060
1061 error = 0;
1062 lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
1063
1064 if (!esp->es_emul) {
1065 esp->es_emul = emul_search(e_name);
1066 if (!esp->es_emul) {
1067 error = ENOENT;
1068 goto out;
1069 }
1070 }
1071
1072 LIST_FOREACH(it, &ex_head, ex_list) {
1073 /* assume tuple (makecmds, probe_func, emulation) is unique */
1074 if (it->es->es_makecmds == esp->es_makecmds
1075 && it->es->u.elf_probe_func == esp->u.elf_probe_func
1076 && it->es->es_emul == esp->es_emul) {
1077 error = EEXIST;
1078 goto out;
1079 }
1080 }
1081
1082 /* if we got here, the entry doesn't exist yet */
1083 MALLOC(it, struct exec_entry *, sizeof(struct exec_entry),
1084 M_EXEC, M_WAITOK);
1085 it->es = esp;
1086 LIST_INSERT_HEAD(&ex_head, it, ex_list);
1087
1088 /* update execsw[] */
1089 exec_init(0);
1090
1091 out:
1092 lockmgr(&exec_lock, LK_RELEASE, NULL);
1093 return error;
1094 }
1095
1096 /*
1097 * Remove execsw[] entry.
1098 */
1099 int
1100 exec_remove(const struct execsw *esp)
1101 {
1102 struct exec_entry *it;
1103 int error;
1104
1105 error = 0;
1106 lockmgr(&exec_lock, LK_EXCLUSIVE, NULL);
1107
1108 LIST_FOREACH(it, &ex_head, ex_list) {
1109 /* assume tuple (makecmds, probe_func, emulation) is unique */
1110 if (it->es->es_makecmds == esp->es_makecmds
1111 && it->es->u.elf_probe_func == esp->u.elf_probe_func
1112 && it->es->es_emul == esp->es_emul)
1113 break;
1114 }
1115 if (!it) {
1116 error = ENOENT;
1117 goto out;
1118 }
1119
1120 /* remove item from list and free resources */
1121 LIST_REMOVE(it, ex_list);
1122 FREE(it, M_EXEC);
1123
1124 /* update execsw[] */
1125 exec_init(0);
1126
1127 out:
1128 lockmgr(&exec_lock, LK_RELEASE, NULL);
1129 return error;
1130 }
1131
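/*
 * Insert esp into the priority-ordered list that exec_init() walks
 * when it rebuilds execsw[]: EXECSW_PRIO_FIRST entries go at the head,
 * EXECSW_PRIO_ANY entries after all FIRST and ANY entries, and
 * EXECSW_PRIO_LAST entries at the tail.
 */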
1132 static void
1133 link_es(struct execsw_entry **listp, const struct execsw *esp)
1134 {
1135 struct execsw_entry *et, *e1;
1136
1137 MALLOC(et, struct execsw_entry *, sizeof(struct execsw_entry),
1138 M_TEMP, M_WAITOK);
1139 et->next = NULL;
1140 et->es = esp;
1141 if (*listp == NULL) {
1142 *listp = et;
1143 return;
1144 }
1145
1146 switch(et->es->es_prio) {
1147 case EXECSW_PRIO_FIRST:
1148 /* put new entry as the first */
1149 et->next = *listp;
1150 *listp = et;
1151 break;
1152 case EXECSW_PRIO_ANY:
1153 /* put new entry after all *_FIRST and *_ANY entries */
1154 for(e1 = *listp; e1->next
1155 && e1->next->es->es_prio != EXECSW_PRIO_LAST;
1156 e1 = e1->next);
1157 et->next = e1->next;
1158 e1->next = et;
1159 break;
1160 case EXECSW_PRIO_LAST:
1161 /* put new entry as the last one */
1162 for(e1 = *listp; e1->next; e1 = e1->next);
1163 e1->next = et;
1164 break;
1165 default:
1166 #ifdef DIAGNOSTIC
		panic("execsw[] entry with unknown priority %d found",
1168 et->es->es_prio);
1169 #endif
1170 break;
1171 }
1172 }
1173
1174 /*
1175 * Initialize exec structures. If init_boot is true, also does necessary
1176 * one-time initialization (it's called from main() that way).
 * Once the system is multiuser, this should be called with exec_lock held,
1178 * i.e. via exec_{add|remove}().
1179 */
1180 int
1181 exec_init(int init_boot)
1182 {
1183 const struct execsw **new_es, * const *old_es;
1184 struct execsw_entry *list, *e1;
1185 struct exec_entry *e2;
1186 int i, es_sz;
1187
1188 if (init_boot) {
1189 /* do one-time initializations */
1190 lockinit(&exec_lock, PWAIT, "execlck", 0, 0);
1191
1192 /* register compiled-in emulations */
1193 for(i=0; i < nexecs_builtin; i++) {
1194 if (execsw_builtin[i].es_emul)
1195 emul_register(execsw_builtin[i].es_emul, 1);
1196 }
1197 #ifdef DIAGNOSTIC
1198 if (i == 0)
1199 panic("no emulations found in execsw_builtin[]");
1200 #endif
1201 }
1202
1203 /*
1204 * Build execsw[] array from builtin entries and entries added
1205 * at runtime.
1206 */
1207 list = NULL;
1208 for(i=0; i < nexecs_builtin; i++)
1209 link_es(&list, &execsw_builtin[i]);
1210
1211 /* Add dynamically loaded entries */
1212 es_sz = nexecs_builtin;
1213 LIST_FOREACH(e2, &ex_head, ex_list) {
1214 link_es(&list, e2->es);
1215 es_sz++;
1216 }
1217
1218 /*
	 * Now that we have sorted all execsw entries, create the new
	 * execsw[] and free the no longer needed memory in the process.
1221 */
1222 new_es = malloc(es_sz * sizeof(struct execsw *), M_EXEC, M_WAITOK);
1223 for(i=0; list; i++) {
1224 new_es[i] = list->es;
1225 e1 = list->next;
1226 FREE(list, M_TEMP);
1227 list = e1;
1228 }
1229
1230 /*
	 * New execsw[] array built, now replace the old execsw[] and
	 * free its memory.
1233 */
1234 old_es = execsw;
1235 execsw = new_es;
1236 nexecs = es_sz;
1237 if (old_es)
1238 free((void *)old_es, M_EXEC);
1239
1240 /*
1241 * Figure out the maximum size of an exec header.
1242 */
1243 exec_maxhdrsz = 0;
1244 for (i = 0; i < nexecs; i++) {
1245 if (execsw[i]->es_hdrsz > exec_maxhdrsz)
1246 exec_maxhdrsz = execsw[i]->es_hdrsz;
1247 }
1248
1249 return 0;
1250 }
1251 #endif
1252
1253 #ifndef LKM
1254 /*
1255 * Simplified exec_init() for kernels without LKMs. Only initialize
1256 * exec_maxhdrsz and execsw[].
1257 */
1258 int
1259 exec_init(int init_boot)
1260 {
1261 int i;
1262
1263 #ifdef DIAGNOSTIC
1264 if (!init_boot)
1265 panic("exec_init(): called with init_boot == 0");
1266 #endif
1267
1268 /* do one-time initializations */
1269 nexecs = nexecs_builtin;
1270 execsw = malloc(nexecs*sizeof(struct execsw *), M_EXEC, M_WAITOK);
1271
1272 /*
1273 * Fill in execsw[] and figure out the maximum size of an exec header.
1274 */
1275 exec_maxhdrsz = 0;
1276 for(i=0; i < nexecs; i++) {
1277 execsw[i] = &execsw_builtin[i];
1278 if (execsw_builtin[i].es_hdrsz > exec_maxhdrsz)
1279 exec_maxhdrsz = execsw_builtin[i].es_hdrsz;
1280 }
1281
1282 return 0;
1283
1284 }
1285 #endif /* !LKM */
1286
1287 static int
1288 exec_sigcode_map(struct proc *p, const struct emul *e)
1289 {
1290 vaddr_t va;
1291 vsize_t sz;
1292 int error;
1293 struct uvm_object *uobj;
1294
1295 sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;
1296
1297 if (e->e_sigobject == NULL || sz == 0) {
1298 return 0;
1299 }
1300
1301 /*
1302 * If we don't have a sigobject for this emulation, create one.
1303 *
1304 * sigobject is an anonymous memory object (just like SYSV shared
1305 * memory) that we keep a permanent reference to and that we map
	 * in all processes that need this sigcode. The creation is simple:
	 * we create an object, add a permanent reference to it, map it in
	 * kernel space, copy the sigcode into it and unmap it.
1309 * We map it with PROT_READ|PROT_EXEC into the process just
1310 * the way sys_mmap() would map it.
1311 */
1312
1313 uobj = *e->e_sigobject;
1314 if (uobj == NULL) {
1315 uobj = uao_create(sz, 0);
1316 (*uobj->pgops->pgo_reference)(uobj);
1317 va = vm_map_min(kernel_map);
1318 if ((error = uvm_map(kernel_map, &va, round_page(sz),
1319 uobj, 0, 0,
1320 UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
1321 UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
1322 printf("kernel mapping failed %d\n", error);
1323 (*uobj->pgops->pgo_detach)(uobj);
1324 return (error);
1325 }
1326 memcpy((void *)va, e->e_sigcode, sz);
1327 #ifdef PMAP_NEED_PROCWR
1328 pmap_procwr(&proc0, va, sz);
1329 #endif
1330 uvm_unmap(kernel_map, va, va + round_page(sz));
1331 *e->e_sigobject = uobj;
1332 }
1333
1334 /* Just a hint to uvm_map where to put it. */
1335 va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
1336 round_page(sz));
1337
1338 #ifdef __alpha__
1339 /*
1340 * Tru64 puts /sbin/loader at the end of user virtual memory,
1341 * which causes the above calculation to put the sigcode at
1342 * an invalid address. Put it just below the text instead.
1343 */
1344 if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
1345 va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
1346 }
1347 #endif
1348
1349 (*uobj->pgops->pgo_reference)(uobj);
1350 error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
1351 uobj, 0, 0,
1352 UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
1353 UVM_ADV_RANDOM, 0));
1354 if (error) {
1355 (*uobj->pgops->pgo_detach)(uobj);
1356 return (error);
1357 }
1358 p->p_sigctx.ps_sigcode = (void *)va;
1359 return (0);
1360 }
1361