kern_exit.c revision 1.300 1 /* $NetBSD: kern_exit.c,v 1.300 2025/03/16 15:52:18 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020, 2023
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, and by Andrew Doran.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Copyright (c) 1982, 1986, 1989, 1991, 1993
36 * The Regents of the University of California. All rights reserved.
37 * (c) UNIX System Laboratories, Inc.
38 * All or some portions of this file are derived from material licensed
39 * to the University of California by American Telephone and Telegraph
40 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
41 * the permission of UNIX System Laboratories, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. Neither the name of the University nor the names of its contributors
52 * may be used to endorse or promote products derived from this software
53 * without specific prior written permission.
54 *
55 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
56 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
59 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65 * SUCH DAMAGE.
66 *
67 * @(#)kern_exit.c 8.10 (Berkeley) 2/23/95
68 */
69
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.300 2025/03/16 15:52:18 riastradh Exp $");
72
73 #include "opt_ktrace.h"
74 #include "opt_dtrace.h"
75 #include "opt_sysv.h"
76
77 #include <sys/param.h>
78 #include <sys/types.h>
79
80 #include <sys/acct.h>
81 #include <sys/atomic.h>
82 #include <sys/buf.h>
83 #include <sys/cpu.h>
84 #include <sys/file.h>
85 #include <sys/filedesc.h>
86 #include <sys/fstrans.h>
87 #include <sys/ioctl.h>
88 #include <sys/kauth.h>
89 #include <sys/kernel.h>
90 #include <sys/ktrace.h>
91 #include <sys/lock.h>
92 #include <sys/lockdebug.h>
93 #include <sys/lwpctl.h>
94 #include <sys/mount.h>
95 #include <sys/pool.h>
96 #include <sys/proc.h>
97 #include <sys/psref.h>
98 #include <sys/ptrace.h>
99 #include <sys/ras.h>
100 #include <sys/resource.h>
101 #include <sys/sched.h>
102 #include <sys/sdt.h>
103 #include <sys/signalvar.h>
104 #include <sys/sleepq.h>
105 #include <sys/syscallargs.h>
106 #include <sys/syslog.h>
107 #include <sys/systm.h>
108 #include <sys/time.h>
109 #include <sys/tty.h>
110 #include <sys/uidinfo.h>
111 #include <sys/vnode.h>
112 #include <sys/wait.h>
113
114 #include <uvm/uvm_extern.h>
115
/*
 * Optional exit-path debug tracing.
 *
 * When DEBUG_EXIT is defined, DPRINTF((fmt, ...)) calls printf() with the
 * given (parenthesised) argument list if the run-time switch debug_exit is
 * non-zero.  Otherwise DPRINTF() expands to an empty statement.
 *
 * Both forms are wrapped in do { } while (0) so that the macro always
 * behaves as exactly one statement and can never capture an `else' that
 * belongs to the caller.
 */
#ifdef DEBUG_EXIT
int debug_exit = 0;
#define	DPRINTF(x)	do {						\
	if (debug_exit)							\
		printf x;						\
} while (/*CONSTCOND*/0)
#else
#define	DPRINTF(x)	do { } while (/*CONSTCOND*/0)
#endif
122
123 static int find_stopped_child(struct proc *, idtype_t, id_t, int,
124 struct proc **, struct wrusage *, siginfo_t *);
125 static void proc_free(struct proc *, struct wrusage *);
126
127 /*
128 * DTrace SDT provider definitions
129 */
130 SDT_PROVIDER_DECLARE(proc);
131 SDT_PROBE_DEFINE1(proc, kernel, , exit, "int");
132
133 /*
134 * Fill in the appropriate signal information, and signal the parent.
135 */
136 /* XXX noclone works around a gcc 4.5 bug on arm */
137 static void __noclone
138 exit_psignal(struct proc *p, struct proc *pp, ksiginfo_t *ksi)
139 {
140
141 KSI_INIT(ksi);
142 if ((ksi->ksi_signo = P_EXITSIG(p)) == SIGCHLD) {
143 if (p->p_xsig) {
144 if (p->p_sflag & PS_COREDUMP)
145 ksi->ksi_code = CLD_DUMPED;
146 else
147 ksi->ksi_code = CLD_KILLED;
148 ksi->ksi_status = p->p_xsig;
149 } else {
150 ksi->ksi_code = CLD_EXITED;
151 ksi->ksi_status = p->p_xexit;
152 }
153 } else {
154 ksi->ksi_code = SI_USER;
155 ksi->ksi_status = p->p_xsig;
156 }
157 /*
158 * We fill those in, even for non-SIGCHLD.
159 * It's safe to access p->p_cred unlocked here.
160 */
161 ksi->ksi_pid = p->p_pid;
162 ksi->ksi_uid = kauth_cred_geteuid(p->p_cred);
163 /* XXX: is this still valid? */
164 ksi->ksi_utime = p->p_stats->p_ru.ru_utime.tv_sec;
165 ksi->ksi_stime = p->p_stats->p_ru.ru_stime.tv_sec;
166 }
167
168 /*
169 * exit --
170 * Death of process.
171 */
172 int
173 sys_exit(struct lwp *l, const struct sys_exit_args *uap, register_t *retval)
174 {
175 /* {
176 syscallarg(int) rval;
177 } */
178 struct proc *p = l->l_proc;
179
180 /* Don't call exit1() multiple times in the same process. */
181 mutex_enter(p->p_lock);
182 if (p->p_sflag & PS_WEXIT) {
183 mutex_exit(p->p_lock);
184 lwp_exit(l);
185 }
186
187 /* exit1() will release the mutex. */
188 exit1(l, SCARG(uap, rval), 0);
189 /* NOTREACHED */
190 return (0);
191 }
192
193 /*
194 * Exit: deallocate address space and other resources, change proc state
195 * to zombie, and unlink proc from allproc and parent's lists. Save exit
196 * status and rusage for wait(). Check for child processes and orphan them.
197 *
198 * Must be called with p->p_lock held. Does not return.
199 */
200 void
201 exit1(struct lwp *l, int exitcode, int signo)
202 {
203 struct proc *p, *child, *next_child, *old_parent, *new_parent;
204 struct pgrp *pgrp;
205 ksiginfo_t ksi;
206 ksiginfoq_t kq;
207 int wakeinit;
208
209 p = l->l_proc;
210
211 /* Verify that we hold no locks other than p->p_lock. */
212 LOCKDEBUG_BARRIER(p->p_lock, 0);
213
214 /* XXX Temporary: something is leaking kernel_lock. */
215 KERNEL_UNLOCK_ALL(l, NULL);
216
217 KASSERT(mutex_owned(p->p_lock));
218 KASSERT(p->p_vmspace != NULL);
219
220 if (__predict_false(p == initproc)) {
221 panic("init died (signal %d, exit %d)", signo, exitcode);
222 }
223
224 p->p_sflag |= PS_WEXIT;
225
226 /*
227 * Force all other LWPs to exit before we do. Only then can we
228 * begin to tear down the rest of the process state.
229 */
230 if (p->p_nlwps > 1) {
231 exit_lwps(l);
232 }
233
234 ksiginfo_queue_init(&kq);
235
236 /*
237 * If we have been asked to stop on exit, do so now.
238 */
239 if (__predict_false(p->p_sflag & PS_STOPEXIT)) {
240 KASSERT(l->l_blcnt == 0);
241 sigclearall(p, &contsigmask, &kq);
242
243 if (!mutex_tryenter(&proc_lock)) {
244 mutex_exit(p->p_lock);
245 mutex_enter(&proc_lock);
246 mutex_enter(p->p_lock);
247 }
248 p->p_waited = 0;
249 p->p_pptr->p_nstopchild++;
250 p->p_stat = SSTOP;
251 mutex_exit(&proc_lock);
252 lwp_lock(l);
253 p->p_nrlwps--;
254 l->l_stat = LSSTOP;
255 lwp_unlock(l);
256 mutex_exit(p->p_lock);
257 lwp_lock(l);
258 spc_lock(l->l_cpu);
259 mi_switch(l);
260 mutex_enter(p->p_lock);
261 }
262
263 /*
264 * Bin any remaining signals and mark the process as dying so it will
265 * not be found for, e.g. signals.
266 */
267 sigfillset(&p->p_sigctx.ps_sigignore);
268 sigclearall(p, NULL, &kq);
269 p->p_stat = SDYING;
270
271 /*
272 * Perform any required thread cleanup. Do this early so
273 * anyone wanting to look us up by our global thread ID
274 * will fail to find us.
275 *
276 * N.B. this will unlock p->p_lock on our behalf.
277 */
278 lwp_thread_cleanup(l);
279
280 ksiginfo_queue_drain(&kq);
281
282 /* Destroy any lwpctl info. */
283 if (p->p_lwpctl != NULL)
284 lwp_ctl_exit();
285
286 /*
287 * Drain all remaining references that procfs, ptrace and others may
288 * have on the process.
289 */
290 rw_enter(&p->p_reflock, RW_WRITER);
291
292 DPRINTF(("%s: %d.%d exiting.\n", __func__, p->p_pid, l->l_lid));
293
294 ptimers_free(p, TIMERS_ALL);
295 #if defined(__HAVE_RAS)
296 ras_purgeall();
297 #endif
298
299 /*
300 * Close open files, release open-file table and free signal
301 * actions. This may block!
302 */
303 fd_free();
304 cwdfree(p->p_cwdi);
305 p->p_cwdi = NULL;
306 doexithooks(p);
307 sigactsfree(p->p_sigacts);
308
309 /*
310 * Write out accounting data.
311 */
312 (void)acct_process(l);
313
314 #ifdef KTRACE
315 /*
316 * Release trace file.
317 */
318 if (p->p_tracep != NULL) {
319 mutex_enter(&ktrace_lock);
320 ktrderef(p);
321 mutex_exit(&ktrace_lock);
322 }
323 #endif
324
325 p->p_xexit = exitcode;
326 p->p_xsig = signo;
327
328 /*
329 * If emulation has process exit hook, call it now.
330 * Set the exit status now so that the exit hook has
331 * an opportunity to tweak it (COMPAT_LINUX requires
332 * this for thread group emulation)
333 */
334 if (p->p_emul->e_proc_exit)
335 (*p->p_emul->e_proc_exit)(p);
336
337 /*
338 * Free the VM resources we're still holding on to.
339 * We must do this from a valid thread because doing
340 * so may block. This frees vmspace, which we don't
341 * need anymore. The only remaining lwp is the one
342 * we run at this moment, nothing runs in userland
343 * anymore.
344 */
345 ruspace(p); /* Update our vm resource use */
346 uvm_proc_exit(p);
347
348 /*
349 * Stop profiling.
350 */
351 if (__predict_false((p->p_stflag & PST_PROFIL) != 0)) {
352 mutex_spin_enter(&p->p_stmutex);
353 stopprofclock(p);
354 mutex_spin_exit(&p->p_stmutex);
355 }
356
357 /*
358 * If parent is waiting for us to exit or exec, PL_PPWAIT is set; we
359 * wake up the parent early to avoid deadlock. We can do this once
360 * the VM resources are released.
361 */
362 mutex_enter(&proc_lock);
363 if (p->p_lflag & PL_PPWAIT) {
364 lwp_t *lp;
365
366 l->l_lwpctl = NULL; /* was on loan from blocked parent */
367 p->p_lflag &= ~PL_PPWAIT;
368
369 lp = p->p_vforklwp;
370 p->p_vforklwp = NULL;
371 lp->l_vforkwaiting = false;
372 cv_broadcast(&lp->l_waitcv);
373 }
374
375 if (SESS_LEADER(p)) {
376 struct vnode *vprele = NULL, *vprevoke = NULL;
377 struct session *sp = p->p_session;
378 struct tty *tp;
379
380 if (sp->s_ttyvp) {
381 /*
382 * Controlling process.
383 * Signal foreground pgrp,
384 * drain controlling terminal
385 * and revoke access to controlling terminal.
386 */
387 tp = sp->s_ttyp;
388 mutex_spin_enter(&tty_lock);
389 if (tp->t_session == sp) {
390 /* we can't guarantee the revoke will do this */
391 pgrp = tp->t_pgrp;
392 tp->t_pgrp = NULL;
393 tp->t_session = NULL;
394 mutex_spin_exit(&tty_lock);
395 if (pgrp != NULL) {
396 pgsignal(pgrp, SIGHUP, 1);
397 }
398 mutex_exit(&proc_lock);
399 (void) ttywait(tp);
400 mutex_enter(&proc_lock);
401
402 /* The tty could have been revoked. */
403 vprevoke = sp->s_ttyvp;
404 } else
405 mutex_spin_exit(&tty_lock);
406 vprele = sp->s_ttyvp;
407 sp->s_ttyvp = NULL;
408 /*
409 * s_ttyp is not zero'd; we use this to indicate
410 * that the session once had a controlling terminal.
411 * (for logging and informational purposes)
412 */
413 }
414 sp->s_leader = NULL;
415
416 if (vprevoke != NULL || vprele != NULL) {
417 if (vprevoke != NULL) {
418 /* Releases proc_lock. */
419 proc_sessrele(sp);
420 VOP_REVOKE(vprevoke, REVOKEALL);
421 } else
422 mutex_exit(&proc_lock);
423 if (vprele != NULL)
424 vrele(vprele);
425 mutex_enter(&proc_lock);
426 }
427 }
428 fixjobc(p, p->p_pgrp, 0);
429
430 /* Release fstrans private data. */
431 fstrans_lwp_dtor(l);
432
433 /*
434 * Finalize the last LWP's specificdata, as well as the
435 * specificdata for the proc itself.
436 */
437 lwp_finispecific(l);
438 proc_finispecific(p);
439
440 /*
441 * Reset p_opptr pointer of all former children which got
442 * traced by another process and were reparented. We reset
443 * it to NULL here; the trace detach code then reparents
444 * the child to initproc. We only check allproc list, since
445 * eventual former children on zombproc list won't reference
446 * p_opptr anymore.
447 */
448 if (__predict_false(p->p_slflag & PSL_CHTRACED)) {
449 struct proc *q;
450 PROCLIST_FOREACH(q, &allproc) {
451 if (q->p_opptr == p)
452 q->p_opptr = NULL;
453 }
454 PROCLIST_FOREACH(q, &zombproc) {
455 if (q->p_opptr == p)
456 q->p_opptr = NULL;
457 }
458 }
459
460 /*
461 * Give orphaned children to init(8).
462 */
463 child = LIST_FIRST(&p->p_children);
464 wakeinit = (child != NULL);
465 for (; child != NULL; child = next_child) {
466 next_child = LIST_NEXT(child, p_sibling);
467
468 /*
469 * Traced processes are killed since their existence
470 * means someone is screwing up. Since we reset the
471 * trace flags, the logic in sys_wait4() would not be
472 * triggered to reparent the process to its
473 * original parent, so we must do this here.
474 */
475 if (__predict_false(child->p_slflag & PSL_TRACED)) {
476 mutex_enter(p->p_lock);
477 child->p_slflag &=
478 ~(PSL_TRACED|PSL_SYSCALL);
479 mutex_exit(p->p_lock);
480 if (child->p_opptr != child->p_pptr) {
481 struct proc *t = child->p_opptr;
482 proc_reparent(child, t ? t : initproc);
483 child->p_opptr = NULL;
484 } else
485 proc_reparent(child, initproc);
486 killproc(child, "orphaned traced process");
487 } else
488 proc_reparent(child, initproc);
489 }
490
491 /*
492 * Move proc from allproc to zombproc, it's now nearly ready to be
493 * collected by parent.
494 */
495 LIST_REMOVE(l, l_list);
496 LIST_REMOVE(p, p_list);
497 LIST_INSERT_HEAD(&zombproc, p, p_list);
498
499 /*
500 * Mark the process as dead. We must do this before we signal
501 * the parent.
502 */
503 p->p_stat = SDEAD;
504
505 /*
506 * Let anyone watching this DTrace probe know what we're
507 * on our way out.
508 */
509 SDT_PROBE(proc, kernel, , exit,
510 ((p->p_sflag & PS_COREDUMP) ? CLD_DUMPED :
511 (p->p_xsig ? CLD_KILLED : CLD_EXITED)),
512 0,0,0,0);
513
514 /* Put in front of parent's sibling list for parent to collect it */
515 old_parent = p->p_pptr;
516 old_parent->p_nstopchild++;
517 if (LIST_FIRST(&old_parent->p_children) != p) {
518 /* Put child where it can be found quickly */
519 LIST_REMOVE(p, p_sibling);
520 LIST_INSERT_HEAD(&old_parent->p_children, p, p_sibling);
521 }
522
523 /*
524 * Notify parent that we're gone. If parent has the P_NOCLDWAIT
525 * flag set, notify init instead (and hope it will handle
526 * this situation).
527 */
528 if (old_parent->p_flag & (PK_NOCLDWAIT|PK_CLDSIGIGN)) {
529 proc_reparent(p, initproc);
530 wakeinit = 1;
531
532 /*
533 * If this was the last child of our parent, notify
534 * parent, so in case he was wait(2)ing, he will
535 * continue.
536 */
537 if (LIST_FIRST(&old_parent->p_children) == NULL)
538 cv_broadcast(&old_parent->p_waitcv);
539 }
540
541 /* Reload parent pointer, since p may have been reparented above */
542 new_parent = p->p_pptr;
543
544 if (__predict_false(p->p_exitsig != 0)) {
545 exit_psignal(p, new_parent, &ksi);
546 kpsignal(new_parent, &ksi, NULL);
547 }
548
549 /* Calculate the final rusage info. */
550 calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
551 NULL, NULL);
552
553 callout_destroy(&l->l_timeout_ch);
554
555 /*
556 * Release any PCU resources before becoming a zombie.
557 */
558 pcu_discard_all(l);
559
560 /*
561 * Notify other processes tracking us with a knote that
562 * we're exiting.
563 *
564 * N.B. we do this here because the process is now SDEAD,
565 * and thus cannot have any more knotes attached. Also,
566 * knote_proc_exit() expects that p->p_lock is already
567 * held (and will assert so).
568 */
569 mutex_enter(p->p_lock);
570 if (!SLIST_EMPTY(&p->p_klist)) {
571 knote_proc_exit(p);
572 }
573
574 /* Free the LWP ID */
575 proc_free_lwpid(p, l->l_lid);
576 lwp_drainrefs(l);
577 lwp_lock(l);
578 l->l_prflag &= ~LPR_DETACHED;
579 l->l_stat = LSZOMB;
580 lwp_unlock(l);
581 KASSERT(curlwp == l);
582 KASSERT(p->p_nrlwps == 1);
583 KASSERT(p->p_nlwps == 1);
584 p->p_stat = SZOMB;
585 p->p_nrlwps--;
586 p->p_nzlwps++;
587 p->p_ndlwps = 0;
588 mutex_exit(p->p_lock);
589
590 /*
591 * Signal the parent to collect us, and drop the proclist lock.
592 * Drop debugger/procfs lock; no new references can be gained.
593 */
594 rw_exit(&p->p_reflock);
595 cv_broadcast(&p->p_pptr->p_waitcv);
596 mutex_exit(&proc_lock);
597 if (wakeinit)
598 cv_broadcast(&initproc->p_waitcv);
599
600 /*
601 * NOTE: WE ARE NO LONGER ALLOWED TO SLEEP!
602 */
603
604 /*
605 * Give machine-dependent code a chance to free any MD LWP
606 * resources. This must be done before uvm_lwp_exit(), in
607 * case these resources are in the PCB.
608 */
609 cpu_lwp_free(l, 1);
610
611 /* Switch away into oblivion. */
612 lwp_lock(l);
613 spc_lock(l->l_cpu);
614 mi_switch(l);
615 panic("exit1");
616 }
617
618 void
619 exit_lwps(struct lwp *l)
620 {
621 proc_t *p = l->l_proc;
622 lwp_t *l2;
623
624 retry:
625 KASSERT(mutex_owned(p->p_lock));
626
627 /*
628 * Interrupt LWPs in interruptable sleep, unsuspend suspended
629 * LWPs and then wait for everyone else to finish.
630 */
631 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
632 if (l2 == l)
633 continue;
634 lwp_lock(l2);
635 l2->l_flag |= LW_WEXIT;
636 lwp_need_userret(l2);
637 if ((l2->l_stat == LSSLEEP && (l2->l_flag & LW_SINTR)) ||
638 l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) {
639 l2->l_flag &= ~LW_DBGSUSPEND;
640 /* setrunnable() will release the lock. */
641 setrunnable(l2);
642 continue;
643 }
644 lwp_unlock(l2);
645 }
646
647 /*
648 * Wait for every LWP to exit. Note: LWPs can get suspended/slept
649 * behind us or there may even be new LWPs created. Therefore, a
650 * full retry is required on error.
651 */
652 while (p->p_nlwps > 1) {
653 if (lwp_wait(l, 0, NULL, true)) {
654 goto retry;
655 }
656 }
657
658 KASSERT(p->p_nlwps == 1);
659 }
660
661 int
662 do_sys_waitid(idtype_t idtype, id_t id, int *pid, int *status, int options,
663 struct wrusage *wru, siginfo_t *si)
664 {
665 proc_t *child;
666 int error;
667
668
669 if (wru != NULL)
670 memset(wru, 0, sizeof(*wru));
671 if (si != NULL)
672 memset(si, 0, sizeof(*si));
673
674 mutex_enter(&proc_lock);
675 error = find_stopped_child(curproc, idtype, id, options, &child,
676 wru, si);
677 if (child == NULL) {
678 mutex_exit(&proc_lock);
679 *pid = 0;
680 *status = 0;
681 return error;
682 }
683 *pid = child->p_pid;
684
685 if (child->p_stat == SZOMB) {
686 /* Child is exiting */
687 *status = P_WAITSTATUS(child);
688 /* proc_free() will release the proc_lock. */
689 if (options & WNOWAIT) {
690 mutex_exit(&proc_lock);
691 } else {
692 proc_free(child, wru);
693 }
694 } else {
695 /* Don't mark SIGCONT if we are being stopped */
696 *status = (child->p_xsig == SIGCONT && child->p_stat != SSTOP) ?
697 W_CONTCODE() : W_STOPCODE(child->p_xsig);
698 mutex_exit(&proc_lock);
699 }
700 return 0;
701 }
702
703 int
704 do_sys_wait(int *pid, int *status, int options, struct rusage *ru)
705 {
706 idtype_t idtype;
707 id_t id;
708 int ret;
709 struct wrusage wru;
710
711 /*
712 * Translate the special pid values into the (idtype, pid)
713 * pair for wait6. The WAIT_MYPGRP case is handled by
714 * find_stopped_child() on its own.
715 */
716 if (*pid == WAIT_ANY) {
717 idtype = P_ALL;
718 id = 0;
719 } else if (*pid < 0) {
720 idtype = P_PGID;
721 id = (id_t)-*pid;
722 } else {
723 idtype = P_PID;
724 id = (id_t)*pid;
725 }
726 options |= WEXITED | WTRAPPED;
727 ret = do_sys_waitid(idtype, id, pid, status, options, ru ? &wru : NULL,
728 NULL);
729 if (ru)
730 *ru = wru.wru_self;
731 return ret;
732 }
733
734 int
735 sys___wait450(struct lwp *l, const struct sys___wait450_args *uap,
736 register_t *retval)
737 {
738 /* {
739 syscallarg(int) pid;
740 syscallarg(int *) status;
741 syscallarg(int) options;
742 syscallarg(struct rusage *) rusage;
743 } */
744 int error, status, pid = SCARG(uap, pid);
745 struct rusage ru;
746
747 error = do_sys_wait(&pid, &status, SCARG(uap, options),
748 SCARG(uap, rusage) != NULL ? &ru : NULL);
749
750 retval[0] = pid;
751 if (pid == 0) {
752 return error;
753 }
754 if (SCARG(uap, status)) {
755 error = copyout(&status, SCARG(uap, status), sizeof(status));
756 }
757 if (SCARG(uap, rusage) && error == 0) {
758 error = copyout(&ru, SCARG(uap, rusage), sizeof(ru));
759 }
760 return error;
761 }
762
763 int
764 sys_wait6(struct lwp *l, const struct sys_wait6_args *uap, register_t *retval)
765 {
766 /* {
767 syscallarg(idtype_t) idtype;
768 syscallarg(id_t) id;
769 syscallarg(int *) status;
770 syscallarg(int) options;
771 syscallarg(struct wrusage *) wru;
772 syscallarg(siginfo_t *) si;
773 } */
774 struct wrusage wru, *wrup;
775 siginfo_t si, *sip;
776 idtype_t idtype;
777 int pid;
778 id_t id;
779 int error, status;
780
781 idtype = SCARG(uap, idtype);
782 id = SCARG(uap, id);
783
784 if (SCARG(uap, wru) != NULL)
785 wrup = &wru;
786 else
787 wrup = NULL;
788
789 if (SCARG(uap, info) != NULL)
790 sip = &si;
791 else
792 sip = NULL;
793
794 /*
795 * We expect all callers of wait6() to know about WEXITED and
796 * WTRAPPED.
797 */
798 error = do_sys_waitid(idtype, id, &pid, &status, SCARG(uap, options),
799 wrup, sip);
800
801 retval[0] = pid; /* tell userland who it was */
802
803 #if 0
804 /*
805 * should we copyout if there was no process, hence no useful data?
806 * We don't for an old style wait4() (etc) but I believe
807 * FreeBSD does for wait6(), so a tossup... Go with FreeBSD for now.
808 */
809 if (pid == 0)
810 return error;
811 #endif
812
813 if (SCARG(uap, status) != NULL && error == 0)
814 error = copyout(&status, SCARG(uap, status), sizeof(status));
815 if (SCARG(uap, wru) != NULL && error == 0)
816 error = copyout(&wru, SCARG(uap, wru), sizeof(wru));
817 if (SCARG(uap, info) != NULL && error == 0)
818 error = copyout(&si, SCARG(uap, info), sizeof(si));
819 return error;
820 }
821
822
823 /*
824 * Find a process that matches the provided criteria, and fill siginfo
825 * and resources if found.
826 * Returns:
827 * -1: Not found, abort early
828 * 0: Not matched
829 * 1: Matched, there might be more matches
830 * 2: This is the only match
831 */
832 static int
833 match_process(const struct proc *pp, struct proc **q, idtype_t idtype, id_t id,
834 int options, struct wrusage *wrusage, siginfo_t *siginfo)
835 {
836 struct rusage *rup;
837 struct proc *p = *q;
838 int rv = 1;
839
840 switch (idtype) {
841 case P_ALL:
842 mutex_enter(p->p_lock);
843 break;
844 case P_PID:
845 if (p->p_pid != (pid_t)id) {
846 p = *q = proc_find_raw((pid_t)id);
847 if (p == NULL || p->p_stat == SIDL || p->p_pptr != pp) {
848 *q = NULL;
849 return -1;
850 }
851 }
852 mutex_enter(p->p_lock);
853 rv++;
854 break;
855 case P_PGID:
856 if (p->p_pgid != (pid_t)id)
857 return 0;
858 mutex_enter(p->p_lock);
859 break;
860 case P_SID:
861 if (p->p_session->s_sid != (pid_t)id)
862 return 0;
863 mutex_enter(p->p_lock);
864 break;
865 case P_UID:
866 mutex_enter(p->p_lock);
867 if (kauth_cred_geteuid(p->p_cred) != (uid_t)id) {
868 mutex_exit(p->p_lock);
869 return 0;
870 }
871 break;
872 case P_GID:
873 mutex_enter(p->p_lock);
874 if (kauth_cred_getegid(p->p_cred) != (gid_t)id) {
875 mutex_exit(p->p_lock);
876 return 0;
877 }
878 break;
879 case P_CID:
880 case P_PSETID:
881 case P_CPUID:
882 /* XXX: Implement me */
883 default:
884 return 0;
885 }
886
887 if ((options & WEXITED) == 0 && p->p_stat == SZOMB) {
888 mutex_exit(p->p_lock);
889 return 0;
890 }
891
892 if (siginfo != NULL) {
893 siginfo->si_errno = 0;
894
895 /*
896 * SUSv4 requires that the si_signo value is always
897 * SIGCHLD. Obey it despite the rfork(2) interface
898 * allows to request other signal for child exit
899 * notification.
900 */
901 siginfo->si_signo = SIGCHLD;
902
903 /*
904 * This is still a rough estimate. We will fix the
905 * cases TRAPPED, STOPPED, and CONTINUED later.
906 */
907 if (p->p_sflag & PS_COREDUMP) {
908 siginfo->si_code = CLD_DUMPED;
909 siginfo->si_status = p->p_xsig;
910 } else if (p->p_xsig) {
911 siginfo->si_code = CLD_KILLED;
912 siginfo->si_status = p->p_xsig;
913 } else {
914 siginfo->si_code = CLD_EXITED;
915 siginfo->si_status = p->p_xexit;
916 }
917
918 siginfo->si_pid = p->p_pid;
919 siginfo->si_uid = kauth_cred_geteuid(p->p_cred);
920 siginfo->si_utime = p->p_stats->p_ru.ru_utime.tv_sec;
921 siginfo->si_stime = p->p_stats->p_ru.ru_stime.tv_sec;
922 }
923
924 /*
925 * There should be no reason to limit resources usage info to
926 * exited processes only. A snapshot about any resources used
927 * by a stopped process may be exactly what is needed.
928 */
929 if (wrusage != NULL) {
930 rup = &wrusage->wru_self;
931 *rup = p->p_stats->p_ru;
932 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL);
933
934 rup = &wrusage->wru_children;
935 *rup = p->p_stats->p_cru;
936 calcru(p, &rup->ru_utime, &rup->ru_stime, NULL, NULL);
937 }
938
939 mutex_exit(p->p_lock);
940 return rv;
941 }
942
943 /*
944 * Determine if there are existing processes being debugged
945 * that used to be (and sometime later will be again) children
946 * of a specific parent (while matching wait criteria)
947 */
948 static bool
949 debugged_child_exists(idtype_t idtype, id_t id, int options, siginfo_t *si,
950 const struct proc *parent)
951 {
952 struct proc *pp;
953
954 /*
955 * If we are searching for a specific pid, we can optimise a little
956 */
957 if (idtype == P_PID) {
958 /*
959 * Check the specific process to see if its real parent is us
960 */
961 pp = proc_find_raw((pid_t)id);
962 if (pp != NULL && pp->p_stat != SIDL && pp->p_opptr == parent) {
963 /*
964 * using P_ALL here avoids match_process() doing the
965 * same work that we just did, but incorrectly for
966 * this scenario.
967 */
968 if (match_process(parent, &pp, P_ALL, id, options,
969 NULL, si))
970 return true;
971 }
972 return false;
973 }
974
975 /*
976 * For the hard cases, just look everywhere to see if some
977 * stolen (reparented) process is really our lost child.
978 * Then check if that process could satisfy the wait conditions.
979 */
980
981 /*
982 * XXX inefficient, but hopefully fairly rare.
983 * XXX should really use a list of reparented processes.
984 */
985 PROCLIST_FOREACH(pp, &allproc) {
986 if (pp->p_stat == SIDL) /* XXX impossible ?? */
987 continue;
988 if (pp->p_opptr == parent &&
989 match_process(parent, &pp, idtype, id, options, NULL, si))
990 return true;
991 }
992 PROCLIST_FOREACH(pp, &zombproc) {
993 if (pp->p_stat == SIDL) /* XXX impossible ?? */
994 continue;
995 if (pp->p_opptr == parent &&
996 match_process(parent, &pp, idtype, id, options, NULL, si))
997 return true;
998 }
999
1000 return false;
1001 }
1002
1003 /*
1004 * Scan list of child processes for a child process that has stopped or
1005 * exited. Used by sys_wait4 and 'compat' equivalents.
1006 *
1007 * Must be called with the proc_lock held, and may release while waiting.
1008 */
1009 static int
1010 find_stopped_child(struct proc *parent, idtype_t idtype, id_t id, int options,
1011 struct proc **child_p, struct wrusage *wru, siginfo_t *si)
1012 {
1013 struct proc *child, *dead;
1014 int error;
1015
1016 KASSERT(mutex_owned(&proc_lock));
1017
1018 if (options & ~WALLOPTS) {
1019 *child_p = NULL;
1020 return SET_ERROR(EINVAL);
1021 }
1022
1023 if ((options & WSELECTOPTS) == 0) {
1024 /*
1025 * We will be unable to find any matching processes,
1026 * because there are no known events to look for.
1027 * Prefer to return error instead of blocking
1028 * indefinitely.
1029 */
1030 *child_p = NULL;
1031 return SET_ERROR(EINVAL);
1032 }
1033
1034 if ((pid_t)id == WAIT_MYPGRP && (idtype == P_PID || idtype == P_PGID)) {
1035 id = (id_t)parent->p_pgid;
1036 idtype = P_PGID;
1037 }
1038
1039 for (;;) {
1040 error = ECHILD;
1041 dead = NULL;
1042
1043 LIST_FOREACH(child, &parent->p_children, p_sibling) {
1044 int rv = match_process(parent, &child, idtype, id,
1045 options, wru, si);
1046 if (rv == -1)
1047 break;
1048 if (rv == 0)
1049 continue;
1050
1051 /*
1052 * Wait for processes with p_exitsig != SIGCHLD
1053 * processes only if WALTSIG is set; wait for
1054 * processes with p_exitsig == SIGCHLD only
1055 * if WALTSIG is clear.
1056 */
1057 if (((options & WALLSIG) == 0) &&
1058 (options & WALTSIG ? child->p_exitsig == SIGCHLD
1059 : P_EXITSIG(child) != SIGCHLD)){
1060 if (rv == 2) {
1061 child = NULL;
1062 break;
1063 }
1064 continue;
1065 }
1066
1067 error = 0;
1068 if ((options & WNOZOMBIE) == 0) {
1069 if (child->p_stat == SZOMB)
1070 break;
1071 if (child->p_stat == SDEAD) {
1072 /*
1073 * We may occasionally arrive here
1074 * after receiving a signal, but
1075 * immediately before the child
1076 * process is zombified. The wait
1077 * will be short, so avoid returning
1078 * to userspace.
1079 */
1080 dead = child;
1081 }
1082 }
1083
1084 if ((options & WCONTINUED) != 0 &&
1085 child->p_xsig == SIGCONT &&
1086 (child->p_sflag & PS_CONTINUED)) {
1087 if ((options & WNOWAIT) == 0) {
1088 child->p_sflag &= ~PS_CONTINUED;
1089 child->p_waited = 1;
1090 parent->p_nstopchild--;
1091 }
1092 if (si) {
1093 si->si_status = child->p_xsig;
1094 si->si_code = CLD_CONTINUED;
1095 }
1096 break;
1097 }
1098
1099 if ((options & (WTRAPPED|WSTOPPED)) != 0 &&
1100 child->p_stat == SSTOP &&
1101 child->p_waited == 0 &&
1102 ((child->p_slflag & PSL_TRACED) ||
1103 options & (WUNTRACED|WSTOPPED))) {
1104 if ((options & WNOWAIT) == 0) {
1105 child->p_waited = 1;
1106 parent->p_nstopchild--;
1107 }
1108 if (si) {
1109 si->si_status = child->p_xsig;
1110 si->si_code =
1111 (child->p_slflag & PSL_TRACED) ?
1112 CLD_TRAPPED : CLD_STOPPED;
1113 }
1114 break;
1115 }
1116 if (parent->p_nstopchild == 0 || rv == 2) {
1117 child = NULL;
1118 break;
1119 }
1120 }
1121
1122 /*
1123 * If we found nothing, but we are the bereaved parent
1124 * of a stolen child, look and see if that child (or
1125 * one of them) meets our search criteria. If so, then
1126 * we cannot succeed, but we can hang (wait...),
1127 * or if WNOHANG, return 0 instead of ECHILD
1128 */
1129 if (child == NULL && error == ECHILD &&
1130 (parent->p_slflag & PSL_CHTRACED) &&
1131 debugged_child_exists(idtype, id, options, si, parent))
1132 error = 0;
1133
1134 if (child != NULL || error != 0 ||
1135 ((options & WNOHANG) != 0 && dead == NULL)) {
1136 *child_p = child;
1137 return SET_ERROR(error);
1138 }
1139
1140 /*
1141 * Wait for another child process to stop.
1142 */
1143 error = cv_wait_sig(&parent->p_waitcv, &proc_lock);
1144
1145 if (error != 0) {
1146 *child_p = NULL;
1147 return error;
1148 }
1149 }
1150 }
1151
/*
 * Free a process after parent has taken all the state info.  Must be called
 * with the proclist lock held, and will release before returning.
 *
 * If wru is not NULL and the process is reaped here (rather than handed
 * back to its original parent), its final resource usage is copied into
 * *wru for the caller.
 */
1158 static void
1159 proc_free(struct proc *p, struct wrusage *wru)
1160 {
1161 struct proc *parent = p->p_pptr;
1162 struct lwp *l;
1163 ksiginfo_t ksi;
1164 kauth_cred_t cred1, cred2;
1165 uid_t uid;
1166
1167 KASSERT(mutex_owned(&proc_lock));
1168 KASSERT(p->p_nlwps == 1);
1169 KASSERT(p->p_nzlwps == 1);
1170 KASSERT(p->p_nrlwps == 0);
1171 KASSERT(p->p_stat == SZOMB);
1172
1173 /*
1174 * If we got the child via ptrace(2) or procfs, and
1175 * the parent is different (meaning the process was
1176 * attached, rather than run as a child), then we need
1177 * to give it back to the old parent, and send the
1178 * parent the exit signal. The rest of the cleanup
1179 * will be done when the old parent waits on the child.
1180 */
1181 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_opptr != parent) {
1182 mutex_enter(p->p_lock);
1183 p->p_slflag &= ~(PSL_TRACED|PSL_SYSCALL);
1184 mutex_exit(p->p_lock);
1185 parent = (p->p_opptr == NULL) ? initproc : p->p_opptr;
1186 proc_reparent(p, parent);
1187 p->p_opptr = NULL;
1188 if (p->p_exitsig != 0) {
1189 exit_psignal(p, parent, &ksi);
1190 kpsignal(parent, &ksi, NULL);
1191 }
1192 cv_broadcast(&parent->p_waitcv);
1193 mutex_exit(&proc_lock);
1194 return;
1195 }
1196
1197 sched_proc_exit(parent, p);
1198
1199 /*
1200 * Add child times of exiting process onto its own times.
1201 * This cannot be done any earlier else it might get done twice.
1202 */
1203 l = LIST_FIRST(&p->p_lwps);
1204 ruadd(&p->p_stats->p_ru, &l->l_ru);
1205 ruadd(&p->p_stats->p_ru, &p->p_stats->p_cru);
1206 ruadd(&parent->p_stats->p_cru, &p->p_stats->p_ru);
1207 if (wru != NULL) {
1208 wru->wru_self = p->p_stats->p_ru;
1209 wru->wru_children = p->p_stats->p_cru;
1210 }
1211 p->p_xsig = 0;
1212 p->p_xexit = 0;
1213
1214 /*
1215 * At this point we are going to start freeing the final resources.
1216 * If anyone tries to access the proc structure after here they will
1217 * get a shock - bits are missing. Attempt to make it hard! We
1218 * don't bother with any further locking past this point.
1219 */
1220 p->p_stat = SIDL; /* not even a zombie any more */
1221 LIST_REMOVE(p, p_list); /* off zombproc */
1222 parent->p_nstopchild--;
1223 LIST_REMOVE(p, p_sibling);
1224
1225 /*
1226 * Let pid be reallocated.
1227 */
1228 proc_free_pid(p->p_pid);
1229 atomic_dec_uint(&nprocs);
1230
1231 /*
1232 * Unlink process from its process group.
1233 * Releases the proc_lock.
1234 */
1235 proc_leavepgrp(p);
1236
1237 /*
1238 * Delay release until after lwp_free.
1239 */
1240 cred2 = l->l_cred;
1241
1242 /*
1243 * Free the last LWP's resources.
1244 *
1245 * lwp_free ensures the LWP is no longer running on another CPU.
1246 */
1247 lwp_free(l, false, true);
1248
1249 /*
1250 * Now no one except us can reach the process p.
1251 */
1252
1253 /*
1254 * Decrement the count of procs running with this uid.
1255 */
1256 cred1 = p->p_cred;
1257 uid = kauth_cred_getuid(cred1);
1258 (void)chgproccnt(uid, -1);
1259
1260 /*
1261 * Release substructures.
1262 */
1263
1264 lim_free(p->p_limit);
1265 pstatsfree(p->p_stats);
1266 kauth_cred_free(cred1);
1267 kauth_cred_free(cred2);
1268
1269 /*
1270 * Release reference to text vnode
1271 */
1272 if (p->p_textvp)
1273 vrele(p->p_textvp);
1274 kmem_strfree(p->p_path);
1275
1276 mutex_destroy(&p->p_auxlock);
1277 mutex_obj_free(p->p_lock);
1278 mutex_destroy(&p->p_stmutex);
1279 cv_destroy(&p->p_waitcv);
1280 cv_destroy(&p->p_lwpcv);
1281 rw_destroy(&p->p_reflock);
1282
1283 proc_free_mem(p);
1284 }
1285
1286 /*
1287 * Change the parent of a process for tracing purposes.
1288 */
1289 void
1290 proc_changeparent(struct proc *t, struct proc *p)
1291 {
1292 SET(t->p_slflag, PSL_TRACED);
1293 t->p_opptr = t->p_pptr;
1294 if (t->p_pptr == p)
1295 return;
1296 struct proc *parent = t->p_pptr;
1297
1298 if (parent->p_lock < t->p_lock) {
1299 if (!mutex_tryenter(parent->p_lock)) {
1300 mutex_exit(t->p_lock);
1301 mutex_enter(parent->p_lock);
1302 mutex_enter(t->p_lock);
1303 }
1304 } else if (parent->p_lock > t->p_lock) {
1305 mutex_enter(parent->p_lock);
1306 }
1307 parent->p_slflag |= PSL_CHTRACED;
1308 proc_reparent(t, p);
1309 if (parent->p_lock != t->p_lock)
1310 mutex_exit(parent->p_lock);
1311 }
1312
1313 /*
1314 * make process 'parent' the new parent of process 'child'.
1315 *
1316 * Must be called with proc_lock held.
1317 */
1318 void
1319 proc_reparent(struct proc *child, struct proc *parent)
1320 {
1321
1322 KASSERT(mutex_owned(&proc_lock));
1323
1324 if (child->p_pptr == parent)
1325 return;
1326
1327 if (child->p_stat == SZOMB || child->p_stat == SDEAD ||
1328 (child->p_stat == SSTOP && !child->p_waited)) {
1329 child->p_pptr->p_nstopchild--;
1330 parent->p_nstopchild++;
1331 }
1332 if (parent == initproc) {
1333 child->p_exitsig = SIGCHLD;
1334 child->p_ppid = parent->p_pid;
1335 }
1336
1337 LIST_REMOVE(child, p_sibling);
1338 LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
1339 child->p_pptr = parent;
1340 }
1341