/*	$NetBSD: kern_proc.c,v 1.159.2.1 2010/04/30 14:44:10 uebayasi Exp $	*/

/*-
 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.159.2.1 2010/04/30 14:44:10 uebayasi Exp $");

#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_dtrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/pool.h>
#include <sys/pset.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/filedesc.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/dtrace_bsd.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */

kmutex_t *proc_lock;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p) ((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
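
/*
 * Illustration (not compiled): the low bit of pt_proc distinguishes a
 * real proc pointer (at least word aligned, so low bit clear) from a
 * free-list link.  For example P_FREE(5) == (struct proc *)0xb;
 * P_VALID() on that value is false, and P_NEXT() recovers the link
 * value 5.
 */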

#define	INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

/* Components of the first process -- never freed. */

extern struct emul emul_netbsd;	/* defined in kern_exec.c */

struct session session0 = {
	.s_count = 1,
	.s_sid = 0,
};
struct pgrp pgrp0 = {
	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
	.pg_session = &session0,
};
filedesc_t filedesc0;
struct cwdinfo cwdi0 = {
	.cwdi_cmask = CMASK,		/* see cmask below */
	.cwdi_refcnt = 1,
};
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct turnstile turnstile0;
struct proc proc0 = {
	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
	.p_nlwps = 1,
	.p_nrlwps = 1,
	.p_nlwpid = 1,		/* must match lwp0.l_lid */
	.p_pgrp = &pgrp0,
	.p_comm = "system",
	/*
	 * Set PK_NOCLDWAIT so that kernel threads are reparented to
	 * init(8) when they exit.  init(8) can easily wait them out
	 * for us.
	 */
	.p_flag = PK_SYSTEM | PK_NOCLDWAIT,
	.p_stat = SACTIVE,
	.p_nice = NZERO,
	.p_emul = &emul_netbsd,
	.p_cwdi = &cwdi0,
	.p_limit = &limit0,
	.p_fd = &filedesc0,
	.p_vmspace = &vmspace0,
	.p_stats = &pstat0,
	.p_sigacts = &sigacts0,
};
struct lwp lwp0 __aligned(MIN_LWP_ALIGNMENT) = {
#ifdef LWP0_CPU_INFO
	.l_cpu = LWP0_CPU_INFO,
#endif
	.l_proc = &proc0,
	.l_lid = 1,
	.l_flag = LW_SYSTEM,
	.l_stat = LSONPROC,
	.l_ts = &turnstile0,
	.l_syncobj = &sched_syncobj,
	.l_refcnt = 1,
	.l_priority = PRI_USER + NPRI_USER - 1,
	.l_inheritedprio = -1,
	.l_class = SCHED_OTHER,
	.l_psid = PS_NONE,
	.l_pi_lenders = SLIST_HEAD_INITIALIZER(&lwp0.l_pi_lenders),
	.l_name = __UNCONST("swapper"),
	.l_fd = &filedesc0,
};
kauth_cred_t cred0;

int nofile = NOFILE;
int maxuprc = MAXUPRC;
int cmask = CMASK;

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static struct pgrp *	pg_remove(pid_t);
static void		pg_delete(pid_t);
static void		orphanpg(struct pgrp *);

static specificdata_domain_t proc_specificdata_domain;

static pool_cache_t proc_cache;

static kauth_listener_t proc_listener;

static int
proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_CANSEE: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_CANSEE_ARGS:
		case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
		case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
			result = KAUTH_RESULT_ALLOW;

			break;

		case KAUTH_REQ_PROCESS_CANSEE_ENV:
			if (kauth_cred_getuid(cred) !=
			    kauth_cred_getuid(p->p_cred) ||
			    kauth_cred_getuid(cred) !=
			    kauth_cred_getsvuid(p->p_cred))
				break;

			result = KAUTH_RESULT_ALLOW;

			break;

		default:
			break;
		}

		break;
	}

	case KAUTH_PROCESS_FORK: {
		int lnprocs = (int)(unsigned long)arg2;

		/*
		 * Don't allow a nonprivileged user to use the last few
		 * processes.  The variable lnprocs is the current number of
		 * processes, maxproc is the limit.
		 */
		if (__predict_false((lnprocs >= maxproc - 5)))
			break;

		result = KAUTH_RESULT_ALLOW;

		break;
	}

	case KAUTH_PROCESS_CORENAME:
	case KAUTH_PROCESS_STOPFLAG:
		if (proc_uidmatch(cred, p->p_cred) == 0)
			result = KAUTH_RESULT_ALLOW;

		break;

	default:
		break;
	}

	return result;
}

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	u_int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
	    * sizeof(struct pid_table), KM_SLEEP);

	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	proc_specificdata_domain = specificdata_domain_create();
	KASSERT(proc_specificdata_domain != NULL);

	proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
	    "procpl", NULL, IPL_NONE, NULL, NULL, NULL);

	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    proc_listener_cb, NULL);
}

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct lwp *l;
	rlim_t lim;
	int i;

	p = &proc0;
	pg = &pgrp0;
	l = &lwp0;

	KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
	KASSERT(l->l_lid == p->p_nlwpid);

	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "wait");
	cv_init(&p->p_lwpcv, "lwpwait");

	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	pid_table[0].pt_pgrp = pg;
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	callout_init(&l->l_timeout_ch, CALLOUT_MPSAFE);
	callout_setfunc(&l->l_timeout_ch, sleepq_timeout, l);
	cv_init(&l->l_sigcv, "sigwait");

	/* Create credentials. */
	cred0 = kauth_cred_alloc();
	p->p_cred = cred0;
	kauth_cred_hold(cred0);
	l->l_cred = cred0;

	/* Create the CWD info. */
	rw_init(&cwdi0.cwdi_lock);

	/* Create the limits structures. */
	mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
	for (i = 0; i < __arraycount(limit0.pl_rlimit); i++)
		limit0.pl_rlimit[i].rlim_cur =
		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
	    maxfiles < nofile ? maxfiles : nofile;

	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
	    maxproc < maxuprc ? maxproc : maxuprc;

	lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free));
	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0.pl_corename = defcorename;
	limit0.pl_refcnt = 1;
	limit0.pl_sv_limit = NULL;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	fd_init(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));

	/* Initialize signal state for proc0. XXX IPL_SCHED */
	mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
	siginit(p);

	kdtrace_proc_ctor(NULL, p);

	proc_initspecific(p);
	lwp_initspecific(l);

	SYSCALL_TIME_LWP_INIT(l);
}

/*
 * Session reference counting.
 */

void
proc_sesshold(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	ss->s_count++;
}

void
proc_sessrele(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	/*
	 * We keep the pgrp with the same id as the session in order to
	 * stop a process being given the same pid.  Since the pgrp holds
	 * a reference to the session, it must be a 'zombie' pgrp by now.
	 */
	if (--ss->s_count == 0) {
		struct pgrp *pg;

		pg = pg_remove(ss->s_sid);
		mutex_exit(proc_lock);

		kmem_free(pg, sizeof(struct pgrp));
		kmem_free(ss, sizeof(struct session));
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;
	struct session *session;
	int error;

	mutex_enter(proc_lock);
	if (pg_id < 0) {
		struct proc *p1 =
		    p_find(-pg_id, PFIND_LOCKED | PFIND_UNLOCK_FAIL);
		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pg_find(pg_id, PFIND_LOCKED | PFIND_UNLOCK_FAIL);
		if (pgrp == NULL)
			return EINVAL;
	}
	session = pgrp->pg_session;
	if (session != p->p_pgrp->pg_session)
		error = EPERM;
	else
		error = 0;
	mutex_exit(proc_lock);

	return error;
}

/*
 * p_inferior: is p an inferior of q?
 */
static inline bool
p_inferior(struct proc *p, struct proc *q)
{

	KASSERT(mutex_owned(proc_lock));

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return false;
	return true;
}

/*
 * Locate a process by number
 */
struct proc *
p_find(pid_t pid, uint flags)
{
	struct proc *p;
	char stat;

	if (!(flags & PFIND_LOCKED))
		mutex_enter(proc_lock);

	p = pid_table[pid & pid_tbl_mask].pt_proc;

	/* Only allow live processes to be found by pid. */
	/* XXXSMP p_stat */
	if (P_VALID(p) && p->p_pid == pid && ((stat = p->p_stat) == SACTIVE ||
	    stat == SSTOP || ((flags & PFIND_ZOMBIE) &&
	    (stat == SZOMB || stat == SDEAD || stat == SDYING)))) {
		if (flags & PFIND_UNLOCK_OK)
			mutex_exit(proc_lock);
		return p;
	}
	if (flags & PFIND_UNLOCK_FAIL)
		mutex_exit(proc_lock);
	return NULL;
}


/*
 * Locate a process group by number
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
	struct pgrp *pg;

	if (!(flags & PFIND_LOCKED))
		mutex_enter(proc_lock);
	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		if (flags & PFIND_UNLOCK_FAIL)
			mutex_exit(proc_lock);
		return NULL;
	}

	if (flags & PFIND_UNLOCK_OK)
		mutex_exit(proc_lock);
	return pg;
}
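
/*
 * Usage sketch for the PFIND_* flags (illustrative, not compiled):
 *
 *	mutex_enter(proc_lock);
 *	p = p_find(pid, PFIND_LOCKED);	(proc_lock stays held either way)
 *	...
 *	mutex_exit(proc_lock);
 *
 * With PFIND_UNLOCK_FAIL the lock is dropped when the lookup fails;
 * with PFIND_UNLOCK_OK it is dropped when the lookup succeeds.
 */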

static void
expand_pid_table(void)
{
	size_t pt_size, tsz;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	pid_t pid;
	u_int i;

	pt_size = pid_tbl_mask + 1;
	tsz = pt_size * 2 * sizeof(struct pid_table);
	new_pt = kmem_alloc(tsz, KM_SLEEP);

	mutex_enter(proc_lock);
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		mutex_exit(proc_lock);
		kmem_free(new_pt, tsz);
		return;
	}

	/*
	 * Copy entries from the old table into the new one.
	 * If 'pid' is 'odd' it goes in the upper half,
	 * even pids go in the lower half.
	 * Free items stay in the low half so we don't have to
	 * fix up the references to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
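	/*
	 * Worked example (illustrative): growing a 4-entry table to 8,
	 * the entry for pid 6 (old slot 6 & 3 == 2) moves to new slot 6
	 * since 6 & 4 != 0, while pid 8 (old slot 0) stays in slot 0;
	 * the unused twin of each old slot is pushed onto the free list.
	 */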
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Save old table size and switch tables */
	tsz = pt_size * sizeof(struct pid_table);
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	mutex_exit(proc_lock);
	kmem_free(n_pt, tsz);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_cache_get(proc_cache, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */

	proc_initspecific(p);
	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		mutex_enter(proc_lock);
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		mutex_exit(proc_lock);
	}

	/* pid is 'saved use count' + 'size' + entry */
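	/*
	 * (Illustration: with pid_tbl_mask == 31, next_free_pt == 3 and
	 * a saved use count of 64 in the upper bits of nxt, the new pid
	 * is 64 + 32 + 3 == 99; the next reuse of slot 3 then yields
	 * 131, so a pid is not recycled until pid_max forces a wrap.)
	 */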
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	kdtrace_proc_ctor(NULL, p);

	mutex_exit(proc_lock);

	return p;
}

/*
 * Free a process id - called from proc_free (in kern_exit.c)
 *
 * Called with the proc_lock held.
 */
void
proc_free_pid(struct proc *p)
{
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
		    pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	atomic_dec_uint(&nprocs);
}

void
proc_free_mem(struct proc *p)
{

	kdtrace_proc_dtor(NULL, p);
	pool_cache_put(proc_cache, p);
}

/*
 * proc_enterpgrp: move p to a new or existing process group (and session).
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid and sys_setpgid.
 */
int
proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *p;
	int rval;
	pid_t pg_id = NO_PGID;

	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;

	/* Allocate data areas we might need before doing any validity checks */
	mutex_enter(proc_lock);		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		mutex_exit(proc_lock);
		new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
		mutex_enter(proc_lock);
	} else
		new_pgrp = NULL;
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (pid != curp->p_pid) {
		/* must exist and be one of our children... */
		if ((p = p_find(pid, PFIND_LOCKED)) == NULL ||
		    !p_inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & PK_EXEC) {
			rval = EACCES;
			goto done;
		}
	} else {
		/* ... setsid() cannot re-enter a pgrp */
		if (mksess && (curp->p_pgid == curp->p_pid ||
		    pg_find(curp->p_pid, PFIND_LOCKED)))
			goto done;
		p = curp;
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */

	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = NULL;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_lflag &= ~PL_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			proc_sesshold(sess);
		}
		pgrp->pg_session = sess;
		sess = NULL;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Interlock with ttread(). */
	mutex_spin_enter(&tty_lock);

	/* Move process to requested group. */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	/* Done with the swap; we can release the tty mutex. */
	mutex_spin_exit(&tty_lock);

 done:
	if (pg_id != NO_PGID) {
		/* Releases proc_lock. */
		pg_delete(pg_id);
	} else {
		mutex_exit(proc_lock);
	}
	if (sess != NULL)
		kmem_free(sess, sizeof(*sess));
	if (new_pgrp != NULL)
		kmem_free(new_pgrp, sizeof(*new_pgrp));
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * proc_leavepgrp: remove a process from its process group.
 * => must be called with the proc_lock held, which will be released;
 */
void
proc_leavepgrp(struct proc *p)
{
	struct pgrp *pgrp;

	KASSERT(mutex_owned(proc_lock));

	/* Interlock with ttread() */
	mutex_spin_enter(&tty_lock);
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = NULL;
	mutex_spin_exit(&tty_lock);

	if (LIST_EMPTY(&pgrp->pg_members)) {
		/* Releases proc_lock. */
		pg_delete(pgrp->pg_id);
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * pg_remove: remove a process group from the table.
 * => must be called with the proc_lock held;
 * => returns process group to free;
 */
static struct pgrp *
pg_remove(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;

	KASSERT(pgrp != NULL);
	KASSERT(pgrp->pg_id == pg_id);
	KASSERT(LIST_EMPTY(&pgrp->pg_members));

	pt->pt_pgrp = NULL;

	if (!P_VALID(pt->pt_proc)) {
		/* Orphaned pgrp, put slot onto free list. */
		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	return pgrp;
}

/*
 * pg_delete: delete and free a process group.
 * => must be called with the proc_lock held, which will be released.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
		mutex_exit(proc_lock);
		return;
	}

	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by
	 * proc_sessrele(), if last reference.  Note: proc_sessrele()
	 * releases proc_lock.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done by proc_sessrele(). */
		kmem_free(pg, sizeof(struct pgrp));
	}
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proc_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	KASSERT(mutex_owned(proc_lock));

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering) {
			pgrp->pg_jobc++;
			p->p_lflag &= ~PL_ORPHANPG;
		} else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering) {
				child->p_lflag &= ~PL_ORPHANPG;
				hispgrp->pg_jobc++;
			} else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}
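
/*
 * Worked illustration of the rule above: a shell in pgrp A forks a
 * pipeline into a new pgrp B of the same session.  Each pipeline
 * member has its parent (the shell) in a different pgrp of the same
 * session, so each one entering B increments B's pg_jobc.  When the
 * last such member leaves, pg_jobc drops to 0 and orphanpg() is
 * called on B.
 */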

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proc_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	KASSERT(mutex_owned(proc_lock));

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			p->p_lflag |= PL_ORPHANPG;
			psignal(p, SIGHUP);
			psignal(p, SIGCONT);
		}
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf(" id %x: ", id);
		if (P_VALID(p))
			db_printf("proc %p id %d (0x%x) %s\n",
			    p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    LIST_FIRST(&pgrp->pg_members));
			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
static int	kstackleftmin = KSTACK_SIZE;
static int	kstackleftthres = KSTACK_SIZE / 8;

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill all the stack with magic number
	 * so that later modification on it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes"
			    " (pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	int ret = 0;

	marker.p_flag = PK_MARKER;
	mutex_enter(proc_lock);
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & PK_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		KASSERT(mutex_owned(proc_lock));
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	mutex_exit(proc_lock);

	return ret;
}
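
/*
 * Usage sketch (illustrative, not compiled; count_stopped is
 * hypothetical):
 *
 *	static int
 *	count_stopped(struct proc *p, void *arg)
 *	{
 *		int *np = arg;
 *
 *		if (p->p_stat == SSTOP)
 *			(*np)++;
 *		return 0;	(returning nonzero stops the walk)
 *	}
 *
 *	int n = 0;
 *	proclist_foreach_call(&allproc, count_stopped, &n);
 *
 * The callback runs with proc_lock held and must return with it still
 * held; the marker entry keeps the iteration stable if the callback
 * blocks.
 */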

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

	/* XXXCDC: how should locking work here? */

	/* curproc exception is for coredump. */

	if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
	    (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
		return EFAULT;
	}

	uvmspace_addref(p->p_vmspace);
	*vm = p->p_vmspace;

	return 0;
}

/*
 * Acquire a write lock on the process credential.
 */
void
proc_crmod_enter(void)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct plimit *lim;
	kauth_cred_t oc;
	char *cn;

	/* Reset what needs to be reset in plimit. */
	if (p->p_limit->pl_corename != defcorename) {
		lim_privatise(p, false);
		lim = p->p_limit;
		mutex_enter(&lim->pl_lock);
		cn = lim->pl_corename;
		lim->pl_corename = defcorename;
		mutex_exit(&lim->pl_lock);
		if (cn != defcorename)
			free(cn, M_TEMP);
	}

	mutex_enter(p->p_lock);

	/* Ensure the LWP cached credentials are up to date. */
	if ((oc = l->l_cred) != p->p_cred) {
		kauth_cred_hold(p->p_cred);
		l->l_cred = p->p_cred;
		kauth_cred_free(oc);
	}
}

/*
 * Set in a new process credential, and drop the write lock.  The credential
 * must have a reference already.  Optionally, free a no-longer required
 * credential.  The scheduler also needs to inspect p_cred, so we also
 * briefly acquire the sched state mutex.
 */
void
proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
{
	struct lwp *l = curlwp, *l2;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	KASSERT(mutex_owned(p->p_lock));

	/* Is there a new credential to set in? */
	if (scred != NULL) {
		p->p_cred = scred;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2 != l)
				l2->l_prflag |= LPR_CRMOD;
		}

		/* Ensure the LWP cached credentials are up to date. */
		if ((oc = l->l_cred) != scred) {
			kauth_cred_hold(scred);
			l->l_cred = scred;
		}
	} else
		oc = NULL;	/* XXXgcc */

	if (sugid) {
		/*
		 * Mark process as having changed credentials, stops
		 * tracing etc.
		 */
		p->p_flag |= PK_SUGID;
	}

	mutex_exit(p->p_lock);

	/* If there is a credential to be released, free it now. */
	if (fcred != NULL) {
		KASSERT(scred != NULL);
		kauth_cred_free(fcred);
		if (oc != scred)
			kauth_cred_free(oc);
	}
}

/*
 * proc_specific_key_create --
 *	Create a key for subsystem proc-specific data.
 */
int
proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
}

/*
 * proc_specific_key_delete --
 *	Delete a key for subsystem proc-specific data.
 */
void
proc_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(proc_specificdata_domain, key);
}

/*
 * proc_initspecific --
 *	Initialize a proc's specificdata container.
 */
void
proc_initspecific(struct proc *p)
{
	int error;

	error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
	KASSERT(error == 0);
}

/*
 * proc_finispecific --
 *	Finalize a proc's specificdata container.
 */
void
proc_finispecific(struct proc *p)
{

	specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
}

/*
 * proc_getspecific --
 *	Return proc-specific data corresponding to the specified key.
 */
void *
proc_getspecific(struct proc *p, specificdata_key_t key)
{

	return (specificdata_getspecific(proc_specificdata_domain,
	    &p->p_specdataref, key));
}

/*
 * proc_setspecific --
 *	Set proc-specific data corresponding to the specified key.
 */
void
proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
{

	specificdata_setspecific(proc_specificdata_domain,
	    &p->p_specdataref, key, data);
}
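
/*
 * Usage sketch for the proc specificdata API (illustrative; my_key,
 * my_dtor and data are hypothetical):
 *
 *	static specificdata_key_t my_key;
 *
 *	proc_specific_key_create(&my_key, my_dtor);
 *	proc_setspecific(p, my_key, data);
 *	data = proc_getspecific(p, my_key);
 *	proc_specific_key_delete(my_key);
 */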

int
proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
{
	int r = 0;

	if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
	    kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
		/*
		 * suid proc of ours or proc not ours
		 */
		r = EPERM;
	} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
		/*
		 * sgid proc has sgid back to us temporarily
		 */
		r = EPERM;
	} else {
		/*
		 * our rgid must be in target's group list (ie,
		 * sub-processes started by a sgid process)
		 */
		int ismember = 0;

		if (kauth_cred_ismember_gid(cred,
		    kauth_cred_getgid(target), &ismember) != 0 ||
		    !ismember)
			r = EPERM;
	}

	return (r);
}