1 /*	$NetBSD: kern_proc.c,v 1.250 2020/04/26 18:53:33 thorpej Exp $	*/
2
3 /*-
4 * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1989, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.250 2020/04/26 18:53:33 thorpej Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_kstack.h"
69 #include "opt_maxuprc.h"
70 #include "opt_dtrace.h"
71 #include "opt_compat_netbsd32.h"
72 #include "opt_kaslr.h"
73 #endif
74
75 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \
76 && !defined(_RUMPKERNEL)
77 #define COMPAT_NETBSD32
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
83 #include <sys/proc.h>
84 #include <sys/resourcevar.h>
85 #include <sys/buf.h>
86 #include <sys/acct.h>
87 #include <sys/wait.h>
88 #include <sys/file.h>
89 #include <ufs/ufs/quota.h>
90 #include <sys/uio.h>
91 #include <sys/pool.h>
92 #include <sys/pset.h>
93 #include <sys/ioctl.h>
94 #include <sys/tty.h>
95 #include <sys/signalvar.h>
96 #include <sys/ras.h>
97 #include <sys/filedesc.h>
98 #include <sys/syscall_stats.h>
99 #include <sys/kauth.h>
100 #include <sys/sleepq.h>
101 #include <sys/atomic.h>
102 #include <sys/kmem.h>
103 #include <sys/namei.h>
104 #include <sys/dtrace_bsd.h>
105 #include <sys/sysctl.h>
106 #include <sys/exec.h>
107 #include <sys/cpu.h>
108 #include <sys/compat_stub.h>
109 #include <sys/futex.h>
110
111 #include <uvm/uvm_extern.h>
112 #include <uvm/uvm.h>
113
114 /*
115 * Process lists.
116 */
117
118 struct proclist allproc __cacheline_aligned;
119 struct proclist zombproc __cacheline_aligned;
120
121 static kmutex_t proc_lock_s __cacheline_aligned;
122 kmutex_t * proc_lock __read_mostly;
123
124 /*
125 * pid to lwp/proc lookup is done by indexing the pid_table array.
126 * Since pid numbers are only allocated when an empty slot
127 * has been found, there is no need to search any lists ever.
128 * (an orphaned pgrp will lock the slot, a session will lock
129 * the pgrp with the same number.)
130 * If the table is too small it is reallocated with twice the
131 * previous size and the entries 'unzipped' into the two halves.
132  * A linked list of free entries is passed through the pt_slot
133 * field of 'free' items - set odd to be an invalid ptr. Two
134 * additional bits are also used to indicate if the slot is
135 * currently occupied by a proc or lwp, and if the PID is
136 * hidden from certain kinds of lookups. We thus require a
137 * minimum alignment for proc and lwp structures (LWPs are
138 * at least 32-byte aligned).
139 */
140
141 struct pid_table {
142 uintptr_t pt_slot;
143 struct pgrp *pt_pgrp;
144 pid_t pt_pid;
145 };
146
147 #define PT_F_FREE ((uintptr_t)__BIT(0))
148 #define PT_F_LWP 0 /* pseudo-flag */
149 #define PT_F_PROC ((uintptr_t)__BIT(1))
150 #define PT_F_HIDDEN ((uintptr_t)__BIT(2))
151
152 #define PT_F_TYPEBITS (PT_F_FREE|PT_F_PROC)
153 #define PT_F_ALLBITS (PT_F_FREE|PT_F_PROC|PT_F_HIDDEN)
154
155 #define PT_VALID(s) (((s) & PT_F_FREE) == 0)
156 #define PT_RESERVED(s) ((s) == 0)
157 #define PT_HIDDEN(s) ((s) & PT_F_HIDDEN)
158 #define PT_NEXT(s) ((u_int)(s) >> 1)
159 #define PT_SET_FREE(pid) (((pid) << 1) | PT_F_FREE)
160 #define PT_SET_HIDDEN(s) ((s) | PT_F_HIDDEN)
161 #define PT_SET_LWP(l) ((uintptr_t)(l))
162 #define PT_SET_PROC(p) (((uintptr_t)(p)) | PT_F_PROC)
163 #define PT_SET_RESERVED 0
164 #define PT_GET_LWP(s) ((struct lwp *)((s) & ~PT_F_ALLBITS))
165 #define PT_GET_PROC(s) ((struct proc *)((s) & ~PT_F_ALLBITS))
166 #define PT_GET_TYPE(s) ((s) & PT_F_TYPEBITS)
167 #define PT_IS_LWP(s) (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
168 #define PT_IS_PROC(s) (PT_GET_TYPE(s) == PT_F_PROC)
169
170 #define MIN_PROC_ALIGNMENT (PT_F_ALLBITS + 1)
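/*
 * Illustration of the encoding above (comment only, not compiled): with
 * an LWP 'l' at a hypothetical 32-byte aligned kernel address such as
 * 0xffffa00012345640,
 *
 *	PT_SET_LWP(l)  == 0xffffa00012345640	(low type bits 00 -> LWP)
 *	PT_SET_PROC(p) == (uintptr_t)p | PT_F_PROC
 *	PT_SET_FREE(5) == (5 << 1) | PT_F_FREE == 0xb,	PT_NEXT(0xb) == 5
 *
 * PT_GET_LWP()/PT_GET_PROC() simply mask the low three bits off again,
 * which is why proc and lwp structures need at least MIN_PROC_ALIGNMENT
 * (8 byte) alignment.
 */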
171
172 /*
173 * Table of process IDs (PIDs).
174 *
175 * Locking order:
176 * proc_lock -> pid_table_lock
177 * or
178 * proc::p_lock -> pid_table_lock
179 */
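/*
 * For example, proc_find() and pgrp_find() below are entered with
 * proc_lock held and take pid_table_lock as a reader inside, while
 * proc_free_lwpid() is entered with proc::p_lock held and takes it
 * as a writer.
 */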
180 static krwlock_t pid_table_lock __cacheline_aligned;
181 static struct pid_table *pid_table __read_mostly;
182
183 #define INITIAL_PID_TABLE_SIZE (1 << 5)
184
185 /* Table mask, threshold for growing and number of allocated PIDs. */
186 static u_int pid_tbl_mask __read_mostly;
187 static u_int pid_alloc_lim __read_mostly;
188 static u_int pid_alloc_cnt __cacheline_aligned;
189
190 /* Next free, last free and maximum PIDs. */
191 static u_int next_free_pt __cacheline_aligned;
192 static u_int last_free_pt __cacheline_aligned;
193 static pid_t pid_max __read_mostly;
194
195 /* Components of the first process -- never freed. */
196
197 extern struct emul emul_netbsd; /* defined in kern_exec.c */
198
199 struct session session0 = {
200 .s_count = 1,
201 .s_sid = 0,
202 };
203 struct pgrp pgrp0 = {
204 .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
205 .pg_session = &session0,
206 };
207 filedesc_t filedesc0;
208 struct cwdinfo cwdi0 = {
209 .cwdi_cmask = CMASK,
210 .cwdi_refcnt = 1,
211 };
212 struct plimit limit0;
213 struct pstats pstat0;
214 struct vmspace vmspace0;
215 struct sigacts sigacts0;
216 struct proc proc0 = {
217 .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
218 .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
219 .p_nlwps = 1,
220 .p_nrlwps = 1,
221 .p_pgrp = &pgrp0,
222 .p_comm = "system",
223 /*
224 	 * Set PK_NOCLDWAIT so that kernel threads are reparented to init(8)
225 * when they exit. init(8) can easily wait them out for us.
226 */
227 .p_flag = PK_SYSTEM | PK_NOCLDWAIT,
228 .p_stat = SACTIVE,
229 .p_nice = NZERO,
230 .p_emul = &emul_netbsd,
231 .p_cwdi = &cwdi0,
232 .p_limit = &limit0,
233 .p_fd = &filedesc0,
234 .p_vmspace = &vmspace0,
235 .p_stats = &pstat0,
236 .p_sigacts = &sigacts0,
237 #ifdef PROC0_MD_INITIALIZERS
238 PROC0_MD_INITIALIZERS
239 #endif
240 };
241 kauth_cred_t cred0;
242
243 static const int nofile = NOFILE;
244 static const int maxuprc = MAXUPRC;
245
246 static int sysctl_doeproc(SYSCTLFN_PROTO);
247 static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
248 static int sysctl_security_expose_address(SYSCTLFN_PROTO);
249
250 #ifdef KASLR
251 static int kern_expose_address = 0;
252 #else
253 static int kern_expose_address = 1;
254 #endif
255 /*
256 * The process list descriptors, used during pid allocation and
257 * by sysctl. No locking on this data structure is needed since
258 * it is completely static.
259 */
260 const struct proclist_desc proclists[] = {
261 { &allproc },
262 { &zombproc },
263 { NULL },
264 };
265
266 static struct pgrp * pg_remove(pid_t);
267 static void pg_delete(pid_t);
268 static void orphanpg(struct pgrp *);
269
270 static specificdata_domain_t proc_specificdata_domain;
271
272 static pool_cache_t proc_cache;
273
274 static kauth_listener_t proc_listener;
275
276 static void fill_proc(const struct proc *, struct proc *, bool);
277 static int fill_pathname(struct lwp *, pid_t, void *, size_t *);
278 static int fill_cwd(struct lwp *, pid_t, void *, size_t *);
279
280 static int
281 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
282 void *arg0, void *arg1, void *arg2, void *arg3)
283 {
284 struct proc *p;
285 int result;
286
287 result = KAUTH_RESULT_DEFER;
288 p = arg0;
289
290 switch (action) {
291 case KAUTH_PROCESS_CANSEE: {
292 enum kauth_process_req req;
293
294 req = (enum kauth_process_req)(uintptr_t)arg1;
295
296 switch (req) {
297 case KAUTH_REQ_PROCESS_CANSEE_ARGS:
298 case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
299 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
300 case KAUTH_REQ_PROCESS_CANSEE_EPROC:
301 result = KAUTH_RESULT_ALLOW;
302 break;
303
304 case KAUTH_REQ_PROCESS_CANSEE_ENV:
305 if (kauth_cred_getuid(cred) !=
306 kauth_cred_getuid(p->p_cred) ||
307 kauth_cred_getuid(cred) !=
308 kauth_cred_getsvuid(p->p_cred))
309 break;
310
311 result = KAUTH_RESULT_ALLOW;
312
313 break;
314
315 case KAUTH_REQ_PROCESS_CANSEE_KPTR:
316 if (!kern_expose_address)
317 break;
318
319 if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM))
320 break;
321
322 result = KAUTH_RESULT_ALLOW;
323
324 break;
325
326 default:
327 break;
328 }
329
330 break;
331 }
332
333 case KAUTH_PROCESS_FORK: {
334 int lnprocs = (int)(unsigned long)arg2;
335
336 /*
337 * Don't allow a nonprivileged user to use the last few
338 * processes. The variable lnprocs is the current number of
339 * processes, maxproc is the limit.
340 */
341 if (__predict_false((lnprocs >= maxproc - 5)))
342 break;
343
344 result = KAUTH_RESULT_ALLOW;
345
346 break;
347 }
348
349 case KAUTH_PROCESS_CORENAME:
350 case KAUTH_PROCESS_STOPFLAG:
351 if (proc_uidmatch(cred, p->p_cred) == 0)
352 result = KAUTH_RESULT_ALLOW;
353
354 break;
355
356 default:
357 break;
358 }
359
360 return result;
361 }
362
363 static int
364 proc_ctor(void *arg __unused, void *obj, int flags __unused)
365 {
366 memset(obj, 0, sizeof(struct proc));
367 return 0;
368 }
369
370 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
371
372 /*
373 * Initialize global process hashing structures.
374 */
375 void
376 procinit(void)
377 {
378 const struct proclist_desc *pd;
379 u_int i;
380 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
381
382 for (pd = proclists; pd->pd_list != NULL; pd++)
383 LIST_INIT(pd->pd_list);
384
385 mutex_init(&proc_lock_s, MUTEX_DEFAULT, IPL_NONE);
386 proc_lock = &proc_lock_s;
387
388 rw_init(&pid_table_lock);
389
390 pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
391 * sizeof(struct pid_table), KM_SLEEP);
392 pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
393 pid_max = PID_MAX;
394
395 /* Set free list running through table...
396 Preset 'use count' above PID_MAX so we allocate pid 1 next. */
397 for (i = 0; i <= pid_tbl_mask; i++) {
398 pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
399 pid_table[i].pt_pgrp = 0;
400 pid_table[i].pt_pid = 0;
401 }
402 /* slot 0 is just grabbed */
403 next_free_pt = 1;
404 /* Need to fix last entry. */
405 last_free_pt = pid_tbl_mask;
406 pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
407 /* point at which we grow table - to avoid reusing pids too often */
408 pid_alloc_lim = pid_tbl_mask - 1;
409 #undef LINK_EMPTY
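	/*
	 * For illustration (assuming the defaults above, an initial table of
	 * 32 entries and PID_MAX of 30000): LINK_EMPTY evaluates to 30016, so
	 * slot 'i' starts out as PT_SET_FREE(30016 + i + 1), i.e. a free-list
	 * link whose 'use count' part already lies beyond PID_MAX.  That is
	 * what makes the first allocation in proc_alloc_pid_slot() wrap
	 * around and hand out pid 1, which is reserved for init(8) just below.
	 */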
410
411 /* Reserve PID 1 for init(8). */ /* XXX slightly gross */
412 rw_enter(&pid_table_lock, RW_WRITER);
413 if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
414 panic("failed to reserve PID 1 for init(8)");
415 rw_exit(&pid_table_lock);
416
417 proc_specificdata_domain = specificdata_domain_create();
418 KASSERT(proc_specificdata_domain != NULL);
419
420 size_t proc_alignment = coherency_unit;
421 if (proc_alignment < MIN_PROC_ALIGNMENT)
422 proc_alignment = MIN_PROC_ALIGNMENT;
423
424 proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
425 "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
426
427 proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
428 proc_listener_cb, NULL);
429 }
430
431 void
432 procinit_sysctl(void)
433 {
434 static struct sysctllog *clog;
435
436 sysctl_createv(&clog, 0, NULL, NULL,
437 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
438 CTLTYPE_INT, "expose_address",
439 SYSCTL_DESCR("Enable exposing kernel addresses"),
440 sysctl_security_expose_address, 0,
441 &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
442 sysctl_createv(&clog, 0, NULL, NULL,
443 CTLFLAG_PERMANENT,
444 CTLTYPE_NODE, "proc",
445 SYSCTL_DESCR("System-wide process information"),
446 sysctl_doeproc, 0, NULL, 0,
447 CTL_KERN, KERN_PROC, CTL_EOL);
448 sysctl_createv(&clog, 0, NULL, NULL,
449 CTLFLAG_PERMANENT,
450 CTLTYPE_NODE, "proc2",
451 SYSCTL_DESCR("Machine-independent process information"),
452 sysctl_doeproc, 0, NULL, 0,
453 CTL_KERN, KERN_PROC2, CTL_EOL);
454 sysctl_createv(&clog, 0, NULL, NULL,
455 CTLFLAG_PERMANENT,
456 CTLTYPE_NODE, "proc_args",
457 SYSCTL_DESCR("Process argument information"),
458 sysctl_kern_proc_args, 0, NULL, 0,
459 CTL_KERN, KERN_PROC_ARGS, CTL_EOL);
460
461 /*
462 "nodes" under these:
463
464 KERN_PROC_ALL
465 KERN_PROC_PID pid
466 KERN_PROC_PGRP pgrp
467 KERN_PROC_SESSION sess
468 KERN_PROC_TTY tty
469 KERN_PROC_UID uid
470 KERN_PROC_RUID uid
471 KERN_PROC_GID gid
472 KERN_PROC_RGID gid
473
474 all in all, probably not worth the effort...
475 */
476 }
477
478 /*
479 * Initialize process 0.
480 */
481 void
482 proc0_init(void)
483 {
484 struct proc *p;
485 struct pgrp *pg;
486 struct rlimit *rlim;
487 rlim_t lim;
488 int i;
489
490 p = &proc0;
491 pg = &pgrp0;
492
493 mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
494 mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
495 p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
496
497 rw_init(&p->p_reflock);
498 cv_init(&p->p_waitcv, "wait");
499 cv_init(&p->p_lwpcv, "lwpwait");
500
501 LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
502
503 KASSERT(lwp0.l_lid == 0);
504 pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
505 LIST_INSERT_HEAD(&allproc, p, p_list);
506
507 pid_table[lwp0.l_lid].pt_pgrp = pg;
508 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
509
510 #ifdef __HAVE_SYSCALL_INTERN
511 (*p->p_emul->e_syscall_intern)(p);
512 #endif
513
514 /* Create credentials. */
515 cred0 = kauth_cred_alloc();
516 p->p_cred = cred0;
517
518 /* Create the CWD info. */
519 rw_init(&cwdi0.cwdi_lock);
520
521 /* Create the limits structures. */
522 mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
523
524 rlim = limit0.pl_rlimit;
525 for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
526 rlim[i].rlim_cur = RLIM_INFINITY;
527 rlim[i].rlim_max = RLIM_INFINITY;
528 }
529
530 rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
531 rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;
532
533 rlim[RLIMIT_NPROC].rlim_max = maxproc;
534 rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;
535
536 lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem()));
537 rlim[RLIMIT_RSS].rlim_max = lim;
538 rlim[RLIMIT_MEMLOCK].rlim_max = lim;
539 rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
540
541 rlim[RLIMIT_NTHR].rlim_max = maxlwp;
542 rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc;
543
544 /* Note that default core name has zero length. */
545 limit0.pl_corename = defcorename;
546 limit0.pl_cnlen = 0;
547 limit0.pl_refcnt = 1;
548 limit0.pl_writeable = false;
549 limit0.pl_sv_limit = NULL;
550
551 /* Configure virtual memory system, set vm rlimits. */
552 uvm_init_limits(p);
553
554 /* Initialize file descriptor table for proc0. */
555 fd_init(&filedesc0);
556
557 /*
558 * Initialize proc0's vmspace, which uses the kernel pmap.
559 * All kernel processes (which never have user space mappings)
560 * share proc0's vmspace, and thus, the kernel pmap.
561 */
562 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
563 trunc_page(VM_MAXUSER_ADDRESS),
564 #ifdef __USE_TOPDOWN_VM
565 true
566 #else
567 false
568 #endif
569 );
570
571 /* Initialize signal state for proc0. XXX IPL_SCHED */
572 mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
573 siginit(p);
574
575 proc_initspecific(p);
576 kdtrace_proc_ctor(NULL, p);
577 }
578
579 /*
580 * Session reference counting.
581 */
582
583 void
584 proc_sesshold(struct session *ss)
585 {
586
587 KASSERT(mutex_owned(proc_lock));
588 ss->s_count++;
589 }
590
591 static void
592 proc_sessrele_pid_table_write_locked(struct session *ss)
593 {
594 struct pgrp *pg;
595
596 KASSERT(mutex_owned(proc_lock));
597 KASSERT(rw_write_held(&pid_table_lock));
598 KASSERT(ss->s_count > 0);
599
600 /*
601 * We keep the pgrp with the same id as the session in order to
602 * stop a process being given the same pid. Since the pgrp holds
603 * a reference to the session, it must be a 'zombie' pgrp by now.
604 */
605 if (--ss->s_count == 0) {
606 pg = pg_remove(ss->s_sid);
607 } else {
608 pg = NULL;
609 ss = NULL;
610 }
611
612 rw_exit(&pid_table_lock);
613 mutex_exit(proc_lock);
614
615 if (pg)
616 kmem_free(pg, sizeof(struct pgrp));
617 if (ss)
618 kmem_free(ss, sizeof(struct session));
619 }
620
621 void
622 proc_sessrele(struct session *ss)
623 {
624 rw_enter(&pid_table_lock, RW_WRITER);
625 proc_sessrele_pid_table_write_locked(ss);
626 }
627
628 /*
629 * Check that the specified process group is in the session of the
630 * specified process.
631  * Treats negative ids as process ids.
632 * Used to validate TIOCSPGRP requests.
633 */
634 int
635 pgid_in_session(struct proc *p, pid_t pg_id)
636 {
637 struct pgrp *pgrp;
638 struct session *session;
639 int error;
640
641 mutex_enter(proc_lock);
642 if (pg_id < 0) {
643 struct proc *p1 = proc_find(-pg_id);
644 if (p1 == NULL) {
645 error = EINVAL;
646 goto fail;
647 }
648 pgrp = p1->p_pgrp;
649 } else {
650 pgrp = pgrp_find(pg_id);
651 if (pgrp == NULL) {
652 error = EINVAL;
653 goto fail;
654 }
655 }
656 session = pgrp->pg_session;
657 error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
658 fail:
659 mutex_exit(proc_lock);
660 return error;
661 }
662
663 /*
664 * p_inferior: is p an inferior of q?
665 */
666 static inline bool
667 p_inferior(struct proc *p, struct proc *q)
668 {
669
670 KASSERT(mutex_owned(proc_lock));
671
672 for (; p != q; p = p->p_pptr)
673 if (p->p_pid == 0)
674 return false;
675 return true;
676 }
677
678 /*
679 * proc_find_lwp: locate an lwp in said proc by the ID.
680 *
681 * => Must be called with p::p_lock held.
682 * => LARVAL lwps are not returned because they are only partially
683 * constructed while occupying the slot.
684 * => Callers need to be careful about lwp::l_stat of the returned
685 * lwp.
686 */
687 struct lwp *
688 proc_find_lwp(proc_t *p, pid_t pid)
689 {
690 struct pid_table *pt;
691 struct lwp *l = NULL;
692 uintptr_t slot;
693
694 KASSERT(mutex_owned(p->p_lock));
695 rw_enter(&pid_table_lock, RW_READER);
696 pt = &pid_table[pid & pid_tbl_mask];
697
698 slot = pt->pt_slot;
699 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
700 l = PT_GET_LWP(slot);
701 if (__predict_false(l->l_proc != p || l->l_stat == LSLARVAL)) {
702 l = NULL;
703 }
704 }
705 rw_exit(&pid_table_lock);
706
707 return l;
708 }
709
710 /*
711 * proc_seek_lwpid: locate an lwp by only the ID.
712 *
713 * => This is a specialized interface used for looking up an LWP
714 * without holding a lock on its owner process.
715 * => Callers of this interface MUST provide a separate synchronization
716 * mechanism to ensure the validity of the returned LWP. LARVAL LWPs
717  *    can be returned here, so callers must check for them!
718 * => Only returns LWPs whose ID has not been hidden from us.
719 */
720 struct lwp *
721 proc_seek_lwpid(pid_t pid)
722 {
723 struct pid_table *pt;
724 struct lwp *l = NULL;
725 uintptr_t slot;
726
727 rw_enter(&pid_table_lock, RW_READER);
728 pt = &pid_table[pid & pid_tbl_mask];
729
730 slot = pt->pt_slot;
731 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid &&
732 !PT_HIDDEN(slot))) {
733 l = PT_GET_LWP(slot);
734 }
735 rw_exit(&pid_table_lock);
736
737 return l;
738 }
739
740 /*
741 * proc_hide_lwpid: hide an lwp ID from seekers.
742 */
743 void
744 proc_hide_lwpid(pid_t pid)
745 {
746 struct pid_table *pt;
747 uintptr_t slot;
748
749 rw_enter(&pid_table_lock, RW_WRITER);
750 pt = &pid_table[pid & pid_tbl_mask];
751
752 slot = pt->pt_slot;
753 KASSERT(PT_IS_LWP(slot));
754 KASSERT(pt->pt_pid == pid);
755 pt->pt_slot = PT_SET_HIDDEN(slot);
756
757 rw_exit(&pid_table_lock);
758 }
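/*
 * Net effect of the hidden bit (illustration): after proc_hide_lwpid(pid),
 * the slot keeps its LWP type bits, so proc_find_lwp() -- which holds
 * p::p_lock and checks the owning process -- still finds the LWP, while
 * proc_seek_lwpid() now returns NULL because it additionally tests
 * PT_HIDDEN().
 */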
759
760 /*
761 * proc_find_raw_pid_table_locked: locate a process by the ID.
762 *
763 * => Must be called with proc_lock held and the pid_table_lock
764 * at least held for reading.
765 */
766 static proc_t *
767 proc_find_raw_pid_table_locked(pid_t pid)
768 {
769 struct pid_table *pt;
770 proc_t *p = NULL;
771 uintptr_t slot;
772
773 KASSERT(mutex_owned(proc_lock));
774 pt = &pid_table[pid & pid_tbl_mask];
775
776 slot = pt->pt_slot;
777 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
778 /*
779 * When looking up processes, require a direct match
780 * on the PID assigned to the proc, not just one of
781 * its LWPs.
782 *
783 * N.B. We require lwp::l_proc of LARVAL LWPs to be
784 * valid here.
785 */
786 p = PT_GET_LWP(slot)->l_proc;
787 if (__predict_false(p->p_pid != pid))
788 p = NULL;
789 } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
790 p = PT_GET_PROC(slot);
791 }
792 return p;
793 }
794
795 proc_t *
796 proc_find_raw(pid_t pid)
797 {
798 KASSERT(mutex_owned(proc_lock));
799 rw_enter(&pid_table_lock, RW_READER);
800 proc_t *p = proc_find_raw_pid_table_locked(pid);
801 rw_exit(&pid_table_lock);
802 return p;
803 }
804
805 static proc_t *
806 proc_find_pid_table_locked(pid_t pid)
807 {
808 proc_t *p;
809
810 KASSERT(mutex_owned(proc_lock));
811
812 p = proc_find_raw_pid_table_locked(pid);
813 if (__predict_false(p == NULL)) {
814 return NULL;
815 }
816
817 /*
818 * Only allow live processes to be found by PID.
819 * XXX: p_stat might change, since proc unlocked.
820 */
821 if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
822 return p;
823 }
824 return NULL;
825 }
826
827 proc_t *
828 proc_find(pid_t pid)
829 {
830 KASSERT(mutex_owned(proc_lock));
831 rw_enter(&pid_table_lock, RW_READER);
832 proc_t *p = proc_find_pid_table_locked(pid);
833 rw_exit(&pid_table_lock);
834 return p;
835 }
836
837 /*
838 * pgrp_find_pid_table_locked: locate a process group by the ID.
839 *
840 * => Must be called with proc_lock held and the pid_table_lock
841 * held at least for reading.
842 */
843 static struct pgrp *
844 pgrp_find_pid_table_locked(pid_t pgid)
845 {
846 struct pgrp *pg;
847
848 KASSERT(mutex_owned(proc_lock));
849
850 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
851
852 /*
853 * Cannot look up a process group that only exists because the
854 * session has not died yet (traditional).
855 */
856 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
857 return NULL;
858 }
859 return pg;
860 }
861
862 struct pgrp *
863 pgrp_find(pid_t pgid)
864 {
865 KASSERT(mutex_owned(proc_lock));
866 rw_enter(&pid_table_lock, RW_READER);
867 struct pgrp *pg = pgrp_find_pid_table_locked(pgid);
868 rw_exit(&pid_table_lock);
869 return pg;
870 }
871
872 static void
873 expand_pid_table(void)
874 {
875 size_t pt_size, tsz;
876 struct pid_table *n_pt, *new_pt;
877 uintptr_t slot;
878 struct pgrp *pgrp;
879 pid_t pid, rpid;
880 u_int i;
881 uint new_pt_mask;
882
883 KASSERT(rw_write_held(&pid_table_lock));
884
885 /* Unlock the pid_table briefly to allocate memory. */
886 pt_size = pid_tbl_mask + 1;
887 rw_exit(&pid_table_lock);
888
889 tsz = pt_size * 2 * sizeof(struct pid_table);
890 new_pt = kmem_alloc(tsz, KM_SLEEP);
891 new_pt_mask = pt_size * 2 - 1;
892
893 	/* XXX For now. The practical limit is much lower anyway. */
894 KASSERT(new_pt_mask <= FUTEX_TID_MASK);
895
896 rw_enter(&pid_table_lock, RW_WRITER);
897 if (pt_size != pid_tbl_mask + 1) {
898 /* Another process beat us to it... */
899 rw_exit(&pid_table_lock);
900 kmem_free(new_pt, tsz);
901 goto out;
902 }
903
904 /*
905 * Copy entries from old table into new one.
906 	 * If 'pid' is 'odd' we need to place it in the upper half,
907 	 * even pids go in the lower half.
908 * Free items stay in the low half so we don't have to
909 * fixup the reference to them.
910 * We stuff free items on the front of the freelist
911 * because we can't write to unmodified entries.
912 * Processing the table backwards maintains a semblance
913 * of issuing pid numbers that increase with time.
914 */
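	/*
	 * Small example of the 'unzip' (old size 4 -> new size 8): old slot 2
	 * holds the entry for some pid congruent to 2 mod 4.  If that pid is
	 * 'odd' in the sense above (e.g. 6), the entry moves to new slot 6
	 * and new slot 2 becomes free; if it is 'even' (e.g. 2), the entry
	 * stays in new slot 2 and new slot 6 becomes free.  Either way the
	 * newly free slot is pushed onto the front of the free list, while
	 * entries that were already free keep a lower-half index, so existing
	 * links to them stay valid.
	 */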
915 i = pt_size - 1;
916 n_pt = new_pt + i;
917 for (; ; i--, n_pt--) {
918 slot = pid_table[i].pt_slot;
919 pgrp = pid_table[i].pt_pgrp;
920 if (!PT_VALID(slot)) {
921 /* Up 'use count' so that link is valid */
922 pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
923 rpid = 0;
924 slot = PT_SET_FREE(pid);
925 if (pgrp)
926 pid = pgrp->pg_id;
927 } else {
928 pid = pid_table[i].pt_pid;
929 rpid = pid;
930 }
931
932 /* Save entry in appropriate half of table */
933 n_pt[pid & pt_size].pt_slot = slot;
934 n_pt[pid & pt_size].pt_pgrp = pgrp;
935 n_pt[pid & pt_size].pt_pid = rpid;
936
937 /* Put other piece on start of free list */
938 pid = (pid ^ pt_size) & ~pid_tbl_mask;
939 n_pt[pid & pt_size].pt_slot =
940 PT_SET_FREE((pid & ~pt_size) | next_free_pt);
941 n_pt[pid & pt_size].pt_pgrp = 0;
942 n_pt[pid & pt_size].pt_pid = 0;
943
944 next_free_pt = i | (pid & pt_size);
945 if (i == 0)
946 break;
947 }
948
949 /* Save old table size and switch tables */
950 tsz = pt_size * sizeof(struct pid_table);
951 n_pt = pid_table;
952 pid_table = new_pt;
953 pid_tbl_mask = new_pt_mask;
954
955 /*
956 	 * pid_max starts as PID_MAX (= 30000); once we have 16384
957 * allocated pids we need it to be larger!
958 */
959 if (pid_tbl_mask > PID_MAX) {
960 pid_max = pid_tbl_mask * 2 + 1;
961 pid_alloc_lim |= pid_alloc_lim << 1;
962 } else
963 pid_alloc_lim <<= 1; /* doubles number of free slots... */
964
965 rw_exit(&pid_table_lock);
966 kmem_free(n_pt, tsz);
967
968 out: /* Return with the pid_table_lock held again. */
969 rw_enter(&pid_table_lock, RW_WRITER);
970 }
971
972 struct proc *
973 proc_alloc(void)
974 {
975 struct proc *p;
976
977 p = pool_cache_get(proc_cache, PR_WAITOK);
978 p->p_stat = SIDL; /* protect against others */
979 proc_initspecific(p);
980 kdtrace_proc_ctor(NULL, p);
981
982 /*
983 * Allocate a placeholder in the pid_table. When we create the
984 * first LWP for this process, it will take ownership of the
985 * slot.
986 */
987 if (__predict_false(proc_alloc_pid(p) == -1)) {
988 /* Allocating the PID failed; unwind. */
989 proc_finispecific(p);
990 proc_free_mem(p);
991 p = NULL;
992 }
993 return p;
994 }
995
996 /*
997  * proc_alloc_pid_slot: allocate PID and record the occupant so that
998 * proc_find_raw() can find it by the PID.
999 */
1000 static pid_t __noinline
1001 proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
1002 {
1003 struct pid_table *pt;
1004 pid_t pid;
1005 int nxt;
1006
1007 KASSERT(rw_write_held(&pid_table_lock));
1008
1009 for (;;expand_pid_table()) {
1010 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
1011 /* ensure pids cycle through 2000+ values */
1012 continue;
1013 }
1014 /*
1015 * The first user process *must* be given PID 1.
1016 		 * It has already been reserved for us. This
1017 * will be coming in from the proc_alloc() call
1018 * above, and the entry will be usurped later when
1019 * the first user LWP is created.
1020 * XXX this is slightly gross.
1021 */
1022 if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
1023 p != &proc0)) {
1024 KASSERT(PT_IS_PROC(slot));
1025 pt = &pid_table[1];
1026 pt->pt_slot = slot;
1027 return 1;
1028 }
1029 pt = &pid_table[next_free_pt];
1030 #ifdef DIAGNOSTIC
1031 if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
1032 panic("proc_alloc: slot busy");
1033 #endif
1034 nxt = PT_NEXT(pt->pt_slot);
1035 if (nxt & pid_tbl_mask)
1036 break;
1037 /* Table full - expand (NB last entry not used....) */
1038 }
1039
1040 /* pid is 'saved use count' + 'size' + entry */
1041 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
1042 if ((uint)pid > (uint)pid_max)
1043 pid &= pid_tbl_mask;
1044 next_free_pt = nxt & pid_tbl_mask;
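	/*
	 * Worked example (illustrative only, assuming the default PID_MAX of
	 * 30000 and the initial 32-entry table): for the very first
	 * allocation after procinit(), next_free_pt == 1 and the slot was
	 * preset to PT_SET_FREE(LINK_EMPTY + 2) == PT_SET_FREE(30018), so
	 *
	 *	nxt = 30018, pid = 30016 + 31 + 1 + 1 = 30049 > pid_max,
	 *	pid &= pid_tbl_mask  ->  pid = 1, next_free_pt = 2.
	 *
	 * Allocations from recycled slots yield 'old pid + table size' until
	 * the result exceeds pid_max and is masked back into the low range.
	 */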
1045
1046 	/* XXX For now. The practical limit is much lower anyway. */
1047 KASSERT(pid <= FUTEX_TID_MASK);
1048
1049 /* Grab table slot */
1050 pt->pt_slot = slot;
1051
1052 KASSERT(pt->pt_pid == 0);
1053 pt->pt_pid = pid;
1054 pid_alloc_cnt++;
1055
1056 return pid;
1057 }
1058
1059 pid_t
1060 proc_alloc_pid(struct proc *p)
1061 {
1062 pid_t pid;
1063
1064 KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
1065
1066 rw_enter(&pid_table_lock, RW_WRITER);
1067 pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
1068 if (pid != -1)
1069 p->p_pid = pid;
1070 rw_exit(&pid_table_lock);
1071
1072 return pid;
1073 }
1074
1075 pid_t
1076 proc_alloc_lwpid(struct proc *p, struct lwp *l)
1077 {
1078 struct pid_table *pt;
1079 pid_t pid;
1080
1081 KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
1082
1083 /*
1084 * If the slot for p->p_pid currently points to the proc,
1085 * then we should usurp this ID for the LWP. This happens
1086 * at least once per process (for the first LWP), and can
1087 	 * happen again if the first LWP for a process exits before
1088 	 * the process creates another.
1089 */
1090 rw_enter(&pid_table_lock, RW_WRITER);
1091 pid = p->p_pid;
1092 pt = &pid_table[pid & pid_tbl_mask];
1093 KASSERT(pt->pt_pid == pid);
1094 if (PT_IS_PROC(pt->pt_slot)) {
1095 KASSERT(PT_GET_PROC(pt->pt_slot) == p);
1096 l->l_lid = pid;
1097 pt->pt_slot = PT_SET_LWP(l);
1098 } else {
1099 /* Need to allocate a new slot. */
1100 pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
1101 if (pid != -1)
1102 l->l_lid = pid;
1103 }
1104 rw_exit(&pid_table_lock);
1105
1106 return pid;
1107 }
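/*
 * Net effect (an illustration of the scheme above): the first LWP of a
 * new process ends up with l_lid == p_pid, reusing the slot reserved by
 * proc_alloc(), and any further LWPs get fresh pid_table slots of their
 * own, so LWP IDs are unique system-wide rather than merely within the
 * owning process.
 */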
1108
1109 static void __noinline
1110 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
1111 {
1112 struct pid_table *pt;
1113
1114 rw_enter(&pid_table_lock, RW_WRITER);
1115 pt = &pid_table[pid & pid_tbl_mask];
1116
1117 KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
1118 KASSERT(pt->pt_pid == pid);
1119
1120 /* save pid use count in slot */
1121 pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
1122 pt->pt_pid = 0;
1123
1124 if (pt->pt_pgrp == NULL) {
1125 /* link last freed entry onto ours */
1126 pid &= pid_tbl_mask;
1127 pt = &pid_table[last_free_pt];
1128 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid);
1129 pt->pt_pid = 0;
1130 last_free_pt = pid;
1131 pid_alloc_cnt--;
1132 }
1133 rw_exit(&pid_table_lock);
1134 }
1135
1136 /*
1137 * Free a process id - called from proc_free (in kern_exit.c)
1138 *
1139 * Called with the proc_lock held.
1140 */
1141 void
1142 proc_free_pid(pid_t pid)
1143 {
1144 KASSERT(mutex_owned(proc_lock));
1145 proc_free_pid_internal(pid, PT_F_PROC);
1146 }
1147
1148 /*
1149 * Free a process id used by an LWP. If this was the process's
1150 * first LWP, we convert the slot to point to the process; the
1151 * entry will get cleaned up later when the process finishes exiting.
1152 *
1153 * If not, then it's the same as proc_free_pid().
1154 */
1155 void
1156 proc_free_lwpid(struct proc *p, pid_t pid)
1157 {
1158
1159 KASSERT(mutex_owned(p->p_lock));
1160
1161 if (__predict_true(p->p_pid == pid)) {
1162 struct pid_table *pt;
1163
1164 rw_enter(&pid_table_lock, RW_WRITER);
1165 pt = &pid_table[pid & pid_tbl_mask];
1166
1167 KASSERT(pt->pt_pid == pid);
1168 KASSERT(PT_IS_LWP(pt->pt_slot));
1169 KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
1170
1171 pt->pt_slot = PT_SET_PROC(p);
1172
1173 rw_exit(&pid_table_lock);
1174 return;
1175 }
1176 proc_free_pid_internal(pid, PT_F_LWP);
1177 }
1178
1179 void
1180 proc_free_mem(struct proc *p)
1181 {
1182
1183 kdtrace_proc_dtor(NULL, p);
1184 pool_cache_put(proc_cache, p);
1185 }
1186
1187 /*
1188 * proc_enterpgrp: move p to a new or existing process group (and session).
1189 *
1190 * If we are creating a new pgrp, the pgid should equal
1191 * the calling process' pid.
1192  * It is only valid to enter a process group that is in the session
1193  * of the process.
1194  * Also, mksess should only be set if we are creating a process group.
1195 *
1196 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
1197 */
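/*
 * Illustrative call patterns (a hedged sketch, not code from this file):
 * following the rules above, a setsid(2)-style caller creates a new
 * session and pgrp named after its own PID,
 *
 *	error = proc_enterpgrp(curp, curp->p_pid, curp->p_pid, true);
 *
 * while a setpgid(2)-style caller joins or creates a pgrp within its own
 * session without creating a session,
 *
 *	error = proc_enterpgrp(curp, pid, pgid, false);
 *
 * where 'pid' is the caller or one of its children and 'pgid' is either
 * equal to 'pid' (new pgrp) or an existing pgrp in the same session.
 */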
1198 int
1199 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
1200 {
1201 struct pgrp *new_pgrp, *pgrp;
1202 struct session *sess;
1203 struct proc *p;
1204 int rval;
1205 pid_t pg_id = NO_PGID;
1206
1207 sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
1208
1209 /* Allocate data areas we might need before doing any validity checks */
1210 rw_enter(&pid_table_lock, RW_READER);/* Because pid_table might change */
1211 if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
1212 rw_exit(&pid_table_lock);
1213 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
1214 } else {
1215 rw_exit(&pid_table_lock);
1216 new_pgrp = NULL;
1217 }
1218 mutex_enter(proc_lock);
1219 rw_enter(&pid_table_lock, RW_WRITER);
1220 rval = EPERM; /* most common error (to save typing) */
1221
1222 /* Check pgrp exists or can be created */
1223 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
1224 if (pgrp != NULL && pgrp->pg_id != pgid)
1225 goto done;
1226
1227 /* Can only set another process under restricted circumstances. */
1228 if (pid != curp->p_pid) {
1229 /* Must exist and be one of our children... */
1230 p = proc_find_pid_table_locked(pid);
1231 if (p == NULL || !p_inferior(p, curp)) {
1232 rval = ESRCH;
1233 goto done;
1234 }
1235 /* ... in the same session... */
1236 if (sess != NULL || p->p_session != curp->p_session)
1237 goto done;
1238 /* ... existing pgid must be in same session ... */
1239 if (pgrp != NULL && pgrp->pg_session != p->p_session)
1240 goto done;
1241 /* ... and not done an exec. */
1242 if (p->p_flag & PK_EXEC) {
1243 rval = EACCES;
1244 goto done;
1245 }
1246 } else {
1247 /* ... setsid() cannot re-enter a pgrp */
1248 if (mksess && (curp->p_pgid == curp->p_pid ||
1249 pgrp_find_pid_table_locked(curp->p_pid)))
1250 goto done;
1251 p = curp;
1252 }
1253
1254 /* Changing the process group/session of a session
1255 leader is definitely off limits. */
1256 if (SESS_LEADER(p)) {
1257 if (sess == NULL && p->p_pgrp == pgrp)
1258 /* unless it's a definite noop */
1259 rval = 0;
1260 goto done;
1261 }
1262
1263 /* Can only create a process group with id of process */
1264 if (pgrp == NULL && pgid != pid)
1265 goto done;
1266
1267 /* Can only create a session if creating pgrp */
1268 if (sess != NULL && pgrp != NULL)
1269 goto done;
1270
1271 /* Check we allocated memory for a pgrp... */
1272 if (pgrp == NULL && new_pgrp == NULL)
1273 goto done;
1274
1275 /* Don't attach to 'zombie' pgrp */
1276 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
1277 goto done;
1278
1279 /* Expect to succeed now */
1280 rval = 0;
1281
1282 if (pgrp == p->p_pgrp)
1283 /* nothing to do */
1284 goto done;
1285
1286 /* Ok all setup, link up required structures */
1287
1288 if (pgrp == NULL) {
1289 pgrp = new_pgrp;
1290 new_pgrp = NULL;
1291 if (sess != NULL) {
1292 sess->s_sid = p->p_pid;
1293 sess->s_leader = p;
1294 sess->s_count = 1;
1295 sess->s_ttyvp = NULL;
1296 sess->s_ttyp = NULL;
1297 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
1298 memcpy(sess->s_login, p->p_session->s_login,
1299 sizeof(sess->s_login));
1300 p->p_lflag &= ~PL_CONTROLT;
1301 } else {
1302 sess = p->p_pgrp->pg_session;
1303 proc_sesshold(sess);
1304 }
1305 pgrp->pg_session = sess;
1306 sess = NULL;
1307
1308 pgrp->pg_id = pgid;
1309 LIST_INIT(&pgrp->pg_members);
1310 #ifdef DIAGNOSTIC
1311 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
1312 panic("enterpgrp: pgrp table slot in use");
1313 if (__predict_false(mksess && p != curp))
1314 panic("enterpgrp: mksession and p != curproc");
1315 #endif
1316 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
1317 pgrp->pg_jobc = 0;
1318 }
1319
1320 /*
1321 * Adjust eligibility of affected pgrps to participate in job control.
1322 * Increment eligibility counts before decrementing, otherwise we
1323 * could reach 0 spuriously during the first call.
1324 */
1325 fixjobc(p, pgrp, 1);
1326 fixjobc(p, p->p_pgrp, 0);
1327
1328 /* Interlock with ttread(). */
1329 mutex_spin_enter(&tty_lock);
1330
1331 /* Move process to requested group. */
1332 LIST_REMOVE(p, p_pglist);
1333 if (LIST_EMPTY(&p->p_pgrp->pg_members))
1334 /* defer delete until we've dumped the lock */
1335 pg_id = p->p_pgrp->pg_id;
1336 p->p_pgrp = pgrp;
1337 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
1338
1339 /* Done with the swap; we can release the tty mutex. */
1340 mutex_spin_exit(&tty_lock);
1341
1342 done:
1343 rw_exit(&pid_table_lock);
1344 if (pg_id != NO_PGID) {
1345 /* Releases proc_lock. */
1346 pg_delete(pg_id);
1347 } else {
1348 mutex_exit(proc_lock);
1349 }
1350 if (sess != NULL)
1351 kmem_free(sess, sizeof(*sess));
1352 if (new_pgrp != NULL)
1353 kmem_free(new_pgrp, sizeof(*new_pgrp));
1354 #ifdef DEBUG_PGRP
1355 if (__predict_false(rval))
1356 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
1357 pid, pgid, mksess, curp->p_pid, rval);
1358 #endif
1359 return rval;
1360 }
1361
1362 /*
1363 * proc_leavepgrp: remove a process from its process group.
1364 * => must be called with the proc_lock held, which will be released;
1365 */
1366 void
1367 proc_leavepgrp(struct proc *p)
1368 {
1369 struct pgrp *pgrp;
1370
1371 KASSERT(mutex_owned(proc_lock));
1372
1373 /* Interlock with ttread() */
1374 mutex_spin_enter(&tty_lock);
1375 pgrp = p->p_pgrp;
1376 LIST_REMOVE(p, p_pglist);
1377 p->p_pgrp = NULL;
1378 mutex_spin_exit(&tty_lock);
1379
1380 if (LIST_EMPTY(&pgrp->pg_members)) {
1381 /* Releases proc_lock. */
1382 pg_delete(pgrp->pg_id);
1383 } else {
1384 mutex_exit(proc_lock);
1385 }
1386 }
1387
1388 /*
1389 * pg_remove: remove a process group from the table.
1390 * => must be called with the proc_lock held;
1391 * => returns process group to free;
1392 */
1393 static struct pgrp *
1394 pg_remove(pid_t pg_id)
1395 {
1396 struct pgrp *pgrp;
1397 struct pid_table *pt;
1398
1399 KASSERT(mutex_owned(proc_lock));
1400 KASSERT(rw_write_held(&pid_table_lock));
1401
1402 pt = &pid_table[pg_id & pid_tbl_mask];
1403 pgrp = pt->pt_pgrp;
1404
1405 KASSERT(pgrp != NULL);
1406 KASSERT(pgrp->pg_id == pg_id);
1407 KASSERT(LIST_EMPTY(&pgrp->pg_members));
1408
1409 pt->pt_pgrp = NULL;
1410
1411 if (!PT_VALID(pt->pt_slot)) {
1412 /* Orphaned pgrp, put slot onto free list. */
1413 KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
1414 pg_id &= pid_tbl_mask;
1415 pt = &pid_table[last_free_pt];
1416 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
1417 KASSERT(pt->pt_pid == 0);
1418 last_free_pt = pg_id;
1419 pid_alloc_cnt--;
1420 }
1421 return pgrp;
1422 }
1423
1424 /*
1425 * pg_delete: delete and free a process group.
1426 * => must be called with the proc_lock held, which will be released.
1427 */
1428 static void
1429 pg_delete(pid_t pg_id)
1430 {
1431 struct pgrp *pg;
1432 struct tty *ttyp;
1433 struct session *ss;
1434
1435 KASSERT(mutex_owned(proc_lock));
1436
1437 rw_enter(&pid_table_lock, RW_WRITER);
1438 pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
1439 if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
1440 rw_exit(&pid_table_lock);
1441 mutex_exit(proc_lock);
1442 return;
1443 }
1444
1445 ss = pg->pg_session;
1446
1447 /* Remove reference (if any) from tty to this process group */
1448 mutex_spin_enter(&tty_lock);
1449 ttyp = ss->s_ttyp;
1450 if (ttyp != NULL && ttyp->t_pgrp == pg) {
1451 ttyp->t_pgrp = NULL;
1452 KASSERT(ttyp->t_session == ss);
1453 }
1454 mutex_spin_exit(&tty_lock);
1455
1456 /*
1457 * The leading process group in a session is freed by
1458 	 * proc_sessrele_pid_table_write_locked() if this is the last
1459 * reference. It will also release the locks.
1460 */
1461 pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
1462 proc_sessrele_pid_table_write_locked(ss);
1463
1464 if (pg != NULL) {
1465 		/* Free it, if it was not done above. */
1466 kmem_free(pg, sizeof(struct pgrp));
1467 }
1468 }
1469
1470 /*
1471 * Adjust pgrp jobc counters when specified process changes process group.
1472 * We count the number of processes in each process group that "qualify"
1473 * the group for terminal job control (those with a parent in a different
1474 * process group of the same session). If that count reaches zero, the
1475 * process group becomes orphaned. Check both the specified process'
1476 * process group and that of its children.
1477 * entering == 0 => p is leaving specified group.
1478 * entering == 1 => p is entering specified group.
1479 *
1480 * Call with proc_lock held.
1481 */
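/*
 * Illustrative scenario: a shell S in pgrp A starts a pipeline child C,
 * which moves into its own pgrp B in the same session.  C's parent S is
 * in a different pgrp of that session, so C "qualifies" B and B's
 * pg_jobc is 1.  When S exits, fixjobc(S, A, 0) walks S's children and
 * decrements B's pg_jobc; once it reaches 0 the group is orphaned and,
 * if any member is stopped, orphanpg() sends SIGHUP/SIGCONT to it.
 */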
1482 void
1483 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
1484 {
1485 struct pgrp *hispgrp;
1486 struct session *mysession = pgrp->pg_session;
1487 struct proc *child;
1488
1489 KASSERT(mutex_owned(proc_lock));
1490
1491 /*
1492 * Check p's parent to see whether p qualifies its own process
1493 * group; if so, adjust count for p's process group.
1494 */
1495 hispgrp = p->p_pptr->p_pgrp;
1496 if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
1497 if (entering) {
1498 pgrp->pg_jobc++;
1499 p->p_lflag &= ~PL_ORPHANPG;
1500 } else {
1501 KASSERT(pgrp->pg_jobc > 0);
1502 if (--pgrp->pg_jobc == 0)
1503 orphanpg(pgrp);
1504 }
1505 }
1506
1507 /*
1508 * Check this process' children to see whether they qualify
1509 * their process groups; if so, adjust counts for children's
1510 * process groups.
1511 */
1512 LIST_FOREACH(child, &p->p_children, p_sibling) {
1513 hispgrp = child->p_pgrp;
1514 if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
1515 !P_ZOMBIE(child)) {
1516 if (entering) {
1517 child->p_lflag &= ~PL_ORPHANPG;
1518 hispgrp->pg_jobc++;
1519 } else {
1520 KASSERT(hispgrp->pg_jobc > 0);
1521 if (--hispgrp->pg_jobc == 0)
1522 orphanpg(hispgrp);
1523 }
1524 }
1525 }
1526 }
1527
1528 /*
1529 * A process group has become orphaned;
1530 * if there are any stopped processes in the group,
1531  * hang up all processes in that group.
1532 *
1533 * Call with proc_lock held.
1534 */
1535 static void
1536 orphanpg(struct pgrp *pg)
1537 {
1538 struct proc *p;
1539
1540 KASSERT(mutex_owned(proc_lock));
1541
1542 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1543 if (p->p_stat == SSTOP) {
1544 p->p_lflag |= PL_ORPHANPG;
1545 psignal(p, SIGHUP);
1546 psignal(p, SIGCONT);
1547 }
1548 }
1549 }
1550
1551 #ifdef DDB
1552 #include <ddb/db_output.h>
1553 void pidtbl_dump(void);
1554 void
1555 pidtbl_dump(void)
1556 {
1557 struct pid_table *pt;
1558 struct proc *p;
1559 struct pgrp *pgrp;
1560 uintptr_t slot;
1561 int id;
1562
1563 db_printf("pid table %p size %x, next %x, last %x\n",
1564 pid_table, pid_tbl_mask+1,
1565 next_free_pt, last_free_pt);
1566 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
1567 slot = pt->pt_slot;
1568 if (!PT_VALID(slot) && !pt->pt_pgrp)
1569 continue;
1570 if (PT_IS_LWP(slot)) {
1571 p = PT_GET_LWP(slot)->l_proc;
1572 } else if (PT_IS_PROC(slot)) {
1573 p = PT_GET_PROC(slot);
1574 } else {
1575 p = NULL;
1576 }
1577 db_printf(" id %x: ", id);
1578 if (p != NULL)
1579 db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
1580 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
1581 else
1582 db_printf("next %x use %x\n",
1583 PT_NEXT(slot) & pid_tbl_mask,
1584 PT_NEXT(slot) & ~pid_tbl_mask);
1585 if ((pgrp = pt->pt_pgrp)) {
1586 db_printf("\tsession %p, sid %d, count %d, login %s\n",
1587 pgrp->pg_session, pgrp->pg_session->s_sid,
1588 pgrp->pg_session->s_count,
1589 pgrp->pg_session->s_login);
1590 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1591 pgrp, pgrp->pg_id, pgrp->pg_jobc,
1592 LIST_FIRST(&pgrp->pg_members));
1593 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1594 db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1595 p->p_pid, p, p->p_pgrp, p->p_comm);
1596 }
1597 }
1598 }
1599 }
1600 #endif /* DDB */
1601
1602 #ifdef KSTACK_CHECK_MAGIC
1603
1604 #define KSTACK_MAGIC 0xdeadbeaf
1605
1606 /* XXX should this be on a per-process basis? */
1607 static int kstackleftmin = KSTACK_SIZE;
1608 static int kstackleftthres = KSTACK_SIZE / 8;
1609
1610 void
1611 kstack_setup_magic(const struct lwp *l)
1612 {
1613 uint32_t *ip;
1614 uint32_t const *end;
1615
1616 KASSERT(l != NULL);
1617 KASSERT(l != &lwp0);
1618
1619 /*
1620 	 * Fill the whole stack with a magic number
1621 	 * so that later modification of it can be detected.
1622 */
1623 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1624 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1625 for (; ip < end; ip++) {
1626 *ip = KSTACK_MAGIC;
1627 }
1628 }
1629
1630 void
1631 kstack_check_magic(const struct lwp *l)
1632 {
1633 uint32_t const *ip, *end;
1634 int stackleft;
1635
1636 KASSERT(l != NULL);
1637
1638 /* don't check proc0 */ /*XXX*/
1639 if (l == &lwp0)
1640 return;
1641
1642 #ifdef __MACHINE_STACK_GROWS_UP
1643 /* stack grows upwards (eg. hppa) */
1644 ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1645 end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1646 for (ip--; ip >= end; ip--)
1647 if (*ip != KSTACK_MAGIC)
1648 break;
1649
1650 stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
1651 #else /* __MACHINE_STACK_GROWS_UP */
1652 /* stack grows downwards (eg. i386) */
1653 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1654 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1655 for (; ip < end; ip++)
1656 if (*ip != KSTACK_MAGIC)
1657 break;
1658
1659 stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
1660 #endif /* __MACHINE_STACK_GROWS_UP */
1661
1662 if (kstackleftmin > stackleft) {
1663 kstackleftmin = stackleft;
1664 if (stackleft < kstackleftthres)
1665 			printf("warning: kernel stack left %d bytes "
1666 "(pid %u:lid %u)\n", stackleft,
1667 (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1668 }
1669
1670 if (stackleft <= 0) {
1671 panic("magic on the top of kernel stack changed for "
1672 "pid %u, lid %u: maybe kernel stack overflow",
1673 (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1674 }
1675 }
1676 #endif /* KSTACK_CHECK_MAGIC */
1677
1678 int
1679 proclist_foreach_call(struct proclist *list,
1680 int (*callback)(struct proc *, void *arg), void *arg)
1681 {
1682 struct proc marker;
1683 struct proc *p;
1684 int ret = 0;
1685
1686 marker.p_flag = PK_MARKER;
1687 mutex_enter(proc_lock);
1688 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
1689 if (p->p_flag & PK_MARKER) {
1690 p = LIST_NEXT(p, p_list);
1691 continue;
1692 }
1693 LIST_INSERT_AFTER(p, &marker, p_list);
1694 ret = (*callback)(p, arg);
1695 KASSERT(mutex_owned(proc_lock));
1696 p = LIST_NEXT(&marker, p_list);
1697 LIST_REMOVE(&marker, p_list);
1698 }
1699 mutex_exit(proc_lock);
1700
1701 return ret;
1702 }
1703
1704 int
1705 proc_vmspace_getref(struct proc *p, struct vmspace **vm)
1706 {
1707
1708 /* XXXCDC: how should locking work here? */
1709
1710 /* curproc exception is for coredump. */
1711
1712 if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
1713 (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
1714 return EFAULT;
1715 }
1716
1717 uvmspace_addref(p->p_vmspace);
1718 *vm = p->p_vmspace;
1719
1720 return 0;
1721 }
1722
1723 /*
1724 * Acquire a write lock on the process credential.
1725 */
1726 void
1727 proc_crmod_enter(void)
1728 {
1729 struct lwp *l = curlwp;
1730 struct proc *p = l->l_proc;
1731 kauth_cred_t oc;
1732
1733 /* Reset what needs to be reset in plimit. */
1734 if (p->p_limit->pl_corename != defcorename) {
1735 lim_setcorename(p, defcorename, 0);
1736 }
1737
1738 mutex_enter(p->p_lock);
1739
1740 /* Ensure the LWP cached credentials are up to date. */
1741 if ((oc = l->l_cred) != p->p_cred) {
1742 kauth_cred_hold(p->p_cred);
1743 l->l_cred = p->p_cred;
1744 kauth_cred_free(oc);
1745 }
1746 }
1747
1748 /*
1749 * Set in a new process credential, and drop the write lock. The credential
1750 * must have a reference already. Optionally, free a no-longer required
1751 * credential. The scheduler also needs to inspect p_cred, so we also
1752 * briefly acquire the sched state mutex.
1753 */
1754 void
1755 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
1756 {
1757 struct lwp *l = curlwp, *l2;
1758 struct proc *p = l->l_proc;
1759 kauth_cred_t oc;
1760
1761 KASSERT(mutex_owned(p->p_lock));
1762
1763 /* Is there a new credential to set in? */
1764 if (scred != NULL) {
1765 p->p_cred = scred;
1766 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1767 if (l2 != l)
1768 l2->l_prflag |= LPR_CRMOD;
1769 }
1770
1771 /* Ensure the LWP cached credentials are up to date. */
1772 if ((oc = l->l_cred) != scred) {
1773 kauth_cred_hold(scred);
1774 l->l_cred = scred;
1775 }
1776 } else
1777 oc = NULL; /* XXXgcc */
1778
1779 if (sugid) {
1780 /*
1781 * Mark process as having changed credentials, stops
1782 * tracing etc.
1783 */
1784 p->p_flag |= PK_SUGID;
1785 }
1786
1787 mutex_exit(p->p_lock);
1788
1789 /* If there is a credential to be released, free it now. */
1790 if (fcred != NULL) {
1791 KASSERT(scred != NULL);
1792 kauth_cred_free(fcred);
1793 if (oc != scred)
1794 kauth_cred_free(oc);
1795 }
1796 }
1797
1798 /*
1799 * proc_specific_key_create --
1800 * Create a key for subsystem proc-specific data.
1801 */
1802 int
1803 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1804 {
1805
1806 return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
1807 }
1808
1809 /*
1810 * proc_specific_key_delete --
1811 * Delete a key for subsystem proc-specific data.
1812 */
1813 void
1814 proc_specific_key_delete(specificdata_key_t key)
1815 {
1816
1817 specificdata_key_delete(proc_specificdata_domain, key);
1818 }
1819
1820 /*
1821 * proc_initspecific --
1822 * Initialize a proc's specificdata container.
1823 */
1824 void
1825 proc_initspecific(struct proc *p)
1826 {
1827 int error __diagused;
1828
1829 error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
1830 KASSERT(error == 0);
1831 }
1832
1833 /*
1834 * proc_finispecific --
1835 * Finalize a proc's specificdata container.
1836 */
1837 void
1838 proc_finispecific(struct proc *p)
1839 {
1840
1841 specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
1842 }
1843
1844 /*
1845 * proc_getspecific --
1846 * Return proc-specific data corresponding to the specified key.
1847 */
1848 void *
1849 proc_getspecific(struct proc *p, specificdata_key_t key)
1850 {
1851
1852 return (specificdata_getspecific(proc_specificdata_domain,
1853 &p->p_specdataref, key));
1854 }
1855
1856 /*
1857 * proc_setspecific --
1858 * Set proc-specific data corresponding to the specified key.
1859 */
1860 void
1861 proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
1862 {
1863
1864 specificdata_setspecific(proc_specificdata_domain,
1865 &p->p_specdataref, key, data);
1866 }
1867
1868 int
1869 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
1870 {
1871 int r = 0;
1872
1873 if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
1874 kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
1875 /*
1876 * suid proc of ours or proc not ours
1877 */
1878 r = EPERM;
1879 } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
1880 /*
1881 * sgid proc has sgid back to us temporarily
1882 */
1883 r = EPERM;
1884 } else {
1885 /*
1886 * our rgid must be in target's group list (ie,
1887 * sub-processes started by a sgid process)
1888 */
1889 int ismember = 0;
1890
1891 if (kauth_cred_ismember_gid(cred,
1892 kauth_cred_getgid(target), &ismember) != 0 ||
1893 !ismember)
1894 r = EPERM;
1895 }
1896
1897 return (r);
1898 }
1899
1900 /*
1901 * sysctl stuff
1902 */
1903
1904 #define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc))
1905
1906 static const u_int sysctl_flagmap[] = {
1907 PK_ADVLOCK, P_ADVLOCK,
1908 PK_EXEC, P_EXEC,
1909 PK_NOCLDWAIT, P_NOCLDWAIT,
1910 PK_32, P_32,
1911 PK_CLDSIGIGN, P_CLDSIGIGN,
1912 PK_SUGID, P_SUGID,
1913 0
1914 };
1915
1916 static const u_int sysctl_sflagmap[] = {
1917 PS_NOCLDSTOP, P_NOCLDSTOP,
1918 PS_WEXIT, P_WEXIT,
1919 PS_STOPFORK, P_STOPFORK,
1920 PS_STOPEXEC, P_STOPEXEC,
1921 PS_STOPEXIT, P_STOPEXIT,
1922 0
1923 };
1924
1925 static const u_int sysctl_slflagmap[] = {
1926 PSL_TRACED, P_TRACED,
1927 PSL_CHTRACED, P_CHTRACED,
1928 PSL_SYSCALL, P_SYSCALL,
1929 0
1930 };
1931
1932 static const u_int sysctl_lflagmap[] = {
1933 PL_CONTROLT, P_CONTROLT,
1934 PL_PPWAIT, P_PPWAIT,
1935 0
1936 };
1937
1938 static const u_int sysctl_stflagmap[] = {
1939 PST_PROFIL, P_PROFIL,
1940 0
1941
1942 };
1943
1944 /* used by kern_lwp also */
1945 const u_int sysctl_lwpflagmap[] = {
1946 LW_SINTR, L_SINTR,
1947 LW_SYSTEM, L_SYSTEM,
1948 0
1949 };
1950
1951 /*
1952 * Find the most ``active'' lwp of a process and return it for ps display
1953 * purposes
1954 */
1955 static struct lwp *
1956 proc_active_lwp(struct proc *p)
1957 {
1958 static const int ostat[] = {
1959 0,
1960 2, /* LSIDL */
1961 6, /* LSRUN */
1962 5, /* LSSLEEP */
1963 4, /* LSSTOP */
1964 0, /* LSZOMB */
1965 1, /* LSDEAD */
1966 7, /* LSONPROC */
1967 3 /* LSSUSPENDED */
1968 };
1969
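	/*
	 * For illustration: with this ranking an LSONPROC lwp (7) is
	 * preferred over an LSRUN one (6), which beats LSSLEEP (5), and so
	 * on down to LSZOMB (0); ties are broken by the larger l_cpticks.
	 */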
1970 struct lwp *l, *lp = NULL;
1971 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1972 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
1973 if (lp == NULL ||
1974 ostat[l->l_stat] > ostat[lp->l_stat] ||
1975 (ostat[l->l_stat] == ostat[lp->l_stat] &&
1976 l->l_cpticks > lp->l_cpticks)) {
1977 lp = l;
1978 continue;
1979 }
1980 }
1981 return lp;
1982 }
1983
1984 static int
1985 sysctl_doeproc(SYSCTLFN_ARGS)
1986 {
1987 union {
1988 struct kinfo_proc kproc;
1989 struct kinfo_proc2 kproc2;
1990 } *kbuf;
1991 struct proc *p, *next, *marker;
1992 char *where, *dp;
1993 int type, op, arg, error;
1994 u_int elem_size, kelem_size, elem_count;
1995 size_t buflen, needed;
1996 bool match, zombie, mmmbrains;
1997 const bool allowaddr = get_expose_address(curproc);
1998
1999 if (namelen == 1 && name[0] == CTL_QUERY)
2000 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2001
2002 dp = where = oldp;
2003 buflen = where != NULL ? *oldlenp : 0;
2004 error = 0;
2005 needed = 0;
2006 type = rnode->sysctl_num;
2007
2008 if (type == KERN_PROC) {
2009 if (namelen == 0)
2010 return EINVAL;
2011 switch (op = name[0]) {
2012 case KERN_PROC_ALL:
2013 if (namelen != 1)
2014 return EINVAL;
2015 arg = 0;
2016 break;
2017 default:
2018 if (namelen != 2)
2019 return EINVAL;
2020 arg = name[1];
2021 break;
2022 }
2023 elem_count = 0; /* Hush little compiler, don't you cry */
2024 kelem_size = elem_size = sizeof(kbuf->kproc);
2025 } else {
2026 if (namelen != 4)
2027 return EINVAL;
2028 op = name[0];
2029 arg = name[1];
2030 elem_size = name[2];
2031 elem_count = name[3];
2032 kelem_size = sizeof(kbuf->kproc2);
2033 }
2034
2035 sysctl_unlock();
2036
2037 kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP);
2038 marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
2039 marker->p_flag = PK_MARKER;
2040
2041 mutex_enter(proc_lock);
2042 	/*
2043 	 * Start with the zombies so that a dying process, which is being moved
2044 	 * from the list of live processes to the zombie list, is not reported
2045 	 * twice.
2045 	 */
2046 mmmbrains = true;
2047 for (p = LIST_FIRST(&zombproc);; p = next) {
2048 if (p == NULL) {
2049 if (mmmbrains) {
2050 p = LIST_FIRST(&allproc);
2051 mmmbrains = false;
2052 }
2053 if (p == NULL)
2054 break;
2055 }
2056 next = LIST_NEXT(p, p_list);
2057 if ((p->p_flag & PK_MARKER) != 0)
2058 continue;
2059
2060 /*
2061 * Skip embryonic processes.
2062 */
2063 if (p->p_stat == SIDL)
2064 continue;
2065
2066 mutex_enter(p->p_lock);
2067 error = kauth_authorize_process(l->l_cred,
2068 KAUTH_PROCESS_CANSEE, p,
2069 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL);
2070 if (error != 0) {
2071 mutex_exit(p->p_lock);
2072 continue;
2073 }
2074
2075 		/*
2076 		 * Handling all the operations in one switch, at the cost of some
2077 		 * algorithmic complexity, is deliberate.  Splitting this function
2078 		 * into several near-identical copies would grow the code and the
2079 		 * maintenance burden, while the speedup is negligible on
2080 		 * practical systems.
2080 		 */
2081 switch (op) {
2082 case KERN_PROC_PID:
2083 match = (p->p_pid == (pid_t)arg);
2084 break;
2085
2086 case KERN_PROC_PGRP:
2087 match = (p->p_pgrp->pg_id == (pid_t)arg);
2088 break;
2089
2090 case KERN_PROC_SESSION:
2091 match = (p->p_session->s_sid == (pid_t)arg);
2092 break;
2093
2094 case KERN_PROC_TTY:
2095 match = true;
2096 if (arg == (int) KERN_PROC_TTY_REVOKE) {
2097 if ((p->p_lflag & PL_CONTROLT) == 0 ||
2098 p->p_session->s_ttyp == NULL ||
2099 p->p_session->s_ttyvp != NULL) {
2100 match = false;
2101 }
2102 } else if ((p->p_lflag & PL_CONTROLT) == 0 ||
2103 p->p_session->s_ttyp == NULL) {
2104 if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
2105 match = false;
2106 }
2107 } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
2108 match = false;
2109 }
2110 break;
2111
2112 case KERN_PROC_UID:
2113 match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
2114 break;
2115
2116 case KERN_PROC_RUID:
2117 match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
2118 break;
2119
2120 case KERN_PROC_GID:
2121 			match = (kauth_cred_getegid(p->p_cred) == (gid_t)arg);
2122 break;
2123
2124 case KERN_PROC_RGID:
2125 			match = (kauth_cred_getgid(p->p_cred) == (gid_t)arg);
2126 break;
2127
2128 case KERN_PROC_ALL:
2129 match = true;
2130 /* allow everything */
2131 break;
2132
2133 default:
2134 error = EINVAL;
2135 mutex_exit(p->p_lock);
2136 goto cleanup;
2137 }
2138 if (!match) {
2139 mutex_exit(p->p_lock);
2140 continue;
2141 }
2142
2143 /*
2144 * Grab a hold on the process.
2145 */
2146 if (mmmbrains) {
2147 zombie = true;
2148 } else {
2149 zombie = !rw_tryenter(&p->p_reflock, RW_READER);
2150 }
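		/*
		 * If we could not (or chose not to) take a hold on the process
		 * via p_reflock, thread the marker into the list after it so
		 * that we can find our place again after proc_lock is dropped
		 * for the copyout below.
		 */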
2151 if (zombie) {
2152 LIST_INSERT_AFTER(p, marker, p_list);
2153 }
2154
2155 if (buflen >= elem_size &&
2156 (type == KERN_PROC || elem_count > 0)) {
2157 ruspace(p); /* Update process vm resource use */
2158
2159 if (type == KERN_PROC) {
2160 fill_proc(p, &kbuf->kproc.kp_proc, allowaddr);
2161 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie,
2162 allowaddr);
2163 } else {
2164 fill_kproc2(p, &kbuf->kproc2, zombie,
2165 allowaddr);
2166 elem_count--;
2167 }
2168 mutex_exit(p->p_lock);
2169 mutex_exit(proc_lock);
2170 /*
2171 * Copy out elem_size, but not larger than kelem_size
2172 */
2173 error = sysctl_copyout(l, kbuf, dp,
2174 uimin(kelem_size, elem_size));
2175 mutex_enter(proc_lock);
2176 if (error) {
2177 goto bah;
2178 }
2179 dp += elem_size;
2180 buflen -= elem_size;
2181 } else {
2182 mutex_exit(p->p_lock);
2183 }
2184 needed += elem_size;
2185
2186 /*
2187 * Release reference to process.
2188 */
2189 if (zombie) {
2190 next = LIST_NEXT(marker, p_list);
2191 LIST_REMOVE(marker, p_list);
2192 } else {
2193 rw_exit(&p->p_reflock);
2194 next = LIST_NEXT(p, p_list);
2195 }
2196
2197 		/*
2198 		 * A single PID was requested; no need to scan any further.
2199 		 */
2200 if (op == KERN_PROC_PID)
2201 break;
2202 }
2203 mutex_exit(proc_lock);
2204
2205 if (where != NULL) {
2206 *oldlenp = dp - where;
2207 if (needed > *oldlenp) {
2208 error = ENOMEM;
2209 goto out;
2210 }
2211 } else {
2212 needed += KERN_PROCSLOP;
2213 *oldlenp = needed;
2214 }
2215 kmem_free(kbuf, sizeof(*kbuf));
2216 kmem_free(marker, sizeof(*marker));
2217 sysctl_relock();
2218 return 0;
2219 bah:
2220 if (zombie)
2221 LIST_REMOVE(marker, p_list);
2222 else
2223 rw_exit(&p->p_reflock);
2224 cleanup:
2225 mutex_exit(proc_lock);
2226 out:
2227 kmem_free(kbuf, sizeof(*kbuf));
2228 kmem_free(marker, sizeof(*marker));
2229 sysctl_relock();
2230 return error;
2231 }
2232
2233 int
2234 copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
2235 {
2236 #if !defined(_RUMPKERNEL)
2237 int retval;
2238
2239 if (p->p_flag & PK_32) {
2240 MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo),
2241 enosys(), retval);
2242 return retval;
2243 }
2244 #endif /* !defined(_RUMPKERNEL) */
2245
2246 return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
2247 }
2248
2249 static int
2250 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
2251 {
2252 void **cookie = cookie_;
2253 struct lwp *l = cookie[0];
2254 char *dst = cookie[1];
2255
2256 return sysctl_copyout(l, src, dst + off, len);
2257 }
2258
2259 /*
2260 * sysctl helper routine for kern.proc_args pseudo-subtree.
2261 */
2262 static int
2263 sysctl_kern_proc_args(SYSCTLFN_ARGS)
2264 {
2265 struct ps_strings pss;
2266 struct proc *p;
2267 pid_t pid;
2268 int type, error;
2269 void *cookie[2];
2270
2271 if (namelen == 1 && name[0] == CTL_QUERY)
2272 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2273
2274 if (newp != NULL || namelen != 2)
2275 return (EINVAL);
2276 pid = name[0];
2277 type = name[1];
2278
2279 switch (type) {
2280 case KERN_PROC_PATHNAME:
2281 sysctl_unlock();
2282 error = fill_pathname(l, pid, oldp, oldlenp);
2283 sysctl_relock();
2284 return error;
2285
2286 case KERN_PROC_CWD:
2287 sysctl_unlock();
2288 error = fill_cwd(l, pid, oldp, oldlenp);
2289 sysctl_relock();
2290 return error;
2291
2292 case KERN_PROC_ARGV:
2293 case KERN_PROC_NARGV:
2294 case KERN_PROC_ENV:
2295 case KERN_PROC_NENV:
2296 /* ok */
2297 break;
2298 default:
2299 return (EINVAL);
2300 }
2301
2302 sysctl_unlock();
2303
2304 /* check pid */
2305 mutex_enter(proc_lock);
2306 if ((p = proc_find(pid)) == NULL) {
2307 error = EINVAL;
2308 goto out_locked;
2309 }
2310 mutex_enter(p->p_lock);
2311
2312 /* Check permission. */
2313 if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
2314 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2315 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
2316 else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
2317 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2318 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
2319 else
2320 error = EINVAL; /* XXXGCC */
2321 if (error) {
2322 mutex_exit(p->p_lock);
2323 goto out_locked;
2324 }
2325
2326 if (oldp == NULL) {
2327 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
2328 *oldlenp = sizeof (int);
2329 else
2330 *oldlenp = ARG_MAX; /* XXX XXX XXX */
2331 error = 0;
2332 mutex_exit(p->p_lock);
2333 goto out_locked;
2334 }
2335
2336 /*
2337 * Zombies don't have a stack, so we can't read their psstrings.
2338 * System processes also don't have a user stack.
2339 */
2340 if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
2341 error = EINVAL;
2342 mutex_exit(p->p_lock);
2343 goto out_locked;
2344 }
2345
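	/*
	 * Take a read hold on p_reflock so the process is not reaped while
	 * its arguments are copied out with proc_lock and p_lock released.
	 */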
2346 error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
2347 mutex_exit(p->p_lock);
2348 if (error) {
2349 goto out_locked;
2350 }
2351 mutex_exit(proc_lock);
2352
2353 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
2354 int value;
2355 if ((error = copyin_psstrings(p, &pss)) == 0) {
2356 if (type == KERN_PROC_NARGV)
2357 value = pss.ps_nargvstr;
2358 else
2359 value = pss.ps_nenvstr;
2360 error = sysctl_copyout(l, &value, oldp, sizeof(value));
2361 *oldlenp = sizeof(value);
2362 }
2363 } else {
2364 cookie[0] = l;
2365 cookie[1] = oldp;
2366 error = copy_procargs(p, type, oldlenp,
2367 copy_procargs_sysctl_cb, cookie);
2368 }
2369 rw_exit(&p->p_reflock);
2370 sysctl_relock();
2371 return error;
2372
2373 out_locked:
2374 mutex_exit(proc_lock);
2375 sysctl_relock();
2376 return error;
2377 }
2378
2379 int
2380 copy_procargs(struct proc *p, int oid, size_t *limit,
2381 int (*cb)(void *, const void *, size_t, size_t), void *cookie)
2382 {
2383 struct ps_strings pss;
2384 size_t len, i, loaded, entry_len;
2385 struct uio auio;
2386 struct iovec aiov;
2387 int error, argvlen;
2388 char *arg;
2389 char **argv;
2390 vaddr_t user_argv;
2391 struct vmspace *vmspace;
2392
2393 /*
2394 * Allocate a temporary buffer to hold the argument vector and
2395 	 * the arguments themselves.
2396 */
2397 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2398 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2399
2400 	/*
2401 	 * Take a reference to the process's address space so it cannot go
2402 	 * away while we read from it.
2402 	 */
2403 vmspace = p->p_vmspace;
2404 uvmspace_addref(vmspace);
2405
2406 /*
2407 * Read in the ps_strings structure.
2408 */
2409 if ((error = copyin_psstrings(p, &pss)) != 0)
2410 goto done;
2411
2412 /*
2413 * Now read the address of the argument vector.
2414 */
2415 switch (oid) {
2416 case KERN_PROC_ARGV:
2417 user_argv = (uintptr_t)pss.ps_argvstr;
2418 argvlen = pss.ps_nargvstr;
2419 break;
2420 case KERN_PROC_ENV:
2421 user_argv = (uintptr_t)pss.ps_envstr;
2422 argvlen = pss.ps_nenvstr;
2423 break;
2424 default:
2425 error = EINVAL;
2426 goto done;
2427 }
2428
2429 if (argvlen < 0) {
2430 error = EIO;
2431 goto done;
2432 }
2433
2435 /*
2436 * Now copy each string.
2437 */
2438 len = 0; /* bytes written to user buffer */
2439 loaded = 0; /* bytes from argv already processed */
2440 i = 0; /* To make compiler happy */
2441 entry_len = PROC_PTRSZ(p);
2442
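	/*
	 * Walk the argument (or environment) vector: refill argv[] from the
	 * target address space one page's worth of pointers at a time, then
	 * copy each NUL-terminated string out page by page via the callback.
	 */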
2443 for (; argvlen; --argvlen) {
2444 int finished = 0;
2445 vaddr_t base;
2446 size_t xlen;
2447 int j;
2448
2449 if (loaded == 0) {
2450 size_t rem = entry_len * argvlen;
2451 loaded = MIN(rem, PAGE_SIZE);
2452 error = copyin_vmspace(vmspace,
2453 (const void *)user_argv, argv, loaded);
2454 if (error)
2455 break;
2456 user_argv += loaded;
2457 i = 0;
2458 }
2459
2460 #if !defined(_RUMPKERNEL)
2461 if (p->p_flag & PK_32)
2462 MODULE_HOOK_CALL(kern_proc32_base_hook,
2463 (argv, i++), 0, base);
2464 else
2465 #endif /* !defined(_RUMPKERNEL) */
2466 base = (vaddr_t)argv[i++];
2467 loaded -= entry_len;
2468
2469 /*
2470 * The program has messed around with its arguments,
2471 * possibly deleting some, and replacing them with
2472 		 * NULLs.  Treat this as the last argument and not
2473 * a failure.
2474 */
2475 if (base == 0)
2476 break;
2477
2478 while (!finished) {
2479 xlen = PAGE_SIZE - (base & PAGE_MASK);
2480
2481 aiov.iov_base = arg;
2482 aiov.iov_len = PAGE_SIZE;
2483 auio.uio_iov = &aiov;
2484 auio.uio_iovcnt = 1;
2485 auio.uio_offset = base;
2486 auio.uio_resid = xlen;
2487 auio.uio_rw = UIO_READ;
2488 UIO_SETUP_SYSSPACE(&auio);
2489 error = uvm_io(&vmspace->vm_map, &auio, 0);
2490 if (error)
2491 goto done;
2492
2493 /* Look for the end of the string */
2494 for (j = 0; j < xlen; j++) {
2495 if (arg[j] == '\0') {
2496 xlen = j + 1;
2497 finished = 1;
2498 break;
2499 }
2500 }
2501
2502 /* Check for user buffer overflow */
2503 if (len + xlen > *limit) {
2504 finished = 1;
2505 if (len > *limit)
2506 xlen = 0;
2507 else
2508 xlen = *limit - len;
2509 }
2510
2511 /* Copyout the page */
2512 error = (*cb)(cookie, arg, len, xlen);
2513 if (error)
2514 goto done;
2515
2516 len += xlen;
2517 base += xlen;
2518 }
2519 }
2520 *limit = len;
2521
2522 done:
2523 kmem_free(argv, PAGE_SIZE);
2524 kmem_free(arg, PAGE_SIZE);
2525 uvmspace_free(vmspace);
2526 return error;
2527 }
2528
2529 /*
2530 * Fill in a proc structure for the specified process.
2531 */
2532 static void
2533 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr)
2534 {
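	/*
	 * Fields that could reveal kernel addresses are copied through
	 * COND_SET_VALUE(), which only fills them in when allowaddr permits;
	 * plain scalar fields are always copied.
	 */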
2535 COND_SET_VALUE(p->p_list, psrc->p_list, allowaddr);
2536 COND_SET_VALUE(p->p_auxlock, psrc->p_auxlock, allowaddr);
2537 COND_SET_VALUE(p->p_lock, psrc->p_lock, allowaddr);
2538 COND_SET_VALUE(p->p_stmutex, psrc->p_stmutex, allowaddr);
2539 COND_SET_VALUE(p->p_reflock, psrc->p_reflock, allowaddr);
2540 COND_SET_VALUE(p->p_waitcv, psrc->p_waitcv, allowaddr);
2541 COND_SET_VALUE(p->p_lwpcv, psrc->p_lwpcv, allowaddr);
2542 COND_SET_VALUE(p->p_cred, psrc->p_cred, allowaddr);
2543 COND_SET_VALUE(p->p_fd, psrc->p_fd, allowaddr);
2544 COND_SET_VALUE(p->p_cwdi, psrc->p_cwdi, allowaddr);
2545 COND_SET_VALUE(p->p_stats, psrc->p_stats, allowaddr);
2546 COND_SET_VALUE(p->p_limit, psrc->p_limit, allowaddr);
2547 COND_SET_VALUE(p->p_vmspace, psrc->p_vmspace, allowaddr);
2548 COND_SET_VALUE(p->p_sigacts, psrc->p_sigacts, allowaddr);
2549 COND_SET_VALUE(p->p_aio, psrc->p_aio, allowaddr);
2550 p->p_mqueue_cnt = psrc->p_mqueue_cnt;
2551 COND_SET_VALUE(p->p_specdataref, psrc->p_specdataref, allowaddr);
2552 p->p_exitsig = psrc->p_exitsig;
2553 p->p_flag = psrc->p_flag;
2554 p->p_sflag = psrc->p_sflag;
2555 p->p_slflag = psrc->p_slflag;
2556 p->p_lflag = psrc->p_lflag;
2557 p->p_stflag = psrc->p_stflag;
2558 p->p_stat = psrc->p_stat;
2559 p->p_trace_enabled = psrc->p_trace_enabled;
2560 p->p_pid = psrc->p_pid;
2561 COND_SET_VALUE(p->p_pglist, psrc->p_pglist, allowaddr);
2562 COND_SET_VALUE(p->p_pptr, psrc->p_pptr, allowaddr);
2563 COND_SET_VALUE(p->p_sibling, psrc->p_sibling, allowaddr);
2564 COND_SET_VALUE(p->p_children, psrc->p_children, allowaddr);
2565 COND_SET_VALUE(p->p_lwps, psrc->p_lwps, allowaddr);
2566 COND_SET_VALUE(p->p_raslist, psrc->p_raslist, allowaddr);
2567 p->p_nlwps = psrc->p_nlwps;
2568 p->p_nzlwps = psrc->p_nzlwps;
2569 p->p_nrlwps = psrc->p_nrlwps;
2570 p->p_nlwpwait = psrc->p_nlwpwait;
2571 p->p_ndlwps = psrc->p_ndlwps;
2572 p->p_nstopchild = psrc->p_nstopchild;
2573 p->p_waited = psrc->p_waited;
2574 COND_SET_VALUE(p->p_zomblwp, psrc->p_zomblwp, allowaddr);
2575 COND_SET_VALUE(p->p_vforklwp, psrc->p_vforklwp, allowaddr);
2576 COND_SET_VALUE(p->p_sched_info, psrc->p_sched_info, allowaddr);
2577 p->p_estcpu = psrc->p_estcpu;
2578 p->p_estcpu_inherited = psrc->p_estcpu_inherited;
2579 p->p_forktime = psrc->p_forktime;
2580 p->p_pctcpu = psrc->p_pctcpu;
2581 COND_SET_VALUE(p->p_opptr, psrc->p_opptr, allowaddr);
2582 COND_SET_VALUE(p->p_timers, psrc->p_timers, allowaddr);
2583 p->p_rtime = psrc->p_rtime;
2584 p->p_uticks = psrc->p_uticks;
2585 p->p_sticks = psrc->p_sticks;
2586 p->p_iticks = psrc->p_iticks;
2587 p->p_xutime = psrc->p_xutime;
2588 p->p_xstime = psrc->p_xstime;
2589 p->p_traceflag = psrc->p_traceflag;
2590 COND_SET_VALUE(p->p_tracep, psrc->p_tracep, allowaddr);
2591 COND_SET_VALUE(p->p_textvp, psrc->p_textvp, allowaddr);
2592 COND_SET_VALUE(p->p_emul, psrc->p_emul, allowaddr);
2593 COND_SET_VALUE(p->p_emuldata, psrc->p_emuldata, allowaddr);
2594 COND_SET_VALUE(p->p_execsw, psrc->p_execsw, allowaddr);
2595 COND_SET_VALUE(p->p_klist, psrc->p_klist, allowaddr);
2596 COND_SET_VALUE(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr);
2597 COND_SET_VALUE(p->p_sigpend, psrc->p_sigpend, allowaddr);
2598 COND_SET_VALUE(p->p_lwpctl, psrc->p_lwpctl, allowaddr);
2599 p->p_ppid = psrc->p_ppid;
2600 p->p_oppid = psrc->p_oppid;
2601 COND_SET_VALUE(p->p_path, psrc->p_path, allowaddr);
2602 COND_SET_VALUE(p->p_sigctx, psrc->p_sigctx, allowaddr);
2603 p->p_nice = psrc->p_nice;
2604 memcpy(p->p_comm, psrc->p_comm, sizeof(p->p_comm));
2605 COND_SET_VALUE(p->p_pgrp, psrc->p_pgrp, allowaddr);
2606 COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr);
2607 p->p_pax = psrc->p_pax;
2608 p->p_xexit = psrc->p_xexit;
2609 p->p_xsig = psrc->p_xsig;
2610 p->p_acflag = psrc->p_acflag;
2611 COND_SET_VALUE(p->p_md, psrc->p_md, allowaddr);
2612 p->p_stackbase = psrc->p_stackbase;
2613 COND_SET_VALUE(p->p_dtrace, psrc->p_dtrace, allowaddr);
2614 }
2615
2616 /*
2617 * Fill in an eproc structure for the specified process.
2618 */
2619 void
2620 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr)
2621 {
2622 struct tty *tp;
2623 struct lwp *l;
2624
2625 KASSERT(mutex_owned(proc_lock));
2626 KASSERT(mutex_owned(p->p_lock));
2627
2628 COND_SET_VALUE(ep->e_paddr, p, allowaddr);
2629 COND_SET_VALUE(ep->e_sess, p->p_session, allowaddr);
2630 if (p->p_cred) {
2631 kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2632 kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2633 }
2634 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2635 struct vmspace *vm = p->p_vmspace;
2636
2637 ep->e_vm.vm_rssize = vm_resident_count(vm);
2638 ep->e_vm.vm_tsize = vm->vm_tsize;
2639 ep->e_vm.vm_dsize = vm->vm_dsize;
2640 ep->e_vm.vm_ssize = vm->vm_ssize;
2641 ep->e_vm.vm_map.size = vm->vm_map.size;
2642
2643 		/* Pick the most active LWP for ps display purposes */
2644 l = proc_active_lwp(p);
2645 KASSERT(l != NULL);
2646 lwp_lock(l);
2647 if (l->l_wchan)
2648 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2649 lwp_unlock(l);
2650 }
2651 ep->e_ppid = p->p_ppid;
2652 if (p->p_pgrp && p->p_session) {
2653 ep->e_pgid = p->p_pgrp->pg_id;
2654 ep->e_jobc = p->p_pgrp->pg_jobc;
2655 ep->e_sid = p->p_session->s_sid;
2656 if ((p->p_lflag & PL_CONTROLT) &&
2657 (tp = p->p_session->s_ttyp)) {
2658 ep->e_tdev = tp->t_dev;
2659 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2660 COND_SET_VALUE(ep->e_tsess, tp->t_session, allowaddr);
2661 } else
2662 ep->e_tdev = (uint32_t)NODEV;
2663 ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0;
2664 if (SESS_LEADER(p))
2665 ep->e_flag |= EPROC_SLEADER;
2666 strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME);
2667 }
2668 ep->e_xsize = ep->e_xrssize = 0;
2669 ep->e_xccount = ep->e_xswrss = 0;
2670 }
2671
2672 /*
2673 * Fill in a kinfo_proc2 structure for the specified process.
2674 */
2675 void
2676 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr)
2677 {
2678 struct tty *tp;
2679 struct lwp *l, *l2;
2680 struct timeval ut, st, rt;
2681 sigset_t ss1, ss2;
2682 struct rusage ru;
2683 struct vmspace *vm;
2684
2685 KASSERT(mutex_owned(proc_lock));
2686 KASSERT(mutex_owned(p->p_lock));
2687
2688 sigemptyset(&ss1);
2689 sigemptyset(&ss2);
2690
2691 COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr);
2692 COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr);
2693 COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr);
2694 COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr);
2695 COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr);
2696 COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr);
2697 COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr);
2698 COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr);
2699 ki->p_tsess = 0; /* may be changed if controlling tty below */
2700 COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr);
2701 ki->p_eflag = 0;
2702 ki->p_exitsig = p->p_exitsig;
2703 ki->p_flag = L_INMEM; /* Process never swapped out */
2704 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2705 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2706 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2707 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2708 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2709 ki->p_pid = p->p_pid;
2710 ki->p_ppid = p->p_ppid;
2711 ki->p_uid = kauth_cred_geteuid(p->p_cred);
2712 ki->p_ruid = kauth_cred_getuid(p->p_cred);
2713 ki->p_gid = kauth_cred_getegid(p->p_cred);
2714 ki->p_rgid = kauth_cred_getgid(p->p_cred);
2715 ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2716 ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2717 ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
2718 kauth_cred_getgroups(p->p_cred, ki->p_groups,
2719 uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
2720 UIO_SYSSPACE);
2721
2722 ki->p_uticks = p->p_uticks;
2723 ki->p_sticks = p->p_sticks;
2724 ki->p_iticks = p->p_iticks;
2725 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */
2726 COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr);
2727 ki->p_traceflag = p->p_traceflag;
2728
2729 memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore,sizeof(ki_sigset_t));
2730 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2731
2732 ki->p_cpticks = 0;
2733 ki->p_pctcpu = p->p_pctcpu;
2734 ki->p_estcpu = 0;
2735 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
2736 ki->p_realstat = p->p_stat;
2737 ki->p_nice = p->p_nice;
2738 ki->p_xstat = P_WAITSTATUS(p);
2739 ki->p_acflag = p->p_acflag;
2740
2741 strncpy(ki->p_comm, p->p_comm,
2742 uimin(sizeof(ki->p_comm), sizeof(p->p_comm)));
2743 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2744
2745 ki->p_nlwps = p->p_nlwps;
2746 ki->p_realflag = ki->p_flag;
2747
2748 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2749 vm = p->p_vmspace;
2750 ki->p_vm_rssize = vm_resident_count(vm);
2751 ki->p_vm_tsize = vm->vm_tsize;
2752 ki->p_vm_dsize = vm->vm_dsize;
2753 ki->p_vm_ssize = vm->vm_ssize;
2754 ki->p_vm_vsize = atop(vm->vm_map.size);
2755 /*
2756 * Since the stack is initially mapped mostly with
2757 * PROT_NONE and grown as needed, adjust the "mapped size"
2758 * to skip the unused stack portion.
2759 */
2760 ki->p_vm_msize =
2761 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2762
2763 		/* Pick the most active LWP for ps display purposes */
2764 l = proc_active_lwp(p);
2765 KASSERT(l != NULL);
2766 lwp_lock(l);
2767 ki->p_nrlwps = p->p_nrlwps;
2768 ki->p_forw = 0;
2769 ki->p_back = 0;
2770 COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr);
2771 ki->p_stat = l->l_stat;
2772 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2773 ki->p_swtime = l->l_swtime;
2774 ki->p_slptime = l->l_slptime;
2775 if (l->l_stat == LSONPROC)
2776 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2777 else
2778 ki->p_schedflags = 0;
2779 ki->p_priority = lwp_eprio(l);
2780 ki->p_usrpri = l->l_priority;
2781 if (l->l_wchan)
2782 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2783 COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr);
2784 ki->p_cpuid = cpu_index(l->l_cpu);
2785 lwp_unlock(l);
2786 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2787 /* This is hardly correct, but... */
2788 sigplusset(&l->l_sigpend.sp_set, &ss1);
2789 sigplusset(&l->l_sigmask, &ss2);
2790 ki->p_cpticks += l->l_cpticks;
2791 ki->p_pctcpu += l->l_pctcpu;
2792 ki->p_estcpu += l->l_estcpu;
2793 }
2794 }
2795 sigplusset(&p->p_sigpend.sp_set, &ss1);
2796 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2797 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2798
2799 if (p->p_session != NULL) {
2800 ki->p_sid = p->p_session->s_sid;
2801 ki->p__pgid = p->p_pgrp->pg_id;
2802 if (p->p_session->s_ttyvp)
2803 ki->p_eflag |= EPROC_CTTY;
2804 if (SESS_LEADER(p))
2805 ki->p_eflag |= EPROC_SLEADER;
2806 strncpy(ki->p_login, p->p_session->s_login,
2807 uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
2808 ki->p_jobc = p->p_pgrp->pg_jobc;
2809 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2810 ki->p_tdev = tp->t_dev;
2811 ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2812 COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session),
2813 allowaddr);
2814 } else {
2815 ki->p_tdev = (int32_t)NODEV;
2816 }
2817 }
2818
2819 if (!P_ZOMBIE(p) && !zombie) {
2820 ki->p_uvalid = 1;
2821 ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2822 ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2823
2824 calcru(p, &ut, &st, NULL, &rt);
2825 ki->p_rtime_sec = rt.tv_sec;
2826 ki->p_rtime_usec = rt.tv_usec;
2827 ki->p_uutime_sec = ut.tv_sec;
2828 ki->p_uutime_usec = ut.tv_usec;
2829 ki->p_ustime_sec = st.tv_sec;
2830 ki->p_ustime_usec = st.tv_usec;
2831
2832 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2833 ki->p_uru_nvcsw = 0;
2834 ki->p_uru_nivcsw = 0;
2835 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
2836 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
2837 ki->p_uru_nivcsw += l2->l_nivcsw;
2838 ruadd(&ru, &l2->l_ru);
2839 }
2840 ki->p_uru_maxrss = ru.ru_maxrss;
2841 ki->p_uru_ixrss = ru.ru_ixrss;
2842 ki->p_uru_idrss = ru.ru_idrss;
2843 ki->p_uru_isrss = ru.ru_isrss;
2844 ki->p_uru_minflt = ru.ru_minflt;
2845 ki->p_uru_majflt = ru.ru_majflt;
2846 ki->p_uru_nswap = ru.ru_nswap;
2847 ki->p_uru_inblock = ru.ru_inblock;
2848 ki->p_uru_oublock = ru.ru_oublock;
2849 ki->p_uru_msgsnd = ru.ru_msgsnd;
2850 ki->p_uru_msgrcv = ru.ru_msgrcv;
2851 ki->p_uru_nsignals = ru.ru_nsignals;
2852
2853 timeradd(&p->p_stats->p_cru.ru_utime,
2854 &p->p_stats->p_cru.ru_stime, &ut);
2855 ki->p_uctime_sec = ut.tv_sec;
2856 ki->p_uctime_usec = ut.tv_usec;
2857 }
2858 }
2859
2860
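/*
 * Look up a process for the sysctl handlers and return it through *p with
 * p_lock held, after a KAUTH_PROCESS_CANSEE check.  A pid of -1 selects the
 * calling process, which is returned without taking p_lock.
 */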
2861 int
2862 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid)
2863 {
2864 int error;
2865
2866 mutex_enter(proc_lock);
2867 if (pid == -1)
2868 *p = l->l_proc;
2869 else
2870 *p = proc_find(pid);
2871
2872 if (*p == NULL) {
2873 if (pid != -1)
2874 mutex_exit(proc_lock);
2875 return ESRCH;
2876 }
2877 if (pid != -1)
2878 mutex_enter((*p)->p_lock);
2879 mutex_exit(proc_lock);
2880
2881 error = kauth_authorize_process(l->l_cred,
2882 KAUTH_PROCESS_CANSEE, *p,
2883 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
2884 if (error) {
2885 if (pid != -1)
2886 mutex_exit((*p)->p_lock);
2887 }
2888 return error;
2889 }
2890
2891 static int
2892 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
2893 {
2894 int error;
2895 struct proc *p;
2896
2897 if ((error = proc_find_locked(l, &p, pid)) != 0)
2898 return error;
2899
2900 if (p->p_path == NULL) {
2901 if (pid != -1)
2902 mutex_exit(p->p_lock);
2903 return ENOENT;
2904 }
2905
2906 size_t len = strlen(p->p_path) + 1;
2907 if (oldp != NULL) {
2908 size_t copylen = uimin(len, *oldlenp);
2909 error = sysctl_copyout(l, p->p_path, oldp, copylen);
2910 if (error == 0 && *oldlenp < len)
2911 error = ENOSPC;
2912 }
2913 *oldlenp = len;
2914 if (pid != -1)
2915 mutex_exit(p->p_lock);
2916 return error;
2917 }
2918
2919 static int
2920 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
2921 {
2922 int error;
2923 struct proc *p;
2924 char *path;
2925 char *bp, *bend;
2926 struct cwdinfo *cwdi;
2927 struct vnode *vp;
2928 size_t len, lenused;
2929
2930 if ((error = proc_find_locked(l, &p, pid)) != 0)
2931 return error;
2932
2933 len = MAXPATHLEN * 4;
2934
2935 path = kmem_alloc(len, KM_SLEEP);
2936
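	/*
	 * getcwd_common() builds the path backwards from the end of the
	 * buffer, so start bp at the end and let it walk towards path[0].
	 */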
2937 bp = &path[len];
2938 bend = bp;
2939 *(--bp) = '\0';
2940
2941 cwdi = p->p_cwdi;
2942 rw_enter(&cwdi->cwdi_lock, RW_READER);
2943 vp = cwdi->cwdi_cdir;
2944 error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l);
2945 rw_exit(&cwdi->cwdi_lock);
2946
2947 if (error)
2948 goto out;
2949
2950 lenused = bend - bp;
2951
2952 if (oldp != NULL) {
2953 size_t copylen = uimin(lenused, *oldlenp);
2954 error = sysctl_copyout(l, bp, oldp, copylen);
2955 if (error == 0 && *oldlenp < lenused)
2956 error = ENOSPC;
2957 }
2958 *oldlenp = lenused;
2959 out:
2960 if (pid != -1)
2961 mutex_exit(p->p_lock);
2962 kmem_free(path, len);
2963 return error;
2964 }
2965
2966 int
2967 proc_getauxv(struct proc *p, void **buf, size_t *len)
2968 {
2969 struct ps_strings pss;
2970 int error;
2971 void *uauxv, *kauxv;
2972 size_t size;
2973
2974 if ((error = copyin_psstrings(p, &pss)) != 0)
2975 return error;
2976 if (pss.ps_envstr == NULL)
2977 return EIO;
2978
2979 size = p->p_execsw->es_arglen;
2980 if (size == 0)
2981 return EIO;
2982
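	/*
	 * The auxiliary vector sits immediately after the NULL-terminated
	 * environment pointer array on the user stack.
	 */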
2983 size_t ptrsz = PROC_PTRSZ(p);
2984 uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz);
2985
2986 kauxv = kmem_alloc(size, KM_SLEEP);
2987
2988 error = copyin_proc(p, uauxv, kauxv, size);
2989 if (error) {
2990 kmem_free(kauxv, size);
2991 return error;
2992 }
2993
2994 *buf = kauxv;
2995 *len = size;
2996
2997 return 0;
2998 }
2999
3000
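/*
 * sysctl helper for the kernel-address exposure knob: only the values 0..2
 * are accepted, and changing the setting requires KAUTH_SYSTEM_KERNADDR
 * privilege.
 */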
3001 static int
3002 sysctl_security_expose_address(SYSCTLFN_ARGS)
3003 {
3004 int expose_address, error;
3005 struct sysctlnode node;
3006
3007 node = *rnode;
3008 node.sysctl_data = &expose_address;
3009 expose_address = *(int *)rnode->sysctl_data;
3010 error = sysctl_lookup(SYSCTLFN_CALL(&node));
3011 if (error || newp == NULL)
3012 return error;
3013
3014 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR,
3015 0, NULL, NULL, NULL))
3016 return EPERM;
3017
3018 switch (expose_address) {
3019 case 0:
3020 case 1:
3021 case 2:
3022 break;
3023 default:
3024 return EINVAL;
3025 }
3026
3027 *(int *)rnode->sysctl_data = expose_address;
3028
3029 return 0;
3030 }
3031
3032 bool
3033 get_expose_address(struct proc *p)
3034 {
3035 /* allow only if sysctl variable is set or privileged */
3036 return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE,
3037 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0;
3038 }
3039