/*	$NetBSD: kern_proc.c,v 1.260 2020/09/05 16:30:12 riastradh Exp $	*/
2
3 /*-
4 * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1989, 1991, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)kern_proc.c 8.7 (Berkeley) 2/14/95
62 */
63
64 #include <sys/cdefs.h>
65 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.260 2020/09/05 16:30:12 riastradh Exp $");
66
67 #ifdef _KERNEL_OPT
68 #include "opt_kstack.h"
69 #include "opt_maxuprc.h"
70 #include "opt_dtrace.h"
71 #include "opt_compat_netbsd32.h"
72 #include "opt_kaslr.h"
73 #endif
74
75 #if defined(__HAVE_COMPAT_NETBSD32) && !defined(COMPAT_NETBSD32) \
76 && !defined(_RUMPKERNEL)
77 #define COMPAT_NETBSD32
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/kernel.h>
83 #include <sys/proc.h>
84 #include <sys/resourcevar.h>
85 #include <sys/buf.h>
86 #include <sys/acct.h>
87 #include <sys/wait.h>
88 #include <sys/file.h>
89 #include <ufs/ufs/quota.h>
90 #include <sys/uio.h>
91 #include <sys/pool.h>
92 #include <sys/pset.h>
93 #include <sys/ioctl.h>
94 #include <sys/tty.h>
95 #include <sys/signalvar.h>
96 #include <sys/ras.h>
97 #include <sys/filedesc.h>
98 #include <sys/syscall_stats.h>
99 #include <sys/kauth.h>
100 #include <sys/sleepq.h>
101 #include <sys/atomic.h>
102 #include <sys/kmem.h>
103 #include <sys/namei.h>
104 #include <sys/dtrace_bsd.h>
105 #include <sys/sysctl.h>
106 #include <sys/exec.h>
107 #include <sys/cpu.h>
108 #include <sys/compat_stub.h>
109 #include <sys/futex.h>
110 #include <sys/pserialize.h>
111
112 #include <uvm/uvm_extern.h>
113
114 /*
115 * Process lists.
116 */
117
118 struct proclist allproc __cacheline_aligned;
119 struct proclist zombproc __cacheline_aligned;
120
121 kmutex_t proc_lock __cacheline_aligned;
122 static pserialize_t proc_psz;
123
124 /*
125 * pid to lwp/proc lookup is done by indexing the pid_table array.
126 * Since pid numbers are only allocated when an empty slot
127 * has been found, there is no need to search any lists ever.
128 * (an orphaned pgrp will lock the slot, a session will lock
129 * the pgrp with the same number.)
130 * If the table is too small it is reallocated with twice the
131 * previous size and the entries 'unzipped' into the two halves.
132 * A linked list of free entries is passed through the pt_lwp
133 * field of 'free' items - set odd to be an invalid ptr. Two
134 * additional bits are also used to indicate if the slot is
135 * currently occupied by a proc or lwp, and if the PID is
136 * hidden from certain kinds of lookups. We thus require a
137 * minimum alignment for proc and lwp structures (LWPs are
138 * at least 32-byte aligned).
139 */
140
141 struct pid_table {
142 uintptr_t pt_slot;
143 struct pgrp *pt_pgrp;
144 pid_t pt_pid;
145 };
146
147 #define PT_F_FREE ((uintptr_t)__BIT(0))
148 #define PT_F_LWP 0 /* pseudo-flag */
149 #define PT_F_PROC ((uintptr_t)__BIT(1))
150
151 #define PT_F_TYPEBITS (PT_F_FREE|PT_F_PROC)
152 #define PT_F_ALLBITS (PT_F_FREE|PT_F_PROC)
153
154 #define PT_VALID(s) (((s) & PT_F_FREE) == 0)
155 #define PT_RESERVED(s) ((s) == 0)
156 #define PT_NEXT(s) ((u_int)(s) >> 1)
157 #define PT_SET_FREE(pid) (((pid) << 1) | PT_F_FREE)
158 #define PT_SET_LWP(l) ((uintptr_t)(l))
159 #define PT_SET_PROC(p) (((uintptr_t)(p)) | PT_F_PROC)
160 #define PT_SET_RESERVED 0
161 #define PT_GET_LWP(s) ((struct lwp *)((s) & ~PT_F_ALLBITS))
162 #define PT_GET_PROC(s) ((struct proc *)((s) & ~PT_F_ALLBITS))
163 #define PT_GET_TYPE(s) ((s) & PT_F_TYPEBITS)
164 #define PT_IS_LWP(s) (PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
165 #define PT_IS_PROC(s) (PT_GET_TYPE(s) == PT_F_PROC)
166
167 #define MIN_PROC_ALIGNMENT (PT_F_ALLBITS + 1)
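
/*
 * Illustrative sketch (not part of the original code): how a pt_slot value
 * decodes under the macros above, given the minimum 4-byte alignment:
 *
 *	lwp pointer		low two bits clear, non-zero  -> PT_IS_LWP
 *	proc pointer | 2	PT_F_PROC set                 -> PT_IS_PROC
 *	(next pid << 1) | 1	odd, never a valid pointer    -> free slot;
 *				PT_NEXT() recovers the free-list link
 *	0			reserved slot (PT_RESERVED), e.g. for init(8)
 */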
168
169 /*
170 * Table of process IDs (PIDs).
171 */
172 static struct pid_table *pid_table __read_mostly;
173
174 #define INITIAL_PID_TABLE_SIZE (1 << 5)
175
176 /* Table mask, threshold for growing and number of allocated PIDs. */
177 static u_int pid_tbl_mask __read_mostly;
178 static u_int pid_alloc_lim __read_mostly;
179 static u_int pid_alloc_cnt __cacheline_aligned;
180
181 /* Next free, last free and maximum PIDs. */
182 static u_int next_free_pt __cacheline_aligned;
183 static u_int last_free_pt __cacheline_aligned;
184 static pid_t pid_max __read_mostly;
185
186 /* Components of the first process -- never freed. */
187
188 extern struct emul emul_netbsd; /* defined in kern_exec.c */
189
190 struct session session0 = {
191 .s_count = 1,
192 .s_sid = 0,
193 };
194 struct pgrp pgrp0 = {
195 .pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
196 .pg_session = &session0,
197 };
198 filedesc_t filedesc0;
199 struct cwdinfo cwdi0 = {
200 .cwdi_cmask = CMASK,
201 .cwdi_refcnt = 1,
202 };
203 struct plimit limit0;
204 struct pstats pstat0;
205 struct vmspace vmspace0;
206 struct sigacts sigacts0;
207 struct proc proc0 = {
208 .p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
209 .p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
210 .p_nlwps = 1,
211 .p_nrlwps = 1,
212 .p_pgrp = &pgrp0,
213 .p_comm = "system",
214 /*
	 * Set PK_NOCLDWAIT so that kernel threads are reparented to init(8)
216 * when they exit. init(8) can easily wait them out for us.
217 */
218 .p_flag = PK_SYSTEM | PK_NOCLDWAIT,
219 .p_stat = SACTIVE,
220 .p_nice = NZERO,
221 .p_emul = &emul_netbsd,
222 .p_cwdi = &cwdi0,
223 .p_limit = &limit0,
224 .p_fd = &filedesc0,
225 .p_vmspace = &vmspace0,
226 .p_stats = &pstat0,
227 .p_sigacts = &sigacts0,
228 #ifdef PROC0_MD_INITIALIZERS
229 PROC0_MD_INITIALIZERS
230 #endif
231 };
232 kauth_cred_t cred0;
233
234 static const int nofile = NOFILE;
235 static const int maxuprc = MAXUPRC;
236
237 static int sysctl_doeproc(SYSCTLFN_PROTO);
238 static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
239 static int sysctl_security_expose_address(SYSCTLFN_PROTO);
240
241 #ifdef KASLR
242 static int kern_expose_address = 0;
243 #else
244 static int kern_expose_address = 1;
245 #endif
246 /*
247 * The process list descriptors, used during pid allocation and
248 * by sysctl. No locking on this data structure is needed since
249 * it is completely static.
250 */
251 const struct proclist_desc proclists[] = {
252 { &allproc },
253 { &zombproc },
254 { NULL },
255 };
256
257 static struct pgrp * pg_remove(pid_t);
258 static void pg_delete(pid_t);
259 static void orphanpg(struct pgrp *);
260
261 static specificdata_domain_t proc_specificdata_domain;
262
263 static pool_cache_t proc_cache;
264
265 static kauth_listener_t proc_listener;
266
267 static void fill_proc(const struct proc *, struct proc *, bool);
268 static int fill_pathname(struct lwp *, pid_t, void *, size_t *);
269 static int fill_cwd(struct lwp *, pid_t, void *, size_t *);
270
271 static int
272 proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
273 void *arg0, void *arg1, void *arg2, void *arg3)
274 {
275 struct proc *p;
276 int result;
277
278 result = KAUTH_RESULT_DEFER;
279 p = arg0;
280
281 switch (action) {
282 case KAUTH_PROCESS_CANSEE: {
283 enum kauth_process_req req;
284
285 req = (enum kauth_process_req)(uintptr_t)arg1;
286
287 switch (req) {
288 case KAUTH_REQ_PROCESS_CANSEE_ARGS:
289 case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
290 case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
291 case KAUTH_REQ_PROCESS_CANSEE_EPROC:
292 result = KAUTH_RESULT_ALLOW;
293 break;
294
295 case KAUTH_REQ_PROCESS_CANSEE_ENV:
296 if (kauth_cred_getuid(cred) !=
297 kauth_cred_getuid(p->p_cred) ||
298 kauth_cred_getuid(cred) !=
299 kauth_cred_getsvuid(p->p_cred))
300 break;
301
302 result = KAUTH_RESULT_ALLOW;
303
304 break;
305
306 case KAUTH_REQ_PROCESS_CANSEE_KPTR:
307 if (!kern_expose_address)
308 break;
309
310 if (kern_expose_address == 1 && !(p->p_flag & PK_KMEM))
311 break;
312
313 result = KAUTH_RESULT_ALLOW;
314
315 break;
316
317 default:
318 break;
319 }
320
321 break;
322 }
323
324 case KAUTH_PROCESS_FORK: {
325 int lnprocs = (int)(unsigned long)arg2;
326
327 /*
328 * Don't allow a nonprivileged user to use the last few
329 * processes. The variable lnprocs is the current number of
330 * processes, maxproc is the limit.
331 */
332 if (__predict_false((lnprocs >= maxproc - 5)))
333 break;
334
335 result = KAUTH_RESULT_ALLOW;
336
337 break;
338 }
339
340 case KAUTH_PROCESS_CORENAME:
341 case KAUTH_PROCESS_STOPFLAG:
342 if (proc_uidmatch(cred, p->p_cred) == 0)
343 result = KAUTH_RESULT_ALLOW;
344
345 break;
346
347 default:
348 break;
349 }
350
351 return result;
352 }
353
354 static int
355 proc_ctor(void *arg __unused, void *obj, int flags __unused)
356 {
357 memset(obj, 0, sizeof(struct proc));
358 return 0;
359 }
360
361 static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
362
363 /*
364 * Initialize global process hashing structures.
365 */
366 void
367 procinit(void)
368 {
369 const struct proclist_desc *pd;
370 u_int i;
371 #define LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
372
373 for (pd = proclists; pd->pd_list != NULL; pd++)
374 LIST_INIT(pd->pd_list);
375
376 mutex_init(&proc_lock, MUTEX_DEFAULT, IPL_NONE);
377
378 proc_psz = pserialize_create();
379
380 pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
381 * sizeof(struct pid_table), KM_SLEEP);
382 pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
383 pid_max = PID_MAX;
384
385 /* Set free list running through table...
386 Preset 'use count' above PID_MAX so we allocate pid 1 next. */
387 for (i = 0; i <= pid_tbl_mask; i++) {
388 pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
389 pid_table[i].pt_pgrp = 0;
390 pid_table[i].pt_pid = 0;
391 }
392 /* slot 0 is just grabbed */
393 next_free_pt = 1;
394 /* Need to fix last entry. */
395 last_free_pt = pid_tbl_mask;
396 pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
397 /* point at which we grow table - to avoid reusing pids too often */
398 pid_alloc_lim = pid_tbl_mask - 1;
399 #undef LINK_EMPTY
400
401 /* Reserve PID 1 for init(8). */ /* XXX slightly gross */
402 mutex_enter(&proc_lock);
403 if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
404 panic("failed to reserve PID 1 for init(8)");
405 mutex_exit(&proc_lock);
406
407 proc_specificdata_domain = specificdata_domain_create();
408 KASSERT(proc_specificdata_domain != NULL);
409
410 size_t proc_alignment = coherency_unit;
411 if (proc_alignment < MIN_PROC_ALIGNMENT)
412 proc_alignment = MIN_PROC_ALIGNMENT;
413
414 proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
415 "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
416
417 proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
418 proc_listener_cb, NULL);
419 }
420
421 void
422 procinit_sysctl(void)
423 {
424 static struct sysctllog *clog;
425
426 sysctl_createv(&clog, 0, NULL, NULL,
427 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
428 CTLTYPE_INT, "expose_address",
429 SYSCTL_DESCR("Enable exposing kernel addresses"),
430 sysctl_security_expose_address, 0,
431 &kern_expose_address, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
432 sysctl_createv(&clog, 0, NULL, NULL,
433 CTLFLAG_PERMANENT,
434 CTLTYPE_NODE, "proc",
435 SYSCTL_DESCR("System-wide process information"),
436 sysctl_doeproc, 0, NULL, 0,
437 CTL_KERN, KERN_PROC, CTL_EOL);
438 sysctl_createv(&clog, 0, NULL, NULL,
439 CTLFLAG_PERMANENT,
440 CTLTYPE_NODE, "proc2",
441 SYSCTL_DESCR("Machine-independent process information"),
442 sysctl_doeproc, 0, NULL, 0,
443 CTL_KERN, KERN_PROC2, CTL_EOL);
444 sysctl_createv(&clog, 0, NULL, NULL,
445 CTLFLAG_PERMANENT,
446 CTLTYPE_NODE, "proc_args",
447 SYSCTL_DESCR("Process argument information"),
448 sysctl_kern_proc_args, 0, NULL, 0,
449 CTL_KERN, KERN_PROC_ARGS, CTL_EOL);
450
451 /*
452 "nodes" under these:
453
454 KERN_PROC_ALL
455 KERN_PROC_PID pid
456 KERN_PROC_PGRP pgrp
457 KERN_PROC_SESSION sess
458 KERN_PROC_TTY tty
459 KERN_PROC_UID uid
460 KERN_PROC_RUID uid
461 KERN_PROC_GID gid
462 KERN_PROC_RGID gid
463
464 all in all, probably not worth the effort...
465 */
466 }
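
/*
 * Usage sketch (illustrative, userland side; not part of this file): the
 * kern.proc2 node created above is normally queried with a six-element
 * MIB, e.g. to fetch one kinfo_proc2 record for a known pid:
 *
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, pid,
 *	    sizeof(struct kinfo_proc2), 1 };
 *	struct kinfo_proc2 kp;
 *	size_t len = sizeof(kp);
 *
 *	if (sysctl(mib, 6, &kp, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * sysctl_doeproc() below interprets name[0..3] as op, arg, elem_size and
 * elem_count respectively.
 */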
467
468 /*
469 * Initialize process 0.
470 */
471 void
472 proc0_init(void)
473 {
474 struct proc *p;
475 struct pgrp *pg;
476 struct rlimit *rlim;
477 rlim_t lim;
478 int i;
479
480 p = &proc0;
481 pg = &pgrp0;
482
483 mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
484 mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
485 p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
486
487 rw_init(&p->p_reflock);
488 cv_init(&p->p_waitcv, "wait");
489 cv_init(&p->p_lwpcv, "lwpwait");
490
491 LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
492
493 KASSERT(lwp0.l_lid == 0);
494 pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
495 LIST_INSERT_HEAD(&allproc, p, p_list);
496
497 pid_table[lwp0.l_lid].pt_pgrp = pg;
498 LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
499
500 #ifdef __HAVE_SYSCALL_INTERN
501 (*p->p_emul->e_syscall_intern)(p);
502 #endif
503
504 /* Create credentials. */
505 cred0 = kauth_cred_alloc();
506 p->p_cred = cred0;
507
508 /* Create the CWD info. */
509 rw_init(&cwdi0.cwdi_lock);
510
511 /* Create the limits structures. */
512 mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
513
514 rlim = limit0.pl_rlimit;
515 for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
516 rlim[i].rlim_cur = RLIM_INFINITY;
517 rlim[i].rlim_max = RLIM_INFINITY;
518 }
519
520 rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
521 rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;
522
523 rlim[RLIMIT_NPROC].rlim_max = maxproc;
524 rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;
525
526 lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvm_availmem(false)));
527 rlim[RLIMIT_RSS].rlim_max = lim;
528 rlim[RLIMIT_MEMLOCK].rlim_max = lim;
529 rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
530
531 rlim[RLIMIT_NTHR].rlim_max = maxlwp;
532 rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc;
533
534 /* Note that default core name has zero length. */
535 limit0.pl_corename = defcorename;
536 limit0.pl_cnlen = 0;
537 limit0.pl_refcnt = 1;
538 limit0.pl_writeable = false;
539 limit0.pl_sv_limit = NULL;
540
541 /* Configure virtual memory system, set vm rlimits. */
542 uvm_init_limits(p);
543
544 /* Initialize file descriptor table for proc0. */
545 fd_init(&filedesc0);
546
547 /*
548 * Initialize proc0's vmspace, which uses the kernel pmap.
549 * All kernel processes (which never have user space mappings)
550 * share proc0's vmspace, and thus, the kernel pmap.
551 */
552 uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
553 trunc_page(VM_MAXUSER_ADDRESS),
554 #ifdef __USE_TOPDOWN_VM
555 true
556 #else
557 false
558 #endif
559 );
560
561 /* Initialize signal state for proc0. XXX IPL_SCHED */
562 mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
563 siginit(p);
564
565 proc_initspecific(p);
566 kdtrace_proc_ctor(NULL, p);
567 }
568
569 /*
570 * Session reference counting.
571 */
572
573 void
574 proc_sesshold(struct session *ss)
575 {
576
577 KASSERT(mutex_owned(&proc_lock));
578 ss->s_count++;
579 }
580
581 void
582 proc_sessrele(struct session *ss)
583 {
584 struct pgrp *pg;
585
586 KASSERT(mutex_owned(&proc_lock));
587 KASSERT(ss->s_count > 0);
588
589 /*
590 * We keep the pgrp with the same id as the session in order to
591 * stop a process being given the same pid. Since the pgrp holds
592 * a reference to the session, it must be a 'zombie' pgrp by now.
593 */
594 if (--ss->s_count == 0) {
595 pg = pg_remove(ss->s_sid);
596 } else {
597 pg = NULL;
598 ss = NULL;
599 }
600
601 mutex_exit(&proc_lock);
602
603 if (pg)
604 kmem_free(pg, sizeof(struct pgrp));
605 if (ss)
606 kmem_free(ss, sizeof(struct session));
607 }
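
/*
 * Usage sketch (illustrative): both functions are called with proc_lock
 * held, and proc_sessrele() consumes it:
 *
 *	mutex_enter(&proc_lock);
 *	proc_sesshold(ss);
 *	...
 *	proc_sessrele(ss);	(drops proc_lock on the way out)
 *
 * In practice the hold and the release happen in different code paths,
 * e.g. proc_enterpgrp() versus pg_delete() below.
 */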
608
609 /*
610 * Check that the specified process group is in the session of the
611 * specified process.
 * Treats negative ids as process ids.
613 * Used to validate TIOCSPGRP requests.
614 */
615 int
616 pgid_in_session(struct proc *p, pid_t pg_id)
617 {
618 struct pgrp *pgrp;
619 struct session *session;
620 int error;
621
622 mutex_enter(&proc_lock);
623 if (pg_id < 0) {
624 struct proc *p1 = proc_find(-pg_id);
625 if (p1 == NULL) {
626 error = EINVAL;
627 goto fail;
628 }
629 pgrp = p1->p_pgrp;
630 } else {
631 pgrp = pgrp_find(pg_id);
632 if (pgrp == NULL) {
633 error = EINVAL;
634 goto fail;
635 }
636 }
637 session = pgrp->pg_session;
638 error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
639 fail:
640 mutex_exit(&proc_lock);
641 return error;
642 }
643
644 /*
645 * p_inferior: is p an inferior of q?
646 */
647 static inline bool
648 p_inferior(struct proc *p, struct proc *q)
649 {
650
651 KASSERT(mutex_owned(&proc_lock));
652
653 for (; p != q; p = p->p_pptr)
654 if (p->p_pid == 0)
655 return false;
656 return true;
657 }
658
659 /*
660 * proc_find_lwp: locate an lwp in said proc by the ID.
661 *
662 * => Must be called with p::p_lock held.
663 * => LSIDL lwps are not returned because they are only partially
664 * constructed while occupying the slot.
665 * => Callers need to be careful about lwp::l_stat of the returned
666 * lwp.
667 */
668 struct lwp *
669 proc_find_lwp(proc_t *p, pid_t pid)
670 {
671 struct pid_table *pt;
672 struct lwp *l = NULL;
673 uintptr_t slot;
674 int s;
675
676 KASSERT(mutex_owned(p->p_lock));
677
678 /*
679 * Look in the pid_table. This is done unlocked inside a pserialize
680 * read section covering pid_table's memory allocation only, so take
681 * care to read the slot atomically and only once. This issues a
682 * memory barrier for dependent loads on alpha.
683 */
684 s = pserialize_read_enter();
685 pt = &pid_table[pid & pid_tbl_mask];
686 slot = atomic_load_consume(&pt->pt_slot);
687 if (__predict_false(!PT_IS_LWP(slot))) {
688 pserialize_read_exit(s);
689 return NULL;
690 }
691
692 /*
693 * Check to see if the LWP is from the correct process. We won't
694 * see entries in pid_table from a prior process that also used "p",
695 * by virtue of the fact that allocating "p" means all prior updates
	 * to dependent data structures are visible to this thread.
697 */
698 l = PT_GET_LWP(slot);
699 if (__predict_false(atomic_load_relaxed(&l->l_proc) != p)) {
700 pserialize_read_exit(s);
701 return NULL;
702 }
703
704 /*
705 * We now know that p->p_lock holds this LWP stable.
706 *
707 * If the status is not LSIDL, it means the LWP is intended to be
708 * findable by LID and l_lid cannot change behind us.
709 *
710 * No need to acquire the LWP's lock to check for LSIDL, as
711 * p->p_lock must be held to transition in and out of LSIDL.
712 * Any other observed state of is no particular interest.
	 * Any other observed state is of no particular interest.
714 pserialize_read_exit(s);
715 return l->l_stat != LSIDL && l->l_lid == pid ? l : NULL;
716 }
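
/*
 * Caller sketch (illustrative): the proc must already be locked, and the
 * returned LWP's l_stat still needs checking by the caller:
 *
 *	mutex_enter(p->p_lock);
 *	l = proc_find_lwp(p, lid);
 *	if (l != NULL) {
 *		...
 *	}
 *	mutex_exit(p->p_lock);
 */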
717
718 /*
719 * proc_find_lwp_unlocked: locate an lwp in said proc by the ID.
720 *
721 * => Called in a pserialize read section with no locks held.
722 * => LSIDL lwps are not returned because they are only partially
723 * constructed while occupying the slot.
724 * => Callers need to be careful about lwp::l_stat of the returned
725 * lwp.
726 * => If an LWP is found, it's returned locked.
727 */
728 struct lwp *
729 proc_find_lwp_unlocked(proc_t *p, pid_t pid)
730 {
731 struct pid_table *pt;
732 struct lwp *l = NULL;
733 uintptr_t slot;
734
735 KASSERT(pserialize_in_read_section());
736
737 /*
738 * Look in the pid_table. This is done unlocked inside a pserialize
739 * read section covering pid_table's memory allocation only, so take
740 * care to read the slot atomically and only once. This issues a
741 * memory barrier for dependent loads on alpha.
742 */
743 pt = &pid_table[pid & pid_tbl_mask];
744 slot = atomic_load_consume(&pt->pt_slot);
745 if (__predict_false(!PT_IS_LWP(slot))) {
746 return NULL;
747 }
748
749 /*
750 * Lock the LWP we found to get it stable. If it's embryonic or
751 * reaped (LSIDL) then none of the other fields can safely be
752 * checked.
753 */
754 l = PT_GET_LWP(slot);
755 lwp_lock(l);
756 if (__predict_false(l->l_stat == LSIDL)) {
757 lwp_unlock(l);
758 return NULL;
759 }
760
761 /*
762 * l_proc and l_lid are now known stable because the LWP is not
763 * LSIDL, so check those fields too to make sure we found the
764 * right thing.
765 */
766 if (__predict_false(l->l_proc != p || l->l_lid != pid)) {
767 lwp_unlock(l);
768 return NULL;
769 }
770
771 /* Everything checks out, return it locked. */
772 return l;
773 }
774
775 /*
776 * proc_find_lwp_acquire_proc: locate an lwp and acquire a lock
777 * on its containing proc.
778 *
779 * => Similar to proc_find_lwp(), but does not require you to have
780 * the proc a priori.
781 * => Also returns proc * to caller, with p::p_lock held.
782 * => Same caveats apply.
783 */
784 struct lwp *
785 proc_find_lwp_acquire_proc(pid_t pid, struct proc **pp)
786 {
787 struct pid_table *pt;
788 struct proc *p = NULL;
789 struct lwp *l = NULL;
790 uintptr_t slot;
791
792 KASSERT(pp != NULL);
793 mutex_enter(&proc_lock);
794 pt = &pid_table[pid & pid_tbl_mask];
795
796 slot = pt->pt_slot;
797 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
798 l = PT_GET_LWP(slot);
799 p = l->l_proc;
800 mutex_enter(p->p_lock);
801 if (__predict_false(l->l_stat == LSIDL)) {
802 mutex_exit(p->p_lock);
803 l = NULL;
804 p = NULL;
805 }
806 }
807 mutex_exit(&proc_lock);
808
809 KASSERT(p == NULL || mutex_owned(p->p_lock));
810 *pp = p;
811 return l;
812 }
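
/*
 * Caller sketch (illustrative): no proc is needed up front and, on
 * success, the containing proc comes back with p_lock held:
 *
 *	struct proc *p;
 *	struct lwp *l;
 *
 *	l = proc_find_lwp_acquire_proc(lid, &p);
 *	if (l != NULL) {
 *		...
 *		mutex_exit(p->p_lock);
 *	}
 */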
813
814 /*
815 * proc_find_raw_pid_table_locked: locate a process by the ID.
816 *
817 * => Must be called with proc_lock held.
818 */
819 static proc_t *
820 proc_find_raw_pid_table_locked(pid_t pid, bool any_lwpid)
821 {
822 struct pid_table *pt;
823 proc_t *p = NULL;
824 uintptr_t slot;
825
826 /* No - used by DDB. KASSERT(mutex_owned(&proc_lock)); */
827 pt = &pid_table[pid & pid_tbl_mask];
828
829 slot = pt->pt_slot;
830 if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
831 /*
832 * When looking up processes, require a direct match
833 * on the PID assigned to the proc, not just one of
834 * its LWPs.
835 *
836 * N.B. We require lwp::l_proc of LSIDL LWPs to be
837 * valid here.
838 */
839 p = PT_GET_LWP(slot)->l_proc;
840 if (__predict_false(p->p_pid != pid && !any_lwpid))
841 p = NULL;
842 } else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
843 p = PT_GET_PROC(slot);
844 }
845 return p;
846 }
847
848 proc_t *
849 proc_find_raw(pid_t pid)
850 {
851
852 return proc_find_raw_pid_table_locked(pid, false);
853 }
854
855 static proc_t *
856 proc_find_internal(pid_t pid, bool any_lwpid)
857 {
858 proc_t *p;
859
860 KASSERT(mutex_owned(&proc_lock));
861
862 p = proc_find_raw_pid_table_locked(pid, any_lwpid);
863 if (__predict_false(p == NULL)) {
864 return NULL;
865 }
866
867 /*
868 * Only allow live processes to be found by PID.
869 * XXX: p_stat might change, since proc unlocked.
870 */
871 if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
872 return p;
873 }
874 return NULL;
875 }
876
877 proc_t *
878 proc_find(pid_t pid)
879 {
880 return proc_find_internal(pid, false);
881 }
882
883 proc_t *
884 proc_find_lwpid(pid_t pid)
885 {
886 return proc_find_internal(pid, true);
887 }
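
/*
 * Caller sketch (illustrative): the lookups above are only valid under
 * proc_lock; a caller that needs the proc beyond that critical section
 * commonly takes p->p_lock before dropping proc_lock:
 *
 *	mutex_enter(&proc_lock);
 *	p = proc_find(pid);
 *	if (p != NULL)
 *		mutex_enter(p->p_lock);
 *	mutex_exit(&proc_lock);
 *	if (p != NULL) {
 *		...
 *		mutex_exit(p->p_lock);
 *	}
 */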
888
889 /*
890 * pgrp_find: locate a process group by the ID.
891 *
892 * => Must be called with proc_lock held.
893 */
894 struct pgrp *
895 pgrp_find(pid_t pgid)
896 {
897 struct pgrp *pg;
898
899 KASSERT(mutex_owned(&proc_lock));
900
901 pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
902
903 /*
904 * Cannot look up a process group that only exists because the
905 * session has not died yet (traditional).
906 */
907 if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
908 return NULL;
909 }
910 return pg;
911 }
912
913 static void
914 expand_pid_table(void)
915 {
916 size_t pt_size, tsz;
917 struct pid_table *n_pt, *new_pt;
918 uintptr_t slot;
919 struct pgrp *pgrp;
920 pid_t pid, rpid;
921 u_int i;
922 uint new_pt_mask;
923
924 KASSERT(mutex_owned(&proc_lock));
925
926 /* Unlock the pid_table briefly to allocate memory. */
927 pt_size = pid_tbl_mask + 1;
928 mutex_exit(&proc_lock);
929
930 tsz = pt_size * 2 * sizeof(struct pid_table);
931 new_pt = kmem_alloc(tsz, KM_SLEEP);
932 new_pt_mask = pt_size * 2 - 1;
933
	/* XXX For now. The practical limit is much lower anyway. */
935 KASSERT(new_pt_mask <= FUTEX_TID_MASK);
936
937 mutex_enter(&proc_lock);
938 if (pt_size != pid_tbl_mask + 1) {
939 /* Another process beat us to it... */
940 mutex_exit(&proc_lock);
941 kmem_free(new_pt, tsz);
942 goto out;
943 }
944
945 /*
946 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place it in the upper half,
	 * even pids go in the lower half.
949 * Free items stay in the low half so we don't have to
950 * fixup the reference to them.
951 * We stuff free items on the front of the freelist
952 * because we can't write to unmodified entries.
953 * Processing the table backwards maintains a semblance
954 * of issuing pid numbers that increase with time.
955 */
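	/*
	 * Illustrative example (not part of the original comment): growing
	 * from 32 to 64 slots, an entry at old index 1 holding pid 33 moves
	 * to new index 33 (the "odd" half), while pid 1 would stay at index
	 * 1; the unused slot of each pair is threaded onto the free list.
	 */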
956 i = pt_size - 1;
957 n_pt = new_pt + i;
958 for (; ; i--, n_pt--) {
959 slot = pid_table[i].pt_slot;
960 pgrp = pid_table[i].pt_pgrp;
961 if (!PT_VALID(slot)) {
962 /* Up 'use count' so that link is valid */
963 pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
964 rpid = 0;
965 slot = PT_SET_FREE(pid);
966 if (pgrp)
967 pid = pgrp->pg_id;
968 } else {
969 pid = pid_table[i].pt_pid;
970 rpid = pid;
971 }
972
973 /* Save entry in appropriate half of table */
974 n_pt[pid & pt_size].pt_slot = slot;
975 n_pt[pid & pt_size].pt_pgrp = pgrp;
976 n_pt[pid & pt_size].pt_pid = rpid;
977
978 /* Put other piece on start of free list */
979 pid = (pid ^ pt_size) & ~pid_tbl_mask;
980 n_pt[pid & pt_size].pt_slot =
981 PT_SET_FREE((pid & ~pt_size) | next_free_pt);
982 n_pt[pid & pt_size].pt_pgrp = 0;
983 n_pt[pid & pt_size].pt_pid = 0;
984
985 next_free_pt = i | (pid & pt_size);
986 if (i == 0)
987 break;
988 }
989
990 /* Save old table size and switch tables */
991 tsz = pt_size * sizeof(struct pid_table);
992 n_pt = pid_table;
993 pid_table = new_pt;
994 pid_tbl_mask = new_pt_mask;
995
996 /*
997 * pid_max starts as PID_MAX (= 30000), once we have 16384
998 * allocated pids we need it to be larger!
999 */
1000 if (pid_tbl_mask > PID_MAX) {
1001 pid_max = pid_tbl_mask * 2 + 1;
1002 pid_alloc_lim |= pid_alloc_lim << 1;
1003 } else
1004 pid_alloc_lim <<= 1; /* doubles number of free slots... */
1005
1006 mutex_exit(&proc_lock);
1007
1008 /*
1009 * Make sure that unlocked access to the old pid_table is complete
1010 * and then free it.
1011 */
1012 pserialize_perform(proc_psz);
1013 kmem_free(n_pt, tsz);
1014
1015 out: /* Return with proc_lock held again. */
1016 mutex_enter(&proc_lock);
1017 }
1018
1019 struct proc *
1020 proc_alloc(void)
1021 {
1022 struct proc *p;
1023
1024 p = pool_cache_get(proc_cache, PR_WAITOK);
1025 p->p_stat = SIDL; /* protect against others */
1026 proc_initspecific(p);
1027 kdtrace_proc_ctor(NULL, p);
1028
1029 /*
1030 * Allocate a placeholder in the pid_table. When we create the
1031 * first LWP for this process, it will take ownership of the
1032 * slot.
1033 */
1034 if (__predict_false(proc_alloc_pid(p) == -1)) {
1035 /* Allocating the PID failed; unwind. */
1036 proc_finispecific(p);
1037 proc_free_mem(p);
1038 p = NULL;
1039 }
1040 return p;
1041 }
1042
1043 /*
 * proc_alloc_pid_slot: allocate PID and record the occupant so that
1045 * proc_find_raw() can find it by the PID.
1046 */
1047 static pid_t __noinline
1048 proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
1049 {
1050 struct pid_table *pt;
1051 pid_t pid;
1052 int nxt;
1053
1054 KASSERT(mutex_owned(&proc_lock));
1055
1056 for (;;expand_pid_table()) {
1057 if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
1058 /* ensure pids cycle through 2000+ values */
1059 continue;
1060 }
1061 /*
1062 * The first user process *must* be given PID 1.
		 * It has already been reserved for us. This
1064 * will be coming in from the proc_alloc() call
1065 * above, and the entry will be usurped later when
1066 * the first user LWP is created.
1067 * XXX this is slightly gross.
1068 */
1069 if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
1070 p != &proc0)) {
1071 KASSERT(PT_IS_PROC(slot));
1072 pt = &pid_table[1];
1073 pt->pt_slot = slot;
1074 return 1;
1075 }
1076 pt = &pid_table[next_free_pt];
1077 #ifdef DIAGNOSTIC
1078 if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
1079 panic("proc_alloc: slot busy");
1080 #endif
1081 nxt = PT_NEXT(pt->pt_slot);
1082 if (nxt & pid_tbl_mask)
1083 break;
1084 /* Table full - expand (NB last entry not used....) */
1085 }
1086
1087 /* pid is 'saved use count' + 'size' + entry */
1088 pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
1089 if ((uint)pid > (uint)pid_max)
1090 pid &= pid_tbl_mask;
1091 next_free_pt = nxt & pid_tbl_mask;
1092
	/* XXX For now. The practical limit is much lower anyway. */
1094 KASSERT(pid <= FUTEX_TID_MASK);
1095
1096 /* Grab table slot */
1097 pt->pt_slot = slot;
1098
1099 KASSERT(pt->pt_pid == 0);
1100 pt->pt_pid = pid;
1101 pid_alloc_cnt++;
1102
1103 return pid;
1104 }
1105
1106 pid_t
1107 proc_alloc_pid(struct proc *p)
1108 {
1109 pid_t pid;
1110
1111 KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
1112 KASSERT(p->p_stat == SIDL);
1113
1114 mutex_enter(&proc_lock);
1115 pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
1116 if (pid != -1)
1117 p->p_pid = pid;
1118 mutex_exit(&proc_lock);
1119
1120 return pid;
1121 }
1122
1123 pid_t
1124 proc_alloc_lwpid(struct proc *p, struct lwp *l)
1125 {
1126 struct pid_table *pt;
1127 pid_t pid;
1128
1129 KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
1130 KASSERT(l->l_proc == p);
1131 KASSERT(l->l_stat == LSIDL);
1132
1133 /*
1134 * For unlocked lookup in proc_find_lwp(), make sure l->l_proc
1135 * is globally visible before the LWP becomes visible via the
1136 * pid_table.
1137 */
1138 #ifndef __HAVE_ATOMIC_AS_MEMBAR
1139 membar_producer();
1140 #endif
1141
1142 /*
1143 * If the slot for p->p_pid currently points to the proc,
1144 * then we should usurp this ID for the LWP. This happens
1145 * at least once per process (for the first LWP), and can
	 * happen again if the first LWP for a process exits
	 * before the process creates another.
1148 */
1149 mutex_enter(&proc_lock);
1150 pid = p->p_pid;
1151 pt = &pid_table[pid & pid_tbl_mask];
1152 KASSERT(pt->pt_pid == pid);
1153 if (PT_IS_PROC(pt->pt_slot)) {
1154 KASSERT(PT_GET_PROC(pt->pt_slot) == p);
1155 l->l_lid = pid;
1156 pt->pt_slot = PT_SET_LWP(l);
1157 } else {
1158 /* Need to allocate a new slot. */
1159 pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
1160 if (pid != -1)
1161 l->l_lid = pid;
1162 }
1163 mutex_exit(&proc_lock);
1164
1165 return pid;
1166 }
1167
1168 static void __noinline
1169 proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
1170 {
1171 struct pid_table *pt;
1172
1173 pt = &pid_table[pid & pid_tbl_mask];
1174
1175 KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
1176 KASSERT(pt->pt_pid == pid);
1177
1178 /* save pid use count in slot */
1179 pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
1180 pt->pt_pid = 0;
1181
1182 if (pt->pt_pgrp == NULL) {
1183 /* link last freed entry onto ours */
1184 pid &= pid_tbl_mask;
1185 pt = &pid_table[last_free_pt];
1186 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid);
1187 pt->pt_pid = 0;
1188 last_free_pt = pid;
1189 pid_alloc_cnt--;
1190 }
1191 }
1192
1193 /*
1194 * Free a process id - called from proc_free (in kern_exit.c)
1195 *
1196 * Called with the proc_lock held.
1197 */
1198 void
1199 proc_free_pid(pid_t pid)
1200 {
1201
1202 KASSERT(mutex_owned(&proc_lock));
1203 proc_free_pid_internal(pid, PT_F_PROC);
1204 }
1205
1206 /*
1207 * Free a process id used by an LWP. If this was the process's
1208 * first LWP, we convert the slot to point to the process; the
1209 * entry will get cleaned up later when the process finishes exiting.
1210 *
1211 * If not, then it's the same as proc_free_pid().
1212 */
1213 void
1214 proc_free_lwpid(struct proc *p, pid_t pid)
1215 {
1216
1217 KASSERT(mutex_owned(&proc_lock));
1218
1219 if (__predict_true(p->p_pid == pid)) {
1220 struct pid_table *pt;
1221
1222 pt = &pid_table[pid & pid_tbl_mask];
1223
1224 KASSERT(pt->pt_pid == pid);
1225 KASSERT(PT_IS_LWP(pt->pt_slot));
1226 KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
1227
1228 pt->pt_slot = PT_SET_PROC(p);
1229 return;
1230 }
1231 proc_free_pid_internal(pid, PT_F_LWP);
1232 }
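
/*
 * Lifecycle sketch (illustrative): the pid_table slot of an ordinary
 * process typically goes through
 *
 *	proc_alloc() / proc_alloc_pid()	slot = PT_SET_PROC(p)
 *	proc_alloc_lwpid(p, l1)		slot = PT_SET_LWP(l1)	(usurped)
 *	proc_free_lwpid(p, l1->l_lid)	slot = PT_SET_PROC(p)	(converted back)
 *	proc_free_pid(p->p_pid)		slot freed
 *
 * while any additional LWPs get their own slots from proc_alloc_lwpid()
 * and release them through proc_free_lwpid().
 */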
1233
1234 void
1235 proc_free_mem(struct proc *p)
1236 {
1237
1238 kdtrace_proc_dtor(NULL, p);
1239 pool_cache_put(proc_cache, p);
1240 }
1241
1242 /*
1243 * proc_enterpgrp: move p to a new or existing process group (and session).
1244 *
1245 * If we are creating a new pgrp, the pgid should equal
1246 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
1250 *
1251 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
1252 */
1253 int
1254 proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
1255 {
1256 struct pgrp *new_pgrp, *pgrp;
1257 struct session *sess;
1258 struct proc *p;
1259 int rval;
1260 pid_t pg_id = NO_PGID;
1261
1262 /* Allocate data areas we might need before doing any validity checks */
1263 sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
1264 new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
1265
1266 mutex_enter(&proc_lock);
1267 rval = EPERM; /* most common error (to save typing) */
1268
1269 /* Check pgrp exists or can be created */
1270 pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
1271 if (pgrp != NULL && pgrp->pg_id != pgid)
1272 goto done;
1273
1274 /* Can only set another process under restricted circumstances. */
1275 if (pid != curp->p_pid) {
1276 /* Must exist and be one of our children... */
1277 p = proc_find_internal(pid, false);
1278 if (p == NULL || !p_inferior(p, curp)) {
1279 rval = ESRCH;
1280 goto done;
1281 }
1282 /* ... in the same session... */
1283 if (sess != NULL || p->p_session != curp->p_session)
1284 goto done;
1285 /* ... existing pgid must be in same session ... */
1286 if (pgrp != NULL && pgrp->pg_session != p->p_session)
1287 goto done;
1288 /* ... and not done an exec. */
1289 if (p->p_flag & PK_EXEC) {
1290 rval = EACCES;
1291 goto done;
1292 }
1293 } else {
1294 /* ... setsid() cannot re-enter a pgrp */
1295 if (mksess && (curp->p_pgid == curp->p_pid ||
1296 pgrp_find(curp->p_pid)))
1297 goto done;
1298 p = curp;
1299 }
1300
1301 /* Changing the process group/session of a session
1302 leader is definitely off limits. */
1303 if (SESS_LEADER(p)) {
1304 if (sess == NULL && p->p_pgrp == pgrp)
1305 /* unless it's a definite noop */
1306 rval = 0;
1307 goto done;
1308 }
1309
1310 /* Can only create a process group with id of process */
1311 if (pgrp == NULL && pgid != pid)
1312 goto done;
1313
1314 /* Can only create a session if creating pgrp */
1315 if (sess != NULL && pgrp != NULL)
1316 goto done;
1317
1318 /* Check we allocated memory for a pgrp... */
1319 if (pgrp == NULL && new_pgrp == NULL)
1320 goto done;
1321
1322 /* Don't attach to 'zombie' pgrp */
1323 if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
1324 goto done;
1325
1326 /* Expect to succeed now */
1327 rval = 0;
1328
1329 if (pgrp == p->p_pgrp)
1330 /* nothing to do */
1331 goto done;
1332
1333 /* Ok all setup, link up required structures */
1334
1335 if (pgrp == NULL) {
1336 pgrp = new_pgrp;
1337 new_pgrp = NULL;
1338 if (sess != NULL) {
1339 sess->s_sid = p->p_pid;
1340 sess->s_leader = p;
1341 sess->s_count = 1;
1342 sess->s_ttyvp = NULL;
1343 sess->s_ttyp = NULL;
1344 sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
1345 memcpy(sess->s_login, p->p_session->s_login,
1346 sizeof(sess->s_login));
1347 p->p_lflag &= ~PL_CONTROLT;
1348 } else {
1349 sess = p->p_pgrp->pg_session;
1350 proc_sesshold(sess);
1351 }
1352 pgrp->pg_session = sess;
1353 sess = NULL;
1354
1355 pgrp->pg_id = pgid;
1356 LIST_INIT(&pgrp->pg_members);
1357 #ifdef DIAGNOSTIC
1358 if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
1359 panic("enterpgrp: pgrp table slot in use");
1360 if (__predict_false(mksess && p != curp))
1361 panic("enterpgrp: mksession and p != curproc");
1362 #endif
1363 pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
1364 pgrp->pg_jobc = 0;
1365 }
1366
1367 /*
1368 * Adjust eligibility of affected pgrps to participate in job control.
1369 * Increment eligibility counts before decrementing, otherwise we
1370 * could reach 0 spuriously during the first call.
1371 */
1372 fixjobc(p, pgrp, 1);
1373 fixjobc(p, p->p_pgrp, 0);
1374
1375 /* Interlock with ttread(). */
1376 mutex_spin_enter(&tty_lock);
1377
1378 /* Move process to requested group. */
1379 LIST_REMOVE(p, p_pglist);
1380 if (LIST_EMPTY(&p->p_pgrp->pg_members))
1381 /* defer delete until we've dumped the lock */
1382 pg_id = p->p_pgrp->pg_id;
1383 p->p_pgrp = pgrp;
1384 LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
1385
1386 /* Done with the swap; we can release the tty mutex. */
1387 mutex_spin_exit(&tty_lock);
1388
1389 done:
1390 if (pg_id != NO_PGID) {
1391 /* Releases proc_lock. */
1392 pg_delete(pg_id);
1393 } else {
1394 mutex_exit(&proc_lock);
1395 }
1396 if (sess != NULL)
1397 kmem_free(sess, sizeof(*sess));
1398 if (new_pgrp != NULL)
1399 kmem_free(new_pgrp, sizeof(*new_pgrp));
1400 #ifdef DEBUG_PGRP
1401 if (__predict_false(rval))
1402 printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
1403 pid, pgid, mksess, curp->p_pid, rval);
1404 #endif
1405 return rval;
1406 }
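
/*
 * Usage sketch (illustrative): sys_setsid() effectively does
 *
 *	error = proc_enterpgrp(curp, curp->p_pid, curp->p_pid, true);
 *
 * while sys_setpgid(), after normalising zero pid/pgid arguments, boils
 * down to
 *
 *	error = proc_enterpgrp(curp, pid, pgid, false);
 */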
1407
1408 /*
1409 * proc_leavepgrp: remove a process from its process group.
1410 * => must be called with the proc_lock held, which will be released;
1411 */
1412 void
1413 proc_leavepgrp(struct proc *p)
1414 {
1415 struct pgrp *pgrp;
1416
1417 KASSERT(mutex_owned(&proc_lock));
1418
1419 /* Interlock with ttread() */
1420 mutex_spin_enter(&tty_lock);
1421 pgrp = p->p_pgrp;
1422 LIST_REMOVE(p, p_pglist);
1423 p->p_pgrp = NULL;
1424 mutex_spin_exit(&tty_lock);
1425
1426 if (LIST_EMPTY(&pgrp->pg_members)) {
1427 /* Releases proc_lock. */
1428 pg_delete(pgrp->pg_id);
1429 } else {
1430 mutex_exit(&proc_lock);
1431 }
1432 }
1433
1434 /*
1435 * pg_remove: remove a process group from the table.
1436 * => must be called with the proc_lock held;
1437 * => returns process group to free;
1438 */
1439 static struct pgrp *
1440 pg_remove(pid_t pg_id)
1441 {
1442 struct pgrp *pgrp;
1443 struct pid_table *pt;
1444
1445 KASSERT(mutex_owned(&proc_lock));
1446
1447 pt = &pid_table[pg_id & pid_tbl_mask];
1448 pgrp = pt->pt_pgrp;
1449
1450 KASSERT(pgrp != NULL);
1451 KASSERT(pgrp->pg_id == pg_id);
1452 KASSERT(LIST_EMPTY(&pgrp->pg_members));
1453
1454 pt->pt_pgrp = NULL;
1455
1456 if (!PT_VALID(pt->pt_slot)) {
1457 /* Orphaned pgrp, put slot onto free list. */
1458 KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
1459 pg_id &= pid_tbl_mask;
1460 pt = &pid_table[last_free_pt];
1461 pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
1462 KASSERT(pt->pt_pid == 0);
1463 last_free_pt = pg_id;
1464 pid_alloc_cnt--;
1465 }
1466 return pgrp;
1467 }
1468
1469 /*
1470 * pg_delete: delete and free a process group.
1471 * => must be called with the proc_lock held, which will be released.
1472 */
1473 static void
1474 pg_delete(pid_t pg_id)
1475 {
1476 struct pgrp *pg;
1477 struct tty *ttyp;
1478 struct session *ss;
1479
1480 KASSERT(mutex_owned(&proc_lock));
1481
1482 pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
1483 if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
1484 mutex_exit(&proc_lock);
1485 return;
1486 }
1487
1488 ss = pg->pg_session;
1489
1490 /* Remove reference (if any) from tty to this process group */
1491 mutex_spin_enter(&tty_lock);
1492 ttyp = ss->s_ttyp;
1493 if (ttyp != NULL && ttyp->t_pgrp == pg) {
1494 ttyp->t_pgrp = NULL;
1495 KASSERT(ttyp->t_session == ss);
1496 }
1497 mutex_spin_exit(&tty_lock);
1498
1499 /*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if that was the last reference.  It will also release proc_lock.
1502 */
1503 pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
1504 proc_sessrele(ss);
1505
1506 if (pg != NULL) {
1507 /* Free it, if was not done above. */
1508 kmem_free(pg, sizeof(struct pgrp));
1509 }
1510 }
1511
1512 /*
1513 * Adjust pgrp jobc counters when specified process changes process group.
1514 * We count the number of processes in each process group that "qualify"
1515 * the group for terminal job control (those with a parent in a different
1516 * process group of the same session). If that count reaches zero, the
1517 * process group becomes orphaned. Check both the specified process'
1518 * process group and that of its children.
1519 * entering == 0 => p is leaving specified group.
1520 * entering == 1 => p is entering specified group.
1521 *
1522 * Call with proc_lock held.
1523 */
1524 void
1525 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
1526 {
1527 struct pgrp *hispgrp;
1528 struct session *mysession = pgrp->pg_session;
1529 struct proc *child;
1530
1531 KASSERT(mutex_owned(&proc_lock));
1532
1533 /*
1534 * Check p's parent to see whether p qualifies its own process
1535 * group; if so, adjust count for p's process group.
1536 */
1537 hispgrp = p->p_pptr->p_pgrp;
1538 if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
1539 if (entering) {
1540 pgrp->pg_jobc++;
1541 p->p_lflag &= ~PL_ORPHANPG;
1542 } else {
1543 KASSERT(pgrp->pg_jobc > 0);
1544 if (--pgrp->pg_jobc == 0)
1545 orphanpg(pgrp);
1546 }
1547 }
1548
1549 /*
1550 * Check this process' children to see whether they qualify
1551 * their process groups; if so, adjust counts for children's
1552 * process groups.
1553 */
1554 LIST_FOREACH(child, &p->p_children, p_sibling) {
1555 hispgrp = child->p_pgrp;
1556 if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
1557 !P_ZOMBIE(child)) {
1558 if (entering) {
1559 child->p_lflag &= ~PL_ORPHANPG;
1560 hispgrp->pg_jobc++;
1561 } else {
1562 KASSERT(hispgrp->pg_jobc > 0);
1563 if (--hispgrp->pg_jobc == 0)
1564 orphanpg(hispgrp);
1565 }
1566 }
1567 }
1568 }
1569
1570 /*
1571 * A process group has become orphaned;
1572 * if there are any stopped processes in the group,
 * hang up all processes in that group.
1574 *
1575 * Call with proc_lock held.
1576 */
1577 static void
1578 orphanpg(struct pgrp *pg)
1579 {
1580 struct proc *p;
1581
1582 KASSERT(mutex_owned(&proc_lock));
1583
1584 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1585 if (p->p_stat == SSTOP) {
1586 p->p_lflag |= PL_ORPHANPG;
1587 psignal(p, SIGHUP);
1588 psignal(p, SIGCONT);
1589 }
1590 }
1591 }
1592
1593 #ifdef DDB
1594 #include <ddb/db_output.h>
1595 void pidtbl_dump(void);
1596 void
1597 pidtbl_dump(void)
1598 {
1599 struct pid_table *pt;
1600 struct proc *p;
1601 struct pgrp *pgrp;
1602 uintptr_t slot;
1603 int id;
1604
1605 db_printf("pid table %p size %x, next %x, last %x\n",
1606 pid_table, pid_tbl_mask+1,
1607 next_free_pt, last_free_pt);
1608 for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
1609 slot = pt->pt_slot;
1610 if (!PT_VALID(slot) && !pt->pt_pgrp)
1611 continue;
1612 if (PT_IS_LWP(slot)) {
1613 p = PT_GET_LWP(slot)->l_proc;
1614 } else if (PT_IS_PROC(slot)) {
1615 p = PT_GET_PROC(slot);
1616 } else {
1617 p = NULL;
1618 }
1619 db_printf(" id %x: ", id);
1620 if (p != NULL)
1621 db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
1622 pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
1623 else
1624 db_printf("next %x use %x\n",
1625 PT_NEXT(slot) & pid_tbl_mask,
1626 PT_NEXT(slot) & ~pid_tbl_mask);
1627 if ((pgrp = pt->pt_pgrp)) {
1628 db_printf("\tsession %p, sid %d, count %d, login %s\n",
1629 pgrp->pg_session, pgrp->pg_session->s_sid,
1630 pgrp->pg_session->s_count,
1631 pgrp->pg_session->s_login);
1632 db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
1633 pgrp, pgrp->pg_id, pgrp->pg_jobc,
1634 LIST_FIRST(&pgrp->pg_members));
1635 LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1636 db_printf("\t\tpid %d addr %p pgrp %p %s\n",
1637 p->p_pid, p, p->p_pgrp, p->p_comm);
1638 }
1639 }
1640 }
1641 }
1642 #endif /* DDB */
1643
1644 #ifdef KSTACK_CHECK_MAGIC
1645
1646 #define KSTACK_MAGIC 0xdeadbeaf
1647
/* XXX should this be on a per-process basis? */
1649 static int kstackleftmin = KSTACK_SIZE;
1650 static int kstackleftthres = KSTACK_SIZE / 8;
1651
1652 void
1653 kstack_setup_magic(const struct lwp *l)
1654 {
1655 uint32_t *ip;
1656 uint32_t const *end;
1657
1658 KASSERT(l != NULL);
1659 KASSERT(l != &lwp0);
1660
1661 /*
	 * Fill the whole stack with the magic number
	 * so that later modification of it can be detected.
1664 */
1665 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1666 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1667 for (; ip < end; ip++) {
1668 *ip = KSTACK_MAGIC;
1669 }
1670 }
1671
1672 void
1673 kstack_check_magic(const struct lwp *l)
1674 {
1675 uint32_t const *ip, *end;
1676 int stackleft;
1677
1678 KASSERT(l != NULL);
1679
1680 /* don't check proc0 */ /*XXX*/
1681 if (l == &lwp0)
1682 return;
1683
1684 #ifdef __MACHINE_STACK_GROWS_UP
1685 /* stack grows upwards (eg. hppa) */
1686 ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1687 end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1688 for (ip--; ip >= end; ip--)
1689 if (*ip != KSTACK_MAGIC)
1690 break;
1691
1692 stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
1693 #else /* __MACHINE_STACK_GROWS_UP */
1694 /* stack grows downwards (eg. i386) */
1695 ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
1696 end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
1697 for (; ip < end; ip++)
1698 if (*ip != KSTACK_MAGIC)
1699 break;
1700
1701 stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
1702 #endif /* __MACHINE_STACK_GROWS_UP */
1703
1704 if (kstackleftmin > stackleft) {
1705 kstackleftmin = stackleft;
1706 if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
1708 "(pid %u:lid %u)\n", stackleft,
1709 (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1710 }
1711
1712 if (stackleft <= 0) {
1713 panic("magic on the top of kernel stack changed for "
1714 "pid %u, lid %u: maybe kernel stack overflow",
1715 (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
1716 }
1717 }
1718 #endif /* KSTACK_CHECK_MAGIC */
1719
1720 int
1721 proclist_foreach_call(struct proclist *list,
1722 int (*callback)(struct proc *, void *arg), void *arg)
1723 {
1724 struct proc marker;
1725 struct proc *p;
1726 int ret = 0;
1727
1728 marker.p_flag = PK_MARKER;
1729 mutex_enter(&proc_lock);
1730 for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
1731 if (p->p_flag & PK_MARKER) {
1732 p = LIST_NEXT(p, p_list);
1733 continue;
1734 }
1735 LIST_INSERT_AFTER(p, &marker, p_list);
1736 ret = (*callback)(p, arg);
1737 KASSERT(mutex_owned(&proc_lock));
1738 p = LIST_NEXT(&marker, p_list);
1739 LIST_REMOVE(&marker, p_list);
1740 }
1741 mutex_exit(&proc_lock);
1742
1743 return ret;
1744 }
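
/*
 * Usage sketch (illustrative; the callback name is made up):
 *
 *	static int
 *	count_stopped(struct proc *p, void *arg)
 *	{
 *		if (p->p_stat == SSTOP)
 *			(*(int *)arg)++;
 *		return 0;
 *	}
 *
 *	int n = 0;
 *	proclist_foreach_call(&allproc, count_stopped, &n);
 *
 * The callback runs with proc_lock held and must return with it held;
 * a non-zero return value stops the walk early.
 */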
1745
1746 int
1747 proc_vmspace_getref(struct proc *p, struct vmspace **vm)
1748 {
1749
1750 /* XXXCDC: how should locking work here? */
1751
1752 /* curproc exception is for coredump. */
1753
1754 if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
1755 (p->p_vmspace->vm_refcnt < 1)) {
1756 return EFAULT;
1757 }
1758
1759 uvmspace_addref(p->p_vmspace);
1760 *vm = p->p_vmspace;
1761
1762 return 0;
1763 }
1764
1765 /*
1766 * Acquire a write lock on the process credential.
1767 */
1768 void
1769 proc_crmod_enter(void)
1770 {
1771 struct lwp *l = curlwp;
1772 struct proc *p = l->l_proc;
1773 kauth_cred_t oc;
1774
1775 /* Reset what needs to be reset in plimit. */
1776 if (p->p_limit->pl_corename != defcorename) {
1777 lim_setcorename(p, defcorename, 0);
1778 }
1779
1780 mutex_enter(p->p_lock);
1781
1782 /* Ensure the LWP cached credentials are up to date. */
1783 if ((oc = l->l_cred) != p->p_cred) {
1784 kauth_cred_hold(p->p_cred);
1785 l->l_cred = p->p_cred;
1786 kauth_cred_free(oc);
1787 }
1788 }
1789
1790 /*
1791 * Set in a new process credential, and drop the write lock. The credential
1792 * must have a reference already. Optionally, free a no-longer required
1793 * credential. The scheduler also needs to inspect p_cred, so we also
1794 * briefly acquire the sched state mutex.
1795 */
1796 void
1797 proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
1798 {
1799 struct lwp *l = curlwp, *l2;
1800 struct proc *p = l->l_proc;
1801 kauth_cred_t oc;
1802
1803 KASSERT(mutex_owned(p->p_lock));
1804
1805 /* Is there a new credential to set in? */
1806 if (scred != NULL) {
1807 p->p_cred = scred;
1808 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
1809 if (l2 != l)
1810 l2->l_prflag |= LPR_CRMOD;
1811 }
1812
1813 /* Ensure the LWP cached credentials are up to date. */
1814 if ((oc = l->l_cred) != scred) {
1815 kauth_cred_hold(scred);
1816 l->l_cred = scred;
1817 }
1818 } else
1819 oc = NULL; /* XXXgcc */
1820
1821 if (sugid) {
1822 /*
1823 * Mark process as having changed credentials, stops
1824 * tracing etc.
1825 */
1826 p->p_flag |= PK_SUGID;
1827 }
1828
1829 mutex_exit(p->p_lock);
1830
1831 /* If there is a credential to be released, free it now. */
1832 if (fcred != NULL) {
1833 KASSERT(scred != NULL);
1834 kauth_cred_free(fcred);
1835 if (oc != scred)
1836 kauth_cred_free(oc);
1837 }
1838 }
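
/*
 * Usage sketch (illustrative; the surrounding logic is elided): callers
 * pair the two functions, handing proc_crmod_leave() a new credential
 * that already holds a reference ("ncred") and the credential it is to
 * free ("ocred"):
 *
 *	proc_crmod_enter();
 *	...
 *	proc_crmod_leave(ncred, ocred, true);
 *
 * proc_crmod_enter() returns with p->p_lock held; proc_crmod_leave()
 * drops it and frees "ocred".
 */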
1839
1840 /*
1841 * proc_specific_key_create --
1842 * Create a key for subsystem proc-specific data.
1843 */
1844 int
1845 proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1846 {
1847
1848 return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
1849 }
1850
1851 /*
1852 * proc_specific_key_delete --
1853 * Delete a key for subsystem proc-specific data.
1854 */
1855 void
1856 proc_specific_key_delete(specificdata_key_t key)
1857 {
1858
1859 specificdata_key_delete(proc_specificdata_domain, key);
1860 }
1861
1862 /*
1863 * proc_initspecific --
1864 * Initialize a proc's specificdata container.
1865 */
1866 void
1867 proc_initspecific(struct proc *p)
1868 {
1869 int error __diagused;
1870
1871 error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
1872 KASSERT(error == 0);
1873 }
1874
1875 /*
1876 * proc_finispecific --
1877 * Finalize a proc's specificdata container.
1878 */
1879 void
1880 proc_finispecific(struct proc *p)
1881 {
1882
1883 specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
1884 }
1885
1886 /*
1887 * proc_getspecific --
1888 * Return proc-specific data corresponding to the specified key.
1889 */
1890 void *
1891 proc_getspecific(struct proc *p, specificdata_key_t key)
1892 {
1893
1894 return (specificdata_getspecific(proc_specificdata_domain,
1895 &p->p_specdataref, key));
1896 }
1897
1898 /*
1899 * proc_setspecific --
1900 * Set proc-specific data corresponding to the specified key.
1901 */
1902 void
1903 proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
1904 {
1905
1906 specificdata_setspecific(proc_specificdata_domain,
1907 &p->p_specdataref, key, data);
1908 }
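
/*
 * Usage sketch (illustrative; the "foo" names are made up): a subsystem
 * normally creates one key at initialisation time and then attaches its
 * per-process state to it:
 *
 *	static specificdata_key_t foo_key;
 *
 *	proc_specific_key_create(&foo_key, foo_dtor);
 *	...
 *	proc_setspecific(p, foo_key, foo_state);
 *	...
 *	foo_state = proc_getspecific(p, foo_key);
 */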
1909
1910 int
1911 proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
1912 {
1913 int r = 0;
1914
1915 if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
1916 kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
1917 /*
1918 * suid proc of ours or proc not ours
1919 */
1920 r = EPERM;
1921 } else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
1922 /*
1923 * sgid proc has sgid back to us temporarily
1924 */
1925 r = EPERM;
1926 } else {
1927 /*
1928 * our rgid must be in target's group list (ie,
1929 * sub-processes started by a sgid process)
1930 */
1931 int ismember = 0;
1932
1933 if (kauth_cred_ismember_gid(cred,
1934 kauth_cred_getgid(target), &ismember) != 0 ||
1935 !ismember)
1936 r = EPERM;
1937 }
1938
1939 return (r);
1940 }
1941
1942 /*
1943 * sysctl stuff
1944 */
1945
1946 #define KERN_PROCSLOP (5 * sizeof(struct kinfo_proc))
1947
1948 static const u_int sysctl_flagmap[] = {
1949 PK_ADVLOCK, P_ADVLOCK,
1950 PK_EXEC, P_EXEC,
1951 PK_NOCLDWAIT, P_NOCLDWAIT,
1952 PK_32, P_32,
1953 PK_CLDSIGIGN, P_CLDSIGIGN,
1954 PK_SUGID, P_SUGID,
1955 0
1956 };
1957
1958 static const u_int sysctl_sflagmap[] = {
1959 PS_NOCLDSTOP, P_NOCLDSTOP,
1960 PS_WEXIT, P_WEXIT,
1961 PS_STOPFORK, P_STOPFORK,
1962 PS_STOPEXEC, P_STOPEXEC,
1963 PS_STOPEXIT, P_STOPEXIT,
1964 0
1965 };
1966
1967 static const u_int sysctl_slflagmap[] = {
1968 PSL_TRACED, P_TRACED,
1969 PSL_CHTRACED, P_CHTRACED,
1970 PSL_SYSCALL, P_SYSCALL,
1971 0
1972 };
1973
1974 static const u_int sysctl_lflagmap[] = {
1975 PL_CONTROLT, P_CONTROLT,
1976 PL_PPWAIT, P_PPWAIT,
1977 0
1978 };
1979
1980 static const u_int sysctl_stflagmap[] = {
1981 PST_PROFIL, P_PROFIL,
1982 0
1983
1984 };
1985
1986 /* used by kern_lwp also */
1987 const u_int sysctl_lwpflagmap[] = {
1988 LW_SINTR, L_SINTR,
1989 LW_SYSTEM, L_SYSTEM,
1990 0
1991 };
1992
1993 /*
1994 * Find the most ``active'' lwp of a process and return it for ps display
1995 * purposes
1996 */
1997 static struct lwp *
1998 proc_active_lwp(struct proc *p)
1999 {
2000 static const int ostat[] = {
2001 0,
2002 2, /* LSIDL */
2003 6, /* LSRUN */
2004 5, /* LSSLEEP */
2005 4, /* LSSTOP */
2006 0, /* LSZOMB */
2007 1, /* LSDEAD */
2008 7, /* LSONPROC */
2009 3 /* LSSUSPENDED */
2010 };
2011
2012 struct lwp *l, *lp = NULL;
2013 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2014 KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
2015 if (lp == NULL ||
2016 ostat[l->l_stat] > ostat[lp->l_stat] ||
2017 (ostat[l->l_stat] == ostat[lp->l_stat] &&
2018 l->l_cpticks > lp->l_cpticks)) {
2019 lp = l;
2020 continue;
2021 }
2022 }
2023 return lp;
2024 }
2025
2026 static int
2027 sysctl_doeproc(SYSCTLFN_ARGS)
2028 {
2029 union {
2030 struct kinfo_proc kproc;
2031 struct kinfo_proc2 kproc2;
2032 } *kbuf;
2033 struct proc *p, *next, *marker;
2034 char *where, *dp;
2035 int type, op, arg, error;
2036 u_int elem_size, kelem_size, elem_count;
2037 size_t buflen, needed;
2038 bool match, zombie, mmmbrains;
2039 const bool allowaddr = get_expose_address(curproc);
2040
2041 if (namelen == 1 && name[0] == CTL_QUERY)
2042 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2043
2044 dp = where = oldp;
2045 buflen = where != NULL ? *oldlenp : 0;
2046 error = 0;
2047 needed = 0;
2048 type = rnode->sysctl_num;
2049
2050 if (type == KERN_PROC) {
2051 if (namelen == 0)
2052 return EINVAL;
2053 switch (op = name[0]) {
2054 case KERN_PROC_ALL:
2055 if (namelen != 1)
2056 return EINVAL;
2057 arg = 0;
2058 break;
2059 default:
2060 if (namelen != 2)
2061 return EINVAL;
2062 arg = name[1];
2063 break;
2064 }
2065 elem_count = 0; /* Hush little compiler, don't you cry */
2066 kelem_size = elem_size = sizeof(kbuf->kproc);
2067 } else {
2068 if (namelen != 4)
2069 return EINVAL;
2070 op = name[0];
2071 arg = name[1];
2072 elem_size = name[2];
2073 elem_count = name[3];
2074 kelem_size = sizeof(kbuf->kproc2);
2075 }
2076
2077 sysctl_unlock();
2078
2079 kbuf = kmem_zalloc(sizeof(*kbuf), KM_SLEEP);
2080 marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
2081 marker->p_flag = PK_MARKER;
2082
2083 mutex_enter(&proc_lock);
2084 /*
2085 * Start with zombies to prevent reporting processes twice, in case they
2086 * are dying and being moved from the list of alive processes to zombies.
2087 */
2088 mmmbrains = true;
2089 for (p = LIST_FIRST(&zombproc);; p = next) {
2090 if (p == NULL) {
2091 if (mmmbrains) {
2092 p = LIST_FIRST(&allproc);
2093 mmmbrains = false;
2094 }
2095 if (p == NULL)
2096 break;
2097 }
2098 next = LIST_NEXT(p, p_list);
2099 if ((p->p_flag & PK_MARKER) != 0)
2100 continue;
2101
2102 /*
2103 * Skip embryonic processes.
2104 */
2105 if (p->p_stat == SIDL)
2106 continue;
2107
2108 mutex_enter(p->p_lock);
2109 error = kauth_authorize_process(l->l_cred,
2110 KAUTH_PROCESS_CANSEE, p,
2111 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_EPROC), NULL, NULL);
2112 if (error != 0) {
2113 mutex_exit(p->p_lock);
2114 continue;
2115 }
2116
2117 		/*
2118 		 * Handling all the operations in one switch, at the cost of
2119 		 * algorithm complexity, is deliberate.  Splitting this function
2120 		 * into several near-identical copies would grow the code and the
2121 		 * maintenance burden for a speedup that is negligible in practice.
2122 		 */
2123 switch (op) {
2124 case KERN_PROC_PID:
2125 match = (p->p_pid == (pid_t)arg);
2126 break;
2127
2128 case KERN_PROC_PGRP:
2129 match = (p->p_pgrp->pg_id == (pid_t)arg);
2130 break;
2131
2132 case KERN_PROC_SESSION:
2133 match = (p->p_session->s_sid == (pid_t)arg);
2134 break;
2135
2136 case KERN_PROC_TTY:
2137 match = true;
2138 if (arg == (int) KERN_PROC_TTY_REVOKE) {
2139 if ((p->p_lflag & PL_CONTROLT) == 0 ||
2140 p->p_session->s_ttyp == NULL ||
2141 p->p_session->s_ttyvp != NULL) {
2142 match = false;
2143 }
2144 } else if ((p->p_lflag & PL_CONTROLT) == 0 ||
2145 p->p_session->s_ttyp == NULL) {
2146 if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
2147 match = false;
2148 }
2149 } else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
2150 match = false;
2151 }
2152 break;
2153
2154 case KERN_PROC_UID:
2155 match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
2156 break;
2157
2158 case KERN_PROC_RUID:
2159 match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
2160 break;
2161
2162 case KERN_PROC_GID:
2163 			match = (kauth_cred_getegid(p->p_cred) == (gid_t)arg);
2164 break;
2165
2166 case KERN_PROC_RGID:
2167 			match = (kauth_cred_getgid(p->p_cred) == (gid_t)arg);
2168 break;
2169
2170 case KERN_PROC_ALL:
2171 match = true;
2172 /* allow everything */
2173 break;
2174
2175 default:
2176 error = EINVAL;
2177 mutex_exit(p->p_lock);
2178 goto cleanup;
2179 }
2180 if (!match) {
2181 mutex_exit(p->p_lock);
2182 continue;
2183 }
2184
2185 /*
2186 * Grab a hold on the process.
2187 */
2188 if (mmmbrains) {
2189 zombie = true;
2190 } else {
2191 zombie = !rw_tryenter(&p->p_reflock, RW_READER);
2192 }
2193 if (zombie) {
2194 LIST_INSERT_AFTER(p, marker, p_list);
2195 }
2196
2197 if (buflen >= elem_size &&
2198 (type == KERN_PROC || elem_count > 0)) {
2199 ruspace(p); /* Update process vm resource use */
2200
2201 if (type == KERN_PROC) {
2202 fill_proc(p, &kbuf->kproc.kp_proc, allowaddr);
2203 fill_eproc(p, &kbuf->kproc.kp_eproc, zombie,
2204 allowaddr);
2205 } else {
2206 fill_kproc2(p, &kbuf->kproc2, zombie,
2207 allowaddr);
2208 elem_count--;
2209 }
2210 mutex_exit(p->p_lock);
2211 mutex_exit(&proc_lock);
2212 /*
2213 * Copy out elem_size, but not larger than kelem_size
2214 */
2215 error = sysctl_copyout(l, kbuf, dp,
2216 uimin(kelem_size, elem_size));
2217 mutex_enter(&proc_lock);
2218 if (error) {
2219 goto bah;
2220 }
2221 dp += elem_size;
2222 buflen -= elem_size;
2223 } else {
2224 mutex_exit(p->p_lock);
2225 }
2226 needed += elem_size;
2227
2228 /*
2229 * Release reference to process.
2230 */
2231 if (zombie) {
2232 next = LIST_NEXT(marker, p_list);
2233 LIST_REMOVE(marker, p_list);
2234 } else {
2235 rw_exit(&p->p_reflock);
2236 next = LIST_NEXT(p, p_list);
2237 }
2238
2239 /*
2240 * Short-circuit break quickly!
2241 */
2242 if (op == KERN_PROC_PID)
2243 break;
2244 }
2245 mutex_exit(&proc_lock);
2246
2247 if (where != NULL) {
2248 *oldlenp = dp - where;
2249 if (needed > *oldlenp) {
2250 error = ENOMEM;
2251 goto out;
2252 }
2253 } else {
2254 needed += KERN_PROCSLOP;
2255 *oldlenp = needed;
2256 }
2257 kmem_free(kbuf, sizeof(*kbuf));
2258 kmem_free(marker, sizeof(*marker));
2259 sysctl_relock();
2260 return 0;
2261 bah:
2262 if (zombie)
2263 LIST_REMOVE(marker, p_list);
2264 else
2265 rw_exit(&p->p_reflock);
2266 cleanup:
2267 mutex_exit(&proc_lock);
2268 out:
2269 kmem_free(kbuf, sizeof(*kbuf));
2270 kmem_free(marker, sizeof(*marker));
2271 sysctl_relock();
2272 return error;
2273 }
2274
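/*
 * copyin_psstrings --
 *	Read the ps_strings structure from the user address space of
 *	process p.  For 32-bit (PK_32) processes the compat hook converts
 *	from the 32-bit layout.
 */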
2275 int
2276 copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
2277 {
2278 #if !defined(_RUMPKERNEL)
2279 int retval;
2280
2281 if (p->p_flag & PK_32) {
2282 MODULE_HOOK_CALL(kern_proc32_copyin_hook, (p, arginfo),
2283 enosys(), retval);
2284 return retval;
2285 }
2286 #endif /* !defined(_RUMPKERNEL) */
2287
2288 return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
2289 }
2290
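/*
 * copy_procargs_sysctl_cb --
 *	Callback passed to copy_procargs() by sysctl_kern_proc_args():
 *	copy one chunk of argument/environment data out to the requesting
 *	lwp's buffer at the given offset.
 */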
2291 static int
2292 copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
2293 {
2294 void **cookie = cookie_;
2295 struct lwp *l = cookie[0];
2296 char *dst = cookie[1];
2297
2298 return sysctl_copyout(l, src, dst + off, len);
2299 }
2300
2301 /*
2302 * sysctl helper routine for kern.proc_args pseudo-subtree.
2303 */
2304 static int
2305 sysctl_kern_proc_args(SYSCTLFN_ARGS)
2306 {
2307 struct ps_strings pss;
2308 struct proc *p;
2309 pid_t pid;
2310 int type, error;
2311 void *cookie[2];
2312
2313 if (namelen == 1 && name[0] == CTL_QUERY)
2314 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2315
2316 if (newp != NULL || namelen != 2)
2317 return (EINVAL);
2318 pid = name[0];
2319 type = name[1];
2320
2321 switch (type) {
2322 case KERN_PROC_PATHNAME:
2323 sysctl_unlock();
2324 error = fill_pathname(l, pid, oldp, oldlenp);
2325 sysctl_relock();
2326 return error;
2327
2328 case KERN_PROC_CWD:
2329 sysctl_unlock();
2330 error = fill_cwd(l, pid, oldp, oldlenp);
2331 sysctl_relock();
2332 return error;
2333
2334 case KERN_PROC_ARGV:
2335 case KERN_PROC_NARGV:
2336 case KERN_PROC_ENV:
2337 case KERN_PROC_NENV:
2338 /* ok */
2339 break;
2340 default:
2341 return (EINVAL);
2342 }
2343
2344 sysctl_unlock();
2345
2346 /* check pid */
2347 mutex_enter(&proc_lock);
2348 if ((p = proc_find(pid)) == NULL) {
2349 error = EINVAL;
2350 goto out_locked;
2351 }
2352 mutex_enter(p->p_lock);
2353
2354 /* Check permission. */
2355 if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
2356 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2357 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
2358 else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
2359 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
2360 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
2361 else
2362 error = EINVAL; /* XXXGCC */
2363 if (error) {
2364 mutex_exit(p->p_lock);
2365 goto out_locked;
2366 }
2367
2368 if (oldp == NULL) {
2369 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
2370 *oldlenp = sizeof (int);
2371 else
2372 *oldlenp = ARG_MAX; /* XXX XXX XXX */
2373 error = 0;
2374 mutex_exit(p->p_lock);
2375 goto out_locked;
2376 }
2377
2378 /*
2379 * Zombies don't have a stack, so we can't read their psstrings.
2380 * System processes also don't have a user stack.
2381 */
2382 if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
2383 error = EINVAL;
2384 mutex_exit(p->p_lock);
2385 goto out_locked;
2386 }
2387
2388 error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
2389 mutex_exit(p->p_lock);
2390 if (error) {
2391 goto out_locked;
2392 }
2393 mutex_exit(&proc_lock);
2394
2395 if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
2396 int value;
2397 if ((error = copyin_psstrings(p, &pss)) == 0) {
2398 if (type == KERN_PROC_NARGV)
2399 value = pss.ps_nargvstr;
2400 else
2401 value = pss.ps_nenvstr;
2402 error = sysctl_copyout(l, &value, oldp, sizeof(value));
2403 *oldlenp = sizeof(value);
2404 }
2405 } else {
2406 cookie[0] = l;
2407 cookie[1] = oldp;
2408 error = copy_procargs(p, type, oldlenp,
2409 copy_procargs_sysctl_cb, cookie);
2410 }
2411 rw_exit(&p->p_reflock);
2412 sysctl_relock();
2413 return error;
2414
2415 out_locked:
2416 mutex_exit(&proc_lock);
2417 sysctl_relock();
2418 return error;
2419 }
2420
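/*
 * copy_procargs --
 *	Copy the argument or environment strings (KERN_PROC_ARGV or
 *	KERN_PROC_ENV) of process p, feeding them chunk by chunk to the
 *	supplied callback.  At most *limit bytes are delivered; on return
 *	*limit holds the number of bytes actually produced.
 */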
2421 int
2422 copy_procargs(struct proc *p, int oid, size_t *limit,
2423 int (*cb)(void *, const void *, size_t, size_t), void *cookie)
2424 {
2425 struct ps_strings pss;
2426 size_t len, i, loaded, entry_len;
2427 struct uio auio;
2428 struct iovec aiov;
2429 int error, argvlen;
2430 char *arg;
2431 char **argv;
2432 vaddr_t user_argv;
2433 struct vmspace *vmspace;
2434
2435 /*
2436 * Allocate a temporary buffer to hold the argument vector and
2437 	 * the arguments themselves.
2438 */
2439 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2440 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2441
2442 	/*
2443 	 * Take a reference on the process's VM space so it stays around.
2444 	 */
2445 vmspace = p->p_vmspace;
2446 uvmspace_addref(vmspace);
2447
2448 /*
2449 * Read in the ps_strings structure.
2450 */
2451 if ((error = copyin_psstrings(p, &pss)) != 0)
2452 goto done;
2453
2454 /*
2455 * Now read the address of the argument vector.
2456 */
2457 switch (oid) {
2458 case KERN_PROC_ARGV:
2459 user_argv = (uintptr_t)pss.ps_argvstr;
2460 argvlen = pss.ps_nargvstr;
2461 break;
2462 case KERN_PROC_ENV:
2463 user_argv = (uintptr_t)pss.ps_envstr;
2464 argvlen = pss.ps_nenvstr;
2465 break;
2466 default:
2467 error = EINVAL;
2468 goto done;
2469 }
2470
2471 if (argvlen < 0) {
2472 error = EIO;
2473 goto done;
2474 }
2475
2477 /*
2478 * Now copy each string.
2479 */
2480 len = 0; /* bytes written to user buffer */
2481 loaded = 0; /* bytes from argv already processed */
2482 i = 0; /* To make compiler happy */
2483 entry_len = PROC_PTRSZ(p);
2484
2485 for (; argvlen; --argvlen) {
2486 int finished = 0;
2487 vaddr_t base;
2488 size_t xlen;
2489 int j;
2490
2491 if (loaded == 0) {
2492 size_t rem = entry_len * argvlen;
2493 loaded = MIN(rem, PAGE_SIZE);
2494 error = copyin_vmspace(vmspace,
2495 (const void *)user_argv, argv, loaded);
2496 if (error)
2497 break;
2498 user_argv += loaded;
2499 i = 0;
2500 }
2501
2502 #if !defined(_RUMPKERNEL)
2503 if (p->p_flag & PK_32)
2504 MODULE_HOOK_CALL(kern_proc32_base_hook,
2505 (argv, i++), 0, base);
2506 else
2507 #endif /* !defined(_RUMPKERNEL) */
2508 base = (vaddr_t)argv[i++];
2509 loaded -= entry_len;
2510
2511 /*
2512 * The program has messed around with its arguments,
2513 * possibly deleting some, and replacing them with
2514 * NULL's. Treat this as the last argument and not
2515 * a failure.
2516 */
2517 if (base == 0)
2518 break;
2519
2520 while (!finished) {
2521 xlen = PAGE_SIZE - (base & PAGE_MASK);
2522
2523 aiov.iov_base = arg;
2524 aiov.iov_len = PAGE_SIZE;
2525 auio.uio_iov = &aiov;
2526 auio.uio_iovcnt = 1;
2527 auio.uio_offset = base;
2528 auio.uio_resid = xlen;
2529 auio.uio_rw = UIO_READ;
2530 UIO_SETUP_SYSSPACE(&auio);
2531 error = uvm_io(&vmspace->vm_map, &auio, 0);
2532 if (error)
2533 goto done;
2534
2535 /* Look for the end of the string */
2536 for (j = 0; j < xlen; j++) {
2537 if (arg[j] == '\0') {
2538 xlen = j + 1;
2539 finished = 1;
2540 break;
2541 }
2542 }
2543
2544 /* Check for user buffer overflow */
2545 if (len + xlen > *limit) {
2546 finished = 1;
2547 if (len > *limit)
2548 xlen = 0;
2549 else
2550 xlen = *limit - len;
2551 }
2552
2553 /* Copyout the page */
2554 error = (*cb)(cookie, arg, len, xlen);
2555 if (error)
2556 goto done;
2557
2558 len += xlen;
2559 base += xlen;
2560 }
2561 }
2562 *limit = len;
2563
2564 done:
2565 kmem_free(argv, PAGE_SIZE);
2566 kmem_free(arg, PAGE_SIZE);
2567 uvmspace_free(vmspace);
2568 return error;
2569 }
2570
2571 /*
2572 * Fill in a proc structure for the specified process.
2573 */
2574 static void
2575 fill_proc(const struct proc *psrc, struct proc *p, bool allowaddr)
2576 {
2577 COND_SET_STRUCT(p->p_list, psrc->p_list, allowaddr);
2578 memset(&p->p_auxlock, 0, sizeof(p->p_auxlock));
2579 COND_SET_STRUCT(p->p_lock, psrc->p_lock, allowaddr);
2580 memset(&p->p_stmutex, 0, sizeof(p->p_stmutex));
2581 memset(&p->p_reflock, 0, sizeof(p->p_reflock));
2582 COND_SET_STRUCT(p->p_waitcv, psrc->p_waitcv, allowaddr);
2583 COND_SET_STRUCT(p->p_lwpcv, psrc->p_lwpcv, allowaddr);
2584 COND_SET_PTR(p->p_cred, psrc->p_cred, allowaddr);
2585 COND_SET_PTR(p->p_fd, psrc->p_fd, allowaddr);
2586 COND_SET_PTR(p->p_cwdi, psrc->p_cwdi, allowaddr);
2587 COND_SET_PTR(p->p_stats, psrc->p_stats, allowaddr);
2588 COND_SET_PTR(p->p_limit, psrc->p_limit, allowaddr);
2589 COND_SET_PTR(p->p_vmspace, psrc->p_vmspace, allowaddr);
2590 COND_SET_PTR(p->p_sigacts, psrc->p_sigacts, allowaddr);
2591 COND_SET_PTR(p->p_aio, psrc->p_aio, allowaddr);
2592 p->p_mqueue_cnt = psrc->p_mqueue_cnt;
2593 memset(&p->p_specdataref, 0, sizeof(p->p_specdataref));
2594 p->p_exitsig = psrc->p_exitsig;
2595 p->p_flag = psrc->p_flag;
2596 p->p_sflag = psrc->p_sflag;
2597 p->p_slflag = psrc->p_slflag;
2598 p->p_lflag = psrc->p_lflag;
2599 p->p_stflag = psrc->p_stflag;
2600 p->p_stat = psrc->p_stat;
2601 p->p_trace_enabled = psrc->p_trace_enabled;
2602 p->p_pid = psrc->p_pid;
2603 COND_SET_STRUCT(p->p_pglist, psrc->p_pglist, allowaddr);
2604 COND_SET_PTR(p->p_pptr, psrc->p_pptr, allowaddr);
2605 COND_SET_STRUCT(p->p_sibling, psrc->p_sibling, allowaddr);
2606 COND_SET_STRUCT(p->p_children, psrc->p_children, allowaddr);
2607 COND_SET_STRUCT(p->p_lwps, psrc->p_lwps, allowaddr);
2608 COND_SET_PTR(p->p_raslist, psrc->p_raslist, allowaddr);
2609 p->p_nlwps = psrc->p_nlwps;
2610 p->p_nzlwps = psrc->p_nzlwps;
2611 p->p_nrlwps = psrc->p_nrlwps;
2612 p->p_nlwpwait = psrc->p_nlwpwait;
2613 p->p_ndlwps = psrc->p_ndlwps;
2614 p->p_nstopchild = psrc->p_nstopchild;
2615 p->p_waited = psrc->p_waited;
2616 COND_SET_PTR(p->p_zomblwp, psrc->p_zomblwp, allowaddr);
2617 COND_SET_PTR(p->p_vforklwp, psrc->p_vforklwp, allowaddr);
2618 COND_SET_PTR(p->p_sched_info, psrc->p_sched_info, allowaddr);
2619 p->p_estcpu = psrc->p_estcpu;
2620 p->p_estcpu_inherited = psrc->p_estcpu_inherited;
2621 p->p_forktime = psrc->p_forktime;
2622 p->p_pctcpu = psrc->p_pctcpu;
2623 COND_SET_PTR(p->p_opptr, psrc->p_opptr, allowaddr);
2624 COND_SET_PTR(p->p_timers, psrc->p_timers, allowaddr);
2625 p->p_rtime = psrc->p_rtime;
2626 p->p_uticks = psrc->p_uticks;
2627 p->p_sticks = psrc->p_sticks;
2628 p->p_iticks = psrc->p_iticks;
2629 p->p_xutime = psrc->p_xutime;
2630 p->p_xstime = psrc->p_xstime;
2631 p->p_traceflag = psrc->p_traceflag;
2632 COND_SET_PTR(p->p_tracep, psrc->p_tracep, allowaddr);
2633 COND_SET_PTR(p->p_textvp, psrc->p_textvp, allowaddr);
2634 COND_SET_PTR(p->p_emul, psrc->p_emul, allowaddr);
2635 COND_SET_PTR(p->p_emuldata, psrc->p_emuldata, allowaddr);
2636 COND_SET_CPTR(p->p_execsw, psrc->p_execsw, allowaddr);
2637 COND_SET_STRUCT(p->p_klist, psrc->p_klist, allowaddr);
2638 COND_SET_STRUCT(p->p_sigwaiters, psrc->p_sigwaiters, allowaddr);
2639 COND_SET_STRUCT(p->p_sigpend.sp_info, psrc->p_sigpend.sp_info,
2640 allowaddr);
2641 p->p_sigpend.sp_set = psrc->p_sigpend.sp_set;
2642 COND_SET_PTR(p->p_lwpctl, psrc->p_lwpctl, allowaddr);
2643 p->p_ppid = psrc->p_ppid;
2644 p->p_oppid = psrc->p_oppid;
2645 COND_SET_PTR(p->p_path, psrc->p_path, allowaddr);
2646 p->p_sigctx = psrc->p_sigctx;
2647 p->p_nice = psrc->p_nice;
2648 memcpy(p->p_comm, psrc->p_comm, sizeof(p->p_comm));
2649 COND_SET_PTR(p->p_pgrp, psrc->p_pgrp, allowaddr);
2650 COND_SET_VALUE(p->p_psstrp, psrc->p_psstrp, allowaddr);
2651 p->p_pax = psrc->p_pax;
2652 p->p_xexit = psrc->p_xexit;
2653 p->p_xsig = psrc->p_xsig;
2654 p->p_acflag = psrc->p_acflag;
2655 COND_SET_STRUCT(p->p_md, psrc->p_md, allowaddr);
2656 p->p_stackbase = psrc->p_stackbase;
2657 COND_SET_PTR(p->p_dtrace, psrc->p_dtrace, allowaddr);
2658 }
2659
2660 /*
2661 * Fill in an eproc structure for the specified process.
2662 */
2663 void
2664 fill_eproc(struct proc *p, struct eproc *ep, bool zombie, bool allowaddr)
2665 {
2666 struct tty *tp;
2667 struct lwp *l;
2668
2669 KASSERT(mutex_owned(&proc_lock));
2670 KASSERT(mutex_owned(p->p_lock));
2671
2672 COND_SET_PTR(ep->e_paddr, p, allowaddr);
2673 COND_SET_PTR(ep->e_sess, p->p_session, allowaddr);
2674 if (p->p_cred) {
2675 kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2676 kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2677 }
2678 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2679 struct vmspace *vm = p->p_vmspace;
2680
2681 ep->e_vm.vm_rssize = vm_resident_count(vm);
2682 ep->e_vm.vm_tsize = vm->vm_tsize;
2683 ep->e_vm.vm_dsize = vm->vm_dsize;
2684 ep->e_vm.vm_ssize = vm->vm_ssize;
2685 ep->e_vm.vm_map.size = vm->vm_map.size;
2686
2687 		/* Pick the most active LWP for display purposes. */
2688 l = proc_active_lwp(p);
2689 KASSERT(l != NULL);
2690 lwp_lock(l);
2691 if (l->l_wchan)
2692 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2693 lwp_unlock(l);
2694 }
2695 ep->e_ppid = p->p_ppid;
2696 if (p->p_pgrp && p->p_session) {
2697 ep->e_pgid = p->p_pgrp->pg_id;
2698 ep->e_jobc = p->p_pgrp->pg_jobc;
2699 ep->e_sid = p->p_session->s_sid;
2700 if ((p->p_lflag & PL_CONTROLT) &&
2701 (tp = p->p_session->s_ttyp)) {
2702 ep->e_tdev = tp->t_dev;
2703 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2704 COND_SET_PTR(ep->e_tsess, tp->t_session, allowaddr);
2705 } else
2706 ep->e_tdev = (uint32_t)NODEV;
2707 ep->e_flag = p->p_session->s_ttyvp ? EPROC_CTTY : 0;
2708 if (SESS_LEADER(p))
2709 ep->e_flag |= EPROC_SLEADER;
2710 strncpy(ep->e_login, p->p_session->s_login, MAXLOGNAME);
2711 }
2712 ep->e_xsize = ep->e_xrssize = 0;
2713 ep->e_xccount = ep->e_xswrss = 0;
2714 }
2715
2716 /*
2717 * Fill in a kinfo_proc2 structure for the specified process.
2718 */
2719 void
2720 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie, bool allowaddr)
2721 {
2722 struct tty *tp;
2723 struct lwp *l, *l2;
2724 struct timeval ut, st, rt;
2725 sigset_t ss1, ss2;
2726 struct rusage ru;
2727 struct vmspace *vm;
2728
2729 KASSERT(mutex_owned(&proc_lock));
2730 KASSERT(mutex_owned(p->p_lock));
2731
2732 sigemptyset(&ss1);
2733 sigemptyset(&ss2);
2734
2735 COND_SET_VALUE(ki->p_paddr, PTRTOUINT64(p), allowaddr);
2736 COND_SET_VALUE(ki->p_fd, PTRTOUINT64(p->p_fd), allowaddr);
2737 COND_SET_VALUE(ki->p_cwdi, PTRTOUINT64(p->p_cwdi), allowaddr);
2738 COND_SET_VALUE(ki->p_stats, PTRTOUINT64(p->p_stats), allowaddr);
2739 COND_SET_VALUE(ki->p_limit, PTRTOUINT64(p->p_limit), allowaddr);
2740 COND_SET_VALUE(ki->p_vmspace, PTRTOUINT64(p->p_vmspace), allowaddr);
2741 COND_SET_VALUE(ki->p_sigacts, PTRTOUINT64(p->p_sigacts), allowaddr);
2742 COND_SET_VALUE(ki->p_sess, PTRTOUINT64(p->p_session), allowaddr);
2743 ki->p_tsess = 0; /* may be changed if controlling tty below */
2744 COND_SET_VALUE(ki->p_ru, PTRTOUINT64(&p->p_stats->p_ru), allowaddr);
2745 ki->p_eflag = 0;
2746 ki->p_exitsig = p->p_exitsig;
2747 ki->p_flag = L_INMEM; /* Process never swapped out */
2748 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2749 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2750 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2751 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2752 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2753 ki->p_pid = p->p_pid;
2754 ki->p_ppid = p->p_ppid;
2755 ki->p_uid = kauth_cred_geteuid(p->p_cred);
2756 ki->p_ruid = kauth_cred_getuid(p->p_cred);
2757 ki->p_gid = kauth_cred_getegid(p->p_cred);
2758 ki->p_rgid = kauth_cred_getgid(p->p_cred);
2759 ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2760 ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2761 ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
2762 kauth_cred_getgroups(p->p_cred, ki->p_groups,
2763 uimin(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
2764 UIO_SYSSPACE);
2765
2766 ki->p_uticks = p->p_uticks;
2767 ki->p_sticks = p->p_sticks;
2768 ki->p_iticks = p->p_iticks;
2769 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */
2770 COND_SET_VALUE(ki->p_tracep, PTRTOUINT64(p->p_tracep), allowaddr);
2771 ki->p_traceflag = p->p_traceflag;
2772
2773 	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore, sizeof(ki_sigset_t));
2774 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2775
2776 ki->p_cpticks = 0;
2777 ki->p_pctcpu = p->p_pctcpu;
2778 ki->p_estcpu = 0;
2779 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
2780 ki->p_realstat = p->p_stat;
2781 ki->p_nice = p->p_nice;
2782 ki->p_xstat = P_WAITSTATUS(p);
2783 ki->p_acflag = p->p_acflag;
2784
2785 strncpy(ki->p_comm, p->p_comm,
2786 uimin(sizeof(ki->p_comm), sizeof(p->p_comm)));
2787 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2788
2789 ki->p_nlwps = p->p_nlwps;
2790 ki->p_realflag = ki->p_flag;
2791
2792 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2793 vm = p->p_vmspace;
2794 ki->p_vm_rssize = vm_resident_count(vm);
2795 ki->p_vm_tsize = vm->vm_tsize;
2796 ki->p_vm_dsize = vm->vm_dsize;
2797 ki->p_vm_ssize = vm->vm_ssize;
2798 ki->p_vm_vsize = atop(vm->vm_map.size);
2799 /*
2800 * Since the stack is initially mapped mostly with
2801 * PROT_NONE and grown as needed, adjust the "mapped size"
2802 * to skip the unused stack portion.
2803 */
2804 ki->p_vm_msize =
2805 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2806
2807 		/* Pick the most active LWP for display purposes. */
2808 l = proc_active_lwp(p);
2809 KASSERT(l != NULL);
2810 lwp_lock(l);
2811 ki->p_nrlwps = p->p_nrlwps;
2812 ki->p_forw = 0;
2813 ki->p_back = 0;
2814 COND_SET_VALUE(ki->p_addr, PTRTOUINT64(l->l_addr), allowaddr);
2815 ki->p_stat = l->l_stat;
2816 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2817 ki->p_swtime = l->l_swtime;
2818 ki->p_slptime = l->l_slptime;
2819 if (l->l_stat == LSONPROC)
2820 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2821 else
2822 ki->p_schedflags = 0;
2823 ki->p_priority = lwp_eprio(l);
2824 ki->p_usrpri = l->l_priority;
2825 if (l->l_wchan)
2826 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2827 COND_SET_VALUE(ki->p_wchan, PTRTOUINT64(l->l_wchan), allowaddr);
2828 ki->p_cpuid = cpu_index(l->l_cpu);
2829 lwp_unlock(l);
2830 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2831 /* This is hardly correct, but... */
2832 sigplusset(&l->l_sigpend.sp_set, &ss1);
2833 sigplusset(&l->l_sigmask, &ss2);
2834 ki->p_cpticks += l->l_cpticks;
2835 ki->p_pctcpu += l->l_pctcpu;
2836 ki->p_estcpu += l->l_estcpu;
2837 }
2838 }
2839 sigplusset(&p->p_sigpend.sp_set, &ss1);
2840 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2841 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2842
2843 if (p->p_session != NULL) {
2844 ki->p_sid = p->p_session->s_sid;
2845 ki->p__pgid = p->p_pgrp->pg_id;
2846 if (p->p_session->s_ttyvp)
2847 ki->p_eflag |= EPROC_CTTY;
2848 if (SESS_LEADER(p))
2849 ki->p_eflag |= EPROC_SLEADER;
2850 strncpy(ki->p_login, p->p_session->s_login,
2851 uimin(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
2852 ki->p_jobc = p->p_pgrp->pg_jobc;
2853 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2854 ki->p_tdev = tp->t_dev;
2855 ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2856 COND_SET_VALUE(ki->p_tsess, PTRTOUINT64(tp->t_session),
2857 allowaddr);
2858 } else {
2859 ki->p_tdev = (int32_t)NODEV;
2860 }
2861 }
2862
2863 if (!P_ZOMBIE(p) && !zombie) {
2864 ki->p_uvalid = 1;
2865 ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2866 ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2867
2868 calcru(p, &ut, &st, NULL, &rt);
2869 ki->p_rtime_sec = rt.tv_sec;
2870 ki->p_rtime_usec = rt.tv_usec;
2871 ki->p_uutime_sec = ut.tv_sec;
2872 ki->p_uutime_usec = ut.tv_usec;
2873 ki->p_ustime_sec = st.tv_sec;
2874 ki->p_ustime_usec = st.tv_usec;
2875
2876 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2877 ki->p_uru_nvcsw = 0;
2878 ki->p_uru_nivcsw = 0;
2879 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
2880 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
2881 ki->p_uru_nivcsw += l2->l_nivcsw;
2882 ruadd(&ru, &l2->l_ru);
2883 }
2884 ki->p_uru_maxrss = ru.ru_maxrss;
2885 ki->p_uru_ixrss = ru.ru_ixrss;
2886 ki->p_uru_idrss = ru.ru_idrss;
2887 ki->p_uru_isrss = ru.ru_isrss;
2888 ki->p_uru_minflt = ru.ru_minflt;
2889 ki->p_uru_majflt = ru.ru_majflt;
2890 ki->p_uru_nswap = ru.ru_nswap;
2891 ki->p_uru_inblock = ru.ru_inblock;
2892 ki->p_uru_oublock = ru.ru_oublock;
2893 ki->p_uru_msgsnd = ru.ru_msgsnd;
2894 ki->p_uru_msgrcv = ru.ru_msgrcv;
2895 ki->p_uru_nsignals = ru.ru_nsignals;
2896
2897 timeradd(&p->p_stats->p_cru.ru_utime,
2898 &p->p_stats->p_cru.ru_stime, &ut);
2899 ki->p_uctime_sec = ut.tv_sec;
2900 ki->p_uctime_usec = ut.tv_usec;
2901 }
2902 }
2903
2904
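/*
 * proc_find_locked --
 *	Look up a process by pid (or use the caller's own process when
 *	pid is -1) and check KAUTH_PROCESS_CANSEE on it.  On success *p is
 *	set and, for pid != -1, its p_lock is held; the caller must
 *	release it.
 */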
2905 int
2906 proc_find_locked(struct lwp *l, struct proc **p, pid_t pid)
2907 {
2908 int error;
2909
2910 mutex_enter(&proc_lock);
2911 if (pid == -1)
2912 *p = l->l_proc;
2913 else
2914 *p = proc_find(pid);
2915
2916 if (*p == NULL) {
2917 if (pid != -1)
2918 mutex_exit(&proc_lock);
2919 return ESRCH;
2920 }
2921 if (pid != -1)
2922 mutex_enter((*p)->p_lock);
2923 mutex_exit(&proc_lock);
2924
2925 error = kauth_authorize_process(l->l_cred,
2926 KAUTH_PROCESS_CANSEE, *p,
2927 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
2928 if (error) {
2929 if (pid != -1)
2930 mutex_exit((*p)->p_lock);
2931 }
2932 return error;
2933 }
2934
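/*
 * fill_pathname --
 *	sysctl helper for KERN_PROC_PATHNAME: copy out the cached
 *	executable path (p_path) of the given process, or of the caller
 *	when pid is -1.
 */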
2935 static int
2936 fill_pathname(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
2937 {
2938 int error;
2939 struct proc *p;
2940
2941 if ((error = proc_find_locked(l, &p, pid)) != 0)
2942 return error;
2943
2944 if (p->p_path == NULL) {
2945 if (pid != -1)
2946 mutex_exit(p->p_lock);
2947 return ENOENT;
2948 }
2949
2950 size_t len = strlen(p->p_path) + 1;
2951 if (oldp != NULL) {
2952 size_t copylen = uimin(len, *oldlenp);
2953 error = sysctl_copyout(l, p->p_path, oldp, copylen);
2954 if (error == 0 && *oldlenp < len)
2955 error = ENOSPC;
2956 }
2957 *oldlenp = len;
2958 if (pid != -1)
2959 mutex_exit(p->p_lock);
2960 return error;
2961 }
2962
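/*
 * fill_cwd --
 *	sysctl helper for KERN_PROC_CWD: resolve the current working
 *	directory of the given process with getcwd_common() and copy the
 *	resulting path out.
 */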
2963 static int
2964 fill_cwd(struct lwp *l, pid_t pid, void *oldp, size_t *oldlenp)
2965 {
2966 int error;
2967 struct proc *p;
2968 char *path;
2969 char *bp, *bend;
2970 struct cwdinfo *cwdi;
2971 struct vnode *vp;
2972 size_t len, lenused;
2973
2974 if ((error = proc_find_locked(l, &p, pid)) != 0)
2975 return error;
2976
2977 len = MAXPATHLEN * 4;
2978
2979 path = kmem_alloc(len, KM_SLEEP);
2980
2981 bp = &path[len];
2982 bend = bp;
2983 *(--bp) = '\0';
2984
2985 cwdi = p->p_cwdi;
2986 rw_enter(&cwdi->cwdi_lock, RW_READER);
2987 vp = cwdi->cwdi_cdir;
2988 error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l);
2989 rw_exit(&cwdi->cwdi_lock);
2990
2991 if (error)
2992 goto out;
2993
2994 lenused = bend - bp;
2995
2996 if (oldp != NULL) {
2997 size_t copylen = uimin(lenused, *oldlenp);
2998 error = sysctl_copyout(l, bp, oldp, copylen);
2999 if (error == 0 && *oldlenp < lenused)
3000 error = ENOSPC;
3001 }
3002 *oldlenp = lenused;
3003 out:
3004 if (pid != -1)
3005 mutex_exit(p->p_lock);
3006 kmem_free(path, len);
3007 return error;
3008 }
3009
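/*
 * proc_getauxv --
 *	Copy the auxiliary vector of process p, found just past its
 *	environment pointer array, into a freshly allocated buffer.  On
 *	success the caller owns *buf (*len bytes) and must kmem_free() it.
 */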
3010 int
3011 proc_getauxv(struct proc *p, void **buf, size_t *len)
3012 {
3013 struct ps_strings pss;
3014 int error;
3015 void *uauxv, *kauxv;
3016 size_t size;
3017
3018 if ((error = copyin_psstrings(p, &pss)) != 0)
3019 return error;
3020 if (pss.ps_envstr == NULL)
3021 return EIO;
3022
3023 size = p->p_execsw->es_arglen;
3024 if (size == 0)
3025 return EIO;
3026
3027 size_t ptrsz = PROC_PTRSZ(p);
3028 uauxv = (void *)((char *)pss.ps_envstr + (pss.ps_nenvstr + 1) * ptrsz);
3029
3030 kauxv = kmem_alloc(size, KM_SLEEP);
3031
3032 error = copyin_proc(p, uauxv, kauxv, size);
3033 if (error) {
3034 kmem_free(kauxv, size);
3035 return error;
3036 }
3037
3038 *buf = kauxv;
3039 *len = size;
3040
3041 return 0;
3042 }
3043
3044
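/*
 * sysctl_security_expose_address --
 *	sysctl helper routine controlling whether kernel addresses may be
 *	exposed to userland: only the values 0, 1 and 2 are accepted, and
 *	changing the setting requires KAUTH_SYSTEM_KERNADDR privilege.
 */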
3045 static int
3046 sysctl_security_expose_address(SYSCTLFN_ARGS)
3047 {
3048 int expose_address, error;
3049 struct sysctlnode node;
3050
3051 node = *rnode;
3052 node.sysctl_data = &expose_address;
3053 expose_address = *(int *)rnode->sysctl_data;
3054 error = sysctl_lookup(SYSCTLFN_CALL(&node));
3055 if (error || newp == NULL)
3056 return error;
3057
3058 if (kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_KERNADDR,
3059 0, NULL, NULL, NULL))
3060 return EPERM;
3061
3062 switch (expose_address) {
3063 case 0:
3064 case 1:
3065 case 2:
3066 break;
3067 default:
3068 return EINVAL;
3069 }
3070
3071 *(int *)rnode->sysctl_data = expose_address;
3072
3073 return 0;
3074 }
3075
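/*
 * get_expose_address --
 *	Return true if the current lwp may see kernel addresses belonging
 *	to process p (KAUTH_REQ_PROCESS_CANSEE_KPTR).
 */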
3076 bool
3077 get_expose_address(struct proc *p)
3078 {
3079 /* allow only if sysctl variable is set or privileged */
3080 return kauth_authorize_process(kauth_cred_get(), KAUTH_PROCESS_CANSEE,
3081 p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_KPTR), NULL, NULL) == 0;
3082 }
3083