/*	$NetBSD: kern_proc.c,v 1.187 2013/06/10 14:53:52 pooka Exp $	*/

/*-
 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.187 2013/06/10 14:53:52 pooka Exp $");

#ifdef _KERNEL_OPT
#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_dtrace.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/pool.h>
#include <sys/pset.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/filedesc.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
#include <sys/exec.h>
#include <sys/cpu.h>

#include <uvm/uvm_extern.h>

#ifdef COMPAT_NETBSD32
#include <compat/netbsd32/netbsd32.h>
#endif

/*
 * Process lists.
 */

struct proclist		allproc		__cacheline_aligned;
struct proclist		zombproc	__cacheline_aligned;

kmutex_t *		proc_lock	__cacheline_aligned;
/*
 * PID to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (An orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is threaded through the pt_proc
 * field of 'free' items - the stored value is kept odd so that it
 * can never be mistaken for a valid proc pointer.
 */
129
130 struct pid_table {
131 struct proc *pt_proc;
132 struct pgrp *pt_pgrp;
133 pid_t pt_pid;
134 };
135 #if 1 /* strongly typed cast - should be a noop */
136 static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
137 #else
138 #define p2u(p) ((uint)p)
139 #endif
140 #define P_VALID(p) (!(p2u(p) & 1))
141 #define P_NEXT(p) (p2u(p) >> 1)
142 #define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
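
/*
 * Illustrative sketch of the encoding (values assume the initial 32
 * entry table, so pid_tbl_mask == 31): a free slot whose next free
 * slot is 5 and whose saved use count is 64 stores P_FREE(64 | 5),
 * i.e. the odd value ((69 << 1) | 1).  P_VALID() is then false, and
 * P_NEXT() recovers 69: the low bits (69 & pid_tbl_mask == 5) link to
 * the next free slot, while the high bits are the use count from
 * which the next pid issued from this slot is built.
 */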

/*
 * Table of process IDs (PIDs).
 */
static struct pid_table *pid_table	__read_mostly;

#define	INITIAL_PID_TABLE_SIZE		(1 << 5)

/* Table mask, threshold for growing and number of allocated PIDs. */
static u_int		pid_tbl_mask	__read_mostly;
static u_int		pid_alloc_lim	__read_mostly;
static u_int		pid_alloc_cnt	__cacheline_aligned;

/* Next free, last free and maximum PIDs. */
static u_int		next_free_pt	__cacheline_aligned;
static u_int		last_free_pt	__cacheline_aligned;
static pid_t		pid_max		__read_mostly;

/* Components of the first process -- never freed. */

extern struct emul emul_netbsd;	/* defined in kern_exec.c */

struct session session0 = {
	.s_count = 1,
	.s_sid = 0,
};
struct pgrp pgrp0 = {
	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
	.pg_session = &session0,
};
filedesc_t filedesc0;
struct cwdinfo cwdi0 = {
	.cwdi_cmask = CMASK,
	.cwdi_refcnt = 1,
};
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct proc proc0 = {
	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
	.p_nlwps = 1,
	.p_nrlwps = 1,
	.p_nlwpid = 1,		/* must match lwp0.l_lid */
	.p_pgrp = &pgrp0,
	.p_comm = "system",
	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
	 * when they exit.  init(8) can easily wait them out for us.
	 */
	.p_flag = PK_SYSTEM | PK_NOCLDWAIT,
	.p_stat = SACTIVE,
	.p_nice = NZERO,
	.p_emul = &emul_netbsd,
	.p_cwdi = &cwdi0,
	.p_limit = &limit0,
	.p_fd = &filedesc0,
	.p_vmspace = &vmspace0,
	.p_stats = &pstat0,
	.p_sigacts = &sigacts0,
};
kauth_cred_t cred0;

static const int	nofile	= NOFILE;
static const int	maxuprc	= MAXUPRC;

static int sysctl_doeproc(SYSCTLFN_PROTO);
static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
static void fill_kproc2(struct proc *, struct kinfo_proc2 *, bool);

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static struct pgrp *	pg_remove(pid_t);
static void		pg_delete(pid_t);
static void		orphanpg(struct pgrp *);

static specificdata_domain_t proc_specificdata_domain;

static pool_cache_t proc_cache;

static kauth_listener_t proc_listener;

static int
proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_CANSEE: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_CANSEE_ARGS:
		case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
		case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
			result = KAUTH_RESULT_ALLOW;

			break;

		case KAUTH_REQ_PROCESS_CANSEE_ENV:
			if (kauth_cred_getuid(cred) !=
			    kauth_cred_getuid(p->p_cred) ||
			    kauth_cred_getuid(cred) !=
			    kauth_cred_getsvuid(p->p_cred))
				break;

			result = KAUTH_RESULT_ALLOW;

			break;

		default:
			break;
		}

		break;
	}

	case KAUTH_PROCESS_FORK: {
		int lnprocs = (int)(unsigned long)arg2;

		/*
		 * Don't allow a nonprivileged user to use the last few
		 * processes.  The variable lnprocs is the current number of
		 * processes, maxproc is the limit.
		 */
		if (__predict_false((lnprocs >= maxproc - 5)))
			break;

		result = KAUTH_RESULT_ALLOW;

		break;
	}

	case KAUTH_PROCESS_CORENAME:
	case KAUTH_PROCESS_STOPFLAG:
		if (proc_uidmatch(cred, p->p_cred) == 0)
			result = KAUTH_RESULT_ALLOW;

		break;

	default:
		break;
	}

	return result;
}

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	u_int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
	    * sizeof(struct pid_table), KM_SLEEP);
	pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
	pid_max = PID_MAX;

	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
		pid_table[i].pt_pid = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY
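
	/*
	 * Worked example of the presets above (illustrative): LINK_EMPTY
	 * is PID_MAX rounded up to a multiple of the table size, i.e.
	 * 30016 for the initial 32 entry table.  The first allocation
	 * from slot 1 then computes 30016 + 31 + 1 + 1 == 30049, which
	 * exceeds pid_max and is masked down to pid 1 - exactly what we
	 * want for init(8).
	 */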

	proc_specificdata_domain = specificdata_domain_create();
	KASSERT(proc_specificdata_domain != NULL);

	proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
	    "procpl", NULL, IPL_NONE, NULL, NULL, NULL);

	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    proc_listener_cb, NULL);
}

void
procinit_sysctl(void)
{
	static struct sysctllog *clog;

	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);

	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc",
	    SYSCTL_DESCR("System-wide process information"),
	    sysctl_doeproc, 0, NULL, 0,
	    CTL_KERN, KERN_PROC, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc2",
	    SYSCTL_DESCR("Machine-independent process information"),
	    sysctl_doeproc, 0, NULL, 0,
	    CTL_KERN, KERN_PROC2, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc_args",
	    SYSCTL_DESCR("Process argument information"),
	    sysctl_kern_proc_args, 0, NULL, 0,
	    CTL_KERN, KERN_PROC_ARGS, CTL_EOL);

	/*
	  "nodes" under these:

	  KERN_PROC_ALL
	  KERN_PROC_PID		pid
	  KERN_PROC_PGRP	pgrp
	  KERN_PROC_SESSION	sess
	  KERN_PROC_TTY		tty
	  KERN_PROC_UID		uid
	  KERN_PROC_RUID	uid
	  KERN_PROC_GID		gid
	  KERN_PROC_RGID	gid

	  all in all, probably not worth the effort...
	*/
}

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct rlimit *rlim;
	rlim_t lim;
	int i;

	p = &proc0;
	pg = &pgrp0;

	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "wait");
	cv_init(&p->p_lwpcv, "lwpwait");

	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);

	pid_table[0].pt_pgrp = pg;
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	/* Create credentials. */
	cred0 = kauth_cred_alloc();
	p->p_cred = cred0;

	/* Create the CWD info. */
	rw_init(&cwdi0.cwdi_lock);

	/* Create the limits structures. */
	mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);

	rlim = limit0.pl_rlimit;
	for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
		rlim[i].rlim_cur = RLIM_INFINITY;
		rlim[i].rlim_max = RLIM_INFINITY;
	}

	rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
	rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;

	rlim[RLIMIT_NPROC].rlim_max = maxproc;
	rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;

	lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free));
	rlim[RLIMIT_RSS].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;

	rlim[RLIMIT_NTHR].rlim_max = maxlwp;
	rlim[RLIMIT_NTHR].rlim_cur = maxlwp < maxuprc ? maxlwp : maxuprc;

	/* Note that default core name has zero length. */
	limit0.pl_corename = defcorename;
	limit0.pl_cnlen = 0;
	limit0.pl_refcnt = 1;
	limit0.pl_writeable = false;
	limit0.pl_sv_limit = NULL;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	fd_init(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));

	/* Initialize signal state for proc0. XXX IPL_SCHED */
	mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
	siginit(p);

	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
}

/*
 * Session reference counting.
 */

void
proc_sesshold(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	ss->s_count++;
}

void
proc_sessrele(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	/*
	 * We keep the pgrp with the same id as the session in order to
	 * stop a process being given the same pid.  Since the pgrp holds
	 * a reference to the session, it must be a 'zombie' pgrp by now.
	 */
	if (--ss->s_count == 0) {
		struct pgrp *pg;

		pg = pg_remove(ss->s_sid);
		mutex_exit(proc_lock);

		kmem_free(pg, sizeof(struct pgrp));
		kmem_free(ss, sizeof(struct session));
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Negative ids are treated as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;
	struct session *session;
	int error;

	mutex_enter(proc_lock);
	if (pg_id < 0) {
		struct proc *p1 = proc_find(-pg_id);
		if (p1 == NULL) {
			error = EINVAL;
			goto fail;
		}
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgrp_find(pg_id);
		if (pgrp == NULL) {
			error = EINVAL;
			goto fail;
		}
	}
	session = pgrp->pg_session;
	error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
fail:
	mutex_exit(proc_lock);
	return error;
}

/*
 * p_inferior: is p an inferior of q?
 */
static inline bool
p_inferior(struct proc *p, struct proc *q)
{

	KASSERT(mutex_owned(proc_lock));

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return false;
	return true;
}

/*
 * proc_find: locate a process by the ID.
 *
 * => Must be called with proc_lock held.
 */
proc_t *
proc_find_raw(pid_t pid)
{
	struct pid_table *pt;
	proc_t *p;

	KASSERT(mutex_owned(proc_lock));
	pt = &pid_table[pid & pid_tbl_mask];
	p = pt->pt_proc;
	if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) {
		return NULL;
	}
	return p;
}

proc_t *
proc_find(pid_t pid)
{
	proc_t *p;

	p = proc_find_raw(pid);
	if (__predict_false(p == NULL)) {
		return NULL;
	}

	/*
	 * Only allow live processes to be found by PID.
	 * XXX: p_stat might change, since unlocked.
	 */
	if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
		return p;
	}
	return NULL;
}

/*
 * pgrp_find: locate a process group by the ID.
 *
 * => Must be called with proc_lock held.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp *pg;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;

	/*
	 * Cannot look up a process group that only exists because the
	 * session has not died yet (traditional).
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		return NULL;
	}
	return pg;
}

static void
expand_pid_table(void)
{
	size_t pt_size, tsz;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	pid_t pid, rpid;
	u_int i;
	uint new_pt_mask;

	pt_size = pid_tbl_mask + 1;
	tsz = pt_size * 2 * sizeof(struct pid_table);
	new_pt = kmem_alloc(tsz, KM_SLEEP);
	new_pt_mask = pt_size * 2 - 1;

	mutex_enter(proc_lock);
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		mutex_exit(proc_lock);
		kmem_free(new_pt, tsz);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
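	/*
	 * Illustrative example (growing from 32 to 64 slots): old slot 5
	 * may hold pid 37; since 37 & 32 is non-zero, that entry moves
	 * to new slot 37, whereas pid 5 would stay in new slot 5.  The
	 * unused twin of each pair is pushed onto the front of the free
	 * list with its use count adjusted so the next pid issued from
	 * it remains unique.
	 */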
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			rpid = 0;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else {
			pid = pid_table[i].pt_pid;
			rpid = pid;
		}

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;
		n_pt[pid & pt_size].pt_pid = rpid;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		n_pt[pid & pt_size].pt_pid = 0;

		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Save old table size and switch tables */
	tsz = pt_size * sizeof(struct pid_table);
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = new_pt_mask;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	mutex_exit(proc_lock);
	kmem_free(n_pt, tsz);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;

	p = pool_cache_get(proc_cache, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */
	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
	p->p_pid = -1;
	proc_alloc_pid(p);
	return p;
}

/*
 * proc_alloc_pid: allocate PID and record the given proc 'p' so that
 * proc_find_raw() can find it by the PID.
 */
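/*
 * Illustrative example (assuming pid_tbl_mask == 31): if slot 5 was
 * last vacated by pid 69, the slot stores a saved use count of 64,
 * and the next pid allocated from it is 64 + 31 + 1 + 5 == 101.
 * Each reuse of a slot thus advances the pid by the table size, so a
 * given pid value cannot recur until the counter wraps at pid_max.
 */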

pid_t
proc_alloc_pid(struct proc *p)
{
	struct pid_table *pt;
	pid_t pid;
	int nxt;

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		mutex_enter(proc_lock);
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		mutex_exit(proc_lock);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;

	KASSERT(pt->pt_pid == 0);
	pt->pt_pid = pid;
	if (p->p_pid == -1) {
		p->p_pid = pid;
	}
	pid_alloc_cnt++;
	mutex_exit(proc_lock);

	return pid;
}

/*
 * Free a process id - called from proc_free (in kern_exit.c)
 *
 * Called with the proc_lock held.
 */
void
proc_free_pid(pid_t pid)
{
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pid & pid_tbl_mask];

	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
	KASSERT(pt->pt_pid == pid);
	pt->pt_pid = 0;

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		pt->pt_pid = 0;
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	atomic_dec_uint(&nprocs);
}

void
proc_free_mem(struct proc *p)
{

	kdtrace_proc_dtor(NULL, p);
	pool_cache_put(proc_cache, p);
}

/*
 * proc_enterpgrp: move p to a new or existing process group (and session).
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a new process group.
 *
 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
 */
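/*
 * A sketch of the callers' usage: setsid(2) amounts to
 * proc_enterpgrp(p, p->p_pid, p->p_pid, true), creating a new session
 * and pgrp led by p, while setpgid(0, 0) reduces to
 * proc_enterpgrp(curp, curp->p_pid, curp->p_pid, false).
 */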
int
proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *p;
	int rval;
	pid_t pg_id = NO_PGID;

	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;

	/* Allocate data areas we might need before doing any validity checks */
	mutex_enter(proc_lock);		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		mutex_exit(proc_lock);
		new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
		mutex_enter(proc_lock);
	} else
		new_pgrp = NULL;
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (pid != curp->p_pid) {
		/* Must exist and be one of our children... */
		p = proc_find(pid);
		if (p == NULL || !p_inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & PK_EXEC) {
			rval = EACCES;
			goto done;
		}
	} else {
		/* ... setsid() cannot re-enter a pgrp */
		if (mksess && (curp->p_pgid == curp->p_pid ||
		    pgrp_find(curp->p_pid)))
			goto done;
		p = curp;
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */

	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = NULL;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_lflag &= ~PL_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			proc_sesshold(sess);
		}
		pgrp->pg_session = sess;
		sess = NULL;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Interlock with ttread(). */
	mutex_spin_enter(&tty_lock);

	/* Move process to requested group. */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	/* Done with the swap; we can release the tty mutex. */
	mutex_spin_exit(&tty_lock);

done:
	if (pg_id != NO_PGID) {
		/* Releases proc_lock. */
		pg_delete(pg_id);
	} else {
		mutex_exit(proc_lock);
	}
	if (sess != NULL)
		kmem_free(sess, sizeof(*sess));
	if (new_pgrp != NULL)
		kmem_free(new_pgrp, sizeof(*new_pgrp));
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * proc_leavepgrp: remove a process from its process group.
 * => must be called with the proc_lock held, which will be released;
 */
void
proc_leavepgrp(struct proc *p)
{
	struct pgrp *pgrp;

	KASSERT(mutex_owned(proc_lock));

	/* Interlock with ttread() */
	mutex_spin_enter(&tty_lock);
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = NULL;
	mutex_spin_exit(&tty_lock);

	if (LIST_EMPTY(&pgrp->pg_members)) {
		/* Releases proc_lock. */
		pg_delete(pgrp->pg_id);
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * pg_remove: remove a process group from the table.
 * => must be called with the proc_lock held;
 * => returns process group to free;
 */
static struct pgrp *
pg_remove(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;

	KASSERT(pgrp != NULL);
	KASSERT(pgrp->pg_id == pg_id);
	KASSERT(LIST_EMPTY(&pgrp->pg_members));

	pt->pt_pgrp = NULL;

	if (!P_VALID(pt->pt_proc)) {
		/* Orphaned pgrp, put slot onto free list. */
		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		KASSERT(pt->pt_pid == 0);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	return pgrp;
}

/*
 * pg_delete: delete and free a process group.
 * => must be called with the proc_lock held, which will be released.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
		mutex_exit(proc_lock);
		return;
	}

	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if last reference.  Note: proc_sessrele() releases proc_lock.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done by proc_sessrele(). */
		kmem_free(pg, sizeof(struct pgrp));
	}
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proc_lock held.
 */
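/*
 * Example: when a shell in pgrp A moves a child into new pgrp B of the
 * same session, fixjobc(child, B, 1) increments B->pg_jobc, since the
 * child's parent lives in a different pgrp (A) of that session; the
 * matching fixjobc(child, A, 0) call adjusts whatever counts the child
 * (and its own children) had contributed to their old groups.  When a
 * pgrp's count drops to zero and it has stopped members, orphanpg()
 * sends them SIGHUP and SIGCONT.
 */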
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	KASSERT(mutex_owned(proc_lock));

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering) {
			pgrp->pg_jobc++;
			p->p_lflag &= ~PL_ORPHANPG;
		} else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering) {
				child->p_lflag &= ~PL_ORPHANPG;
				hispgrp->pg_jobc++;
			} else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proc_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	KASSERT(mutex_owned(proc_lock));

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			p->p_lflag |= PL_ORPHANPG;
			psignal(p, SIGHUP);
			psignal(p, SIGCONT);
		}
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
			    pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    LIST_FIRST(&pgrp->pg_members));
			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
static int	kstackleftmin = KSTACK_SIZE;
static int	kstackleftthres = KSTACK_SIZE / 8;

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * Fill the entire stack with the magic number so that later
	 * modification of it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes"
			    " (pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

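/*
 * proclist_foreach_call: invoke the given callback on every process on
 * the list.  A marker entry keeps our place so that the callback may
 * block and even drop proc_lock (it must hold it again on return);
 * marker entries belonging to other walkers are skipped.  Iteration
 * stops early if the callback returns non-zero.
 */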
int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	int ret = 0;

	marker.p_flag = PK_MARKER;
	mutex_enter(proc_lock);
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & PK_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		KASSERT(mutex_owned(proc_lock));
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	mutex_exit(proc_lock);

	return ret;
}

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

	/* XXXCDC: how should locking work here? */

	/* curproc exception is for coredump. */

	if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
	    (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
		return EFAULT;
	}

	uvmspace_addref(p->p_vmspace);
	*vm = p->p_vmspace;

	return 0;
}

/*
 * Acquire a write lock on the process credential.
 */
void
proc_crmod_enter(void)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	/* Reset what needs to be reset in plimit. */
	if (p->p_limit->pl_corename != defcorename) {
		lim_setcorename(p, defcorename, 0);
	}

	mutex_enter(p->p_lock);

	/* Ensure the LWP cached credentials are up to date. */
	if ((oc = l->l_cred) != p->p_cred) {
		kauth_cred_hold(p->p_cred);
		l->l_cred = p->p_cred;
		kauth_cred_free(oc);
	}
}

/*
 * Set in a new process credential, and drop the write lock.  The credential
 * must have a reference already.  Optionally, free a no-longer required
 * credential.  The scheduler also needs to inspect p_cred, so we also
 * briefly acquire the sched state mutex.
 */
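/*
 * Typical caller pattern (a sketch; kauth_cred_dup() is the kauth(9)
 * routine for taking a modifiable copy of a credential):
 *
 *	proc_crmod_enter();
 *	ncred = kauth_cred_dup(p->p_cred);
 *	...modify ncred...
 *	proc_crmod_leave(ncred, p->p_cred, true);
 */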
void
proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
{
	struct lwp *l = curlwp, *l2;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	KASSERT(mutex_owned(p->p_lock));

	/* Is there a new credential to set in? */
	if (scred != NULL) {
		p->p_cred = scred;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2 != l)
				l2->l_prflag |= LPR_CRMOD;
		}

		/* Ensure the LWP cached credentials are up to date. */
		if ((oc = l->l_cred) != scred) {
			kauth_cred_hold(scred);
			l->l_cred = scred;
		}
	} else
		oc = NULL;	/* XXXgcc */

	if (sugid) {
		/*
		 * Mark process as having changed credentials, stops
		 * tracing etc.
		 */
		p->p_flag |= PK_SUGID;
	}

	mutex_exit(p->p_lock);

	/* If there is a credential to be released, free it now. */
	if (fcred != NULL) {
		KASSERT(scred != NULL);
		kauth_cred_free(fcred);
		if (oc != scred)
			kauth_cred_free(oc);
	}
}

/*
 * proc_specific_key_create --
 *	Create a key for subsystem proc-specific data.
 */
int
proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
}

/*
 * proc_specific_key_delete --
 *	Delete a key for subsystem proc-specific data.
 */
void
proc_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(proc_specificdata_domain, key);
}

/*
 * proc_initspecific --
 *	Initialize a proc's specificdata container.
 */
void
proc_initspecific(struct proc *p)
{
	int error;

	error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
	KASSERT(error == 0);
}

/*
 * proc_finispecific --
 *	Finalize a proc's specificdata container.
 */
void
proc_finispecific(struct proc *p)
{

	specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
}

/*
 * proc_getspecific --
 *	Return proc-specific data corresponding to the specified key.
 */
void *
proc_getspecific(struct proc *p, specificdata_key_t key)
{

	return (specificdata_getspecific(proc_specificdata_domain,
	    &p->p_specdataref, key));
}

/*
 * proc_setspecific --
 *	Set proc-specific data corresponding to the specified key.
 */
void
proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
{

	specificdata_setspecific(proc_specificdata_domain,
	    &p->p_specdataref, key, data);
}

int
proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
{
	int r = 0;

	if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
	    kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
		/*
		 * suid proc of ours or proc not ours
		 */
		r = EPERM;
	} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
		/*
		 * sgid proc has sgid back to us temporarily
		 */
		r = EPERM;
	} else {
		/*
		 * our rgid must be in target's group list (ie,
		 * sub-processes started by a sgid process)
		 */
		int ismember = 0;

		if (kauth_cred_ismember_gid(cred,
		    kauth_cred_getgid(target), &ismember) != 0 ||
		    !ismember)
			r = EPERM;
	}

	return (r);
}

/*
 * sysctl stuff
 */

#define KERN_PROCSLOP	(5 * sizeof(struct kinfo_proc))

static const u_int sysctl_flagmap[] = {
	PK_ADVLOCK, P_ADVLOCK,
	PK_EXEC, P_EXEC,
	PK_NOCLDWAIT, P_NOCLDWAIT,
	PK_32, P_32,
	PK_CLDSIGIGN, P_CLDSIGIGN,
	PK_SUGID, P_SUGID,
	0
};

static const u_int sysctl_sflagmap[] = {
	PS_NOCLDSTOP, P_NOCLDSTOP,
	PS_WEXIT, P_WEXIT,
	PS_STOPFORK, P_STOPFORK,
	PS_STOPEXEC, P_STOPEXEC,
	PS_STOPEXIT, P_STOPEXIT,
	0
};

static const u_int sysctl_slflagmap[] = {
	PSL_TRACED, P_TRACED,
	PSL_FSTRACE, P_FSTRACE,
	PSL_CHTRACED, P_CHTRACED,
	PSL_SYSCALL, P_SYSCALL,
	0
};

static const u_int sysctl_lflagmap[] = {
	PL_CONTROLT, P_CONTROLT,
	PL_PPWAIT, P_PPWAIT,
	0
};

static const u_int sysctl_stflagmap[] = {
	PST_PROFIL, P_PROFIL,
	0

};

/* used by kern_lwp also */
const u_int sysctl_lwpflagmap[] = {
	LW_SINTR, L_SINTR,
	LW_SYSTEM, L_SYSTEM,
	0
};

/*
 * Find the most ``active'' lwp of a process and return it for ps display
 * purposes.
 */
static struct lwp *
proc_active_lwp(struct proc *p)
{
	static const int ostat[] = {
		0,
		2,	/* LSIDL */
		6,	/* LSRUN */
		5,	/* LSSLEEP */
		4,	/* LSSTOP */
		0,	/* LSZOMB */
		1,	/* LSDEAD */
		7,	/* LSONPROC */
		3	/* LSSUSPENDED */
	};

	struct lwp *l, *lp = NULL;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
		if (lp == NULL ||
		    ostat[l->l_stat] > ostat[lp->l_stat] ||
		    (ostat[l->l_stat] == ostat[lp->l_stat] &&
		    l->l_cpticks > lp->l_cpticks)) {
			lp = l;
			continue;
		}
	}
	return lp;
}

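/*
 * sysctl helper for KERN_PROC and KERN_PROC2: walk allproc (and then
 * zombproc), filter by the requested criterion, and copy out one
 * kinfo_proc/kinfo_proc2 per matching process.  When oldp is NULL,
 * only the required buffer size is computed and returned.
 */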
static int
sysctl_doeproc(SYSCTLFN_ARGS)
{
	union {
		struct kinfo_proc kproc;
		struct kinfo_proc2 kproc2;
	} *kbuf;
	struct proc *p, *next, *marker;
	char *where, *dp;
	int type, op, arg, error;
	u_int elem_size, kelem_size, elem_count;
	size_t buflen, needed;
	bool match, zombie, mmmbrains;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	dp = where = oldp;
	buflen = where != NULL ? *oldlenp : 0;
	error = 0;
	needed = 0;
	type = rnode->sysctl_num;

	if (type == KERN_PROC) {
		if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
			return (EINVAL);
		op = name[0];
		if (op != KERN_PROC_ALL)
			arg = name[1];
		else
			arg = 0;		/* Quell compiler warning */
		elem_count = 0;	/* Ditto */
		kelem_size = elem_size = sizeof(kbuf->kproc);
	} else {
		if (namelen != 4)
			return (EINVAL);
		op = name[0];
		arg = name[1];
		elem_size = name[2];
		elem_count = name[3];
		kelem_size = sizeof(kbuf->kproc2);
	}

	sysctl_unlock();

	kbuf = kmem_alloc(sizeof(*kbuf), KM_SLEEP);
	marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
	marker->p_flag = PK_MARKER;

	mutex_enter(proc_lock);
	mmmbrains = false;
	for (p = LIST_FIRST(&allproc);; p = next) {
		if (p == NULL) {
			if (!mmmbrains) {
				p = LIST_FIRST(&zombproc);
				mmmbrains = true;
			}
			if (p == NULL)
				break;
		}
		next = LIST_NEXT(p, p_list);
		if ((p->p_flag & PK_MARKER) != 0)
			continue;

		/*
		 * Skip embryonic processes.
		 */
		if (p->p_stat == SIDL)
			continue;

		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error != 0) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * TODO - make more efficient (see notes below).
		 * do by session.
		 */
		switch (op) {
		case KERN_PROC_PID:
			/* could do this with just a lookup */
			match = (p->p_pid == (pid_t)arg);
			break;

		case KERN_PROC_PGRP:
			/* could do this by traversing pgrp */
			match = (p->p_pgrp->pg_id == (pid_t)arg);
			break;

		case KERN_PROC_SESSION:
			match = (p->p_session->s_sid == (pid_t)arg);
			break;

		case KERN_PROC_TTY:
			match = true;
			if (arg == (int) KERN_PROC_TTY_REVOKE) {
				if ((p->p_lflag & PL_CONTROLT) == 0 ||
				    p->p_session->s_ttyp == NULL ||
				    p->p_session->s_ttyvp != NULL) {
					match = false;
				}
			} else if ((p->p_lflag & PL_CONTROLT) == 0 ||
			    p->p_session->s_ttyp == NULL) {
				if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
					match = false;
				}
			} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
				match = false;
			}
			break;

		case KERN_PROC_UID:
			match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RUID:
			match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_GID:
			match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RGID:
			match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_ALL:
			match = true;
			/* allow everything */
			break;

		default:
			error = EINVAL;
			mutex_exit(p->p_lock);
			goto cleanup;
		}
		if (!match) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * Grab a hold on the process.
		 */
		if (mmmbrains) {
			zombie = true;
		} else {
			zombie = !rw_tryenter(&p->p_reflock, RW_READER);
		}
		if (zombie) {
			LIST_INSERT_AFTER(p, marker, p_list);
		}

		if (buflen >= elem_size &&
		    (type == KERN_PROC || elem_count > 0)) {
			if (type == KERN_PROC) {
				kbuf->kproc.kp_proc = *p;
				fill_eproc(p, &kbuf->kproc.kp_eproc, zombie);
			} else {
				fill_kproc2(p, &kbuf->kproc2, zombie);
				elem_count--;
			}
			mutex_exit(p->p_lock);
			mutex_exit(proc_lock);
			/*
			 * Copy out elem_size, but not larger than kelem_size
			 */
			error = sysctl_copyout(l, kbuf, dp,
			    min(kelem_size, elem_size));
			mutex_enter(proc_lock);
			if (error) {
				goto bah;
			}
			dp += elem_size;
			buflen -= elem_size;
		} else {
			mutex_exit(p->p_lock);
		}
		needed += elem_size;

		/*
		 * Release reference to process.
		 */
		if (zombie) {
			next = LIST_NEXT(marker, p_list);
			LIST_REMOVE(marker, p_list);
		} else {
			rw_exit(&p->p_reflock);
			next = LIST_NEXT(p, p_list);
		}
	}
	mutex_exit(proc_lock);

	if (where != NULL) {
		*oldlenp = dp - where;
		if (needed > *oldlenp) {
			error = ENOMEM;
			goto out;
		}
	} else {
		needed += KERN_PROCSLOP;
		*oldlenp = needed;
	}
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return 0;
bah:
	if (zombie)
		LIST_REMOVE(marker, p_list);
	else
		rw_exit(&p->p_reflock);
cleanup:
	mutex_exit(proc_lock);
out:
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return error;
}

int
copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
{

#ifdef COMPAT_NETBSD32
	if (p->p_flag & PK_32) {
		struct ps_strings32 arginfo32;

		int error = copyin_proc(p, (void *)p->p_psstrp, &arginfo32,
		    sizeof(arginfo32));
		if (error)
			return error;
		arginfo->ps_argvstr = (void *)(uintptr_t)arginfo32.ps_argvstr;
		arginfo->ps_nargvstr = arginfo32.ps_nargvstr;
		arginfo->ps_envstr = (void *)(uintptr_t)arginfo32.ps_envstr;
		arginfo->ps_nenvstr = arginfo32.ps_nenvstr;
		return 0;
	}
#endif
	return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
}

static int
copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
{
	void **cookie = cookie_;
	struct lwp *l = cookie[0];
	char *dst = cookie[1];

	return sysctl_copyout(l, src, dst + off, len);
}

/*
 * sysctl helper routine for kern.proc_args pseudo-subtree.
 */
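/*
 * Illustrative userland usage (a sketch, not part of this file):
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *	size_t len;
 *
 *	sysctl(mib, 4, NULL, &len, NULL, 0);	(probe required size)
 *	buf = malloc(len);
 *	sysctl(mib, 4, buf, &len, NULL, 0);	(fetch NUL-separated argv)
 */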
static int
sysctl_kern_proc_args(SYSCTLFN_ARGS)
{
	struct ps_strings pss;
	struct proc *p;
	pid_t pid;
	int type, error;
	void *cookie[2];

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (newp != NULL || namelen != 2)
		return (EINVAL);
	pid = name[0];
	type = name[1];

	switch (type) {
	case KERN_PROC_ARGV:
	case KERN_PROC_NARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NENV:
		/* ok */
		break;
	default:
		return (EINVAL);
	}

	sysctl_unlock();

	/* check pid */
	mutex_enter(proc_lock);
	if ((p = proc_find(pid)) == NULL) {
		error = EINVAL;
		goto out_locked;
	}
	mutex_enter(p->p_lock);

	/* Check permission. */
	if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
	else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
	else
		error = EINVAL; /* XXXGCC */
	if (error) {
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	if (oldp == NULL) {
		if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
			*oldlenp = sizeof (int);
		else
			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
		error = 0;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	/*
	 * Zombies don't have a stack, so we can't read their psstrings.
	 * System processes also don't have a user stack.
	 */
	if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
		error = EINVAL;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
	mutex_exit(p->p_lock);
	if (error) {
		goto out_locked;
	}
	mutex_exit(proc_lock);

	if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
		int value;
		if ((error = copyin_psstrings(p, &pss)) == 0) {
			if (type == KERN_PROC_NARGV)
				value = pss.ps_nargvstr;
			else
				value = pss.ps_nenvstr;
			error = sysctl_copyout(l, &value, oldp, sizeof(value));
			*oldlenp = sizeof(value);
		}
	} else {
		cookie[0] = l;
		cookie[1] = oldp;
		error = copy_procargs(p, type, oldlenp,
		    copy_procargs_sysctl_cb, cookie);
	}
	rw_exit(&p->p_reflock);
	sysctl_relock();
	return error;

out_locked:
	mutex_exit(proc_lock);
	sysctl_relock();
	return error;
}

int
copy_procargs(struct proc *p, int oid, size_t *limit,
    int (*cb)(void *, const void *, size_t, size_t), void *cookie)
{
	struct ps_strings pss;
	size_t len, i, loaded, entry_len;
	struct uio auio;
	struct iovec aiov;
	int error, argvlen;
	char *arg;
	char **argv;
	vaddr_t user_argv;
	struct vmspace *vmspace;

	/*
	 * Allocate a temporary buffer to hold the argument vector and
	 * the arguments themselves.
	 */
1999 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2000 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
2001
2002 /*
2003 * Lock the process down in memory.
2004 */
2005 vmspace = p->p_vmspace;
2006 uvmspace_addref(vmspace);
2007
2008 /*
2009 * Read in the ps_strings structure.
2010 */
2011 if ((error = copyin_psstrings(p, &pss)) != 0)
2012 goto done;
2013
2014 /*
2015 * Now read the address of the argument vector.
2016 */
2017 switch (oid) {
2018 case KERN_PROC_ARGV:
2019 user_argv = (uintptr_t)pss.ps_argvstr;
2020 argvlen = pss.ps_nargvstr;
2021 break;
2022 case KERN_PROC_ENV:
2023 user_argv = (uintptr_t)pss.ps_envstr;
2024 argvlen = pss.ps_nenvstr;
2025 break;
2026 default:
2027 error = EINVAL;
2028 goto done;
2029 }
2030
2031 if (argvlen < 0) {
2032 error = EIO;
2033 goto done;
2034 }
2035
2036 #ifdef COMPAT_NETBSD32
2037 if (p->p_flag & PK_32)
2038 entry_len = sizeof(netbsd32_charp);
2039 else
2040 #endif
2041 entry_len = sizeof(char *);
2042
2043 /*
2044 * Now copy each string.
2045 */
2046 len = 0; /* bytes written to user buffer */
2047 loaded = 0; /* bytes from argv already processed */
2048 i = 0; /* To make compiler happy */
2049
2050 for (; argvlen; --argvlen) {
2051 int finished = 0;
2052 vaddr_t base;
2053 size_t xlen;
2054 int j;
2055
2056 if (loaded == 0) {
2057 size_t rem = entry_len * argvlen;
2058 loaded = MIN(rem, PAGE_SIZE);
2059 error = copyin_vmspace(vmspace,
2060 (const void *)user_argv, argv, loaded);
2061 if (error)
2062 break;
2063 user_argv += loaded;
2064 i = 0;
2065 }
2066
2067 #ifdef COMPAT_NETBSD32
2068 if (p->p_flag & PK_32) {
2069 netbsd32_charp *argv32;
2070
2071 argv32 = (netbsd32_charp *)argv;
2072 base = (vaddr_t)NETBSD32PTR64(argv32[i++]);
2073 } else
2074 #endif
2075 base = (vaddr_t)argv[i++];
2076 loaded -= entry_len;
2077
2078 /*
2079 * The program has messed around with its arguments,
2080 * possibly deleting some, and replacing them with
2081 * NULL's. Treat this as the last argument and not
2082 * a failure.
2083 */
2084 if (base == 0)
2085 break;
2086
2087 while (!finished) {
2088 xlen = PAGE_SIZE - (base & PAGE_MASK);
2089
2090 aiov.iov_base = arg;
2091 aiov.iov_len = PAGE_SIZE;
2092 auio.uio_iov = &aiov;
2093 auio.uio_iovcnt = 1;
2094 auio.uio_offset = base;
2095 auio.uio_resid = xlen;
2096 auio.uio_rw = UIO_READ;
2097 UIO_SETUP_SYSSPACE(&auio);
2098 error = uvm_io(&vmspace->vm_map, &auio);
2099 if (error)
2100 goto done;
2101
2102 /* Look for the end of the string */
2103 for (j = 0; j < xlen; j++) {
2104 if (arg[j] == '\0') {
2105 xlen = j + 1;
2106 finished = 1;
2107 break;
2108 }
2109 }
2110
2111 /* Check for user buffer overflow */
2112 if (len + xlen > *limit) {
2113 finished = 1;
2114 if (len > *limit)
2115 xlen = 0;
2116 else
2117 xlen = *limit - len;
2118 }
2119
2120 /* Copyout the page */
2121 error = (*cb)(cookie, arg, len, xlen);
2122 if (error)
2123 goto done;
2124
2125 len += xlen;
2126 base += xlen;
2127 }
2128 }
2129 *limit = len;
2130
2131 done:
2132 kmem_free(argv, PAGE_SIZE);
2133 kmem_free(arg, PAGE_SIZE);
2134 uvmspace_free(vmspace);
2135 return error;
2136 }
2137
2138 /*
2139 * Fill in an eproc structure for the specified process.
2140 */
2141 void
2142 fill_eproc(struct proc *p, struct eproc *ep, bool zombie)
2143 {
2144 struct tty *tp;
2145 struct lwp *l;
2146
2147 KASSERT(mutex_owned(proc_lock));
2148 KASSERT(mutex_owned(p->p_lock));
2149
2150 memset(ep, 0, sizeof(*ep));
2151
2152 ep->e_paddr = p;
2153 ep->e_sess = p->p_session;
2154 if (p->p_cred) {
2155 kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2156 kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2157 }
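	/*
	 * VM statistics and the wait channel are only meaningful for
	 * live, fully initialized processes.
	 */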
2158 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2159 struct vmspace *vm = p->p_vmspace;
2160
2161 ep->e_vm.vm_rssize = vm_resident_count(vm);
2162 ep->e_vm.vm_tsize = vm->vm_tsize;
2163 ep->e_vm.vm_dsize = vm->vm_dsize;
2164 ep->e_vm.vm_ssize = vm->vm_ssize;
2165 ep->e_vm.vm_map.size = vm->vm_map.size;
2166
2167 		/* Pick the most active LWP to represent the process */
2168 l = proc_active_lwp(p);
2169 KASSERT(l != NULL);
2170 lwp_lock(l);
2171 if (l->l_wchan)
2172 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2173 lwp_unlock(l);
2174 }
2175 if (p->p_pptr)
2176 ep->e_ppid = p->p_pptr->p_pid;
2177 if (p->p_pgrp && p->p_session) {
2178 ep->e_pgid = p->p_pgrp->pg_id;
2179 ep->e_jobc = p->p_pgrp->pg_jobc;
2180 ep->e_sid = p->p_session->s_sid;
2181 if ((p->p_lflag & PL_CONTROLT) &&
2182 (tp = ep->e_sess->s_ttyp)) {
2183 ep->e_tdev = tp->t_dev;
2184 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2185 ep->e_tsess = tp->t_session;
2186 } else
2187 ep->e_tdev = (uint32_t)NODEV;
2188 ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
2189 if (SESS_LEADER(p))
2190 ep->e_flag |= EPROC_SLEADER;
2191 strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME);
2192 }
2193 ep->e_xsize = ep->e_xrssize = 0;
2194 ep->e_xccount = ep->e_xswrss = 0;
2195 }
2196
2197 /*
2198 * Fill in a kinfo_proc2 structure for the specified process.
2199 */
2200 static void
2201 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie)
2202 {
2203 struct tty *tp;
2204 struct lwp *l, *l2;
2205 struct timeval ut, st, rt;
2206 sigset_t ss1, ss2;
2207 struct rusage ru;
2208 struct vmspace *vm;
2209
2210 KASSERT(mutex_owned(proc_lock));
2211 KASSERT(mutex_owned(p->p_lock));
2212
2213 sigemptyset(&ss1);
2214 sigemptyset(&ss2);
2215 memset(ki, 0, sizeof(*ki));
2216
2217 ki->p_paddr = PTRTOUINT64(p);
2218 ki->p_fd = PTRTOUINT64(p->p_fd);
2219 ki->p_cwdi = PTRTOUINT64(p->p_cwdi);
2220 ki->p_stats = PTRTOUINT64(p->p_stats);
2221 ki->p_limit = PTRTOUINT64(p->p_limit);
2222 ki->p_vmspace = PTRTOUINT64(p->p_vmspace);
2223 ki->p_sigacts = PTRTOUINT64(p->p_sigacts);
2224 ki->p_sess = PTRTOUINT64(p->p_session);
2225 ki->p_tsess = 0; /* may be changed if controlling tty below */
2226 ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru);
2227 ki->p_eflag = 0;
2228 ki->p_exitsig = p->p_exitsig;
2229 ki->p_flag = L_INMEM; /* Process never swapped out */
2230 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2231 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2232 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2233 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2234 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2235 ki->p_pid = p->p_pid;
2236 if (p->p_pptr)
2237 ki->p_ppid = p->p_pptr->p_pid;
2238 else
2239 ki->p_ppid = 0;
2240 ki->p_uid = kauth_cred_geteuid(p->p_cred);
2241 ki->p_ruid = kauth_cred_getuid(p->p_cred);
2242 ki->p_gid = kauth_cred_getegid(p->p_cred);
2243 ki->p_rgid = kauth_cred_getgid(p->p_cred);
2244 ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2245 ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2246 ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
2247 kauth_cred_getgroups(p->p_cred, ki->p_groups,
2248 min(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
2249 UIO_SYSSPACE);
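	/* Note: p_ngroups may exceed the number of groups copied above. */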
2250
2251 ki->p_uticks = p->p_uticks;
2252 ki->p_sticks = p->p_sticks;
2253 ki->p_iticks = p->p_iticks;
2254 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */
2255 ki->p_tracep = PTRTOUINT64(p->p_tracep);
2256 ki->p_traceflag = p->p_traceflag;
2257
2258 	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore, sizeof(ki_sigset_t));
2259 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2260
2261 ki->p_cpticks = 0;
2262 ki->p_pctcpu = p->p_pctcpu;
2263 ki->p_estcpu = 0;
2264 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
2265 ki->p_realstat = p->p_stat;
2266 ki->p_nice = p->p_nice;
2267 ki->p_xstat = p->p_xstat;
2268 ki->p_acflag = p->p_acflag;
2269
2270 strncpy(ki->p_comm, p->p_comm,
2271 min(sizeof(ki->p_comm), sizeof(p->p_comm)));
2272 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2273
2274 ki->p_nlwps = p->p_nlwps;
2275 ki->p_realflag = ki->p_flag;
2276
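	/*
	 * VM usage and the LWP-derived fields below can only be
	 * sampled while the process is live.
	 */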
2277 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2278 vm = p->p_vmspace;
2279 ki->p_vm_rssize = vm_resident_count(vm);
2280 ki->p_vm_tsize = vm->vm_tsize;
2281 ki->p_vm_dsize = vm->vm_dsize;
2282 ki->p_vm_ssize = vm->vm_ssize;
2283 ki->p_vm_vsize = atop(vm->vm_map.size);
2284 /*
2285 * Since the stack is initially mapped mostly with
2286 * PROT_NONE and grown as needed, adjust the "mapped size"
2287 * to skip the unused stack portion.
2288 */
2289 ki->p_vm_msize =
2290 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2291
2292 		/* Pick the most active LWP to represent the process */
2293 l = proc_active_lwp(p);
2294 KASSERT(l != NULL);
2295 lwp_lock(l);
2296 ki->p_nrlwps = p->p_nrlwps;
2297 ki->p_forw = 0;
2298 ki->p_back = 0;
2299 ki->p_addr = PTRTOUINT64(l->l_addr);
2300 ki->p_stat = l->l_stat;
2301 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2302 ki->p_swtime = l->l_swtime;
2303 ki->p_slptime = l->l_slptime;
2304 if (l->l_stat == LSONPROC)
2305 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2306 else
2307 ki->p_schedflags = 0;
2308 ki->p_priority = lwp_eprio(l);
2309 ki->p_usrpri = l->l_priority;
2310 if (l->l_wchan)
2311 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2312 ki->p_wchan = PTRTOUINT64(l->l_wchan);
2313 ki->p_cpuid = cpu_index(l->l_cpu);
2314 lwp_unlock(l);
2315 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2316 			/* Aggregating per-LWP state is only an approximation */
2317 sigplusset(&l->l_sigpend.sp_set, &ss1);
2318 sigplusset(&l->l_sigmask, &ss2);
2319 ki->p_cpticks += l->l_cpticks;
2320 ki->p_pctcpu += l->l_pctcpu;
2321 ki->p_estcpu += l->l_estcpu;
2322 }
2323 }
2324 sigplusset(&p->p_sigpend.sp_set, &ss2);
2325 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2326 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2327
2328 if (p->p_session != NULL) {
2329 ki->p_sid = p->p_session->s_sid;
2330 ki->p__pgid = p->p_pgrp->pg_id;
2331 if (p->p_session->s_ttyvp)
2332 ki->p_eflag |= EPROC_CTTY;
2333 if (SESS_LEADER(p))
2334 ki->p_eflag |= EPROC_SLEADER;
2335 strncpy(ki->p_login, p->p_session->s_login,
2336 min(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
2337 ki->p_jobc = p->p_pgrp->pg_jobc;
2338 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2339 ki->p_tdev = tp->t_dev;
2340 ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2341 ki->p_tsess = PTRTOUINT64(tp->t_session);
2342 } else {
2343 ki->p_tdev = (int32_t)NODEV;
2344 }
2345 }
2346
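	/*
	 * Timing and resource usage can no longer be sampled once the
	 * process is a zombie; p_uvalid tells the consumer whether the
	 * fields below were filled in.
	 */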
2347 if (!P_ZOMBIE(p) && !zombie) {
2348 ki->p_uvalid = 1;
2349 ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2350 ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2351
2352 calcru(p, &ut, &st, NULL, &rt);
2353 ki->p_rtime_sec = rt.tv_sec;
2354 ki->p_rtime_usec = rt.tv_usec;
2355 ki->p_uutime_sec = ut.tv_sec;
2356 ki->p_uutime_usec = ut.tv_usec;
2357 ki->p_ustime_sec = st.tv_sec;
2358 ki->p_ustime_usec = st.tv_usec;
2359
2360 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2361 ki->p_uru_nvcsw = 0;
2362 ki->p_uru_nivcsw = 0;
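		/*
		 * l_ncsw counts all context switches, so the voluntary
		 * count is the total minus the involuntary switches.
		 */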
2363 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
2364 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
2365 ki->p_uru_nivcsw += l2->l_nivcsw;
2366 ruadd(&ru, &l2->l_ru);
2367 }
2368 ki->p_uru_maxrss = ru.ru_maxrss;
2369 ki->p_uru_ixrss = ru.ru_ixrss;
2370 ki->p_uru_idrss = ru.ru_idrss;
2371 ki->p_uru_isrss = ru.ru_isrss;
2372 ki->p_uru_minflt = ru.ru_minflt;
2373 ki->p_uru_majflt = ru.ru_majflt;
2374 ki->p_uru_nswap = ru.ru_nswap;
2375 ki->p_uru_inblock = ru.ru_inblock;
2376 ki->p_uru_oublock = ru.ru_oublock;
2377 ki->p_uru_msgsnd = ru.ru_msgsnd;
2378 ki->p_uru_msgrcv = ru.ru_msgrcv;
2379 ki->p_uru_nsignals = ru.ru_nsignals;
2380
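		/*
		 * Combined user and system time accumulated by the
		 * process's reaped children.
		 */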
2381 timeradd(&p->p_stats->p_cru.ru_utime,
2382 &p->p_stats->p_cru.ru_stime, &ut);
2383 ki->p_uctime_sec = ut.tv_sec;
2384 ki->p_uctime_usec = ut.tv_usec;
2385 }
2386 }
2387