/*	$NetBSD: kern_proc.c,v 1.185 2012/06/06 11:20:21 martin Exp $	*/

/*-
 * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.185 2012/06/06 11:20:21 martin Exp $");

#ifdef _KERNEL_OPT
#include "opt_kstack.h"
#include "opt_maxuprc.h"
#include "opt_dtrace.h"
#include "opt_compat_netbsd32.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/pool.h>
#include <sys/pset.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/filedesc.h>
#include <sys/syscall_stats.h>
#include <sys/kauth.h>
#include <sys/sleepq.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/dtrace_bsd.h>
#include <sys/sysctl.h>
#include <sys/exec.h>
#include <sys/cpu.h>

#include <uvm/uvm_extern.h>

#ifdef COMPAT_NETBSD32
#include <compat/netbsd32/netbsd32.h>
#endif

/*
 * Process lists.
 */

struct proclist		allproc		__cacheline_aligned;
struct proclist		zombproc	__cacheline_aligned;

kmutex_t *		proc_lock	__cacheline_aligned;

/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
	pid_t		pt_pid;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p) ((uint)p)
#endif
#define	P_VALID(p) (!(p2u(p) & 1))
#define	P_NEXT(p) (p2u(p) >> 1)
#define	P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
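
/*
 * Illustration (not part of the original code): a free slot stores its
 * free-list link as a tagged value with the low bit set, so it can never
 * be mistaken for a valid, word-aligned proc pointer.  For example,
 * P_FREE(5) yields the odd value 11; P_VALID() is then false and
 * P_NEXT() recovers the link:
 *
 *	struct proc *f = P_FREE(5);	// (5 << 1) | 1 == 11
 *	KASSERT(!P_VALID(f));		// low bit set => free slot
 *	KASSERT(P_NEXT(f) == 5);	// 11 >> 1 == 5
 */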

/*
 * Table of process IDs (PIDs).
 */
static struct pid_table *pid_table	__read_mostly;

#define	INITIAL_PID_TABLE_SIZE		(1 << 5)

/* Table mask, threshold for growing and number of allocated PIDs. */
static u_int		pid_tbl_mask	__read_mostly;
static u_int		pid_alloc_lim	__read_mostly;
static u_int		pid_alloc_cnt	__cacheline_aligned;

/* Next free, last free and maximum PIDs. */
static u_int		next_free_pt	__cacheline_aligned;
static u_int		last_free_pt	__cacheline_aligned;
static pid_t		pid_max		__read_mostly;

/* Components of the first process -- never freed. */

extern struct emul emul_netbsd;	/* defined in kern_exec.c */

struct session session0 = {
	.s_count = 1,
	.s_sid = 0,
};
struct pgrp pgrp0 = {
	.pg_members = LIST_HEAD_INITIALIZER(&pgrp0.pg_members),
	.pg_session = &session0,
};
filedesc_t filedesc0;
struct cwdinfo cwdi0 = {
	.cwdi_cmask = CMASK,		/* see cmask below */
	.cwdi_refcnt = 1,
};
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;
struct proc proc0 = {
	.p_lwps = LIST_HEAD_INITIALIZER(&proc0.p_lwps),
	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
	.p_nlwps = 1,
	.p_nrlwps = 1,
	.p_nlwpid = 1,		/* must match lwp0.l_lid */
	.p_pgrp = &pgrp0,
	.p_comm = "system",
	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to init(8)
	 * when they exit.  init(8) can easily wait them out for us.
	 */
	.p_flag = PK_SYSTEM | PK_NOCLDWAIT,
	.p_stat = SACTIVE,
	.p_nice = NZERO,
	.p_emul = &emul_netbsd,
	.p_cwdi = &cwdi0,
	.p_limit = &limit0,
	.p_fd = &filedesc0,
	.p_vmspace = &vmspace0,
	.p_stats = &pstat0,
	.p_sigacts = &sigacts0,
};
kauth_cred_t cred0;

static const int	nofile	= NOFILE;
static const int	maxuprc	= MAXUPRC;
static const int	cmask	= CMASK;

static int sysctl_doeproc(SYSCTLFN_PROTO);
static int sysctl_kern_proc_args(SYSCTLFN_PROTO);
static void fill_kproc2(struct proc *, struct kinfo_proc2 *, bool);

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc	},
	{ &zombproc	},
	{ NULL		},
};

static struct pgrp *	pg_remove(pid_t);
static void		pg_delete(pid_t);
static void		orphanpg(struct pgrp *);

static specificdata_domain_t proc_specificdata_domain;

static pool_cache_t proc_cache;

static kauth_listener_t proc_listener;

static int
proc_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	struct proc *p;
	int result;

	result = KAUTH_RESULT_DEFER;
	p = arg0;

	switch (action) {
	case KAUTH_PROCESS_CANSEE: {
		enum kauth_process_req req;

		req = (enum kauth_process_req)arg1;

		switch (req) {
		case KAUTH_REQ_PROCESS_CANSEE_ARGS:
		case KAUTH_REQ_PROCESS_CANSEE_ENTRY:
		case KAUTH_REQ_PROCESS_CANSEE_OPENFILES:
			result = KAUTH_RESULT_ALLOW;

			break;

		case KAUTH_REQ_PROCESS_CANSEE_ENV:
			if (kauth_cred_getuid(cred) !=
			    kauth_cred_getuid(p->p_cred) ||
			    kauth_cred_getuid(cred) !=
			    kauth_cred_getsvuid(p->p_cred))
				break;

			result = KAUTH_RESULT_ALLOW;

			break;

		default:
			break;
		}

		break;
	}

	case KAUTH_PROCESS_FORK: {
		int lnprocs = (int)(unsigned long)arg2;

		/*
		 * Don't allow a nonprivileged user to use the last few
		 * processes.  The variable lnprocs is the current number of
		 * processes, maxproc is the limit.
		 */
		if (__predict_false((lnprocs >= maxproc - 5)))
			break;

		result = KAUTH_RESULT_ALLOW;

		break;
	}

	case KAUTH_PROCESS_CORENAME:
	case KAUTH_PROCESS_STOPFLAG:
		if (proc_uidmatch(cred, p->p_cred) == 0)
			result = KAUTH_RESULT_ALLOW;

		break;

	default:
		break;
	}

	return result;
}

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	u_int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
	    * sizeof(struct pid_table), KM_SLEEP);
	pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
	pid_max = PID_MAX;

	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
		pid_table[i].pt_pid = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY
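
	/*
	 * Worked example (illustrative only): with the defaults above,
	 * INITIAL_PID_TABLE_SIZE is 32 and PID_MAX is 30000, so LINK_EMPTY
	 * is (30000 + 32) & ~31 == 30016.  Every slot's preset 'use count'
	 * is thus already above PID_MAX, which makes proc_alloc_pid() wrap
	 * the very first allocations back to small values: slot 1 yields
	 * (30016 + 32 + 1) & 31 == 1, i.e. pid 1 is handed out next.
	 */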

	proc_specificdata_domain = specificdata_domain_create();
	KASSERT(proc_specificdata_domain != NULL);

	proc_cache = pool_cache_init(sizeof(struct proc), 0, 0, 0,
	    "procpl", NULL, IPL_NONE, NULL, NULL, NULL);

	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
	    proc_listener_cb, NULL);
}

void
procinit_sysctl(void)
{
	static struct sysctllog *clog;

	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "kern", NULL,
	    NULL, 0, NULL, 0,
	    CTL_KERN, CTL_EOL);

	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc",
	    SYSCTL_DESCR("System-wide process information"),
	    sysctl_doeproc, 0, NULL, 0,
	    CTL_KERN, KERN_PROC, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc2",
	    SYSCTL_DESCR("Machine-independent process information"),
	    sysctl_doeproc, 0, NULL, 0,
	    CTL_KERN, KERN_PROC2, CTL_EOL);
	sysctl_createv(&clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "proc_args",
	    SYSCTL_DESCR("Process argument information"),
	    sysctl_kern_proc_args, 0, NULL, 0,
	    CTL_KERN, KERN_PROC_ARGS, CTL_EOL);

	/*
	  "nodes" under these:

	  KERN_PROC_ALL
	  KERN_PROC_PID pid
	  KERN_PROC_PGRP pgrp
	  KERN_PROC_SESSION sess
	  KERN_PROC_TTY tty
	  KERN_PROC_UID uid
	  KERN_PROC_RUID uid
	  KERN_PROC_GID gid
	  KERN_PROC_RGID gid

	  all in all, probably not worth the effort...
	*/
}

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct rlimit *rlim;
	rlim_t lim;
	int i;

	p = &proc0;
	pg = &pgrp0;

	mutex_init(&p->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	rw_init(&p->p_reflock);
	cv_init(&p->p_waitcv, "wait");
	cv_init(&p->p_lwpcv, "lwpwait");

	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);

	pid_table[0].pt_pgrp = pg;
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif

	/* Create credentials. */
	cred0 = kauth_cred_alloc();
	p->p_cred = cred0;

	/* Create the CWD info. */
	rw_init(&cwdi0.cwdi_lock);

	/* Create the limits structures. */
	mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);

	rlim = limit0.pl_rlimit;
	for (i = 0; i < __arraycount(limit0.pl_rlimit); i++) {
		rlim[i].rlim_cur = RLIM_INFINITY;
		rlim[i].rlim_max = RLIM_INFINITY;
	}

	rlim[RLIMIT_NOFILE].rlim_max = maxfiles;
	rlim[RLIMIT_NOFILE].rlim_cur = maxfiles < nofile ? maxfiles : nofile;

	rlim[RLIMIT_NPROC].rlim_max = maxproc;
	rlim[RLIMIT_NPROC].rlim_cur = maxproc < maxuprc ? maxproc : maxuprc;

	lim = MIN(VM_MAXUSER_ADDRESS, ctob((rlim_t)uvmexp.free));
	rlim[RLIMIT_RSS].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_max = lim;
	rlim[RLIMIT_MEMLOCK].rlim_cur = lim / 3;

	/* Note that default core name has zero length. */
	limit0.pl_corename = defcorename;
	limit0.pl_cnlen = 0;
	limit0.pl_refcnt = 1;
	limit0.pl_writeable = false;
	limit0.pl_sv_limit = NULL;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	fd_init(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));

	/* Initialize signal state for proc0. XXX IPL_SCHED */
	mutex_init(&p->p_sigacts->sa_mutex, MUTEX_DEFAULT, IPL_SCHED);
	siginit(p);

	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
}

/*
 * Session reference counting.
 */

void
proc_sesshold(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	ss->s_count++;
}

void
proc_sessrele(struct session *ss)
{

	KASSERT(mutex_owned(proc_lock));
	/*
	 * We keep the pgrp with the same id as the session in order to
	 * stop a process being given the same pid.  Since the pgrp holds
	 * a reference to the session, it must be a 'zombie' pgrp by now.
	 */
	if (--ss->s_count == 0) {
		struct pgrp *pg;

		pg = pg_remove(ss->s_sid);
		mutex_exit(proc_lock);

		kmem_free(pg, sizeof(struct pgrp));
		kmem_free(ss, sizeof(struct session));
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats -ve ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;
	struct session *session;
	int error;

	mutex_enter(proc_lock);
	if (pg_id < 0) {
		struct proc *p1 = proc_find(-pg_id);
		if (p1 == NULL) {
			error = EINVAL;
			goto fail;
		}
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgrp_find(pg_id);
		if (pgrp == NULL) {
			error = EINVAL;
			goto fail;
		}
	}
	session = pgrp->pg_session;
	error = (session != p->p_pgrp->pg_session) ? EPERM : 0;
fail:
	mutex_exit(proc_lock);
	return error;
}

/*
 * p_inferior: is p an inferior of q?
 */
static inline bool
p_inferior(struct proc *p, struct proc *q)
{

	KASSERT(mutex_owned(proc_lock));

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return false;
	return true;
}

/*
 * proc_find: locate a process by the ID.
 *
 * => Must be called with proc_lock held.
 */
proc_t *
proc_find_raw(pid_t pid)
{
	struct pid_table *pt;
	proc_t *p;

	KASSERT(mutex_owned(proc_lock));
	pt = &pid_table[pid & pid_tbl_mask];
	p = pt->pt_proc;
	if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) {
		return NULL;
	}
	return p;
}

proc_t *
proc_find(pid_t pid)
{
	proc_t *p;

	p = proc_find_raw(pid);
	if (__predict_false(p == NULL)) {
		return NULL;
	}

	/*
	 * Only allow live processes to be found by PID.
	 * XXX: p_stat might change, since unlocked.
	 */
	if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
		return p;
	}
	return NULL;
}
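
/*
 * Usage sketch (illustrative only): lookups are only stable while
 * proc_lock is held, so callers bracket both the lookup and any use
 * of the result:
 *
 *	mutex_enter(proc_lock);
 *	if ((p = proc_find(pid)) != NULL) {
 *		... inspect p while the lock is held ...
 *	}
 *	mutex_exit(proc_lock);
 */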

/*
 * pgrp_find: locate a process group by the ID.
 *
 * => Must be called with proc_lock held.
 */
struct pgrp *
pgrp_find(pid_t pgid)
{
	struct pgrp *pg;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;

	/*
	 * Cannot look up a process group that only exists because the
	 * session has not died yet (traditional).
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		return NULL;
	}
	return pg;
}

static void
expand_pid_table(void)
{
	size_t pt_size, tsz;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	pid_t pid, rpid;
	u_int i;
	uint new_pt_mask;

	pt_size = pid_tbl_mask + 1;
	tsz = pt_size * 2 * sizeof(struct pid_table);
	new_pt = kmem_alloc(tsz, KM_SLEEP);
	new_pt_mask = pt_size * 2 - 1;

	mutex_enter(proc_lock);
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		mutex_exit(proc_lock);
		kmem_free(new_pt, tsz);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
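
	/*
	 * Worked example (illustrative only): doubling a 32-entry table
	 * makes pt_size == 32 the new distinguishing bit.  A proc with
	 * pid 1 stays in slot 1 of the lower half (1 & 32 == 0), while a
	 * proc with pid 33, which also hashed to old slot 1, moves to
	 * slot 33 in the upper half (33 & 32 != 0).  Whichever half of
	 * each pair is left empty is pushed onto the front of the free
	 * list.
	 */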
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			rpid = 0;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else {
			pid = pid_table[i].pt_pid;
			rpid = pid;
		}

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;
		n_pt[pid & pt_size].pt_pid = rpid;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		n_pt[pid & pt_size].pt_pid = 0;

		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Save old table size and switch tables */
	tsz = pt_size * sizeof(struct pid_table);
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = new_pt_mask;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	mutex_exit(proc_lock);
	kmem_free(n_pt, tsz);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;

	p = pool_cache_get(proc_cache, PR_WAITOK);
	p->p_stat = SIDL;			/* protect against others */
	proc_initspecific(p);
	kdtrace_proc_ctor(NULL, p);
	p->p_pid = -1;
	proc_alloc_pid(p);
	return p;
}

/*
 * proc_alloc_pid: allocate PID and record the given proc 'p' so that
 * proc_find_raw() can find it by the PID.
 */

pid_t
proc_alloc_pid(struct proc *p)
{
	struct pid_table *pt;
	pid_t pid;
	int nxt;

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		mutex_enter(proc_lock);
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		mutex_exit(proc_lock);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;

	KASSERT(pt->pt_pid == 0);
	pt->pt_pid = pid;
	if (p->p_pid == -1) {
		p->p_pid = pid;
	}
	pid_alloc_cnt++;
	mutex_exit(proc_lock);

	return pid;
}
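
/*
 * Worked example (illustrative only): with a 32-entry table
 * (pid_tbl_mask == 31), a free slot 5 whose saved 'use count' is 64
 * yields pid (64 & ~31) + 31 + 1 + 5 == 101.  When that pid is freed,
 * proc_free_pid() saves 101 & ~31 == 96 back into the slot, so the
 * next allocation from slot 5 produces pid 96 + 32 + 5 == 133: each
 * reuse of a slot advances the pid by the table size, which is what
 * keeps pids from being recycled immediately.
 */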

/*
 * Free a process id - called from proc_free (in kern_exit.c)
 *
 * Called with the proc_lock held.
 */
void
proc_free_pid(pid_t pid)
{
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pid & pid_tbl_mask];

	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
	KASSERT(pt->pt_pid == pid);
	pt->pt_pid = 0;

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		pt->pt_pid = 0;
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	atomic_dec_uint(&nprocs);
}

void
proc_free_mem(struct proc *p)
{

	kdtrace_proc_dtor(NULL, p);
	pool_cache_put(proc_cache, p);
}

/*
 * proc_enterpgrp: move p to a new or existing process group (and session).
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid and posix_spawn/spawn_return.
 */
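/*
 * Caller mapping (illustrative only): based on the description above,
 * setsid() creates both a pgrp and a session for the caller, while
 * setpgid() moves a process without creating a session:
 *
 *	proc_enterpgrp(curp, curp->p_pid, curp->p_pid, true);	// setsid
 *	proc_enterpgrp(curp, pid, pgid, false);			// setpgid
 */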
int
proc_enterpgrp(struct proc *curp, pid_t pid, pid_t pgid, bool mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *p;
	int rval;
	pid_t pg_id = NO_PGID;

	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;

	/* Allocate data areas we might need before doing any validity checks */
	mutex_enter(proc_lock);		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		mutex_exit(proc_lock);
		new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
		mutex_enter(proc_lock);
	} else
		new_pgrp = NULL;
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (pid != curp->p_pid) {
		/* Must exist and be one of our children... */
		p = proc_find(pid);
		if (p == NULL || !p_inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & PK_EXEC) {
			rval = EACCES;
			goto done;
		}
	} else {
		/* ... setsid() cannot re-enter a pgrp */
		if (mksess && (curp->p_pgid == curp->p_pid ||
		    pgrp_find(curp->p_pid)))
			goto done;
		p = curp;
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */

	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = NULL;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_lflag &= ~PL_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			proc_sesshold(sess);
		}
		pgrp->pg_session = sess;
		sess = NULL;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Interlock with ttread(). */
	mutex_spin_enter(&tty_lock);

	/* Move process to requested group. */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

	/* Done with the swap; we can release the tty mutex. */
	mutex_spin_exit(&tty_lock);

done:
	if (pg_id != NO_PGID) {
		/* Releases proc_lock. */
		pg_delete(pg_id);
	} else {
		mutex_exit(proc_lock);
	}
	if (sess != NULL)
		kmem_free(sess, sizeof(*sess));
	if (new_pgrp != NULL)
		kmem_free(new_pgrp, sizeof(*new_pgrp));
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * proc_leavepgrp: remove a process from its process group.
 * => must be called with the proc_lock held, which will be released;
 */
void
proc_leavepgrp(struct proc *p)
{
	struct pgrp *pgrp;

	KASSERT(mutex_owned(proc_lock));

	/* Interlock with ttread() */
	mutex_spin_enter(&tty_lock);
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = NULL;
	mutex_spin_exit(&tty_lock);

	if (LIST_EMPTY(&pgrp->pg_members)) {
		/* Releases proc_lock. */
		pg_delete(pgrp->pg_id);
	} else {
		mutex_exit(proc_lock);
	}
}

/*
 * pg_remove: remove a process group from the table.
 * => must be called with the proc_lock held;
 * => returns process group to free;
 */
static struct pgrp *
pg_remove(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;

	KASSERT(mutex_owned(proc_lock));

	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;

	KASSERT(pgrp != NULL);
	KASSERT(pgrp->pg_id == pg_id);
	KASSERT(LIST_EMPTY(&pgrp->pg_members));

	pt->pt_pgrp = NULL;

	if (!P_VALID(pt->pt_proc)) {
		/* Orphaned pgrp, put slot onto free list. */
		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		KASSERT(pt->pt_pid == 0);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	return pgrp;
}

/*
 * pg_delete: delete and free a process group.
 * => must be called with the proc_lock held, which will be released.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pg;
	struct tty *ttyp;
	struct session *ss;

	KASSERT(mutex_owned(proc_lock));

	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
		mutex_exit(proc_lock);
		return;
	}

	ss = pg->pg_session;

	/* Remove reference (if any) from tty to this process group */
	mutex_spin_enter(&tty_lock);
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pg) {
		ttyp->t_pgrp = NULL;
		KASSERT(ttyp->t_session == ss);
	}
	mutex_spin_exit(&tty_lock);

	/*
	 * The leading process group in a session is freed by proc_sessrele(),
	 * if last reference.  Note: proc_sessrele() releases proc_lock.
	 */
	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
	proc_sessrele(ss);

	if (pg != NULL) {
		/* Free it, if was not done by proc_sessrele(). */
		kmem_free(pg, sizeof(struct pgrp));
	}
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proc_lock held.
 */
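/*
 * Example (illustrative only): a shell in pgrp A starts a pipeline in
 * pgrp B of the same session.  Each pipeline member's parent lives in a
 * different pgrp (A) of the same session, so each member qualifies B for
 * job control and contributes to B's pg_jobc.  When the last such member
 * leaves, pg_jobc drops to zero and B becomes orphaned.
 */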
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	KASSERT(mutex_owned(proc_lock));

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering) {
			pgrp->pg_jobc++;
			p->p_lflag &= ~PL_ORPHANPG;
		} else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering) {
				child->p_lflag &= ~PL_ORPHANPG;
				hispgrp->pg_jobc++;
			} else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proc_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	KASSERT(mutex_owned(proc_lock));

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			p->p_lflag |= PL_ORPHANPG;
			psignal(p, SIGHUP);
			psignal(p, SIGCONT);
		}
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf("  id %x: ", id);
		if (P_VALID(p))
			db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
			    pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    LIST_FIRST(&pgrp->pg_members));
			LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
static int	kstackleftmin = KSTACK_SIZE;
static int	kstackleftthres = KSTACK_SIZE / 8;

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill all the stack with magic number
	 * so that later modification on it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (void *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (void *)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((char *)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = ((const char *)ip) - (const char *)KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes "
			    "(pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	int ret = 0;

	marker.p_flag = PK_MARKER;
	mutex_enter(proc_lock);
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & PK_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		KASSERT(mutex_owned(proc_lock));
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	mutex_exit(proc_lock);

	return ret;
}
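
/*
 * Usage sketch (illustrative only, with a hypothetical callback): the
 * callback runs with proc_lock held and must return 0 to continue the
 * walk; a nonzero return stops it and is passed back to the caller.
 *
 *	static int
 *	count_cb(struct proc *p, void *arg)
 *	{
 *		(*(int *)arg)++;
 *		return 0;
 *	}
 *
 *	int n = 0;
 *	proclist_foreach_call(&allproc, count_cb, &n);
 */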

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

	/* XXXCDC: how should locking work here? */

	/* curproc exception is for coredump. */

	if ((p != curproc && (p->p_sflag & PS_WEXIT) != 0) ||
	    (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
		return EFAULT;
	}

	uvmspace_addref(p->p_vmspace);
	*vm = p->p_vmspace;

	return 0;
}

/*
 * Acquire a write lock on the process credential.
 */
void
proc_crmod_enter(void)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	/* Reset what needs to be reset in plimit. */
	if (p->p_limit->pl_corename != defcorename) {
		lim_setcorename(p, defcorename, 0);
	}

	mutex_enter(p->p_lock);

	/* Ensure the LWP cached credentials are up to date. */
	if ((oc = l->l_cred) != p->p_cred) {
		kauth_cred_hold(p->p_cred);
		l->l_cred = p->p_cred;
		kauth_cred_free(oc);
	}
}

/*
 * Set in a new process credential, and drop the write lock.  The credential
 * must have a reference already.  Optionally, free a no-longer required
 * credential.  The scheduler also needs to inspect p_cred, so we also
 * briefly acquire the sched state mutex.
 */
void
proc_crmod_leave(kauth_cred_t scred, kauth_cred_t fcred, bool sugid)
{
	struct lwp *l = curlwp, *l2;
	struct proc *p = l->l_proc;
	kauth_cred_t oc;

	KASSERT(mutex_owned(p->p_lock));

	/* Is there a new credential to set in? */
	if (scred != NULL) {
		p->p_cred = scred;
		LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
			if (l2 != l)
				l2->l_prflag |= LPR_CRMOD;
		}

		/* Ensure the LWP cached credentials are up to date. */
		if ((oc = l->l_cred) != scred) {
			kauth_cred_hold(scred);
			l->l_cred = scred;
		}
	} else
		oc = NULL;	/* XXXgcc */

	if (sugid) {
		/*
		 * Mark process as having changed credentials, stops
		 * tracing etc.
		 */
		p->p_flag |= PK_SUGID;
	}

	mutex_exit(p->p_lock);

	/* If there is a credential to be released, free it now. */
	if (fcred != NULL) {
		KASSERT(scred != NULL);
		kauth_cred_free(fcred);
		if (oc != scred)
			kauth_cred_free(oc);
	}
}

/*
 * proc_specific_key_create --
 *	Create a key for subsystem proc-specific data.
 */
int
proc_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return (specificdata_key_create(proc_specificdata_domain, keyp, dtor));
}

/*
 * proc_specific_key_delete --
 *	Delete a key for subsystem proc-specific data.
 */
void
proc_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(proc_specificdata_domain, key);
}

/*
 * proc_initspecific --
 *	Initialize a proc's specificdata container.
 */
void
proc_initspecific(struct proc *p)
{
	int error;

	error = specificdata_init(proc_specificdata_domain, &p->p_specdataref);
	KASSERT(error == 0);
}

/*
 * proc_finispecific --
 *	Finalize a proc's specificdata container.
 */
void
proc_finispecific(struct proc *p)
{

	specificdata_fini(proc_specificdata_domain, &p->p_specdataref);
}

/*
 * proc_getspecific --
 *	Return proc-specific data corresponding to the specified key.
 */
void *
proc_getspecific(struct proc *p, specificdata_key_t key)
{

	return (specificdata_getspecific(proc_specificdata_domain,
	    &p->p_specdataref, key));
}

/*
 * proc_setspecific --
 *	Set proc-specific data corresponding to the specified key.
 */
void
proc_setspecific(struct proc *p, specificdata_key_t key, void *data)
{

	specificdata_setspecific(proc_specificdata_domain,
	    &p->p_specdataref, key, data);
}

int
proc_uidmatch(kauth_cred_t cred, kauth_cred_t target)
{
	int r = 0;

	if (kauth_cred_getuid(cred) != kauth_cred_getuid(target) ||
	    kauth_cred_getuid(cred) != kauth_cred_getsvuid(target)) {
		/*
		 * suid proc of ours or proc not ours
		 */
		r = EPERM;
	} else if (kauth_cred_getgid(target) != kauth_cred_getsvgid(target)) {
		/*
		 * sgid proc has sgid back to us temporarily
		 */
		r = EPERM;
	} else {
		/*
		 * our rgid must be in target's group list (ie,
		 * sub-processes started by a sgid process)
		 */
		int ismember = 0;

		if (kauth_cred_ismember_gid(cred,
		    kauth_cred_getgid(target), &ismember) != 0 ||
		    !ismember)
			r = EPERM;
	}

	return (r);
}

/*
 * sysctl stuff
 */

#define	KERN_PROCSLOP	(5 * sizeof(struct kinfo_proc))

static const u_int sysctl_flagmap[] = {
	PK_ADVLOCK, P_ADVLOCK,
	PK_EXEC, P_EXEC,
	PK_NOCLDWAIT, P_NOCLDWAIT,
	PK_32, P_32,
	PK_CLDSIGIGN, P_CLDSIGIGN,
	PK_SUGID, P_SUGID,
	0
};

static const u_int sysctl_sflagmap[] = {
	PS_NOCLDSTOP, P_NOCLDSTOP,
	PS_WEXIT, P_WEXIT,
	PS_STOPFORK, P_STOPFORK,
	PS_STOPEXEC, P_STOPEXEC,
	PS_STOPEXIT, P_STOPEXIT,
	0
};

static const u_int sysctl_slflagmap[] = {
	PSL_TRACED, P_TRACED,
	PSL_FSTRACE, P_FSTRACE,
	PSL_CHTRACED, P_CHTRACED,
	PSL_SYSCALL, P_SYSCALL,
	0
};

static const u_int sysctl_lflagmap[] = {
	PL_CONTROLT, P_CONTROLT,
	PL_PPWAIT, P_PPWAIT,
	0
};

static const u_int sysctl_stflagmap[] = {
	PST_PROFIL, P_PROFIL,
	0

};

/* used by kern_lwp also */
const u_int sysctl_lwpflagmap[] = {
	LW_SINTR, L_SINTR,
	LW_SYSTEM, L_SYSTEM,
	0
};

/*
 * Find the most ``active'' lwp of a process and return it for ps display
 * purposes
 */
static struct lwp *
proc_active_lwp(struct proc *p)
{
	static const int ostat[] = {
		0,
		2,	/* LSIDL */
		6,	/* LSRUN */
		5,	/* LSSLEEP */
		4,	/* LSSTOP */
		0,	/* LSZOMB */
		1,	/* LSDEAD */
		7,	/* LSONPROC */
		3	/* LSSUSPENDED */
	};

	struct lwp *l, *lp = NULL;
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		KASSERT(l->l_stat >= 0 && l->l_stat < __arraycount(ostat));
		if (lp == NULL ||
		    ostat[l->l_stat] > ostat[lp->l_stat] ||
		    (ostat[l->l_stat] == ostat[lp->l_stat] &&
		    l->l_cpticks > lp->l_cpticks)) {
			lp = l;
			continue;
		}
	}
	return lp;
}

static int
sysctl_doeproc(SYSCTLFN_ARGS)
{
	union {
		struct kinfo_proc kproc;
		struct kinfo_proc2 kproc2;
	} *kbuf;
	struct proc *p, *next, *marker;
	char *where, *dp;
	int type, op, arg, error;
	u_int elem_size, kelem_size, elem_count;
	size_t buflen, needed;
	bool match, zombie, mmmbrains;

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	dp = where = oldp;
	buflen = where != NULL ? *oldlenp : 0;
	error = 0;
	needed = 0;
	type = rnode->sysctl_num;

	if (type == KERN_PROC) {
		if (namelen != 2 && !(namelen == 1 && name[0] == KERN_PROC_ALL))
			return (EINVAL);
		op = name[0];
		if (op != KERN_PROC_ALL)
			arg = name[1];
		else
			arg = 0;		/* Quell compiler warning */
		elem_count = 0;	/* Ditto */
		kelem_size = elem_size = sizeof(kbuf->kproc);
	} else {
		if (namelen != 4)
			return (EINVAL);
		op = name[0];
		arg = name[1];
		elem_size = name[2];
		elem_count = name[3];
		kelem_size = sizeof(kbuf->kproc2);
	}

	sysctl_unlock();

	kbuf = kmem_alloc(sizeof(*kbuf), KM_SLEEP);
	marker = kmem_alloc(sizeof(*marker), KM_SLEEP);
	marker->p_flag = PK_MARKER;

	mutex_enter(proc_lock);
	mmmbrains = false;
	for (p = LIST_FIRST(&allproc);; p = next) {
		if (p == NULL) {
			if (!mmmbrains) {
				p = LIST_FIRST(&zombproc);
				mmmbrains = true;
			}
			if (p == NULL)
				break;
		}
		next = LIST_NEXT(p, p_list);
		if ((p->p_flag & PK_MARKER) != 0)
			continue;

		/*
		 * Skip embryonic processes.
		 */
		if (p->p_stat == SIDL)
			continue;

		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, p,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error != 0) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * TODO - make more efficient (see notes below).
		 * do by session.
		 */
		switch (op) {
		case KERN_PROC_PID:
			/* could do this with just a lookup */
			match = (p->p_pid == (pid_t)arg);
			break;

		case KERN_PROC_PGRP:
			/* could do this by traversing pgrp */
			match = (p->p_pgrp->pg_id == (pid_t)arg);
			break;

		case KERN_PROC_SESSION:
			match = (p->p_session->s_sid == (pid_t)arg);
			break;

		case KERN_PROC_TTY:
			match = true;
			if (arg == (int) KERN_PROC_TTY_REVOKE) {
				if ((p->p_lflag & PL_CONTROLT) == 0 ||
				    p->p_session->s_ttyp == NULL ||
				    p->p_session->s_ttyvp != NULL) {
					match = false;
				}
			} else if ((p->p_lflag & PL_CONTROLT) == 0 ||
			    p->p_session->s_ttyp == NULL) {
				if ((dev_t)arg != KERN_PROC_TTY_NODEV) {
					match = false;
				}
			} else if (p->p_session->s_ttyp->t_dev != (dev_t)arg) {
				match = false;
			}
			break;

		case KERN_PROC_UID:
			match = (kauth_cred_geteuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RUID:
			match = (kauth_cred_getuid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_GID:
			match = (kauth_cred_getegid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_RGID:
			match = (kauth_cred_getgid(p->p_cred) == (uid_t)arg);
			break;

		case KERN_PROC_ALL:
			match = true;
			/* allow everything */
			break;

		default:
			error = EINVAL;
			mutex_exit(p->p_lock);
			goto cleanup;
		}
		if (!match) {
			mutex_exit(p->p_lock);
			continue;
		}

		/*
		 * Grab a hold on the process.
		 */
		if (mmmbrains) {
			zombie = true;
		} else {
			zombie = !rw_tryenter(&p->p_reflock, RW_READER);
		}
		if (zombie) {
			LIST_INSERT_AFTER(p, marker, p_list);
		}

		if (buflen >= elem_size &&
		    (type == KERN_PROC || elem_count > 0)) {
			if (type == KERN_PROC) {
				kbuf->kproc.kp_proc = *p;
				fill_eproc(p, &kbuf->kproc.kp_eproc, zombie);
			} else {
				fill_kproc2(p, &kbuf->kproc2, zombie);
				elem_count--;
			}
			mutex_exit(p->p_lock);
			mutex_exit(proc_lock);
			/*
			 * Copy out elem_size, but not larger than kelem_size
			 */
			error = sysctl_copyout(l, kbuf, dp,
			    min(kelem_size, elem_size));
			mutex_enter(proc_lock);
			if (error) {
				goto bah;
			}
			dp += elem_size;
			buflen -= elem_size;
		} else {
			mutex_exit(p->p_lock);
		}
		needed += elem_size;

		/*
		 * Release reference to process.
		 */
		if (zombie) {
			next = LIST_NEXT(marker, p_list);
			LIST_REMOVE(marker, p_list);
		} else {
			rw_exit(&p->p_reflock);
			next = LIST_NEXT(p, p_list);
		}
	}
	mutex_exit(proc_lock);

	if (where != NULL) {
		*oldlenp = dp - where;
		if (needed > *oldlenp) {
			error = ENOMEM;
			goto out;
		}
	} else {
		needed += KERN_PROCSLOP;
		*oldlenp = needed;
	}
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return 0;
 bah:
	if (zombie)
		LIST_REMOVE(marker, p_list);
	else
		rw_exit(&p->p_reflock);
 cleanup:
	mutex_exit(proc_lock);
 out:
	if (kbuf)
		kmem_free(kbuf, sizeof(*kbuf));
	if (marker)
		kmem_free(marker, sizeof(*marker));
	sysctl_relock();
	return error;
}
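
/*
 * Userland usage sketch (illustrative only): the KERN_PROC2 node takes
 * the four extra name components checked above ({op, arg, elem_size,
 * elem_count}), e.g. to fetch one kinfo_proc2 for a known pid:
 *
 *	struct kinfo_proc2 kp;
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, pid,
 *	    sizeof(kp), 1 };
 *	size_t len = sizeof(kp);
 *	sysctl(mib, 6, &kp, &len, NULL, 0);
 */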

int
copyin_psstrings(struct proc *p, struct ps_strings *arginfo)
{

#ifdef COMPAT_NETBSD32
	if (p->p_flag & PK_32) {
		struct ps_strings32 arginfo32;

		int error = copyin_proc(p, (void *)p->p_psstrp, &arginfo32,
		    sizeof(arginfo32));
		if (error)
			return error;
		arginfo->ps_argvstr = (void *)(uintptr_t)arginfo32.ps_argvstr;
		arginfo->ps_nargvstr = arginfo32.ps_nargvstr;
		arginfo->ps_envstr = (void *)(uintptr_t)arginfo32.ps_envstr;
		arginfo->ps_nenvstr = arginfo32.ps_nenvstr;
		return 0;
	}
#endif
	return copyin_proc(p, (void *)p->p_psstrp, arginfo, sizeof(*arginfo));
}

static int
copy_procargs_sysctl_cb(void *cookie_, const void *src, size_t off, size_t len)
{
	void **cookie = cookie_;
	struct lwp *l = cookie[0];
	char *dst = cookie[1];

	return sysctl_copyout(l, src, dst + off, len);
}

/*
 * sysctl helper routine for kern.proc_args pseudo-subtree.
 */
static int
sysctl_kern_proc_args(SYSCTLFN_ARGS)
{
	struct ps_strings pss;
	struct proc *p;
	pid_t pid;
	int type, error;
	void *cookie[2];

	if (namelen == 1 && name[0] == CTL_QUERY)
		return (sysctl_query(SYSCTLFN_CALL(rnode)));

	if (newp != NULL || namelen != 2)
		return (EINVAL);
	pid = name[0];
	type = name[1];

	switch (type) {
	case KERN_PROC_ARGV:
	case KERN_PROC_NARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NENV:
		/* ok */
		break;
	default:
		return (EINVAL);
	}

	sysctl_unlock();

	/* check pid */
	mutex_enter(proc_lock);
	if ((p = proc_find(pid)) == NULL) {
		error = EINVAL;
		goto out_locked;
	}
	mutex_enter(p->p_lock);

	/* Check permission. */
	if (type == KERN_PROC_ARGV || type == KERN_PROC_NARGV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ARGS), NULL, NULL);
	else if (type == KERN_PROC_ENV || type == KERN_PROC_NENV)
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENV), NULL, NULL);
	else
		error = EINVAL; /* XXXGCC */
	if (error) {
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	if (oldp == NULL) {
		if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV)
			*oldlenp = sizeof (int);
		else
			*oldlenp = ARG_MAX;	/* XXX XXX XXX */
		error = 0;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	/*
	 * Zombies don't have a stack, so we can't read their psstrings.
	 * System processes also don't have a user stack.
	 */
	if (P_ZOMBIE(p) || (p->p_flag & PK_SYSTEM) != 0) {
		error = EINVAL;
		mutex_exit(p->p_lock);
		goto out_locked;
	}

	error = rw_tryenter(&p->p_reflock, RW_READER) ? 0 : EBUSY;
	mutex_exit(p->p_lock);
	if (error) {
		goto out_locked;
	}
	mutex_exit(proc_lock);

	if (type == KERN_PROC_NARGV || type == KERN_PROC_NENV) {
		int value;
		if ((error = copyin_psstrings(p, &pss)) == 0) {
			if (type == KERN_PROC_NARGV)
				value = pss.ps_nargvstr;
			else
				value = pss.ps_nenvstr;
			error = sysctl_copyout(l, &value, oldp, sizeof(value));
			*oldlenp = sizeof(value);
		}
	} else {
		cookie[0] = l;
		cookie[1] = oldp;
		error = copy_procargs(p, type, oldlenp,
		    copy_procargs_sysctl_cb, cookie);
	}
	rw_exit(&p->p_reflock);
	sysctl_relock();
	return error;

out_locked:
	mutex_exit(proc_lock);
	sysctl_relock();
	return error;
}
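
/*
 * Userland usage sketch (illustrative only): kern.proc_args takes
 * {pid, type}, with a size probe first since the kernel reports
 * ARG_MAX when oldp is NULL:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *	size_t len;
 *	sysctl(mib, 4, NULL, &len, NULL, 0);	// probe buffer size
 *	char *buf = malloc(len);
 *	sysctl(mib, 4, buf, &len, NULL, 0);	// NUL-separated strings
 */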

int
copy_procargs(struct proc *p, int oid, size_t *limit,
    int (*cb)(void *, const void *, size_t, size_t), void *cookie)
{
	struct ps_strings pss;
	size_t len, i, loaded, entry_len;
	struct uio auio;
	struct iovec aiov;
	int error, argvlen;
	char *arg;
	char **argv;
	vaddr_t user_argv;
	struct vmspace *vmspace;

	/*
	 * Allocate a temporary buffer to hold the argument vector and
	 * the arguments themselves.
	 */
1997 arg = kmem_alloc(PAGE_SIZE, KM_SLEEP);
1998 argv = kmem_alloc(PAGE_SIZE, KM_SLEEP);
1999
2000 /*
2001 * Lock the process down in memory.
2002 */
2003 vmspace = p->p_vmspace;
2004 uvmspace_addref(vmspace);
2005
2006 /*
2007 * Read in the ps_strings structure.
2008 */
2009 if ((error = copyin_psstrings(p, &pss)) != 0)
2010 goto done;
2011
2012 /*
2013 * Now read the address of the argument vector.
2014 */
2015 switch (oid) {
2016 case KERN_PROC_ARGV:
2017 user_argv = (uintptr_t)pss.ps_argvstr;
2018 argvlen = pss.ps_nargvstr;
2019 break;
2020 case KERN_PROC_ENV:
2021 user_argv = (uintptr_t)pss.ps_envstr;
2022 argvlen = pss.ps_nenvstr;
2023 break;
2024 default:
2025 error = EINVAL;
2026 goto done;
2027 }
2028
2029 if (argvlen < 0) {
2030 error = EIO;
2031 goto done;
2032 }
2033
2034 #ifdef COMPAT_NETBSD32
2035 if (p->p_flag & PK_32)
2036 entry_len = sizeof(netbsd32_charp);
2037 else
2038 #endif
2039 entry_len = sizeof(char *);
2040
2041 /*
2042 * Now copy each string.
2043 */
2044 len = 0; /* bytes written to user buffer */
2045 loaded = 0; /* bytes from argv already processed */
2046 i = 0; /* To make compiler happy */
2047
2048 for (; argvlen; --argvlen) {
2049 int finished = 0;
2050 vaddr_t base;
2051 size_t xlen;
2052 int j;
2053
2054 if (loaded == 0) {
2055 size_t rem = entry_len * argvlen;
2056 loaded = MIN(rem, PAGE_SIZE);
2057 error = copyin_vmspace(vmspace,
2058 (const void *)user_argv, argv, loaded);
2059 if (error)
2060 break;
2061 user_argv += loaded;
2062 i = 0;
2063 }
2064
2065 #ifdef COMPAT_NETBSD32
2066 if (p->p_flag & PK_32) {
2067 netbsd32_charp *argv32;
2068
2069 argv32 = (netbsd32_charp *)argv;
2070 base = (vaddr_t)NETBSD32PTR64(argv32[i++]);
2071 } else
2072 #endif
2073 base = (vaddr_t)argv[i++];
2074 loaded -= entry_len;
2075
2076 /*
2077 * The program has messed around with its arguments,
2078 * possibly deleting some, and replacing them with
2079 * NULL's. Treat this as the last argument and not
2080 * a failure.
2081 */
2082 if (base == 0)
2083 break;
2084
2085 while (!finished) {
2086 xlen = PAGE_SIZE - (base & PAGE_MASK);
2087
2088 aiov.iov_base = arg;
2089 aiov.iov_len = PAGE_SIZE;
2090 auio.uio_iov = &aiov;
2091 auio.uio_iovcnt = 1;
2092 auio.uio_offset = base;
2093 auio.uio_resid = xlen;
2094 auio.uio_rw = UIO_READ;
2095 UIO_SETUP_SYSSPACE(&auio);
2096 error = uvm_io(&vmspace->vm_map, &auio);
2097 if (error)
2098 goto done;
2099
2100 /* Look for the end of the string */
2101 for (j = 0; j < xlen; j++) {
2102 if (arg[j] == '\0') {
2103 xlen = j + 1;
2104 finished = 1;
2105 break;
2106 }
2107 }
2108
2109 /* Check for user buffer overflow */
2110 if (len + xlen > *limit) {
2111 finished = 1;
2112 if (len > *limit)
2113 xlen = 0;
2114 else
2115 xlen = *limit - len;
2116 }
2117
2118 /* Copyout the page */
2119 error = (*cb)(cookie, arg, len, xlen);
2120 if (error)
2121 goto done;
2122
2123 len += xlen;
2124 base += xlen;
2125 }
2126 }
2127 *limit = len;
2128
2129 done:
2130 kmem_free(argv, PAGE_SIZE);
2131 kmem_free(arg, PAGE_SIZE);
2132 uvmspace_free(vmspace);
2133 return error;
2134 }
2135
2136 /*
2137 * Fill in an eproc structure for the specified process.
2138 */
2139 void
2140 fill_eproc(struct proc *p, struct eproc *ep, bool zombie)
2141 {
2142 struct tty *tp;
2143 struct lwp *l;
2144
2145 KASSERT(mutex_owned(proc_lock));
2146 KASSERT(mutex_owned(p->p_lock));
2147
2148 memset(ep, 0, sizeof(*ep));
2149
2150 ep->e_paddr = p;
2151 ep->e_sess = p->p_session;
2152 if (p->p_cred) {
2153 kauth_cred_topcred(p->p_cred, &ep->e_pcred);
2154 kauth_cred_toucred(p->p_cred, &ep->e_ucred);
2155 }
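	/*
	 * Address-space and LWP details exist only for live, fully
	 * constructed processes.
	 */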
2156 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2157 struct vmspace *vm = p->p_vmspace;
2158
2159 ep->e_vm.vm_rssize = vm_resident_count(vm);
2160 ep->e_vm.vm_tsize = vm->vm_tsize;
2161 ep->e_vm.vm_dsize = vm->vm_dsize;
2162 ep->e_vm.vm_ssize = vm->vm_ssize;
2163 ep->e_vm.vm_map.size = vm->vm_map.size;
2164
2165 /* Pick the primary (first) LWP */
2166 l = proc_active_lwp(p);
2167 KASSERT(l != NULL);
2168 lwp_lock(l);
2169 if (l->l_wchan)
2170 strncpy(ep->e_wmesg, l->l_wmesg, WMESGLEN);
2171 lwp_unlock(l);
2172 }
2173 if (p->p_pptr)
2174 ep->e_ppid = p->p_pptr->p_pid;
2175 if (p->p_pgrp && p->p_session) {
2176 ep->e_pgid = p->p_pgrp->pg_id;
2177 ep->e_jobc = p->p_pgrp->pg_jobc;
2178 ep->e_sid = p->p_session->s_sid;
2179 if ((p->p_lflag & PL_CONTROLT) &&
2180 (tp = ep->e_sess->s_ttyp)) {
2181 ep->e_tdev = tp->t_dev;
2182 ep->e_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2183 ep->e_tsess = tp->t_session;
2184 } else
2185 ep->e_tdev = (uint32_t)NODEV;
2186 ep->e_flag = ep->e_sess->s_ttyvp ? EPROC_CTTY : 0;
2187 if (SESS_LEADER(p))
2188 ep->e_flag |= EPROC_SLEADER;
2189 strncpy(ep->e_login, ep->e_sess->s_login, MAXLOGNAME);
2190 }
2191 ep->e_xsize = ep->e_xrssize = 0;
2192 ep->e_xccount = ep->e_xswrss = 0;
2193 }
2194
2195 /*
2196 * Fill in a kinfo_proc2 structure for the specified process.
2197 */
2198 static void
2199 fill_kproc2(struct proc *p, struct kinfo_proc2 *ki, bool zombie)
2200 {
2201 struct tty *tp;
2202 struct lwp *l, *l2;
2203 struct timeval ut, st, rt;
2204 sigset_t ss1, ss2;
2205 struct rusage ru;
2206 struct vmspace *vm;
2207
2208 KASSERT(mutex_owned(proc_lock));
2209 KASSERT(mutex_owned(p->p_lock));
2210
2211 sigemptyset(&ss1);
2212 sigemptyset(&ss2);
2213 memset(ki, 0, sizeof(*ki));
2214
2215 ki->p_paddr = PTRTOUINT64(p);
2216 ki->p_fd = PTRTOUINT64(p->p_fd);
2217 ki->p_cwdi = PTRTOUINT64(p->p_cwdi);
2218 ki->p_stats = PTRTOUINT64(p->p_stats);
2219 ki->p_limit = PTRTOUINT64(p->p_limit);
2220 ki->p_vmspace = PTRTOUINT64(p->p_vmspace);
2221 ki->p_sigacts = PTRTOUINT64(p->p_sigacts);
2222 ki->p_sess = PTRTOUINT64(p->p_session);
2223 ki->p_tsess = 0; /* may be changed if controlling tty below */
2224 ki->p_ru = PTRTOUINT64(&p->p_stats->p_ru);
2225 ki->p_eflag = 0;
2226 ki->p_exitsig = p->p_exitsig;
2227 ki->p_flag = L_INMEM; /* Process never swapped out */
2228 ki->p_flag |= sysctl_map_flags(sysctl_flagmap, p->p_flag);
2229 ki->p_flag |= sysctl_map_flags(sysctl_sflagmap, p->p_sflag);
2230 ki->p_flag |= sysctl_map_flags(sysctl_slflagmap, p->p_slflag);
2231 ki->p_flag |= sysctl_map_flags(sysctl_lflagmap, p->p_lflag);
2232 ki->p_flag |= sysctl_map_flags(sysctl_stflagmap, p->p_stflag);
2233 ki->p_pid = p->p_pid;
2234 if (p->p_pptr)
2235 ki->p_ppid = p->p_pptr->p_pid;
2236 else
2237 ki->p_ppid = 0;
2238 ki->p_uid = kauth_cred_geteuid(p->p_cred);
2239 ki->p_ruid = kauth_cred_getuid(p->p_cred);
2240 ki->p_gid = kauth_cred_getegid(p->p_cred);
2241 ki->p_rgid = kauth_cred_getgid(p->p_cred);
2242 ki->p_svuid = kauth_cred_getsvuid(p->p_cred);
2243 ki->p_svgid = kauth_cred_getsvgid(p->p_cred);
2244 ki->p_ngroups = kauth_cred_ngroups(p->p_cred);
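	/* Copy out at most as many groups as ki->p_groups can hold. */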
2245 kauth_cred_getgroups(p->p_cred, ki->p_groups,
2246 min(ki->p_ngroups, sizeof(ki->p_groups) / sizeof(ki->p_groups[0])),
2247 UIO_SYSSPACE);
2248
2249 ki->p_uticks = p->p_uticks;
2250 ki->p_sticks = p->p_sticks;
2251 ki->p_iticks = p->p_iticks;
2252 ki->p_tpgid = NO_PGID; /* may be changed if controlling tty below */
2253 ki->p_tracep = PTRTOUINT64(p->p_tracep);
2254 ki->p_traceflag = p->p_traceflag;
2255
2256 	memcpy(&ki->p_sigignore, &p->p_sigctx.ps_sigignore, sizeof(ki_sigset_t));
2257 memcpy(&ki->p_sigcatch, &p->p_sigctx.ps_sigcatch, sizeof(ki_sigset_t));
2258
2259 ki->p_cpticks = 0;
2260 ki->p_pctcpu = p->p_pctcpu;
2261 ki->p_estcpu = 0;
2262 ki->p_stat = p->p_stat; /* Will likely be overridden by LWP status */
2263 ki->p_realstat = p->p_stat;
2264 ki->p_nice = p->p_nice;
2265 ki->p_xstat = p->p_xstat;
2266 ki->p_acflag = p->p_acflag;
2267
2268 strncpy(ki->p_comm, p->p_comm,
2269 min(sizeof(ki->p_comm), sizeof(p->p_comm)));
2270 strncpy(ki->p_ename, p->p_emul->e_name, sizeof(ki->p_ename));
2271
2272 ki->p_nlwps = p->p_nlwps;
2273 ki->p_realflag = ki->p_flag;
2274
2275 if (p->p_stat != SIDL && !P_ZOMBIE(p) && !zombie) {
2276 vm = p->p_vmspace;
2277 ki->p_vm_rssize = vm_resident_count(vm);
2278 ki->p_vm_tsize = vm->vm_tsize;
2279 ki->p_vm_dsize = vm->vm_dsize;
2280 ki->p_vm_ssize = vm->vm_ssize;
2281 ki->p_vm_vsize = atop(vm->vm_map.size);
2282 /*
2283 * Since the stack is initially mapped mostly with
2284 * PROT_NONE and grown as needed, adjust the "mapped size"
2285 * to skip the unused stack portion.
2286 */
2287 ki->p_vm_msize =
2288 atop(vm->vm_map.size) - vm->vm_issize + vm->vm_ssize;
2289
2290 /* Pick the primary (first) LWP */
2291 l = proc_active_lwp(p);
2292 KASSERT(l != NULL);
2293 lwp_lock(l);
2294 ki->p_nrlwps = p->p_nrlwps;
2295 ki->p_forw = 0;
2296 ki->p_back = 0;
2297 ki->p_addr = PTRTOUINT64(l->l_addr);
2298 ki->p_stat = l->l_stat;
2299 ki->p_flag |= sysctl_map_flags(sysctl_lwpflagmap, l->l_flag);
2300 ki->p_swtime = l->l_swtime;
2301 ki->p_slptime = l->l_slptime;
2302 if (l->l_stat == LSONPROC)
2303 ki->p_schedflags = l->l_cpu->ci_schedstate.spc_flags;
2304 else
2305 ki->p_schedflags = 0;
2306 ki->p_priority = lwp_eprio(l);
2307 ki->p_usrpri = l->l_priority;
2308 if (l->l_wchan)
2309 strncpy(ki->p_wmesg, l->l_wmesg, sizeof(ki->p_wmesg));
2310 ki->p_wchan = PTRTOUINT64(l->l_wchan);
2311 ki->p_cpuid = cpu_index(l->l_cpu);
2312 lwp_unlock(l);
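		/*
		 * Accumulate pending signals, signal masks and scheduler
		 * counters over every LWP in the process.
		 */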
2313 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
2314 /* This is hardly correct, but... */
2315 sigplusset(&l->l_sigpend.sp_set, &ss1);
2316 sigplusset(&l->l_sigmask, &ss2);
2317 ki->p_cpticks += l->l_cpticks;
2318 ki->p_pctcpu += l->l_pctcpu;
2319 ki->p_estcpu += l->l_estcpu;
2320 }
2321 }
2322 sigplusset(&p->p_sigpend.sp_set, &ss2);
2323 memcpy(&ki->p_siglist, &ss1, sizeof(ki_sigset_t));
2324 memcpy(&ki->p_sigmask, &ss2, sizeof(ki_sigset_t));
2325
2326 if (p->p_session != NULL) {
2327 ki->p_sid = p->p_session->s_sid;
2328 ki->p__pgid = p->p_pgrp->pg_id;
2329 if (p->p_session->s_ttyvp)
2330 ki->p_eflag |= EPROC_CTTY;
2331 if (SESS_LEADER(p))
2332 ki->p_eflag |= EPROC_SLEADER;
2333 strncpy(ki->p_login, p->p_session->s_login,
2334 min(sizeof ki->p_login - 1, sizeof p->p_session->s_login));
2335 ki->p_jobc = p->p_pgrp->pg_jobc;
2336 if ((p->p_lflag & PL_CONTROLT) && (tp = p->p_session->s_ttyp)) {
2337 ki->p_tdev = tp->t_dev;
2338 ki->p_tpgid = tp->t_pgrp ? tp->t_pgrp->pg_id : NO_PGID;
2339 ki->p_tsess = PTRTOUINT64(tp->t_session);
2340 } else {
2341 ki->p_tdev = (int32_t)NODEV;
2342 }
2343 }
2344
2345 if (!P_ZOMBIE(p) && !zombie) {
2346 ki->p_uvalid = 1;
2347 ki->p_ustart_sec = p->p_stats->p_start.tv_sec;
2348 ki->p_ustart_usec = p->p_stats->p_start.tv_usec;
2349
2350 calcru(p, &ut, &st, NULL, &rt);
2351 ki->p_rtime_sec = rt.tv_sec;
2352 ki->p_rtime_usec = rt.tv_usec;
2353 ki->p_uutime_sec = ut.tv_sec;
2354 ki->p_uutime_usec = ut.tv_usec;
2355 ki->p_ustime_sec = st.tv_sec;
2356 ki->p_ustime_usec = st.tv_usec;
2357
2358 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
2359 ki->p_uru_nvcsw = 0;
2360 ki->p_uru_nivcsw = 0;
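		/*
		 * l_ncsw counts all context switches, so the voluntary
		 * count is the total minus the involuntary ones.
		 */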
2361 LIST_FOREACH(l2, &p->p_lwps, l_sibling) {
2362 ki->p_uru_nvcsw += (l2->l_ncsw - l2->l_nivcsw);
2363 ki->p_uru_nivcsw += l2->l_nivcsw;
2364 ruadd(&ru, &l2->l_ru);
2365 }
2366 ki->p_uru_maxrss = ru.ru_maxrss;
2367 ki->p_uru_ixrss = ru.ru_ixrss;
2368 ki->p_uru_idrss = ru.ru_idrss;
2369 ki->p_uru_isrss = ru.ru_isrss;
2370 ki->p_uru_minflt = ru.ru_minflt;
2371 ki->p_uru_majflt = ru.ru_majflt;
2372 ki->p_uru_nswap = ru.ru_nswap;
2373 ki->p_uru_inblock = ru.ru_inblock;
2374 ki->p_uru_oublock = ru.ru_oublock;
2375 ki->p_uru_msgsnd = ru.ru_msgsnd;
2376 ki->p_uru_msgrcv = ru.ru_msgrcv;
2377 ki->p_uru_nsignals = ru.ru_nsignals;
2378
2379 timeradd(&p->p_stats->p_cru.ru_utime,
2380 &p->p_stats->p_cru.ru_stime, &ut);
2381 ki->p_uctime_sec = ut.tv_sec;
2382 ki->p_uctime_usec = ut.tv_usec;
2383 }
2384 }
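
/*
 * Usage sketch (userland, not part of this file): fill_kproc2() backs
 * the KERN_PROC2 sysctl node.  A single-process query, with error
 * handling abbreviated and "pid" assumed to hold the target process
 * id, might look like:
 *
 *	#include <sys/param.h>
 *	#include <sys/sysctl.h>
 *	#include <stdio.h>
 *
 *	struct kinfo_proc2 kp;
 *	size_t len = sizeof(kp);
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_PID, pid,
 *	    sizeof(kp), 1 };
 *
 *	if (sysctl(mib, 6, &kp, &len, NULL, 0) == 0)
 *		printf("%d %s\n", kp.p_pid, kp.p_comm);
 */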
2385