/*	$NetBSD: kern_proc.c,v 1.86.4.3 2006/03/10 21:04:13 elad Exp $	*/

/*-
 * Copyright (c) 1999 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.86.4.3 2006/03/10 21:04:13 elad Exp $");

#include "opt_kstack.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/acct.h>
#include <sys/wait.h>
#include <sys/file.h>
#include <ufs/ufs/quota.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/ioctl.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
#include <sys/ras.h>
#include <sys/sa.h>
#include <sys/savar.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>

#include <uvm/uvm.h>
#include <uvm/uvm_extern.h>

/*
 * Other process lists
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 */
struct lock proclist_lock;
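
/*
 * Illustrative usage of the locking functions defined below (a sketch,
 * not part of the original file).  Readers bracket traversal with
 * proclist_lock_read() and proclist_unlock_read(); writers must keep
 * the spl value returned by proclist_lock_write() and hand it back on
 * release, as proc0_init() does:
 *
 *	int s = proclist_lock_write();
 *	LIST_INSERT_HEAD(&allproc, p, p_list);
 *	proclist_unlock_write(s);
 */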

/*
 * Pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (An orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is threaded through the pt_proc
 * field of 'free' items - the pointer is made odd (and hence
 * invalid) to distinguish it from a live proc pointer.
 */

struct pid_table {
	struct proc	*pt_proc;
	struct pgrp	*pt_pgrp;
};
#if 1	/* strongly typed cast - should be a noop */
static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define	p2u(p)	((uint)p)
#endif
#define	P_VALID(p)	(!(p2u(p) & 1))
#define	P_NEXT(p)	(p2u(p) >> 1)
#define	P_FREE(pid)	((struct proc *)(uintptr_t)((pid) << 1 | 1))
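
/*
 * Illustrative round trip through the encoding above (a sketch, not
 * part of the original file).  A free slot stores its link value
 * shifted left one bit with the low bit set, so it can never be
 * mistaken for a (word-aligned) proc pointer:
 *
 *	struct proc *slot = P_FREE(42);		encode link value 42
 *	P_VALID(slot) == 0			low bit set: not a proc
 *	P_NEXT(slot) == 42			decoding recovers the link
 */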

#define	INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;	/* largest value we allocate */

/* Components of the first process -- never freed. */
struct session session0;
struct pgrp pgrp0;
struct proc proc0;
struct lwp lwp0;
kauth_cred_t cred0;
struct filedesc0 filedesc0;
struct cwdinfo cwdi0;
struct plimit limit0;
struct pstats pstat0;
struct vmspace vmspace0;
struct sigacts sigacts0;

extern struct user *proc0paddr;

extern const struct emul emul_netbsd;	/* defined in kern_exec.c */

int nofile = NOFILE;
int maxuprc = MAXUPRC;
int cmask = CMASK;

POOL_INIT(proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
    &pool_allocator_nointr);
POOL_INIT(lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
    &pool_allocator_nointr);
POOL_INIT(lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    &pool_allocator_nointr);
POOL_INIT(pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
    &pool_allocator_nointr);
POOL_INIT(pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
    &pool_allocator_nointr);
POOL_INIT(plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
    &pool_allocator_nointr);
POOL_INIT(pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
    &pool_allocator_nointr);
POOL_INIT(rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
    &pool_allocator_nointr);
POOL_INIT(ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
    &pool_allocator_nointr);
POOL_INIT(sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
    &pool_allocator_nointr);
POOL_INIT(saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0, "saupcpl",
    &pool_allocator_nointr);
POOL_INIT(sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
    &pool_allocator_nointr);
POOL_INIT(savp_pool, sizeof(struct sadata_vp), 0, 0, 0, "savppl",
    &pool_allocator_nointr);
POOL_INIT(session_pool, sizeof(struct session), 0, 0, 0, "sessionpl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 */
const struct proclist_desc proclists[] = {
	{ &allproc },
	{ &zombproc },
	{ NULL },
};

static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);

/*
 * Initialize global process hashing structures.
 */
void
procinit(void)
{
	const struct proclist_desc *pd;
	int i;
#define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
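	/*
	 * Worked example (illustrative, assuming the defaults above):
	 * with PID_MAX = 30000 and INITIAL_PID_TABLE_SIZE = 32,
	 * LINK_EMPTY = (30000 + 32) & ~31 = 30016, the smallest
	 * multiple of the table size above PID_MAX.  Seeding every
	 * free slot's 'use count' with a value this large makes the
	 * first allocation wrap past pid_max and hand out pid 1
	 * (see proc_alloc below).
	 */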

	for (pd = proclists; pd->pd_list != NULL; pd++)
		LIST_INIT(pd->pd_list);

	spinlockinit(&proclist_lock, "proclk", 0);

	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
	    M_PROC, M_WAITOK);
	/* Set free list running through table...
	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
	for (i = 0; i <= pid_tbl_mask; i++) {
		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
		pid_table[i].pt_pgrp = 0;
	}
	/* slot 0 is just grabbed */
	next_free_pt = 1;
	/* Need to fix last entry. */
	last_free_pt = pid_tbl_mask;
	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
	/* point at which we grow table - to avoid reusing pids too often */
	pid_alloc_lim = pid_tbl_mask - 1;
#undef LINK_EMPTY

	LIST_INIT(&alllwp);

	uihashtbl =
	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
}

/*
 * Initialize process 0.
 */
void
proc0_init(void)
{
	struct proc *p;
	struct pgrp *pg;
	struct session *sess;
	struct lwp *l;
	int s;
	u_int i;
	rlim_t lim;

	p = &proc0;
	pg = &pgrp0;
	sess = &session0;
	l = &lwp0;

	simple_lock_init(&p->p_lock);
	LIST_INIT(&p->p_lwps);
	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
	p->p_nlwps = 1;
	simple_lock_init(&p->p_sigctx.ps_silock);
	CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);

	s = proclist_lock_write();

	pid_table[0].pt_proc = p;
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(&alllwp, l, l_list);

	p->p_pgrp = pg;
	pid_table[0].pt_pgrp = pg;
	LIST_INIT(&pg->pg_members);
	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);

	pg->pg_session = sess;
	sess->s_count = 1;
	sess->s_sid = 0;
	sess->s_leader = p;

	proclist_unlock_write(s);

	/*
	 * Set P_NOCLDWAIT so that kernel threads are reparented to
	 * init(8) when they exit.  init(8) can easily wait them out
	 * for us.
	 */
	p->p_flag = P_SYSTEM | P_NOCLDWAIT;
	p->p_stat = SACTIVE;
	p->p_nice = NZERO;
	p->p_emul = &emul_netbsd;
#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
	strncpy(p->p_comm, "swapper", MAXCOMLEN);

	l->l_flag = L_INMEM;
	l->l_stat = LSONPROC;
	p->p_nrlwps = 1;

	callout_init(&l->l_tsleep_ch);

	/* Create credentials. */
	cred0 = kauth_cred_alloc();
	p->p_cred = cred0;

	/* Create the CWD info. */
	p->p_cwdi = &cwdi0;
	cwdi0.cwdi_cmask = cmask;
	cwdi0.cwdi_refcnt = 1;
	simple_lock_init(&cwdi0.cwdi_slock);

	/* Create the limits structures. */
	p->p_limit = &limit0;
	simple_lock_init(&limit0.p_slock);
	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
		limit0.pl_rlimit[i].rlim_cur =
		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;

	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
	    maxfiles < nofile ? maxfiles : nofile;

	limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
	    maxproc < maxuprc ? maxproc : maxuprc;

	lim = ptoa(uvmexp.free);
	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = lim;
	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = lim / 3;
	limit0.pl_corename = defcorename;
	limit0.p_refcnt = 1;

	/* Configure virtual memory system, set vm rlimits. */
	uvm_init_limits(p);

	/* Initialize file descriptor table for proc0. */
	p->p_fd = &filedesc0.fd_fd;
	fdinit1(&filedesc0);

	/*
	 * Initialize proc0's vmspace, which uses the kernel pmap.
	 * All kernel processes (which never have user space mappings)
	 * share proc0's vmspace, and thus, the kernel pmap.
	 */
	uvmspace_init(&vmspace0, pmap_kernel(), round_page(VM_MIN_ADDRESS),
	    trunc_page(VM_MAX_ADDRESS));
	p->p_vmspace = &vmspace0;

	l->l_addr = proc0paddr;		/* XXX */

	p->p_stats = &pstat0;

	/* Initialize signal state for proc0. */
	p->p_sigacts = &sigacts0;
	siginit(p);
}

/*
 * Acquire a read lock on the proclist.
 */
void
proclist_lock_read(void)
{
	int error;

	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock_read: failed to acquire lock");
#endif
}

/*
 * Release a read lock on the proclist.
 */
void
proclist_unlock_read(void)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
}

/*
 * Acquire a write lock on the proclist.
 */
int
proclist_lock_write(void)
{
	int s, error;

	s = splclock();
	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
#ifdef DIAGNOSTIC
	if (__predict_false(error != 0))
		panic("proclist_lock: failed to acquire lock");
#endif
	return s;
}

/*
 * Release a write lock on the proclist.
 */
void
proclist_unlock_write(int s)
{

	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
	splx(s);
}

/*
 * Check that the specified process group is in the session of the
 * specified process.
 * Treats negative ids as process ids.
 * Used to validate TIOCSPGRP requests.
 */
int
pgid_in_session(struct proc *p, pid_t pg_id)
{
	struct pgrp *pgrp;

	if (pg_id < 0) {
		struct proc *p1 = pfind(-pg_id);
		if (p1 == NULL)
			return EINVAL;
		pgrp = p1->p_pgrp;
	} else {
		pgrp = pgfind(pg_id);
		if (pgrp == NULL)
			return EINVAL;
	}
	if (pgrp->pg_session != p->p_pgrp->pg_session)
		return EPERM;
	return 0;
}

/*
 * Is p an inferior of q?
 */
int
inferior(struct proc *p, struct proc *q)
{

	for (; p != q; p = p->p_pptr)
		if (p->p_pid == 0)
			return 0;
	return 1;
}

/*
 * Locate a process by number
 */
struct proc *
p_find(pid_t pid, uint flags)
{
	struct proc *p;
	char stat;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	p = pid_table[pid & pid_tbl_mask].pt_proc;
	/* Only allow live processes to be found by pid. */
	if (P_VALID(p) && p->p_pid == pid &&
	    ((stat = p->p_stat) == SACTIVE || stat == SSTOP
	    || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
		if (flags & PFIND_UNLOCK_OK)
			proclist_unlock_read();
		return p;
	}
	if (flags & PFIND_UNLOCK_FAIL)
		proclist_unlock_read();
	return NULL;
}
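
/*
 * Illustrative caller pattern for p_find (a sketch, not part of the
 * original file).  A caller that already holds the proclist read lock
 * passes PFIND_LOCKED and stays responsible for releasing the lock
 * itself, whether or not the lookup succeeds:
 *
 *	proclist_lock_read();
 *	p = p_find(pid, PFIND_LOCKED);
 *	if (p != NULL)
 *		... use p while the read lock keeps it alive ...
 *	proclist_unlock_read();
 */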


/*
 * Locate a process group by number
 */
struct pgrp *
pg_find(pid_t pgid, uint flags)
{
	struct pgrp *pg;

	if (!(flags & PFIND_LOCKED))
		proclist_lock_read();
	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	/*
	 * Can't look up a pgrp that only exists because the session
	 * hasn't died yet (traditional)
	 */
	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
		if (flags & PFIND_UNLOCK_FAIL)
			proclist_unlock_read();
		return NULL;
	}

	if (flags & PFIND_UNLOCK_OK)
		proclist_unlock_read();
	return pg;
}

static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place it in the upper half,
	 * 'even' pids go in the lower half.
	 * Free items stay in the low half so we don't have to
	 * fix up the references to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 */
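	/*
	 * Worked example (illustrative): growing from size 4 (mask 3)
	 * to size 8 (mask 7), a process with pid 6 sits in old slot
	 * 6 & 3 = 2.  Since bit 4 is set in its pid it is 'odd' with
	 * respect to the new size bit and lands in new slot 6 = 6 & 7,
	 * while the twin slot 2 is pushed onto the free list.  Either
	 * way the invariant slot == pid & pid_tbl_mask is preserved,
	 * so lookup remains a single array index.
	 */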
	i = pt_size - 1;
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
		    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000); once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}

struct proc *
proc_alloc(void)
{
	struct proc *p;
	int s;
	int nxt;
	pid_t pid;
	struct pid_table *pt;

	p = pool_get(&proc_pool, PR_WAITOK);
	p->p_stat = SIDL;	/* protect against others */

	/* allocate next free pid */

	for (;;expand_pid_table()) {
		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
			/* ensure pids cycle through 2000+ values */
			continue;
		s = proclist_lock_write();
		pt = &pid_table[next_free_pt];
#ifdef DIAGNOSTIC
		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
			panic("proc_alloc: slot busy");
#endif
		nxt = P_NEXT(pt->pt_proc);
		if (nxt & pid_tbl_mask)
			break;
		/* Table full - expand (NB last entry not used....) */
		proclist_unlock_write(s);
	}

	/* pid is 'saved use count' + 'size' + entry */
	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
	if ((uint)pid > (uint)pid_max)
		pid &= pid_tbl_mask;
	p->p_pid = pid;
	next_free_pt = nxt & pid_tbl_mask;

	/* Grab table slot */
	pt->pt_proc = p;
	pid_alloc_cnt++;

	proclist_unlock_write(s);

	return p;
}
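
/*
 * Worked example of the pid computation above (illustrative, using
 * the boot-time defaults from procinit).  The very first allocation
 * finds slot 1 holding the link LINK_EMPTY + 2 = 30018, so with
 * pid_tbl_mask = 31 the raw pid is (30018 & ~31) + 32 + 1 = 30049.
 * That exceeds pid_max (30000), so it is masked down to
 * 30049 & 31 = 1; the preset 'use count' is exactly what makes
 * pid 1 come out first.
 */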

/*
 * Free last resources of a process - called from proc_free (in kern_exit.c)
 */
void
proc_free_mem(struct proc *p)
{
	int s;
	pid_t pid = p->p_pid;
	struct pid_table *pt;

	s = proclist_lock_write();

	pt = &pid_table[pid & pid_tbl_mask];
#ifdef DIAGNOSTIC
	if (__predict_false(pt->pt_proc != p))
		panic("proc_free: pid_table mismatch, pid %x, proc %p",
		    pid, p);
#endif
	/* save pid use count in slot */
	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);

	if (pt->pt_pgrp == NULL) {
		/* link last freed entry onto ours */
		pid &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
		last_free_pt = pid;
		pid_alloc_cnt--;
	}

	nprocs--;
	proclist_unlock_write(s);

	pool_put(&proc_pool, p);
}

/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also, mksess should only be set if we are creating a process group.
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();	/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		sess = pool_get(&session_pool, PR_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

 done:
	proclist_unlock_write(s);
	if (sess != NULL)
		pool_put(&session_pool, sess);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
		    pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}

/*
 * Remove process from process group.
 */
int
leavepgrp(struct proc *p)
{
	int s;
	struct pgrp *pgrp;
	pid_t pg_id;

	s = proclist_lock_write();
	pgrp = p->p_pgrp;
	LIST_REMOVE(p, p_pglist);
	p->p_pgrp = 0;
	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
	proclist_unlock_write(s);

	if (pg_id != NO_PGID)
		pg_delete(pg_id);
	return 0;
}

static void
pg_free(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct pid_table *pt;
	int s;

	s = proclist_lock_write();
	pt = &pid_table[pg_id & pid_tbl_mask];
	pgrp = pt->pt_pgrp;
#ifdef DIAGNOSTIC
	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
	    || !LIST_EMPTY(&pgrp->pg_members)))
		panic("pg_free: process group absent or has members");
#endif
	pt->pt_pgrp = 0;

	if (!P_VALID(pt->pt_proc)) {
		/* orphaned pgrp, put slot onto free list */
#ifdef DIAGNOSTIC
		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
			panic("pg_free: process slot on free list");
#endif

		pg_id &= pid_tbl_mask;
		pt = &pid_table[last_free_pt];
		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
		last_free_pt = pg_id;
		pid_alloc_cnt--;
	}
	proclist_unlock_write(s);

	pool_put(&pgrp_pool, pgrp);
}

/*
 * Delete a process group.
 */
static void
pg_delete(pid_t pg_id)
{
	struct pgrp *pgrp;
	struct tty *ttyp;
	struct session *ss;
	int s, is_pgrp_leader;

	s = proclist_lock_write();
	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
	if (pgrp == NULL || pgrp->pg_id != pg_id ||
	    !LIST_EMPTY(&pgrp->pg_members)) {
		proclist_unlock_write(s);
		return;
	}

	ss = pgrp->pg_session;

	/* Remove reference (if any) from tty to this process group */
	ttyp = ss->s_ttyp;
	if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
		ttyp->t_pgrp = NULL;
#ifdef DIAGNOSTIC
		if (ttyp->t_session != ss)
			panic("pg_delete: wrong session on terminal");
#endif
	}

	/*
	 * The leading process group in a session is freed
	 * by sessdelete() if last reference.
	 */
	is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
	proclist_unlock_write(s);
	SESSRELE(ss);

	if (is_pgrp_leader)
		return;

	pg_free(pg_id);
}

/*
 * Delete session - called from SESSRELE when s_count becomes zero.
 */
void
sessdelete(struct session *ss)
{
	/*
	 * We keep the pgrp with the same id as the session in
	 * order to stop a process being given the same pid.
	 * Since the pgrp holds a reference to the session, it
	 * must be a 'zombie' pgrp by now.
	 */

	pg_free(ss->s_sid);

	pool_put(&session_pool, ss);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * Call with proclist_lock held.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession = pgrp->pg_session;
	struct proc *child;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	hispgrp = p->p_pptr->p_pgrp;
	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(child, &p->p_children, p_sibling) {
		hispgrp = child->p_pgrp;
		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
		    !P_ZOMBIE(child)) {
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
		}
	}
}

/*
 * A process group has become orphaned;
 * if there are any stopped processes in the group,
 * hang up all processes in that group.
 *
 * Call with proclist_lock held.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
				psignal(p, SIGHUP);
				psignal(p, SIGCONT);
			}
			return;
		}
	}
}

/* mark process as suid/sgid, reset some values to defaults */
void
p_sugid(struct proc *p)
{
	struct plimit *lim;
	char *cn;

	p->p_flag |= P_SUGID;
	/* reset what needs to be reset in plimit */
	lim = p->p_limit;
	if (lim->pl_corename != defcorename) {
		if (lim->p_refcnt > 1 &&
		    (lim->p_lflags & PL_SHAREMOD) == 0) {
			p->p_limit = limcopy(lim);
			limfree(lim);
			lim = p->p_limit;
		}
		simple_lock(&lim->p_slock);
		cn = lim->pl_corename;
		lim->pl_corename = defcorename;
		simple_unlock(&lim->p_slock);
		if (cn != defcorename)
			free(cn, M_TEMP);
	}
}

#ifdef DDB
#include <ddb/db_output.h>
void pidtbl_dump(void);
void
pidtbl_dump(void)
{
	struct pid_table *pt;
	struct proc *p;
	struct pgrp *pgrp;
	int id;

	db_printf("pid table %p size %x, next %x, last %x\n",
	    pid_table, pid_tbl_mask+1,
	    next_free_pt, last_free_pt);
	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
		p = pt->pt_proc;
		if (!P_VALID(p) && !pt->pt_pgrp)
			continue;
		db_printf(" id %x: ", id);
		if (P_VALID(p))
			db_printf("proc %p id %d (0x%x) %s\n",
			    p, p->p_pid, p->p_pid, p->p_comm);
		else
			db_printf("next %x use %x\n",
			    P_NEXT(p) & pid_tbl_mask,
			    P_NEXT(p) & ~pid_tbl_mask);
		if ((pgrp = pt->pt_pgrp)) {
			db_printf("\tsession %p, sid %d, count %d, login %s\n",
			    pgrp->pg_session, pgrp->pg_session->s_sid,
			    pgrp->pg_session->s_count,
			    pgrp->pg_session->s_login);
			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
			    pgrp->pg_members.lh_first);
			for (p = pgrp->pg_members.lh_first; p != 0;
			    p = p->p_pglist.le_next) {
				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
				    p->p_pid, p, p->p_pgrp, p->p_comm);
			}
		}
	}
}
#endif /* DDB */

#ifdef KSTACK_CHECK_MAGIC
#include <sys/user.h>

#define	KSTACK_MAGIC	0xdeadbeaf

/* XXX should be per process basis? */
int kstackleftmin = KSTACK_SIZE;
int kstackleftthres = KSTACK_SIZE / 8;	/* warn if remaining stack is
					   less than this */

void
kstack_setup_magic(const struct lwp *l)
{
	uint32_t *ip;
	uint32_t const *end;

	KASSERT(l != NULL);
	KASSERT(l != &lwp0);

	/*
	 * fill all the stack with magic number
	 * so that later modification on it can be detected.
	 */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++) {
		*ip = KSTACK_MAGIC;
	}
}

void
kstack_check_magic(const struct lwp *l)
{
	uint32_t const *ip, *end;
	int stackleft;

	KASSERT(l != NULL);

	/* don't check proc0 */ /*XXX*/
	if (l == &lwp0)
		return;

#ifdef __MACHINE_STACK_GROWS_UP
	/* stack grows upwards (eg. hppa) */
	ip = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	end = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	for (ip--; ip >= end; ip--)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
#else /* __MACHINE_STACK_GROWS_UP */
	/* stack grows downwards (eg. i386) */
	ip = (uint32_t *)KSTACK_LOWEST_ADDR(l);
	end = (uint32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
	for (; ip < end; ip++)
		if (*ip != KSTACK_MAGIC)
			break;

	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
#endif /* __MACHINE_STACK_GROWS_UP */

	if (kstackleftmin > stackleft) {
		kstackleftmin = stackleft;
		if (stackleft < kstackleftthres)
			printf("warning: kernel stack left %d bytes"
			    " (pid %u:lid %u)\n", stackleft,
			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}

	if (stackleft <= 0) {
		panic("magic on the top of kernel stack changed for "
		    "pid %u, lid %u: maybe kernel stack overflow",
		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
	}
}
#endif /* KSTACK_CHECK_MAGIC */

/* XXX shouldn't be here */
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	PROCLIST_ASSERT_LOCKED_READ()	\
	KASSERT(lockstatus(&proclist_lock) == LK_SHARED)
#else
#define	PROCLIST_ASSERT_LOCKED_READ()	/* nothing */
#endif

int
proclist_foreach_call(struct proclist *list,
    int (*callback)(struct proc *, void *arg), void *arg)
{
	struct proc marker;
	struct proc *p;
	struct lwp * const l = curlwp;
	int ret = 0;

	marker.p_flag = P_MARKER;
	PHOLD(l);
	proclist_lock_read();
	for (p = LIST_FIRST(list); ret == 0 && p != NULL;) {
		if (p->p_flag & P_MARKER) {
			p = LIST_NEXT(p, p_list);
			continue;
		}
		LIST_INSERT_AFTER(p, &marker, p_list);
		ret = (*callback)(p, arg);
		PROCLIST_ASSERT_LOCKED_READ();
		p = LIST_NEXT(&marker, p_list);
		LIST_REMOVE(&marker, p_list);
	}
	proclist_unlock_read();
	PRELE(l);

	return ret;
}
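
/*
 * Illustrative use of proclist_foreach_call (a sketch; the callback
 * name is invented for the example).  A nonzero return from the
 * callback stops the walk, and the marker entry above keeps the
 * iteration position valid even if the callback temporarily drops
 * and retakes the proclist read lock, which it must hold again on
 * return, as the assertion checks:
 *
 *	static int
 *	count_stopped(struct proc *p, void *arg)
 *	{
 *		if (p->p_stat == SSTOP)
 *			(*(int *)arg)++;
 *		return 0;
 *	}
 *
 *	int n = 0;
 *	proclist_foreach_call(&allproc, count_stopped, &n);
 */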

int
proc_vmspace_getref(struct proc *p, struct vmspace **vm)
{

	/* XXXCDC: how should locking work here? */

	if ((p->p_flag & P_WEXIT) != 0 ||
	    (p->p_vmspace->vm_refcnt < 1)) { /* XXX */
		return EFAULT;
	}

	uvmspace_addref(p->p_vmspace);
	*vm = p->p_vmspace;

	return 0;
}