Home | History | Annotate | Line # | Download | only in kern
kern_proc.c revision 1.72
      1 /*	$NetBSD: kern_proc.c,v 1.72 2004/02/26 11:20:08 junyoung Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the NetBSD
     22  *	Foundation, Inc. and its contributors.
     23  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24  *    contributors may be used to endorse or promote products derived
     25  *    from this software without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37  * POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 /*
     41  * Copyright (c) 1982, 1986, 1989, 1991, 1993
     42  *	The Regents of the University of California.  All rights reserved.
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  * 3. Neither the name of the University nor the names of its contributors
     53  *    may be used to endorse or promote products derived from this software
     54  *    without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     66  * SUCH DAMAGE.
     67  *
     68  *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
     69  */
     70 
     71 #include <sys/cdefs.h>
     72 __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.72 2004/02/26 11:20:08 junyoung Exp $");
     73 
     74 #include "opt_kstack.h"
     75 
     76 #include <sys/param.h>
     77 #include <sys/systm.h>
     78 #include <sys/kernel.h>
     79 #include <sys/proc.h>
     80 #include <sys/resourcevar.h>
     81 #include <sys/buf.h>
     82 #include <sys/acct.h>
     83 #include <sys/wait.h>
     84 #include <sys/file.h>
     85 #include <ufs/ufs/quota.h>
     86 #include <sys/uio.h>
     87 #include <sys/malloc.h>
     88 #include <sys/pool.h>
     89 #include <sys/mbuf.h>
     90 #include <sys/ioctl.h>
     91 #include <sys/tty.h>
     92 #include <sys/signalvar.h>
     93 #include <sys/ras.h>
     94 #include <sys/sa.h>
     95 #include <sys/savar.h>
     96 
/*
 * Per-user process accounting ("user caching").
 *
 * chgproccnt() keeps one record per uid on a hash chain and frees the
 * record again when its count drops back to zero.
 */
struct uidinfo {
	LIST_ENTRY(uidinfo) ui_hash;	/* linkage on the UIHASH() chain */
	uid_t	ui_uid;			/* uid this record accounts for */
	long	ui_proccnt;		/* running count maintained by chgproccnt() */
};
/* Select the hash chain for a uid; uihash is applied as a bit mask. */
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])
/* Hash table of uidinfo chains; set up by hashinit() in procinit(). */
LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
u_long uihash;		/* size of hash table - 1 */
    108 
/*
 * Other process lists
 *
 * Both lists are named in proclists[] and are initialized by procinit().
 */

struct proclist allproc;
struct proclist zombproc;	/* resources have been freed */


/*
 * Process list locking:
 *
 * We have two types of locks on the proclists: read locks and write
 * locks.  Read locks can be used in interrupt context, so while we
 * hold the write lock, we must also block clock interrupts to
 * lock out any scheduling changes that may happen in interrupt
 * context.
 *
 * The proclist lock locks the following structures:
 *
 *	allproc
 *	zombproc
 *	pid_table
 *
 * Take it through proclist_lock_read()/proclist_unlock_read() or
 * proclist_lock_write()/proclist_unlock_write().  The write variant
 * raises the interrupt level with splclock() and returns the previous
 * level, which must be handed back to proclist_unlock_write().
 */
struct lock proclist_lock;
    133 
/*
 * pid to proc lookup is done by indexing the pid_table array.
 * Since pid numbers are only allocated when an empty slot
 * has been found, there is no need to search any lists ever.
 * (an orphaned pgrp will lock the slot, a session will lock
 * the pgrp with the same number.)
 * If the table is too small it is reallocated with twice the
 * previous size and the entries 'unzipped' into the two halves.
 * A linked list of free entries is passed through the pt_proc
 * field of 'free' items - set odd to be an invalid ptr.
 *
 * Encoding of a free slot's pt_proc: the stored integer is
 * (value << 1) | 1.  Within 'value' the bits covered by pid_tbl_mask
 * hold the index of the next free slot, and the bits above the mask
 * hold the saved 'use count' from which the next pid handed out for
 * the slot is derived (see proc_alloc() and proc_free_mem()).
 */

struct pid_table {
	struct proc	*pt_proc;	/* process, or encoded free-list link */
	struct pgrp	*pt_pgrp;	/* process group whose id maps to this slot */
};
#if 1	/* strongly typed cast - should be a noop */
static __inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
#else
#define p2u(p) ((uint)p)
#endif
/* True when the slot value is a real pointer (low bit clear). */
#define P_VALID(p) (!(p2u(p) & 1))
/* Decode the integer stored in a free slot (drops the tag bit). */
#define P_NEXT(p) (p2u(p) >> 1)
/* Encode an integer as a free-slot value (shift up, set the tag bit). */
#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))

#define INITIAL_PID_TABLE_SIZE	(1 << 5)
static struct pid_table *pid_table;	/* allocated in procinit() */
/* Table size - 1; the table size is always a power of two. */
static uint pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
static uint pid_alloc_lim;	/* max we allocate before growing table */
static uint pid_alloc_cnt;	/* number of allocated pids */

/* links through free slots - never empty! */
/* next_free_pt: slot proc_alloc() takes next; last_free_pt: tail where freed slots are appended. */
static uint next_free_pt, last_free_pt;
static pid_t pid_max = PID_MAX;		/* largest value we allocate */
    168 
/*
 * Allocation pools for process-related structures.
 * Each of these is created by pool_init() in procinit().
 */
struct pool proc_pool;
struct pool lwp_pool;
struct pool lwp_uc_pool;
struct pool pcred_pool;
struct pool plimit_pool;
struct pool pstats_pool;
struct pool pgrp_pool;
struct pool rusage_pool;
struct pool ras_pool;
struct pool sadata_pool;
struct pool saupcall_pool;
struct pool sastack_pool;
struct pool ptimer_pool;

/*
 * malloc type definitions.  The code below allocates the pid table and
 * uidinfo records as M_PROC and session headers as M_SESSION.
 */
MALLOC_DEFINE(M_EMULDATA, "emuldata", "Per-process emulation data");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
    187 
/*
 * The process list descriptors, used during pid allocation and
 * by sysctl.  No locking on this data structure is needed since
 * it is completely static.
 *
 * The array is terminated by an entry whose list pointer is NULL;
 * procinit() walks it until it reaches that entry.
 */
const struct proclist_desc proclists[] = {
	{ &allproc	},
	{ &zombproc	},
	{ NULL		},
};

/* Local helpers that are used before their definitions appear. */
static void orphanpg(struct pgrp *);
static void pg_delete(pid_t);
#ifdef DEBUG
void pgrpdump(void);
#endif
    204 
    205 /*
    206  * Initialize global process hashing structures.
    207  */
    208 void
    209 procinit(void)
    210 {
    211 	const struct proclist_desc *pd;
    212 	int i;
    213 #define	LINK_EMPTY ((PID_MAX + INITIAL_PID_TABLE_SIZE) & ~(INITIAL_PID_TABLE_SIZE - 1))
    214 
    215 	for (pd = proclists; pd->pd_list != NULL; pd++)
    216 		LIST_INIT(pd->pd_list);
    217 
    218 	spinlockinit(&proclist_lock, "proclk", 0);
    219 
    220 	pid_table = malloc(INITIAL_PID_TABLE_SIZE * sizeof *pid_table,
    221 			    M_PROC, M_WAITOK);
    222 	/* Set free list running through table...
    223 	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
    224 	for (i = 0; i <= pid_tbl_mask; i++) {
    225 		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
    226 		pid_table[i].pt_pgrp = 0;
    227 	}
    228 	/* slot 0 is just grabbed */
    229 	next_free_pt = 1;
    230 	/* Need to fix last entry. */
    231 	last_free_pt = pid_tbl_mask;
    232 	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
    233 	/* point at which we grow table - to avoid reusing pids too often */
    234 	pid_alloc_lim = pid_tbl_mask - 1;
    235 #undef LINK_EMPTY
    236 
    237 	LIST_INIT(&alllwp);
    238 
    239 	uihashtbl =
    240 	    hashinit(maxproc / 16, HASH_LIST, M_PROC, M_WAITOK, &uihash);
    241 
    242 	pool_init(&proc_pool, sizeof(struct proc), 0, 0, 0, "procpl",
    243 	    &pool_allocator_nointr);
    244 	pool_init(&lwp_pool, sizeof(struct lwp), 0, 0, 0, "lwppl",
    245 	    &pool_allocator_nointr);
    246 	pool_init(&lwp_uc_pool, sizeof(ucontext_t), 0, 0, 0, "lwpucpl",
    247 	    &pool_allocator_nointr);
    248 	pool_init(&pgrp_pool, sizeof(struct pgrp), 0, 0, 0, "pgrppl",
    249 	    &pool_allocator_nointr);
    250 	pool_init(&pcred_pool, sizeof(struct pcred), 0, 0, 0, "pcredpl",
    251 	    &pool_allocator_nointr);
    252 	pool_init(&plimit_pool, sizeof(struct plimit), 0, 0, 0, "plimitpl",
    253 	    &pool_allocator_nointr);
    254 	pool_init(&pstats_pool, sizeof(struct pstats), 0, 0, 0, "pstatspl",
    255 	    &pool_allocator_nointr);
    256 	pool_init(&rusage_pool, sizeof(struct rusage), 0, 0, 0, "rusgepl",
    257 	    &pool_allocator_nointr);
    258 	pool_init(&ras_pool, sizeof(struct ras), 0, 0, 0, "raspl",
    259 	    &pool_allocator_nointr);
    260 	pool_init(&sadata_pool, sizeof(struct sadata), 0, 0, 0, "sadatapl",
    261 	    &pool_allocator_nointr);
    262 	pool_init(&saupcall_pool, sizeof(struct sadata_upcall), 0, 0, 0,
    263 	    "saupcpl", &pool_allocator_nointr);
    264 	pool_init(&sastack_pool, sizeof(struct sastack), 0, 0, 0, "sastackpl",
    265 	    &pool_allocator_nointr);
    266 	pool_init(&ptimer_pool, sizeof(struct ptimer), 0, 0, 0, "ptimerpl",
    267 	    &pool_allocator_nointr);
    268 }
    269 
    270 /*
    271  * Acquire a read lock on the proclist.
    272  */
    273 void
    274 proclist_lock_read(void)
    275 {
    276 	int error;
    277 
    278 	error = spinlockmgr(&proclist_lock, LK_SHARED, NULL);
    279 #ifdef DIAGNOSTIC
    280 	if (__predict_false(error != 0))
    281 		panic("proclist_lock_read: failed to acquire lock");
    282 #endif
    283 }
    284 
    285 /*
    286  * Release a read lock on the proclist.
    287  */
    288 void
    289 proclist_unlock_read(void)
    290 {
    291 
    292 	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
    293 }
    294 
    295 /*
    296  * Acquire a write lock on the proclist.
    297  */
    298 int
    299 proclist_lock_write(void)
    300 {
    301 	int s, error;
    302 
    303 	s = splclock();
    304 	error = spinlockmgr(&proclist_lock, LK_EXCLUSIVE, NULL);
    305 #ifdef DIAGNOSTIC
    306 	if (__predict_false(error != 0))
    307 		panic("proclist_lock: failed to acquire lock");
    308 #endif
    309 	return (s);
    310 }
    311 
    312 /*
    313  * Release a write lock on the proclist.
    314  */
    315 void
    316 proclist_unlock_write(int s)
    317 {
    318 
    319 	(void) spinlockmgr(&proclist_lock, LK_RELEASE, NULL);
    320 	splx(s);
    321 }
    322 
    323 /*
    324  * Change the count associated with number of processes
    325  * a given user is using.
    326  */
    327 int
    328 chgproccnt(uid_t uid, int diff)
    329 {
    330 	struct uidinfo *uip;
    331 	struct uihashhead *uipp;
    332 
    333 	uipp = UIHASH(uid);
    334 
    335 	LIST_FOREACH(uip, uipp, ui_hash)
    336 		if (uip->ui_uid == uid)
    337 			break;
    338 
    339 	if (uip) {
    340 		uip->ui_proccnt += diff;
    341 		if (uip->ui_proccnt > 0)
    342 			return (uip->ui_proccnt);
    343 		if (uip->ui_proccnt < 0)
    344 			panic("chgproccnt: procs < 0");
    345 		LIST_REMOVE(uip, ui_hash);
    346 		FREE(uip, M_PROC);
    347 		return (0);
    348 	}
    349 	if (diff <= 0) {
    350 		if (diff == 0)
    351 			return(0);
    352 		panic("chgproccnt: lost user");
    353 	}
    354 	MALLOC(uip, struct uidinfo *, sizeof(*uip), M_PROC, M_WAITOK);
    355 	LIST_INSERT_HEAD(uipp, uip, ui_hash);
    356 	uip->ui_uid = uid;
    357 	uip->ui_proccnt = diff;
    358 	return (diff);
    359 }
    360 
    361 /*
    362  * Check that the specifies process group in in the session of the
    363  * specified process.
    364  * Treats -ve ids as process ids.
    365  * Used to validate TIOCSPGRP requests.
    366  */
    367 int
    368 pgid_in_session(struct proc *p, pid_t pg_id)
    369 {
    370 	struct pgrp *pgrp;
    371 
    372 	if (pg_id < 0) {
    373 		struct proc *p1 = pfind(-pg_id);
    374 		if (p1 == NULL)
    375 			return EINVAL;
    376 		pgrp = p1->p_pgrp;
    377 	} else {
    378 		pgrp = pgfind(pg_id);
    379 		if (pgrp == NULL)
    380 			return EINVAL;
    381 	}
    382 	if (pgrp->pg_session != p->p_pgrp->pg_session)
    383 		return EPERM;
    384 	return 0;
    385 }
    386 
    387 /*
    388  * Is p an inferior of q?
    389  */
    390 int
    391 inferior(struct proc *p, struct proc *q)
    392 {
    393 
    394 	for (; p != q; p = p->p_pptr)
    395 		if (p->p_pid == 0)
    396 			return (0);
    397 	return (1);
    398 }
    399 
    400 /*
    401  * Locate a process by number
    402  */
    403 struct proc *
    404 p_find(pid_t pid, uint flags)
    405 {
    406 	struct proc *p;
    407 	char stat;
    408 
    409 	if (!(flags & PFIND_LOCKED))
    410 		proclist_lock_read();
    411 	p = pid_table[pid & pid_tbl_mask].pt_proc;
    412 	/* Only allow live processes to be found by pid. */
    413 	if (P_VALID(p) && p->p_pid == pid &&
    414 	    ((stat = p->p_stat) == SACTIVE || stat == SSTOP
    415 		    || (stat == SZOMB && (flags & PFIND_ZOMBIE)))) {
    416 		if (flags & PFIND_UNLOCK_OK)
    417 			 proclist_unlock_read();
    418 		return p;
    419 	}
    420 	if (flags & PFIND_UNLOCK_FAIL)
    421 		 proclist_unlock_read();
    422 	return NULL;
    423 }
    424 
    425 
    426 /*
    427  * Locate a process group by number
    428  */
    429 struct pgrp *
    430 pg_find(pid_t pgid, uint flags)
    431 {
    432 	struct pgrp *pg;
    433 
    434 	if (!(flags & PFIND_LOCKED))
    435 		proclist_lock_read();
    436 	pg = pid_table[pgid & pid_tbl_mask].pt_pgrp;
    437 	/*
    438 	 * Can't look up a pgrp that only exists because the session
    439 	 * hasn't died yet (traditional)
    440 	 */
    441 	if (pg == NULL || pg->pg_id != pgid || LIST_EMPTY(&pg->pg_members)) {
    442 		if (flags & PFIND_UNLOCK_FAIL)
    443 			 proclist_unlock_read();
    444 		return NULL;
    445 	}
    446 
    447 	if (flags & PFIND_UNLOCK_OK)
    448 		proclist_unlock_read();
    449 	return pg;
    450 }
    451 
    452 /*
    453  * Set entry for process 0
    454  */
    455 void
    456 proc0_insert(struct proc *p, struct lwp *l, struct pgrp *pgrp,
    457 	struct session *sess)
    458 {
    459 	int s;
    460 
    461 	simple_lock_init(&p->p_lock);
    462 	LIST_INIT(&p->p_lwps);
    463 	LIST_INSERT_HEAD(&p->p_lwps, l, l_sibling);
    464 	p->p_nlwps = 1;
    465 	simple_lock_init(&p->p_sigctx.ps_silock);
    466 	CIRCLEQ_INIT(&p->p_sigctx.ps_siginfo);
    467 
    468 	s = proclist_lock_write();
    469 
    470 	pid_table[0].pt_proc = p;
    471 	LIST_INSERT_HEAD(&allproc, p, p_list);
    472 	LIST_INSERT_HEAD(&alllwp, l, l_list);
    473 
    474 	p->p_pgrp = pgrp;
    475 	pid_table[0].pt_pgrp = pgrp;
    476 	LIST_INIT(&pgrp->pg_members);
    477 	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
    478 
    479 	pgrp->pg_session = sess;
    480 	sess->s_count = 1;
    481 	sess->s_sid = 0;
    482 	sess->s_leader = p;
    483 
    484 	proclist_unlock_write(s);
    485 }
    486 
/*
 * Double the size of the pid table.
 *
 * The new table is allocated before the proclist write lock is taken.
 * If the table size changed in the meantime, the new allocation is
 * discarded and nothing else happens.  Otherwise every old slot i is
 * split between new slots i and i + old size: one of them receives the
 * old contents, the other is pushed onto the front of the free list.
 * Finally the table pointer, mask and allocation limits are updated
 * and the old table is freed after the lock has been dropped.
 */
static void
expand_pid_table(void)
{
	uint pt_size = pid_tbl_mask + 1;	/* size of the current table */
	struct pid_table *n_pt, *new_pt;
	struct proc *proc;
	struct pgrp *pgrp;
	int i;
	int s;
	pid_t pid;

	new_pt = malloc(pt_size * 2 * sizeof *new_pt, M_PROC, M_WAITOK);

	s = proclist_lock_write();
	if (pt_size != pid_tbl_mask + 1) {
		/* Another process beat us to it... */
		proclist_unlock_write(s);
		FREE(new_pt, M_PROC);
		return;
	}

	/*
	 * Copy entries from old table into new one.
	 * If 'pid' is 'odd' we need to place in the upper half,
	 * even pid's to the lower half.
	 * Free items stay in the low half so we don't have to
	 * fixup the reference to them.
	 * We stuff free items on the front of the freelist
	 * because we can't write to unmodified entries.
	 * Processing the table backwards maintains a semblance
	 * of issuing pid numbers that increase with time.
	 *
	 * ('odd'/'even' here refers to the pt_size bit of the pid, which
	 * is what selects the half below.)
	 */
	i = pt_size - 1;
	/* n_pt tracks new_pt[i]; n_pt[pt_size] is the matching upper-half slot. */
	n_pt = new_pt + i;
	for (; ; i--, n_pt--) {
		proc = pid_table[i].pt_proc;
		pgrp = pid_table[i].pt_pgrp;
		if (!P_VALID(proc)) {
			/* Up 'use count' so that link is valid */
			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
			proc = P_FREE(pid);
			/* A pgrp still holding this slot decides which half it goes to. */
			if (pgrp)
				pid = pgrp->pg_id;
		} else
			pid = proc->p_pid;

		/* Save entry in appropriate half of table */
		n_pt[pid & pt_size].pt_proc = proc;
		n_pt[pid & pt_size].pt_pgrp = pgrp;

		/* Put other piece on start of free list */
		/* Flip the half-select bit and drop the old index bits. */
		pid = (pid ^ pt_size) & ~pid_tbl_mask;
		n_pt[pid & pt_size].pt_proc =
				    P_FREE((pid & ~pt_size) | next_free_pt);
		n_pt[pid & pt_size].pt_pgrp = 0;
		next_free_pt = i | (pid & pt_size);
		if (i == 0)
			break;
	}

	/* Switch tables */
	n_pt = pid_table;	/* remember the old table so it can be freed */
	pid_table = new_pt;
	pid_tbl_mask = pt_size * 2 - 1;

	/*
	 * pid_max starts as PID_MAX (= 30000), once we have 16384
	 * allocated pids we need it to be larger!
	 */
	if (pid_tbl_mask > PID_MAX) {
		pid_max = pid_tbl_mask * 2 + 1;
		pid_alloc_lim |= pid_alloc_lim << 1;
	} else
		pid_alloc_lim <<= 1;	/* doubles number of free slots... */

	proclist_unlock_write(s);
	FREE(n_pt, M_PROC);
}
    565 
    566 struct proc *
    567 proc_alloc(void)
    568 {
    569 	struct proc *p;
    570 	int s;
    571 	int nxt;
    572 	pid_t pid;
    573 	struct pid_table *pt;
    574 
    575 	p = pool_get(&proc_pool, PR_WAITOK);
    576 	p->p_stat = SIDL;			/* protect against others */
    577 
    578 	/* allocate next free pid */
    579 
    580 	for (;;expand_pid_table()) {
    581 		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
    582 			/* ensure pids cycle through 2000+ values */
    583 			continue;
    584 		s = proclist_lock_write();
    585 		pt = &pid_table[next_free_pt];
    586 #ifdef DIAGNOSTIC
    587 		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
    588 			panic("proc_alloc: slot busy");
    589 #endif
    590 		nxt = P_NEXT(pt->pt_proc);
    591 		if (nxt & pid_tbl_mask)
    592 			break;
    593 		/* Table full - expand (NB last entry not used....) */
    594 		proclist_unlock_write(s);
    595 	}
    596 
    597 	/* pid is 'saved use count' + 'size' + entry */
    598 	pid = (nxt & ~pid_tbl_mask) + pid_tbl_mask + 1 + next_free_pt;
    599 	if ((uint)pid > (uint)pid_max)
    600 		pid &= pid_tbl_mask;
    601 	p->p_pid = pid;
    602 	next_free_pt = nxt & pid_tbl_mask;
    603 
    604 	/* Grab table slot */
    605 	pt->pt_proc = p;
    606 	pid_alloc_cnt++;
    607 
    608 	proclist_unlock_write(s);
    609 
    610 	return p;
    611 }
    612 
    613 /*
    614  * Free last resources of a process - called from proc_free (in kern_exit.c)
    615  */
    616 void
    617 proc_free_mem(struct proc *p)
    618 {
    619 	int s;
    620 	pid_t pid = p->p_pid;
    621 	struct pid_table *pt;
    622 
    623 	s = proclist_lock_write();
    624 
    625 	pt = &pid_table[pid & pid_tbl_mask];
    626 #ifdef DIAGNOSTIC
    627 	if (__predict_false(pt->pt_proc != p))
    628 		panic("proc_free: pid_table mismatch, pid %x, proc %p",
    629 			pid, p);
    630 #endif
    631 	/* save pid use count in slot */
    632 	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
    633 
    634 	if (pt->pt_pgrp == NULL) {
    635 		/* link last freed entry onto ours */
    636 		pid &= pid_tbl_mask;
    637 		pt = &pid_table[last_free_pt];
    638 		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
    639 		last_free_pt = pid;
    640 		pid_alloc_cnt--;
    641 	}
    642 
    643 	nprocs--;
    644 	proclist_unlock_write(s);
    645 
    646 	pool_put(&proc_pool, p);
    647 }
    648 
/*
 * Move p to a new or existing process group (and session)
 *
 * If we are creating a new pgrp, the pgid should equal
 * the calling process' pid.
 * It is only valid to enter a process group that is in the session
 * of the process.
 * Also mksess should only be set if we are creating a process group
 *
 * Only called from sys_setsid, sys_setpgid/sys_setpgrp and the
 * SYSV setpgrp support for hpux == enterpgrp(curproc, curproc->p_pid)
 *
 * p:      process to move.
 * pgid:   id of the process group to join or create.
 * mksess: non-zero to also create a new session led by p.
 *
 * Returns 0 on success (including when nothing needs to change),
 * ESRCH if p is not the current process and is either not in the pid
 * table or not an inferior of the current process, EACCES if that
 * other process has P_EXEC set, and EPERM for every other refusal.
 *
 * Memory that might be needed is allocated before the write lock is
 * taken; whatever turns out to be unused is released, and an emptied
 * old process group is deleted, only after the lock has been dropped.
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *new_pgrp, *pgrp;
	struct session *sess;
	struct proc *curp = curproc;
	pid_t pid = p->p_pid;
	int rval;
	int s;
	pid_t pg_id = NO_PGID;	/* id of a pgrp to delete once unlocked */

	/* Allocate data areas we might need before doing any validity checks */
	proclist_lock_read();		/* Because pid_table might change */
	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
		proclist_unlock_read();
		new_pgrp = pool_get(&pgrp_pool, PR_WAITOK);
	} else {
		proclist_unlock_read();
		new_pgrp = NULL;
	}
	if (mksess)
		MALLOC(sess, struct session *, sizeof(struct session),
			    M_SESSION, M_WAITOK);
	else
		sess = NULL;

	s = proclist_lock_write();
	rval = EPERM;	/* most common error (to save typing) */

	/* Check pgrp exists or can be created */
	pgrp = pid_table[pgid & pid_tbl_mask].pt_pgrp;
	if (pgrp != NULL && pgrp->pg_id != pgid)
		goto done;

	/* Can only set another process under restricted circumstances. */
	if (p != curp) {
		/* must exist and be one of our children... */
		if (p != pid_table[pid & pid_tbl_mask].pt_proc
		    || !inferior(p, curp)) {
			rval = ESRCH;
			goto done;
		}
		/* ... in the same session... */
		if (sess != NULL || p->p_session != curp->p_session)
			goto done;
		/* ... existing pgid must be in same session ... */
		if (pgrp != NULL && pgrp->pg_session != p->p_session)
			goto done;
		/* ... and not done an exec. */
		if (p->p_flag & P_EXEC) {
			rval = EACCES;
			goto done;
		}
	}

	/* Changing the process group/session of a session
	   leader is definitely off limits. */
	if (SESS_LEADER(p)) {
		if (sess == NULL && p->p_pgrp == pgrp)
			/* unless it's a definite noop */
			rval = 0;
		goto done;
	}

	/* Can only create a process group with id of process */
	if (pgrp == NULL && pgid != pid)
		goto done;

	/* Can only create a session if creating pgrp */
	if (sess != NULL && pgrp != NULL)
		goto done;

	/* Check we allocated memory for a pgrp... */
	/* (the slot may have emptied since the unlocked check above) */
	if (pgrp == NULL && new_pgrp == NULL)
		goto done;

	/* Don't attach to 'zombie' pgrp */
	if (pgrp != NULL && LIST_EMPTY(&pgrp->pg_members))
		goto done;

	/* Expect to succeed now */
	rval = 0;

	if (pgrp == p->p_pgrp)
		/* nothing to do */
		goto done;

	/* Ok all setup, link up required structures */
	if (pgrp == NULL) {
		/* Consume the preallocated pgrp; clearing new_pgrp keeps 'done' from freeing it. */
		pgrp = new_pgrp;
		new_pgrp = 0;
		if (sess != NULL) {
			/* New session: p leads it, with no controlling terminal. */
			sess->s_sid = p->p_pid;
			sess->s_leader = p;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			sess->s_flags = p->p_session->s_flags & ~S_LOGIN_SET;
			memcpy(sess->s_login, p->p_session->s_login,
			    sizeof(sess->s_login));
			p->p_flag &= ~P_CONTROLT;
		} else {
			/* New pgrp in p's existing session: take a reference. */
			sess = p->p_pgrp->pg_session;
			SESSHOLD(sess);
		}
		pgrp->pg_session = sess;
		/* Clearing sess keeps 'done' from freeing a session now in use. */
		sess = 0;

		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
#ifdef DIAGNOSTIC
		if (__predict_false(pid_table[pgid & pid_tbl_mask].pt_pgrp))
			panic("enterpgrp: pgrp table slot in use");
		if (__predict_false(mksess && p != curp))
			panic("enterpgrp: mksession and p != curproc");
#endif
		pid_table[pgid & pid_tbl_mask].pt_pgrp = pgrp;
		pgrp->pg_jobc = 0;
	}

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);

	/* Move process to requested group */
	LIST_REMOVE(p, p_pglist);
	if (LIST_EMPTY(&p->p_pgrp->pg_members))
		/* defer delete until we've dumped the lock */
		pg_id = p->p_pgrp->pg_id;
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);

    done:
	proclist_unlock_write(s);
	/* Release whatever was preallocated but not used. */
	if (sess != NULL)
		free(sess, M_SESSION);
	if (new_pgrp != NULL)
		pool_put(&pgrp_pool, new_pgrp);
	if (pg_id != NO_PGID)
		pg_delete(pg_id);
#ifdef DEBUG_PGRP
	if (__predict_false(rval))
		printf("enterpgrp(%d,%d,%d), curproc %d, rval %d\n",
			pid, pgid, mksess, curp->p_pid, rval);
#endif
	return rval;
}
    812 
    813 /*
    814  * remove process from process group
    815  */
    816 int
    817 leavepgrp(struct proc *p)
    818 {
    819 	int s;
    820 	struct pgrp *pgrp;
    821 	pid_t pg_id;
    822 
    823 	s = proclist_lock_write();
    824 	pgrp = p->p_pgrp;
    825 	LIST_REMOVE(p, p_pglist);
    826 	p->p_pgrp = 0;
    827 	pg_id = LIST_EMPTY(&pgrp->pg_members) ? pgrp->pg_id : NO_PGID;
    828 	proclist_unlock_write(s);
    829 
    830 	if (pg_id != NO_PGID)
    831 		pg_delete(pg_id);
    832 	return 0;
    833 }
    834 
    835 static void
    836 pg_free(pid_t pg_id)
    837 {
    838 	struct pgrp *pgrp;
    839 	struct pid_table *pt;
    840 	int s;
    841 
    842 	s = proclist_lock_write();
    843 	pt = &pid_table[pg_id & pid_tbl_mask];
    844 	pgrp = pt->pt_pgrp;
    845 #ifdef DIAGNOSTIC
    846 	if (__predict_false(!pgrp || pgrp->pg_id != pg_id
    847 	    || !LIST_EMPTY(&pgrp->pg_members)))
    848 		panic("pg_free: process group absent or has members");
    849 #endif
    850 	pt->pt_pgrp = 0;
    851 
    852 	if (!P_VALID(pt->pt_proc)) {
    853 		/* orphaned pgrp, put slot onto free list */
    854 #ifdef DIAGNOSTIC
    855 		if (__predict_false(P_NEXT(pt->pt_proc) & pid_tbl_mask))
    856 			panic("pg_free: process slot on free list");
    857 #endif
    858 
    859 		pg_id &= pid_tbl_mask;
    860 		pt = &pid_table[last_free_pt];
    861 		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
    862 		last_free_pt = pg_id;
    863 		pid_alloc_cnt--;
    864 	}
    865 	proclist_unlock_write(s);
    866 
    867 	pool_put(&pgrp_pool, pgrp);
    868 }
    869 
    870 /*
    871  * delete a process group
    872  */
    873 static void
    874 pg_delete(pid_t pg_id)
    875 {
    876 	struct pgrp *pgrp;
    877 	struct tty *ttyp;
    878 	struct session *ss;
    879 	int s, is_pgrp_leader;
    880 
    881 	s = proclist_lock_write();
    882 	pgrp = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
    883 	if (pgrp == NULL || pgrp->pg_id != pg_id ||
    884 	    !LIST_EMPTY(&pgrp->pg_members)) {
    885 		proclist_unlock_write(s);
    886 		return;
    887 	}
    888 
    889 	ss = pgrp->pg_session;
    890 
    891 	/* Remove reference (if any) from tty to this process group */
    892 	ttyp = ss->s_ttyp;
    893 	if (ttyp != NULL && ttyp->t_pgrp == pgrp) {
    894 		ttyp->t_pgrp = NULL;
    895 #ifdef DIAGNOSTIC
    896 		if (ttyp->t_session != ss)
    897 			panic("pg_delete: wrong session on terminal");
    898 #endif
    899 	}
    900 
    901 	/*
    902 	 * The leading process group in a session is freed
    903 	 * by sessdelete() if last reference.
    904 	 */
    905 	is_pgrp_leader = (ss->s_sid == pgrp->pg_id);
    906 	proclist_unlock_write(s);
    907 	SESSRELE(ss);
    908 
    909 	if (is_pgrp_leader)
    910 		return;
    911 
    912 	pg_free(pg_id);
    913 }
    914 
    915 /*
    916  * Delete session - called from SESSRELE when s_count becomes zero.
    917  */
    918 void
    919 sessdelete(struct session *ss)
    920 {
    921 	/*
    922 	 * We keep the pgrp with the same id as the session in
    923 	 * order to stop a process being given the same pid.
    924 	 * Since the pgrp holds a reference to the session, it
    925 	 * must be a 'zombie' pgrp by now.
    926 	 */
    927 
    928 	pg_free(ss->s_sid);
    929 
    930 	FREE(ss, M_SESSION);
    931 }
    932 
    933 /*
    934  * Adjust pgrp jobc counters when specified process changes process group.
    935  * We count the number of processes in each process group that "qualify"
    936  * the group for terminal job control (those with a parent in a different
    937  * process group of the same session).  If that count reaches zero, the
    938  * process group becomes orphaned.  Check both the specified process'
    939  * process group and that of its children.
    940  * entering == 0 => p is leaving specified group.
    941  * entering == 1 => p is entering specified group.
    942  *
    943  * Call with proclist_lock held.
    944  */
    945 void
    946 fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
    947 {
    948 	struct pgrp *hispgrp;
    949 	struct session *mysession = pgrp->pg_session;
    950 	struct proc *child;
    951 
    952 	/*
    953 	 * Check p's parent to see whether p qualifies its own process
    954 	 * group; if so, adjust count for p's process group.
    955 	 */
    956 	hispgrp = p->p_pptr->p_pgrp;
    957 	if (hispgrp != pgrp && hispgrp->pg_session == mysession) {
    958 		if (entering)
    959 			pgrp->pg_jobc++;
    960 		else if (--pgrp->pg_jobc == 0)
    961 			orphanpg(pgrp);
    962 	}
    963 
    964 	/*
    965 	 * Check this process' children to see whether they qualify
    966 	 * their process groups; if so, adjust counts for children's
    967 	 * process groups.
    968 	 */
    969 	LIST_FOREACH(child, &p->p_children, p_sibling) {
    970 		hispgrp = child->p_pgrp;
    971 		if (hispgrp != pgrp && hispgrp->pg_session == mysession &&
    972 		    !P_ZOMBIE(child)) {
    973 			if (entering)
    974 				hispgrp->pg_jobc++;
    975 			else if (--hispgrp->pg_jobc == 0)
    976 				orphanpg(hispgrp);
    977 		}
    978 	}
    979 }
    980 
    981 /*
    982  * A process group has become orphaned;
    983  * if there are any stopped processes in the group,
    984  * hang-up all process in that group.
    985  *
    986  * Call with proclist_lock held.
    987  */
    988 static void
    989 orphanpg(struct pgrp *pg)
    990 {
    991 	struct proc *p;
    992 
    993 	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
    994 		if (p->p_stat == SSTOP) {
    995 			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
    996 				psignal(p, SIGHUP);
    997 				psignal(p, SIGCONT);
    998 			}
    999 			return;
   1000 		}
   1001 	}
   1002 }
   1003 
   1004 /* mark process as suid/sgid, reset some values to defaults */
   1005 void
   1006 p_sugid(struct proc *p)
   1007 {
   1008 	struct plimit *newlim;
   1009 
   1010 	p->p_flag |= P_SUGID;
   1011 	/* reset what needs to be reset in plimit */
   1012 	if (p->p_limit->pl_corename != defcorename) {
   1013 		if (p->p_limit->p_refcnt > 1 &&
   1014 		    (p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
   1015 			newlim = limcopy(p->p_limit);
   1016 			limfree(p->p_limit);
   1017 			p->p_limit = newlim;
   1018 		}
   1019 		free(p->p_limit->pl_corename, M_TEMP);
   1020 		p->p_limit->pl_corename = defcorename;
   1021 	}
   1022 }
   1023 
   1024 #ifdef DDB
   1025 #include <ddb/db_output.h>
   1026 void pidtbl_dump(void);
   1027 void
   1028 pidtbl_dump(void)
   1029 {
   1030 	struct pid_table *pt;
   1031 	struct proc *p;
   1032 	struct pgrp *pgrp;
   1033 	int id;
   1034 
   1035 	db_printf("pid table %p size %x, next %x, last %x\n",
   1036 		pid_table, pid_tbl_mask+1,
   1037 		next_free_pt, last_free_pt);
   1038 	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
   1039 		p = pt->pt_proc;
   1040 		if (!P_VALID(p) && !pt->pt_pgrp)
   1041 			continue;
   1042 		db_printf("  id %x: ", id);
   1043 		if (P_VALID(p))
   1044 			db_printf("proc %p id %d (0x%x) %s\n",
   1045 				p, p->p_pid, p->p_pid, p->p_comm);
   1046 		else
   1047 			db_printf("next %x use %x\n",
   1048 				P_NEXT(p) & pid_tbl_mask,
   1049 				P_NEXT(p) & ~pid_tbl_mask);
   1050 		if ((pgrp = pt->pt_pgrp)) {
   1051 			db_printf("\tsession %p, sid %d, count %d, login %s\n",
   1052 			    pgrp->pg_session, pgrp->pg_session->s_sid,
   1053 			    pgrp->pg_session->s_count,
   1054 			    pgrp->pg_session->s_login);
   1055 			db_printf("\tpgrp %p, pg_id %d, pg_jobc %d, members %p\n",
   1056 			    pgrp, pgrp->pg_id, pgrp->pg_jobc,
   1057 			    pgrp->pg_members.lh_first);
   1058 			for (p = pgrp->pg_members.lh_first; p != 0;
   1059 			    p = p->p_pglist.le_next) {
   1060 				db_printf("\t\tpid %d addr %p pgrp %p %s\n",
   1061 				    p->p_pid, p, p->p_pgrp, p->p_comm);
   1062 			}
   1063 		}
   1064 	}
   1065 }
   1066 #endif /* DDB */
   1067 
   1068 #ifdef KSTACK_CHECK_MAGIC
   1069 #include <sys/user.h>
   1070 
   1071 #define	KSTACK_MAGIC	0xdeadbeaf
   1072 
   1073 /* XXX should be per process basis? */
   1074 int kstackleftmin = KSTACK_SIZE;
   1075 int kstackleftthres = KSTACK_SIZE / 8; /* warn if remaining stack is
   1076 					  less than this */
   1077 
   1078 void
   1079 kstack_setup_magic(const struct lwp *l)
   1080 {
   1081 	u_int32_t *ip;
   1082 	u_int32_t const *end;
   1083 
   1084 	KASSERT(l != NULL);
   1085 	KASSERT(l != &lwp0);
   1086 
   1087 	/*
   1088 	 * fill all the stack with magic number
   1089 	 * so that later modification on it can be detected.
   1090 	 */
   1091 	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
   1092 	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
   1093 	for (; ip < end; ip++) {
   1094 		*ip = KSTACK_MAGIC;
   1095 	}
   1096 }
   1097 
   1098 void
   1099 kstack_check_magic(const struct lwp *l)
   1100 {
   1101 	u_int32_t const *ip, *end;
   1102 	int stackleft;
   1103 
   1104 	KASSERT(l != NULL);
   1105 
   1106 	/* don't check proc0 */ /*XXX*/
   1107 	if (l == &lwp0)
   1108 		return;
   1109 
   1110 #ifdef __MACHINE_STACK_GROWS_UP
   1111 	/* stack grows upwards (eg. hppa) */
   1112 	ip = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
   1113 	end = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
   1114 	for (ip--; ip >= end; ip--)
   1115 		if (*ip != KSTACK_MAGIC)
   1116 			break;
   1117 
   1118 	stackleft = (caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE - (caddr_t)ip;
   1119 #else /* __MACHINE_STACK_GROWS_UP */
   1120 	/* stack grows downwards (eg. i386) */
   1121 	ip = (u_int32_t *)KSTACK_LOWEST_ADDR(l);
   1122 	end = (u_int32_t *)((caddr_t)KSTACK_LOWEST_ADDR(l) + KSTACK_SIZE);
   1123 	for (; ip < end; ip++)
   1124 		if (*ip != KSTACK_MAGIC)
   1125 			break;
   1126 
   1127 	stackleft = (caddr_t)ip - KSTACK_LOWEST_ADDR(l);
   1128 #endif /* __MACHINE_STACK_GROWS_UP */
   1129 
   1130 	if (kstackleftmin > stackleft) {
   1131 		kstackleftmin = stackleft;
   1132 		if (stackleft < kstackleftthres)
   1133 			printf("warning: kernel stack left %d bytes"
   1134 			    "(pid %u:lid %u)\n", stackleft,
   1135 			    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
   1136 	}
   1137 
   1138 	if (stackleft <= 0) {
   1139 		panic("magic on the top of kernel stack changed for "
   1140 		    "pid %u, lid %u: maybe kernel stack overflow",
   1141 		    (u_int)l->l_proc->p_pid, (u_int)l->l_lid);
   1142 	}
   1143 }
   1144 #endif /* KSTACK_CHECK_MAGIC */
   1145