Home | History | Annotate | Line # | Download | only in kern
kern_resource.c revision 1.98.2.11
      1 /*	$NetBSD: kern_resource.c,v 1.98.2.11 2008/03/17 09:15:33 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1982, 1986, 1991, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  * (c) UNIX System Laboratories, Inc.
      7  * All or some portions of this file are derived from material licensed
      8  * to the University of California by American Telephone and Telegraph
      9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     10  * the permission of UNIX System Laboratories, Inc.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  *
     36  *	@(#)kern_resource.c	8.8 (Berkeley) 2/14/95
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.98.2.11 2008/03/17 09:15:33 yamt Exp $");
     41 
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/kernel.h>
     45 #include <sys/file.h>
     46 #include <sys/resourcevar.h>
     47 #include <sys/malloc.h>
     48 #include <sys/kmem.h>
     49 #include <sys/namei.h>
     50 #include <sys/pool.h>
     51 #include <sys/proc.h>
     52 #include <sys/sysctl.h>
     53 #include <sys/timevar.h>
     54 #include <sys/kauth.h>
     55 #include <sys/atomic.h>
     56 #include <sys/mount.h>
     57 #include <sys/syscallargs.h>
     58 
     59 #include <uvm/uvm_extern.h>
     60 
/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 */
rlim_t maxdmap = MAXDSIZ;
rlim_t maxsmap = MAXSSIZ;

/*
 * Hash table of uidinfo list heads.  The table is allocated by
 * hashinit() in resource_init(), which also fills in uihash.
 */
static kmutex_t		uihashtbl_lock;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long 		uihash;

/* Select the hash chain for a uid by masking the uid with uihash. */
#define	UIHASH(uid)	(&uihashtbl[(uid) & uihash])

/* Pool caches from which struct plimit and struct pstats are allocated. */
static pool_cache_t	plimit_cache;
static pool_cache_t	pstats_cache;
     76 
     77 void
     78 resource_init(void)
     79 {
     80 
     81 	plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0,
     82 	    "plimitpl", NULL, IPL_NONE, NULL, NULL, NULL);
     83 	pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0,
     84 	    "pstatspl", NULL, IPL_NONE, NULL, NULL, NULL);
     85 	uihashtbl = hashinit(maxproc / 16, HASH_LIST, M_PROC,
     86 	    M_WAITOK, &uihash);
     87 }
     88 
     89 /*
     90  * Resource controls and accounting.
     91  */
     92 
     93 int
     94 sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap,
     95     register_t *retval)
     96 {
     97 	/* {
     98 		syscallarg(int) which;
     99 		syscallarg(id_t) who;
    100 	} */
    101 	struct proc *curp = l->l_proc, *p;
    102 	int low = NZERO + PRIO_MAX + 1;
    103 	int who = SCARG(uap, who);
    104 
    105 	mutex_enter(&proclist_lock);
    106 	switch (SCARG(uap, which)) {
    107 	case PRIO_PROCESS:
    108 		if (who == 0)
    109 			p = curp;
    110 		else
    111 			p = p_find(who, PFIND_LOCKED);
    112 		if (p != NULL)
    113 			low = p->p_nice;
    114 		break;
    115 
    116 	case PRIO_PGRP: {
    117 		struct pgrp *pg;
    118 
    119 		if (who == 0)
    120 			pg = curp->p_pgrp;
    121 		else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
    122 			break;
    123 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
    124 			if (p->p_nice < low)
    125 				low = p->p_nice;
    126 		}
    127 		break;
    128 	}
    129 
    130 	case PRIO_USER:
    131 		if (who == 0)
    132 			who = (int)kauth_cred_geteuid(l->l_cred);
    133 		PROCLIST_FOREACH(p, &allproc) {
    134 			mutex_enter(&p->p_mutex);
    135 			if (kauth_cred_geteuid(p->p_cred) ==
    136 			    (uid_t)who && p->p_nice < low)
    137 				low = p->p_nice;
    138 			mutex_exit(&p->p_mutex);
    139 		}
    140 		break;
    141 
    142 	default:
    143 		mutex_exit(&proclist_lock);
    144 		return (EINVAL);
    145 	}
    146 	mutex_exit(&proclist_lock);
    147 
    148 	if (low == NZERO + PRIO_MAX + 1)
    149 		return (ESRCH);
    150 	*retval = low - NZERO;
    151 	return (0);
    152 }
    153 
    154 /* ARGSUSED */
    155 int
    156 sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap,
    157     register_t *retval)
    158 {
    159 	/* {
    160 		syscallarg(int) which;
    161 		syscallarg(id_t) who;
    162 		syscallarg(int) prio;
    163 	} */
    164 	struct proc *curp = l->l_proc, *p;
    165 	int found = 0, error = 0;
    166 	int who = SCARG(uap, who);
    167 
    168 	mutex_enter(&proclist_lock);
    169 	switch (SCARG(uap, which)) {
    170 	case PRIO_PROCESS:
    171 		if (who == 0)
    172 			p = curp;
    173 		else
    174 			p = p_find(who, PFIND_LOCKED);
    175 		if (p != 0) {
    176 			mutex_enter(&p->p_mutex);
    177 			error = donice(l, p, SCARG(uap, prio));
    178 			mutex_exit(&p->p_mutex);
    179 		}
    180 		found++;
    181 		break;
    182 
    183 	case PRIO_PGRP: {
    184 		struct pgrp *pg;
    185 
    186 		if (who == 0)
    187 			pg = curp->p_pgrp;
    188 		else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
    189 			break;
    190 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
    191 			mutex_enter(&p->p_mutex);
    192 			error = donice(l, p, SCARG(uap, prio));
    193 			mutex_exit(&p->p_mutex);
    194 			found++;
    195 		}
    196 		break;
    197 	}
    198 
    199 	case PRIO_USER:
    200 		if (who == 0)
    201 			who = (int)kauth_cred_geteuid(l->l_cred);
    202 		PROCLIST_FOREACH(p, &allproc) {
    203 			mutex_enter(&p->p_mutex);
    204 			if (kauth_cred_geteuid(p->p_cred) ==
    205 			    (uid_t)SCARG(uap, who)) {
    206 				error = donice(l, p, SCARG(uap, prio));
    207 				found++;
    208 			}
    209 			mutex_exit(&p->p_mutex);
    210 		}
    211 		break;
    212 
    213 	default:
    214 		error = EINVAL;
    215 		break;
    216 	}
    217 	mutex_exit(&proclist_lock);
    218 	if (found == 0)
    219 		return (ESRCH);
    220 	return (error);
    221 }
    222 
    223 /*
    224  * Renice a process.
    225  *
    226  * Call with the target process' credentials locked.
    227  */
    228 int
    229 donice(struct lwp *l, struct proc *chgp, int n)
    230 {
    231 	kauth_cred_t cred = l->l_cred;
    232 	int onice;
    233 
    234 	KASSERT(mutex_owned(&chgp->p_mutex));
    235 
    236 	if (n > PRIO_MAX)
    237 		n = PRIO_MAX;
    238 	if (n < PRIO_MIN)
    239 		n = PRIO_MIN;
    240 	n += NZERO;
    241 	onice = chgp->p_nice;
    242 	onice = chgp->p_nice;
    243 
    244   again:
    245 	if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp,
    246 	    KAUTH_ARG(n), NULL, NULL))
    247 		return (EACCES);
    248 	mutex_spin_enter(&chgp->p_smutex);
    249 	if (onice != chgp->p_nice) {
    250 		mutex_spin_exit(&chgp->p_smutex);
    251 		goto again;
    252 	}
    253 	sched_nice(chgp, n);
    254 	mutex_spin_exit(&chgp->p_smutex);
    255 	return (0);
    256 }
    257 
    258 /* ARGSUSED */
    259 int
    260 sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap,
    261     register_t *retval)
    262 {
    263 	/* {
    264 		syscallarg(int) which;
    265 		syscallarg(const struct rlimit *) rlp;
    266 	} */
    267 	int which = SCARG(uap, which);
    268 	struct rlimit alim;
    269 	int error;
    270 
    271 	error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
    272 	if (error)
    273 		return (error);
    274 	return (dosetrlimit(l, l->l_proc, which, &alim));
    275 }
    276 
    277 int
    278 dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
    279 {
    280 	struct rlimit *alimp;
    281 	int error;
    282 
    283 	if ((u_int)which >= RLIM_NLIMITS)
    284 		return (EINVAL);
    285 
    286 	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
    287 		return (EINVAL);
    288 
    289 	if (limp->rlim_cur > limp->rlim_max) {
    290 		/*
    291 		 * This is programming error. According to SUSv2, we should
    292 		 * return error in this case.
    293 		 */
    294 		return (EINVAL);
    295 	}
    296 
    297 	alimp = &p->p_rlimit[which];
    298 	/* if we don't change the value, no need to limcopy() */
    299 	if (limp->rlim_cur == alimp->rlim_cur &&
    300 	    limp->rlim_max == alimp->rlim_max)
    301 		return 0;
    302 
    303 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
    304 	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which));
    305 	if (error)
    306 		return (error);
    307 
    308 	lim_privatise(p, false);
    309 	/* p->p_limit is now unchangeable */
    310 	alimp = &p->p_rlimit[which];
    311 
    312 	switch (which) {
    313 
    314 	case RLIMIT_DATA:
    315 		if (limp->rlim_cur > maxdmap)
    316 			limp->rlim_cur = maxdmap;
    317 		if (limp->rlim_max > maxdmap)
    318 			limp->rlim_max = maxdmap;
    319 		break;
    320 
    321 	case RLIMIT_STACK:
    322 		if (limp->rlim_cur > maxsmap)
    323 			limp->rlim_cur = maxsmap;
    324 		if (limp->rlim_max > maxsmap)
    325 			limp->rlim_max = maxsmap;
    326 
    327 		/*
    328 		 * Return EINVAL if the new stack size limit is lower than
    329 		 * current usage. Otherwise, the process would get SIGSEGV the
    330 		 * moment it would try to access anything on it's current stack.
    331 		 * This conforms to SUSv2.
    332 		 */
    333 		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
    334 		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) {
    335 			return (EINVAL);
    336 		}
    337 
    338 		/*
    339 		 * Stack is allocated to the max at exec time with
    340 		 * only "rlim_cur" bytes accessible (In other words,
    341 		 * allocates stack dividing two contiguous regions at
    342 		 * "rlim_cur" bytes boundary).
    343 		 *
    344 		 * Since allocation is done in terms of page, roundup
    345 		 * "rlim_cur" (otherwise, contiguous regions
    346 		 * overlap).  If stack limit is going up make more
    347 		 * accessible, if going down make inaccessible.
    348 		 */
    349 		limp->rlim_cur = round_page(limp->rlim_cur);
    350 		if (limp->rlim_cur != alimp->rlim_cur) {
    351 			vaddr_t addr;
    352 			vsize_t size;
    353 			vm_prot_t prot;
    354 
    355 			if (limp->rlim_cur > alimp->rlim_cur) {
    356 				prot = VM_PROT_READ | VM_PROT_WRITE;
    357 				size = limp->rlim_cur - alimp->rlim_cur;
    358 				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
    359 				    limp->rlim_cur;
    360 			} else {
    361 				prot = VM_PROT_NONE;
    362 				size = alimp->rlim_cur - limp->rlim_cur;
    363 				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
    364 				     alimp->rlim_cur;
    365 			}
    366 			(void) uvm_map_protect(&p->p_vmspace->vm_map,
    367 			    addr, addr+size, prot, false);
    368 		}
    369 		break;
    370 
    371 	case RLIMIT_NOFILE:
    372 		if (limp->rlim_cur > maxfiles)
    373 			limp->rlim_cur = maxfiles;
    374 		if (limp->rlim_max > maxfiles)
    375 			limp->rlim_max = maxfiles;
    376 		break;
    377 
    378 	case RLIMIT_NPROC:
    379 		if (limp->rlim_cur > maxproc)
    380 			limp->rlim_cur = maxproc;
    381 		if (limp->rlim_max > maxproc)
    382 			limp->rlim_max = maxproc;
    383 		break;
    384 	}
    385 
    386 	mutex_enter(&p->p_limit->pl_lock);
    387 	*alimp = *limp;
    388 	mutex_exit(&p->p_limit->pl_lock);
    389 	return (0);
    390 }
    391 
    392 /* ARGSUSED */
    393 int
    394 sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap,
    395     register_t *retval)
    396 {
    397 	/* {
    398 		syscallarg(int) which;
    399 		syscallarg(struct rlimit *) rlp;
    400 	} */
    401 	struct proc *p = l->l_proc;
    402 	int which = SCARG(uap, which);
    403 	struct rlimit rl;
    404 
    405 	if ((u_int)which >= RLIM_NLIMITS)
    406 		return (EINVAL);
    407 
    408 	mutex_enter(&p->p_mutex);
    409 	memcpy(&rl, &p->p_rlimit[which], sizeof(rl));
    410 	mutex_exit(&p->p_mutex);
    411 
    412 	return copyout(&rl, SCARG(uap, rlp), sizeof(rl));
    413 }
    414 
    415 /*
    416  * Transform the running time and tick information in proc p into user,
    417  * system, and interrupt time usage.
    418  *
    419  * Should be called with p->p_smutex held unless called from exit1().
    420  */
    421 void
    422 calcru(struct proc *p, struct timeval *up, struct timeval *sp,
    423     struct timeval *ip, struct timeval *rp)
    424 {
    425 	uint64_t u, st, ut, it, tot;
    426 	struct lwp *l;
    427 	struct bintime tm;
    428 	struct timeval tv;
    429 
    430 	mutex_spin_enter(&p->p_stmutex);
    431 	st = p->p_sticks;
    432 	ut = p->p_uticks;
    433 	it = p->p_iticks;
    434 	mutex_spin_exit(&p->p_stmutex);
    435 
    436 	tm = p->p_rtime;
    437 
    438 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    439 		lwp_lock(l);
    440 		bintime_add(&tm, &l->l_rtime);
    441 		if ((l->l_flag & LW_RUNNING) != 0) {
    442 			struct bintime diff;
    443 			/*
    444 			 * Adjust for the current time slice.  This is
    445 			 * actually fairly important since the error
    446 			 * here is on the order of a time quantum,
    447 			 * which is much greater than the sampling
    448 			 * error.
    449 			 */
    450 			binuptime(&diff);
    451 			bintime_sub(&diff, &l->l_stime);
    452 			bintime_add(&tm, &diff);
    453 		}
    454 		lwp_unlock(l);
    455 	}
    456 
    457 	tot = st + ut + it;
    458 	bintime2timeval(&tm, &tv);
    459 	u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;
    460 
    461 	if (tot == 0) {
    462 		/* No ticks, so can't use to share time out, split 50-50 */
    463 		st = ut = u / 2;
    464 	} else {
    465 		st = (u * st) / tot;
    466 		ut = (u * ut) / tot;
    467 	}
    468 	if (sp != NULL) {
    469 		sp->tv_sec = st / 1000000;
    470 		sp->tv_usec = st % 1000000;
    471 	}
    472 	if (up != NULL) {
    473 		up->tv_sec = ut / 1000000;
    474 		up->tv_usec = ut % 1000000;
    475 	}
    476 	if (ip != NULL) {
    477 		if (it != 0)
    478 			it = (u * it) / tot;
    479 		ip->tv_sec = it / 1000000;
    480 		ip->tv_usec = it % 1000000;
    481 	}
    482 	if (rp != NULL) {
    483 		*rp = tv;
    484 	}
    485 }
    486 
    487 /* ARGSUSED */
    488 int
    489 sys_getrusage(struct lwp *l, const struct sys_getrusage_args *uap,
    490     register_t *retval)
    491 {
    492 	/* {
    493 		syscallarg(int) who;
    494 		syscallarg(struct rusage *) rusage;
    495 	} */
    496 	struct rusage ru;
    497 	struct proc *p = l->l_proc;
    498 
    499 	switch (SCARG(uap, who)) {
    500 	case RUSAGE_SELF:
    501 		mutex_enter(&p->p_smutex);
    502 		memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
    503 		calcru(p, &ru.ru_utime, &ru.ru_stime, NULL, NULL);
    504 		mutex_exit(&p->p_smutex);
    505 		break;
    506 
    507 	case RUSAGE_CHILDREN:
    508 		mutex_enter(&p->p_smutex);
    509 		memcpy(&ru, &p->p_stats->p_cru, sizeof(ru));
    510 		mutex_exit(&p->p_smutex);
    511 		break;
    512 
    513 	default:
    514 		return EINVAL;
    515 	}
    516 
    517 	return copyout(&ru, SCARG(uap, rusage), sizeof(ru));
    518 }
    519 
    520 void
    521 ruadd(struct rusage *ru, struct rusage *ru2)
    522 {
    523 	long *ip, *ip2;
    524 	int i;
    525 
    526 	timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
    527 	timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
    528 	if (ru->ru_maxrss < ru2->ru_maxrss)
    529 		ru->ru_maxrss = ru2->ru_maxrss;
    530 	ip = &ru->ru_first; ip2 = &ru2->ru_first;
    531 	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
    532 		*ip++ += *ip2++;
    533 }
    534 
    535 /*
    536  * Make a copy of the plimit structure.
    537  * We share these structures copy-on-write after fork,
    538  * and copy when a limit is changed.
    539  *
    540  * Unfortunately (due to PL_SHAREMOD) it is possibly for the structure
    541  * we are copying to change beneath our feet!
    542  */
    543 struct plimit *
    544 lim_copy(struct plimit *lim)
    545 {
    546 	struct plimit *newlim;
    547 	char *corename;
    548 	size_t alen, len;
    549 
    550 	newlim = pool_cache_get(plimit_cache, PR_WAITOK);
    551 	mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE);
    552 	newlim->pl_flags = 0;
    553 	newlim->pl_refcnt = 1;
    554 	newlim->pl_sv_limit = NULL;
    555 
    556 	mutex_enter(&lim->pl_lock);
    557 	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
    558 	    sizeof(struct rlimit) * RLIM_NLIMITS);
    559 
    560 	alen = 0;
    561 	corename = NULL;
    562 	for (;;) {
    563 		if (lim->pl_corename == defcorename) {
    564 			newlim->pl_corename = defcorename;
    565 			break;
    566 		}
    567 		len = strlen(lim->pl_corename) + 1;
    568 		if (len <= alen) {
    569 			newlim->pl_corename = corename;
    570 			memcpy(corename, lim->pl_corename, len);
    571 			corename = NULL;
    572 			break;
    573 		}
    574 		mutex_exit(&lim->pl_lock);
    575 		if (corename != NULL)
    576 			free(corename, M_TEMP);
    577 		alen = len;
    578 		corename = malloc(alen, M_TEMP, M_WAITOK);
    579 		mutex_enter(&lim->pl_lock);
    580 	}
    581 	mutex_exit(&lim->pl_lock);
    582 	if (corename != NULL)
    583 		free(corename, M_TEMP);
    584 	return newlim;
    585 }
    586 
    587 void
    588 lim_addref(struct plimit *lim)
    589 {
    590 	atomic_inc_uint(&lim->pl_refcnt);
    591 }
    592 
    593 /*
    594  * Give a process it's own private plimit structure.
    595  * This will only be shared (in fork) if modifications are to be shared.
    596  */
    597 void
    598 lim_privatise(struct proc *p, bool set_shared)
    599 {
    600 	struct plimit *lim, *newlim;
    601 
    602 	lim = p->p_limit;
    603 	if (lim->pl_flags & PL_WRITEABLE) {
    604 		if (set_shared)
    605 			lim->pl_flags |= PL_SHAREMOD;
    606 		return;
    607 	}
    608 
    609 	if (set_shared && lim->pl_flags & PL_SHAREMOD)
    610 		return;
    611 
    612 	newlim = lim_copy(lim);
    613 
    614 	mutex_enter(&p->p_mutex);
    615 	if (p->p_limit->pl_flags & PL_WRITEABLE) {
    616 		/* Someone crept in while we were busy */
    617 		mutex_exit(&p->p_mutex);
    618 		limfree(newlim);
    619 		if (set_shared)
    620 			p->p_limit->pl_flags |= PL_SHAREMOD;
    621 		return;
    622 	}
    623 
    624 	/*
    625 	 * Since most accesses to p->p_limit aren't locked, we must not
    626 	 * delete the old limit structure yet.
    627 	 */
    628 	newlim->pl_sv_limit = p->p_limit;
    629 	newlim->pl_flags |= PL_WRITEABLE;
    630 	if (set_shared)
    631 		newlim->pl_flags |= PL_SHAREMOD;
    632 	p->p_limit = newlim;
    633 	mutex_exit(&p->p_mutex);
    634 }
    635 
    636 void
    637 limfree(struct plimit *lim)
    638 {
    639 	struct plimit *sv_lim;
    640 
    641 	do {
    642 		if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0)
    643 			return;
    644 		if (lim->pl_corename != defcorename)
    645 			free(lim->pl_corename, M_TEMP);
    646 		sv_lim = lim->pl_sv_limit;
    647 		mutex_destroy(&lim->pl_lock);
    648 		pool_cache_put(plimit_cache, lim);
    649 	} while ((lim = sv_lim) != NULL);
    650 }
    651 
    652 struct pstats *
    653 pstatscopy(struct pstats *ps)
    654 {
    655 
    656 	struct pstats *newps;
    657 
    658 	newps = pool_cache_get(pstats_cache, PR_WAITOK);
    659 
    660 	memset(&newps->pstat_startzero, 0,
    661 	(unsigned) ((char *)&newps->pstat_endzero -
    662 		    (char *)&newps->pstat_startzero));
    663 	memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
    664 	((char *)&newps->pstat_endcopy -
    665 	 (char *)&newps->pstat_startcopy));
    666 
    667 	return (newps);
    668 
    669 }
    670 
    671 void
    672 pstatsfree(struct pstats *ps)
    673 {
    674 
    675 	pool_cache_put(pstats_cache, ps);
    676 }
    677 
    678 /*
    679  * sysctl interface in five parts
    680  */
    681 
    682 /*
    683  * a routine for sysctl proc subtree helpers that need to pick a valid
    684  * process by pid.
    685  */
    686 static int
    687 sysctl_proc_findproc(struct lwp *l, struct proc **p2, pid_t pid)
    688 {
    689 	struct proc *ptmp;
    690 	int error = 0;
    691 
    692 	if (pid == PROC_CURPROC)
    693 		ptmp = l->l_proc;
    694 	else if ((ptmp = pfind(pid)) == NULL)
    695 		error = ESRCH;
    696 
    697 	*p2 = ptmp;
    698 	return (error);
    699 }
    700 
    701 /*
    702  * sysctl helper routine for setting a process's specific corefile
    703  * name.  picks the process based on the given pid and checks the
    704  * correctness of the new value.
    705  */
    706 static int
    707 sysctl_proc_corename(SYSCTLFN_ARGS)
    708 {
    709 	struct proc *ptmp;
    710 	struct plimit *lim;
    711 	int error = 0, len;
    712 	char *cname;
    713 	char *ocore;
    714 	char *tmp;
    715 	struct sysctlnode node;
    716 
    717 	/*
    718 	 * is this all correct?
    719 	 */
    720 	if (namelen != 0)
    721 		return (EINVAL);
    722 	if (name[-1] != PROC_PID_CORENAME)
    723 		return (EINVAL);
    724 
    725 	/*
    726 	 * whom are we tweaking?
    727 	 */
    728 	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
    729 	if (error)
    730 		return (error);
    731 
    732 	/* XXX-elad */
    733 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
    734 	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
    735 	if (error)
    736 		return (error);
    737 
    738 	if (newp == NULL) {
    739 		error = kauth_authorize_process(l->l_cred,
    740 		    KAUTH_PROCESS_CORENAME, ptmp,
    741 		    KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL);
    742 		if (error)
    743 			return (error);
    744 	}
    745 
    746 	/*
    747 	 * let them modify a temporary copy of the core name
    748 	 */
    749 	cname = PNBUF_GET();
    750 	lim = ptmp->p_limit;
    751 	mutex_enter(&lim->pl_lock);
    752 	strlcpy(cname, lim->pl_corename, MAXPATHLEN);
    753 	mutex_exit(&lim->pl_lock);
    754 
    755 	node = *rnode;
    756 	node.sysctl_data = cname;
    757 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    758 
    759 	/*
    760 	 * if that failed, or they have nothing new to say, or we've
    761 	 * heard it before...
    762 	 */
    763 	if (error || newp == NULL)
    764 		goto done;
    765 	lim = ptmp->p_limit;
    766 	mutex_enter(&lim->pl_lock);
    767 	error = strcmp(cname, lim->pl_corename);
    768 	mutex_exit(&lim->pl_lock);
    769 	if (error == 0)
    770 		/* Unchanged */
    771 		goto done;
    772 
    773 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
    774 	    ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cname, NULL);
    775 	if (error)
    776 		return (error);
    777 
    778 	/*
    779 	 * no error yet and cname now has the new core name in it.
    780 	 * let's see if it looks acceptable.  it must be either "core"
    781 	 * or end in ".core" or "/core".
    782 	 */
    783 	len = strlen(cname);
    784 	if (len < 4) {
    785 		error = EINVAL;
    786 	} else if (strcmp(cname + len - 4, "core") != 0) {
    787 		error = EINVAL;
    788 	} else if (len > 4 && cname[len - 5] != '/' && cname[len - 5] != '.') {
    789 		error = EINVAL;
    790 	}
    791 	if (error != 0) {
    792 		goto done;
    793 	}
    794 
    795 	/*
    796 	 * hmm...looks good.  now...where do we put it?
    797 	 */
    798 	tmp = malloc(len + 1, M_TEMP, M_WAITOK|M_CANFAIL);
    799 	if (tmp == NULL) {
    800 		error = ENOMEM;
    801 		goto done;
    802 	}
    803 	memcpy(tmp, cname, len + 1);
    804 
    805 	lim_privatise(ptmp, false);
    806 	lim = ptmp->p_limit;
    807 	mutex_enter(&lim->pl_lock);
    808 	ocore = lim->pl_corename;
    809 	lim->pl_corename = tmp;
    810 	mutex_exit(&lim->pl_lock);
    811 	if (ocore != defcorename)
    812 		free(ocore, M_TEMP);
    813 
    814 done:
    815 	PNBUF_PUT(cname);
    816 	return error;
    817 }
    818 
    819 /*
    820  * sysctl helper routine for checking/setting a process's stop flags,
    821  * one for fork and one for exec.
    822  */
    823 static int
    824 sysctl_proc_stop(SYSCTLFN_ARGS)
    825 {
    826 	struct proc *ptmp;
    827 	int i, f, error = 0;
    828 	struct sysctlnode node;
    829 
    830 	if (namelen != 0)
    831 		return (EINVAL);
    832 
    833 	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
    834 	if (error)
    835 		return (error);
    836 
    837 	/* XXX-elad */
    838 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
    839 	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
    840 	if (error)
    841 		return (error);
    842 
    843 	switch (rnode->sysctl_num) {
    844 	case PROC_PID_STOPFORK:
    845 		f = PS_STOPFORK;
    846 		break;
    847 	case PROC_PID_STOPEXEC:
    848 		f = PS_STOPEXEC;
    849 		break;
    850 	case PROC_PID_STOPEXIT:
    851 		f = PS_STOPEXIT;
    852 		break;
    853 	default:
    854 		return (EINVAL);
    855 	}
    856 
    857 	i = (ptmp->p_flag & f) ? 1 : 0;
    858 	node = *rnode;
    859 	node.sysctl_data = &i;
    860 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    861 	if (error || newp == NULL)
    862 		return (error);
    863 
    864 	mutex_enter(&ptmp->p_smutex);
    865 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG,
    866 	    ptmp, KAUTH_ARG(f), NULL, NULL);
    867 	if (error)
    868 		return (error);
    869 	if (i)
    870 		ptmp->p_sflag |= f;
    871 	else
    872 		ptmp->p_sflag &= ~f;
    873 	mutex_exit(&ptmp->p_smutex);
    874 
    875 	return (0);
    876 }
    877 
    878 /*
    879  * sysctl helper routine for a process's rlimits as exposed by sysctl.
    880  */
    881 static int
    882 sysctl_proc_plimit(SYSCTLFN_ARGS)
    883 {
    884 	struct proc *ptmp;
    885 	u_int limitno;
    886 	int which, error = 0;
    887         struct rlimit alim;
    888 	struct sysctlnode node;
    889 
    890 	if (namelen != 0)
    891 		return (EINVAL);
    892 
    893 	which = name[-1];
    894 	if (which != PROC_PID_LIMIT_TYPE_SOFT &&
    895 	    which != PROC_PID_LIMIT_TYPE_HARD)
    896 		return (EINVAL);
    897 
    898 	limitno = name[-2] - 1;
    899 	if (limitno >= RLIM_NLIMITS)
    900 		return (EINVAL);
    901 
    902 	if (name[-3] != PROC_PID_LIMIT)
    903 		return (EINVAL);
    904 
    905 	error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-4]);
    906 	if (error)
    907 		return (error);
    908 
    909 	/* XXX-elad */
    910 	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
    911 	    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
    912 	if (error)
    913 		return (error);
    914 
    915 	/* Check if we can view limits. */
    916 	if (newp == NULL) {
    917 		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
    918 		    ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim,
    919 		    KAUTH_ARG(which));
    920 		if (error)
    921 			return (error);
    922 	}
    923 
    924 	node = *rnode;
    925 	memcpy(&alim, &ptmp->p_rlimit[limitno], sizeof(alim));
    926 	if (which == PROC_PID_LIMIT_TYPE_HARD)
    927 		node.sysctl_data = &alim.rlim_max;
    928 	else
    929 		node.sysctl_data = &alim.rlim_cur;
    930 
    931 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    932 	if (error || newp == NULL)
    933 		return (error);
    934 
    935 	return (dosetrlimit(l, ptmp, limitno, &alim));
    936 }
    937 
    938 /*
    939  * and finally, the actually glue that sticks it to the tree
    940  */
    941 SYSCTL_SETUP(sysctl_proc_setup, "sysctl proc subtree setup")
    942 {
    943 
    944 	sysctl_createv(clog, 0, NULL, NULL,
    945 		       CTLFLAG_PERMANENT,
    946 		       CTLTYPE_NODE, "proc", NULL,
    947 		       NULL, 0, NULL, 0,
    948 		       CTL_PROC, CTL_EOL);
    949 	sysctl_createv(clog, 0, NULL, NULL,
    950 		       CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER,
    951 		       CTLTYPE_NODE, "curproc",
    952 		       SYSCTL_DESCR("Per-process settings"),
    953 		       NULL, 0, NULL, 0,
    954 		       CTL_PROC, PROC_CURPROC, CTL_EOL);
    955 
    956 	sysctl_createv(clog, 0, NULL, NULL,
    957 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
    958 		       CTLTYPE_STRING, "corename",
    959 		       SYSCTL_DESCR("Core file name"),
    960 		       sysctl_proc_corename, 0, NULL, MAXPATHLEN,
    961 		       CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL);
    962 	sysctl_createv(clog, 0, NULL, NULL,
    963 		       CTLFLAG_PERMANENT,
    964 		       CTLTYPE_NODE, "rlimit",
    965 		       SYSCTL_DESCR("Process limits"),
    966 		       NULL, 0, NULL, 0,
    967 		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL);
    968 
    969 #define create_proc_plimit(s, n) do {					\
    970 	sysctl_createv(clog, 0, NULL, NULL,				\
    971 		       CTLFLAG_PERMANENT,				\
    972 		       CTLTYPE_NODE, s,					\
    973 		       SYSCTL_DESCR("Process " s " limits"),		\
    974 		       NULL, 0, NULL, 0,				\
    975 		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
    976 		       CTL_EOL);					\
    977 	sysctl_createv(clog, 0, NULL, NULL,				\
    978 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
    979 		       CTLTYPE_QUAD, "soft",				\
    980 		       SYSCTL_DESCR("Process soft " s " limit"),	\
    981 		       sysctl_proc_plimit, 0, NULL, 0,			\
    982 		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
    983 		       PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL);		\
    984 	sysctl_createv(clog, 0, NULL, NULL,				\
    985 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
    986 		       CTLTYPE_QUAD, "hard",				\
    987 		       SYSCTL_DESCR("Process hard " s " limit"),	\
    988 		       sysctl_proc_plimit, 0, NULL, 0,			\
    989 		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n,	\
    990 		       PROC_PID_LIMIT_TYPE_HARD, CTL_EOL);		\
    991 	} while (0/*CONSTCOND*/)
    992 
    993 	create_proc_plimit("cputime",		PROC_PID_LIMIT_CPU);
    994 	create_proc_plimit("filesize",		PROC_PID_LIMIT_FSIZE);
    995 	create_proc_plimit("datasize",		PROC_PID_LIMIT_DATA);
    996 	create_proc_plimit("stacksize",		PROC_PID_LIMIT_STACK);
    997 	create_proc_plimit("coredumpsize",	PROC_PID_LIMIT_CORE);
    998 	create_proc_plimit("memoryuse",		PROC_PID_LIMIT_RSS);
    999 	create_proc_plimit("memorylocked",	PROC_PID_LIMIT_MEMLOCK);
   1000 	create_proc_plimit("maxproc",		PROC_PID_LIMIT_NPROC);
   1001 	create_proc_plimit("descriptors",	PROC_PID_LIMIT_NOFILE);
   1002 	create_proc_plimit("sbsize",		PROC_PID_LIMIT_SBSIZE);
   1003 
   1004 #undef create_proc_plimit
   1005 
   1006 	sysctl_createv(clog, 0, NULL, NULL,
   1007 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
   1008 		       CTLTYPE_INT, "stopfork",
   1009 		       SYSCTL_DESCR("Stop process at fork(2)"),
   1010 		       sysctl_proc_stop, 0, NULL, 0,
   1011 		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL);
   1012 	sysctl_createv(clog, 0, NULL, NULL,
   1013 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
   1014 		       CTLTYPE_INT, "stopexec",
   1015 		       SYSCTL_DESCR("Stop process at execve(2)"),
   1016 		       sysctl_proc_stop, 0, NULL, 0,
   1017 		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL);
   1018 	sysctl_createv(clog, 0, NULL, NULL,
   1019 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
   1020 		       CTLTYPE_INT, "stopexit",
   1021 		       SYSCTL_DESCR("Stop process before completing exit"),
   1022 		       sysctl_proc_stop, 0, NULL, 0,
   1023 		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL);
   1024 }
   1025 
   1026 void
   1027 uid_init(void)
   1028 {
   1029 
   1030 	/*
   1031 	 * XXXSMP This could be at IPL_SOFTNET, but for now we want
   1032 	 * to to be deadlock free, so it must be at IPL_VM.
   1033 	 */
   1034 	mutex_init(&uihashtbl_lock, MUTEX_DEFAULT, IPL_VM);
   1035 
   1036 	/*
   1037 	 * Ensure that uid 0 is always in the user hash table, as
   1038 	 * sbreserve() expects it available from interrupt context.
   1039 	 */
   1040 	(void)uid_find(0);
   1041 }
   1042 
   1043 struct uidinfo *
   1044 uid_find(uid_t uid)
   1045 {
   1046 	struct uidinfo *uip;
   1047 	struct uidinfo *newuip = NULL;
   1048 	struct uihashhead *uipp;
   1049 
   1050 	uipp = UIHASH(uid);
   1051 
   1052 again:
   1053 	mutex_enter(&uihashtbl_lock);
   1054 	LIST_FOREACH(uip, uipp, ui_hash)
   1055 		if (uip->ui_uid == uid) {
   1056 			mutex_exit(&uihashtbl_lock);
   1057 			if (newuip) {
   1058 				mutex_destroy(&newuip->ui_lock);
   1059 				kmem_free(newuip, sizeof(*newuip));
   1060 			}
   1061 			return uip;
   1062 		}
   1063 	if (newuip == NULL) {
   1064 		mutex_exit(&uihashtbl_lock);
   1065 		/* Must not be called from interrupt context. */
   1066 		newuip = kmem_zalloc(sizeof(*newuip), KM_SLEEP);
   1067 		/* XXX this could be IPL_SOFTNET */
   1068 		mutex_init(&newuip->ui_lock, MUTEX_DEFAULT, IPL_VM);
   1069 		goto again;
   1070 	}
   1071 	uip = newuip;
   1072 
   1073 	LIST_INSERT_HEAD(uipp, uip, ui_hash);
   1074 	uip->ui_uid = uid;
   1075 	mutex_exit(&uihashtbl_lock);
   1076 
   1077 	return uip;
   1078 }
   1079 
   1080 /*
   1081  * Change the count associated with number of processes
   1082  * a given user is using.
   1083  */
   1084 int
   1085 chgproccnt(uid_t uid, int diff)
   1086 {
   1087 	struct uidinfo *uip;
   1088 
   1089 	uip = uid_find(uid);
   1090 	mutex_enter(&uip->ui_lock);
   1091 	uip->ui_proccnt += diff;
   1092 	KASSERT(uip->ui_proccnt >= 0);
   1093 	mutex_exit(&uip->ui_lock);
   1094 	return uip->ui_proccnt;
   1095 }
   1096 
   1097 int
   1098 chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t xmax)
   1099 {
   1100 	rlim_t nsb;
   1101 
   1102 	mutex_enter(&uip->ui_lock);
   1103 	nsb = uip->ui_sbsize + to - *hiwat;
   1104 	if (to > *hiwat && nsb > xmax) {
   1105 		mutex_exit(&uip->ui_lock);
   1106 		return 0;
   1107 	}
   1108 	*hiwat = to;
   1109 	uip->ui_sbsize = nsb;
   1110 	KASSERT(uip->ui_sbsize >= 0);
   1111 	mutex_exit(&uip->ui_lock);
   1112 	return 1;
   1113 }
   1114