/*	$NetBSD: kern_resource.c,v 1.139 2008/04/24 18:39:24 ad Exp $	*/
2
3 /*-
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)kern_resource.c 8.8 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: kern_resource.c,v 1.139 2008/04/24 18:39:24 ad Exp $");
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/file.h>
46 #include <sys/resourcevar.h>
47 #include <sys/malloc.h>
48 #include <sys/kmem.h>
49 #include <sys/namei.h>
50 #include <sys/pool.h>
51 #include <sys/proc.h>
52 #include <sys/sysctl.h>
53 #include <sys/timevar.h>
54 #include <sys/kauth.h>
55 #include <sys/atomic.h>
56 #include <sys/mount.h>
57 #include <sys/syscallargs.h>
58 #include <sys/atomic.h>
59
60 #include <uvm/uvm_extern.h>
61
/*
 * Maximum process data and stack limits.
 * They are variables so they are patchable.
 */
rlim_t maxdmap = MAXDSIZ;	/* system-wide ceiling for RLIMIT_DATA */
rlim_t maxsmap = MAXSSIZ;	/* system-wide ceiling for RLIMIT_STACK */

/* Hash table of per-uid accounting records; populated by uid_find(). */
static SLIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash;		/* hash mask: table size - 1 */

/* Map a uid to its hash chain head. */
#define UIHASH(uid) (&uihashtbl[(uid) & uihash])

/* Object caches for plimit/pstats, created in resource_init(). */
static pool_cache_t plimit_cache;
static pool_cache_t pstats_cache;
76
77 void
78 resource_init(void)
79 {
80 /*
81 * In case of MP system, SLIST_FOREACH would force a cache line
82 * write-back for every modified 'uidinfo', thus we try to keep the
83 * lists short.
84 */
85 const u_int uihash_sz = (maxproc > 1 ? 1024 : 64);
86
87 plimit_cache = pool_cache_init(sizeof(struct plimit), 0, 0, 0,
88 "plimitpl", NULL, IPL_NONE, NULL, NULL, NULL);
89 pstats_cache = pool_cache_init(sizeof(struct pstats), 0, 0, 0,
90 "pstatspl", NULL, IPL_NONE, NULL, NULL, NULL);
91 uihashtbl = hashinit(uihash_sz, HASH_SLIST, M_PROC, M_WAITOK, &uihash);
92 }
93
94 /*
95 * Resource controls and accounting.
96 */
97
98 int
99 sys_getpriority(struct lwp *l, const struct sys_getpriority_args *uap,
100 register_t *retval)
101 {
102 /* {
103 syscallarg(int) which;
104 syscallarg(id_t) who;
105 } */
106 struct proc *curp = l->l_proc, *p;
107 int low = NZERO + PRIO_MAX + 1;
108 int who = SCARG(uap, who);
109
110 mutex_enter(proc_lock);
111 switch (SCARG(uap, which)) {
112 case PRIO_PROCESS:
113 if (who == 0)
114 p = curp;
115 else
116 p = p_find(who, PFIND_LOCKED);
117 if (p != NULL)
118 low = p->p_nice;
119 break;
120
121 case PRIO_PGRP: {
122 struct pgrp *pg;
123
124 if (who == 0)
125 pg = curp->p_pgrp;
126 else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
127 break;
128 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
129 if (p->p_nice < low)
130 low = p->p_nice;
131 }
132 break;
133 }
134
135 case PRIO_USER:
136 if (who == 0)
137 who = (int)kauth_cred_geteuid(l->l_cred);
138 PROCLIST_FOREACH(p, &allproc) {
139 mutex_enter(p->p_lock);
140 if (kauth_cred_geteuid(p->p_cred) ==
141 (uid_t)who && p->p_nice < low)
142 low = p->p_nice;
143 mutex_exit(p->p_lock);
144 }
145 break;
146
147 default:
148 mutex_exit(proc_lock);
149 return (EINVAL);
150 }
151 mutex_exit(proc_lock);
152
153 if (low == NZERO + PRIO_MAX + 1)
154 return (ESRCH);
155 *retval = low - NZERO;
156 return (0);
157 }
158
159 /* ARGSUSED */
160 int
161 sys_setpriority(struct lwp *l, const struct sys_setpriority_args *uap,
162 register_t *retval)
163 {
164 /* {
165 syscallarg(int) which;
166 syscallarg(id_t) who;
167 syscallarg(int) prio;
168 } */
169 struct proc *curp = l->l_proc, *p;
170 int found = 0, error = 0;
171 int who = SCARG(uap, who);
172
173 mutex_enter(proc_lock);
174 switch (SCARG(uap, which)) {
175 case PRIO_PROCESS:
176 if (who == 0)
177 p = curp;
178 else
179 p = p_find(who, PFIND_LOCKED);
180 if (p != 0) {
181 mutex_enter(p->p_lock);
182 error = donice(l, p, SCARG(uap, prio));
183 mutex_exit(p->p_lock);
184 }
185 found++;
186 break;
187
188 case PRIO_PGRP: {
189 struct pgrp *pg;
190
191 if (who == 0)
192 pg = curp->p_pgrp;
193 else if ((pg = pg_find(who, PFIND_LOCKED)) == NULL)
194 break;
195 LIST_FOREACH(p, &pg->pg_members, p_pglist) {
196 mutex_enter(p->p_lock);
197 error = donice(l, p, SCARG(uap, prio));
198 mutex_exit(p->p_lock);
199 found++;
200 }
201 break;
202 }
203
204 case PRIO_USER:
205 if (who == 0)
206 who = (int)kauth_cred_geteuid(l->l_cred);
207 PROCLIST_FOREACH(p, &allproc) {
208 mutex_enter(p->p_lock);
209 if (kauth_cred_geteuid(p->p_cred) ==
210 (uid_t)SCARG(uap, who)) {
211 error = donice(l, p, SCARG(uap, prio));
212 found++;
213 }
214 mutex_exit(p->p_lock);
215 }
216 break;
217
218 default:
219 error = EINVAL;
220 break;
221 }
222 mutex_exit(proc_lock);
223 if (found == 0)
224 return (ESRCH);
225 return (error);
226 }
227
228 /*
229 * Renice a process.
230 *
231 * Call with the target process' credentials locked.
232 */
233 int
234 donice(struct lwp *l, struct proc *chgp, int n)
235 {
236 kauth_cred_t cred = l->l_cred;
237
238 KASSERT(mutex_owned(chgp->p_lock));
239
240 if (n > PRIO_MAX)
241 n = PRIO_MAX;
242 if (n < PRIO_MIN)
243 n = PRIO_MIN;
244 n += NZERO;
245 if (kauth_authorize_process(cred, KAUTH_PROCESS_NICE, chgp,
246 KAUTH_ARG(n), NULL, NULL))
247 return (EACCES);
248 sched_nice(chgp, n);
249 return (0);
250 }
251
252 /* ARGSUSED */
253 int
254 sys_setrlimit(struct lwp *l, const struct sys_setrlimit_args *uap,
255 register_t *retval)
256 {
257 /* {
258 syscallarg(int) which;
259 syscallarg(const struct rlimit *) rlp;
260 } */
261 int which = SCARG(uap, which);
262 struct rlimit alim;
263 int error;
264
265 error = copyin(SCARG(uap, rlp), &alim, sizeof(struct rlimit));
266 if (error)
267 return (error);
268 return (dosetrlimit(l, l->l_proc, which, &alim));
269 }
270
/*
 * Validate and install a new resource limit '*limp' of type 'which'
 * for process 'p', using the credentials of 'l' for authorization.
 * May clamp the requested values to system maxima.  Returns 0 or errno.
 */
int
dosetrlimit(struct lwp *l, struct proc *p, int which, struct rlimit *limp)
{
	struct rlimit *alimp;
	int error;

	if ((u_int)which >= RLIM_NLIMITS)
		return (EINVAL);

	if (limp->rlim_cur < 0 || limp->rlim_max < 0)
		return (EINVAL);

	if (limp->rlim_cur > limp->rlim_max) {
		/*
		 * This is programming error. According to SUSv2, we should
		 * return error in this case.
		 */
		return (EINVAL);
	}

	alimp = &p->p_rlimit[which];
	/* if we don't change the value, no need to limcopy() */
	if (limp->rlim_cur == alimp->rlim_cur &&
	    limp->rlim_max == alimp->rlim_max)
		return 0;

	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
	    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_SET), limp, KAUTH_ARG(which));
	if (error)
		return (error);

	/* Get a private copy-on-write plimit before modifying it. */
	lim_privatise(p, false);
	/* p->p_limit is now unchangeable */
	alimp = &p->p_rlimit[which];

	/* Clamp the request to the relevant system-wide maximum. */
	switch (which) {

	case RLIMIT_DATA:
		if (limp->rlim_cur > maxdmap)
			limp->rlim_cur = maxdmap;
		if (limp->rlim_max > maxdmap)
			limp->rlim_max = maxdmap;
		break;

	case RLIMIT_STACK:
		if (limp->rlim_cur > maxsmap)
			limp->rlim_cur = maxsmap;
		if (limp->rlim_max > maxsmap)
			limp->rlim_max = maxsmap;

		/*
		 * Return EINVAL if the new stack size limit is lower than
		 * current usage. Otherwise, the process would get SIGSEGV the
		 * moment it would try to access anything on it's current stack.
		 * This conforms to SUSv2.
		 */
		if (limp->rlim_cur < p->p_vmspace->vm_ssize * PAGE_SIZE
		    || limp->rlim_max < p->p_vmspace->vm_ssize * PAGE_SIZE) {
			return (EINVAL);
		}

		/*
		 * Stack is allocated to the max at exec time with
		 * only "rlim_cur" bytes accessible (In other words,
		 * allocates stack dividing two contiguous regions at
		 * "rlim_cur" bytes boundary).
		 *
		 * Since allocation is done in terms of page, roundup
		 * "rlim_cur" (otherwise, contiguous regions
		 * overlap). If stack limit is going up make more
		 * accessible, if going down make inaccessible.
		 */
		limp->rlim_cur = round_page(limp->rlim_cur);
		if (limp->rlim_cur != alimp->rlim_cur) {
			vaddr_t addr;
			vsize_t size;
			vm_prot_t prot;

			if (limp->rlim_cur > alimp->rlim_cur) {
				/* Growing: make the new span accessible. */
				prot = VM_PROT_READ | VM_PROT_WRITE;
				size = limp->rlim_cur - alimp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				    limp->rlim_cur;
			} else {
				/* Shrinking: revoke access to the excess. */
				prot = VM_PROT_NONE;
				size = alimp->rlim_cur - limp->rlim_cur;
				addr = (vaddr_t)p->p_vmspace->vm_minsaddr -
				    alimp->rlim_cur;
			}
			(void) uvm_map_protect(&p->p_vmspace->vm_map,
			    addr, addr+size, prot, false);
		}
		break;

	case RLIMIT_NOFILE:
		if (limp->rlim_cur > maxfiles)
			limp->rlim_cur = maxfiles;
		if (limp->rlim_max > maxfiles)
			limp->rlim_max = maxfiles;
		break;

	case RLIMIT_NPROC:
		if (limp->rlim_cur > maxproc)
			limp->rlim_cur = maxproc;
		if (limp->rlim_max > maxproc)
			limp->rlim_max = maxproc;
		break;
	}

	/* Publish the new limit under the plimit lock. */
	mutex_enter(&p->p_limit->pl_lock);
	*alimp = *limp;
	mutex_exit(&p->p_limit->pl_lock);
	return (0);
}
385
386 /* ARGSUSED */
387 int
388 sys_getrlimit(struct lwp *l, const struct sys_getrlimit_args *uap,
389 register_t *retval)
390 {
391 /* {
392 syscallarg(int) which;
393 syscallarg(struct rlimit *) rlp;
394 } */
395 struct proc *p = l->l_proc;
396 int which = SCARG(uap, which);
397 struct rlimit rl;
398
399 if ((u_int)which >= RLIM_NLIMITS)
400 return (EINVAL);
401
402 mutex_enter(p->p_lock);
403 memcpy(&rl, &p->p_rlimit[which], sizeof(rl));
404 mutex_exit(p->p_lock);
405
406 return copyout(&rl, SCARG(uap, rlp), sizeof(rl));
407 }
408
/*
 * Transform the running time and tick information in proc p into user,
 * system, and interrupt time usage.
 *
 * Should be called with p->p_lock held unless called from exit1().
 *
 * Any of up/sp/ip/rp may be NULL if the caller does not want that
 * component; rp receives the total (real) run time.
 */
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp,
    struct timeval *ip, struct timeval *rp)
{
	uint64_t u, st, ut, it, tot;
	struct lwp *l;
	struct bintime tm;
	struct timeval tv;

	/* Snapshot the statclock tick counters. */
	mutex_spin_enter(&p->p_stmutex);
	st = p->p_sticks;
	ut = p->p_uticks;
	it = p->p_iticks;
	mutex_spin_exit(&p->p_stmutex);

	/* Start from run time already banked by exited LWPs. */
	tm = p->p_rtime;

	/* Add each live LWP's accumulated run time. */
	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		lwp_lock(l);
		bintime_add(&tm, &l->l_rtime);
		if ((l->l_flag & LW_RUNNING) != 0) {
			struct bintime diff;
			/*
			 * Adjust for the current time slice. This is
			 * actually fairly important since the error
			 * here is on the order of a time quantum,
			 * which is much greater than the sampling
			 * error.
			 */
			binuptime(&diff);
			bintime_sub(&diff, &l->l_stime);
			bintime_add(&tm, &diff);
		}
		lwp_unlock(l);
	}

	/* Split total time 'u' proportionally to the tick counts. */
	tot = st + ut + it;
	bintime2timeval(&tm, &tv);
	u = (uint64_t)tv.tv_sec * 1000000ul + tv.tv_usec;

	if (tot == 0) {
		/* No ticks, so can't use to share time out, split 50-50 */
		st = ut = u / 2;
	} else {
		st = (u * st) / tot;
		ut = (u * ut) / tot;
	}
	if (sp != NULL) {
		sp->tv_sec = st / 1000000;
		sp->tv_usec = st % 1000000;
	}
	if (up != NULL) {
		up->tv_sec = ut / 1000000;
		up->tv_usec = ut % 1000000;
	}
	if (ip != NULL) {
		if (it != 0)
			it = (u * it) / tot;
		ip->tv_sec = it / 1000000;
		ip->tv_usec = it % 1000000;
	}
	if (rp != NULL) {
		*rp = tv;
	}
}
480
481 /* ARGSUSED */
482 int
483 sys_getrusage(struct lwp *l, const struct sys_getrusage_args *uap,
484 register_t *retval)
485 {
486 /* {
487 syscallarg(int) who;
488 syscallarg(struct rusage *) rusage;
489 } */
490 struct rusage ru;
491 struct proc *p = l->l_proc;
492
493 switch (SCARG(uap, who)) {
494 case RUSAGE_SELF:
495 mutex_enter(p->p_lock);
496 memcpy(&ru, &p->p_stats->p_ru, sizeof(ru));
497 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL, NULL);
498 rulwps(p, &ru);
499 mutex_exit(p->p_lock);
500 break;
501
502 case RUSAGE_CHILDREN:
503 mutex_enter(p->p_lock);
504 memcpy(&ru, &p->p_stats->p_cru, sizeof(ru));
505 mutex_exit(p->p_lock);
506 break;
507
508 default:
509 return EINVAL;
510 }
511
512 return copyout(&ru, SCARG(uap, rusage), sizeof(ru));
513 }
514
515 void
516 ruadd(struct rusage *ru, struct rusage *ru2)
517 {
518 long *ip, *ip2;
519 int i;
520
521 timeradd(&ru->ru_utime, &ru2->ru_utime, &ru->ru_utime);
522 timeradd(&ru->ru_stime, &ru2->ru_stime, &ru->ru_stime);
523 if (ru->ru_maxrss < ru2->ru_maxrss)
524 ru->ru_maxrss = ru2->ru_maxrss;
525 ip = &ru->ru_first; ip2 = &ru2->ru_first;
526 for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
527 *ip++ += *ip2++;
528 }
529
/*
 * Fold the per-LWP resource usage and context-switch counts of every
 * LWP in 'p' into *ru.  Caller must hold p->p_lock.
 */
void
rulwps(proc_t *p, struct rusage *ru)
{
	lwp_t *l;

	KASSERT(mutex_owned(p->p_lock));

	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
		ruadd(ru, &l->l_ru);
		/* l_ncsw is total switches; subtract the involuntary ones. */
		ru->ru_nvcsw += (l->l_ncsw - l->l_nivcsw);
		ru->ru_nivcsw += l->l_nivcsw;
	}
}
543
/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 *
 * Unfortunately (due to PL_SHAREMOD) it is possibly for the structure
 * we are copying to change beneath our feet!
 */
struct plimit *
lim_copy(struct plimit *lim)
{
	struct plimit *newlim;
	char *corename;
	size_t alen, len;

	newlim = pool_cache_get(plimit_cache, PR_WAITOK);
	mutex_init(&newlim->pl_lock, MUTEX_DEFAULT, IPL_NONE);
	newlim->pl_flags = 0;
	newlim->pl_refcnt = 1;
	newlim->pl_sv_limit = NULL;

	mutex_enter(&lim->pl_lock);
	memcpy(newlim->pl_rlimit, lim->pl_rlimit,
	    sizeof(struct rlimit) * RLIM_NLIMITS);

	/*
	 * Retry loop: the source core name may change (and grow) while
	 * pl_lock is dropped for the allocation, so re-check the length
	 * under the lock after every allocation until the buffer fits.
	 */
	alen = 0;
	corename = NULL;
	for (;;) {
		if (lim->pl_corename == defcorename) {
			/* Default name is a shared static; no copy needed. */
			newlim->pl_corename = defcorename;
			break;
		}
		len = strlen(lim->pl_corename) + 1;
		if (len <= alen) {
			/* Buffer is big enough: copy under the lock. */
			newlim->pl_corename = corename;
			memcpy(corename, lim->pl_corename, len);
			corename = NULL;
			break;
		}
		/* Need more space: drop the lock and (re)allocate. */
		mutex_exit(&lim->pl_lock);
		if (corename != NULL)
			free(corename, M_TEMP);
		alen = len;
		corename = malloc(alen, M_TEMP, M_WAITOK);
		mutex_enter(&lim->pl_lock);
	}
	mutex_exit(&lim->pl_lock);
	/* Free a spare buffer left over if the name shrank meanwhile. */
	if (corename != NULL)
		free(corename, M_TEMP);
	return newlim;
}
595
/*
 * Take an additional reference on a plimit structure.  Lock-free: the
 * reference count is manipulated atomically.
 */
void
lim_addref(struct plimit *lim)
{
	atomic_inc_uint(&lim->pl_refcnt);
}
601
/*
 * Give a process it's own private plimit structure.
 * This will only be shared (in fork) if modifications are to be shared.
 */
void
lim_privatise(struct proc *p, bool set_shared)
{
	struct plimit *lim, *newlim;

	lim = p->p_limit;
	/* Already private (writeable)?  Just record the sharing intent. */
	if (lim->pl_flags & PL_WRITEABLE) {
		if (set_shared)
			lim->pl_flags |= PL_SHAREMOD;
		return;
	}

	/* Modifications are to be shared and already set up that way. */
	if (set_shared && lim->pl_flags & PL_SHAREMOD)
		return;

	/* Copy outside p_lock: lim_copy() may sleep. */
	newlim = lim_copy(lim);

	mutex_enter(p->p_lock);
	if (p->p_limit->pl_flags & PL_WRITEABLE) {
		/* Someone crept in while we were busy */
		mutex_exit(p->p_lock);
		limfree(newlim);
		if (set_shared)
			p->p_limit->pl_flags |= PL_SHAREMOD;
		return;
	}

	/*
	 * Since most accesses to p->p_limit aren't locked, we must not
	 * delete the old limit structure yet.
	 */
	newlim->pl_sv_limit = p->p_limit;
	newlim->pl_flags |= PL_WRITEABLE;
	if (set_shared)
		newlim->pl_flags |= PL_SHAREMOD;
	p->p_limit = newlim;
	mutex_exit(p->p_lock);
}
644
645 void
646 limfree(struct plimit *lim)
647 {
648 struct plimit *sv_lim;
649
650 do {
651 if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0)
652 return;
653 if (lim->pl_corename != defcorename)
654 free(lim->pl_corename, M_TEMP);
655 sv_lim = lim->pl_sv_limit;
656 mutex_destroy(&lim->pl_lock);
657 pool_cache_put(plimit_cache, lim);
658 } while ((lim = sv_lim) != NULL);
659 }
660
661 struct pstats *
662 pstatscopy(struct pstats *ps)
663 {
664
665 struct pstats *newps;
666
667 newps = pool_cache_get(pstats_cache, PR_WAITOK);
668
669 memset(&newps->pstat_startzero, 0,
670 (unsigned) ((char *)&newps->pstat_endzero -
671 (char *)&newps->pstat_startzero));
672 memcpy(&newps->pstat_startcopy, &ps->pstat_startcopy,
673 ((char *)&newps->pstat_endcopy -
674 (char *)&newps->pstat_startcopy));
675
676 return (newps);
677
678 }
679
/*
 * Release a pstats structure obtained from pstatscopy().
 */
void
pstatsfree(struct pstats *ps)
{

	pool_cache_put(pstats_cache, ps);
}
686
687 /*
688 * sysctl interface in five parts
689 */
690
691 /*
692 * a routine for sysctl proc subtree helpers that need to pick a valid
693 * process by pid.
694 */
695 static int
696 sysctl_proc_findproc(struct lwp *l, struct proc **p2, pid_t pid)
697 {
698 struct proc *ptmp;
699 int error = 0;
700
701 if (pid == PROC_CURPROC)
702 ptmp = l->l_proc;
703 else if ((ptmp = pfind(pid)) == NULL)
704 error = ESRCH;
705
706 *p2 = ptmp;
707 return (error);
708 }
709
710 /*
711 * sysctl helper routine for setting a process's specific corefile
712 * name. picks the process based on the given pid and checks the
713 * correctness of the new value.
714 */
715 static int
716 sysctl_proc_corename(SYSCTLFN_ARGS)
717 {
718 struct proc *ptmp;
719 struct plimit *lim;
720 int error = 0, len;
721 char *cname;
722 char *ocore;
723 char *tmp;
724 struct sysctlnode node;
725
726 /*
727 * is this all correct?
728 */
729 if (namelen != 0)
730 return (EINVAL);
731 if (name[-1] != PROC_PID_CORENAME)
732 return (EINVAL);
733
734 /*
735 * whom are we tweaking?
736 */
737 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
738 if (error)
739 return (error);
740
741 /* XXX-elad */
742 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
743 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
744 if (error)
745 return (error);
746
747 if (newp == NULL) {
748 error = kauth_authorize_process(l->l_cred,
749 KAUTH_PROCESS_CORENAME, ptmp,
750 KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_GET), NULL, NULL);
751 if (error)
752 return (error);
753 }
754
755 /*
756 * let them modify a temporary copy of the core name
757 */
758 cname = PNBUF_GET();
759 lim = ptmp->p_limit;
760 mutex_enter(&lim->pl_lock);
761 strlcpy(cname, lim->pl_corename, MAXPATHLEN);
762 mutex_exit(&lim->pl_lock);
763
764 node = *rnode;
765 node.sysctl_data = cname;
766 error = sysctl_lookup(SYSCTLFN_CALL(&node));
767
768 /*
769 * if that failed, or they have nothing new to say, or we've
770 * heard it before...
771 */
772 if (error || newp == NULL)
773 goto done;
774 lim = ptmp->p_limit;
775 mutex_enter(&lim->pl_lock);
776 error = strcmp(cname, lim->pl_corename);
777 mutex_exit(&lim->pl_lock);
778 if (error == 0)
779 /* Unchanged */
780 goto done;
781
782 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CORENAME,
783 ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_CORENAME_SET), cname, NULL);
784 if (error)
785 return (error);
786
787 /*
788 * no error yet and cname now has the new core name in it.
789 * let's see if it looks acceptable. it must be either "core"
790 * or end in ".core" or "/core".
791 */
792 len = strlen(cname);
793 if (len < 4) {
794 error = EINVAL;
795 } else if (strcmp(cname + len - 4, "core") != 0) {
796 error = EINVAL;
797 } else if (len > 4 && cname[len - 5] != '/' && cname[len - 5] != '.') {
798 error = EINVAL;
799 }
800 if (error != 0) {
801 goto done;
802 }
803
804 /*
805 * hmm...looks good. now...where do we put it?
806 */
807 tmp = malloc(len + 1, M_TEMP, M_WAITOK|M_CANFAIL);
808 if (tmp == NULL) {
809 error = ENOMEM;
810 goto done;
811 }
812 memcpy(tmp, cname, len + 1);
813
814 lim_privatise(ptmp, false);
815 lim = ptmp->p_limit;
816 mutex_enter(&lim->pl_lock);
817 ocore = lim->pl_corename;
818 lim->pl_corename = tmp;
819 mutex_exit(&lim->pl_lock);
820 if (ocore != defcorename)
821 free(ocore, M_TEMP);
822
823 done:
824 PNBUF_PUT(cname);
825 return error;
826 }
827
828 /*
829 * sysctl helper routine for checking/setting a process's stop flags,
830 * one for fork and one for exec.
831 */
832 static int
833 sysctl_proc_stop(SYSCTLFN_ARGS)
834 {
835 struct proc *ptmp;
836 int i, f, error = 0;
837 struct sysctlnode node;
838
839 if (namelen != 0)
840 return (EINVAL);
841
842 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-2]);
843 if (error)
844 return (error);
845
846 /* XXX-elad */
847 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
848 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
849 if (error)
850 return (error);
851
852 switch (rnode->sysctl_num) {
853 case PROC_PID_STOPFORK:
854 f = PS_STOPFORK;
855 break;
856 case PROC_PID_STOPEXEC:
857 f = PS_STOPEXEC;
858 break;
859 case PROC_PID_STOPEXIT:
860 f = PS_STOPEXIT;
861 break;
862 default:
863 return (EINVAL);
864 }
865
866 i = (ptmp->p_flag & f) ? 1 : 0;
867 node = *rnode;
868 node.sysctl_data = &i;
869 error = sysctl_lookup(SYSCTLFN_CALL(&node));
870 if (error || newp == NULL)
871 return (error);
872
873 mutex_enter(ptmp->p_lock);
874 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_STOPFLAG,
875 ptmp, KAUTH_ARG(f), NULL, NULL);
876 if (error)
877 return (error);
878 if (i)
879 ptmp->p_sflag |= f;
880 else
881 ptmp->p_sflag &= ~f;
882 mutex_exit(ptmp->p_lock);
883
884 return (0);
885 }
886
887 /*
888 * sysctl helper routine for a process's rlimits as exposed by sysctl.
889 */
890 static int
891 sysctl_proc_plimit(SYSCTLFN_ARGS)
892 {
893 struct proc *ptmp;
894 u_int limitno;
895 int which, error = 0;
896 struct rlimit alim;
897 struct sysctlnode node;
898
899 if (namelen != 0)
900 return (EINVAL);
901
902 which = name[-1];
903 if (which != PROC_PID_LIMIT_TYPE_SOFT &&
904 which != PROC_PID_LIMIT_TYPE_HARD)
905 return (EINVAL);
906
907 limitno = name[-2] - 1;
908 if (limitno >= RLIM_NLIMITS)
909 return (EINVAL);
910
911 if (name[-3] != PROC_PID_LIMIT)
912 return (EINVAL);
913
914 error = sysctl_proc_findproc(l, &ptmp, (pid_t)name[-4]);
915 if (error)
916 return (error);
917
918 /* XXX-elad */
919 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, ptmp,
920 KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
921 if (error)
922 return (error);
923
924 /* Check if we can view limits. */
925 if (newp == NULL) {
926 error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
927 ptmp, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_GET), &alim,
928 KAUTH_ARG(which));
929 if (error)
930 return (error);
931 }
932
933 node = *rnode;
934 memcpy(&alim, &ptmp->p_rlimit[limitno], sizeof(alim));
935 if (which == PROC_PID_LIMIT_TYPE_HARD)
936 node.sysctl_data = &alim.rlim_max;
937 else
938 node.sysctl_data = &alim.rlim_cur;
939
940 error = sysctl_lookup(SYSCTLFN_CALL(&node));
941 if (error || newp == NULL)
942 return (error);
943
944 return (dosetrlimit(l, ptmp, limitno, &alim));
945 }
946
/*
 * and finally, the actually glue that sticks it to the tree
 */
SYSCTL_SETUP(sysctl_proc_setup, "sysctl proc subtree setup")
{

	/* The proc root and the "current process" alias node. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "proc", NULL,
		       NULL, 0, NULL, 0,
		       CTL_PROC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_ANYNUMBER,
		       CTLTYPE_NODE, "curproc",
		       SYSCTL_DESCR("Per-process settings"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, CTL_EOL);

	/* proc.<pid>.corename and the rlimit subtree root. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_STRING, "corename",
		       SYSCTL_DESCR("Core file name"),
		       sysctl_proc_corename, 0, NULL, MAXPATHLEN,
		       CTL_PROC, PROC_CURPROC, PROC_PID_CORENAME, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "rlimit",
		       SYSCTL_DESCR("Process limits"),
		       NULL, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, CTL_EOL);

/* Create one rlimit node with "soft" and "hard" leaves beneath it. */
#define create_proc_plimit(s, n) do {				\
	sysctl_createv(clog, 0, NULL, NULL,			\
		       CTLFLAG_PERMANENT,			\
		       CTLTYPE_NODE, s,				\
		       SYSCTL_DESCR("Process " s " limits"),	\
		       NULL, 0, NULL, 0,			\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \
		       CTL_EOL);				\
	sysctl_createv(clog, 0, NULL, NULL,			\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "soft",			\
		       SYSCTL_DESCR("Process soft " s " limit"), \
		       sysctl_proc_plimit, 0, NULL, 0,		\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \
		       PROC_PID_LIMIT_TYPE_SOFT, CTL_EOL);	\
	sysctl_createv(clog, 0, NULL, NULL,			\
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE, \
		       CTLTYPE_QUAD, "hard",			\
		       SYSCTL_DESCR("Process hard " s " limit"), \
		       sysctl_proc_plimit, 0, NULL, 0,		\
		       CTL_PROC, PROC_CURPROC, PROC_PID_LIMIT, n, \
		       PROC_PID_LIMIT_TYPE_HARD, CTL_EOL);	\
	} while (0/*CONSTCOND*/)

	create_proc_plimit("cputime",		PROC_PID_LIMIT_CPU);
	create_proc_plimit("filesize",		PROC_PID_LIMIT_FSIZE);
	create_proc_plimit("datasize",		PROC_PID_LIMIT_DATA);
	create_proc_plimit("stacksize",		PROC_PID_LIMIT_STACK);
	create_proc_plimit("coredumpsize",	PROC_PID_LIMIT_CORE);
	create_proc_plimit("memoryuse",		PROC_PID_LIMIT_RSS);
	create_proc_plimit("memorylocked",	PROC_PID_LIMIT_MEMLOCK);
	create_proc_plimit("maxproc",		PROC_PID_LIMIT_NPROC);
	create_proc_plimit("descriptors",	PROC_PID_LIMIT_NOFILE);
	create_proc_plimit("sbsize",		PROC_PID_LIMIT_SBSIZE);

#undef create_proc_plimit

	/* Stop-on-event debugging knobs. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopfork",
		       SYSCTL_DESCR("Stop process at fork(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPFORK, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexec",
		       SYSCTL_DESCR("Stop process at execve(2)"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXEC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE|CTLFLAG_ANYWRITE,
		       CTLTYPE_INT, "stopexit",
		       SYSCTL_DESCR("Stop process before completing exit"),
		       sysctl_proc_stop, 0, NULL, 0,
		       CTL_PROC, PROC_CURPROC, PROC_PID_STOPEXIT, CTL_EOL);
}
1034
/*
 * Late initialization for the uid accounting table.
 */
void
uid_init(void)
{

	/*
	 * Ensure that uid 0 is always in the user hash table, as
	 * sbreserve() expects it available from interrupt context.
	 */
	(void)uid_find(0);
}
1045
/*
 * Look up (creating on first use) the uidinfo record for 'uid'.
 * Lock-free: insertion is performed with a compare-and-swap on the
 * hash chain head, restarting the scan if the head changed under us.
 */
struct uidinfo *
uid_find(uid_t uid)
{
	struct uidinfo *uip, *uip_first, *newuip;
	struct uihashhead *uipp;

	uipp = UIHASH(uid);
	newuip = NULL;

	/*
	 * To make insertion atomic, abstraction of SLIST will be violated.
	 */
	uip_first = uipp->slh_first;
 again:
	SLIST_FOREACH(uip, uipp, ui_hash) {
		if (uip->ui_uid != uid)
			continue;
		/* Found an existing entry; discard the speculative one. */
		if (newuip != NULL)
			kmem_free(newuip, sizeof(*newuip));
		return uip;
	}
	if (newuip == NULL)
		newuip = kmem_zalloc(sizeof(*newuip), KM_SLEEP);
	newuip->ui_uid = uid;

	/*
	 * If atomic insert is unsuccessful, another thread might be
	 * allocated this 'uid', thus full re-check is needed.
	 */
	newuip->ui_hash.sle_next = uip_first;
	membar_producer();
	uip = atomic_cas_ptr(&uipp->slh_first, uip_first, newuip);
	if (uip != uip_first) {
		/* Head moved: rescan the chain starting at the new head. */
		uip_first = uip;
		goto again;
	}

	return newuip;
}
1085
1086 /*
1087 * Change the count associated with number of processes
1088 * a given user is using.
1089 */
1090 int
1091 chgproccnt(uid_t uid, int diff)
1092 {
1093 struct uidinfo *uip;
1094 long proccnt;
1095
1096 uip = uid_find(uid);
1097 proccnt = atomic_add_long_nv(&uip->ui_proccnt, diff);
1098 KASSERT(proccnt >= 0);
1099 return proccnt;
1100 }
1101
/*
 * Change the total socket buffer size charged to a user.  Returns 1 on
 * success (with *hiwat updated to 'to'), or 0 if growing past 'xmax',
 * in which case the charge is rolled back.
 */
int
chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t xmax)
{
	rlim_t nsb;
	const long diff = to - *hiwat;

	/* Optimistically apply the delta, then undo it on overshoot. */
	nsb = atomic_add_long_nv((long *)&uip->ui_sbsize, diff);
	if (diff > 0 && nsb > xmax) {
		atomic_add_long((long *)&uip->ui_sbsize, -diff);
		return 0;
	}
	*hiwat = to;
	KASSERT(nsb >= 0);
	return 1;
}
1117